rbbt-dm 1.1.44 → 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/lib/rbbt/matrix.rb +20 -0
- data/lib/rbbt/matrix/barcode.rb +1 -1
- data/lib/rbbt/ml_task.rb +7 -0
- data/lib/rbbt/vector/model.rb +74 -2
- data/lib/rbbt/vector/model/svm.rb +1 -1
- data/test/rbbt/matrix/test_barcode.rb +1 -1
- data/test/rbbt/test_ml_task.rb +12 -0
- data/test/rbbt/vector/test_model.rb +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 73f692fa2323508c640c594a056975f7bd24659aaffe8e9c4fb5e1c19d5b4ee9
|
4
|
+
data.tar.gz: 9cc5cb88059630f1c65e6e2c6e9b9af5e1766aa97cc1192f088ee7043ee2cbef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bab237c5d577abab5e4fed7f4567f15a95c20faa64dfbc3ba58dfb4054dc715093c2685e9166ce0df91148a3de0ea60aba1ea3de20a40120eb563806f15dd5a2
|
7
|
+
data.tar.gz: 20fe2789e521288b50ff070b5f4ad930115ffcc782fbb656048a601f17e8f7408cbdc14181023e3430194855e40fd3dee8fbea2cb7d28981296ceafa10c6abc7
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -176,6 +176,26 @@ class RbbtMatrix
|
|
176
176
|
matrix
|
177
177
|
end
|
178
178
|
|
179
|
+
# Builds a new RbbtMatrix whose rows are keyed by "Associated Gene Name".
#
# The translated table is produced inside Persist.persist (prefix "Name",
# stored under RbbtMatrix.matrix_dir.values, re-checked against data_file),
# so repeated calls can reuse the persisted result.
#
# identifiers - optional extra identifier source(s); they are combined with
#               @identifiers, the identifiers attached to the data TSV and
#               Organism.identifiers(organism).
#
# Returns a new RbbtMatrix built from the persisted file with this matrix's
# labels, value_type, organism and subsets.
def to_name(identifiers = nil)
  # Loaded lazily: only this method needs the change_key support.
  require 'rbbt/tsv/change_id'

  # :no_load => true -- presumably makes persist hand back the persisted file
  # rather than the loaded TSV; TODO confirm against Persist.persist.
  file = Persist.persist(data_file, :tsv, :prefix => "Name", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
    data = data_file.tsv(:cast => :to_f)

    # Gather every available identifier source, dropping nils and duplicates.
    id_sources = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq

    # The sources are handed over in reverse order, as the original code did.
    # When several values end up under one key the block receives them and
    # they are collapsed to the mean of the non-nil ones.
    data.change_key("Associated Gene Name", :identifiers => id_sources.reverse) do |values|
      Misc.mean(values.compact)
    end
  end

  matrix = RbbtMatrix.new file, labels, value_type, "Associated Gene Name", organism
  matrix.subsets = self.subsets
  matrix
end
|
179
199
|
def to_barcode_ruby(factor = 2)
|
180
200
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
181
201
|
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
data/lib/rbbt/ml_task.rb
ADDED
data/lib/rbbt/vector/model.rb
CHANGED
@@ -4,6 +4,20 @@ class VectorModel
|
|
4
4
|
attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
|
5
5
|
attr_accessor :features, :labels
|
6
6
|
|
7
|
+
# Runs arbitrary R +code+ with the given data preloaded.
#
# The feature rows are written tab-separated to a temporary file and the
# labels, one per line, to "<that file>.class". The R script reads both,
# binds the labels to the features as a `class` column, and then executes
# +code+.
#
# model_file - accepted for signature parity with the other R_* helpers; it
#              is not referenced by this method.
# features   - list of feature rows (each row is joined with tabs).
# labels     - list of labels (joined with newlines, read back by R's scan).
# code       - R source appended after the data-loading preamble.
#
# Returns whatever R.run returns.
def self.R_run(model_file, features, labels, code)
  TmpFile.with_file do |tmp_path|
    feature_rows = features.map { |row| row.join("\t") }
    Open.write(tmp_path, feature_rows.join("\n"))
    Open.write(tmp_path + '.class', labels.join("\n"))

    script = <<-EOF
features = read.table("#{ tmp_path }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ tmp_path }.class");
features = cbind(features, class = labels);
#{code}
    EOF

    R.run script
  end
end
|
20
|
+
|
7
21
|
def self.R_train(model_file, features, labels, code)
|
8
22
|
TmpFile.with_file do |feature_file|
|
9
23
|
Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
|
@@ -57,6 +71,11 @@ cat(paste(label, sep="\\n"));
|
|
57
71
|
@labels = []
|
58
72
|
end
|
59
73
|
|
74
|
+
# Discards every accumulated feature vector and label, leaving both
# collections as fresh, independent empty arrays.
#
# Returns the new (empty) labels array.
def clear
  @features = Array.new
  @labels = Array.new
end
|
78
|
+
|
60
79
|
def add(element, label = nil)
|
61
80
|
@features << extract_features.call(element)
|
62
81
|
@labels << label unless label.nil?
|
@@ -67,16 +86,20 @@ cat(paste(label, sep="\\n"));
|
|
67
86
|
when Proc === train_model
|
68
87
|
train_model.call(@model_file, @features, @labels)
|
69
88
|
when String === train_model
|
70
|
-
|
89
|
+
VectorModel.R_train(@model_file, @features, @labels, train_model)
|
71
90
|
end
|
72
91
|
end
|
73
92
|
|
93
|
+
# Executes ad-hoc R +code+ against this model's current state by delegating
# to VectorModel.R_run with the model file, the accumulated features and the
# accumulated labels.
#
# Returns whatever VectorModel.R_run returns.
def run(code)
  r_args = [@model_file, @features, @labels, code]
  VectorModel.R_run(*r_args)
end
|
96
|
+
|
74
97
|
# Evaluates a single +element+ with the configured eval_model.
#
# The element is first turned into a feature vector by extract_features.
# A Proc eval_model is called with (model_file, features, false); a String
# eval_model is treated as R code and handed to VectorModel.R_eval.
# The literal `false` presumably marks "single element, not a list" --
# TODO confirm against R_eval / eval_list.
#
# Returns the evaluator's result, or nil when eval_model is neither a Proc
# nor a String.
def eval(element)
  evaluator = eval_model

  case evaluator
  when Proc
    evaluator.call(@model_file, extract_features.call(element), false)
  when String
    VectorModel.R_eval(@model_file, extract_features.call(element), false, evaluator)
  end
end
|
82
105
|
|
@@ -119,4 +142,53 @@ cat(paste(label, sep="\\n"));
|
|
119
142
|
|
120
143
|
acc
|
121
144
|
end
|
145
|
+
|
146
|
+
# Runs a k-fold cross-validation over the accumulated features and labels.
#
# The data is split into +folds+ partitions with Misc.divide. For each fold
# the model is trained on all the other partitions and the held-out one is
# predicted with eval_list(test_set, false) -- the `false` presumably means
# the elements are already feature vectors; TODO confirm against eval_list.
#
# Scoring is binary: gold labels are converted with to_i, predictions above
# 0.5 count as class 1 and everything else as class 0.
#
# folds - number of partitions (default 10).
#
# Returns the TSV-setup Hash mapping the fold index to
# [TP, TN, FP, FN, precision, recall, F1]. Precision, recall and F1 are
# Float divisions, so a fold without positives yields NaN rather than
# raising.
#
# The full @features / @labels are restored before returning (even if a fold
# raises). Whatever `train` produced last was fitted on the final fold's
# training subset; call `train` again to refit on all the data.
def cross_validation(folds = 10)
  res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")

  # Keep the complete data set: the loop below swaps in per-fold subsets.
  all_features = @features
  all_labels = @labels

  feature_folds = Misc.divide(all_features, folds)
  labels_folds = Misc.divide(all_labels, folds)

  begin
    folds.times do |fix|
      # Every fold except the held-out one. This must follow +folds+; a fixed
      # 0..9 range breaks for any value other than 10.
      train_folds = (0...folds).to_a - [fix]

      test_set = feature_folds[fix]
      test_labels = labels_folds[fix]

      # One level only: each fold is a list of feature vectors.
      @features = feature_folds.values_at(*train_folds).flatten(1)
      @labels = labels_folds.values_at(*train_folds).flatten
      train

      predictions = eval_list test_set, false

      tp = tn = fp = fn = 0
      test_labels.zip(predictions).each do |gs, pred|
        gs = gs.to_i
        pred = pred > 0.5 ? 1 : 0
        tp += 1 if gs == 1 && pred == 1
        tn += 1 if gs == 0 && pred == 0
        fp += 1 if gs == 0 && pred == 1
        fn += 1 if gs == 1 && pred == 0
      end

      actual_positives = tp + fn
      predicted_positives = tp + fp

      pr = tp.to_f / predicted_positives
      re = tp.to_f / actual_positives
      f1 = (2.0 * tp) / (2.0 * tp + fp + fn)

      Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"

      res[fix] = [tp, tn, fp, fn, pr, re, f1]
    end
  ensure
    @features = all_features
    @labels = all_labels
  end

  res
end
|
122
194
|
end
|
@@ -11,7 +11,7 @@ class TestBarcode < Test::Unit::TestCase
|
|
11
11
|
data["G4"] = [6,6,1,1,1,1]
|
12
12
|
|
13
13
|
TmpFile.with_file(data.to_s) do |file|
|
14
|
-
m =
|
14
|
+
m = RbbtMatrix.new file
|
15
15
|
m.barcode(file+'.barcode')
|
16
16
|
tsv = TSV.open(file+'.barcode')
|
17
17
|
assert tsv["G2"] = [0,1,0,1,0,1]
|
@@ -34,7 +34,7 @@ class TestVectorModel < Test::Unit::TestCase
|
|
34
34
|
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
35
35
|
labels = scan("#{ feature_file }.class", what=numeric());
|
36
36
|
features = cbind(features, class = labels);
|
37
|
-
|
37
|
+
rbbt.require('e1071')
|
38
38
|
model = svm(class ~ ., data = features)
|
39
39
|
save(model, file="#{ model_file }");
|
40
40
|
EOF
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.49
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: priority_queue_cxx17
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/matrix/barcode.rb
|
96
96
|
- lib/rbbt/matrix/differential.rb
|
97
97
|
- lib/rbbt/matrix/knowledge_base.rb
|
98
|
+
- lib/rbbt/ml_task.rb
|
98
99
|
- lib/rbbt/network/paths.rb
|
99
100
|
- lib/rbbt/plots/bar.rb
|
100
101
|
- lib/rbbt/plots/heatmap.rb
|
@@ -115,6 +116,7 @@ files:
|
|
115
116
|
- test/rbbt/statistics/test_fisher.rb
|
116
117
|
- test/rbbt/statistics/test_hypergeometric.rb
|
117
118
|
- test/rbbt/statistics/test_random_walk.rb
|
119
|
+
- test/rbbt/test_ml_task.rb
|
118
120
|
- test/rbbt/test_stan.rb
|
119
121
|
- test/rbbt/vector/model/test_svm.rb
|
120
122
|
- test/rbbt/vector/test_model.rb
|
@@ -137,8 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
139
|
- !ruby/object:Gem::Version
|
138
140
|
version: '0'
|
139
141
|
requirements: []
|
140
|
-
|
141
|
-
rubygems_version: 2.6.13
|
142
|
+
rubygems_version: 3.0.6
|
142
143
|
signing_key:
|
143
144
|
specification_version: 4
|
144
145
|
summary: Data-mining and statistics
|
@@ -149,6 +150,7 @@ test_files:
|
|
149
150
|
- test/rbbt/statistics/test_fisher.rb
|
150
151
|
- test/rbbt/statistics/test_fdr.rb
|
151
152
|
- test/rbbt/statistics/test_hypergeometric.rb
|
153
|
+
- test/rbbt/test_ml_task.rb
|
152
154
|
- test/rbbt/vector/test_model.rb
|
153
155
|
- test/rbbt/vector/model/test_svm.rb
|
154
156
|
- test/rbbt/test_stan.rb
|