rbbt-dm 1.1.44 → 1.1.49
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/rbbt/matrix.rb +20 -0
- data/lib/rbbt/matrix/barcode.rb +1 -1
- data/lib/rbbt/ml_task.rb +7 -0
- data/lib/rbbt/vector/model.rb +74 -2
- data/lib/rbbt/vector/model/svm.rb +1 -1
- data/test/rbbt/matrix/test_barcode.rb +1 -1
- data/test/rbbt/test_ml_task.rb +12 -0
- data/test/rbbt/vector/test_model.rb +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 73f692fa2323508c640c594a056975f7bd24659aaffe8e9c4fb5e1c19d5b4ee9
|
4
|
+
data.tar.gz: 9cc5cb88059630f1c65e6e2c6e9b9af5e1766aa97cc1192f088ee7043ee2cbef
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bab237c5d577abab5e4fed7f4567f15a95c20faa64dfbc3ba58dfb4054dc715093c2685e9166ce0df91148a3de0ea60aba1ea3de20a40120eb563806f15dd5a2
|
7
|
+
data.tar.gz: 20fe2789e521288b50ff070b5f4ad930115ffcc782fbb656048a601f17e8f7408cbdc14181023e3430194855e40fd3dee8fbea2cb7d28981296ceafa10c6abc7
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -176,6 +176,26 @@ class RbbtMatrix
|
|
176
176
|
matrix
|
177
177
|
end
|
178
178
|
|
179
|
+
def to_name(identifiers = nil)
|
180
|
+
require 'rbbt/tsv/change_id'
|
181
|
+
|
182
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
183
|
+
|
184
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Name", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
|
185
|
+
|
186
|
+
data = data_file.tsv(:cast => :to_f)
|
187
|
+
|
188
|
+
identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
|
189
|
+
|
190
|
+
data.change_key("Associated Gene Name", :identifiers => identifiers.reverse) do |v|
|
191
|
+
Misc.mean(v.compact)
|
192
|
+
end
|
193
|
+
end
|
194
|
+
subsets = self.subsets
|
195
|
+
matrix = RbbtMatrix.new file, labels, value_type, "Associated Gene Name", organism
|
196
|
+
matrix.subsets = subsets
|
197
|
+
matrix
|
198
|
+
end
|
179
199
|
def to_barcode_ruby(factor = 2)
|
180
200
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
181
201
|
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
data/lib/rbbt/ml_task.rb
ADDED
data/lib/rbbt/vector/model.rb
CHANGED
@@ -4,6 +4,20 @@ class VectorModel
|
|
4
4
|
attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
|
5
5
|
attr_accessor :features, :labels
|
6
6
|
|
7
|
+
def self.R_run(model_file, features, labels, code)
|
8
|
+
TmpFile.with_file do |feature_file|
|
9
|
+
Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
|
10
|
+
Open.write(feature_file + '.class', labels * "\n")
|
11
|
+
|
12
|
+
R.run <<-EOF
|
13
|
+
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
14
|
+
labels = scan("#{ feature_file }.class");
|
15
|
+
features = cbind(features, class = labels);
|
16
|
+
#{code}
|
17
|
+
EOF
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
7
21
|
def self.R_train(model_file, features, labels, code)
|
8
22
|
TmpFile.with_file do |feature_file|
|
9
23
|
Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
|
@@ -57,6 +71,11 @@ cat(paste(label, sep="\\n"));
|
|
57
71
|
@labels = []
|
58
72
|
end
|
59
73
|
|
74
|
+
def clear
|
75
|
+
@features = []
|
76
|
+
@labels = []
|
77
|
+
end
|
78
|
+
|
60
79
|
def add(element, label = nil)
|
61
80
|
@features << extract_features.call(element)
|
62
81
|
@labels << label unless label.nil?
|
@@ -67,16 +86,20 @@ cat(paste(label, sep="\\n"));
|
|
67
86
|
when Proc === train_model
|
68
87
|
train_model.call(@model_file, @features, @labels)
|
69
88
|
when String === train_model
|
70
|
-
|
89
|
+
VectorModel.R_train(@model_file, @features, @labels, train_model)
|
71
90
|
end
|
72
91
|
end
|
73
92
|
|
93
|
+
def run(code)
|
94
|
+
VectorModel.R_run(@model_file, @features, @labels, code)
|
95
|
+
end
|
96
|
+
|
74
97
|
def eval(element)
|
75
98
|
case
|
76
99
|
when Proc === eval_model
|
77
100
|
eval_model.call(@model_file, extract_features.call(element), false)
|
78
101
|
when String === eval_model
|
79
|
-
|
102
|
+
VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
|
80
103
|
end
|
81
104
|
end
|
82
105
|
|
@@ -119,4 +142,53 @@ cat(paste(label, sep="\\n"));
|
|
119
142
|
|
120
143
|
acc
|
121
144
|
end
|
145
|
+
|
146
|
+
def cross_validation(folds = 10)
|
147
|
+
|
148
|
+
res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
|
149
|
+
|
150
|
+
feature_folds = Misc.divide(@features, folds)
|
151
|
+
labels_folds = Misc.divide(@labels, folds)
|
152
|
+
|
153
|
+
folds.times do |fix|
|
154
|
+
|
155
|
+
test_set = feature_folds[fix]
|
156
|
+
train_set = feature_folds.values_at(*((0..9).to_a - [fix])).inject([]){|acc,e| acc += e; acc}
|
157
|
+
|
158
|
+
test_labels = labels_folds[fix]
|
159
|
+
train_labels = labels_folds.values_at(*((0..9).to_a - [fix])).flatten
|
160
|
+
|
161
|
+
tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
|
162
|
+
|
163
|
+
@features = train_set
|
164
|
+
@labels = train_labels
|
165
|
+
self.train
|
166
|
+
predictions = self.eval_list test_set, false
|
167
|
+
|
168
|
+
test_labels.zip(predictions).each do |gs,pred|
|
169
|
+
gs = gs.to_i
|
170
|
+
pred = pred > 0.5 ? 1 : 0
|
171
|
+
tp += 1 if gs == pred && gs == 1
|
172
|
+
tn += 1 if gs == pred && gs == 0
|
173
|
+
fp += 1 if gs == 0 && pred == 1
|
174
|
+
fn += 1 if gs == 1 && pred == 0
|
175
|
+
end
|
176
|
+
|
177
|
+
p = tp + fn
|
178
|
+
pp = tp + fp
|
179
|
+
|
180
|
+
pr = tp.to_f / pp
|
181
|
+
re = tp.to_f / p
|
182
|
+
|
183
|
+
f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
|
184
|
+
|
185
|
+
Misc.fingerprint([tp,tn,fp,fn,pr,re,f1])
|
186
|
+
|
187
|
+
Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
|
188
|
+
|
189
|
+
res[fix] = [tp,tn,fp,fn,pr,re,f1]
|
190
|
+
end
|
191
|
+
|
192
|
+
res
|
193
|
+
end
|
122
194
|
end
|
@@ -11,7 +11,7 @@ class TestBarcode < Test::Unit::TestCase
|
|
11
11
|
data["G4"] = [6,6,1,1,1,1]
|
12
12
|
|
13
13
|
TmpFile.with_file(data.to_s) do |file|
|
14
|
-
m =
|
14
|
+
m = RbbtMatrix.new file
|
15
15
|
m.barcode(file+'.barcode')
|
16
16
|
tsv = TSV.open(file+'.barcode')
|
17
17
|
assert tsv["G2"] = [0,1,0,1,0,1]
|
@@ -34,7 +34,7 @@ class TestVectorModel < Test::Unit::TestCase
|
|
34
34
|
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
|
35
35
|
labels = scan("#{ feature_file }.class", what=numeric());
|
36
36
|
features = cbind(features, class = labels);
|
37
|
-
|
37
|
+
rbbt.require('e1071')
|
38
38
|
model = svm(class ~ ., data = features)
|
39
39
|
save(model, file="#{ model_file }");
|
40
40
|
EOF
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.44
|
4
|
+
version: 1.1.49
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: priority_queue_cxx17
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/rbbt/matrix/barcode.rb
|
96
96
|
- lib/rbbt/matrix/differential.rb
|
97
97
|
- lib/rbbt/matrix/knowledge_base.rb
|
98
|
+
- lib/rbbt/ml_task.rb
|
98
99
|
- lib/rbbt/network/paths.rb
|
99
100
|
- lib/rbbt/plots/bar.rb
|
100
101
|
- lib/rbbt/plots/heatmap.rb
|
@@ -115,6 +116,7 @@ files:
|
|
115
116
|
- test/rbbt/statistics/test_fisher.rb
|
116
117
|
- test/rbbt/statistics/test_hypergeometric.rb
|
117
118
|
- test/rbbt/statistics/test_random_walk.rb
|
119
|
+
- test/rbbt/test_ml_task.rb
|
118
120
|
- test/rbbt/test_stan.rb
|
119
121
|
- test/rbbt/vector/model/test_svm.rb
|
120
122
|
- test/rbbt/vector/test_model.rb
|
@@ -137,8 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
137
139
|
- !ruby/object:Gem::Version
|
138
140
|
version: '0'
|
139
141
|
requirements: []
|
140
|
-
|
141
|
-
rubygems_version: 2.6.13
|
142
|
+
rubygems_version: 3.0.6
|
142
143
|
signing_key:
|
143
144
|
specification_version: 4
|
144
145
|
summary: Data-mining and statistics
|
@@ -149,6 +150,7 @@ test_files:
|
|
149
150
|
- test/rbbt/statistics/test_fisher.rb
|
150
151
|
- test/rbbt/statistics/test_fdr.rb
|
151
152
|
- test/rbbt/statistics/test_hypergeometric.rb
|
153
|
+
- test/rbbt/test_ml_task.rb
|
152
154
|
- test/rbbt/vector/test_model.rb
|
153
155
|
- test/rbbt/vector/model/test_svm.rb
|
154
156
|
- test/rbbt/test_stan.rb
|