rbbt-dm 1.1.44 → 1.1.49

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fa73301479755273960bc84aec02ba3684d834ba
4
- data.tar.gz: 5ea346bea9926dda33f3ccdb78651f5987abfb96
2
+ SHA256:
3
+ metadata.gz: 73f692fa2323508c640c594a056975f7bd24659aaffe8e9c4fb5e1c19d5b4ee9
4
+ data.tar.gz: 9cc5cb88059630f1c65e6e2c6e9b9af5e1766aa97cc1192f088ee7043ee2cbef
5
5
  SHA512:
6
- metadata.gz: f7f3774226a153552bf323ed058b21f405767810dce814166b605a87a6c7473ec5b38a4a56df82ba553c7f5d1521e2c04c401ae7a2b4df00281bf3d9d4ddc5dd
7
- data.tar.gz: c48a92abf268fe4acf1a7d405df8e93446fff00a7ccdbca16b4e0e1ffae8607a40eb67f2495e1908dc34dba2ba61d6079c9b2849078712786036d95f06531ca3
6
+ metadata.gz: bab237c5d577abab5e4fed7f4567f15a95c20faa64dfbc3ba58dfb4054dc715093c2685e9166ce0df91148a3de0ea60aba1ea3de20a40120eb563806f15dd5a2
7
+ data.tar.gz: 20fe2789e521288b50ff070b5f4ad930115ffcc782fbb656048a601f17e8f7408cbdc14181023e3430194855e40fd3dee8fbea2cb7d28981296ceafa10c6abc7
@@ -176,6 +176,26 @@ class RbbtMatrix
176
176
  matrix
177
177
  end
178
178
 
179
# Builds a version of this matrix keyed by "Associated Gene Name".
#
# The data file is loaded as a TSV (values cast with :to_f) and re-keyed
# through change_key; the block handed to change_key reduces each group of
# values to the mean of its non-nil entries (presumably to merge rows that
# map to the same name -- confirm against change_key). The translation runs
# inside Persist.persist under the "Name" prefix, checked against the data
# file.
#
# identifiers - optional extra identifier source(s); combined with
#               @identifiers, the identifiers of the loaded data and
#               Organism.identifiers(organism). The combined list is passed
#               to change_key in reverse order.
#
# Returns a new RbbtMatrix built over the persisted file, carrying over this
# matrix's labels, value type, organism and subsets.
def to_name(identifiers = nil)
  # Loaded lazily: only needed when keys are actually translated.
  require 'rbbt/tsv/change_id'

  file = Persist.persist(data_file, :tsv, :prefix => "Name", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
    data = data_file.tsv(:cast => :to_f)

    sources = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq

    data.change_key("Associated Gene Name", :identifiers => sources.reverse) do |values|
      Misc.mean(values.compact)
    end
  end

  matrix = RbbtMatrix.new file, labels, value_type, "Associated Gene Name", organism
  matrix.subsets = self.subsets
  matrix
end
179
199
  def to_barcode_ruby(factor = 2)
180
200
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
181
201
 
@@ -38,7 +38,7 @@ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.r
38
38
  end
39
39
  end
40
40
  key = key.first if Array === key
41
- [key, bars]
41
+ [key, bars]
42
42
  end
43
43
  end
44
44
 
@@ -0,0 +1,7 @@
1
# Placeholder for a machine-learning task. Both methods are currently empty
# stubs: construction takes no arguments and pre_process does nothing.
class MLTask
  # Takes no arguments and sets up no state.
  def initialize; end

  # No-op; returns nil. A block may be passed but is never called.
  def pre_process; end
end
@@ -4,6 +4,20 @@ class VectorModel
4
4
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
5
5
  attr_accessor :features, :labels
6
6
 
7
# Runs arbitrary R code with the given features and labels loaded.
#
# The features are written to a temporary file as tab-separated rows and the
# labels to a sibling file with a '.class' suffix, one per line. The R script
# reads both, attaches the labels to the feature table as a `class` column
# and then executes +code+.
#
# model_file - not referenced by this method.
# features   - list of feature vectors (each joined with tabs).
# labels     - list of labels; read back in R with scan().
# code       - R source appended to the script after the data is loaded.
#
# Returns whatever TmpFile.with_file returns for the block, whose last
# expression is the R.run call.
def self.R_run(model_file, features, labels, code)
  TmpFile.with_file do |feature_file|
    label_file = feature_file + '.class'

    feature_rows = features.collect { |feats| feats * "\t" }
    Open.write(feature_file, feature_rows * "\n")
    Open.write(label_file, labels * "\n")

    script = <<-EOF
features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
labels = scan("#{ feature_file }.class");
features = cbind(features, class = labels);
#{code}
    EOF

    R.run script
  end
end
20
+
7
21
  def self.R_train(model_file, features, labels, code)
8
22
  TmpFile.with_file do |feature_file|
9
23
  Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
@@ -57,6 +71,11 @@ cat(paste(label, sep="\\n"));
57
71
  @labels = []
58
72
  end
59
73
 
74
# Discards all accumulated samples: @features and @labels are each replaced
# by a new, empty array. Returns the new (empty) labels array.
def clear
  @features = Array.new
  @labels = Array.new
end
78
+
60
79
  def add(element, label = nil)
61
80
  @features << extract_features.call(element)
62
81
  @labels << label unless label.nil?
@@ -67,16 +86,20 @@ cat(paste(label, sep="\\n"));
67
86
  when Proc === train_model
68
87
  train_model.call(@model_file, @features, @labels)
69
88
  when String === train_model
70
- SVMModel.R_train(@model_file, @features, @labels, train_model)
89
+ VectorModel.R_train(@model_file, @features, @labels, train_model)
71
90
  end
72
91
  end
73
92
 
93
# Executes the R source in +code+ through VectorModel.R_run, handing it this
# model's @model_file, @features and @labels. Returns R_run's result.
def run(code)
  VectorModel.R_run(
    @model_file,
    @features,
    @labels,
    code
  )
end
96
+
74
97
  def eval(element)
75
98
  case
76
99
  when Proc === eval_model
77
100
  eval_model.call(@model_file, extract_features.call(element), false)
78
101
  when String === eval_model
79
- SVMModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
102
+ VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
80
103
  end
81
104
  end
82
105
 
@@ -119,4 +142,53 @@ cat(paste(label, sep="\\n"));
119
142
 
120
143
  acc
121
144
  end
145
+
146
# Runs a k-fold cross-validation over the accumulated @features and @labels.
#
# Samples and labels are partitioned with Misc.divide. For each fold the
# model is trained (#train) on all the other folds and evaluated
# (#eval_list) on the held-out one. Gold labels are cast with to_i and
# predictions are thresholded at 0.5, so the counts only make sense for a
# binary 0/1 task.
#
# folds - number of partitions to use (default 10).
#
# Returns the object produced by TSV.setup, keyed by fold index, where each
# entry is [TP, TN, FP, FN, P, R, F1]. Precision, recall and F1 are NaN for a
# fold whose denominator is zero (e.g. no positive predictions).
#
# @features and @labels are reassigned while each fold is trained and are
# restored to the full sample set before returning, even on error. The model
# itself is left as trained on the last fold.
def cross_validation(folds = 10)
  res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")

  # Training needs @features/@labels to hold only the training subset, so
  # keep the complete set around to put it back afterwards.
  all_features = @features
  all_labels = @labels

  feature_folds = Misc.divide(all_features, folds)
  labels_folds = Misc.divide(all_labels, folds)

  begin
    folds.times do |fix|
      # Indices of every fold except the held-out one. Derived from +folds+
      # rather than a fixed 0..9 so fold counts other than 10 work.
      train_ixs = (0...folds).to_a - [fix]

      test_set = feature_folds[fix]
      train_set = feature_folds.values_at(*train_ixs).inject([]) { |acc, e| acc + e }

      test_labels = labels_folds[fix]
      train_labels = labels_folds.values_at(*train_ixs).flatten

      @features = train_set
      @labels = train_labels
      self.train
      predictions = self.eval_list test_set, false

      tp = fp = tn = fn = 0

      test_labels.zip(predictions).each do |gs, pred|
        gs = gs.to_i
        pred = pred > 0.5 ? 1 : 0
        tp += 1 if gs == pred && gs == 1
        tn += 1 if gs == pred && gs == 0
        fp += 1 if gs == 0 && pred == 1
        fn += 1 if gs == 1 && pred == 0
      end

      actual_positives = tp + fn
      predicted_positives = tp + fp

      pr = tp.to_f / predicted_positives
      re = tp.to_f / actual_positives

      f1 = (2.0 * tp) / (2.0 * tp + fp + fn)

      Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"

      res[fix] = [tp, tn, fp, fn, pr, re, f1]
    end
  ensure
    @features = all_features
    @labels = all_labels
  end

  res
end
122
194
  end
@@ -9,7 +9,7 @@ class SVMModel < VectorModel
9
9
 
10
10
  @train_model =<<-EOF
11
11
  library(e1071);
12
- model = svm(class ~ ., data = features);
12
+ model = svm(class ~ ., data = features, scale=c(0));
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
@@ -11,7 +11,7 @@ class TestBarcode < Test::Unit::TestCase
11
11
  data["G4"] = [6,6,1,1,1,1]
12
12
 
13
13
  TmpFile.with_file(data.to_s) do |file|
14
- m = Matrix.new file
14
+ m = RbbtMatrix.new file
15
15
  m.barcode(file+'.barcode')
16
16
  tsv = TSV.open(file+'.barcode')
17
17
  assert tsv["G2"] = [0,1,0,1,0,1]
@@ -0,0 +1,12 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/ml_task'
3
+
4
class TestMLTask < Test::Unit::TestCase
  # Smoke test: an MLTask can be instantiated and pre_process accepts a block
  # without raising.
  def test_MLTask
    # The instance must be bound to the same name the call below uses;
    # binding it to a different local raised NameError on `ml_task`.
    ml_task = MLTask.new

    ml_task.pre_process do
    end
  end
end
12
+
@@ -34,7 +34,7 @@ class TestVectorModel < Test::Unit::TestCase
34
34
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
35
35
  labels = scan("#{ feature_file }.class", what=numeric());
36
36
  features = cbind(features, class = labels);
37
- library(e1071)
37
+ rbbt.require('e1071')
38
38
  model = svm(class ~ ., data = features)
39
39
  save(model, file="#{ model_file }");
40
40
  EOF
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.44
4
+ version: 1.1.49
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-02 00:00:00.000000000 Z
11
+ date: 2020-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: priority_queue_cxx
42
+ name: priority_queue_cxx17
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/matrix/barcode.rb
96
96
  - lib/rbbt/matrix/differential.rb
97
97
  - lib/rbbt/matrix/knowledge_base.rb
98
+ - lib/rbbt/ml_task.rb
98
99
  - lib/rbbt/network/paths.rb
99
100
  - lib/rbbt/plots/bar.rb
100
101
  - lib/rbbt/plots/heatmap.rb
@@ -115,6 +116,7 @@ files:
115
116
  - test/rbbt/statistics/test_fisher.rb
116
117
  - test/rbbt/statistics/test_hypergeometric.rb
117
118
  - test/rbbt/statistics/test_random_walk.rb
119
+ - test/rbbt/test_ml_task.rb
118
120
  - test/rbbt/test_stan.rb
119
121
  - test/rbbt/vector/model/test_svm.rb
120
122
  - test/rbbt/vector/test_model.rb
@@ -137,8 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
139
  - !ruby/object:Gem::Version
138
140
  version: '0'
139
141
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.6.13
142
+ rubygems_version: 3.0.6
142
143
  signing_key:
143
144
  specification_version: 4
144
145
  summary: Data-mining and statistics
@@ -149,6 +150,7 @@ test_files:
149
150
  - test/rbbt/statistics/test_fisher.rb
150
151
  - test/rbbt/statistics/test_fdr.rb
151
152
  - test/rbbt/statistics/test_hypergeometric.rb
153
+ - test/rbbt/test_ml_task.rb
152
154
  - test/rbbt/vector/test_model.rb
153
155
  - test/rbbt/vector/model/test_svm.rb
154
156
  - test/rbbt/test_stan.rb