rbbt-dm 1.1.44 → 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: fa73301479755273960bc84aec02ba3684d834ba
4
- data.tar.gz: 5ea346bea9926dda33f3ccdb78651f5987abfb96
2
+ SHA256:
3
+ metadata.gz: 73f692fa2323508c640c594a056975f7bd24659aaffe8e9c4fb5e1c19d5b4ee9
4
+ data.tar.gz: 9cc5cb88059630f1c65e6e2c6e9b9af5e1766aa97cc1192f088ee7043ee2cbef
5
5
  SHA512:
6
- metadata.gz: f7f3774226a153552bf323ed058b21f405767810dce814166b605a87a6c7473ec5b38a4a56df82ba553c7f5d1521e2c04c401ae7a2b4df00281bf3d9d4ddc5dd
7
- data.tar.gz: c48a92abf268fe4acf1a7d405df8e93446fff00a7ccdbca16b4e0e1ffae8607a40eb67f2495e1908dc34dba2ba61d6079c9b2849078712786036d95f06531ca3
6
+ metadata.gz: bab237c5d577abab5e4fed7f4567f15a95c20faa64dfbc3ba58dfb4054dc715093c2685e9166ce0df91148a3de0ea60aba1ea3de20a40120eb563806f15dd5a2
7
+ data.tar.gz: 20fe2789e521288b50ff070b5f4ad930115ffcc782fbb656048a601f17e8f7408cbdc14181023e3430194855e40fd3dee8fbea2cb7d28981296ceafa10c6abc7
@@ -176,6 +176,26 @@ class RbbtMatrix
176
176
  matrix
177
177
  end
178
178
 
179
+ def to_name(identifiers = nil)
180
+ require 'rbbt/tsv/change_id'
181
+
182
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file
183
+
184
+ file = Persist.persist(data_file, :tsv, :prefix => "Name", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
185
+
186
+ data = data_file.tsv(:cast => :to_f)
187
+
188
+ identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
189
+
190
+ data.change_key("Associated Gene Name", :identifiers => identifiers.reverse) do |v|
191
+ Misc.mean(v.compact)
192
+ end
193
+ end
194
+ subsets = self.subsets
195
+ matrix = RbbtMatrix.new file, labels, value_type, "Associated Gene Name", organism
196
+ matrix.subsets = subsets
197
+ matrix
198
+ end
179
199
  def to_barcode_ruby(factor = 2)
180
200
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
181
201
 
@@ -38,7 +38,7 @@ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.r
38
38
  end
39
39
  end
40
40
  key = key.first if Array === key
41
- [key, bars]
41
+ [key, bars]
42
42
  end
43
43
  end
44
44
 
@@ -0,0 +1,7 @@
1
+ class MLTask
2
+ def initialize
3
+ end
4
+
5
+ def pre_process
6
+ end
7
+ end
@@ -4,6 +4,20 @@ class VectorModel
4
4
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
5
5
  attr_accessor :features, :labels
6
6
 
7
+ def self.R_run(model_file, features, labels, code)
8
+ TmpFile.with_file do |feature_file|
9
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
10
+ Open.write(feature_file + '.class', labels * "\n")
11
+
12
+ R.run <<-EOF
13
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
14
+ labels = scan("#{ feature_file }.class");
15
+ features = cbind(features, class = labels);
16
+ #{code}
17
+ EOF
18
+ end
19
+ end
20
+
7
21
  def self.R_train(model_file, features, labels, code)
8
22
  TmpFile.with_file do |feature_file|
9
23
  Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
@@ -57,6 +71,11 @@ cat(paste(label, sep="\\n"));
57
71
  @labels = []
58
72
  end
59
73
 
74
+ def clear
75
+ @features = []
76
+ @labels = []
77
+ end
78
+
60
79
  def add(element, label = nil)
61
80
  @features << extract_features.call(element)
62
81
  @labels << label unless label.nil?
@@ -67,16 +86,20 @@ cat(paste(label, sep="\\n"));
67
86
  when Proc === train_model
68
87
  train_model.call(@model_file, @features, @labels)
69
88
  when String === train_model
70
- SVMModel.R_train(@model_file, @features, @labels, train_model)
89
+ VectorModel.R_train(@model_file, @features, @labels, train_model)
71
90
  end
72
91
  end
73
92
 
93
+ def run(code)
94
+ VectorModel.R_run(@model_file, @features, @labels, code)
95
+ end
96
+
74
97
  def eval(element)
75
98
  case
76
99
  when Proc === eval_model
77
100
  eval_model.call(@model_file, extract_features.call(element), false)
78
101
  when String === eval_model
79
- SVMModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
102
+ VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
80
103
  end
81
104
  end
82
105
 
@@ -119,4 +142,53 @@ cat(paste(label, sep="\\n"));
119
142
 
120
143
  acc
121
144
  end
145
+
146
+ def cross_validation(folds = 10)
147
+
148
+ res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
149
+
150
+ feature_folds = Misc.divide(@features, folds)
151
+ labels_folds = Misc.divide(@labels, folds)
152
+
153
+ folds.times do |fix|
154
+
155
+ test_set = feature_folds[fix]
156
+ train_set = feature_folds.values_at(*((0..9).to_a - [fix])).inject([]){|acc,e| acc += e; acc}
157
+
158
+ test_labels = labels_folds[fix]
159
+ train_labels = labels_folds.values_at(*((0..9).to_a - [fix])).flatten
160
+
161
+ tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
162
+
163
+ @features = train_set
164
+ @labels = train_labels
165
+ self.train
166
+ predictions = self.eval_list test_set, false
167
+
168
+ test_labels.zip(predictions).each do |gs,pred|
169
+ gs = gs.to_i
170
+ pred = pred > 0.5 ? 1 : 0
171
+ tp += 1 if gs == pred && gs == 1
172
+ tn += 1 if gs == pred && gs == 0
173
+ fp += 1 if gs == 0 && pred == 1
174
+ fn += 1 if gs == 1 && pred == 0
175
+ end
176
+
177
+ p = tp + fn
178
+ pp = tp + fp
179
+
180
+ pr = tp.to_f / pp
181
+ re = tp.to_f / p
182
+
183
+ f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
184
+
185
+ Misc.fingerprint([tp,tn,fp,fn,pr,re,f1])
186
+
187
+ Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
188
+
189
+ res[fix] = [tp,tn,fp,fn,pr,re,f1]
190
+ end
191
+
192
+ res
193
+ end
122
194
  end
@@ -9,7 +9,7 @@ class SVMModel < VectorModel
9
9
 
10
10
  @train_model =<<-EOF
11
11
  library(e1071);
12
- model = svm(class ~ ., data = features);
12
+ model = svm(class ~ ., data = features, scale=c(0));
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
@@ -11,7 +11,7 @@ class TestBarcode < Test::Unit::TestCase
11
11
  data["G4"] = [6,6,1,1,1,1]
12
12
 
13
13
  TmpFile.with_file(data.to_s) do |file|
14
- m = Matrix.new file
14
+ m = RbbtMatrix.new file
15
15
  m.barcode(file+'.barcode')
16
16
  tsv = TSV.open(file+'.barcode')
17
17
  assert tsv["G2"] = [0,1,0,1,0,1]
@@ -0,0 +1,12 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/ml_task'
3
+
4
+ class TestMLTask < Test::Unit::TestCase
5
+ def test_MLTask
6
+
7
+ triage = MLTask.new
8
+ ml_task.pre_process do
9
+ end
10
+ end
11
+ end
12
+
@@ -34,7 +34,7 @@ class TestVectorModel < Test::Unit::TestCase
34
34
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
35
35
  labels = scan("#{ feature_file }.class", what=numeric());
36
36
  features = cbind(features, class = labels);
37
- library(e1071)
37
+ rbbt.require('e1071')
38
38
  model = svm(class ~ ., data = features)
39
39
  save(model, file="#{ model_file }");
40
40
  EOF
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.44
4
+ version: 1.1.49
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-02 00:00:00.000000000 Z
11
+ date: 2020-08-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: priority_queue_cxx
42
+ name: priority_queue_cxx17
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/matrix/barcode.rb
96
96
  - lib/rbbt/matrix/differential.rb
97
97
  - lib/rbbt/matrix/knowledge_base.rb
98
+ - lib/rbbt/ml_task.rb
98
99
  - lib/rbbt/network/paths.rb
99
100
  - lib/rbbt/plots/bar.rb
100
101
  - lib/rbbt/plots/heatmap.rb
@@ -115,6 +116,7 @@ files:
115
116
  - test/rbbt/statistics/test_fisher.rb
116
117
  - test/rbbt/statistics/test_hypergeometric.rb
117
118
  - test/rbbt/statistics/test_random_walk.rb
119
+ - test/rbbt/test_ml_task.rb
118
120
  - test/rbbt/test_stan.rb
119
121
  - test/rbbt/vector/model/test_svm.rb
120
122
  - test/rbbt/vector/test_model.rb
@@ -137,8 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
139
  - !ruby/object:Gem::Version
138
140
  version: '0'
139
141
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.6.13
142
+ rubygems_version: 3.0.6
142
143
  signing_key:
143
144
  specification_version: 4
144
145
  summary: Data-mining and statistics
@@ -149,6 +150,7 @@ test_files:
149
150
  - test/rbbt/statistics/test_fisher.rb
150
151
  - test/rbbt/statistics/test_fdr.rb
151
152
  - test/rbbt/statistics/test_hypergeometric.rb
153
+ - test/rbbt/test_ml_task.rb
152
154
  - test/rbbt/vector/test_model.rb
153
155
  - test/rbbt/vector/model/test_svm.rb
154
156
  - test/rbbt/test_stan.rb