rbbt-dm 1.1.43 → 1.1.48

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 270570bf58c397ec1f6da3195f882a5cf73eb1a1
4
- data.tar.gz: 4f7232e2c2cddff960f2f9dce6e4279298b53408
2
+ SHA256:
3
+ metadata.gz: 2d054c98df9e1b451849ddcf01f089d2822746d2a4ac8017f7f40f412de5d8c4
4
+ data.tar.gz: 55df50b751243cf77c023a016b53c258138c24c9b10bf1068fa5bb6385c1c114
5
5
  SHA512:
6
- metadata.gz: faa0514ae85d537007147f8a1c06367202cffa83c5081b68f9e2e23943b02cc5601160fbcfd32f0c041fd511b2f3116e82b14b5313ba57488297f76a74f77433
7
- data.tar.gz: bc1d750d449763f45482318e1f4fe5d54f80c64eb90c3fb08c71d94d282496aa87e54004f309e2c2dac4cc3bc01995880481be490c4bf6510d1e35417276eb85
6
+ metadata.gz: 3b6397645a532feb4bd2aa0dabd715091a2438a07244c11f82ab52f3c40b3fb43e515667b48630e9b8bd28b4d33c0a69cf57d1151dbb0616d1896a4558828fa3
7
+ data.tar.gz: 20b90f5d900ab8e1a6a930edc5573a420213360961fbb796022e2a09f442307b731c30a7da6b535d7fc90d72fe92fb358de5fdebc420814a7ef300f23c798341
@@ -176,6 +176,26 @@ class RbbtMatrix
176
176
  matrix
177
177
  end
178
178
 
179
+ def to_name(identifiers = nil)
180
+ require 'rbbt/tsv/change_id'
181
+
182
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file
183
+
184
+ file = Persist.persist(data_file, :tsv, :prefix => "Name", :check => [data_file], :dir => RbbtMatrix.matrix_dir.values, :no_load => true) do
185
+
186
+ data = data_file.tsv(:cast => :to_f)
187
+
188
+ identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
189
+
190
+ data.change_key("Associated Gene Name", :identifiers => identifiers.reverse) do |v|
191
+ Misc.mean(v.compact)
192
+ end
193
+ end
194
+ subsets = self.subsets
195
+ matrix = RbbtMatrix.new file, labels, value_type, "Associated Gene Name", organism
196
+ matrix.subsets = subsets
197
+ matrix
198
+ end
179
199
  def to_barcode_ruby(factor = 2)
180
200
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
181
201
 
@@ -38,7 +38,7 @@ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.r
38
38
  end
39
39
  end
40
40
  key = key.first if Array === key
41
- [key, bars]
41
+ [key, bars]
42
42
  end
43
43
  end
44
44
 
@@ -0,0 +1,7 @@
1
+ class MLTask
2
+ def initialize
3
+ end
4
+
5
+ def pre_process
6
+ end
7
+ end
@@ -5,7 +5,7 @@ module Paths
5
5
  def self.dijkstra(adjacency, start_node, end_node = nil, max_steps = nil)
6
6
  return nil unless adjacency.include? start_node
7
7
 
8
- active = FastContainers::PriorityQueue.new(:max)
8
+ active = FastContainers::PriorityQueue.new(:min)
9
9
  distances = Hash.new { 1.0 / 0.0 }
10
10
  parents = Hash.new
11
11
 
@@ -49,7 +49,7 @@ module Paths
49
49
  def self.weighted_dijkstra(adjacency, start_node, end_node = nil, threshold = nil, max_steps = nil)
50
50
  return nil unless adjacency.include? start_node
51
51
 
52
- active = FastContainers::PriorityQueue.new(:max)
52
+ active = FastContainers::PriorityQueue.new(:min)
53
53
  distances = Hash.new { 1.0 / 0.0 }
54
54
  parents = Hash.new
55
55
 
@@ -4,6 +4,20 @@ class VectorModel
4
4
  attr_accessor :directory, :model_file, :extract_features, :train_model, :eval_model
5
5
  attr_accessor :features, :labels
6
6
 
7
+ def self.R_run(model_file, features, labels, code)
8
+ TmpFile.with_file do |feature_file|
9
+ Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
10
+ Open.write(feature_file + '.class', labels * "\n")
11
+
12
+ R.run <<-EOF
13
+ features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
14
+ labels = scan("#{ feature_file }.class");
15
+ features = cbind(features, class = labels);
16
+ #{code}
17
+ EOF
18
+ end
19
+ end
20
+
7
21
  def self.R_train(model_file, features, labels, code)
8
22
  TmpFile.with_file do |feature_file|
9
23
  Open.write(feature_file, features.collect{|feats| feats * "\t"} * "\n")
@@ -57,6 +71,11 @@ cat(paste(label, sep="\\n"));
57
71
  @labels = []
58
72
  end
59
73
 
74
+ def clear
75
+ @features = []
76
+ @labels = []
77
+ end
78
+
60
79
  def add(element, label = nil)
61
80
  @features << extract_features.call(element)
62
81
  @labels << label unless label.nil?
@@ -67,16 +86,20 @@ cat(paste(label, sep="\\n"));
67
86
  when Proc === train_model
68
87
  train_model.call(@model_file, @features, @labels)
69
88
  when String === train_model
70
- SVMModel.R_train(@model_file, @features, @labels, train_model)
89
+ VectorModel.R_train(@model_file, @features, @labels, train_model)
71
90
  end
72
91
  end
73
92
 
93
+ def run(code)
94
+ VectorModel.R_run(@model_file, @features, @labels, code)
95
+ end
96
+
74
97
  def eval(element)
75
98
  case
76
99
  when Proc === eval_model
77
100
  eval_model.call(@model_file, extract_features.call(element), false)
78
101
  when String === eval_model
79
- SVMModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
102
+ VectorModel.R_eval(@model_file, extract_features.call(element), false, eval_model)
80
103
  end
81
104
  end
82
105
 
@@ -119,4 +142,53 @@ cat(paste(label, sep="\\n"));
119
142
 
120
143
  acc
121
144
  end
145
+
146
+ def cross_validation(folds = 10)
147
+
148
+ res = TSV.setup({}, "Fold~TP,TN,FP,FN,P,R,F1#:type=:list")
149
+
150
+ feature_folds = Misc.divide(@features, folds)
151
+ labels_folds = Misc.divide(@labels, folds)
152
+
153
+ folds.times do |fix|
154
+
155
+ test_set = feature_folds[fix]
156
+ train_set = feature_folds.values_at(*((0..9).to_a - [fix])).inject([]){|acc,e| acc += e; acc}
157
+
158
+ test_labels = labels_folds[fix]
159
+ train_labels = labels_folds.values_at(*((0..9).to_a - [fix])).flatten
160
+
161
+ tp, fp, tn, fn, pr, re, f1 = [0, 0, 0, 0, nil, nil, nil]
162
+
163
+ @features = train_set
164
+ @labels = train_labels
165
+ self.train
166
+ predictions = self.eval_list test_set, false
167
+
168
+ test_labels.zip(predictions).each do |gs,pred|
169
+ gs = gs.to_i
170
+ pred = pred > 0.5 ? 1 : 0
171
+ tp += 1 if gs == pred && gs == 1
172
+ tn += 1 if gs == pred && gs == 0
173
+ fp += 1 if gs == 0 && pred == 1
174
+ fn += 1 if gs == 1 && pred == 0
175
+ end
176
+
177
+ p = tp + fn
178
+ pp = tp + fp
179
+
180
+ pr = tp.to_f / pp
181
+ re = tp.to_f / p
182
+
183
+ f1 = (2.0 * tp) / (2.0 * tp + fp + fn)
184
+
185
+ Misc.fingerprint([tp,tn,fp,fn,pr,re,f1])
186
+
187
+ Log.debug "CV Fold #{fix} P:#{"%.3f" % pr} R:#{"%.3f" % re} F1:#{"%.3f" % f1}"
188
+
189
+ res[fix] = [tp,tn,fp,fn,pr,re,f1]
190
+ end
191
+
192
+ res
193
+ end
122
194
  end
@@ -9,7 +9,7 @@ class SVMModel < VectorModel
9
9
 
10
10
  @train_model =<<-EOF
11
11
  library(e1071);
12
- model = svm(class ~ ., data = features);
12
+ model = svm(class ~ ., data = features, scale=c(0));
13
13
  EOF
14
14
 
15
15
  @eval_model =<<-EOF
@@ -11,7 +11,7 @@ class TestBarcode < Test::Unit::TestCase
11
11
  data["G4"] = [6,6,1,1,1,1]
12
12
 
13
13
  TmpFile.with_file(data.to_s) do |file|
14
- m = Matrix.new file
14
+ m = RbbtMatrix.new file
15
15
  m.barcode(file+'.barcode')
16
16
  tsv = TSV.open(file+'.barcode')
17
17
  assert tsv["G2"] = [0,1,0,1,0,1]
@@ -6,7 +6,7 @@ require 'set'
6
6
 
7
7
 
8
8
  class TestNetwork < Test::Unit::TestCase
9
- def _test_dijsktra
9
+ def test_dijsktra
10
10
  network_txt=<<-EOF
11
11
  #: :sep=/\s/#:type=:flat
12
12
  #Start End
@@ -0,0 +1,12 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.rb')
2
+ require 'rbbt/ml_task'
3
+
4
+ class TestMLTask < Test::Unit::TestCase
5
+ def test_MLTask
6
+
7
+ triage = MLTask.new
8
+ ml_task.pre_process do
9
+ end
10
+ end
11
+ end
12
+
@@ -34,7 +34,7 @@ class TestVectorModel < Test::Unit::TestCase
34
34
  features = read.table("#{ feature_file }", sep ="\\t", stringsAsFactors=FALSE);
35
35
  labels = scan("#{ feature_file }.class", what=numeric());
36
36
  features = cbind(features, class = labels);
37
- library(e1071)
37
+ rbbt.require('e1071')
38
38
  model = svm(class ~ ., data = features)
39
39
  save(model, file="#{ model_file }");
40
40
  EOF
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.43
4
+ version: 1.1.48
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-11-29 00:00:00.000000000 Z
11
+ date: 2020-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -95,6 +95,7 @@ files:
95
95
  - lib/rbbt/matrix/barcode.rb
96
96
  - lib/rbbt/matrix/differential.rb
97
97
  - lib/rbbt/matrix/knowledge_base.rb
98
+ - lib/rbbt/ml_task.rb
98
99
  - lib/rbbt/network/paths.rb
99
100
  - lib/rbbt/plots/bar.rb
100
101
  - lib/rbbt/plots/heatmap.rb
@@ -115,6 +116,7 @@ files:
115
116
  - test/rbbt/statistics/test_fisher.rb
116
117
  - test/rbbt/statistics/test_hypergeometric.rb
117
118
  - test/rbbt/statistics/test_random_walk.rb
119
+ - test/rbbt/test_ml_task.rb
118
120
  - test/rbbt/test_stan.rb
119
121
  - test/rbbt/vector/model/test_svm.rb
120
122
  - test/rbbt/vector/test_model.rb
@@ -137,19 +139,19 @@ required_rubygems_version: !ruby/object:Gem::Requirement
137
139
  - !ruby/object:Gem::Version
138
140
  version: '0'
139
141
  requirements: []
140
- rubyforge_project:
141
- rubygems_version: 2.6.8
142
+ rubygems_version: 3.0.6
142
143
  signing_key:
143
144
  specification_version: 4
144
145
  summary: Data-mining and statistics
145
146
  test_files:
146
- - test/test_helper.rb
147
- - test/rbbt/test_stan.rb
148
- - test/rbbt/vector/model/test_svm.rb
149
- - test/rbbt/vector/test_model.rb
150
147
  - test/rbbt/network/test_paths.rb
151
148
  - test/rbbt/matrix/test_barcode.rb
149
+ - test/rbbt/statistics/test_random_walk.rb
152
150
  - test/rbbt/statistics/test_fisher.rb
153
- - test/rbbt/statistics/test_hypergeometric.rb
154
151
  - test/rbbt/statistics/test_fdr.rb
155
- - test/rbbt/statistics/test_random_walk.rb
152
+ - test/rbbt/statistics/test_hypergeometric.rb
153
+ - test/rbbt/test_ml_task.rb
154
+ - test/rbbt/vector/test_model.rb
155
+ - test/rbbt/vector/model/test_svm.rb
156
+ - test/rbbt/test_stan.rb
157
+ - test/test_helper.rb