rbbt-dm 1.1.28 → 1.1.29

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d0744000025bfd9e9a1cefbcf61e54c2f81738d
4
- data.tar.gz: b6169f95244d559e90a444b08b234a5f277c1ca7
3
+ metadata.gz: 27792718ef802b917873c9eec162237e4d90da2f
4
+ data.tar.gz: 8fd679e51737d0b95a72b09c2d3802614b9f856b
5
5
  SHA512:
6
- metadata.gz: b3c06b87ba8b12e9d0451086fe888eaf5df84b5725916c61dd97ae6986c9c6c4150bc2a46004f5b07913f6c43e6bf6bce7119b093eac1f97731a70af8dd749ea
7
- data.tar.gz: f7304dd025fff1f691f4be46bb4e932eb29145f32bae80366813e0d17516de8326cde6c9c24fa3ad58a08c15346dcc62d971df9a5d6ca0d85e0791590883363a
6
+ metadata.gz: c351b6b4ffdae4cdeeb135ed34690cd4c81eb5de8c4311c0c82e11d5e6100a0354047b9a5b73897c4fd32d77fef93dac31094008e99747a6bf5fb8dd0e0f751b
7
+ data.tar.gz: 915b6f886c732373161d1b6bc43b51691e692ca50dafa81307890803e435ba4a8ea8de9a6068d13b35028af173da87dbebbdca02aee5a5ce96eede50ebe7dc3f
data/lib/rbbt/matrix.rb CHANGED
@@ -11,17 +11,17 @@ class Matrix
11
11
  end
12
12
 
13
13
  attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
14
- def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
14
+ def initialize(data_file, labels = nil, value_type = nil, format = nil, organism=nil, identifiers=nil)
15
15
  @data_file = data_file
16
- @labels = labels
17
- @value_type = value_type
16
+ @labels = labels
17
+ @value_type = value_type || 'count'
18
18
  @format = format
19
+ _header = nil
19
20
  @format ||= begin
20
21
  _header ||= TSV.parse_header(@data_file)
21
22
  _header.key_field || "ID"
22
23
  end
23
24
  @organism = organism
24
- _header = nil
25
25
  @organism ||= begin
26
26
  _header ||= TSV.parse_header(@data_file)
27
27
  _header.namespace || Organism.default_code("Hsa")
@@ -150,12 +150,50 @@ class Matrix
150
150
  matrix
151
151
  end
152
152
 
153
+ def to_barcode_ruby(factor = 2) # Persists the output of barcode_ruby and returns it wrapped in a new Matrix
154
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file # NOTE(review): name is never read again in this method
155
+
156
+ file = Persist.persist(data_file, :tsv, :prefix => "Barcode #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
157
+ barcode_ruby(filename, factor)
158
+ end
159
+ subsets = self.subsets
160
+ matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism # fourth argument is the format of the new matrix
161
+ matrix.subsets = subsets # carry this matrix's subsets over to the result
162
+ matrix
163
+ end
164
+
165
+ def to_barcode(factor = 2) # Persists the output of the R-based barcode method and returns it wrapped in a new Matrix
166
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file # NOTE(review): name is never read again in this method
167
+
168
+ file = Persist.persist(data_file, :tsv, :prefix => "Barcode R #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
169
+ barcode(filename, factor).to_list # NOTE(review): the visible part of barcode ends with R.run(cmd) - confirm its return value responds to to_list
170
+ end
171
+ subsets = self.subsets
172
+ matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism # fourth argument is the format of the new matrix
173
+ matrix.subsets = subsets # carry this matrix's subsets over to the result
174
+ matrix
175
+ end
176
+
177
+ def to_activity(factor = 2) # Persists the output of activity_cluster and returns it wrapped in a new Matrix
178
+ require 'rbbt/tsv/change_id' # loaded only when this method is called
179
+
180
+ name = data_file =~ /:>/ ? File.basename(data_file) : data_file # NOTE(review): name is never read again in this method
181
+
182
+ file = Persist.persist(data_file, :tsv, :prefix => "Activity #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
183
+ activity_cluster(filename, factor) # result is persisted under the same barcode directory as the barcode variants
184
+ end
185
+ subsets = self.subsets
186
+ matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism # fourth argument is the format of the new matrix
187
+ matrix.subsets = subsets # carry this matrix's subsets over to the result
188
+ matrix
189
+ end
190
+
153
191
  def tsv(to_gene=true, identifiers = nil)
154
192
  if to_gene and TSV.parse_header(self.data_file).key_field != "Ensembl Gene ID"
155
193
  file = self.to_gene(identifiers).data_file
156
- file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true, :cast => nil
194
+ file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
157
195
  else
158
- self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true, :cast => nil
196
+ self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :merge => true
159
197
  end
160
198
  end
161
199
 
@@ -6,12 +6,45 @@ class Matrix
6
6
  FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
7
7
  cmd =<<-EOF
8
8
  source('#{Rbbt.share.R['barcode.R'].find}')
9
- rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
9
+ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
12
  R.run(cmd)
13
13
  end
14
14
 
15
+ def barcode_ruby(outfile, factor = 2)
16
+ parser = TSV::Parser.new self.data_file
17
+ dumper = TSV::Dumper.new parser.options.merge(:type => :list, :cast => :to_i)
18
+ dumper.init
19
+
20
+ TSV.traverse parser, :into => dumper, :bar => "Barcoding #{self.data_file}" do |key,values|
21
+ clean_values = values.flatten.compact.collect{|v| v.to_f}
22
+ modes = R.eval("rbbt.get.modes(#{R.ruby2R clean_values})$modes")
23
+ mode = Array === modes ? modes.first : modes
24
+ mode_values = clean_values.select{|v| v.to_f <= mode}
25
+ mode_values.concat mode_values.collect{|v| v+mode}
26
+ sd = Misc.sd mode_values
27
+ if sd.nil?
28
+ [key, [nil] * values.length]
29
+ else
30
+ threshold = mode + sd
31
+ bars = if Array === values.compact.first
32
+ values.collect do |v|
33
+ Misc.mean(v.compact.collect{|v| v.to_f}) > threshold ? 1 : 0
34
+ end
35
+ else
36
+ values.collect do |v|
37
+ v.to_f > threshold ? 1 : 0
38
+ end
39
+ end
40
+ key = key.first if Array === key
41
+ [key, bars]
42
+ end
43
+ end
44
+
45
+ Misc.sensiblewrite(outfile, dumper.stream)
46
+ end
47
+
15
48
  def activity_cluster(outfile, factor = 2)
16
49
 
17
50
  FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
@@ -0,0 +1,21 @@
1
+ require 'rbbt/util/R'
2
+ require 'rbbt/util/R/eval'
3
+
4
+ module Fisher
5
+ def self.test_classification(classes1, classes2, alternative='greater') # Returns the p.value of R's fisher.test on the 2x2 table built from the paired labels
6
+ matrix = [0,0,0,0] # counts: [both 1, only classes2 is 1, only classes1 is 1, every other pair]
7
+ classes1.each_with_index do |c1,i|
8
+ c2 = classes2[i] # nil when classes2 is shorter than classes1
9
+ if c1 == 1 and c2 == 1
10
+ matrix[0] += 1
11
+ elsif c1 == 0 and c2 == 1
12
+ matrix[1] += 1
13
+ elsif c1 == 1 and c2 == 0
14
+ matrix[2] += 1
15
+ else # reached for 0/0 and also for any nil or non-0/1 value
16
+ matrix[3] += 1
17
+ end
18
+ end
19
+ R.eval("fisher.test(matrix(#{R.ruby2R matrix}, nrow=2), alternative = #{R.ruby2R alternative})$p.value")
20
+ end
21
+ end
data/share/R/barcode.R CHANGED
@@ -26,6 +26,42 @@ rbbt.GE.barcode <- function(matrix_file, output_file, sd.factor = 2, key.field =
26
26
  close(file.barcode)
27
27
  }
28
28
 
29
+ rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.field = "Ensembl Gene ID"){ # NOTE(review): sd.factor is not used in this body
30
+ data = rbbt.tsv(matrix_file)
31
+ data.mean = rowMeans(data, na.rm=T) # NOTE(review): data.mean is not used below
32
+
33
+ data.mode = apply(data, 1, function(x){ mode = rbbt.get.modes(x)$modes[1]; lower = x[x <= mode]; return(c(lower, mode, lower+mode));}) # NOTE(review): apply() returns a matrix instead of a list when every row yields the same length - confirm
34
+ data.empty = sapply(data.mode,function(x){ length(x) < 3})
35
+
36
+ data = data[rownames(data)[!data.empty],]
37
+ data.mode = data.mode[!data.empty]
38
+
39
+ data.sd = sapply(data.mode, sd, na.rm=T)
40
+ data.threshold = as.vector(sapply(data.mode, function(x){return(x[(length(x) + 1)/2])})) + data.sd # the mode is the middle element of c(lower, mode, lower+mode); length(x)/2 truncated to the last 'lower' value
41
+ names(data.threshold) = rownames(data)
42
+
43
+ file.barcode = file(output_file, 'w')
44
+
45
+ cat("#: :type=:list#:cast=:to_i\n", file = file.barcode)
46
+ cat("#", file = file.barcode)
47
+ cat(key.field, file = file.barcode)
48
+ cat("\t", file = file.barcode)
49
+ cat(colnames(data), file = file.barcode, sep="\t")
50
+ cat("\n", file = file.barcode)
51
+
52
+ for (gene in rownames(data)){
53
+ barcode = (data[gene,] - data.threshold[gene]) > 0 # TRUE where the value exceeds mode + sd for this gene
54
+
55
+ barcode_value = rep(0, length(data[gene,]))
56
+ barcode_value[barcode] = 1
57
+
58
+ cat(gene, file = file.barcode)
59
+ cat("\t", file = file.barcode)
60
+ cat(barcode_value, file = file.barcode, sep = "\t")
61
+ cat("\n", file = file.barcode)
62
+ }
63
+ close(file.barcode)
64
+ }
29
65
 
30
66
  rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID"){
31
67
 
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/matrix'
3
+ require 'rbbt/matrix/barcode'
4
+
5
+ class TestBarcode < Test::Unit::TestCase
6
+ def test_R_barcode
7
+ data = TSV.setup({}, :key_field => "Gene", :fields => %w(S1 S2 S3 S4 S5 S6) , :type => :list)
8
+ data["G1"] = [1,1,1,4,5,6]
9
+ data["G2"] = [1,6,1,6,1,6]
10
+ data["G3"] = [1,1,1,1,6,6]
11
+ data["G4"] = [6,6,1,1,1,1]
12
+
13
+ TmpFile.with_file(data.to_s) do |file|
14
+ m = Matrix.new file
15
+ m.barcode(file+'.barcode')
16
+ tsv = TSV.open(file+'.barcode')
17
+ assert tsv["G2"] = [0,1,0,1,0,1]
18
+
19
+ m.barcode_ruby(file+'.barcode_ruby')
20
+ tsv = TSV.open(file+'.barcode_ruby')
21
+ assert tsv["G2"] = [0,1,0,1,0,1]
22
+ end
23
+ end
24
+ end
25
+
@@ -0,0 +1,11 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/statistics/fisher'
3
+
4
+ class TestFisher < Test::Unit::TestCase
5
+ def test_classification
6
+ class1 = [0,0,0,0,1,1,1,1,1]
7
+ class2 = [0,0,0,1,1,1,1,1,0]
8
+ iii Fisher.test_classification(class1, class2)
9
+ end
10
+ end
11
+
data/test/test_helper.rb CHANGED
@@ -1,9 +1,22 @@
1
- require 'test/unit'
1
+ gem "test-unit", "~> 3.0"
2
+ gem "minitest", "~> 5.5"
3
+
2
4
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
5
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
6
 
7
+ require 'test/unit'
8
+ require 'fileutils'
9
+
10
+ require 'rbbt'
11
+ require 'rbbt/resource/path'
12
+
13
+
5
14
  class Test::Unit::TestCase
6
- def get_test_datafile(file)
7
- File.join(File.dirname(__FILE__), 'data', file)
15
+ def self.datafile_test(file)
16
+ Path.setup(File.join(File.dirname(__FILE__), 'data', file.to_s))
17
+ end
18
+
19
+ def datafile_test(file)
20
+ Test::Unit::TestCase.datafile_test(file)
8
21
  end
9
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.28
4
+ version: 1.1.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-03 00:00:00.000000000 Z
11
+ date: 2016-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -99,6 +99,7 @@ files:
99
99
  - lib/rbbt/plots/bar.rb
100
100
  - lib/rbbt/plots/heatmap.rb
101
101
  - lib/rbbt/statistics/fdr.rb
102
+ - lib/rbbt/statistics/fisher.rb
102
103
  - lib/rbbt/statistics/hypergeometric.rb
103
104
  - lib/rbbt/statistics/random_walk.rb
104
105
  - lib/rbbt/statistics/rank_product.rb
@@ -107,8 +108,10 @@ files:
107
108
  - share/R/MA.R
108
109
  - share/R/barcode.R
109
110
  - share/R/heatmap.3.R
111
+ - test/rbbt/matrix/test_barcode.rb
110
112
  - test/rbbt/network/test_paths.rb
111
113
  - test/rbbt/statistics/test_fdr.rb
114
+ - test/rbbt/statistics/test_fisher.rb
112
115
  - test/rbbt/statistics/test_hypergeometric.rb
113
116
  - test/rbbt/statistics/test_random_walk.rb
114
117
  - test/rbbt/vector/model/test_svm.rb
@@ -142,6 +145,8 @@ test_files:
142
145
  - test/rbbt/vector/model/test_svm.rb
143
146
  - test/rbbt/vector/test_model.rb
144
147
  - test/rbbt/network/test_paths.rb
148
+ - test/rbbt/matrix/test_barcode.rb
145
149
  - test/rbbt/statistics/test_random_walk.rb
146
150
  - test/rbbt/statistics/test_fdr.rb
147
151
  - test/rbbt/statistics/test_hypergeometric.rb
152
+ - test/rbbt/statistics/test_fisher.rb