rbbt-dm 1.1.28 → 1.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d0744000025bfd9e9a1cefbcf61e54c2f81738d
4
- data.tar.gz: b6169f95244d559e90a444b08b234a5f277c1ca7
3
+ metadata.gz: 27792718ef802b917873c9eec162237e4d90da2f
4
+ data.tar.gz: 8fd679e51737d0b95a72b09c2d3802614b9f856b
5
5
  SHA512:
6
- metadata.gz: b3c06b87ba8b12e9d0451086fe888eaf5df84b5725916c61dd97ae6986c9c6c4150bc2a46004f5b07913f6c43e6bf6bce7119b093eac1f97731a70af8dd749ea
7
- data.tar.gz: f7304dd025fff1f691f4be46bb4e932eb29145f32bae80366813e0d17516de8326cde6c9c24fa3ad58a08c15346dcc62d971df9a5d6ca0d85e0791590883363a
6
+ metadata.gz: c351b6b4ffdae4cdeeb135ed34690cd4c81eb5de8c4311c0c82e11d5e6100a0354047b9a5b73897c4fd32d77fef93dac31094008e99747a6bf5fb8dd0e0f751b
7
+ data.tar.gz: 915b6f886c732373161d1b6bc43b51691e692ca50dafa81307890803e435ba4a8ea8de9a6068d13b35028af173da87dbebbdca02aee5a5ce96eede50ebe7dc3f
data/lib/rbbt/matrix.rb CHANGED
@@ -11,17 +11,17 @@ class Matrix
11
11
  end
12
12
 
13
13
  attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
14
- def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
14
+ def initialize(data_file, labels = nil, value_type = nil, format = nil, organism=nil, identifiers=nil)
15
15
  @data_file = data_file
16
- @labels = labels
17
- @value_type = value_type
16
+ @labels = labels
17
+ @value_type = value_type || 'count'
18
18
  @format = format
19
+ _header = nil
19
20
  @format ||= begin
20
21
  _header ||= TSV.parse_header(@data_file)
21
22
  _header.key_field || "ID"
22
23
  end
23
24
  @organism = organism
24
- _header = nil
25
25
  @organism ||= begin
26
26
  _header ||= TSV.parse_header(@data_file)
27
27
  _header.namespace || Organism.default_code("Hsa")
@@ -150,12 +150,50 @@ class Matrix
150
150
  matrix
151
151
  end
152
152
 
153
# Builds a new Matrix whose data file is the barcode of this matrix as
# computed by #barcode_ruby.
#
# The barcode file is produced inside a Persist.persist block keyed on
# +data_file+, with +factor+ embedded in the persistence prefix
# (presumably so each factor gets its own cached file -- confirm against
# Persist). The resulting matrix reuses this matrix's labels, value type,
# organism and subsets, and declares "Ensembl Gene ID" as its format.
#
# factor - value forwarded to #barcode_ruby (default: 2).
#
# Returns the new Matrix.
def to_barcode_ruby(factor = 2)
  file = Persist.persist(data_file, :tsv, :prefix => "Barcode #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
    barcode_ruby(filename, factor)
  end

  current_subsets = subsets
  matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
  matrix.subsets = current_subsets
  matrix
end
164
+
165
# Builds a new Matrix whose data file is the barcode of this matrix as
# computed by #barcode (the R implementation).
#
# The block handed to Persist.persist calls #barcode and then +to_list+ on
# its result. +factor+ is embedded in the persistence prefix ("Barcode R
# <factor>"), presumably so each factor gets its own cached file -- confirm
# against Persist. The resulting matrix reuses this matrix's labels, value
# type, organism and subsets, and declares "Ensembl Gene ID" as its format.
#
# factor - value forwarded to #barcode (default: 2).
#
# Returns the new Matrix.
def to_barcode(factor = 2)
  file = Persist.persist(data_file, :tsv, :prefix => "Barcode R #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
    barcode(filename, factor).to_list
  end

  current_subsets = subsets
  matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
  matrix.subsets = current_subsets
  matrix
end
176
+
177
# Builds a new Matrix whose data file is the output of #activity_cluster
# for this matrix.
#
# The file is produced inside a Persist.persist block keyed on +data_file+
# with the prefix "Activity <factor>"; note that it is stored under the
# same Matrix.matrix_dir.barcode directory the barcode variants use. The
# resulting matrix reuses this matrix's labels, value type, organism and
# subsets, and declares "Ensembl Gene ID" as its format.
#
# factor - value forwarded to #activity_cluster (default: 2).
#
# Returns the new Matrix.
def to_activity(factor = 2)
  # Loaded lazily, only when an activity matrix is actually requested.
  require 'rbbt/tsv/change_id'

  file = Persist.persist(data_file, :tsv, :prefix => "Activity #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
    activity_cluster(filename, factor)
  end

  current_subsets = subsets
  matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
  matrix.subsets = current_subsets
  matrix
end
190
+
153
191
  def tsv(to_gene=true, identifiers = nil)
154
192
  if to_gene and TSV.parse_header(self.data_file).key_field != "Ensembl Gene ID"
155
193
  file = self.to_gene(identifiers).data_file
156
- file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true, :cast => nil
194
+ file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
157
195
  else
158
- self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true, :cast => nil
196
+ self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :merge => true
159
197
  end
160
198
  end
161
199
 
@@ -6,12 +6,45 @@ class Matrix
6
6
  FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
7
7
  cmd =<<-EOF
8
8
  source('#{Rbbt.share.R['barcode.R'].find}')
9
- rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
9
+ rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
12
  R.run(cmd)
13
13
  end
14
14
 
15
# Writes a 0/1 barcode of this matrix to +outfile+, with the thresholding
# done in Ruby (the R side is only asked for the modes of each row).
#
# For every row of +data_file+:
#   * all values are flattened, nils dropped and converted to floats;
#   * the first mode reported by the R function rbbt.get.modes is taken;
#   * the values at or below the mode, together with those same values
#     shifted up by the mode, are used to estimate a standard deviation;
#   * each value (or the mean of each list of values) is encoded as 1 when
#     it is strictly above mode + sd and as 0 otherwise.
# Rows for which Misc.sd returns nil are emitted with one nil per value.
#
# outfile - path handed to Misc.sensiblewrite together with the dumper
#           stream.
# factor  - accepted for signature parity with the other barcode methods.
#           NOTE(review): it is not used here; the threshold is always
#           mode + 1 * sd. Confirm whether it was meant to scale the sd.
def barcode_ruby(outfile, factor = 2)
  parser = TSV::Parser.new data_file
  dumper = TSV::Dumper.new parser.options.merge(:type => :list, :cast => :to_i)
  dumper.init

  TSV.traverse parser, :into => dumper, :bar => "Barcoding #{data_file}" do |key, values|
    # Normalize the key before branching so that rows without a usable sd
    # are emitted with the same key shape as regular rows.
    key = key.first if Array === key

    clean_values = values.flatten.compact.collect{|value| value.to_f }
    modes = R.eval("rbbt.get.modes(#{R.ruby2R clean_values})$modes")
    mode = Array === modes ? modes.first : modes

    lower_values = clean_values.select{|value| value <= mode }
    sd = Misc.sd(lower_values + lower_values.collect{|value| value + mode })

    next [key, [nil] * values.length] if sd.nil?

    threshold = mode + sd
    bars = if Array === values.compact.first
             # Each entry is itself a list of values: compare their mean.
             values.collect do |replicates|
               Misc.mean(replicates.compact.collect{|value| value.to_f }) > threshold ? 1 : 0
             end
           else
             values.collect{|value| value.to_f > threshold ? 1 : 0 }
           end

    [key, bars]
  end

  Misc.sensiblewrite(outfile, dumper.stream)
end
47
+
15
48
  def activity_cluster(outfile, factor = 2)
16
49
 
17
50
  FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
@@ -0,0 +1,21 @@
1
+ require 'rbbt/util/R'
2
+ require 'rbbt/util/R/eval'
3
+
4
# Fisher's exact test helpers; the test itself is evaluated by R.
module Fisher
  # Runs R's fisher.test on the 2x2 contingency table obtained by pairing
  # two binary classifications position by position.
  #
  # The table is sent to R as a flat vector in this order:
  #   [both 1, only classes2 is 1, only classes1 is 1, everything else]
  # "Everything else" covers the 0/0 pairs and also any pair holding a
  # value other than 0 or 1.
  #
  # classes1    - list of class values (0 or 1).
  # classes2    - list of class values aligned with classes1.
  # alternative - value for fisher.test's +alternative+ argument
  #               (default: 'greater').
  #
  # Returns the value R reports for +$p.value+.
  # Raises ArgumentError when the two lists differ in length; the missing
  # entries would otherwise be silently counted in the last cell.
  def self.test_classification(classes1, classes2, alternative='greater')
    unless classes1.length == classes2.length
      raise ArgumentError, "classifications differ in length (#{classes1.length} vs #{classes2.length})"
    end

    matrix = [0, 0, 0, 0]
    classes1.zip(classes2) do |c1, c2|
      cell = if c1 == 1 && c2 == 1
               0
             elsif c1 == 0 && c2 == 1
               1
             elsif c1 == 1 && c2 == 0
               2
             else
               3
             end
      matrix[cell] += 1
    end

    R.eval("fisher.test(matrix(#{R.ruby2R matrix}, nrow=2), alternative = #{R.ruby2R alternative})$p.value")
  end
end
data/share/R/barcode.R CHANGED
@@ -26,6 +26,42 @@ rbbt.GE.barcode <- function(matrix_file, output_file, sd.factor = 2, key.field =
26
26
  close(file.barcode)
27
27
  }
28
28
 
29
# Mode-based barcode of an expression matrix.
#
# For every row the first mode reported by rbbt.get.modes is taken, and a
# vector c(lower, mode, lower + mode) is built, where `lower` holds the row
# values at or below that mode. The standard deviation of that vector plus
# the mode itself gives the row threshold; a cell is written as 1 when its
# value is strictly above the threshold and as 0 otherwise.
#
# matrix_file: file loaded through rbbt.tsv.
# output_file: path of the TSV file to write.
# sd.factor:   NOTE(review): accepted but not used here; the threshold is
#              always mode + 1 * sd. Confirm whether it should scale the sd.
# key.field:   name written as the key column of the output header.
rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.field = "Ensembl Gene ID"){
    data = rbbt.tsv(matrix_file)

    # NOTE(review): apply() simplifies its result to a matrix when every row
    # yields a vector of the same length; the sapply calls below assume a
    # list with one vector per row. Confirm this cannot happen in practice.
    data.mode = apply(data, 1, function(x){ mode = rbbt.get.modes(x)$modes[1]; lower = x[x <= mode]; return(c(lower, mode, lower+mode));})
    data.empty = sapply(data.mode,function(x){ length(x) < 3})

    data = data[rownames(data)[!data.empty],]
    data.mode = data.mode[!data.empty]

    data.sd = sapply(data.mode, sd, na.rm=T)

    # Each vector has odd length 2k + 1 with the mode at position k + 1.
    # length(x)/2 is k + 0.5, which R truncates to k (the last `lower`
    # value), so the middle position is computed explicitly.
    data.threshold = as.vector(sapply(data.mode, function(x){return(x[ceiling(length(x) / 2)])})) + data.sd
    names(data.threshold) = rownames(data)

    file.barcode = file(output_file, 'w')

    cat("#: :type=:list#:cast=:to_i\n", file = file.barcode)
    cat("#", file = file.barcode)
    cat(key.field, file = file.barcode)
    cat("\t", file = file.barcode)
    cat(colnames(data), file = file.barcode, sep="\t")
    cat("\n", file = file.barcode)

    for (gene in rownames(data)){
        barcode = (data[gene,] - data.threshold[gene]) > 0

        barcode_value = rep(0, length(data[gene,]))
        barcode_value[barcode] = 1

        cat(gene, file = file.barcode)
        cat("\t", file = file.barcode)
        cat(barcode_value, file = file.barcode, sep = "\t")
        cat("\n", file = file.barcode)
    }
    close(file.barcode)
}
29
65
 
30
66
  rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID"){
31
67
 
@@ -0,0 +1,25 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/matrix'
3
+ require 'rbbt/matrix/barcode'
4
+
5
# Checks the R (#barcode) and pure-Ruby (#barcode_ruby) barcode
# implementations against a small hand-made matrix.
class TestBarcode < Test::Unit::TestCase
  def test_R_barcode
    data = TSV.setup({}, :key_field => "Gene", :fields => %w(S1 S2 S3 S4 S5 S6), :type => :list)
    data["G1"] = [1,1,1,4,5,6]
    data["G2"] = [1,6,1,6,1,6]
    data["G3"] = [1,1,1,1,6,6]
    data["G4"] = [6,6,1,1,1,1]

    expected_g2 = [0,1,0,1,0,1]

    TmpFile.with_file(data.to_s) do |file|
      m = Matrix.new file

      # The previous `assert tsv["G2"] = [...]` was an assignment, which is
      # always truthy, so nothing was checked. Values are converted with
      # to_i so the comparison holds whether or not the reader applies the
      # :cast declared in the file header.
      m.barcode(file+'.barcode')
      tsv = TSV.open(file+'.barcode')
      assert_equal expected_g2, tsv["G2"].collect{|v| v.to_i }

      m.barcode_ruby(file+'.barcode_ruby')
      tsv = TSV.open(file+'.barcode_ruby')
      assert_equal expected_g2, tsv["G2"].collect{|v| v.to_i }
    end
  end
end
25
+
@@ -0,0 +1,11 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
2
+ require 'rbbt/statistics/fisher'
3
+
4
# Exercises Fisher.test_classification against a hand-computed p-value.
class TestFisher < Test::Unit::TestCase
  def test_classification
    class1 = [0,0,0,0,1,1,1,1,1]
    class2 = [0,0,0,1,1,1,1,1,0]

    # The pairs give the table [[4, 1], [1, 3]] (all margins 5 and 4, n = 9).
    # One-sided 'greater' p-value: P(X >= 4) = (20 + 1) / 126 = 1/6.
    p_value = Fisher.test_classification(class1, class2)
    assert_in_delta 1.0 / 6, p_value.to_f, 0.0001
  end
end
11
+
data/test/test_helper.rb CHANGED
@@ -1,9 +1,22 @@
1
- require 'test/unit'
1
+ gem "test-unit", "~> 3.0"
2
+ gem "minitest", "~> 5.5"
3
+
2
4
  $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
5
  $LOAD_PATH.unshift(File.dirname(__FILE__))
4
6
 
7
+ require 'test/unit'
8
+ require 'fileutils'
9
+
10
+ require 'rbbt'
11
+ require 'rbbt/resource/path'
12
+
13
+
5
14
  class Test::Unit::TestCase
6
- def get_test_datafile(file)
7
- File.join(File.dirname(__FILE__), 'data', file)
15
+ def self.datafile_test(file)
16
+ Path.setup(File.join(File.dirname(__FILE__), 'data', file.to_s))
17
+ end
18
+
19
+ def datafile_test(file)
20
+ Test::Unit::TestCase.datafile_test(file)
8
21
  end
9
22
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.28
4
+ version: 1.1.29
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-10-03 00:00:00.000000000 Z
11
+ date: 2016-10-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -99,6 +99,7 @@ files:
99
99
  - lib/rbbt/plots/bar.rb
100
100
  - lib/rbbt/plots/heatmap.rb
101
101
  - lib/rbbt/statistics/fdr.rb
102
+ - lib/rbbt/statistics/fisher.rb
102
103
  - lib/rbbt/statistics/hypergeometric.rb
103
104
  - lib/rbbt/statistics/random_walk.rb
104
105
  - lib/rbbt/statistics/rank_product.rb
@@ -107,8 +108,10 @@ files:
107
108
  - share/R/MA.R
108
109
  - share/R/barcode.R
109
110
  - share/R/heatmap.3.R
111
+ - test/rbbt/matrix/test_barcode.rb
110
112
  - test/rbbt/network/test_paths.rb
111
113
  - test/rbbt/statistics/test_fdr.rb
114
+ - test/rbbt/statistics/test_fisher.rb
112
115
  - test/rbbt/statistics/test_hypergeometric.rb
113
116
  - test/rbbt/statistics/test_random_walk.rb
114
117
  - test/rbbt/vector/model/test_svm.rb
@@ -142,6 +145,8 @@ test_files:
142
145
  - test/rbbt/vector/model/test_svm.rb
143
146
  - test/rbbt/vector/test_model.rb
144
147
  - test/rbbt/network/test_paths.rb
148
+ - test/rbbt/matrix/test_barcode.rb
145
149
  - test/rbbt/statistics/test_random_walk.rb
146
150
  - test/rbbt/statistics/test_fdr.rb
147
151
  - test/rbbt/statistics/test_hypergeometric.rb
152
+ - test/rbbt/statistics/test_fisher.rb