rbbt-dm 1.1.28 → 1.1.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix.rb +44 -6
- data/lib/rbbt/matrix/barcode.rb +34 -1
- data/lib/rbbt/statistics/fisher.rb +21 -0
- data/share/R/barcode.R +36 -0
- data/test/rbbt/matrix/test_barcode.rb +25 -0
- data/test/rbbt/statistics/test_fisher.rb +11 -0
- data/test/test_helper.rb +16 -3
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27792718ef802b917873c9eec162237e4d90da2f
|
4
|
+
data.tar.gz: 8fd679e51737d0b95a72b09c2d3802614b9f856b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c351b6b4ffdae4cdeeb135ed34690cd4c81eb5de8c4311c0c82e11d5e6100a0354047b9a5b73897c4fd32d77fef93dac31094008e99747a6bf5fb8dd0e0f751b
|
7
|
+
data.tar.gz: 915b6f886c732373161d1b6bc43b51691e692ca50dafa81307890803e435ba4a8ea8de9a6068d13b35028af173da87dbebbdca02aee5a5ce96eede50ebe7dc3f
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -11,17 +11,17 @@ class Matrix
|
|
11
11
|
end
|
12
12
|
|
13
13
|
attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
|
14
|
-
def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
|
14
|
+
def initialize(data_file, labels = nil, value_type = nil, format = nil, organism=nil, identifiers=nil)
|
15
15
|
@data_file = data_file
|
16
|
-
@labels = labels
|
17
|
-
@value_type = value_type
|
16
|
+
@labels = labels
|
17
|
+
@value_type = value_type || 'count'
|
18
18
|
@format = format
|
19
|
+
_header = nil
|
19
20
|
@format ||= begin
|
20
21
|
_header ||= TSV.parse_header(@data_file)
|
21
22
|
_header.key_field || "ID"
|
22
23
|
end
|
23
24
|
@organism = organism
|
24
|
-
_header = nil
|
25
25
|
@organism ||= begin
|
26
26
|
_header ||= TSV.parse_header(@data_file)
|
27
27
|
_header.namespace || Organism.default_code("Hsa")
|
@@ -150,12 +150,50 @@ class Matrix
|
|
150
150
|
matrix
|
151
151
|
end
|
152
152
|
|
153
|
+
def to_barcode_ruby(factor = 2)
|
154
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
155
|
+
|
156
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Barcode #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
157
|
+
barcode_ruby(filename, factor)
|
158
|
+
end
|
159
|
+
subsets = self.subsets
|
160
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
161
|
+
matrix.subsets = subsets
|
162
|
+
matrix
|
163
|
+
end
|
164
|
+
|
165
|
+
def to_barcode(factor = 2)
|
166
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
167
|
+
|
168
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Barcode R #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
169
|
+
barcode(filename, factor).to_list
|
170
|
+
end
|
171
|
+
subsets = self.subsets
|
172
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
173
|
+
matrix.subsets = subsets
|
174
|
+
matrix
|
175
|
+
end
|
176
|
+
|
177
|
+
def to_activity(factor = 2)
|
178
|
+
require 'rbbt/tsv/change_id'
|
179
|
+
|
180
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
181
|
+
|
182
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Activity #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
183
|
+
activity_cluster(filename, factor)
|
184
|
+
end
|
185
|
+
subsets = self.subsets
|
186
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
187
|
+
matrix.subsets = subsets
|
188
|
+
matrix
|
189
|
+
end
|
190
|
+
|
153
191
|
def tsv(to_gene=true, identifiers = nil)
|
154
192
|
if to_gene and TSV.parse_header(self.data_file).key_field != "Ensembl Gene ID"
|
155
193
|
file = self.to_gene(identifiers).data_file
|
156
|
-
file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
|
194
|
+
file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
|
157
195
|
else
|
158
|
-
self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :
|
196
|
+
self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :merge => true
|
159
197
|
end
|
160
198
|
end
|
161
199
|
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
@@ -6,12 +6,45 @@ class Matrix
|
|
6
6
|
FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
|
7
7
|
cmd =<<-EOF
|
8
8
|
source('#{Rbbt.share.R['barcode.R'].find}')
|
9
|
-
rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
9
|
+
rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
10
10
|
EOF
|
11
11
|
|
12
12
|
R.run(cmd)
|
13
13
|
end
|
14
14
|
|
15
|
+
def barcode_ruby(outfile, factor = 2)
|
16
|
+
parser = TSV::Parser.new self.data_file
|
17
|
+
dumper = TSV::Dumper.new parser.options.merge(:type => :list, :cast => :to_i)
|
18
|
+
dumper.init
|
19
|
+
|
20
|
+
TSV.traverse parser, :into => dumper, :bar => "Barcoding #{self.data_file}" do |key,values|
|
21
|
+
clean_values = values.flatten.compact.collect{|v| v.to_f}
|
22
|
+
modes = R.eval("rbbt.get.modes(#{R.ruby2R clean_values})$modes")
|
23
|
+
mode = Array === modes ? modes.first : modes
|
24
|
+
mode_values = clean_values.select{|v| v.to_f <= mode}
|
25
|
+
mode_values.concat mode_values.collect{|v| v+mode}
|
26
|
+
sd = Misc.sd mode_values
|
27
|
+
if sd.nil?
|
28
|
+
[key, [nil] * values.length]
|
29
|
+
else
|
30
|
+
threshold = mode + sd
|
31
|
+
bars = if Array === values.compact.first
|
32
|
+
values.collect do |v|
|
33
|
+
Misc.mean(v.compact.collect{|v| v.to_f}) > threshold ? 1 : 0
|
34
|
+
end
|
35
|
+
else
|
36
|
+
values.collect do |v|
|
37
|
+
v.to_f > threshold ? 1 : 0
|
38
|
+
end
|
39
|
+
end
|
40
|
+
key = key.first if Array === key
|
41
|
+
[key, bars]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
Misc.sensiblewrite(outfile, dumper.stream)
|
46
|
+
end
|
47
|
+
|
15
48
|
def activity_cluster(outfile, factor = 2)
|
16
49
|
|
17
50
|
FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
|
data/lib/rbbt/statistics/fisher.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
require 'rbbt/util/R/eval'
|
3
|
+
|
4
|
+
module Fisher
|
5
|
+
def self.test_classification(classes1, classes2, alternative='greater')
|
6
|
+
matrix = [0,0,0,0]
|
7
|
+
classes1.each_with_index do |c1,i|
|
8
|
+
c2 = classes2[i]
|
9
|
+
if c1 == 1 and c2 == 1
|
10
|
+
matrix[0] += 1
|
11
|
+
elsif c1 == 0 and c2 == 1
|
12
|
+
matrix[1] += 1
|
13
|
+
elsif c1 == 1 and c2 == 0
|
14
|
+
matrix[2] += 1
|
15
|
+
else
|
16
|
+
matrix[3] += 1
|
17
|
+
end
|
18
|
+
end
|
19
|
+
R.eval("fisher.test(matrix(#{R.ruby2R matrix}, nrow=2), alternative = #{R.ruby2R alternative})$p.value")
|
20
|
+
end
|
21
|
+
end
|
data/share/R/barcode.R
CHANGED
@@ -26,6 +26,42 @@ rbbt.GE.barcode <- function(matrix_file, output_file, sd.factor = 2, key.field =
|
|
26
26
|
close(file.barcode)
|
27
27
|
}
|
28
28
|
|
29
|
+
rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.field = "Ensembl Gene ID"){
|
30
|
+
data = rbbt.tsv(matrix_file)
|
31
|
+
data.mean = rowMeans(data, na.rm=T)
|
32
|
+
|
33
|
+
data.mode = apply(data, 1, function(x){ mode = rbbt.get.modes(x)$modes[1]; lower = x[x <= mode]; return(c(lower, mode, lower+mode));})
|
34
|
+
data.empty = sapply(data.mode,function(x){ length(x) < 3})
|
35
|
+
|
36
|
+
data = data[rownames(data)[!data.empty],]
|
37
|
+
data.mode = data.mode[!data.empty]
|
38
|
+
|
39
|
+
data.sd = sapply(data.mode, sd, na.rm=T)
|
40
|
+
data.threshold = as.vector(sapply(data.mode, function(x){return(x[length(x)/2])})) + data.sd
|
41
|
+
names(data.threshold) = rownames(data)
|
42
|
+
|
43
|
+
file.barcode = file(output_file, 'w')
|
44
|
+
|
45
|
+
cat("#: :type=:list#:cast=:to_i\n", file = file.barcode)
|
46
|
+
cat("#", file = file.barcode)
|
47
|
+
cat(key.field, file = file.barcode)
|
48
|
+
cat("\t", file = file.barcode)
|
49
|
+
cat(colnames(data), file = file.barcode, sep="\t")
|
50
|
+
cat("\n", file = file.barcode)
|
51
|
+
|
52
|
+
for (gene in rownames(data)){
|
53
|
+
barcode = (data[gene,] - data.threshold[gene]) > 0
|
54
|
+
|
55
|
+
barcode_value = rep(0, length(data[gene,]))
|
56
|
+
barcode_value[barcode] = 1
|
57
|
+
|
58
|
+
cat(gene, file = file.barcode)
|
59
|
+
cat("\t", file = file.barcode)
|
60
|
+
cat(barcode_value, file = file.barcode, sep = "\t")
|
61
|
+
cat("\n", file = file.barcode)
|
62
|
+
}
|
63
|
+
close(file.barcode)
|
64
|
+
}
|
29
65
|
|
30
66
|
rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID"){
|
31
67
|
|
data/test/rbbt/matrix/test_barcode.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/matrix'
|
3
|
+
require 'rbbt/matrix/barcode'
|
4
|
+
|
5
|
+
class TestBarcode < Test::Unit::TestCase
|
6
|
+
def test_R_barcode
|
7
|
+
data = TSV.setup({}, :key_field => "Gene", :fields => %w(S1 S2 S3 S4 S5 S6) , :type => :list)
|
8
|
+
data["G1"] = [1,1,1,4,5,6]
|
9
|
+
data["G2"] = [1,6,1,6,1,6]
|
10
|
+
data["G3"] = [1,1,1,1,6,6]
|
11
|
+
data["G4"] = [6,6,1,1,1,1]
|
12
|
+
|
13
|
+
TmpFile.with_file(data.to_s) do |file|
|
14
|
+
m = Matrix.new file
|
15
|
+
m.barcode(file+'.barcode')
|
16
|
+
tsv = TSV.open(file+'.barcode')
|
17
|
+
assert tsv["G2"] = [0,1,0,1,0,1]
|
18
|
+
|
19
|
+
m.barcode_ruby(file+'.barcode_ruby')
|
20
|
+
tsv = TSV.open(file+'.barcode_ruby')
|
21
|
+
assert tsv["G2"] = [0,1,0,1,0,1]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
data/test/rbbt/statistics/test_fisher.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/statistics/fisher'
|
3
|
+
|
4
|
+
class TestFisher < Test::Unit::TestCase
|
5
|
+
def test_classification
|
6
|
+
class1 = [0,0,0,0,1,1,1,1,1]
|
7
|
+
class2 = [0,0,0,1,1,1,1,1,0]
|
8
|
+
iii Fisher.test_classification(class1, class2)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
data/test/test_helper.rb
CHANGED
@@ -1,9 +1,22 @@
|
|
1
|
-
|
1
|
+
gem "test-unit", "~> 3.0"
|
2
|
+
gem "minitest", "~> 5.5"
|
3
|
+
|
2
4
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
5
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
6
|
|
7
|
+
require 'test/unit'
|
8
|
+
require 'fileutils'
|
9
|
+
|
10
|
+
require 'rbbt'
|
11
|
+
require 'rbbt/resource/path'
|
12
|
+
|
13
|
+
|
5
14
|
class Test::Unit::TestCase
|
6
|
-
def
|
7
|
-
File.join(File.dirname(__FILE__), 'data', file)
|
15
|
+
def self.datafile_test(file)
|
16
|
+
Path.setup(File.join(File.dirname(__FILE__), 'data', file.to_s))
|
17
|
+
end
|
18
|
+
|
19
|
+
def datafile_test(file)
|
20
|
+
Test::Unit::TestCase.datafile_test(file)
|
8
21
|
end
|
9
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.28
|
4
|
+
version: 1.1.29
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- lib/rbbt/plots/bar.rb
|
100
100
|
- lib/rbbt/plots/heatmap.rb
|
101
101
|
- lib/rbbt/statistics/fdr.rb
|
102
|
+
- lib/rbbt/statistics/fisher.rb
|
102
103
|
- lib/rbbt/statistics/hypergeometric.rb
|
103
104
|
- lib/rbbt/statistics/random_walk.rb
|
104
105
|
- lib/rbbt/statistics/rank_product.rb
|
@@ -107,8 +108,10 @@ files:
|
|
107
108
|
- share/R/MA.R
|
108
109
|
- share/R/barcode.R
|
109
110
|
- share/R/heatmap.3.R
|
111
|
+
- test/rbbt/matrix/test_barcode.rb
|
110
112
|
- test/rbbt/network/test_paths.rb
|
111
113
|
- test/rbbt/statistics/test_fdr.rb
|
114
|
+
- test/rbbt/statistics/test_fisher.rb
|
112
115
|
- test/rbbt/statistics/test_hypergeometric.rb
|
113
116
|
- test/rbbt/statistics/test_random_walk.rb
|
114
117
|
- test/rbbt/vector/model/test_svm.rb
|
@@ -142,6 +145,8 @@ test_files:
|
|
142
145
|
- test/rbbt/vector/model/test_svm.rb
|
143
146
|
- test/rbbt/vector/test_model.rb
|
144
147
|
- test/rbbt/network/test_paths.rb
|
148
|
+
- test/rbbt/matrix/test_barcode.rb
|
145
149
|
- test/rbbt/statistics/test_random_walk.rb
|
146
150
|
- test/rbbt/statistics/test_fdr.rb
|
147
151
|
- test/rbbt/statistics/test_hypergeometric.rb
|
152
|
+
- test/rbbt/statistics/test_fisher.rb
|