rbbt-dm 1.1.28 → 1.1.29
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix.rb +44 -6
- data/lib/rbbt/matrix/barcode.rb +34 -1
- data/lib/rbbt/statistics/fisher.rb +21 -0
- data/share/R/barcode.R +36 -0
- data/test/rbbt/matrix/test_barcode.rb +25 -0
- data/test/rbbt/statistics/test_fisher.rb +11 -0
- data/test/test_helper.rb +16 -3
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 27792718ef802b917873c9eec162237e4d90da2f
|
4
|
+
data.tar.gz: 8fd679e51737d0b95a72b09c2d3802614b9f856b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c351b6b4ffdae4cdeeb135ed34690cd4c81eb5de8c4311c0c82e11d5e6100a0354047b9a5b73897c4fd32d77fef93dac31094008e99747a6bf5fb8dd0e0f751b
|
7
|
+
data.tar.gz: 915b6f886c732373161d1b6bc43b51691e692ca50dafa81307890803e435ba4a8ea8de9a6068d13b35028af173da87dbebbdca02aee5a5ce96eede50ebe7dc3f
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -11,17 +11,17 @@ class Matrix
|
|
11
11
|
end
|
12
12
|
|
13
13
|
attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
|
14
|
-
def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
|
14
|
+
def initialize(data_file, labels = nil, value_type = nil, format = nil, organism=nil, identifiers=nil)
|
15
15
|
@data_file = data_file
|
16
|
-
@labels = labels
|
17
|
-
@value_type = value_type
|
16
|
+
@labels = labels
|
17
|
+
@value_type = value_type || 'count'
|
18
18
|
@format = format
|
19
|
+
_header = nil
|
19
20
|
@format ||= begin
|
20
21
|
_header ||= TSV.parse_header(@data_file)
|
21
22
|
_header.key_field || "ID"
|
22
23
|
end
|
23
24
|
@organism = organism
|
24
|
-
_header = nil
|
25
25
|
@organism ||= begin
|
26
26
|
_header ||= TSV.parse_header(@data_file)
|
27
27
|
_header.namespace || Organism.default_code("Hsa")
|
@@ -150,12 +150,50 @@ class Matrix
|
|
150
150
|
matrix
|
151
151
|
end
|
152
152
|
|
153
|
+
def to_barcode_ruby(factor = 2)
|
154
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
155
|
+
|
156
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Barcode #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
157
|
+
barcode_ruby(filename, factor)
|
158
|
+
end
|
159
|
+
subsets = self.subsets
|
160
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
161
|
+
matrix.subsets = subsets
|
162
|
+
matrix
|
163
|
+
end
|
164
|
+
|
165
|
+
def to_barcode(factor = 2)
|
166
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
167
|
+
|
168
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Barcode R #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
169
|
+
barcode(filename, factor).to_list
|
170
|
+
end
|
171
|
+
subsets = self.subsets
|
172
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
173
|
+
matrix.subsets = subsets
|
174
|
+
matrix
|
175
|
+
end
|
176
|
+
|
177
|
+
def to_activity(factor = 2)
|
178
|
+
require 'rbbt/tsv/change_id'
|
179
|
+
|
180
|
+
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
181
|
+
|
182
|
+
file = Persist.persist(data_file, :tsv, :prefix => "Activity #{factor}", :dir => Matrix.matrix_dir.barcode, :no_load => true) do |filename|
|
183
|
+
activity_cluster(filename, factor)
|
184
|
+
end
|
185
|
+
subsets = self.subsets
|
186
|
+
matrix = Matrix.new file, labels, value_type, "Ensembl Gene ID", organism
|
187
|
+
matrix.subsets = subsets
|
188
|
+
matrix
|
189
|
+
end
|
190
|
+
|
153
191
|
def tsv(to_gene=true, identifiers = nil)
|
154
192
|
if to_gene and TSV.parse_header(self.data_file).key_field != "Ensembl Gene ID"
|
155
193
|
file = self.to_gene(identifiers).data_file
|
156
|
-
file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
|
194
|
+
file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :type => :double, :merge => true
|
157
195
|
else
|
158
|
-
self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :
|
196
|
+
self.data_file.tsv :persist => true, :persist_dir => Matrix.matrix_dir.persist, :merge => true
|
159
197
|
end
|
160
198
|
end
|
161
199
|
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
@@ -6,12 +6,45 @@ class Matrix
|
|
6
6
|
FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
|
7
7
|
cmd =<<-EOF
|
8
8
|
source('#{Rbbt.share.R['barcode.R'].find}')
|
9
|
-
rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
9
|
+
rbbt.GE.barcode.mode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
10
10
|
EOF
|
11
11
|
|
12
12
|
R.run(cmd)
|
13
13
|
end
|
14
14
|
|
15
|
+
def barcode_ruby(outfile, factor = 2)
|
16
|
+
parser = TSV::Parser.new self.data_file
|
17
|
+
dumper = TSV::Dumper.new parser.options.merge(:type => :list, :cast => :to_i)
|
18
|
+
dumper.init
|
19
|
+
|
20
|
+
TSV.traverse parser, :into => dumper, :bar => "Barcoding #{self.data_file}" do |key,values|
|
21
|
+
clean_values = values.flatten.compact.collect{|v| v.to_f}
|
22
|
+
modes = R.eval("rbbt.get.modes(#{R.ruby2R clean_values})$modes")
|
23
|
+
mode = Array === modes ? modes.first : modes
|
24
|
+
mode_values = clean_values.select{|v| v.to_f <= mode}
|
25
|
+
mode_values.concat mode_values.collect{|v| v+mode}
|
26
|
+
sd = Misc.sd mode_values
|
27
|
+
if sd.nil?
|
28
|
+
[key, [nil] * values.length]
|
29
|
+
else
|
30
|
+
threshold = mode + sd
|
31
|
+
bars = if Array === values.compact.first
|
32
|
+
values.collect do |v|
|
33
|
+
Misc.mean(v.compact.collect{|v| v.to_f}) > threshold ? 1 : 0
|
34
|
+
end
|
35
|
+
else
|
36
|
+
values.collect do |v|
|
37
|
+
v.to_f > threshold ? 1 : 0
|
38
|
+
end
|
39
|
+
end
|
40
|
+
key = key.first if Array === key
|
41
|
+
[key, bars]
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
Misc.sensiblewrite(outfile, dumper.stream)
|
46
|
+
end
|
47
|
+
|
15
48
|
def activity_cluster(outfile, factor = 2)
|
16
49
|
|
17
50
|
FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
|
data/lib/rbbt/statistics/fisher.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
require 'rbbt/util/R/eval'
|
3
|
+
|
4
|
+
module Fisher
|
5
|
+
def self.test_classification(classes1, classes2, alternative='greater')
|
6
|
+
matrix = [0,0,0,0]
|
7
|
+
classes1.each_with_index do |c1,i|
|
8
|
+
c2 = classes2[i]
|
9
|
+
if c1 == 1 and c2 == 1
|
10
|
+
matrix[0] += 1
|
11
|
+
elsif c1 == 0 and c2 == 1
|
12
|
+
matrix[1] += 1
|
13
|
+
elsif c1 == 1 and c2 == 0
|
14
|
+
matrix[2] += 1
|
15
|
+
else
|
16
|
+
matrix[3] += 1
|
17
|
+
end
|
18
|
+
end
|
19
|
+
R.eval("fisher.test(matrix(#{R.ruby2R matrix}, nrow=2), alternative = #{R.ruby2R alternative})$p.value")
|
20
|
+
end
|
21
|
+
end
|
data/share/R/barcode.R
CHANGED
@@ -26,6 +26,42 @@ rbbt.GE.barcode <- function(matrix_file, output_file, sd.factor = 2, key.field =
|
|
26
26
|
close(file.barcode)
|
27
27
|
}
|
28
28
|
|
29
|
+
rbbt.GE.barcode.mode <- function(matrix_file, output_file, sd.factor = 2, key.field = "Ensembl Gene ID"){
|
30
|
+
data = rbbt.tsv(matrix_file)
|
31
|
+
data.mean = rowMeans(data, na.rm=T)
|
32
|
+
|
33
|
+
data.mode = apply(data, 1, function(x){ mode = rbbt.get.modes(x)$modes[1]; lower = x[x <= mode]; return(c(lower, mode, lower+mode));})
|
34
|
+
data.empty = sapply(data.mode,function(x){ length(x) < 3})
|
35
|
+
|
36
|
+
data = data[rownames(data)[!data.empty],]
|
37
|
+
data.mode = data.mode[!data.empty]
|
38
|
+
|
39
|
+
data.sd = sapply(data.mode, sd, na.rm=T)
|
40
|
+
data.threshold = as.vector(sapply(data.mode, function(x){return(x[length(x)/2])})) + data.sd
|
41
|
+
names(data.threshold) = rownames(data)
|
42
|
+
|
43
|
+
file.barcode = file(output_file, 'w')
|
44
|
+
|
45
|
+
cat("#: :type=:list#:cast=:to_i\n", file = file.barcode)
|
46
|
+
cat("#", file = file.barcode)
|
47
|
+
cat(key.field, file = file.barcode)
|
48
|
+
cat("\t", file = file.barcode)
|
49
|
+
cat(colnames(data), file = file.barcode, sep="\t")
|
50
|
+
cat("\n", file = file.barcode)
|
51
|
+
|
52
|
+
for (gene in rownames(data)){
|
53
|
+
barcode = (data[gene,] - data.threshold[gene]) > 0
|
54
|
+
|
55
|
+
barcode_value = rep(0, length(data[gene,]))
|
56
|
+
barcode_value[barcode] = 1
|
57
|
+
|
58
|
+
cat(gene, file = file.barcode)
|
59
|
+
cat("\t", file = file.barcode)
|
60
|
+
cat(barcode_value, file = file.barcode, sep = "\t")
|
61
|
+
cat("\n", file = file.barcode)
|
62
|
+
}
|
63
|
+
close(file.barcode)
|
64
|
+
}
|
29
65
|
|
30
66
|
rbbt.GE.activity_cluster <- function(matrix_file, output_file, key.field = "ID"){
|
31
67
|
|
data/test/rbbt/matrix/test_barcode.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/matrix'
|
3
|
+
require 'rbbt/matrix/barcode'
|
4
|
+
|
5
|
+
class TestBarcode < Test::Unit::TestCase
|
6
|
+
def test_R_barcode
|
7
|
+
data = TSV.setup({}, :key_field => "Gene", :fields => %w(S1 S2 S3 S4 S5 S6) , :type => :list)
|
8
|
+
data["G1"] = [1,1,1,4,5,6]
|
9
|
+
data["G2"] = [1,6,1,6,1,6]
|
10
|
+
data["G3"] = [1,1,1,1,6,6]
|
11
|
+
data["G4"] = [6,6,1,1,1,1]
|
12
|
+
|
13
|
+
TmpFile.with_file(data.to_s) do |file|
|
14
|
+
m = Matrix.new file
|
15
|
+
m.barcode(file+'.barcode')
|
16
|
+
tsv = TSV.open(file+'.barcode')
|
17
|
+
assert tsv["G2"] = [0,1,0,1,0,1]
|
18
|
+
|
19
|
+
m.barcode_ruby(file+'.barcode_ruby')
|
20
|
+
tsv = TSV.open(file+'.barcode_ruby')
|
21
|
+
assert tsv["G2"] = [0,1,0,1,0,1]
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
data/test/rbbt/statistics/test_fisher.rb
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/statistics/fisher'
|
3
|
+
|
4
|
+
class TestFisher < Test::Unit::TestCase
|
5
|
+
def test_classification
|
6
|
+
class1 = [0,0,0,0,1,1,1,1,1]
|
7
|
+
class2 = [0,0,0,1,1,1,1,1,0]
|
8
|
+
iii Fisher.test_classification(class1, class2)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
data/test/test_helper.rb
CHANGED
@@ -1,9 +1,22 @@
|
|
1
|
-
|
1
|
+
gem "test-unit", "~> 3.0"
|
2
|
+
gem "minitest", "~> 5.5"
|
3
|
+
|
2
4
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
5
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
4
6
|
|
7
|
+
require 'test/unit'
|
8
|
+
require 'fileutils'
|
9
|
+
|
10
|
+
require 'rbbt'
|
11
|
+
require 'rbbt/resource/path'
|
12
|
+
|
13
|
+
|
5
14
|
class Test::Unit::TestCase
|
6
|
-
def
|
7
|
-
File.join(File.dirname(__FILE__), 'data', file)
|
15
|
+
def self.datafile_test(file)
|
16
|
+
Path.setup(File.join(File.dirname(__FILE__), 'data', file.to_s))
|
17
|
+
end
|
18
|
+
|
19
|
+
def datafile_test(file)
|
20
|
+
Test::Unit::TestCase.datafile_test(file)
|
8
21
|
end
|
9
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.28
|
4
|
+
version: 1.1.29
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-10-
|
11
|
+
date: 2016-10-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- lib/rbbt/plots/bar.rb
|
100
100
|
- lib/rbbt/plots/heatmap.rb
|
101
101
|
- lib/rbbt/statistics/fdr.rb
|
102
|
+
- lib/rbbt/statistics/fisher.rb
|
102
103
|
- lib/rbbt/statistics/hypergeometric.rb
|
103
104
|
- lib/rbbt/statistics/random_walk.rb
|
104
105
|
- lib/rbbt/statistics/rank_product.rb
|
@@ -107,8 +108,10 @@ files:
|
|
107
108
|
- share/R/MA.R
|
108
109
|
- share/R/barcode.R
|
109
110
|
- share/R/heatmap.3.R
|
111
|
+
- test/rbbt/matrix/test_barcode.rb
|
110
112
|
- test/rbbt/network/test_paths.rb
|
111
113
|
- test/rbbt/statistics/test_fdr.rb
|
114
|
+
- test/rbbt/statistics/test_fisher.rb
|
112
115
|
- test/rbbt/statistics/test_hypergeometric.rb
|
113
116
|
- test/rbbt/statistics/test_random_walk.rb
|
114
117
|
- test/rbbt/vector/model/test_svm.rb
|
@@ -142,6 +145,8 @@ test_files:
|
|
142
145
|
- test/rbbt/vector/model/test_svm.rb
|
143
146
|
- test/rbbt/vector/test_model.rb
|
144
147
|
- test/rbbt/network/test_paths.rb
|
148
|
+
- test/rbbt/matrix/test_barcode.rb
|
145
149
|
- test/rbbt/statistics/test_random_walk.rb
|
146
150
|
- test/rbbt/statistics/test_fdr.rb
|
147
151
|
- test/rbbt/statistics/test_hypergeometric.rb
|
152
|
+
- test/rbbt/statistics/test_fisher.rb
|