rbbt-dm 1.1.8 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix/barcode.rb +14 -1
- data/lib/rbbt/matrix/differential.rb +24 -8
- data/lib/rbbt/matrix/knowledge_base.rb +92 -0
- data/lib/rbbt/matrix.rb +46 -10
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4d951189411296e243b213618e445291596cb53
|
4
|
+
data.tar.gz: d7afae72bcaa8b8ee82aaac16d6e83d742386772
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0354f0e2134e14e836e93648a7ac64a32f79e652198bbaa50458cd780ca4cea3b256d2fd57d486d6a5fc5c90299097afb245f6eb289c1cf3f104b62a57f43f1f
|
7
|
+
data.tar.gz: d356f93d961b8681b03ea9af236d8c6c2a57da222004ec05e8647d49f7e548f26ea1f895446f997df79b9306a986b82a50fabefd4e3f0df4aef8cc837c2fd42a
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
@@ -9,6 +9,19 @@ source('#{Rbbt.share.R['barcode.R'].find}')
|
|
9
9
|
rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
10
10
|
EOF
|
11
11
|
|
12
|
-
R.run(cmd
|
12
|
+
R.run(cmd)
|
13
13
|
end
|
14
|
+
|
15
|
+
# Runs the R function `rbbt.GE.activity_cluster` (loaded from the shared
# 'barcode.R' script) over this matrix's data file.
#
# outfile - path handed to the R function as its output file. When it is not
#           nil and its parent directory does not exist, that directory is
#           created first.
# factor  - currently unused by this method: the third argument given to the
#           R function is `value_type`, not `factor`.
#           NOTE(review): confirm that passing value_type here is intended.
#
# Returns the result of R.run.
def activity_cluster(outfile, factor = 2)
  # File.exist? is used because File.exists? was deprecated and later removed
  # (Ruby 3.2).
  FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exist? File.dirname(outfile)

  cmd =<<-EOF
source('#{Rbbt.share.R['barcode.R'].find}')
rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R value_type})
  EOF

  R.run(cmd)
end
|
25
|
+
|
26
|
+
|
14
27
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
1
3
|
class Matrix
|
2
4
|
def differential(main, contrast, path = nil)
|
3
5
|
if Array === main and Array === contrast
|
@@ -7,17 +9,31 @@ class Matrix
|
|
7
9
|
end
|
8
10
|
|
9
11
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
10
|
-
Persist.persist(name, :tsv, :
|
12
|
+
Persist.persist(name, :tsv, :persist => :update,
|
11
13
|
:other => {:main => main_samples, :contrast => contrast_samples},
|
12
14
|
:prefix => "Diff", :dir => Matrix.matrix_dir.differential, :no_load => true) do |file|
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
raise if file.nil?
|
17
|
+
|
18
|
+
log2 = value_type.nil? or value_type == "count"
|
19
|
+
log2 = false
|
20
|
+
two_channel = false
|
21
|
+
FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
|
22
|
+
|
23
|
+
cmd = <<-EOS
|
24
|
+
|
25
|
+
source('#{Rbbt.share.R["MA.R"].find}')
|
26
|
+
|
27
|
+
data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
|
28
|
+
main = #{R.ruby2R(main_samples)},
|
29
|
+
contrast = #{R.ruby2R(contrast_samples)},
|
30
|
+
log2=#{ R.ruby2R log2 },
|
31
|
+
outfile = #{R.ruby2R file},
|
32
|
+
key.field = #{R.ruby2R format},
|
33
|
+
two.channel = #{R.ruby2R two_channel})
|
34
|
+
EOS
|
35
|
+
|
36
|
+
R.run(cmd, :monitor => true)
|
21
37
|
end
|
22
38
|
end
|
23
39
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'rbbt/matrix'
|
2
|
+
require 'rbbt/knowledge_base'
|
3
|
+
require 'rbbt/matrix/barcode'
|
4
|
+
|
5
|
+
# Matrix support for KnowledgeBase: keeps a registry of named matrices and
# registers knowledge-base entries built from them.
class KnowledgeBase
  # Registry of matrices; #register_matrix stores [matrix, options] pairs
  # under each name.
  attr_reader :matrix_registry

  # Replaces the registry with +new+ after passing it through
  # IndiferentHash.setup.
  def matrix_registry=(new)
    @matrix_registry = IndiferentHash.setup(new)
  end

  # Resolves the matrix registered under +name+.
  #
  # The registered value may already be a Matrix (returned as is) or a path.
  # For a path pointing to a directory, a Matrix is built from the files
  # found in it (`data` or `values`, `labels` or `samples`, `identifiers`),
  # with the registered options taking precedence where given.
  #
  # Returns a Matrix, or nil when the registered path is not a directory.
  # Raises RuntimeError when nothing is registered under +name+, when the
  # registered value cannot be interpreted as a path, or when the path does
  # not exist.
  def matrix(name)
    entry = @matrix_registry.nil? ? nil : @matrix_registry[name]
    raise "No matrix registered under name: #{name}" if entry.nil?

    matrix, options = entry
    options ||= {}

    return matrix if Matrix === matrix

    # Only probe the filesystem with values File.exist? can accept; anything
    # else falls through to the descriptive error below instead of a TypeError.
    path_like = matrix.respond_to?(:to_str) || matrix.respond_to?(:to_path)
    Path.setup(matrix) if path_like && !(Path === matrix) && File.exist?(matrix)

    raise "Registered matrix is strange: #{Misc.fingerprint matrix}" unless Path === matrix

    path = matrix

    raise "Registered path not found: #{path.find}" unless path.exists?

    if path.find.directory?
      data, labels, value_type, format, organism, identifiers = Misc.process_options options, :data, :labels, :value_type, :format, :organism, :identifiers

      # Explicit options win; otherwise fall back to conventional file names.
      data ||= path.data if path.data.exists?
      data ||= path.values if path.values.exists?

      labels ||= path.labels if path.labels.exists?
      labels ||= path.samples if path.samples.exists?

      identifiers ||= path.identifiers if path.identifiers.exists?

      # NOTE(review): this overrides any :value_type option with the key
      # field of the data file header whenever a data file is available.
      value_type = TSV.parse_header(data.find).key_field if data
      value_type ||= "Unknown ID"

      Matrix.new data, labels, value_type, format, organism, identifiers
    else
      # Paths that are not directories are not handled; the result is nil.
      nil
    end
  end

  # Stores +matrix+ in the registry under +name+ and registers two entries
  # through `register` (defined outside this file section): one under +name+
  # and one under "<name>_activity".
  #
  # options - :sample_format (default "Sample") is extracted and passed to
  #           TSV.read_matrix; the remaining options are stored in the
  #           registry alongside the matrix.
  def register_matrix(name, matrix, options = {})
    options = Misc.add_defaults options, :sample_format => "Sample"
    sample_format = Misc.process_options options, :sample_format

    @matrix_registry ||= IndiferentHash.setup({})
    @matrix_registry[name] = [matrix, options]

    register name do
      # A distinct local name avoids reassigning the `matrix` parameter
      # captured by this block.
      registered = matrix(name)
      TSV.read_matrix registered.data_file, sample_format
    end

    register name.to_s + '_activity' do
      registered = matrix(name)
      TmpFile.with_file do |tmpfile|
        registered.activity_cluster(tmpfile)
        tsv = TSV.open(TSV.read_matrix(tmpfile, sample_format))
        tsv.identifiers ||= registered.data_file.identifier_files.first
        tsv.identifiers = tsv.identifiers.find if tsv.identifiers.respond_to? :find

        tsv = tsv.add_field "Activity" do |_key, pair|
          samples, values = pair
          values = values.collect{|v| v.to_i }

          # The labelling depends on the highest value present in the row.
          case Misc.max(values)
          when 1
            [''] * samples.length
          when 2
            values.collect{|v| v == 2 ? "active" : '' }
          else
            values.collect{|v|
              case v
              when 1
                "inactive"
              when 2
                ''
              else
                "active"
              end
            }
          end
        end

        tsv
      end
    end
  end

end
|
92
|
+
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -10,17 +10,23 @@ class Matrix
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
attr_accessor :data_file, :labels, :value_type, :format, :organism
|
13
|
+
attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
|
14
14
|
# Builds a matrix description around a data file.
#
# data_file   - the matrix data file; its header is consulted only when
#               format or organism is not supplied.
# labels      - stored as given.
# value_type  - stored as given.
# format      - when nil/false, defaults to the key field of the data file
#               header, or "ID" if the header has none.
# organism    - when nil/false, defaults to the namespace of the data file
#               header, or "Hsa" if the header has none.
# identifiers - stored as given.
def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
  @data_file = data_file
  @labels = labels
  @value_type = value_type

  # Declared before both fallbacks so the header is parsed at most once,
  # and only if one of them actually needs it.
  header = nil

  @format = format || begin
    header ||= TSV.parse_header(@data_file)
    header.key_field || "ID"
  end

  @organism = organism || begin
    header ||= TSV.parse_header(@data_file)
    header.namespace || "Hsa"
  end

  @identifiers = identifiers
end
|
25
31
|
|
26
32
|
def samples
|
@@ -31,6 +37,17 @@ class Matrix
|
|
31
37
|
@subsets ||= begin
|
32
38
|
subsets = {}
|
33
39
|
case @labels
|
40
|
+
when Path
|
41
|
+
labels = @labels.tsv
|
42
|
+
factors = labels.fields
|
43
|
+
labels.through do |sample,values|
|
44
|
+
factors.zip(values).each do |factor,value|
|
45
|
+
subsets[factor] ||= {}
|
46
|
+
subsets[factor][value] ||= []
|
47
|
+
subsets[factor][value] << sample
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
34
51
|
when TSV
|
35
52
|
factors = @labels.fields
|
36
53
|
@labels.through do |sample,values|
|
@@ -42,7 +59,7 @@ class Matrix
|
|
42
59
|
end
|
43
60
|
when Hash
|
44
61
|
@labels.each do |factor,info|
|
45
|
-
subsets[factors] ||=
|
62
|
+
subsets[factors] ||= {}
|
46
63
|
info.each do |value, samples|
|
47
64
|
subsets[factors][value] = case samples
|
48
65
|
when Array
|
@@ -56,16 +73,29 @@ class Matrix
|
|
56
73
|
end
|
57
74
|
end
|
58
75
|
end
|
59
|
-
|
76
|
+
|
77
|
+
clean_subsets = {}
|
78
|
+
subsets.each do |factor,values|
|
79
|
+
next if values.nil? or values.size < 2
|
80
|
+
values.each do |level,samples|
|
81
|
+
next if samples.nil? or samples.length < 2
|
82
|
+
clean_subsets[factor] ||= {}
|
83
|
+
clean_subsets[factor][level] = samples
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
clean_subsets
|
60
88
|
end
|
61
89
|
end
|
62
90
|
|
63
91
|
def comparison(main, contrast, subsets = nil)
|
92
|
+
subsets ||= self.subsets
|
64
93
|
|
65
94
|
if main.index "="
|
66
95
|
main_factor, main_value = main.split "="
|
67
96
|
raise ParameterException, "Main selection not understood" if subsets[main_factor].nil? or subsets[main_factor][main_value].nil?
|
68
|
-
|
97
|
+
value = subsets[main_factor][main_value]
|
98
|
+
main_samples = String === value ? value.split(',') : value
|
69
99
|
else
|
70
100
|
main_samples = main.split(/[|,\n]/)
|
71
101
|
end
|
@@ -74,17 +104,20 @@ class Matrix
|
|
74
104
|
if contrast.index "="
|
75
105
|
contrast_factor, contrast_value = contrast.split "="
|
76
106
|
raise ParameterException, "Contrast selection not understood" if subsets[contrast_factor].nil? or subsets[contrast_factor][contrast_value].nil?
|
77
|
-
|
107
|
+
value = subsets[contrast_factor][contrast_value]
|
108
|
+
contrast_samples = String === value ? value.split(',') : value
|
78
109
|
else
|
79
110
|
contrast_samples = contrast.split(/[|,\n]/)
|
80
111
|
end
|
81
112
|
else
|
82
113
|
if subsets and defined? main_factor
|
83
|
-
contrast_samples = subsets[main_factor].values.collect{|s| s.split ',' }.flatten.uniq - main_samples
|
114
|
+
contrast_samples = subsets[main_factor].values.flatten.collect{|s| s.split ',' }.flatten.uniq - main_samples
|
84
115
|
else
|
85
116
|
contrast_samples = samples - main_samples
|
86
117
|
end
|
87
118
|
end
|
119
|
+
main_samples = main_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
|
120
|
+
contrast_samples = contrast_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
|
88
121
|
|
89
122
|
[main_samples, contrast_samples]
|
90
123
|
end
|
@@ -96,9 +129,12 @@ class Matrix
|
|
96
129
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
97
130
|
|
98
131
|
file = Persist.persist(data_file, :tsv, :prefix => "Gene", :dir => Matrix.matrix_dir.values, :no_load => true) do
|
99
|
-
identifiers = [Organism.identifiers(organism), @identifiers, identifiers].compact.uniq
|
100
132
|
|
101
|
-
data_file.tsv(:cast => :to_f)
|
133
|
+
data = data_file.tsv(:cast => :to_f)
|
134
|
+
|
135
|
+
identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
|
136
|
+
|
137
|
+
data.change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
|
102
138
|
Misc.mean(v.compact)
|
103
139
|
end
|
104
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.8
|
4
|
+
version: 1.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- lib/rbbt/matrix.rb
|
95
95
|
- lib/rbbt/matrix/barcode.rb
|
96
96
|
- lib/rbbt/matrix/differential.rb
|
97
|
+
- lib/rbbt/matrix/knowledge_base.rb
|
97
98
|
- lib/rbbt/network/paths.rb
|
98
99
|
- lib/rbbt/plots/bar.rb
|
99
100
|
- lib/rbbt/plots/heatmap.rb
|