rbbt-dm 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/matrix/barcode.rb +14 -1
- data/lib/rbbt/matrix/differential.rb +24 -8
- data/lib/rbbt/matrix/knowledge_base.rb +92 -0
- data/lib/rbbt/matrix.rb +46 -10
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f4d951189411296e243b213618e445291596cb53
|
4
|
+
data.tar.gz: d7afae72bcaa8b8ee82aaac16d6e83d742386772
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0354f0e2134e14e836e93648a7ac64a32f79e652198bbaa50458cd780ca4cea3b256d2fd57d486d6a5fc5c90299097afb245f6eb289c1cf3f104b62a57f43f1f
|
7
|
+
data.tar.gz: d356f93d961b8681b03ea9af236d8c6c2a57da222004ec05e8647d49f7e548f26ea1f895446f997df79b9306a986b82a50fabefd4e3f0df4aef8cc837c2fd42a
|
data/lib/rbbt/matrix/barcode.rb
CHANGED
@@ -9,6 +9,19 @@ source('#{Rbbt.share.R['barcode.R'].find}')
|
|
9
9
|
rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
|
10
10
|
EOF
|
11
11
|
|
12
|
-
R.run(cmd
|
12
|
+
R.run(cmd)
|
13
13
|
end
|
14
|
+
|
15
|
+
def activity_cluster(outfile, factor = 2)
|
16
|
+
|
17
|
+
FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
|
18
|
+
cmd =<<-EOF
|
19
|
+
source('#{Rbbt.share.R['barcode.R'].find}')
|
20
|
+
rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R value_type})
|
21
|
+
EOF
|
22
|
+
|
23
|
+
R.run(cmd)
|
24
|
+
end
|
25
|
+
|
26
|
+
|
14
27
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'rbbt/util/R'
|
2
|
+
|
1
3
|
class Matrix
|
2
4
|
def differential(main, contrast, path = nil)
|
3
5
|
if Array === main and Array === contrast
|
@@ -7,17 +9,31 @@ class Matrix
|
|
7
9
|
end
|
8
10
|
|
9
11
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
10
|
-
Persist.persist(name, :tsv, :
|
12
|
+
Persist.persist(name, :tsv, :persist => :update,
|
11
13
|
:other => {:main => main_samples, :contrast => contrast_samples},
|
12
14
|
:prefix => "Diff", :dir => Matrix.matrix_dir.differential, :no_load => true) do |file|
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
raise if file.nil?
|
17
|
+
|
18
|
+
log2 = value_type.nil? or value_type == "count"
|
19
|
+
log2 = false
|
20
|
+
two_channel = false
|
21
|
+
FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
|
22
|
+
|
23
|
+
cmd = <<-EOS
|
24
|
+
|
25
|
+
source('#{Rbbt.share.R["MA.R"].find}')
|
26
|
+
|
27
|
+
data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
|
28
|
+
main = #{R.ruby2R(main_samples)},
|
29
|
+
contrast = #{R.ruby2R(contrast_samples)},
|
30
|
+
log2=#{ R.ruby2R log2 },
|
31
|
+
outfile = #{R.ruby2R file},
|
32
|
+
key.field = #{R.ruby2R format},
|
33
|
+
two.channel = #{R.ruby2R two_channel})
|
34
|
+
EOS
|
35
|
+
|
36
|
+
R.run(cmd, :monitor => true)
|
21
37
|
end
|
22
38
|
end
|
23
39
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'rbbt/matrix'
|
2
|
+
require 'rbbt/knowledge_base'
|
3
|
+
require 'rbbt/matrix/barcode'
|
4
|
+
|
5
|
+
class KnowledgeBase
|
6
|
+
attr_accessor :matrix_registry
|
7
|
+
def matrix_registry=(new)
|
8
|
+
@matrix_registry = IndiferentHash.setup(new)
|
9
|
+
end
|
10
|
+
|
11
|
+
def matrix(name)
|
12
|
+
matrix, options = @matrix_registry[name]
|
13
|
+
|
14
|
+
return matrix if Matrix === matrix
|
15
|
+
|
16
|
+
Path.setup(matrix) if not Path === matrix and File.exists? matrix
|
17
|
+
|
18
|
+
raise "Registered matrix is strange: #{Misc.fingerprint matrix}" unless Path === matrix
|
19
|
+
|
20
|
+
path = matrix
|
21
|
+
|
22
|
+
raise "Registered path not found: #{path.find}" unless path.exists?
|
23
|
+
|
24
|
+
if path.find.directory?
|
25
|
+
data, labels, value_type, format, organism, identifiers = Misc.process_options options, :data, :labels, :value_type, :format, :organism, :identifiers
|
26
|
+
|
27
|
+
data ||= path.data if path.data.exists?
|
28
|
+
data ||= path.values if path.values.exists?
|
29
|
+
|
30
|
+
labels ||= path.labels if path.labels.exists?
|
31
|
+
labels ||= path.samples if path.samples.exists?
|
32
|
+
|
33
|
+
identifiers ||= path.identifiers if path.identifiers.exists?
|
34
|
+
|
35
|
+
value_type = TSV.parse_header(data.find).key_field if data
|
36
|
+
value_type ||= "Unknown ID"
|
37
|
+
|
38
|
+
Matrix.new data, labels, value_type, format, organism, identifiers
|
39
|
+
else
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def register_matrix(name, matrix, options = {})
|
44
|
+
options = Misc.add_defaults options, :sample_format => "Sample"
|
45
|
+
sample_format = Misc.process_options options, :sample_format
|
46
|
+
|
47
|
+
@matrix_registry ||= IndiferentHash.setup({})
|
48
|
+
@matrix_registry[name] = [matrix, options]
|
49
|
+
|
50
|
+
|
51
|
+
register name do
|
52
|
+
matrix = matrix(name)
|
53
|
+
TSV.read_matrix matrix.data_file, sample_format
|
54
|
+
end
|
55
|
+
|
56
|
+
register name.to_s + '_activity' do
|
57
|
+
matrix = matrix(name)
|
58
|
+
TmpFile.with_file do |tmpfile|
|
59
|
+
matrix.activity_cluster(tmpfile)
|
60
|
+
tsv = TSV.open(TSV.read_matrix(tmpfile, sample_format))
|
61
|
+
tsv.identifiers ||= matrix.data_file.identifier_files.first
|
62
|
+
tsv.identifiers = tsv.identifiers.find if tsv.identifiers.respond_to? :find
|
63
|
+
|
64
|
+
tsv = tsv.add_field "Activity" do |k,p|
|
65
|
+
samples, values = p
|
66
|
+
values = values.collect{|v| v.to_i }
|
67
|
+
new_values = case Misc.max(values)
|
68
|
+
when 1
|
69
|
+
[''] * samples.length
|
70
|
+
when 2
|
71
|
+
values.collect{|v| v == 2 ? "active" : '' }
|
72
|
+
else
|
73
|
+
values.collect{|v|
|
74
|
+
case v
|
75
|
+
when 1
|
76
|
+
"inactive"
|
77
|
+
when 2
|
78
|
+
''
|
79
|
+
else
|
80
|
+
"active"
|
81
|
+
end
|
82
|
+
}
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
tsv
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
end
|
92
|
+
|
data/lib/rbbt/matrix.rb
CHANGED
@@ -10,17 +10,23 @@ class Matrix
|
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
|
-
attr_accessor :data_file, :labels, :value_type, :format, :organism
|
13
|
+
attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
|
14
14
|
def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
|
15
15
|
@data_file = data_file
|
16
16
|
@labels = labels
|
17
17
|
@value_type = value_type
|
18
18
|
@format = format
|
19
|
+
@format ||= begin
|
20
|
+
_header ||= TSV.parse_header(@data_file)
|
21
|
+
_header.key_field || "ID"
|
22
|
+
end
|
19
23
|
@organism = organism
|
24
|
+
_header = nil
|
20
25
|
@organism ||= begin
|
21
|
-
TSV.parse_header(@data_file)
|
26
|
+
_header ||= TSV.parse_header(@data_file)
|
27
|
+
_header.namespace || "Hsa"
|
22
28
|
end
|
23
|
-
@identifiers = identifiers
|
29
|
+
@identifiers = identifiers
|
24
30
|
end
|
25
31
|
|
26
32
|
def samples
|
@@ -31,6 +37,17 @@ class Matrix
|
|
31
37
|
@subsets ||= begin
|
32
38
|
subsets = {}
|
33
39
|
case @labels
|
40
|
+
when Path
|
41
|
+
labels = @labels.tsv
|
42
|
+
factors = labels.fields
|
43
|
+
labels.through do |sample,values|
|
44
|
+
factors.zip(values).each do |factor,value|
|
45
|
+
subsets[factor] ||= {}
|
46
|
+
subsets[factor][value] ||= []
|
47
|
+
subsets[factor][value] << sample
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
34
51
|
when TSV
|
35
52
|
factors = @labels.fields
|
36
53
|
@labels.through do |sample,values|
|
@@ -42,7 +59,7 @@ class Matrix
|
|
42
59
|
end
|
43
60
|
when Hash
|
44
61
|
@labels.each do |factor,info|
|
45
|
-
subsets[factors] ||=
|
62
|
+
subsets[factors] ||= {}
|
46
63
|
info.each do |value, samples|
|
47
64
|
subsets[factors][value] = case samples
|
48
65
|
when Array
|
@@ -56,16 +73,29 @@ class Matrix
|
|
56
73
|
end
|
57
74
|
end
|
58
75
|
end
|
59
|
-
|
76
|
+
|
77
|
+
clean_subsets = {}
|
78
|
+
subsets.each do |factor,values|
|
79
|
+
next if values.nil? or values.size < 2
|
80
|
+
values.each do |level,samples|
|
81
|
+
next if samples.nil? or samples.length < 2
|
82
|
+
clean_subsets[factor] ||= {}
|
83
|
+
clean_subsets[factor][level] = samples
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
clean_subsets
|
60
88
|
end
|
61
89
|
end
|
62
90
|
|
63
91
|
def comparison(main, contrast, subsets = nil)
|
92
|
+
subsets ||= self.subsets
|
64
93
|
|
65
94
|
if main.index "="
|
66
95
|
main_factor, main_value = main.split "="
|
67
96
|
raise ParameterException, "Main selection not understood" if subsets[main_factor].nil? or subsets[main_factor][main_value].nil?
|
68
|
-
|
97
|
+
value = subsets[main_factor][main_value]
|
98
|
+
main_samples = String === value ? value.split(',') : value
|
69
99
|
else
|
70
100
|
main_samples = main.split(/[|,\n]/)
|
71
101
|
end
|
@@ -74,17 +104,20 @@ class Matrix
|
|
74
104
|
if contrast.index "="
|
75
105
|
contrast_factor, contrast_value = contrast.split "="
|
76
106
|
raise ParameterException, "Contrast selection not understood" if subsets[contrast_factor].nil? or subsets[contrast_factor][contrast_value].nil?
|
77
|
-
|
107
|
+
value = subsets[contrast_factor][contrast_value]
|
108
|
+
contrast_samples = String === value ? value.split(',') : value
|
78
109
|
else
|
79
110
|
contrast_samples = contrast.split(/[|,\n]/)
|
80
111
|
end
|
81
112
|
else
|
82
113
|
if subsets and defined? main_factor
|
83
|
-
contrast_samples = subsets[main_factor].values.collect{|s| s.split ',' }.flatten.uniq - main_samples
|
114
|
+
contrast_samples = subsets[main_factor].values.flatten.collect{|s| s.split ',' }.flatten.uniq - main_samples
|
84
115
|
else
|
85
116
|
contrast_samples = samples - main_samples
|
86
117
|
end
|
87
118
|
end
|
119
|
+
main_samples = main_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
|
120
|
+
contrast_samples = contrast_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
|
88
121
|
|
89
122
|
[main_samples, contrast_samples]
|
90
123
|
end
|
@@ -96,9 +129,12 @@ class Matrix
|
|
96
129
|
name = data_file =~ /:>/ ? File.basename(data_file) : data_file
|
97
130
|
|
98
131
|
file = Persist.persist(data_file, :tsv, :prefix => "Gene", :dir => Matrix.matrix_dir.values, :no_load => true) do
|
99
|
-
identifiers = [Organism.identifiers(organism), @identifiers, identifiers].compact.uniq
|
100
132
|
|
101
|
-
data_file.tsv(:cast => :to_f)
|
133
|
+
data = data_file.tsv(:cast => :to_f)
|
134
|
+
|
135
|
+
identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
|
136
|
+
|
137
|
+
data.change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
|
102
138
|
Misc.mean(v.compact)
|
103
139
|
end
|
104
140
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.8
|
4
|
+
version: 1.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-09-
|
11
|
+
date: 2014-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rbbt-util
|
@@ -94,6 +94,7 @@ files:
|
|
94
94
|
- lib/rbbt/matrix.rb
|
95
95
|
- lib/rbbt/matrix/barcode.rb
|
96
96
|
- lib/rbbt/matrix/differential.rb
|
97
|
+
- lib/rbbt/matrix/knowledge_base.rb
|
97
98
|
- lib/rbbt/network/paths.rb
|
98
99
|
- lib/rbbt/plots/bar.rb
|
99
100
|
- lib/rbbt/plots/heatmap.rb
|