rbbt-dm 1.1.8 → 1.1.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d622e348e2b667b9a7a50d72b838b052a901ee1
4
- data.tar.gz: 52aa969ad1244db73caa9869c7aac95ba676d8ea
3
+ metadata.gz: f4d951189411296e243b213618e445291596cb53
4
+ data.tar.gz: d7afae72bcaa8b8ee82aaac16d6e83d742386772
5
5
  SHA512:
6
- metadata.gz: ffc9db25cf186b73e846075f71704c48e2cd0287543d85c949a7ebaeef272351bdd064ed110dcdab6acbccf1e2819133e7a0a7ee74c3a65e7cfc2a42d9c3e070
7
- data.tar.gz: a17ba0299410b15b3ca4baedae8b2b85821782f3217809a99f7353f09249a90b86bd4005b30413d8612700cb05d15f4d38f5521976d7de2704bc52c2c2a1fca8
6
+ metadata.gz: 0354f0e2134e14e836e93648a7ac64a32f79e652198bbaa50458cd780ca4cea3b256d2fd57d486d6a5fc5c90299097afb245f6eb289c1cf3f104b62a57f43f1f
7
+ data.tar.gz: d356f93d961b8681b03ea9af236d8c6c2a57da222004ec05e8647d49f7e548f26ea1f895446f997df79b9306a986b82a50fabefd4e3f0df4aef8cc837c2fd42a
@@ -9,6 +9,19 @@ source('#{Rbbt.share.R['barcode.R'].find}')
9
9
  rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
- R.run(cmd, :stderr => true)
12
+ R.run(cmd)
13
13
  end
14
+
15
+ def activity_cluster(outfile, factor = 2)
16
+
17
+ FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
18
+ cmd =<<-EOF
19
+ source('#{Rbbt.share.R['barcode.R'].find}')
20
+ rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R value_type})
21
+ EOF
22
+
23
+ R.run(cmd)
24
+ end
25
+
26
+
14
27
  end
@@ -1,3 +1,5 @@
1
+ require 'rbbt/util/R'
2
+
1
3
  class Matrix
2
4
  def differential(main, contrast, path = nil)
3
5
  if Array === main and Array === contrast
@@ -7,17 +9,31 @@ class Matrix
7
9
  end
8
10
 
9
11
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
10
- Persist.persist(name, :tsv, :update => true,
12
+ Persist.persist(name, :tsv, :persist => :update,
11
13
  :other => {:main => main_samples, :contrast => contrast_samples},
12
14
  :prefix => "Diff", :dir => Matrix.matrix_dir.differential, :no_load => true) do |file|
13
15
 
14
- log2 = value_type.nil? or value_type == "count"
15
- log2 = false
16
- two_channel = false
17
- FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
18
- cmd = "source('#{Rbbt.share.R["MA.R"].find}'); rbbt.dm.matrix.differential(#{ R.ruby2R data_file }, main = #{R.ruby2R(main_samples)}, contrast = #{R.ruby2R(contrast_samples)}, log2=#{ R.ruby2R log2 }, outfile = #{R.ruby2R path}, key.field = #{R.ruby2R format}, two.channel = #{R.ruby2R two_channel})"
19
- R.run(cmd)
20
- nil
16
+ raise if file.nil?
17
+
18
+ log2 = value_type.nil? or value_type == "count"
19
+ log2 = false
20
+ two_channel = false
21
+ FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
22
+
23
+ cmd = <<-EOS
24
+
25
+ source('#{Rbbt.share.R["MA.R"].find}')
26
+
27
+ data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
28
+ main = #{R.ruby2R(main_samples)},
29
+ contrast = #{R.ruby2R(contrast_samples)},
30
+ log2=#{ R.ruby2R log2 },
31
+ outfile = #{R.ruby2R file},
32
+ key.field = #{R.ruby2R format},
33
+ two.channel = #{R.ruby2R two_channel})
34
+ EOS
35
+
36
+ R.run(cmd, :monitor => true)
21
37
  end
22
38
  end
23
39
  end
@@ -0,0 +1,92 @@
1
+ require 'rbbt/matrix'
2
+ require 'rbbt/knowledge_base'
3
+ require 'rbbt/matrix/barcode'
4
+
5
+ class KnowledgeBase
6
+ attr_accessor :matrix_registry
7
+ def matrix_registry=(new)
8
+ @matrix_registry = IndiferentHash.setup(new)
9
+ end
10
+
11
+ def matrix(name)
12
+ matrix, options = @matrix_registry[name]
13
+
14
+ return matrix if Matrix === matrix
15
+
16
+ Path.setup(matrix) if not Path === matrix and File.exists? matrix
17
+
18
+ raise "Registered matrix is strange: #{Misc.fingerprint matrix}" unless Path === matrix
19
+
20
+ path = matrix
21
+
22
+ raise "Registered path not found: #{path.find}" unless path.exists?
23
+
24
+ if path.find.directory?
25
+ data, labels, value_type, format, organism, identifiers = Misc.process_options options, :data, :labels, :value_type, :format, :organism, :identifiers
26
+
27
+ data ||= path.data if path.data.exists?
28
+ data ||= path.values if path.values.exists?
29
+
30
+ labels ||= path.labels if path.labels.exists?
31
+ labels ||= path.samples if path.samples.exists?
32
+
33
+ identifiers ||= path.identifiers if path.identifiers.exists?
34
+
35
+ value_type = TSV.parse_header(data.find).key_field if data
36
+ value_type ||= "Unknown ID"
37
+
38
+ Matrix.new data, labels, value_type, format, organism, identifiers
39
+ else
40
+ end
41
+ end
42
+
43
+ def register_matrix(name, matrix, options = {})
44
+ options = Misc.add_defaults options, :sample_format => "Sample"
45
+ sample_format = Misc.process_options options, :sample_format
46
+
47
+ @matrix_registry ||= IndiferentHash.setup({})
48
+ @matrix_registry[name] = [matrix, options]
49
+
50
+
51
+ register name do
52
+ matrix = matrix(name)
53
+ TSV.read_matrix matrix.data_file, sample_format
54
+ end
55
+
56
+ register name.to_s + '_activity' do
57
+ matrix = matrix(name)
58
+ TmpFile.with_file do |tmpfile|
59
+ matrix.activity_cluster(tmpfile)
60
+ tsv = TSV.open(TSV.read_matrix(tmpfile, sample_format))
61
+ tsv.identifiers ||= matrix.data_file.identifier_files.first
62
+ tsv.identifiers = tsv.identifiers.find if tsv.identifiers.respond_to? :find
63
+
64
+ tsv = tsv.add_field "Activity" do |k,p|
65
+ samples, values = p
66
+ values = values.collect{|v| v.to_i }
67
+ new_values = case Misc.max(values)
68
+ when 1
69
+ [''] * samples.length
70
+ when 2
71
+ values.collect{|v| v == 2 ? "active" : '' }
72
+ else
73
+ values.collect{|v|
74
+ case v
75
+ when 1
76
+ "inactive"
77
+ when 2
78
+ ''
79
+ else
80
+ "active"
81
+ end
82
+ }
83
+ end
84
+ end
85
+
86
+ tsv
87
+ end
88
+ end
89
+ end
90
+
91
+ end
92
+
data/lib/rbbt/matrix.rb CHANGED
@@ -10,17 +10,23 @@ class Matrix
10
10
  end
11
11
  end
12
12
 
13
- attr_accessor :data_file, :labels, :value_type, :format, :organism
13
+ attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
14
14
  def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
15
15
  @data_file = data_file
16
16
  @labels = labels
17
17
  @value_type = value_type
18
18
  @format = format
19
+ @format ||= begin
20
+ _header ||= TSV.parse_header(@data_file)
21
+ _header.key_field || "ID"
22
+ end
19
23
  @organism = organism
24
+ _header = nil
20
25
  @organism ||= begin
21
- TSV.parse_header(@data_file).namespace || "Hsa"
26
+ _header ||= TSV.parse_header(@data_file)
27
+ _header.namespace || "Hsa"
22
28
  end
23
- @identifiers = identifiers || Organism.identifiers(organism)
29
+ @identifiers = identifiers
24
30
  end
25
31
 
26
32
  def samples
@@ -31,6 +37,17 @@ class Matrix
31
37
  @subsets ||= begin
32
38
  subsets = {}
33
39
  case @labels
40
+ when Path
41
+ labels = @labels.tsv
42
+ factors = labels.fields
43
+ labels.through do |sample,values|
44
+ factors.zip(values).each do |factor,value|
45
+ subsets[factor] ||= {}
46
+ subsets[factor][value] ||= []
47
+ subsets[factor][value] << sample
48
+ end
49
+ end
50
+
34
51
  when TSV
35
52
  factors = @labels.fields
36
53
  @labels.through do |sample,values|
@@ -42,7 +59,7 @@ class Matrix
42
59
  end
43
60
  when Hash
44
61
  @labels.each do |factor,info|
45
- subsets[factors] ||= []
62
+ subsets[factors] ||= {}
46
63
  info.each do |value, samples|
47
64
  subsets[factors][value] = case samples
48
65
  when Array
@@ -56,16 +73,29 @@ class Matrix
56
73
  end
57
74
  end
58
75
  end
59
- subsets
76
+
77
+ clean_subsets = {}
78
+ subsets.each do |factor,values|
79
+ next if values.nil? or values.size < 2
80
+ values.each do |level,samples|
81
+ next if samples.nil? or samples.length < 2
82
+ clean_subsets[factor] ||= {}
83
+ clean_subsets[factor][level] = samples
84
+ end
85
+ end
86
+
87
+ clean_subsets
60
88
  end
61
89
  end
62
90
 
63
91
  def comparison(main, contrast, subsets = nil)
92
+ subsets ||= self.subsets
64
93
 
65
94
  if main.index "="
66
95
  main_factor, main_value = main.split "="
67
96
  raise ParameterException, "Main selection not understood" if subsets[main_factor].nil? or subsets[main_factor][main_value].nil?
68
- main_samples = subsets[main_factor][main_value].split ','
97
+ value = subsets[main_factor][main_value]
98
+ main_samples = String === value ? value.split(',') : value
69
99
  else
70
100
  main_samples = main.split(/[|,\n]/)
71
101
  end
@@ -74,17 +104,20 @@ class Matrix
74
104
  if contrast.index "="
75
105
  contrast_factor, contrast_value = contrast.split "="
76
106
  raise ParameterException, "Contrast selection not understood" if subsets[contrast_factor].nil? or subsets[contrast_factor][contrast_value].nil?
77
- contrast_samples = subsets[contrast_factor][contrast_value].split ','
107
+ value = subsets[contrast_factor][contrast_value]
108
+ contrast_samples = String === value ? value.split(',') : value
78
109
  else
79
110
  contrast_samples = contrast.split(/[|,\n]/)
80
111
  end
81
112
  else
82
113
  if subsets and defined? main_factor
83
- contrast_samples = subsets[main_factor].values.collect{|s| s.split ',' }.flatten.uniq - main_samples
114
+ contrast_samples = subsets[main_factor].values.flatten.collect{|s| s.split ',' }.flatten.uniq - main_samples
84
115
  else
85
116
  contrast_samples = samples - main_samples
86
117
  end
87
118
  end
119
+ main_samples = main_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
120
+ contrast_samples = contrast_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
88
121
 
89
122
  [main_samples, contrast_samples]
90
123
  end
@@ -96,9 +129,12 @@ class Matrix
96
129
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
97
130
 
98
131
  file = Persist.persist(data_file, :tsv, :prefix => "Gene", :dir => Matrix.matrix_dir.values, :no_load => true) do
99
- identifiers = [Organism.identifiers(organism), @identifiers, identifiers].compact.uniq
100
132
 
101
- data_file.tsv(:cast => :to_f).change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
133
+ data = data_file.tsv(:cast => :to_f)
134
+
135
+ identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
136
+
137
+ data.change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
102
138
  Misc.mean(v.compact)
103
139
  end
104
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.8
4
+ version: 1.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-16 00:00:00.000000000 Z
11
+ date: 2014-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -94,6 +94,7 @@ files:
94
94
  - lib/rbbt/matrix.rb
95
95
  - lib/rbbt/matrix/barcode.rb
96
96
  - lib/rbbt/matrix/differential.rb
97
+ - lib/rbbt/matrix/knowledge_base.rb
97
98
  - lib/rbbt/network/paths.rb
98
99
  - lib/rbbt/plots/bar.rb
99
100
  - lib/rbbt/plots/heatmap.rb