rbbt-dm 1.1.8 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2d622e348e2b667b9a7a50d72b838b052a901ee1
4
- data.tar.gz: 52aa969ad1244db73caa9869c7aac95ba676d8ea
3
+ metadata.gz: f4d951189411296e243b213618e445291596cb53
4
+ data.tar.gz: d7afae72bcaa8b8ee82aaac16d6e83d742386772
5
5
  SHA512:
6
- metadata.gz: ffc9db25cf186b73e846075f71704c48e2cd0287543d85c949a7ebaeef272351bdd064ed110dcdab6acbccf1e2819133e7a0a7ee74c3a65e7cfc2a42d9c3e070
7
- data.tar.gz: a17ba0299410b15b3ca4baedae8b2b85821782f3217809a99f7353f09249a90b86bd4005b30413d8612700cb05d15f4d38f5521976d7de2704bc52c2c2a1fca8
6
+ metadata.gz: 0354f0e2134e14e836e93648a7ac64a32f79e652198bbaa50458cd780ca4cea3b256d2fd57d486d6a5fc5c90299097afb245f6eb289c1cf3f104b62a57f43f1f
7
+ data.tar.gz: d356f93d961b8681b03ea9af236d8c6c2a57da222004ec05e8647d49f7e548f26ea1f895446f997df79b9306a986b82a50fabefd4e3f0df4aef8cc837c2fd42a
@@ -9,6 +9,19 @@ source('#{Rbbt.share.R['barcode.R'].find}')
9
9
  rbbt.GE.barcode(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{ R.ruby2R factor })
10
10
  EOF
11
11
 
12
- R.run(cmd, :stderr => true)
12
+ R.run(cmd)
13
13
  end
14
+
15
+ def activity_cluster(outfile, factor = 2)
16
+
17
+ FileUtils.mkdir_p File.dirname(outfile) unless outfile.nil? or File.exists? File.dirname(outfile)
18
+ cmd =<<-EOF
19
+ source('#{Rbbt.share.R['barcode.R'].find}')
20
+ rbbt.GE.activity_cluster(#{ R.ruby2R self.data_file }, #{ R.ruby2R outfile }, #{R.ruby2R value_type})
21
+ EOF
22
+
23
+ R.run(cmd)
24
+ end
25
+
26
+
14
27
  end
@@ -1,3 +1,5 @@
1
+ require 'rbbt/util/R'
2
+
1
3
  class Matrix
2
4
  def differential(main, contrast, path = nil)
3
5
  if Array === main and Array === contrast
@@ -7,17 +9,31 @@ class Matrix
7
9
  end
8
10
 
9
11
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
10
- Persist.persist(name, :tsv, :update => true,
12
+ Persist.persist(name, :tsv, :persist => :update,
11
13
  :other => {:main => main_samples, :contrast => contrast_samples},
12
14
  :prefix => "Diff", :dir => Matrix.matrix_dir.differential, :no_load => true) do |file|
13
15
 
14
- log2 = value_type.nil? or value_type == "count"
15
- log2 = false
16
- two_channel = false
17
- FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
18
- cmd = "source('#{Rbbt.share.R["MA.R"].find}'); rbbt.dm.matrix.differential(#{ R.ruby2R data_file }, main = #{R.ruby2R(main_samples)}, contrast = #{R.ruby2R(contrast_samples)}, log2=#{ R.ruby2R log2 }, outfile = #{R.ruby2R path}, key.field = #{R.ruby2R format}, two.channel = #{R.ruby2R two_channel})"
19
- R.run(cmd)
20
- nil
16
+ raise if file.nil?
17
+
18
+ log2 = value_type.nil? or value_type == "count"
19
+ log2 = false
20
+ two_channel = false
21
+ FileUtils.mkdir_p File.dirname(file) unless file.nil? or File.exists? File.dirname(file)
22
+
23
+ cmd = <<-EOS
24
+
25
+ source('#{Rbbt.share.R["MA.R"].find}')
26
+
27
+ data = rbbt.dm.matrix.differential(#{ R.ruby2R data_file },
28
+ main = #{R.ruby2R(main_samples)},
29
+ contrast = #{R.ruby2R(contrast_samples)},
30
+ log2=#{ R.ruby2R log2 },
31
+ outfile = #{R.ruby2R file},
32
+ key.field = #{R.ruby2R format},
33
+ two.channel = #{R.ruby2R two_channel})
34
+ EOS
35
+
36
+ R.run(cmd, :monitor => true)
21
37
  end
22
38
  end
23
39
  end
@@ -0,0 +1,92 @@
1
+ require 'rbbt/matrix'
2
+ require 'rbbt/knowledge_base'
3
+ require 'rbbt/matrix/barcode'
4
+
5
+ class KnowledgeBase
6
+ attr_accessor :matrix_registry
7
+ def matrix_registry=(new)
8
+ @matrix_registry = IndiferentHash.setup(new)
9
+ end
10
+
11
+ def matrix(name)
12
+ matrix, options = @matrix_registry[name]
13
+
14
+ return matrix if Matrix === matrix
15
+
16
+ Path.setup(matrix) if not Path === matrix and File.exists? matrix
17
+
18
+ raise "Registered matrix is strange: #{Misc.fingerprint matrix}" unless Path === matrix
19
+
20
+ path = matrix
21
+
22
+ raise "Registered path not found: #{path.find}" unless path.exists?
23
+
24
+ if path.find.directory?
25
+ data, labels, value_type, format, organism, identifiers = Misc.process_options options, :data, :labels, :value_type, :format, :organism, :identifiers
26
+
27
+ data ||= path.data if path.data.exists?
28
+ data ||= path.values if path.values.exists?
29
+
30
+ labels ||= path.labels if path.labels.exists?
31
+ labels ||= path.samples if path.samples.exists?
32
+
33
+ identifiers ||= path.identifiers if path.identifiers.exists?
34
+
35
+ value_type = TSV.parse_header(data.find).key_field if data
36
+ value_type ||= "Unknown ID"
37
+
38
+ Matrix.new data, labels, value_type, format, organism, identifiers
39
+ else
40
+ end
41
+ end
42
+
43
+ def register_matrix(name, matrix, options = {})
44
+ options = Misc.add_defaults options, :sample_format => "Sample"
45
+ sample_format = Misc.process_options options, :sample_format
46
+
47
+ @matrix_registry ||= IndiferentHash.setup({})
48
+ @matrix_registry[name] = [matrix, options]
49
+
50
+
51
+ register name do
52
+ matrix = matrix(name)
53
+ TSV.read_matrix matrix.data_file, sample_format
54
+ end
55
+
56
+ register name.to_s + '_activity' do
57
+ matrix = matrix(name)
58
+ TmpFile.with_file do |tmpfile|
59
+ matrix.activity_cluster(tmpfile)
60
+ tsv = TSV.open(TSV.read_matrix(tmpfile, sample_format))
61
+ tsv.identifiers ||= matrix.data_file.identifier_files.first
62
+ tsv.identifiers = tsv.identifiers.find if tsv.identifiers.respond_to? :find
63
+
64
+ tsv = tsv.add_field "Activity" do |k,p|
65
+ samples, values = p
66
+ values = values.collect{|v| v.to_i }
67
+ new_values = case Misc.max(values)
68
+ when 1
69
+ [''] * samples.length
70
+ when 2
71
+ values.collect{|v| v == 2 ? "active" : '' }
72
+ else
73
+ values.collect{|v|
74
+ case v
75
+ when 1
76
+ "inactive"
77
+ when 2
78
+ ''
79
+ else
80
+ "active"
81
+ end
82
+ }
83
+ end
84
+ end
85
+
86
+ tsv
87
+ end
88
+ end
89
+ end
90
+
91
+ end
92
+
data/lib/rbbt/matrix.rb CHANGED
@@ -10,17 +10,23 @@ class Matrix
10
10
  end
11
11
  end
12
12
 
13
- attr_accessor :data_file, :labels, :value_type, :format, :organism
13
+ attr_accessor :data_file, :labels, :value_type, :format, :organism, :identifiers
14
14
  def initialize(data_file, labels, value_type, format, organism=nil, identifiers=nil)
15
15
  @data_file = data_file
16
16
  @labels = labels
17
17
  @value_type = value_type
18
18
  @format = format
19
+ @format ||= begin
20
+ _header ||= TSV.parse_header(@data_file)
21
+ _header.key_field || "ID"
22
+ end
19
23
  @organism = organism
24
+ _header = nil
20
25
  @organism ||= begin
21
- TSV.parse_header(@data_file).namespace || "Hsa"
26
+ _header ||= TSV.parse_header(@data_file)
27
+ _header.namespace || "Hsa"
22
28
  end
23
- @identifiers = identifiers || Organism.identifiers(organism)
29
+ @identifiers = identifiers
24
30
  end
25
31
 
26
32
  def samples
@@ -31,6 +37,17 @@ class Matrix
31
37
  @subsets ||= begin
32
38
  subsets = {}
33
39
  case @labels
40
+ when Path
41
+ labels = @labels.tsv
42
+ factors = labels.fields
43
+ labels.through do |sample,values|
44
+ factors.zip(values).each do |factor,value|
45
+ subsets[factor] ||= {}
46
+ subsets[factor][value] ||= []
47
+ subsets[factor][value] << sample
48
+ end
49
+ end
50
+
34
51
  when TSV
35
52
  factors = @labels.fields
36
53
  @labels.through do |sample,values|
@@ -42,7 +59,7 @@ class Matrix
42
59
  end
43
60
  when Hash
44
61
  @labels.each do |factor,info|
45
- subsets[factors] ||= []
62
+ subsets[factors] ||= {}
46
63
  info.each do |value, samples|
47
64
  subsets[factors][value] = case samples
48
65
  when Array
@@ -56,16 +73,29 @@ class Matrix
56
73
  end
57
74
  end
58
75
  end
59
- subsets
76
+
77
+ clean_subsets = {}
78
+ subsets.each do |factor,values|
79
+ next if values.nil? or values.size < 2
80
+ values.each do |level,samples|
81
+ next if samples.nil? or samples.length < 2
82
+ clean_subsets[factor] ||= {}
83
+ clean_subsets[factor][level] = samples
84
+ end
85
+ end
86
+
87
+ clean_subsets
60
88
  end
61
89
  end
62
90
 
63
91
  def comparison(main, contrast, subsets = nil)
92
+ subsets ||= self.subsets
64
93
 
65
94
  if main.index "="
66
95
  main_factor, main_value = main.split "="
67
96
  raise ParameterException, "Main selection not understood" if subsets[main_factor].nil? or subsets[main_factor][main_value].nil?
68
- main_samples = subsets[main_factor][main_value].split ','
97
+ value = subsets[main_factor][main_value]
98
+ main_samples = String === value ? value.split(',') : value
69
99
  else
70
100
  main_samples = main.split(/[|,\n]/)
71
101
  end
@@ -74,17 +104,20 @@ class Matrix
74
104
  if contrast.index "="
75
105
  contrast_factor, contrast_value = contrast.split "="
76
106
  raise ParameterException, "Contrast selection not understood" if subsets[contrast_factor].nil? or subsets[contrast_factor][contrast_value].nil?
77
- contrast_samples = subsets[contrast_factor][contrast_value].split ','
107
+ value = subsets[contrast_factor][contrast_value]
108
+ contrast_samples = String === value ? value.split(',') : value
78
109
  else
79
110
  contrast_samples = contrast.split(/[|,\n]/)
80
111
  end
81
112
  else
82
113
  if subsets and defined? main_factor
83
- contrast_samples = subsets[main_factor].values.collect{|s| s.split ',' }.flatten.uniq - main_samples
114
+ contrast_samples = subsets[main_factor].values.flatten.collect{|s| s.split ',' }.flatten.uniq - main_samples
84
115
  else
85
116
  contrast_samples = samples - main_samples
86
117
  end
87
118
  end
119
+ main_samples = main_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
120
+ contrast_samples = contrast_samples.compact.reject{|m| m.empty? }.collect{|m| m.strip }
88
121
 
89
122
  [main_samples, contrast_samples]
90
123
  end
@@ -96,9 +129,12 @@ class Matrix
96
129
  name = data_file =~ /:>/ ? File.basename(data_file) : data_file
97
130
 
98
131
  file = Persist.persist(data_file, :tsv, :prefix => "Gene", :dir => Matrix.matrix_dir.values, :no_load => true) do
99
- identifiers = [Organism.identifiers(organism), @identifiers, identifiers].compact.uniq
100
132
 
101
- data_file.tsv(:cast => :to_f).change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
133
+ data = data_file.tsv(:cast => :to_f)
134
+
135
+ identifiers = [identifiers, @identifiers, data.identifiers, Organism.identifiers(organism)].flatten.compact.uniq
136
+
137
+ data.change_key("Ensembl Gene ID", :identifiers => identifiers) do |v|
102
138
  Misc.mean(v.compact)
103
139
  end
104
140
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.8
4
+ version: 1.1.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-09-16 00:00:00.000000000 Z
11
+ date: 2014-09-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rbbt-util
@@ -94,6 +94,7 @@ files:
94
94
  - lib/rbbt/matrix.rb
95
95
  - lib/rbbt/matrix/barcode.rb
96
96
  - lib/rbbt/matrix/differential.rb
97
+ - lib/rbbt/matrix/knowledge_base.rb
97
98
  - lib/rbbt/network/paths.rb
98
99
  - lib/rbbt/plots/bar.rb
99
100
  - lib/rbbt/plots/heatmap.rb