rbbt-dm 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -97,10 +97,13 @@ module TSV
97
97
  fields ||= self.fields
98
98
  fields = [fields] if String === fields or Symbol === fields
99
99
 
100
- Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
100
+ Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
101
101
  data ||= Hash.new(0)
102
- through :key, fields do |key, values|
103
- values.flatten.compact.uniq.each{|value| data[value] += 1}
102
+
103
+ with_unnamed do
104
+ through :key, fields do |key, values|
105
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
106
+ end
104
107
  end
105
108
 
106
109
  data
@@ -108,40 +111,70 @@ module TSV
108
111
  end
109
112
 
110
113
  def enrichment(list, fields = nil, options = {})
111
- fields ||= self.fields.first
112
- options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
113
- Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
114
+ with_unnamed do
115
+ fields ||= self.fields.first
116
+ options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
117
+
118
+ add_keys = Misc.process_options options, :add_keys
119
+
120
+ Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
121
+
122
+ selected = select :key => list
123
+
124
+ tsv_size = keys.length
125
+ total = selected.keys.length
126
+ Log.debug "Found #{total} of #{list.length} entities"
127
+
128
+ counts = annotation_counts fields, options[:persist]
129
+
130
+ annotations = Hash.new
131
+ annotation_keys = Hash.new
132
+ selected.with_unnamed do
133
+ selected.through :key, fields do |key, values|
134
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
135
+ value = value.dup
136
+ annotations[value] ||= 0
137
+ annotations[value] += 1
138
+ next unless add_keys
139
+ annotation_keys[value] ||= []
140
+ annotation_keys[value] << key
141
+ }
142
+ end
143
+ end
114
144
 
115
- selected = select :key => list
145
+ pvalues = {}
146
+ annotations.each do |annotation, count|
147
+ next if count < options[:min_support] or not counts.include? annotation
148
+ pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
149
+ end
116
150
 
117
- tsv_size = keys.length
118
- total = selected.keys.length
119
- Log.debug "Found #{total} of #{list.length} entities"
151
+ FDR.adjust_hash! pvalues if options[:fdr]
120
152
 
121
- counts = annotation_counts fields, options[:persist]
153
+ pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
122
154
 
123
- annotations = Hash.new 0
124
- with_unnamed do
125
- selected.through :key, fields do |key, values|
126
- values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
127
- annotations[value] += 1
128
- }
129
- end
130
- end
155
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
131
156
 
132
- pvalues = {}
133
- annotations.each do |annotation, count|
134
- next if count < options[:min_support] or not counts.include? annotation
135
- pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
136
- pvalues[annotation] = pvalue
137
- end
157
+ if add_keys
158
+ tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
138
159
 
139
- FDR.adjust_hash! pvalues if options[:fdr]
140
- pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
160
+ tsv.add_field 'p-value' do |annot, values|
161
+ [pvalues[annot]]
162
+ end
141
163
 
142
- TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
164
+ tsv.add_field self.key_field do |annot, values|
165
+ if list.respond_to? :annotate
166
+ list.annotate annotation_keys[annot]
167
+ else
168
+ annotation_keys[annot]
169
+ end
170
+ end
143
171
 
144
- pvalues
172
+ tsv
173
+ else
174
+ pvalues
175
+ end
176
+
177
+ end
145
178
  end
146
179
 
147
180
  def enrichment_for(tsv, field, options = {} )
@@ -1,5 +1,6 @@
1
1
  require 'png'
2
2
  require 'inline'
3
+ require 'set'
3
4
 
4
5
  module RandomWalk
5
6
 
@@ -89,8 +90,10 @@ module RandomWalk
89
90
  if size == 0
90
91
  [0] * times
91
92
  else
93
+ a = (0..total - 1).to_a
92
94
  (1..times).collect do
93
- score(Array.new(size){ (rand * total).to_i }.sort, total, missing).abs
95
+ a.shuffle!
96
+ score(a[1..size].sort, total, missing).abs
94
97
  end
95
98
  end
96
99
  end
@@ -114,7 +117,7 @@ module RandomWalk
114
117
  :green => PNG::Color::Green,
115
118
  :white => PNG::Color::White,
116
119
  :black => PNG::Color::Black,
117
-
120
+ :gray => PNG::Color::Gray,
118
121
  }
119
122
 
120
123
  def self.draw_hits(hits, total, filename = nil, options = {})
@@ -131,7 +134,7 @@ module RandomWalk
131
134
  hits = hits.collect{|h| (h.to_f * size / total).to_i}
132
135
  end
133
136
 
134
- canvas = PNG::Canvas.new size, width, COLORS[bg_color]
137
+ canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
135
138
 
136
139
  sections.each{|color, info|
137
140
  start = info[0]
@@ -156,3 +159,35 @@ module RandomWalk
156
159
  end
157
160
  end
158
161
  end
162
+
163
# Helpers for locating the members of a set inside an ordered list and for
# handing those positions to the RandomWalk routines. The module-level
# methods take the list explicitly; the instance methods are meant for
# objects that mix this module in and use +self+ as the list.
module OrderedList
  # Finds where the members of +set+ occur in +list+.
  #
  # list - an ordered collection responding to each_with_index.
  # set  - a Set, or any collection accepted by Set.new.
  #
  # Returns an Array with the 0-based indices, in ascending order, of the
  # elements of +list+ that are included in +set+.
  def self.hits(list, set)
    set = Set.new(set) unless Set === set
    positions = []
    list.each_with_index do |element, index|
      positions << index if set.include?(element)
    end
    positions
  end

  # Computes the hits of +set+ in +list+ and forwards them, together with
  # the list length, +filename+ and +options+, to RandomWalk.draw_hits.
  def self.draw_hits(list, set, filename = nil, options = {})
    positions = OrderedList.hits(list, set)
    RandomWalk.draw_hits(positions, list.length, filename, options)
  end

  # Instance form of OrderedList.hits using +self+ as the list.
  def hits(set)
    OrderedList.hits(self, set)
  end

  # Instance form of OrderedList.draw_hits using +self+ as the list.
  def draw_hits(set, filename = nil, options = {})
    OrderedList.draw_hits(self, set, filename, options)
  end

  # Scores the hits of +set+ in this list with RandomWalk.score and compares
  # the score against RandomWalk.permutations through RandomWalk.pvalue.
  #
  # set     - collection of elements to look for; nil entries are ignored
  #           when locating hits.
  # options - Hash; :permutations (default 1000) and :missing (default 0)
  #           are read here.
  #
  # Returns whatever RandomWalk.pvalue returns.
  def pvalue(set, options = {})
    options = Misc.add_defaults options, :permutations => 1000, :missing => 0
    missing = options[:missing]

    # to_a first so that a Set (which has no #compact on older Rubies) is
    # accepted here just as it is by #hits.
    positions = hits(set.to_a.compact)

    # The same :missing value must reach both the observed score and the
    # permutations; otherwise the score is compared against a null
    # distribution computed under different settings.
    # positions are already ascending, so no extra sort is needed.
    score = RandomWalk.score(positions, length, missing)

    # NOTE(review): the permutation size is set.length (nil entries and
    # elements absent from the list included), not the number of hits --
    # confirm this is intended.
    permutations = RandomWalk.permutations(set.length, length, missing, options[:permutations])
    RandomWalk.pvalue(permutations, score)
  end
end
@@ -0,0 +1,68 @@
1
+ require 'rbbt/tsv'
2
+
3
# Rank-product scoring of genes ranked in several signatures, plus two ways
# of generating null scores to compare against.
module RankProduct
  # Computes a log rank-product score for every gene.
  #
  # gene_ranks      - Hash mapping each gene to an Array holding its
  #                   position in each signature.
  # signature_sizes - Array with the size of each signature, aligned with
  #                   the positions.
  #
  # Returns a Hash mapping each gene to the sum, over signatures, of
  # log(position) - log(size).
  def self.score(gene_ranks, signature_sizes)
    log_sizes = signature_sizes.collect { |size| Math.log(size) }

    scores = {}
    gene_ranks.each do |gene, positions|
      # Take the log of each position and subtract the log of the signature
      # size (normalize), then add everything up.
      scores[gene] = positions.zip(log_sizes).inject(0) do |acc, (position, log_size)|
        acc + (Math.log(position) - log_size)
      end
    end
    scores
  end

  # Generates random scores: each one is the sum of +num_signatures+
  # logarithms of uniform random numbers.
  #
  # num_signatures - number of random terms added into each score.
  # num            - how many scores to generate (default 1000).
  #
  # Returns an Array with +num+ scores.
  def self.permutations(num_signatures, num = 1000)
    num.times.collect do
      # Kernel#rand is in [0, 1), so log(rand) can be -Infinity; 1.0 - rand
      # is in (0, 1] and follows the same uniform distribution.
      num_signatures.times.inject(0) { |acc, _| acc + Math.log(1.0 - rand) }
    end
  end

  # Generates random scores by shuffling the ids 1..size of every signature
  # and scoring the ids that show up in all of them.
  #
  # signature_sizes - Array with the size of each signature.
  #
  # Returns an Array with the score of each id present in every signature.
  def self.permutations_full(signature_sizes)
    gene_ranks = {}
    signature_sizes.each do |size|
      (1..size).to_a.shuffle.each_with_index do |gene, index|
        (gene_ranks[gene] ||= []) << index + 1
      end
    end

    # Ids beyond the size of the smallest signature lack a position in some
    # signature and cannot be scored.
    gene_ranks.delete_if { |_gene, positions| positions.length != signature_sizes.length }

    score(gene_ranks, signature_sizes).values
  end
end
41
+
42
module TSV
  # Ranks the entities of this TSV by each of the given fields and combines
  # the per-field positions with RankProduct.score.
  #
  # fields  - Array of fields; the TSV is sliced to them and sorted once
  #           per field.
  # reverse - when true an entity's position in each ordering is counted
  #           from the end (size - index) instead of from the start
  #           (index + 1).
  # block   - optional block handed to sort_by; when omitted the field
  #           value is converted with to_f (the first value for :double
  #           TSVs).
  #
  # Returns the result of RankProduct.score for the collected positions,
  # using the sliced TSV size as the size of every signature.
  def rank_product(fields, reverse = false, &block)
    tsv = slice(fields)

    orderings =
      if block_given?
        fields.collect { |field| tsv.sort_by(field, true, &block) }
      else
        fields.collect do |field|
          # The block parameter is +values+; the original referred to an
          # undefined +value+ here and raised NameError for non-:double
          # TSVs.
          tsv.sort_by(field, true) do |_key, values|
            tsv.type == :double ? values.first.to_f : values.to_f
          end
        end
      end

    # Only needed (and only computed) when counting from the end.
    total = size if reverse

    positions = {}
    tsv.keys.each do |entity|
      positions[entity] = orderings.collect do |ordered|
        index = ordered.index(entity)
        reverse ? total - index : index + 1
      end
    end

    RankProduct.score(positions, fields.collect { tsv.size })
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-10-04 00:00:00 +02:00
19
- default_executable:
18
+ date: 2012-01-13 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -33,7 +32,7 @@ dependencies:
33
32
  type: :runtime
34
33
  version_requirements: *id001
35
34
  - !ruby/object:Gem::Dependency
36
- name: rbbt-entities
35
+ name: RubyInline
37
36
  prerelease: false
38
37
  requirement: &id002 !ruby/object:Gem::Requirement
39
38
  none: false
@@ -47,7 +46,7 @@ dependencies:
47
46
  type: :runtime
48
47
  version_requirements: *id002
49
48
  - !ruby/object:Gem::Dependency
50
- name: RubyInline
49
+ name: priority_queue
51
50
  prerelease: false
52
51
  requirement: &id003 !ruby/object:Gem::Requirement
53
52
  none: false
@@ -60,20 +59,6 @@ dependencies:
60
59
  version: "0"
61
60
  type: :runtime
62
61
  version_requirements: *id003
63
- - !ruby/object:Gem::Dependency
64
- name: priority_queue
65
- prerelease: false
66
- requirement: &id004 !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 3
72
- segments:
73
- - 0
74
- version: "0"
75
- type: :runtime
76
- version_requirements: *id004
77
62
  description: Data-mining and statistics
78
63
  email: miguel.vazquez@fdi.ucm.es
79
64
  executables: []
@@ -88,6 +73,7 @@ files:
88
73
  - lib/rbbt/statistics/fdr.rb
89
74
  - lib/rbbt/statistics/hypergeometric.rb
90
75
  - lib/rbbt/statistics/random_walk.rb
76
+ - lib/rbbt/statistics/rank_product.rb
91
77
  - lib/rbbt/vector/model.rb
92
78
  - lib/rbbt/vector/model/svm.rb
93
79
  - test/rbbt/statistics/test_fdr.rb
@@ -96,7 +82,6 @@ files:
96
82
  - test/rbbt/vector/model/test_svm.rb
97
83
  - test/rbbt/network/test_paths.rb
98
84
  - test/test_helper.rb
99
- has_rdoc: true
100
85
  homepage: http://github.com/mikisvaz/rbbt-phgx
101
86
  licenses: []
102
87
 
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
111
  requirements: []
127
112
 
128
113
  rubyforge_project:
129
- rubygems_version: 1.6.2
114
+ rubygems_version: 1.8.10
130
115
  signing_key:
131
116
  specification_version: 3
132
117
  summary: Data-mining and statistics