rbbt-dm 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,10 +97,13 @@ module TSV
97
97
  fields ||= self.fields
98
98
  fields = [fields] if String === fields or Symbol === fields
99
99
 
100
- Persist.persist(filename, :marshal, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
100
+ Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
101
101
  data ||= Hash.new(0)
102
- through :key, fields do |key, values|
103
- values.flatten.compact.uniq.each{|value| data[value] += 1}
102
+
103
+ with_unnamed do
104
+ through :key, fields do |key, values|
105
+ values.flatten.compact.uniq.each{|value| data[value] += 1}
106
+ end
104
107
  end
105
108
 
106
109
  data
@@ -108,40 +111,70 @@ module TSV
108
111
  end
109
112
 
110
113
  def enrichment(list, fields = nil, options = {})
111
- fields ||= self.fields.first
112
- options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false
113
- Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
114
+ with_unnamed do
115
+ fields ||= self.fields.first
116
+ options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
117
+
118
+ add_keys = Misc.process_options options, :add_keys
119
+
120
+ Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
121
+
122
+ selected = select :key => list
123
+
124
+ tsv_size = keys.length
125
+ total = selected.keys.length
126
+ Log.debug "Found #{total} of #{list.length} entities"
127
+
128
+ counts = annotation_counts fields, options[:persist]
129
+
130
+ annotations = Hash.new
131
+ annotation_keys = Hash.new
132
+ selected.with_unnamed do
133
+ selected.through :key, fields do |key, values|
134
+ values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
135
+ value = value.dup
136
+ annotations[value] ||= 0
137
+ annotations[value] += 1
138
+ next unless add_keys
139
+ annotation_keys[value] ||= []
140
+ annotation_keys[value] << key
141
+ }
142
+ end
143
+ end
114
144
 
115
- selected = select :key => list
145
+ pvalues = {}
146
+ annotations.each do |annotation, count|
147
+ next if count < options[:min_support] or not counts.include? annotation
148
+ pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
149
+ end
116
150
 
117
- tsv_size = keys.length
118
- total = selected.keys.length
119
- Log.debug "Found #{total} of #{list.length} entities"
151
+ FDR.adjust_hash! pvalues if options[:fdr]
120
152
 
121
- counts = annotation_counts fields, options[:persist]
153
+ pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
122
154
 
123
- annotations = Hash.new 0
124
- with_unnamed do
125
- selected.through :key, fields do |key, values|
126
- values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
127
- annotations[value] += 1
128
- }
129
- end
130
- end
155
+ TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
131
156
 
132
- pvalues = {}
133
- annotations.each do |annotation, count|
134
- next if count < options[:min_support] or not counts.include? annotation
135
- pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
136
- pvalues[annotation] = pvalue
137
- end
157
+ if add_keys
158
+ tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
138
159
 
139
- FDR.adjust_hash! pvalues if options[:fdr]
140
- pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
160
+ tsv.add_field 'p-value' do |annot, values|
161
+ [pvalues[annot]]
162
+ end
141
163
 
142
- TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
164
+ tsv.add_field self.key_field do |annot, values|
165
+ if list.respond_to? :annotate
166
+ list.annotate annotation_keys[annot]
167
+ else
168
+ annotation_keys[annot]
169
+ end
170
+ end
143
171
 
144
- pvalues
172
+ tsv
173
+ else
174
+ pvalues
175
+ end
176
+
177
+ end
145
178
  end
146
179
 
147
180
  def enrichment_for(tsv, field, options = {} )
@@ -1,5 +1,6 @@
1
1
  require 'png'
2
2
  require 'inline'
3
+ require 'set'
3
4
 
4
5
  module RandomWalk
5
6
 
@@ -89,8 +90,10 @@ module RandomWalk
89
90
  if size == 0
90
91
  [0] * times
91
92
  else
93
+ a = (0..total - 1).to_a
92
94
  (1..times).collect do
93
- score(Array.new(size){ (rand * total).to_i }.sort, total, missing).abs
95
+ a.shuffle!
96
+ score(a[1..size].sort, total, missing).abs
94
97
  end
95
98
  end
96
99
  end
@@ -114,7 +117,7 @@ module RandomWalk
114
117
  :green => PNG::Color::Green,
115
118
  :white => PNG::Color::White,
116
119
  :black => PNG::Color::Black,
117
-
120
+ :gray => PNG::Color::Gray,
118
121
  }
119
122
 
120
123
  def self.draw_hits(hits, total, filename = nil, options = {})
@@ -131,7 +134,7 @@ module RandomWalk
131
134
  hits = hits.collect{|h| (h.to_f * size / total).to_i}
132
135
  end
133
136
 
134
- canvas = PNG::Canvas.new size, width, COLORS[bg_color]
137
+ canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
135
138
 
136
139
  sections.each{|color, info|
137
140
  start = info[0]
@@ -156,3 +159,35 @@ module RandomWalk
156
159
  end
157
160
  end
158
161
  end
162
+
163
# Helpers for ranked lists: locating the members of a set inside the list,
# drawing them, and scoring how concentrated they are with a random walk.
#
# The module-level methods take the list explicitly; the instance methods are
# meant to be mixed into (or extended onto) an Array-like ranked list.
module OrderedList
  # Returns the 0-based positions of the elements of +list+ found in +set+.
  #
  # list - ordered collection responding to each_with_index
  # set  - Set or any collection accepted by Set.new
  def self.hits(list, set)
    set = Set.new(set) unless Set === set
    positions = []
    list.each_with_index do |element, index|
      positions << index if set.include?(element)
    end
    positions
  end

  # Draws the positions of +set+ inside +list+ by delegating to
  # RandomWalk.draw_hits; +filename+ and +options+ are passed through as is.
  def self.draw_hits(list, set, filename = nil, options = {})
    RandomWalk.draw_hits(OrderedList.hits(list, set), list.length, filename, options)
  end

  # Instance form of OrderedList.hits using the receiver as the list.
  def hits(set)
    OrderedList.hits(self, set)
  end

  # Instance form of OrderedList.draw_hits using the receiver as the list.
  def draw_hits(set, filename = nil, options = {})
    OrderedList.draw_hits(self, set, filename, options)
  end

  # Permutation p-value for the positions of +set+ within the receiver.
  #
  # options - :permutations (default 1000) number of random permutations,
  #           :missing (default 0) value forwarded to RandomWalk as its
  #           `missing` argument.
  #
  # The observed score and the null distribution are computed with the same
  # +missing+ value and the same (nil-free) set, so they are comparable.
  def pvalue(set, options = {})
    options = Misc.add_defaults options, :permutations => 1000, :missing => 0
    missing = options[:missing]

    # nil entries can never be hits; drop them once and use the cleaned set
    # both for the observed hits and for the size of the random samples.
    members = set.compact

    positions = hits(members)
    score = RandomWalk.score(positions.sort, length, missing)
    permutations = RandomWalk.permutations(members.length, length, missing, options[:permutations])
    RandomWalk.pvalue(permutations, score)
  end
end
@@ -0,0 +1,68 @@
1
+ require 'rbbt/tsv'
2
+
3
# Rank-product statistics computed in log space.
module RankProduct
  # Computes the log rank-product score of each gene.
  #
  # gene_ranks      - Hash of gene => Array of 1-based ranks, one per signature
  # signature_sizes - Array with the size of each signature, in the same order
  #                   as the ranks
  #
  # Every rank is normalized by the size of its signature, so each signature
  # contributes log(rank) - log(size); the score is the sum of those terms.
  #
  # Returns a Hash of gene => score.
  def self.score(gene_ranks, signature_sizes)
    log_sizes = signature_sizes.collect { |size| Math.log(size) }

    scores = {}
    gene_ranks.each do |gene, positions|
      scores[gene] = positions.zip(log_sizes).inject(0) do |acc, (rank, log_size)|
        acc + (Math.log(rank) - log_size)
      end
    end
    scores
  end

  # Samples +num+ scores by summing, for each of +num_signatures+ signatures,
  # the log of a uniform random number.
  #
  # Kernel#rand returns values in [0, 1), so log(rand) can be -Infinity when it
  # draws exactly 0.0. Using 1.0 - rand samples the same uniform distribution
  # over (0, 1], which keeps every score finite.
  #
  # Returns an Array of +num+ scores.
  def self.permutations(num_signatures, num = 1000)
    (1..num).collect do
      (1..num_signatures).inject(0) { |acc, _| acc + Math.log(1.0 - rand) }
    end
  end

  # Builds one fully simulated set of scores: each signature is a random
  # permutation of the genes 1..size, and only genes that received a rank in
  # every signature are scored.
  #
  # Returns an Array with the scores of those genes.
  def self.permutations_full(signature_sizes)
    gene_ranks = {}
    signature_sizes.each do |size|
      (1..size).to_a.shuffle.each_with_index do |gene, pos|
        (gene_ranks[gene] ||= []) << pos + 1
      end
    end

    # Genes beyond the size of the smallest signature lack some rank; drop them.
    gene_ranks.delete_if { |_gene, positions| positions.length != signature_sizes.length }

    score(gene_ranks, signature_sizes).values
  end
end
41
+
42
module TSV
  # Rank-product score of every key across the given fields.
  #
  # fields  - Array of field names; the TSV is sliced to these fields and the
  #           keys are ranked once per field
  # reverse - when true the ranking is inverted (the last key in the sorted
  #           order gets rank 1)
  # block   - optional block handed to sort_by to extract the sorting value;
  #           by default the value is cast with to_f (the first value of the
  #           list for :double TSVs)
  #
  # Returns whatever RankProduct.score returns for the computed ranks (a Hash
  # of key => score).
  def rank_product(fields, reverse = false, &block)
    tsv = slice(fields)

    rankings =
      if block
        fields.collect { |field| tsv.sort_by(field, true, &block) }
      else
        double = tsv.type == :double
        fields.collect do |field|
          tsv.sort_by(field, true) { |_gene, values| double ? values.first.to_f : values.to_f }
        end
      end

    # Precompute the first position of every key in each ranking so the rank
    # lookup below is a hash access instead of a linear Array#index scan.
    lookups = rankings.collect do |ranking|
      lookup = {}
      ranking.each_with_index do |entity, index|
        lookup[entity] = index unless lookup.key?(entity)
      end
      lookup
    end

    total = size
    positions = {}
    tsv.keys.each do |entity|
      positions[entity] = lookups.collect do |lookup|
        index = lookup[entity]
        reverse ? total - index : index + 1
      end
    end

    RankProduct.score(positions, fields.collect { tsv.size })
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-dm
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Miguel Vazquez
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-10-04 00:00:00 +02:00
19
- default_executable:
18
+ date: 2012-01-13 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: rbbt-util
@@ -33,7 +32,7 @@ dependencies:
33
32
  type: :runtime
34
33
  version_requirements: *id001
35
34
  - !ruby/object:Gem::Dependency
36
- name: rbbt-entities
35
+ name: RubyInline
37
36
  prerelease: false
38
37
  requirement: &id002 !ruby/object:Gem::Requirement
39
38
  none: false
@@ -47,7 +46,7 @@ dependencies:
47
46
  type: :runtime
48
47
  version_requirements: *id002
49
48
  - !ruby/object:Gem::Dependency
50
- name: RubyInline
49
+ name: priority_queue
51
50
  prerelease: false
52
51
  requirement: &id003 !ruby/object:Gem::Requirement
53
52
  none: false
@@ -60,20 +59,6 @@ dependencies:
60
59
  version: "0"
61
60
  type: :runtime
62
61
  version_requirements: *id003
63
- - !ruby/object:Gem::Dependency
64
- name: priority_queue
65
- prerelease: false
66
- requirement: &id004 !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
69
- - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 3
72
- segments:
73
- - 0
74
- version: "0"
75
- type: :runtime
76
- version_requirements: *id004
77
62
  description: Data-mining and statistics
78
63
  email: miguel.vazquez@fdi.ucm.es
79
64
  executables: []
@@ -88,6 +73,7 @@ files:
88
73
  - lib/rbbt/statistics/fdr.rb
89
74
  - lib/rbbt/statistics/hypergeometric.rb
90
75
  - lib/rbbt/statistics/random_walk.rb
76
+ - lib/rbbt/statistics/rank_product.rb
91
77
  - lib/rbbt/vector/model.rb
92
78
  - lib/rbbt/vector/model/svm.rb
93
79
  - test/rbbt/statistics/test_fdr.rb
@@ -96,7 +82,6 @@ files:
96
82
  - test/rbbt/vector/model/test_svm.rb
97
83
  - test/rbbt/network/test_paths.rb
98
84
  - test/test_helper.rb
99
- has_rdoc: true
100
85
  homepage: http://github.com/mikisvaz/rbbt-phgx
101
86
  licenses: []
102
87
 
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
111
  requirements: []
127
112
 
128
113
  rubyforge_project:
129
- rubygems_version: 1.6.2
114
+ rubygems_version: 1.8.10
130
115
  signing_key:
131
116
  specification_version: 3
132
117
  summary: Data-mining and statistics