rbbt-dm 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
@@ -97,10 +97,13 @@ module TSV
|
|
97
97
|
fields ||= self.fields
|
98
98
|
fields = [fields] if String === fields or Symbol === fields
|
99
99
|
|
100
|
-
Persist.persist(filename, :
|
100
|
+
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
101
101
|
data ||= Hash.new(0)
|
102
|
-
|
103
|
-
|
102
|
+
|
103
|
+
with_unnamed do
|
104
|
+
through :key, fields do |key, values|
|
105
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
106
|
+
end
|
104
107
|
end
|
105
108
|
|
106
109
|
data
|
@@ -108,40 +111,70 @@ module TSV
|
|
108
111
|
end
|
109
112
|
|
110
113
|
def enrichment(list, fields = nil, options = {})
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
+
with_unnamed do
|
115
|
+
fields ||= self.fields.first
|
116
|
+
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
|
117
|
+
|
118
|
+
add_keys = Misc.process_options options, :add_keys
|
119
|
+
|
120
|
+
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
121
|
+
|
122
|
+
selected = select :key => list
|
123
|
+
|
124
|
+
tsv_size = keys.length
|
125
|
+
total = selected.keys.length
|
126
|
+
Log.debug "Found #{total} of #{list.length} entities"
|
127
|
+
|
128
|
+
counts = annotation_counts fields, options[:persist]
|
129
|
+
|
130
|
+
annotations = Hash.new
|
131
|
+
annotation_keys = Hash.new
|
132
|
+
selected.with_unnamed do
|
133
|
+
selected.through :key, fields do |key, values|
|
134
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
135
|
+
value = value.dup
|
136
|
+
annotations[value] ||= 0
|
137
|
+
annotations[value] += 1
|
138
|
+
next unless add_keys
|
139
|
+
annotation_keys[value] ||= []
|
140
|
+
annotation_keys[value] << key
|
141
|
+
}
|
142
|
+
end
|
143
|
+
end
|
114
144
|
|
115
|
-
|
145
|
+
pvalues = {}
|
146
|
+
annotations.each do |annotation, count|
|
147
|
+
next if count < options[:min_support] or not counts.include? annotation
|
148
|
+
pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
149
|
+
end
|
116
150
|
|
117
|
-
|
118
|
-
total = selected.keys.length
|
119
|
-
Log.debug "Found #{total} of #{list.length} entities"
|
151
|
+
FDR.adjust_hash! pvalues if options[:fdr]
|
120
152
|
|
121
|
-
|
153
|
+
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
122
154
|
|
123
|
-
|
124
|
-
with_unnamed do
|
125
|
-
selected.through :key, fields do |key, values|
|
126
|
-
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
127
|
-
annotations[value] += 1
|
128
|
-
}
|
129
|
-
end
|
130
|
-
end
|
155
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
131
156
|
|
132
|
-
|
133
|
-
|
134
|
-
next if count < options[:min_support] or not counts.include? annotation
|
135
|
-
pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
136
|
-
pvalues[annotation] = pvalue
|
137
|
-
end
|
157
|
+
if add_keys
|
158
|
+
tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
|
138
159
|
|
139
|
-
|
140
|
-
|
160
|
+
tsv.add_field 'p-value' do |annot, values|
|
161
|
+
[pvalues[annot]]
|
162
|
+
end
|
141
163
|
|
142
|
-
|
164
|
+
tsv.add_field self.key_field do |annot, values|
|
165
|
+
if list.respond_to? :annotate
|
166
|
+
list.annotate annotation_keys[annot]
|
167
|
+
else
|
168
|
+
annotation_keys[annot]
|
169
|
+
end
|
170
|
+
end
|
143
171
|
|
144
|
-
|
172
|
+
tsv
|
173
|
+
else
|
174
|
+
pvalues
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
145
178
|
end
|
146
179
|
|
147
180
|
def enrichment_for(tsv, field, options = {} )
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'png'
|
2
2
|
require 'inline'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
module RandomWalk
|
5
6
|
|
@@ -89,8 +90,10 @@ module RandomWalk
|
|
89
90
|
if size == 0
|
90
91
|
[0] * times
|
91
92
|
else
|
93
|
+
a = (0..total - 1).to_a
|
92
94
|
(1..times).collect do
|
93
|
-
|
95
|
+
a.shuffle!
|
96
|
+
score(a[1..size].sort, total, missing).abs
|
94
97
|
end
|
95
98
|
end
|
96
99
|
end
|
@@ -114,7 +117,7 @@ module RandomWalk
|
|
114
117
|
:green => PNG::Color::Green,
|
115
118
|
:white => PNG::Color::White,
|
116
119
|
:black => PNG::Color::Black,
|
117
|
-
|
120
|
+
:gray => PNG::Color::Gray,
|
118
121
|
}
|
119
122
|
|
120
123
|
def self.draw_hits(hits, total, filename = nil, options = {})
|
@@ -131,7 +134,7 @@ module RandomWalk
|
|
131
134
|
hits = hits.collect{|h| (h.to_f * size / total).to_i}
|
132
135
|
end
|
133
136
|
|
134
|
-
canvas = PNG::Canvas.new size, width, COLORS[bg_color]
|
137
|
+
canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
|
135
138
|
|
136
139
|
sections.each{|color, info|
|
137
140
|
start = info[0]
|
@@ -156,3 +159,35 @@ module RandomWalk
|
|
156
159
|
end
|
157
160
|
end
|
158
161
|
end
|
162
|
+
|
163
|
+
# Helpers for ranked lists: find where the members of a set fall in the list
# and hand those positions to the RandomWalk statistics.
#
# The module-level methods take the list explicitly; the instance methods are
# for objects that behave like an ordered list (they rely on +length+ and
# +each_with_index+ of the receiver).
module OrderedList
  # Returns the zero-based positions in +list+ of the elements that are
  # members of +set+, in ascending order. +set+ may be a Set or any
  # collection accepted by Set.new.
  def self.hits(list, set)
    set = Set.new(set) unless Set === set
    hits = []
    list.each_with_index do |e, i|
      hits << i if set.include? e
    end
    hits
  end

  # Computes the hits of +set+ in +list+ and forwards them, together with the
  # list length, +filename+ and +options+, to RandomWalk.draw_hits.
  def self.draw_hits(list, set, filename = nil, options = {})
    hits = OrderedList.hits(list, set)
    RandomWalk.draw_hits(hits, list.length, filename, options)
  end

  # Instance form of OrderedList.hits using the receiver as the list.
  def hits(set)
    OrderedList.hits(self, set)
  end

  # Instance form of OrderedList.draw_hits using the receiver as the list.
  def draw_hits(set, filename = nil, options = {})
    OrderedList.draw_hits(self, set, filename, options)
  end

  # Scores the positions of +set+ in the receiver with RandomWalk.score and
  # compares that score against RandomWalk.permutations through
  # RandomWalk.pvalue, whose result is returned.
  #
  # Options (defaults): :permutations => 1000, :missing => 0.
  def pvalue(set, options = {})
    options = Misc.add_defaults options, :permutations => 1000, :missing => 0

    # Set only gained #compact (through Enumerable) in Ruby 3.1; go through an
    # Array so that a Set is accepted here just as it is in OrderedList.hits.
    members = set.to_a.compact
    positions = hits(members)

    # NOTE(review): the score is computed with 0 missing while the permutations
    # receive options[:missing] -- confirm that this asymmetry is intended.
    score = RandomWalk.score(positions.sort, self.length, 0)
    permutations = RandomWalk.permutations(set.length, self.length, options[:missing], options[:permutations])
    RandomWalk.pvalue(permutations, score)
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
|
3
|
+
# Rank-product scoring of genes ranked in several signatures, plus random
# score generators to compare against.
module RankProduct
  # Computes a log rank-product score for every gene.
  #
  # gene_ranks      - Hash from gene to an Array of positions, one per signature.
  # signature_sizes - Array with the size of each signature, in the same order.
  #
  # Every position contributes log(position) - log(size), i.e. the log of the
  # rank normalized by the signature size; the contributions are added up.
  #
  # Returns a Hash from gene to its score.
  def self.score(gene_ranks, signature_sizes)
    log_sizes = signature_sizes.collect { |size| Math.log(size) }

    scores = {}
    gene_ranks.each do |gene, positions|
      scores[gene] = positions.zip(log_sizes).
        collect { |position, log_size| Math.log(position) - log_size }. # Take log and subtract log of size (normalize)
        inject(0) { |acc, v| acc + v }
    end
    scores
  end

  # Generates +num+ random scores, each the sum of +num_signatures+ logs of
  # uniform random numbers.
  #
  # Returns an Array with +num+ scores.
  def self.permutations(num_signatures, num = 1000)
    (1..num).collect do
      value = 0
      # Kernel#rand returns a float in [0, 1) and log(0.0) is -Infinity;
      # 1.0 - rand is in (0, 1], which keeps every score finite.
      num_signatures.times { value += Math.log(1.0 - rand) }
      value
    end
  end

  # Builds one random ranking per signature over genes numbered 1..size,
  # keeps only the genes that got a position in every signature and scores
  # them with RankProduct.score.
  #
  # Returns an Array with the scores of the kept genes.
  def self.permutations_full(signature_sizes)
    gene_ranks = {}
    signature_sizes.each do |size|
      (1..size).to_a.shuffle.each_with_index do |gene, pos|
        gene_ranks[gene] ||= []
        gene_ranks[gene] << pos + 1
      end
    end

    # Genes beyond the smallest signature size lack a position somewhere
    gene_ranks.delete_if { |code, positions| positions.length != signature_sizes.length }

    score(gene_ranks, signature_sizes).values
  end
end
|
41
|
+
|
42
|
+
module TSV
  # Scores every entity by the rank product of its positions across +fields+.
  #
  # fields  - Array of field names; the TSV is sliced to them and one ranking
  #           is produced per field with +sort_by+.
  # reverse - when true positions are counted from the end of each ranking
  #           (size - index) instead of from the start (index + 1).
  # block   - optional sort key block taking (key, values), handed to
  #           +sort_by+. Without it values are compared as floats, using the
  #           first value of each entry when the TSV type is :double.
  #
  # Returns the result of RankProduct.score for the collected positions.
  def rank_product(fields, reverse = false, &block)
    tsv = self.slice(fields)

    scores = fields.collect do |field|
      if block_given?
        tsv.sort_by(field, true, &block)
      else
        # The block parameter is +values+; the former reference to an
        # undefined +value+ raised NameError for TSVs that are not :double.
        tsv.sort_by(field, true) { |gene, values| tsv.type == :double ? values.first.to_f : values.to_f }
      end
    end

    positions = {}

    if reverse
      size = self.size
      tsv.keys.each do |entity|
        positions[entity] = scores.collect { |list| size - list.index(entity) }
      end
    else
      tsv.keys.each do |entity|
        positions[entity] = scores.collect { |list| list.index(entity) + 1 }
      end
    end

    # Every ranking covers the whole sliced TSV, so all signatures share its size
    RankProduct.score(positions, fields.collect { tsv.size })
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable:
|
18
|
+
date: 2012-01-13 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: rbbt-util
|
@@ -33,7 +32,7 @@ dependencies:
|
|
33
32
|
type: :runtime
|
34
33
|
version_requirements: *id001
|
35
34
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
35
|
+
name: RubyInline
|
37
36
|
prerelease: false
|
38
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
38
|
none: false
|
@@ -47,7 +46,7 @@ dependencies:
|
|
47
46
|
type: :runtime
|
48
47
|
version_requirements: *id002
|
49
48
|
- !ruby/object:Gem::Dependency
|
50
|
-
name:
|
49
|
+
name: priority_queue
|
51
50
|
prerelease: false
|
52
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
53
52
|
none: false
|
@@ -60,20 +59,6 @@ dependencies:
|
|
60
59
|
version: "0"
|
61
60
|
type: :runtime
|
62
61
|
version_requirements: *id003
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: priority_queue
|
65
|
-
prerelease: false
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
-
none: false
|
68
|
-
requirements:
|
69
|
-
- - ">="
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
hash: 3
|
72
|
-
segments:
|
73
|
-
- 0
|
74
|
-
version: "0"
|
75
|
-
type: :runtime
|
76
|
-
version_requirements: *id004
|
77
62
|
description: Data-mining and statistics
|
78
63
|
email: miguel.vazquez@fdi.ucm.es
|
79
64
|
executables: []
|
@@ -88,6 +73,7 @@ files:
|
|
88
73
|
- lib/rbbt/statistics/fdr.rb
|
89
74
|
- lib/rbbt/statistics/hypergeometric.rb
|
90
75
|
- lib/rbbt/statistics/random_walk.rb
|
76
|
+
- lib/rbbt/statistics/rank_product.rb
|
91
77
|
- lib/rbbt/vector/model.rb
|
92
78
|
- lib/rbbt/vector/model/svm.rb
|
93
79
|
- test/rbbt/statistics/test_fdr.rb
|
@@ -96,7 +82,6 @@ files:
|
|
96
82
|
- test/rbbt/vector/model/test_svm.rb
|
97
83
|
- test/rbbt/network/test_paths.rb
|
98
84
|
- test/test_helper.rb
|
99
|
-
has_rdoc: true
|
100
85
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
101
86
|
licenses: []
|
102
87
|
|
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
111
|
requirements: []
|
127
112
|
|
128
113
|
rubyforge_project:
|
129
|
-
rubygems_version: 1.
|
114
|
+
rubygems_version: 1.8.10
|
130
115
|
signing_key:
|
131
116
|
specification_version: 3
|
132
117
|
summary: Data-mining and statistics
|