rbbt-dm 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -97,10 +97,13 @@ module TSV
|
|
97
97
|
fields ||= self.fields
|
98
98
|
fields = [fields] if String === fields or Symbol === fields
|
99
99
|
|
100
|
-
Persist.persist(filename, :
|
100
|
+
Persist.persist(filename, :yaml, :fields => fields, :persist => persistence, :prefix => "Hyp.Geo.Counts") do
|
101
101
|
data ||= Hash.new(0)
|
102
|
-
|
103
|
-
|
102
|
+
|
103
|
+
with_unnamed do
|
104
|
+
through :key, fields do |key, values|
|
105
|
+
values.flatten.compact.uniq.each{|value| data[value] += 1}
|
106
|
+
end
|
104
107
|
end
|
105
108
|
|
106
109
|
data
|
@@ -108,40 +111,70 @@ module TSV
|
|
108
111
|
end
|
109
112
|
|
110
113
|
def enrichment(list, fields = nil, options = {})
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
+
with_unnamed do
|
115
|
+
fields ||= self.fields.first
|
116
|
+
options = Misc.add_defaults options, :min_support => 3, :fdr => true, :cutoff => false, :add_keys => true
|
117
|
+
|
118
|
+
add_keys = Misc.process_options options, :add_keys
|
119
|
+
|
120
|
+
Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
|
121
|
+
|
122
|
+
selected = select :key => list
|
123
|
+
|
124
|
+
tsv_size = keys.length
|
125
|
+
total = selected.keys.length
|
126
|
+
Log.debug "Found #{total} of #{list.length} entities"
|
127
|
+
|
128
|
+
counts = annotation_counts fields, options[:persist]
|
129
|
+
|
130
|
+
annotations = Hash.new
|
131
|
+
annotation_keys = Hash.new
|
132
|
+
selected.with_unnamed do
|
133
|
+
selected.through :key, fields do |key, values|
|
134
|
+
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
135
|
+
value = value.dup
|
136
|
+
annotations[value] ||= 0
|
137
|
+
annotations[value] += 1
|
138
|
+
next unless add_keys
|
139
|
+
annotation_keys[value] ||= []
|
140
|
+
annotation_keys[value] << key
|
141
|
+
}
|
142
|
+
end
|
143
|
+
end
|
114
144
|
|
115
|
-
|
145
|
+
pvalues = {}
|
146
|
+
annotations.each do |annotation, count|
|
147
|
+
next if count < options[:min_support] or not counts.include? annotation
|
148
|
+
pvalues[annotation] = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
149
|
+
end
|
116
150
|
|
117
|
-
|
118
|
-
total = selected.keys.length
|
119
|
-
Log.debug "Found #{total} of #{list.length} entities"
|
151
|
+
FDR.adjust_hash! pvalues if options[:fdr]
|
120
152
|
|
121
|
-
|
153
|
+
pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]
|
122
154
|
|
123
|
-
|
124
|
-
with_unnamed do
|
125
|
-
selected.through :key, fields do |key, values|
|
126
|
-
values.flatten.compact.uniq.reject{|value| value.empty?}.each{|value|
|
127
|
-
annotations[value] += 1
|
128
|
-
}
|
129
|
-
end
|
130
|
-
end
|
155
|
+
TSV.setup(pvalues, :key_field => fields, :fields => ["p-value"], :cast => :to_f, :type => :single)
|
131
156
|
|
132
|
-
|
133
|
-
|
134
|
-
next if count < options[:min_support] or not counts.include? annotation
|
135
|
-
pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
|
136
|
-
pvalues[annotation] = pvalue
|
137
|
-
end
|
157
|
+
if add_keys
|
158
|
+
tsv = TSV.setup(pvalues.keys, :key_field => fields, :fields => [], :type => :double)
|
138
159
|
|
139
|
-
|
140
|
-
|
160
|
+
tsv.add_field 'p-value' do |annot, values|
|
161
|
+
[pvalues[annot]]
|
162
|
+
end
|
141
163
|
|
142
|
-
|
164
|
+
tsv.add_field self.key_field do |annot, values|
|
165
|
+
if list.respond_to? :annotate
|
166
|
+
list.annotate annotation_keys[annot]
|
167
|
+
else
|
168
|
+
annotation_keys[annot]
|
169
|
+
end
|
170
|
+
end
|
143
171
|
|
144
|
-
|
172
|
+
tsv
|
173
|
+
else
|
174
|
+
pvalues
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|
145
178
|
end
|
146
179
|
|
147
180
|
def enrichment_for(tsv, field, options = {} )
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require 'png'
|
2
2
|
require 'inline'
|
3
|
+
require 'set'
|
3
4
|
|
4
5
|
module RandomWalk
|
5
6
|
|
@@ -89,8 +90,10 @@ module RandomWalk
|
|
89
90
|
if size == 0
|
90
91
|
[0] * times
|
91
92
|
else
|
93
|
+
a = (0..total - 1).to_a
|
92
94
|
(1..times).collect do
|
93
|
-
|
95
|
+
a.shuffle!
|
96
|
+
score(a[1..size].sort, total, missing).abs
|
94
97
|
end
|
95
98
|
end
|
96
99
|
end
|
@@ -114,7 +117,7 @@ module RandomWalk
|
|
114
117
|
:green => PNG::Color::Green,
|
115
118
|
:white => PNG::Color::White,
|
116
119
|
:black => PNG::Color::Black,
|
117
|
-
|
120
|
+
:gray => PNG::Color::Gray,
|
118
121
|
}
|
119
122
|
|
120
123
|
def self.draw_hits(hits, total, filename = nil, options = {})
|
@@ -131,7 +134,7 @@ module RandomWalk
|
|
131
134
|
hits = hits.collect{|h| (h.to_f * size / total).to_i}
|
132
135
|
end
|
133
136
|
|
134
|
-
canvas = PNG::Canvas.new size, width, COLORS[bg_color]
|
137
|
+
canvas = PNG::Canvas.new size, width, COLORS[bg_color] || PNG::Color.from(bg_color)
|
135
138
|
|
136
139
|
sections.each{|color, info|
|
137
140
|
start = info[0]
|
@@ -156,3 +159,35 @@ module RandomWalk
|
|
156
159
|
end
|
157
160
|
end
|
158
161
|
end
|
162
|
+
|
163
|
+
# Helpers for treating an ordered list (e.g. a ranked Array) as the background
# of a random-walk enrichment test. The class-level methods work on any list;
# the instance methods are meant for objects that mix this module in (for
# example an Array extended with OrderedList) and use +self+ as the list.
module OrderedList
  # Returns the 0-based positions, in ascending order, of the elements of
  # +list+ that are members of +set+. +set+ may be a Set or any collection
  # accepted by Set.new.
  def self.hits(list, set)
    set = Set.new(set) unless Set === set

    positions = []
    list.each_with_index do |element, index|
      positions << index if set.include? element
    end
    positions
  end

  # Locates the members of +set+ in +list+ and hands the positions, together
  # with the list length, +filename+ and +options+, to RandomWalk.draw_hits.
  def self.draw_hits(list, set, filename = nil, options = {})
    RandomWalk.draw_hits(OrderedList.hits(list, set), list.length, filename, options)
  end

  # Instance version of OrderedList.hits using +self+ as the list.
  def hits(set)
    OrderedList.hits(self, set)
  end

  # Instance version of OrderedList.draw_hits using +self+ as the list.
  def draw_hits(set, filename = nil, options = {})
    OrderedList.draw_hits(self, set, filename, options)
  end

  # Random-walk p-value for +set+ against this list.
  #
  # Options (defaults filled in through Misc.add_defaults):
  #   :permutations - number of random permutations (default 1000)
  #   :missing      - value forwarded as the "missing" argument of
  #                   RandomWalk.score / RandomWalk.permutations (default 0)
  #
  # The same :missing value is used for the observed score and for the
  # permutations, so both are computed under the same setting. Previously the
  # observed score always used 0, which disagreed with the permutations
  # whenever a non-zero :missing was given.
  def pvalue(set, options = {})
    options = Misc.add_defaults options, :permutations => 1000, :missing => 0
    missing = options[:missing]

    # nil entries are dropped before matching against the list
    positions = hits(set.compact)

    score = RandomWalk.score(positions.sort, self.length, missing)
    # NOTE(review): the permutation size uses set.length (nil entries and
    # members absent from the list included), as in the original code.
    permutations = RandomWalk.permutations(set.length, self.length, missing, options[:permutations])

    RandomWalk.pvalue(permutations, score)
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
|
3
|
+
# Rank-product scoring helpers. All scores are computed in log space: a
# position is normalised by the size of its signature (log(position) -
# log(size)) and the normalised values of a gene are added up.
module RankProduct
  # Computes the log rank-product score of every gene.
  #
  # gene_ranks      - Hash mapping each gene to an Array with its position in
  #                   each signature.
  # signature_sizes - Array with the size of each signature, in the same order
  #                   as the positions.
  #
  # Returns a Hash mapping each gene to the sum of log(position) - log(size).
  def self.score(gene_ranks, signature_sizes)
    log_sizes = signature_sizes.collect { |size| Math.log(size) }

    scores = {}
    gene_ranks.each do |gene, positions|
      scores[gene] = positions.zip(log_sizes).
        collect { |position, log_size| Math.log(position) - log_size }. # take log and subtract log size (normalize)
        inject(0) { |acc, v| acc + v }
    end
    scores
  end

  # Simulates +num+ random scores, each one the sum of +num_signatures+ logs
  # of uniform random numbers.
  #
  # Kernel#rand returns values in [0, 1), so log(rand) could be -Infinity.
  # Sampling 1.0 - rand instead draws from (0, 1], which keeps every score
  # finite.
  #
  # Returns an Array with +num+ scores.
  def self.permutations(num_signatures, num = 1000)
    (1..num).collect do
      (1..num_signatures).inject(0) { |acc, _| acc + Math.log(1.0 - rand) }
    end
  end

  # Simulates one full random experiment: for each signature size the genes
  # 1..size are shuffled and each gene gets its 1-based position. Genes that
  # do not appear in every signature are discarded and the remaining ones are
  # scored with RankProduct.score.
  #
  # Returns an Array with the scores of the retained genes.
  def self.permutations_full(signature_sizes)
    gene_ranks = {}
    signature_sizes.each do |size|
      (1..size).to_a.shuffle.each_with_index do |gene, pos|
        (gene_ranks[gene] ||= []) << pos + 1
      end
    end

    # keep only the genes that were ranked in all the signatures
    gene_ranks.delete_if { |_gene, positions| positions.length != signature_sizes.length }

    score(gene_ranks, signature_sizes).values
  end
end
|
41
|
+
|
42
|
+
module TSV
  # Rank-product score of every entity across several fields.
  #
  # fields  - Array of field names; the TSV is sliced down to these fields and
  #           one ranking is produced per field.
  # reverse - when false an entity's position in a ranking is its index + 1;
  #           when true it is size - index, so the last entity gets position 1.
  # block   - optional ordering block passed on to sort_by. Without it the
  #           ranking uses the numeric value of the field: values.first.to_f
  #           for :double TSVs and values.to_f for any other type.
  #
  # Returns whatever RankProduct.score returns for the computed positions,
  # using the slice size as the size of every signature.
  #
  # NOTE(review): assumes sort_by(field, true) returns the list of keys in
  # ranking order — confirm against rbbt-util.
  def rank_product(fields, reverse = false, &block)
    tsv = self.slice(fields)

    rankings = if block_given?
                 fields.collect { |field| tsv.sort_by(field, true, &block) }
               else
                 # The block parameter is +values+; the previous code read an
                 # undefined +value+ here and raised NameError on every TSV
                 # whose type is not :double.
                 fields.collect do |field|
                   tsv.sort_by(field, true) { |_gene, values| tsv.type == :double ? values.first.to_f : values.to_f }
                 end
               end

    # One entity => index table per ranking (first occurrence wins, like
    # Array#index) so looking up each entity does not rescan the whole list.
    index_tables = rankings.collect do |ranking|
      table = {}
      ranking.each_with_index { |entity, i| table[entity] = i unless table.key?(entity) }
      table
    end

    size = self.size if reverse

    positions = {}
    tsv.keys.each do |entity|
      positions[entity] = index_tables.collect do |table|
        reverse ? size - table[entity] : table[entity] + 1
      end
    end

    RankProduct.score(positions, fields.collect { tsv.size })
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-dm
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Miguel Vazquez
|
@@ -15,8 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
19
|
-
default_executable:
|
18
|
+
date: 2012-01-13 00:00:00 Z
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
21
|
name: rbbt-util
|
@@ -33,7 +32,7 @@ dependencies:
|
|
33
32
|
type: :runtime
|
34
33
|
version_requirements: *id001
|
35
34
|
- !ruby/object:Gem::Dependency
|
36
|
-
name:
|
35
|
+
name: RubyInline
|
37
36
|
prerelease: false
|
38
37
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
38
|
none: false
|
@@ -47,7 +46,7 @@ dependencies:
|
|
47
46
|
type: :runtime
|
48
47
|
version_requirements: *id002
|
49
48
|
- !ruby/object:Gem::Dependency
|
50
|
-
name:
|
49
|
+
name: priority_queue
|
51
50
|
prerelease: false
|
52
51
|
requirement: &id003 !ruby/object:Gem::Requirement
|
53
52
|
none: false
|
@@ -60,20 +59,6 @@ dependencies:
|
|
60
59
|
version: "0"
|
61
60
|
type: :runtime
|
62
61
|
version_requirements: *id003
|
63
|
-
- !ruby/object:Gem::Dependency
|
64
|
-
name: priority_queue
|
65
|
-
prerelease: false
|
66
|
-
requirement: &id004 !ruby/object:Gem::Requirement
|
67
|
-
none: false
|
68
|
-
requirements:
|
69
|
-
- - ">="
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
hash: 3
|
72
|
-
segments:
|
73
|
-
- 0
|
74
|
-
version: "0"
|
75
|
-
type: :runtime
|
76
|
-
version_requirements: *id004
|
77
62
|
description: Data-mining and statistics
|
78
63
|
email: miguel.vazquez@fdi.ucm.es
|
79
64
|
executables: []
|
@@ -88,6 +73,7 @@ files:
|
|
88
73
|
- lib/rbbt/statistics/fdr.rb
|
89
74
|
- lib/rbbt/statistics/hypergeometric.rb
|
90
75
|
- lib/rbbt/statistics/random_walk.rb
|
76
|
+
- lib/rbbt/statistics/rank_product.rb
|
91
77
|
- lib/rbbt/vector/model.rb
|
92
78
|
- lib/rbbt/vector/model/svm.rb
|
93
79
|
- test/rbbt/statistics/test_fdr.rb
|
@@ -96,7 +82,6 @@ files:
|
|
96
82
|
- test/rbbt/vector/model/test_svm.rb
|
97
83
|
- test/rbbt/network/test_paths.rb
|
98
84
|
- test/test_helper.rb
|
99
|
-
has_rdoc: true
|
100
85
|
homepage: http://github.com/mikisvaz/rbbt-phgx
|
101
86
|
licenses: []
|
102
87
|
|
@@ -126,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
126
111
|
requirements: []
|
127
112
|
|
128
113
|
rubyforge_project:
|
129
|
-
rubygems_version: 1.
|
114
|
+
rubygems_version: 1.8.10
|
130
115
|
signing_key:
|
131
116
|
specification_version: 3
|
132
117
|
summary: Data-mining and statistics
|