rbbt-dm 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,132 @@
1
+ require 'inline'
2
+
3
# Benjamini-Hochberg false discovery rate (FDR) helpers.
#
# Every method that takes a list of p-values expects it to be sorted in
# ascending order. The +_fast+ variants are compiled with RubyInline when it
# has been loaded; otherwise they fall back to the pure-Ruby +_native+
# implementations so the module stays usable.
module FDR

  # Benjamini-Hochberg step-up threshold (pure Ruby).
  #
  # values - p-values sorted in ascending order.
  # rate   - target false discovery rate.
  #
  # Returns the largest p-value whose 1-based rank k satisfies
  # value <= rate * k / total, or 0 when no p-value qualifies.
  def self.step_up_native(values, rate)
    total = values.length

    threshold = 0
    values.each_with_index do |value, i|
      # Step-up procedure: do not stop at the first failure, a larger rank
      # may still fall under its (larger) threshold.
      threshold = value if value <= rate * (i + 1).to_f / total
    end
    threshold
  end

  # Benjamini-Hochberg adjusted p-values (pure Ruby).
  #
  # values - p-values sorted in ascending order.
  #
  # Returns a new Array of Floats, in the same order as +values+; the input is
  # not modified.
  def self.adjust_native(values)
    total = values.length.to_f

    adjusted = []
    last = 1.0
    # Walk from the largest p-value down, capping each adjusted value by the
    # one computed for the next larger p-value (and by 1).
    values.reverse.each_with_index do |value, i|
      last = [last, value * total / (total - i)].min
      adjusted << last
    end

    adjusted.reverse
  end

  # In-place variant of adjust_native: the contents of +values+ are replaced
  # by the adjusted p-values.
  #
  # Returns +values+ itself.
  def self.adjust_native!(values)
    values.replace(adjust_native(values))
  end

  class << self
    if respond_to?(:inline)
      inline do |builder|
        builder.c <<-EOC
          double step_up_fast(VALUE ps, double rate){
            long idx;
            long total = RARRAY_LEN(ps);

            double threshold = 0;
            for (idx = 0; idx < total; idx++){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              /* step-up: keep the last p-value under its rank threshold */
              if (p <= rate * (double) (idx + 1) / (double) total){
                threshold = p;
              }
            }

            return threshold;
          }
        EOC

        builder.c <<-EOC
          VALUE adjust_fast_self(VALUE ps){
            long idx;
            long total = RARRAY_LEN(ps);

            double last = 1;
            for (idx = total - 1; idx >= 0 ; idx-- ){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              p = p * (double) total / (double) (idx + 1);
              if (p > last) p = last;
              last = p;

              /* store a fresh Float instead of mutating the old one */
              rb_ary_store(ps, idx, rb_float_new(p));
            }

            return ps;
          }
        EOC

        builder.c <<-EOC
          VALUE adjust_fast(VALUE ps){
            long idx;
            long total = RARRAY_LEN(ps);

            VALUE adjusted = rb_ary_new2(total);

            double last = 1;
            for (idx = total - 1; idx >= 0 ; idx-- ){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              p = p * (double) total / (double) (idx + 1);
              if (p > last) p = last;
              last = p;

              rb_ary_store(adjusted, idx, rb_float_new(p));
            }

            return adjusted;
          }
        EOC
      end
    else
      # RubyInline is not loaded: expose the pure-Ruby versions under the
      # same names.
      alias_method :step_up_fast, :step_up_native
      alias_method :adjust_fast, :adjust_native
      alias_method :adjust_fast_self, :adjust_native!
    end

    # Public entry points.
    alias_method :adjust, :adjust_fast
    alias_method :adjust!, :adjust_fast_self
    alias_method :step_up, :step_up_fast
  end

  # Replaces the p-values stored in +data+ by their Benjamini-Hochberg
  # adjusted values.
  #
  # data  - Hash of key => p-value, or key => container when +field+ is given.
  # field - optional index/key used to read and write the p-value inside each
  #         container (value[field]).
  #
  # Returns +data+, modified in place.
  def self.adjust_hash!(data, field = nil)
    sorted = data.collect{|key, value| [key, field.nil? ? value : value[field]] }.sort_by{|pair| pair[1] }

    keys   = sorted.collect{|pair| pair[0] }
    values = sorted.collect{|pair| pair[1] }

    adjusted = FDR.adjust(values)

    # Write the results back explicitly; Float objects cannot be relied upon
    # to be shared and mutated in place.
    keys.each_with_index do |key, i|
      if field.nil?
        data[key] = adjusted[i]
      else
        data[key][field] = adjusted[i]
      end
    end

    data
  end

end
132
+
@@ -0,0 +1,146 @@
1
+ require 'inline'
2
+ require 'rbbt/util/tsv'
3
+
4
# Hypergeometric test implemented in C through RubyInline.
#
# Exposes Hypergeometric.hypergeometric(total, support, list, found).
module Hypergeometric
  class << self
    inline do |builder|
      # Plain C helpers used by hypergeometric() below. They are added with
      # +prefix+ so they are compiled into the extension as ordinary C
      # functions.
      builder.prefix <<-EOC
        /**
         * Compute log(k!)
         * @param k The value k.
         * @return The result.
         */
        static double lFactorial(double k)
        {
          double r = 0;
          int i;
          for(i=2 ; i<=(int)k ; i++)
          {
            r = r + (double)(log((double)i));
          }
          return r;
        }

        /**
         * Compute the log(binom(n,k))
         * @param n The number of possible items.
         * @param k The number of selected items.
         * @return The result.
         */
        static double lBinom(double n, double k)
        {
          long i;
          double r = 0;

          /* binom(n,k) == binom(n,n-k): iterate over the smaller side */
          if(k > n-k){
            k = n-k;
          }

          for(i = (long)n ; i> (n-k) ; i--)
          {
            r = r + log((double)i);
          }

          r = r - lFactorial(k);

          return r;
        }
      EOC

      builder.c <<-EOC
        /**
         * Compute the Hypergeometric accumulated value: the sum of the
         * probabilities of observing i supported items in the list, for i
         * going from min(support, list) down to found.
         * @param total => total size
         * @param support => total support (items with the property)
         * @param list => selected list size
         * @param found => supported items found in the selected list
         * @return The result
         */
        double hypergeometric(double total, double support, double list, double found)
        {
          double other = total - support;

          /* largest possible number of supported items in the list */
          double top = list;
          if(support < list){
            top = support;
          }

          double log_n_choose_k = lBinom(total,list);

          /* log of the numerator for i == top */
          double lfoo = lBinom(support,top) + lBinom(other, list-top);

          double sum = 0;
          int i;
          for (i = (int)top; i >= found; i-- )
          {
            sum = sum + exp(lfoo - log_n_choose_k);
            if ( i > found)
            {
              /* update the numerator from i to i - 1 */
              lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
            }
          }
          return sum;
        }
      EOC
    end
  end
end
90
+
91
# Annotation counting and enrichment analysis for TSV tables.
class TSV

  # Counts, for every value found in +fields+, the number of entries of the
  # table that contain it (a value repeated inside one entry counts once).
  #
  # fields - field name or list of field names; defaults to all the fields.
  #
  # The counts are kept in a TCHash persistence file derived from the table
  # file name and the field list. When that file already exists it is loaded
  # instead of recomputing the counts.
  #
  # Returns the hash-like object holding value => count.
  def annotation_counts(fields = nil)
    fields ||= self.fields
    fields = [fields] if String === fields || Symbol === fields

    annotation_count_cache_file = TSV.get_persistence_file(File.basename(filename) + "_" + fields.inspect, File.expand_path(File.dirname(filename)))

    # File.exist? replaces the deprecated File.exists?
    if File.exist?(annotation_count_cache_file)
      Log.low "Loading annotation counts from #{ annotation_count_cache_file }"
      TCHash.get(annotation_count_cache_file)
    else
      Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
      hash = TCHash.get(annotation_count_cache_file)

      counts = Hash.new(0)
      through :main, fields do |key, values|
        values.flatten.compact.uniq.each{|value| counts[value] += 1}
      end
      hash.merge! counts
    end
  end

  # Hypergeometric enrichment of the annotations in +fields+ for the entries
  # in +list+.
  #
  # list    - keys of the selected entries.
  # fields  - field name or list of field names holding the annotations.
  # options - :min_support => annotations found in fewer selected entries are
  #                           skipped (default 3)
  #           :fdr         => when set, p-values are adjusted with
  #                           FDR.adjust_hash!
  #           :cutoff      => when set, annotations with a p-value above it
  #                           are removed
  #
  # NOTE(review): FDR is referenced here but this file does not require
  # 'rbbt/statistics/fdr' -- confirm it is loaded before using :fdr.
  #
  # Returns a Hash of annotation => p-value.
  def enrichment(list, fields, options = {})
    options = Misc.add_defaults options, :min_support => 3
    Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
    selected = select :main => list

    tsv_size = keys.length
    total = selected.keys.length
    Log.debug "Found #{total} of #{list.length} entities"

    # Occurrences of each annotation in the whole table.
    counts = annotation_counts fields

    # Occurrences of each annotation among the selected entries.
    annotations = Hash.new 0
    selected.through :main, fields do |key, values|
      values.flatten.compact.uniq.each{|value| annotations[value] += 1}
    end

    pvalues = {}
    annotations.each do |annotation, count|
      Log.debug "Hypergeometric: #{ annotation } - #{[tsv_size, counts[annotation], total, count].inspect}"
      next if count < options[:min_support]
      pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
      pvalues[annotation] = pvalue
    end

    FDR.adjust_hash! pvalues if options[:fdr]
    pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]

    pvalues
  end
end
144
+
145
+
146
+
@@ -0,0 +1,41 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/statistics/fdr'
3
+ require 'test/unit'
4
+ require 'rsruby'
5
+
6
# Checks the FDR implementations against each other and against R's
# p.adjust (method 'BH'), reached through RSRuby.
class TestFDR < Test::Unit::TestCase
  # Truncates a number, or every number of an Array, to four decimals.
  def clean(values)
    truncate = lambda{|number| (number * 10000).to_i.to_f / 10000 }
    return truncate.call(values) unless Array === values
    values.collect{|number| truncate.call(number) }
  end

  # Returns a list holding fresh Float objects with the same values.
  def copy(values)
    values.collect{|number| number + 0.0 }
  end

  def setup
    @r = RSRuby.instance
    @values = [0.001, 0.002, 0.003, 0.003, 0.003, 0.004, 0.006, 0.07, 0.09]
    @threshold = 0.01
    @r_adj = @r.p_adjust(@values,'BH')
  end

  def test_step_up
    cutoff = FDR.step_up(@values, @threshold)
    assert_equal(0.006, clean(cutoff))

    native = FDR.step_up_native(@values, @threshold)
    fast = FDR.step_up_fast(@values, @threshold)
    assert_equal(clean(native), clean(fast))

    # As many values under the step-up cutoff as adjusted values under the
    # threshold according to R.
    accepted_by_r = @r_adj.select{|p| p <= @threshold }
    accepted = @values.select{|p| p <= cutoff }
    assert_equal(accepted_by_r.length, accepted.length)
  end

  def test_adjust
    native = clean(FDR.adjust_native(@values))
    assert_equal(clean(@r_adj), native)
    assert_equal(clean(FDR.adjust_fast(@values)), native)

    # The in-place variant works on a copy so @values stays untouched.
    in_place = FDR.adjust_fast_self(copy(@values))
    assert_equal(clean(@r_adj), clean(in_place))
  end
end
40
+
41
+
@@ -0,0 +1,45 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/statistics/hypergeometric'
3
+ require 'test/unit'
4
+
5
# Tests for the hypergeometric test and the TSV enrichment helpers.
class TestHypergeometric < Test::Unit::TestCase

  # 13 supported items out of a list of 15 must come out significant.
  def test_hypergeometric
    pvalue = Hypergeometric.hypergeometric(100, 20, 15,13)
    assert pvalue < 0.05
  end

  def test_annotation_counts
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
row3 a C Id4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(filename + '#:sep=/\s+/')
      counts = tsv.annotation_counts
      assert_equal 2, counts['a']
    end
  end

  def test_enrichment
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
row3 a C Id4
row4 a B Id3
row5 a B Id3
row6 A B Id3
row7 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(filename + '#:sep=/\s+/')

      # Annotations under the 0.05 significance level
      significant = []
      tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").each do |annot, pvalue|
        significant << annot if pvalue < 0.05
      end
      assert_equal %w(a), significant

      # Annotations over the 0.05 significance level
      rest = []
      tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").each do |annot, pvalue|
        rest << annot if pvalue > 0.05
      end
      assert_equal %w(aa aaa), rest
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
# Helper shared by every test case.
class Test::Unit::TestCase
  # Returns the path of +file+ inside the 'data' directory that sits next to
  # this helper file.
  def test_datafile(file)
    data_dir = File.join(File.dirname(__FILE__), 'data')
    File.join(data_dir, file)
  end
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-dm
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-20 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rbbt-util
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Data-mining and statistics
36
+ email: miguel.vazquez@fdi.ucm.es
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ files:
44
+ - LICENSE
45
+ - lib/rbbt/statistics/fdr.rb
46
+ - lib/rbbt/statistics/hypergeometric.rb
47
+ - test/rbbt/statistics/test_fdr.rb
48
+ - test/rbbt/statistics/test_hypergeometric.rb
49
+ - test/test_helper.rb
50
+ has_rdoc: true
51
+ homepage: http://github.com/mikisvaz/rbbt-phgx
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ hash: 3
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ requirements: []
78
+
79
+ rubyforge_project:
80
+ rubygems_version: 1.3.7
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Data-mining and statistics
84
+ test_files:
85
+ - test/rbbt/statistics/test_fdr.rb
86
+ - test/rbbt/statistics/test_hypergeometric.rb
87
+ - test/test_helper.rb