rbbt-dm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2010-2011 Miguel Vázquez García
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,132 @@
1
+ require 'inline'
2
+
3
# False discovery rate (FDR) helpers for lists of p-values.
#
# Every routine expects +values+ to be sorted in ascending order. Each
# operation has a pure-Ruby reference implementation (+*_native+) and a
# +*_fast+ counterpart compiled with RubyInline. +adjust+, +adjust!+ and
# +step_up+ are aliases of the +*_fast+ methods.
module FDR

  # Walks the ascending p-values and returns the last one that satisfies
  # p <= rate * rank / total (rank is 1-based), stopping at the first
  # p-value that exceeds its bound. Returns 0 when the first p-value
  # already fails or +values+ is empty.
  #
  # NOTE(review): the scan stops at the first violation; the usual
  # step-up formulation keeps the largest rank that satisfies the bound.
  # Behaviour is left as it was (the C version does the same) -- confirm.
  #
  # values:: Array of p-values sorted in ascending order
  # rate::   target false discovery rate
  def self.step_up_native(values, rate)
    total = values.length

    last = 0
    values.each_with_index do |value, i|
      return last if value > rate * (i + 1).to_f / total
      last = value
    end

    last
  end

  # Returns a new Array with the adjusted p-values: each value is scaled
  # by total / rank and capped by the adjusted value of the next larger
  # p-value (and by 1.0), so the result never decreases with rank.
  #
  # values:: Array of p-values sorted in ascending order
  def self.adjust_native(values)
    total = values.length.to_f

    adjusted = []
    # Float seed so the result holds Floats only, like the C versions
    last = 1.0
    values.reverse.each_with_index do |value, i|
      # i counts from the largest p-value, so (total - i) is its 1-based rank
      adj = [last, value * total / (total - i)].min
      last = adj
      adjusted << adj
    end

    adjusted.reverse
  end

  class << self
    if respond_to?(:inline)
      inline do |builder|
        # Same contract as step_up_native
        builder.c <<-EOC
          double step_up_fast(VALUE ps, double rate){
            long idx;
            long total = RARRAY_LEN(ps);

            double last_value = 0;
            for (idx = 0; idx < total; idx++){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              if (p > rate * (double) (idx + 1) / (double) total){
                return last_value;
              }
              last_value = p;
            }

            return last_value;
          }
        EOC

        # Same values as adjust_native, but stored back into +ps+, which
        # is returned. Elements are replaced with new Floats instead of
        # being mutated, since Floats may be immediate values.
        builder.c <<-EOC
          VALUE adjust_fast_self(VALUE ps){
            long idx;
            long total = RARRAY_LEN(ps);

            double last = 1;
            for (idx = total - 1; idx >= 0 ; idx-- ){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              p = p * (double) total / (double) (idx + 1);
              if (p > last) p = last;
              last = p;

              rb_ary_store(ps, idx, rb_float_new(p));
            }

            return ps;
          }
        EOC

        # Same contract as adjust_native: returns a new Array and leaves
        # +ps+ untouched. The result is filled by index from the end.
        builder.c <<-EOC
          VALUE adjust_fast(VALUE ps){
            long idx;
            long total = RARRAY_LEN(ps);

            VALUE result = rb_ary_new2(total);

            double last = 1;
            for (idx = total - 1; idx >= 0 ; idx-- ){
              double p = NUM2DBL(rb_ary_entry(ps, idx));

              p = p * (double) total / (double) (idx + 1);
              if (p > last) p = last;
              last = p;

              rb_ary_store(result, idx, rb_float_new(p));
            }

            return result;
          }
        EOC
      end
    else
      # RubyInline's Module#inline is not loaded: provide the *_fast
      # entry points on top of the pure-Ruby implementations.

      def step_up_fast(ps, rate)
        step_up_native(ps, rate)
      end

      def adjust_fast(ps)
        adjust_native(ps)
      end

      # Replaces the contents of +ps+ with the adjusted values and
      # returns +ps+ itself.
      def adjust_fast_self(ps)
        ps.replace(adjust_native(ps))
      end
    end
  end

  class << self
    alias_method :adjust, :adjust_fast
    alias_method :adjust!, :adjust_fast_self
    alias_method :step_up, :step_up_fast
  end

  # Adjusts the p-values stored in +data+ in place and returns +data+.
  #
  # data::  Hash-like object mapping keys to p-values or, when +field+
  #         is given, to containers holding the p-value at +field+
  # field:: index or key of the p-value inside each entry; nil when the
  #         entries are the p-values themselves
  def self.adjust_hash!(data, field = nil)
    sorted = data.collect { |key, value|
      [key, field.nil? ? value : value[field]]
    }.sort_by { |pair| pair[1] }

    keys   = sorted.collect { |pair| pair[0] }
    values = sorted.collect { |pair| pair[1] }

    adjusted = FDR.adjust!(values)

    # Write the results back explicitly rather than relying on Float
    # objects being mutated behind the hash's back.
    keys.each_with_index do |key, i|
      if field.nil?
        data[key] = adjusted[i]
      else
        data[key][field] = adjusted[i]
      end
    end

    data
  end

end
132
+
@@ -0,0 +1,146 @@
1
require 'inline'
require 'rbbt/util/tsv'
require 'rbbt/statistics/fdr'
3
+
4
# Hypergeometric test compiled with RubyInline.
#
# Hypergeometric.hypergeometric(total, support, list, found) sums the
# hypergeometric probabilities for found, found + 1, ...,
# min(list, support) annotated items in a selection of +list+ items.
# The helpers lFactorial and lBinom are plain C functions (c_raw) used
# only by the compiled code.
module Hypergeometric
  class << self
    inline do |builder|
      builder.c_raw <<-EOC
        /**
         * Compute log(k!)
         * @param k The value k.
         * @return The result.
         */
        double lFactorial(double k)
        {
          double r = 0;
          int i;
          for(i=2 ; i<=(int)k ; i++)
          {
            r = r + (double)(log((double)i));
          }
          return r;
        }



        /**
         * Compute the log(binom(n,k))
         * @param n The number of possible items.
         * @param k The number of selected items.
         * @return The result.
         */
        double lBinom(double n, double k)
        {
          long i;
          double r = 0;

          /* binom(n,k) == binom(n,n-k): iterate over the smaller side */
          if(k > n-k){
            k = n-k;
          }

          for(i = (long)n ; i> (n-k) ; i--)
          {
            r = r + log((double)i);
          }

          r = r - lFactorial(k);

          return r;
        }
      EOC

      builder.c <<-EOC
        /**
         * Compute the Hypergeometric accumulated value.
         * @param total => total size
         * @param support => total support
         * @param list => selected list size
         * @param found => support found in the selected list
         * @return The result
         */
        double hypergeometric(double total, double support, double list, double found)
        {
          double other = total - support;

          double top = list;
          if(support < list){
            top = support;
          }

          double log_n_choose_k = lBinom(total,list);

          double lfoo = lBinom(support,top) + lBinom(other, list-top);

          double sum = 0;
          int i;
          for (i = (int)top; i >= found; i-- )
          {
            sum = sum + exp(lfoo - log_n_choose_k);
            if ( i > found)
            {
              lfoo = lfoo + log(i / (support - i+1)) + log( (other - list + i) / (list-i+1) );
            }
          }
          return sum;
        }
      EOC
    end
  end
end
90
+
91
# Annotation counting and enrichment analysis for TSV objects.
class TSV

  # Counts how many times each distinct value of +fields+ is seen while
  # traversing the TSV with +through+; a value repeated within a single
  # traversal step is counted once for that step.
  #
  # The counts are kept in a TCHash whose file name is derived from this
  # TSV's +filename+ and +fields+. If that file already exists the
  # stored hash is returned without recounting.
  #
  # fields:: a field name (String or Symbol) or an Array of them;
  #          defaults to all of this TSV's fields
  #
  # Returns the TCHash holding value => count.
  def annotation_counts(fields = nil)
    fields ||= self.fields
    fields = [fields] if String === fields || Symbol === fields

    annotation_count_cache_file = TSV.get_persistence_file(File.basename(filename) + "_" + fields.inspect, File.expand_path(File.dirname(filename)))

    if File.exist?(annotation_count_cache_file)
      Log.low "Loading annotation counts from #{ annotation_count_cache_file }"
      TCHash.get(annotation_count_cache_file)
    else
      Log.low "Saving annotation counts to #{ annotation_count_cache_file }"
      hash = TCHash.get(annotation_count_cache_file)

      counts = Hash.new(0)
      through :main, fields do |key, values|
        values.flatten.compact.uniq.each{|value| counts[value] += 1}
      end
      hash.merge! counts
    end
  end

  # Hypergeometric enrichment of the values of +fields+ among the
  # entries of +list+ that are present in this TSV.
  #
  # list::    keys of the entities to analyse
  # fields::  field name(s) holding the annotations
  # options:: :min_support => annotations seen fewer times than this in
  #           the selection are skipped (default 3);
  #           :fdr => when set, p-values go through FDR.adjust_hash!;
  #           :cutoff => when set, p-values above it are dropped
  #
  # Returns a Hash of annotation => p-value.
  def enrichment(list, fields, options = {})
    options = Misc.add_defaults options, :min_support => 3
    Log.debug "Enrichment analysis of field #{fields.inspect} for #{list.length} entities"
    selected = select :main => list

    tsv_size = keys.length
    total    = selected.keys.length
    Log.debug "Found #{total} of #{list.length} entities"

    # Background counts over the whole TSV
    counts = annotation_counts fields

    # Counts restricted to the selected entries
    annotations = Hash.new 0
    selected.through :main, fields do |key, values|
      values.flatten.compact.uniq.each{|value| annotations[value] += 1}
    end

    pvalues = {}
    annotations.each do |annotation, count|
      Log.debug "Hypergeometric: #{ annotation } - #{[tsv_size, counts[annotation], total, count].inspect}"
      next if count < options[:min_support]
      pvalue = Hypergeometric.hypergeometric(tsv_size, counts[annotation], total, count)
      pvalues[annotation] = pvalue
    end

    FDR.adjust_hash! pvalues if options[:fdr]
    pvalues.delete_if{|k, pvalue| pvalue > options[:cutoff] } if options[:cutoff]

    pvalues
  end
end
144
+
145
+
146
+
@@ -0,0 +1,41 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/statistics/fdr'
3
+ require 'test/unit'
4
+ require 'rsruby'
5
+
6
# Checks the FDR routines against each other and against R's
# p.adjust(..., 'BH') obtained through RSRuby.
class TestFDR < Test::Unit::TestCase
  # Rounds a number, or every element of an Array, to four decimal
  # places so that comparisons ignore floating point noise. Rounding
  # (rather than truncating) keeps two values that differ only by noise
  # around a four-decimal boundary equal.
  def clean(values)
    if Array === values
      values.collect{|v| (v * 10000).round.to_f / 10000}
    else
      (values * 10000).round.to_f / 10000
    end
  end

  # Returns a new Array of Floats with the same values, so in-place
  # adjustment does not touch the fixture.
  def copy(values)
    values.collect{|v| v + 0.0}
  end

  def setup
    @r = RSRuby.instance
    @values = [0.001, 0.002, 0.003, 0.003, 0.003, 0.004, 0.006, 0.07, 0.09]
    @threshold = 0.01
    # Reference adjustment computed by R
    @r_adj = @r.p_adjust(@values,'BH')

  end

  def test_step_up
    assert_equal(0.006, clean(FDR.step_up(@values, @threshold)))
    assert_equal(clean(FDR.step_up_native(@values, @threshold)), clean(FDR.step_up_fast(@values,@threshold)))
    # The step-up threshold must select as many values as R's adjusted p-values do
    assert_equal(@r_adj.select{|v| v <= @threshold}.length, @values.select{|v| v <= FDR.step_up(@values, @threshold)}.length)
  end

  def test_adjust
    assert_equal(clean(@r_adj), clean(FDR.adjust_native(@values)))
    assert_equal(clean(FDR.adjust_fast(@values)), clean(FDR.adjust_native(@values)))

    assert_equal(clean(@r_adj), clean(FDR.adjust_fast_self(copy(@values))))
  end
end
40
+
41
+
@@ -0,0 +1,45 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../test_helper')
2
+ require 'rbbt/statistics/hypergeometric'
3
+ require 'test/unit'
4
+
5
# Tests for the compiled hypergeometric function and the TSV
# annotation_counts / enrichment helpers built on it.
class TestHypergeometric < Test::Unit::TestCase

  def test_hypergeometric
    assert Hypergeometric.hypergeometric(100, 20, 15,13) < 0.05
  end

  def test_annotation_counts
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
row3 a C Id4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(filename + '#:sep=/\s+/')
      counts = tsv.annotation_counts
      # 'a' appears in row1 and row3
      assert_equal 2, counts['a']
    end
  end

  def test_enrichment
    content =<<-EOF
#Id ValueA ValueB OtherID
row1 a|aa|aaa b Id1|Id2
row2 A B Id3
row3 a C Id4
row4 a B Id3
row5 a B Id3
row6 A B Id3
row7 A B Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.new(filename + '#:sep=/\s+/')

      # Annotation names are sorted before comparing: the result is a
      # Hash and its iteration order is not part of the contract.
      significant = tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue < 0.05 ? annot : nil}.compact.sort
      assert_equal %w(a), significant

      # NOTE(review): enrichment defaults :min_support to 3 and 'aa' and
      # 'aaa' each occur once in the selected rows, so they look like
      # they are skipped before this assertion -- confirm whether
      # :min_support => 1 was intended here.
      rest = tsv.enrichment(%w(row1 row3 row4 row5), "ValueA").collect{|annot,pvalue| pvalue > 0.05 ? annot : nil}.compact.sort
      assert_equal %w(aa aaa), rest
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require 'test/unit'
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
4
+
5
# Shared helper available to every test case.
class Test::Unit::TestCase
  # Builds the path of +file+ inside the 'data' directory located next
  # to this helper file.
  def test_datafile(file)
    data_dir = File.join(File.dirname(__FILE__), 'data')
    File.join(data_dir, file)
  end
end
metadata ADDED
@@ -0,0 +1,87 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbbt-dm
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - Miguel Vazquez
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-12-20 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: rbbt-util
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Data-mining and statistics
36
+ email: miguel.vazquez@fdi.ucm.es
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ files:
44
+ - LICENSE
45
+ - lib/rbbt/statistics/fdr.rb
46
+ - lib/rbbt/statistics/hypergeometric.rb
47
+ - test/rbbt/statistics/test_fdr.rb
48
+ - test/rbbt/statistics/test_hypergeometric.rb
49
+ - test/test_helper.rb
50
+ has_rdoc: true
51
+ homepage: http://github.com/mikisvaz/rbbt-phgx
52
+ licenses: []
53
+
54
+ post_install_message:
55
+ rdoc_options: []
56
+
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ hash: 3
65
+ segments:
66
+ - 0
67
+ version: "0"
68
+ required_rubygems_version: !ruby/object:Gem::Requirement
69
+ none: false
70
+ requirements:
71
+ - - ">="
72
+ - !ruby/object:Gem::Version
73
+ hash: 3
74
+ segments:
75
+ - 0
76
+ version: "0"
77
+ requirements: []
78
+
79
+ rubyforge_project:
80
+ rubygems_version: 1.3.7
81
+ signing_key:
82
+ specification_version: 3
83
+ summary: Data-mining and statistics
84
+ test_files:
85
+ - test/rbbt/statistics/test_fdr.rb
86
+ - test/rbbt/statistics/test_hypergeometric.rb
87
+ - test/test_helper.rb