mspire 0.6.21 → 0.6.22

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.21
1
+ 0.6.22
@@ -0,0 +1,40 @@
1
+
2
module MS
  module ErrorRate
    # Error-rate estimates for a target/decoy search, where the number of
    # false hits among the target hits is estimated from the number of decoy
    # hits.
    module Decoy
      module_function

      # Estimated precision: the estimated fraction of target hits that are
      # true positives.
      #
      # The number of false target hits is estimated as (num_decoy * pi_not),
      # so the estimated number of true positives is
      # num_target - (num_decoy * pi_not).
      #
      # pi_not is the factor that scales the decoy count into the estimated
      # number of false hits in the target set. A data set with a small
      # fraction of true hits will have a pi_not close to 1. A data set where
      # 40% of the hits are correct should have a pi_not of 0.6.
      # For instance, Spivak uses a fixed pi_not of 0.9 in J. Proteome Res.,
      # 2009, 8 (7), pp 3737-3745
      #
      # num_target and num_decoy only need to respond to :to_f.
      #
      # Returns a Float. When there are no target hits the ratio is
      # undefined, so 0.0 is returned if there are any decoy hits and 1.0
      # otherwise. The value is not clamped: it is negative whenever the
      # estimated number of false hits exceeds num_target.
      def precision(num_target, num_decoy, pi_not=1.0)
        num_target_f = num_target.to_f
        num_decoy_f = num_decoy.to_f
        if num_target_f == 0.0
          num_decoy_f > 0.0 ? 0.0 : 1.0
        else
          (num_target_f - (num_decoy_f * pi_not)) / num_target_f
        end
      end

      # the false positive predictive rate (sometimes called the false
      # positive rate). This is 1 - precision, computed with the same
      # arguments (including pi_not) that #precision takes.
      def fppr(num_target, num_decoy, pi_not=1.0)
        # pass pi_not through as given; writing `pi_not=1.0` in the call would
        # reassign the local and silently discard the caller's value
        1.0 - precision(num_target, num_decoy, pi_not)
      end

      extend(self)
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'set'
2
+ require 'ms/error_rate/decoy'
3
+
4
module MS

  module ErrorRate
    # For generating and working with q-value calculations. The q-value is
    # the global false discovery rate when accepting that particular ID. We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to
    # prefer Storey when possible] ). The main point is that we sort and
    # threshold based on a global FDR.
    module Qvalue
      module_function

      # returns an array of [target_hit, qvalue] pairs, ordered from best hit
      # to worst hit (within each charge group when hits are split by charge).
      #
      # opts:
      #   :z_together  (default false) As implemented, a false/nil value
      #                pools every hit regardless of charge into a single
      #                q-value calculation, while a true value calculates
      #                q-values separately for each group of hits sharing the
      #                same charge (hits must then respond to :charge).
      #                NOTE(review): the option name reads as the opposite of
      #                this; the runtime behavior is kept as-is so existing
      #                callers are unaffected -- confirm the intended meaning.
      #   all options are also passed through to mixed_target_decoy
      #
      # the sort block should sort from worst to best (i.e., it returns a
      # sort key that is larger for better hits)
      # by default, sorting is: {|hit| hit.score} if not provided
      #
      # A hit is treated as a target if it is found in a Set built from
      # target_hits; every other hit is treated as a decoy.
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new(target_hits)

        # Proc.new doesn't do arity checking
        hit_with_qvalue_pairs = Proc.new do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          # distinct local names: assigning to target_hits here would
          # overwrite the method's own argument through the closure
          sorted_targets, qvalues = MS::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          sorted_targets.zip(qvalues)
        end

        all_together = target_hits + decoy_hits
        if opts[:z_together]
          by_charge = all_together.group_by(&:charge)
          # one q-value calculation per charge group, concatenated in the
          # order the charges were first encountered
          by_charge.flat_map {|_charge, hits| hit_with_qvalue_pairs.call(hits) }
        else
          hit_with_qvalue_pairs.call(all_together)
        end
      end

      # returns [target_hits, qvalues] (parallel arrays sorted from best hit to
      # worst hit). expects an array-like object of hits sorted from best to
      # worst hit with decoys interspersed and a target_setlike object that
      # responds to :include? for the hit object. assumes the hit is a decoy
      # if not found in the target set!
      #
      # The q-value recorded for a target hit is 1.0 minus the precision
      # (MS::ErrorRate::Decoy.precision) of all hits at least as good as it.
      #
      # opts:
      #   :monotonic  (default true) if false, then the guarantee that
      #               qvalues be monotonically increasing is not respected.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            precision = MS::ErrorRate::Decoy.precision(num_target, num_decoy)
            target_hits << hit
            qvalues << (1.0 - precision)
          else
            num_decoy += 1
          end
        end
        if opts[:monotonic]
          # walk from the worst score to the best, giving every hit the
          # smallest q-value seen so far (a running minimum)
          min_qvalue = qvalues.last
          qvalues = qvalues.reverse.map do |val|
            min_qvalue = val if val < min_qvalue
            min_qvalue
          end.reverse
        end
        [target_hits, qvalues]
      end

    end
  end
end
@@ -45,7 +45,7 @@ module MS
45
45
  # number of spectra
46
46
  # :return_data => false returns a parallel array containing
47
47
  # the peaks associated with each returned point
48
- # :split => :share | :greedy_y see MS::Peak#split
48
+ # :split => false | :share | :greedy_y see MS::Peak#split
49
49
  #
50
50
  # The binning algorithm is the fastest possible algorithm that would allow
51
51
  # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "mspire"
8
- s.version = "0.6.20"
8
+ s.version = "0.6.22"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John T. Prince", "Simon Chiang"]
12
- s.date = "2012-02-24"
12
+ s.date = "2012-02-29"
13
13
  s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
14
14
  s.email = "jtprince@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -33,6 +33,8 @@ Gem::Specification.new do |s|
33
33
  "lib/ms/cv/param.rb",
34
34
  "lib/ms/cv/paramable.rb",
35
35
  "lib/ms/digester.rb",
36
+ "lib/ms/error_rate/decoy.rb",
37
+ "lib/ms/error_rate/qvalue.rb",
36
38
  "lib/ms/fasta.rb",
37
39
  "lib/ms/ident.rb",
38
40
  "lib/ms/ident/peptide.rb",
@@ -106,12 +108,14 @@ Gem::Specification.new do |s|
106
108
  "lib/obo/unit.rb",
107
109
  "lib/openany.rb",
108
110
  "lib/write_file_or_string.rb",
111
+ "mspire.gemspec",
109
112
  "obo/ims.obo",
110
113
  "obo/ms.obo",
111
114
  "obo/unit.obo",
112
115
  "spec/bin_spec.rb",
113
116
  "spec/ms/cv/param_spec.rb",
114
117
  "spec/ms/digester_spec.rb",
118
+ "spec/ms/error_rate/qvalue_spec.rb",
115
119
  "spec/ms/fasta_spec.rb",
116
120
  "spec/ms/ident/peptide/db_spec.rb",
117
121
  "spec/ms/ident/pepxml/sample_enzyme_spec.rb",
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate/qvalue'
4
+
5
# minimal hit objects: one exposing the default :score sort key and one that
# requires a custom sorting block
Hit = Struct.new(:score, :charge)
HitWeird = Struct.new(:some_obscure_score, :charge)

describe 'calculating q-values' do

  before do
    scores = [14,15,13,12,11]
    # expected q-value for the target hit whose score sits at the same index
    qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]
    charge = 2
    # every decoy scores 0.5 below its target, so (sorted best to worst)
    # targets and decoys alternate, starting with a target
    @target_hits = scores.map {|score| Hit.new(score, charge) }
    @decoy_hits = scores.map {|score| Hit.new(score-0.5, charge) }
    @target_hits_weird = scores.map {|score| HitWeird.new(score, charge) }
    @decoy_hits_weird = scores.map {|score| HitWeird.new(score-0.5, charge) }
    @qval_by_hit = {}
    @target_hits.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
    @target_hits_weird.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
  end

  it 'can calculate qvalues on target/decoy sets (:score is default)' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
    # guard against passing vacuously when no pairs are returned
    pairs.size.should == @target_hits.size
    pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end

  it 'can calculate qvalues on target/decoy sets with custom sorting' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits_weird, @decoy_hits_weird) {|hit| hit.some_obscure_score }
    # guard against passing vacuously when no pairs are returned
    pairs.size.should == @target_hits_weird.size
    pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end
end
@@ -77,10 +77,8 @@ describe MS::Mzml do
77
77
  end
78
78
 
79
79
  it 'writes MS1 and MS2 spectra' do
80
- # profile and ms_level 1
81
- spec_params = ['MS:1000128', ['MS:1000511', 1]]
82
-
83
- spec1 = MS::Mzml::Spectrum.new('scan=1', params: spec_params) do |spec|
80
+ # params: profile and ms_level 1
81
+ spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000128', ['MS:1000511', 1]]) do |spec|
84
82
  spec.data_arrays = [[1,2,3], [4,5,6]]
85
83
  spec.scan_list = MS::Mzml::ScanList.new do |sl|
86
84
  scan = MS::Mzml::Scan.new do |scan|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.21
4
+ version: 0.6.22
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-02-27 00:00:00.000000000 Z
13
+ date: 2012-02-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
17
- requirement: &8601020 !ruby/object:Gem::Requirement
17
+ requirement: &20175840 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '1.5'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *8601020
25
+ version_requirements: *20175840
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: bsearch
28
- requirement: &8615260 !ruby/object:Gem::Requirement
28
+ requirement: &20175280 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.5.0
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *8615260
36
+ version_requirements: *20175280
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: andand
39
- requirement: &8612840 !ruby/object:Gem::Requirement
39
+ requirement: &20174800 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.1
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *8612840
47
+ version_requirements: *20174800
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: obo
50
- requirement: &8611480 !ruby/object:Gem::Requirement
50
+ requirement: &20174320 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.1.0
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *8611480
58
+ version_requirements: *20174320
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: rspec
61
- requirement: &8609880 !ruby/object:Gem::Requirement
61
+ requirement: &20173800 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: '2.6'
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *8609880
69
+ version_requirements: *20173800
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: jeweler
72
- requirement: &8624220 !ruby/object:Gem::Requirement
72
+ requirement: &20173240 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ~>
@@ -77,10 +77,10 @@ dependencies:
77
77
  version: 1.5.2
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *8624220
80
+ version_requirements: *20173240
81
81
  - !ruby/object:Gem::Dependency
82
82
  name: rcov
83
- requirement: &8617960 !ruby/object:Gem::Requirement
83
+ requirement: &20172740 !ruby/object:Gem::Requirement
84
84
  none: false
85
85
  requirements:
86
86
  - - ! '>='
@@ -88,7 +88,7 @@ dependencies:
88
88
  version: '0'
89
89
  type: :development
90
90
  prerelease: false
91
- version_requirements: *8617960
91
+ version_requirements: *20172740
92
92
  description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire,
93
93
  merging of ms-* gems
94
94
  email: jtprince@gmail.com
@@ -114,6 +114,8 @@ files:
114
114
  - lib/ms/cv/param.rb
115
115
  - lib/ms/cv/paramable.rb
116
116
  - lib/ms/digester.rb
117
+ - lib/ms/error_rate/decoy.rb
118
+ - lib/ms/error_rate/qvalue.rb
117
119
  - lib/ms/fasta.rb
118
120
  - lib/ms/ident.rb
119
121
  - lib/ms/ident/peptide.rb
@@ -194,6 +196,7 @@ files:
194
196
  - spec/bin_spec.rb
195
197
  - spec/ms/cv/param_spec.rb
196
198
  - spec/ms/digester_spec.rb
199
+ - spec/ms/error_rate/qvalue_spec.rb
197
200
  - spec/ms/fasta_spec.rb
198
201
  - spec/ms/ident/peptide/db_spec.rb
199
202
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb