mspire 0.6.21 → 0.6.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.6.21
1
+ 0.6.22
@@ -0,0 +1,40 @@
1
+
2
module MS
  module ErrorRate
    # Error-rate estimates derived from counts of target and decoy hits.
    # Every method is callable directly on the module, e.g.
    # MS::ErrorRate::Decoy.precision(10, 2).
    module Decoy
      module_function

      # Estimated precision of the target hits: the fraction of target hits
      # that are true positives, where the number of false target hits is
      # estimated from the decoy hits as (num_decoy * pi_not).
      #
      # num_target:: number of target hits (anything responding to to_f)
      # num_decoy::  number of decoy hits (anything responding to to_f)
      # pi_not::     factor applied to the decoy count to estimate the false
      #              hits in the target set (default 1.0). A data set with a
      #              small fraction of true hits will have a pi_not close to
      #              1; a data set where 40% of the hits are correct should
      #              have a pi_not of 0.6. For instance, Spivak uses a fixed
      #              pi_not of 0.9 in J. Proteome Res., 2009, 8 (7),
      #              pp 3737-3745.
      #
      # Returns a Float. With zero target hits the ratio is undefined, so 0.0
      # is returned if there are any decoy hits and 1.0 otherwise. The value
      # is not clamped: it is negative when the estimated false hits exceed
      # the number of target hits.
      def precision(num_target, num_decoy, pi_not=1.0)
        num_target_f = num_target.to_f
        num_decoy_f = num_decoy.to_f
        # guard against dividing by zero when there are no target hits
        if num_target_f == 0.0
          return(num_decoy_f > 0.0 ? 0.0 : 1.0)
        end
        num_true_pos = num_target_f - (num_decoy_f * pi_not)
        num_true_pos / num_target_f
      end

      # The false positive predictive rate (sometimes called the false
      # positive rate), i.e. 1 - precision. Takes the same arguments as
      # #precision and returns a Float.
      def fppr(num_target, num_decoy, pi_not=1.0)
        # pass the caller's pi_not through (writing `pi_not=1.0` in the call
        # would be an assignment that resets it to 1.0)
        1.0 - precision(num_target, num_decoy, pi_not)
      end

      extend(self)
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'set'
2
+ require 'ms/error_rate/decoy'
3
+
4
module MS

  module ErrorRate
    # For generating and working with q-value calculations. The q-value is
    # the global false discovery rate when accepting that particular ID. We
    # do not necessarily distinguish here between *how* the FDR is generated
    # (i.e., Storey's pFDR "the occurrence of false positives" vs.
    # Benjamini-Hochberg's FDR "the rate of false positives" [except to
    # prefer Storey when possible]). The main point is that we sort and
    # threshold based on a global FDR.
    module Qvalue
      module_function

      # Calculates q-values for target hits from a target and a decoy search.
      #
      # target_hits:: Array of target hits
      # decoy_hits::  Array of decoy hits
      # opts::        :z_together (default false) plus any option understood
      #               by mixed_target_decoy (the hash is passed through).
      #               NOTE: as implemented, a falsy :z_together pools the
      #               hits of all charge states into one q-value calculation,
      #               while a truthy value calculates q-values separately for
      #               each charge state (hits must then respond to :charge).
      #               This is the opposite of what the option name suggests;
      #               the behavior is kept for backward compatibility.
      # sorting::     block returning the sort key of a hit; a larger key
      #               means a better hit (hits are sorted ascending by the
      #               key and then reversed). Defaults to {|hit| hit.score}.
      #
      # Returns an array of [target_hit, qvalue] pairs ordered from best to
      # worst hit (within each charge state when split by charge). A hit is
      # treated as a target if it is found in the Set built from target_hits.
      def target_decoy_qvalues(target_hits, decoy_hits, opts={}, &sorting)
        sorting ||= :score
        opts = {:z_together => false}.merge(opts)
        target_set = Set.new(target_hits)

        # Proc.new doesn't do arity checking
        # (locals are named so they do not rebind the target_hits parameter)
        hit_with_qvalue_pairs = Proc.new do |hits|
          sorted_best_to_worst = hits.sort_by(&sorting).reverse
          (sorted_targets, qvalues) = MS::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
          sorted_targets.zip(qvalues)
        end

        all_together = target_hits + decoy_hits
        if !opts[:z_together]
          hit_with_qvalue_pairs.call(all_together)
        else
          # one independent calculation per charge state, concatenated
          all_together.group_by(&:charge).flat_map do |_charge, hits|
            hit_with_qvalue_pairs.call(hits)
          end
        end
      end

      # returns [target_hits, qvalues] (parallel arrays sorted from best hit
      # to worst hit). expects an array-like object of hits sorted from best
      # to worst hit with decoys interspersed and a target_setlike object
      # that responds to :include? for the hit object. assumes the hit is a
      # decoy if not found in the target set! The q-value of a target hit is
      # 1 - precision, computed from the number of target and decoy hits seen
      # up to and including that hit.
      #
      # opts = :monotonic true/false (default true). if monotonic is false,
      # then the guarantee that qvalues be monotonically increasing (from
      # best to worst hit) is not respected.
      def mixed_target_decoy(best_to_worst, target_setlike, opts={})
        opts = {:monotonic => true}.merge(opts)
        num_target = 0
        num_decoy = 0
        target_hits = []
        qvalues = []
        best_to_worst.each do |hit|
          if target_setlike.include?(hit)
            num_target += 1
            target_hits << hit
            qvalues << (1.0 - MS::ErrorRate::Decoy.precision(num_target, num_decoy))
          else
            num_decoy += 1
          end
        end
        if opts[:monotonic]
          # walk from worst to best score, giving each hit the smallest
          # q-value seen at or below its own rank
          running_min = qvalues.last
          qvalues = qvalues.reverse.map do |val|
            running_min = val if val < running_min
            running_min
          end.reverse
        end
        [target_hits, qvalues]
      end

    end
  end
end
@@ -45,7 +45,7 @@ module MS
45
45
  # number of spectra
46
46
  # :return_data => false returns a parallel array containing
47
47
  # the peaks associated with each returned point
48
- # :split => :share | :greedy_y see MS::Peak#split
48
+ # :split => false | :share | :greedy_y see MS::Peak#split
49
49
  #
50
50
  # The binning algorithm is the fastest possible algorithm that would allow
51
51
  # for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "mspire"
8
- s.version = "0.6.20"
8
+ s.version = "0.6.22"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["John T. Prince", "Simon Chiang"]
12
- s.date = "2012-02-24"
12
+ s.date = "2012-02-29"
13
13
  s.description = "mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire, merging of ms-* gems"
14
14
  s.email = "jtprince@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -33,6 +33,8 @@ Gem::Specification.new do |s|
33
33
  "lib/ms/cv/param.rb",
34
34
  "lib/ms/cv/paramable.rb",
35
35
  "lib/ms/digester.rb",
36
+ "lib/ms/error_rate/decoy.rb",
37
+ "lib/ms/error_rate/qvalue.rb",
36
38
  "lib/ms/fasta.rb",
37
39
  "lib/ms/ident.rb",
38
40
  "lib/ms/ident/peptide.rb",
@@ -106,12 +108,14 @@ Gem::Specification.new do |s|
106
108
  "lib/obo/unit.rb",
107
109
  "lib/openany.rb",
108
110
  "lib/write_file_or_string.rb",
111
+ "mspire.gemspec",
109
112
  "obo/ims.obo",
110
113
  "obo/ms.obo",
111
114
  "obo/unit.obo",
112
115
  "spec/bin_spec.rb",
113
116
  "spec/ms/cv/param_spec.rb",
114
117
  "spec/ms/digester_spec.rb",
118
+ "spec/ms/error_rate/qvalue_spec.rb",
115
119
  "spec/ms/fasta_spec.rb",
116
120
  "spec/ms/ident/peptide/db_spec.rb",
117
121
  "spec/ms/ident/pepxml/sample_enzyme_spec.rb",
@@ -0,0 +1,35 @@
1
+ require 'spec_helper'
2
+
3
+ require 'ms/error_rate/qvalue'
4
+
5
# Minimal hit objects for the specs: one exposing the default :score
# attribute and one whose score lives under a non-default name.
Hit = Struct.new(:score, :charge)
HitWeird = Struct.new(:some_obscure_score, :charge)

describe 'calculating q-values' do

  before do
    scores = [14,15,13,12,11]
    # expected q-value of the target hit at the same index in scores
    qvals_expected = [0.5 ,0.0, 2.0/3.0, 3.0/4, 4.0/5]
    # every hit gets charge 2; each decoy scores 0.5 below its target
    @target_hits = scores.map {|score| Hit.new(score, 2) }
    @decoy_hits = scores.map {|score| Hit.new(score-0.5, 2) }
    @target_hits_weird = scores.map {|score| HitWeird.new(score, 2) }
    @decoy_hits_weird = scores.map {|score| HitWeird.new(score-0.5, 2) }
    @qval_by_hit = {}
    @target_hits.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
    @target_hits_weird.zip(qvals_expected) {|hit, qval| @qval_by_hit[hit] = qval }
  end

  it 'can calculate qvalues on target/decoy sets (:score is default)' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits, @decoy_hits)
    # guards against the loop below passing vacuously on an empty result
    pairs.size.should == @target_hits.size
    pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end

  it 'can calculate qvalues on target/decoy sets with custom sorting' do
    pairs = MS::ErrorRate::Qvalue.target_decoy_qvalues(@target_hits_weird, @decoy_hits_weird) {|hit| hit.some_obscure_score }
    # guards against the loop below passing vacuously on an empty result
    pairs.size.should == @target_hits_weird.size
    pairs.each do |hit, qval|
      @qval_by_hit[hit].should be_within(0.00000001).of(qval)
    end
  end
end
@@ -77,10 +77,8 @@ describe MS::Mzml do
77
77
  end
78
78
 
79
79
  it 'writes MS1 and MS2 spectra' do
80
- # profile and ms_level 1
81
- spec_params = ['MS:1000128', ['MS:1000511', 1]]
82
-
83
- spec1 = MS::Mzml::Spectrum.new('scan=1', params: spec_params) do |spec|
80
+ # params: profile and ms_level 1
81
+ spec1 = MS::Mzml::Spectrum.new('scan=1', params: ['MS:1000128', ['MS:1000511', 1]]) do |spec|
84
82
  spec.data_arrays = [[1,2,3], [4,5,6]]
85
83
  spec.scan_list = MS::Mzml::ScanList.new do |sl|
86
84
  scan = MS::Mzml::Scan.new do |scan|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.21
4
+ version: 0.6.22
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-02-27 00:00:00.000000000 Z
13
+ date: 2012-02-29 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
17
- requirement: &8601020 !ruby/object:Gem::Requirement
17
+ requirement: &20175840 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ~>
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '1.5'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *8601020
25
+ version_requirements: *20175840
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: bsearch
28
- requirement: &8615260 !ruby/object:Gem::Requirement
28
+ requirement: &20175280 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,10 +33,10 @@ dependencies:
33
33
  version: 1.5.0
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *8615260
36
+ version_requirements: *20175280
37
37
  - !ruby/object:Gem::Dependency
38
38
  name: andand
39
- requirement: &8612840 !ruby/object:Gem::Requirement
39
+ requirement: &20174800 !ruby/object:Gem::Requirement
40
40
  none: false
41
41
  requirements:
42
42
  - - ! '>='
@@ -44,10 +44,10 @@ dependencies:
44
44
  version: 1.3.1
45
45
  type: :runtime
46
46
  prerelease: false
47
- version_requirements: *8612840
47
+ version_requirements: *20174800
48
48
  - !ruby/object:Gem::Dependency
49
49
  name: obo
50
- requirement: &8611480 !ruby/object:Gem::Requirement
50
+ requirement: &20174320 !ruby/object:Gem::Requirement
51
51
  none: false
52
52
  requirements:
53
53
  - - ! '>='
@@ -55,10 +55,10 @@ dependencies:
55
55
  version: 0.1.0
56
56
  type: :runtime
57
57
  prerelease: false
58
- version_requirements: *8611480
58
+ version_requirements: *20174320
59
59
  - !ruby/object:Gem::Dependency
60
60
  name: rspec
61
- requirement: &8609880 !ruby/object:Gem::Requirement
61
+ requirement: &20173800 !ruby/object:Gem::Requirement
62
62
  none: false
63
63
  requirements:
64
64
  - - ~>
@@ -66,10 +66,10 @@ dependencies:
66
66
  version: '2.6'
67
67
  type: :development
68
68
  prerelease: false
69
- version_requirements: *8609880
69
+ version_requirements: *20173800
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: jeweler
72
- requirement: &8624220 !ruby/object:Gem::Requirement
72
+ requirement: &20173240 !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
75
  - - ~>
@@ -77,10 +77,10 @@ dependencies:
77
77
  version: 1.5.2
78
78
  type: :development
79
79
  prerelease: false
80
- version_requirements: *8624220
80
+ version_requirements: *20173240
81
81
  - !ruby/object:Gem::Dependency
82
82
  name: rcov
83
- requirement: &8617960 !ruby/object:Gem::Requirement
83
+ requirement: &20172740 !ruby/object:Gem::Requirement
84
84
  none: false
85
85
  requirements:
86
86
  - - ! '>='
@@ -88,7 +88,7 @@ dependencies:
88
88
  version: '0'
89
89
  type: :development
90
90
  prerelease: false
91
- version_requirements: *8617960
91
+ version_requirements: *20172740
92
92
  description: mass spectrometry proteomics, lipidomics, and tools, a rewrite of mspire,
93
93
  merging of ms-* gems
94
94
  email: jtprince@gmail.com
@@ -114,6 +114,8 @@ files:
114
114
  - lib/ms/cv/param.rb
115
115
  - lib/ms/cv/paramable.rb
116
116
  - lib/ms/digester.rb
117
+ - lib/ms/error_rate/decoy.rb
118
+ - lib/ms/error_rate/qvalue.rb
117
119
  - lib/ms/fasta.rb
118
120
  - lib/ms/ident.rb
119
121
  - lib/ms/ident/peptide.rb
@@ -194,6 +196,7 @@ files:
194
196
  - spec/bin_spec.rb
195
197
  - spec/ms/cv/param_spec.rb
196
198
  - spec/ms/digester_spec.rb
199
+ - spec/ms/error_rate/qvalue_spec.rb
197
200
  - spec/ms/fasta_spec.rb
198
201
  - spec/ms/ident/peptide/db_spec.rb
199
202
  - spec/ms/ident/pepxml/sample_enzyme_spec.rb