ms-core 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History ADDED
@@ -0,0 +1,7 @@
1
+
2
+ == version 0.0.1
3
+
4
+ * copied over from Simon's mspire.
5
+ * added functionality to Ms::Spectrum
6
+
7
+
File without changes
File without changes
data/lib/ms/calc.rb CHANGED
@@ -19,14 +19,12 @@ module Ms
19
19
  mz = mz.to_f
20
20
  tol = ppm_tol_at(mz, ppm)
21
21
  mz-tol...mz+tol
22
- end
23
-
22
+ end
24
23
 
25
24
  # Rounds n to the specified precision (ie number of decimal places)
26
- # def round(n, precision)
27
- # factor = 10**precision.to_i
28
- # (n * factor).round.to_f / factor
29
- # end
30
-
25
+ def round(n, precision)
26
+ factor = 10**precision.to_i
27
+ (n * factor).round.to_f / factor
28
+ end
31
29
  end
32
30
  end
data/lib/ms/data.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'ms/data/interleaved'
2
2
  require 'ms/data/transposed'
3
+ require 'ms/data/lazy_string'
3
4
 
4
5
  module Ms
5
6
 
@@ -54,4 +55,4 @@ module Ms
54
55
  send("new_#{type}", data)
55
56
  end
56
57
  end
57
- end
58
+ end
data/lib/ms/id/peptide.rb CHANGED
@@ -28,7 +28,7 @@ module Ms::Id::Peptide
28
28
  when 1 ## this must be a parse error!
29
29
  pieces[0] ## which is the peptide itself
30
30
  else
31
- abort "bad peptide sequence: #{sequence}"
31
+ abort "bad peptide sequence: #{sequence.inspect}"
32
32
  end
33
33
  end
34
34
 
data/lib/ms/mass/aa.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'molecules'
1
2
  require 'ms/mass'
2
3
 
3
4
  module Ms
@@ -14,16 +15,10 @@ module Ms
14
15
  # # or use symbols
15
16
  # MONO[:A] # => 71.0371137878
16
17
  #
17
- # This module is built on masses generated from the excellent {'molecules'
18
+ # This module is built on the excellent {'molecules'
18
19
  # library}[http://github.com/bahuvrihi/molecules/tree/master]. See that
19
- # library for more serious work with masses:
20
- #
21
- # gem install molecules
20
+ # library for more serious work with masses.
22
21
  module AA
23
- Ms::Mass.constants.reject {|v| v == 'AA' }.each do |const|
24
- const_set(const, Ms::Mass.const_get(const))
25
- end
26
-
27
22
  # These are included here to offer maximum functionality
28
23
  MOLECULES_MONO_UNSUPPORTED = {
29
24
  :B => 172.048405, # average of aspartic acid and asparagine
@@ -40,70 +35,10 @@ module Ms
40
35
  #:J => nil,
41
36
  }
42
37
 
43
- # generated from molecules version 0.1.3:
44
- MOLECULES_MONO = {
45
- :A => 71.0371137878,
46
- :C => 103.0091844778,
47
- :D => 115.026943032,
48
- :E => 129.0425930962,
49
- :F => 147.0684139162,
50
- :G => 57.0214637236,
51
- :H => 137.0589118624,
52
- :I => 113.0840639804,
53
- :K => 128.0949630177,
54
- :L => 113.0840639804,
55
- :M => 131.0404846062,
56
- :N => 114.0429274472,
57
- :O => 211.1446528645,
58
- :P => 97.052763852,
59
- :Q => 128.0585775114,
60
- :R => 156.1011110281,
61
- :S => 87.0320284099,
62
- :T => 101.0476784741,
63
- :U => 150.9536355878,
64
- :V => 99.0684139162,
65
- :W => 186.0793129535,
66
- :Y => 163.0633285383,
67
- }
68
-
69
- MONO = MOLECULES_MONO_UNSUPPORTED.merge MOLECULES_MONO
70
-
71
- # generated from molecules version 0.1.3:
72
- MOLECULES_AVG = {
73
- :A => 71.0779,
74
- :C => 103.1429,
75
- :D => 115.0874,
76
- :E => 129.11398,
77
- :F => 147.17386,
78
- :G => 57.05132,
79
- :H => 137.13928,
80
- :I => 113.15764,
81
- :K => 128.17228,
82
- :L => 113.15764,
83
- :M => 131.19606,
84
- :N => 114.10264,
85
- :O => 211.28076,
86
- :P => 97.11518,
87
- :Q => 128.12922,
88
- :R => 156.18568,
89
- :S => 87.0773,
90
- :T => 101.10388,
91
- :U => 150.0379,
92
- :V => 99.13106,
93
- :W => 186.2099,
94
- :Y => 163.17326,
95
- }
96
-
97
- AVG = MOLECULES_AVG_UNSUPPORTED.merge MOLECULES_AVG
98
-
99
- [AVG, MONO].each do |hash|
100
- hash.each {|k,v| hash[k.to_s] = v }
101
- end
102
38
 
103
39
  # returns a hash based on the molecules library of amino acid residues.
104
40
  # type is :mono or :avg
105
41
  def self.mass_index(type=:mono)
106
- require 'molecules'
107
42
  hash = {}
108
43
  ('A'..'Z').each do |letter|
109
44
  if res = Molecules::Libraries::Residue[letter]
@@ -120,14 +55,10 @@ module Ms
120
55
  hash
121
56
  end
122
57
 
123
- # prints a MONO or AVG hash for inclusion in ruby code
124
- # type can be :mono or :avg
125
- def self.print_mass_index(type=:mono)
126
- puts "#{type.to_s.upcase} = {"
127
- mass_index(type).sort.each do |k,v|
128
- puts ":#{k} => #{v},"
129
- end
130
- puts "}"
58
+ MONO = MOLECULES_MONO_UNSUPPORTED.merge( self.mass_index(:mono) )
59
+ AVG = MOLECULES_AVG_UNSUPPORTED.merge( self.mass_index(:avg) )
60
+ [AVG, MONO].each do |hash|
61
+ hash.each {|k,v| hash[k.to_s] = v }
131
62
  end
132
63
 
133
64
  end
data/lib/ms/spectrum.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Ms
2
2
  class Spectrum
3
+ include Enumerable
4
+
3
5
  # The underlying data store.
4
6
  attr_reader :data
5
7
 
@@ -60,97 +62,99 @@ module Ms
60
62
  Ms::Spectrum.new([self.mzs, self.intensities.map {|v| v / tic }])
61
63
  end
62
64
 
63
- # uses index function and returns the intensity at that value
64
- def intensity_at_mz(mz)
65
- if x = index(mz)
66
- intensities[x]
67
- else
68
- nil
69
- end
70
- end
65
+ ## uses index function and returns the intensity at that value
66
+ #def intensity_at_mz(mz)
67
+ #if x = index(mz)
68
+ #intensities[x]
69
+ #else
70
+ #nil
71
+ #end
72
+ #end
71
73
 
72
- # returns the index of the first value matching that m/z. the argument m/z
73
- # may be less precise than the actual m/z (rounding to the same precision
74
- # given) but must be at least integer precision (after rounding)
75
- # implemented as binary search (bsearch from the web)
76
- def index(mz)
77
- mz_ar = mzs
78
- return_val = nil
79
- ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
80
- if mz_ar[ind] == mz
81
- return_val = ind
82
- else
83
- # do a rounding game to see which one is it, or nil
84
- # find all the values rounding to the same integer in the locale
85
- # test each one fully in turn
86
- mz = mz.to_f
87
- mz_size = mz_ar.size
88
- if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
89
- return_val = ind
90
- else # run the loop
91
- up = ind
92
- loop do
93
- up += 1
94
- if up >= mz_size
95
- break
96
- end
97
- mz_up = mz_ar[up]
98
- if (mz_up.ceil - mz.ceil >= 2)
99
- break
100
- else
101
- if equal_after_rounding?(mz_up, mz)
102
- return_val = up
103
- return return_val
104
- end
105
- end
106
- end
107
- dn= ind
108
- loop do
109
- dn -= 1
110
- if dn < 0
111
- break
112
- end
113
- mz_dn = mz_ar[dn]
114
- if (mz.floor - mz_dn.floor >= 2)
115
- break
116
- else
117
- if equal_after_rounding?(mz_dn, mz)
118
- return_val = dn
119
- return return_val
120
- end
121
- end
122
- end
123
- end
124
- end
125
- return_val
126
- end
74
+ ## index mz, tolerance = :nearest(1), Float, :nearest_within_integer
127
75
 
128
- # less_precise should be a float
129
- # precise should be a float
130
- def equal_after_rounding?(precise, less_precise) # :nodoc:
131
- # determine the precision of less_precise
132
- exp10 = precision_as_neg_int(less_precise)
133
- #puts "EXP10: #{exp10}"
134
- answ = ((precise*exp10).round == (less_precise*exp10).round)
135
- #puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
136
- #puts answ
137
- (precise*exp10).round == (less_precise*exp10).round
138
- end
76
+ ## returns the index of the first value matching that m/z. the argument m/z
77
+ ## may be less precise than the actual m/z (rounding to the same precision
78
+ ## given) but must be at least integer precision (after rounding)
79
+ ## implemented as binary search (bsearch from the web)
80
+ #def index(mz)
81
+ #mz_ar = mzs
82
+ #return_val = nil
83
+ #ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
84
+ #if mz_ar[ind] == mz
85
+ #return_val = ind
86
+ #else
87
+ ## do a rounding game to see which one is it, or nil
88
+ ## find all the values rounding to the same integer in the locale
89
+ ## test each one fully in turn
90
+ #mz = mz.to_f
91
+ #mz_size = mz_ar.size
92
+ #if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
93
+ #return_val = ind
94
+ #else # run the loop
95
+ #up = ind
96
+ #loop do
97
+ #up += 1
98
+ #if up >= mz_size
99
+ #break
100
+ #end
101
+ #mz_up = mz_ar[up]
102
+ #if (mz_up.ceil - mz.ceil >= 2)
103
+ #break
104
+ #else
105
+ #if equal_after_rounding?(mz_up, mz)
106
+ #return_val = up
107
+ #return return_val
108
+ #end
109
+ #end
110
+ #end
111
+ #dn= ind
112
+ #loop do
113
+ #dn -= 1
114
+ #if dn < 0
115
+ #break
116
+ #end
117
+ #mz_dn = mz_ar[dn]
118
+ #if (mz.floor - mz_dn.floor >= 2)
119
+ #break
120
+ #else
121
+ #if equal_after_rounding?(mz_dn, mz)
122
+ #return_val = dn
123
+ #return return_val
124
+ #end
125
+ #end
126
+ #end
127
+ #end
128
+ #end
129
+ #return_val
130
+ #end
139
131
 
140
- # returns 1 for ones place, 10 for tenths, 100 for hundredths
141
- # to a precision exceeding 1e-6
142
- def precision_as_neg_int(float) # :nodoc:
143
- neg_exp10 = 1
144
- loop do
145
- over = float * neg_exp10
146
- rounded = over.round
147
- if (over - rounded).abs <= 1e-6
148
- break
149
- end
150
- neg_exp10 *= 10
151
- end
152
- neg_exp10
153
- end
132
+ ## less_precise should be a float
133
+ ## precise should be a float
134
+ #def equal_after_rounding?(precise, less_precise) # :nodoc:
135
+ ## determine the precision of less_precise
136
+ #exp10 = precision_as_neg_int(less_precise)
137
+ ##puts "EXP10: #{exp10}"
138
+ #answ = ((precise*exp10).round == (less_precise*exp10).round)
139
+ ##puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
140
+ ##puts answ
141
+ #(precise*exp10).round == (less_precise*exp10).round
142
+ #end
143
+
144
+ ## returns 1 for ones place, 10 for tenths, 100 for hundredths
145
+ ## to a precision exceeding 1e-6
146
+ #def precision_as_neg_int(float) # :nodoc:
147
+ #neg_exp10 = 1
148
+ #loop do
149
+ #over = float * neg_exp10
150
+ #rounded = over.round
151
+ #if (over - rounded).abs <= 1e-6
152
+ #break
153
+ #end
154
+ #neg_exp10 *= 10
155
+ #end
156
+ #neg_exp10
157
+ #end
154
158
 
155
159
 
156
160
  end
metadata CHANGED
@@ -1,18 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
- - John Prince
8
7
  - Simon Chiang
8
+ - John Prince
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-05-22 00:00:00 -06:00
13
+ date: 2009-09-08 00:00:00 -06:00
14
14
  default_executable:
15
15
  dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: molecules
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: 0.2.0
25
+ version:
16
26
  - !ruby/object:Gem::Dependency
17
27
  name: tap
18
28
  type: :development
@@ -29,42 +39,43 @@ dependencies:
29
39
  version_requirement:
30
40
  version_requirements: !ruby/object:Gem::Requirement
31
41
  requirements:
32
- - - "="
42
+ - - ">="
33
43
  - !ruby/object:Gem::Version
34
44
  version: 1.3.0
35
45
  version:
36
46
  description:
37
- email: jtprince@gmail.com
47
+ email:
48
+ - jtprince@gmail.com
38
49
  executables: []
39
50
 
40
51
  extensions: []
41
52
 
42
53
  extra_rdoc_files:
43
- - changelog.txt
44
- - LICENSE
45
- - README
54
+ - README.rdoc
55
+ - MIT-LICENSE
56
+ - History
46
57
  files:
58
+ - MIT-LICENSE
59
+ - README.rdoc
60
+ - History
61
+ - lib/ms.rb
62
+ - lib/ms/calc.rb
63
+ - lib/ms/data.rb
64
+ - lib/ms/data/interleaved.rb
65
+ - lib/ms/data/lazy_io.rb
66
+ - lib/ms/data/lazy_string.rb
67
+ - lib/ms/data/simple.rb
68
+ - lib/ms/data/transposed.rb
47
69
  - lib/ms/format/format_error.rb
48
- - lib/ms/id/search.rb
49
70
  - lib/ms/id/peptide.rb
50
71
  - lib/ms/id/protein.rb
72
+ - lib/ms/id/search.rb
73
+ - lib/ms/mass.rb
51
74
  - lib/ms/mass/aa.rb
52
- - lib/ms/data.rb
53
75
  - lib/ms/spectrum.rb
54
76
  - lib/ms/support/binary_search.rb
55
- - lib/ms/mass.rb
56
- - lib/ms/calc.rb
57
- - lib/ms/data/interleaved.rb
58
- - lib/ms/data/simple.rb
59
- - lib/ms/data/lazy_string.rb
60
- - lib/ms/data/transposed.rb
61
- - lib/ms/data/lazy_io.rb
62
- - lib/ms.rb
63
- - changelog.txt
64
- - LICENSE
65
- - README
66
77
  has_rdoc: true
67
- homepage: http://mspire.rubyforge.org/projects/ms-core/
78
+ homepage: http://mspire.rubyforge.org/ms-core/
68
79
  licenses: []
69
80
 
70
81
  post_install_message:
@@ -90,6 +101,6 @@ rubyforge_project: mspire
90
101
  rubygems_version: 1.3.2
91
102
  signing_key:
92
103
  specification_version: 3
93
- summary: the core, shared library for mspire
104
+ summary: basic, shared functionality for mspire libraries
94
105
  test_files: []
95
106
 
data/changelog.txt DELETED
@@ -1,196 +0,0 @@
1
-
2
- == version 0.1.7
3
-
4
- 1. A couple of scripts and subroutines were hashing peptides but not on the file
5
- basename. This would result in slightly incorrect results (any time there
6
- were overlapping scan numbers in multiple datasets, only the top one would be
7
- chosen). The results would be correct for single runs.
8
-
9
- Output files that could be affected:
10
- *.top_per_scan.txt
11
- *.all_peps_per_scan.txt
12
-
13
- Scripts that could be affected:
14
- script/top_hit_per_scan.rb
15
- bin/filter_spec_id.rb
16
- script/filter-peps.rb
17
- bin/id_precision.rb
18
-
19
- Subroutines that were affected:
20
- spec_id.rb (pep_probs_by_* )
21
- spec_id.rb (top_peps_prefilter!)
22
- proph.rb uniq_by_seqcharge
23
- align.rb called uniq_by_seqcharge
24
-
25
-
26
- 2. false_positive_rate.rb and protein_summary.rb (by extension) were using
27
- number of true positives on the x axis while in reality I was plotting the
28
- number of hits. I've updated x axis labels to reflect this change. In
29
- addition, since the term 'false positive rate' has such a distinct definition
30
- in classical ROC plots and binary statistics, I've decided to work primarily
31
- in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
32
- Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
33
- called the False Positive Predictive Rate (FPPR). I will probably implement
34
- this in a future release.
35
-
36
- == version 0.2.0
37
-
38
- Revamped the way SpecID works (it is now mixed-in).
39
- Added support for modifications to bioworks_to_pepxml.rb
40
- Can read .srf files (nearly interchangeable with bioworks files)
41
- Redid filter.rb
42
-
43
- == version 0.2.1
44
-
45
- minor bugfix
46
-
47
- == version 0.2.2
48
-
49
- made compatible with Bioworks fasta file reverser and updated tutorial.
50
- Killed classify_by_prefix routine in favor of classify_by_false_flag which has
51
- a prefix option
52
-
53
- == version 0.2.3
54
-
55
- in protein_summary.rb added handling for proteins with no annotation. (either
56
- display NA or use gi2annnot to grab them from NCBI)
57
-
58
- == version 0.2.5
59
-
60
- renamed prep_list in roc (potential breaks in code)
61
-
62
- == version 0.2.6
63
-
64
- 1. Massive refactorization of filtering and validation. Validation objects are
65
- created and then can be used to validate just about anything.
66
- 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
- (including readw broken output), and mzData (even Thermo's broken output).
68
- 4. Moved all tests to specs (rspec).
69
- 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
- 2.X)
71
-
72
- Bugfixes:
73
- 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
- appear to influence our analyses, however). Fixed.
75
- 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
- missed cleavages if the last amino acid was a cut point. Fixed.
77
-
78
- == version 0.2.7
79
-
80
- 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
- Now, the sample enzyme is set explicitly from the params file and the option
82
- is not available. This can give more accurate pepxml files than
83
- previously, depending on your enzyme.
84
-
85
- == version 0.2.9
86
-
87
- 1. Added support for phobius transmembrane predictions
88
- 2. have filter_and_validate.rb working well (multiple validators allowed).
89
- 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
- 4. Added a bias validator
91
-
92
- == version 0.2.10
93
-
94
- 1. Fixed --hits_separate flag in spec_id/filter
95
-
96
- == version 0.2.11
97
-
98
- 1. Added prob precision support and reorganized filter_and_validate libs
99
-
100
- == version 0.2.12
101
-
102
- 1. Fixed bug in transmem for prob and others.
103
- 2. Can use axml (XMLParser based) or libxml depending on availability
104
-
105
- == version 0.2.13
106
-
107
- 1. Fixed issue with --hits_separate
108
- 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
- (refactored code)
110
-
111
- == version 0.2.14
112
-
113
- 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
- 2. API change: Some slight modifications to the Sequest::PepXML object
115
- interfaces and implementations (using ArrayClass)
116
-
117
- == version 0.2.15
118
-
119
- 1. can convert srf files to sqt files
120
-
121
- == version 0.3.0
122
-
123
- 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
- 2. SQT export is correct and works at least on 3.2 and 3.3.1.
125
-
126
- == version 0.3.1
127
-
128
- 1. Bug fix in srf filtering (num_hits adjusted)
129
-
130
- == version 0.3.2
131
-
132
- 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
- now.
134
-
135
- == version 0.3.3
136
-
137
- 1. Worked out minor kinks in prob_precision.rb
138
-
139
- == version 0.3.4
140
-
141
- 1. filters >= +3 charged ions now.
142
-
143
- == version 0.3.5
144
-
145
- 1. fixed creation of background distribution in validators (hash_by base_name,
146
- first_scan, charge now)
147
-
148
- == version 0.3.6
149
-
150
- 1. split off bad_aa_est from bad_aa
151
-
152
- == version 0.3.7
153
-
154
- 1. can deal with No_Enzyme searches now (while still capable of setting
155
- sample_enzyme)
156
-
157
- == version 0.3.8
158
-
159
- 1. can set a decoy to target ratio for decoy validation
160
- 2. added mass calculator in Mass::Calculator
161
-
162
- == version 0.3.9
163
-
164
- 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
-
166
- == version 0.3.10
167
-
168
- 1. added run_percolator.rb script which makes running multiple files easy
169
-
170
- == version 0.3.11
171
-
172
- 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
- 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
- larger files to be parsed
175
-
176
- == version 0.4.0
177
-
178
- 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
- 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
- 3. lazy eval working on mzData
181
- 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
- eval methods (however, the method intensity_at_mz will still work (causing
183
- evaluation))
184
-
185
- == version 0.4.1
186
-
187
- 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
-
189
- == version 0.4.2
190
-
191
- 1. added MS::MSRun.open method
192
- 2. added method to write dta files from SRF
193
-
194
- == version 0.4.3
195
-
196
- 1. added to_mfg_file from SRF