ms-core 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/History ADDED
@@ -0,0 +1,7 @@
1
+
2
+ == version 0.0.1
3
+
4
+ * copied over from Simon's mspire.
5
+ * added functionality to Ms::Spectrum
6
+
7
+
File without changes
File without changes
data/lib/ms/calc.rb CHANGED
@@ -19,14 +19,12 @@ module Ms
19
19
  mz = mz.to_f
20
20
  tol = ppm_tol_at(mz, ppm)
21
21
  mz-tol...mz+tol
22
- end
23
-
22
+ end
24
23
 
25
24
  # Rounds n to the specified precision (i.e., number of decimal places)
26
- # def round(n, precision)
27
- # factor = 10**precision.to_i
28
- # (n * factor).round.to_f / factor
29
- # end
30
-
25
+ def round(n, precision)
26
+ factor = 10**precision.to_i
27
+ (n * factor).round.to_f / factor
28
+ end
31
29
  end
32
30
  end
data/lib/ms/data.rb CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'ms/data/interleaved'
2
2
  require 'ms/data/transposed'
3
+ require 'ms/data/lazy_string'
3
4
 
4
5
  module Ms
5
6
 
@@ -54,4 +55,4 @@ module Ms
54
55
  send("new_#{type}", data)
55
56
  end
56
57
  end
57
- end
58
+ end
data/lib/ms/id/peptide.rb CHANGED
@@ -28,7 +28,7 @@ module Ms::Id::Peptide
28
28
  when 1 ## this must be a parse error!
29
29
  pieces[0] ## which is the peptide itself
30
30
  else
31
- abort "bad peptide sequence: #{sequence}"
31
+ abort "bad peptide sequence: #{sequence.inspect}"
32
32
  end
33
33
  end
34
34
 
data/lib/ms/mass/aa.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require 'molecules'
1
2
  require 'ms/mass'
2
3
 
3
4
  module Ms
@@ -14,16 +15,10 @@ module Ms
14
15
  # # or use symbols
15
16
  # MONO[:A] # => 71.0371137878
16
17
  #
17
- # This module is built on masses generated from the excellent {'molecules'
18
+ # This module is built on the excellent {'molecules'
18
19
  # library}[http://github.com/bahuvrihi/molecules/tree/master]. See that
19
- # library for more serious work with masses:
20
- #
21
- # gem install molecules
20
+ # library for more serious work with masses.
22
21
  module AA
23
- Ms::Mass.constants.reject {|v| v == 'AA' }.each do |const|
24
- const_set(const, Ms::Mass.const_get(const))
25
- end
26
-
27
22
  # These are included here to offer maximum functionality
28
23
  MOLECULES_MONO_UNSUPPORTED = {
29
24
  :B => 172.048405, # average of aspartic acid and asparagine
@@ -40,70 +35,10 @@ module Ms
40
35
  #:J => nil,
41
36
  }
42
37
 
43
- # generated from molecules version 0.1.3:
44
- MOLECULES_MONO = {
45
- :A => 71.0371137878,
46
- :C => 103.0091844778,
47
- :D => 115.026943032,
48
- :E => 129.0425930962,
49
- :F => 147.0684139162,
50
- :G => 57.0214637236,
51
- :H => 137.0589118624,
52
- :I => 113.0840639804,
53
- :K => 128.0949630177,
54
- :L => 113.0840639804,
55
- :M => 131.0404846062,
56
- :N => 114.0429274472,
57
- :O => 211.1446528645,
58
- :P => 97.052763852,
59
- :Q => 128.0585775114,
60
- :R => 156.1011110281,
61
- :S => 87.0320284099,
62
- :T => 101.0476784741,
63
- :U => 150.9536355878,
64
- :V => 99.0684139162,
65
- :W => 186.0793129535,
66
- :Y => 163.0633285383,
67
- }
68
-
69
- MONO = MOLECULES_MONO_UNSUPPORTED.merge MOLECULES_MONO
70
-
71
- # generated from molecules version 0.1.3:
72
- MOLECULES_AVG = {
73
- :A => 71.0779,
74
- :C => 103.1429,
75
- :D => 115.0874,
76
- :E => 129.11398,
77
- :F => 147.17386,
78
- :G => 57.05132,
79
- :H => 137.13928,
80
- :I => 113.15764,
81
- :K => 128.17228,
82
- :L => 113.15764,
83
- :M => 131.19606,
84
- :N => 114.10264,
85
- :O => 211.28076,
86
- :P => 97.11518,
87
- :Q => 128.12922,
88
- :R => 156.18568,
89
- :S => 87.0773,
90
- :T => 101.10388,
91
- :U => 150.0379,
92
- :V => 99.13106,
93
- :W => 186.2099,
94
- :Y => 163.17326,
95
- }
96
-
97
- AVG = MOLECULES_AVG_UNSUPPORTED.merge MOLECULES_AVG
98
-
99
- [AVG, MONO].each do |hash|
100
- hash.each {|k,v| hash[k.to_s] = v }
101
- end
102
38
 
103
39
  # returns a hash based on the molecules library of amino acid residues.
104
40
  # type is :mono or :avg
105
41
  def self.mass_index(type=:mono)
106
- require 'molecules'
107
42
  hash = {}
108
43
  ('A'..'Z').each do |letter|
109
44
  if res = Molecules::Libraries::Residue[letter]
@@ -120,14 +55,10 @@ module Ms
120
55
  hash
121
56
  end
122
57
 
123
- # prints a MONO or AVG hash for inclusion in ruby code
124
- # type can be :mono or :avg
125
- def self.print_mass_index(type=:mono)
126
- puts "#{type.to_s.upcase} = {"
127
- mass_index(type).sort.each do |k,v|
128
- puts ":#{k} => #{v},"
129
- end
130
- puts "}"
58
+ MONO = MOLECULES_MONO_UNSUPPORTED.merge( self.mass_index(:mono) )
59
+ AVG = MOLECULES_AVG_UNSUPPORTED.merge( self.mass_index(:avg) )
60
+ [AVG, MONO].each do |hash|
61
+ hash.each {|k,v| hash[k.to_s] = v }
131
62
  end
132
63
 
133
64
  end
data/lib/ms/spectrum.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  module Ms
2
2
  class Spectrum
3
+ include Enumerable
4
+
3
5
  # The underlying data store.
4
6
  attr_reader :data
5
7
 
@@ -60,97 +62,99 @@ module Ms
60
62
  Ms::Spectrum.new([self.mzs, self.intensities.map {|v| v / tic }])
61
63
  end
62
64
 
63
- # uses index function and returns the intensity at that value
64
- def intensity_at_mz(mz)
65
- if x = index(mz)
66
- intensities[x]
67
- else
68
- nil
69
- end
70
- end
65
+ ## uses index function and returns the intensity at that value
66
+ #def intensity_at_mz(mz)
67
+ #if x = index(mz)
68
+ #intensities[x]
69
+ #else
70
+ #nil
71
+ #end
72
+ #end
71
73
 
72
- # returns the index of the first value matching that m/z. the argument m/z
73
- # may be less precise than the actual m/z (rounding to the same precision
74
- # given) but must be at least integer precision (after rounding)
75
- # implemented as binary search (bsearch from the web)
76
- def index(mz)
77
- mz_ar = mzs
78
- return_val = nil
79
- ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
80
- if mz_ar[ind] == mz
81
- return_val = ind
82
- else
83
- # do a rounding game to see which one is it, or nil
84
- # find all the values rounding to the same integer in the locale
85
- # test each one fully in turn
86
- mz = mz.to_f
87
- mz_size = mz_ar.size
88
- if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
89
- return_val = ind
90
- else # run the loop
91
- up = ind
92
- loop do
93
- up += 1
94
- if up >= mz_size
95
- break
96
- end
97
- mz_up = mz_ar[up]
98
- if (mz_up.ceil - mz.ceil >= 2)
99
- break
100
- else
101
- if equal_after_rounding?(mz_up, mz)
102
- return_val = up
103
- return return_val
104
- end
105
- end
106
- end
107
- dn= ind
108
- loop do
109
- dn -= 1
110
- if dn < 0
111
- break
112
- end
113
- mz_dn = mz_ar[dn]
114
- if (mz.floor - mz_dn.floor >= 2)
115
- break
116
- else
117
- if equal_after_rounding?(mz_dn, mz)
118
- return_val = dn
119
- return return_val
120
- end
121
- end
122
- end
123
- end
124
- end
125
- return_val
126
- end
74
+ ## index mz, tolerance = :nearest(1), Float, :nearest_within_integer
127
75
 
128
- # less_precise should be a float
129
- # precise should be a float
130
- def equal_after_rounding?(precise, less_precise) # :nodoc:
131
- # determine the precision of less_precise
132
- exp10 = precision_as_neg_int(less_precise)
133
- #puts "EXP10: #{exp10}"
134
- answ = ((precise*exp10).round == (less_precise*exp10).round)
135
- #puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
136
- #puts answ
137
- (precise*exp10).round == (less_precise*exp10).round
138
- end
76
+ ## returns the index of the first value matching that m/z. the argument m/z
77
+ ## may be less precise than the actual m/z (rounding to the same precision
78
+ ## given) but must be at least integer precision (after rounding)
79
+ ## implemented as binary search (bsearch from the web)
80
+ #def index(mz)
81
+ #mz_ar = mzs
82
+ #return_val = nil
83
+ #ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
84
+ #if mz_ar[ind] == mz
85
+ #return_val = ind
86
+ #else
87
+ ## do a rounding game to see which one is it, or nil
88
+ ## find all the values rounding to the same integer in the locale
89
+ ## test each one fully in turn
90
+ #mz = mz.to_f
91
+ #mz_size = mz_ar.size
92
+ #if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
93
+ #return_val = ind
94
+ #else # run the loop
95
+ #up = ind
96
+ #loop do
97
+ #up += 1
98
+ #if up >= mz_size
99
+ #break
100
+ #end
101
+ #mz_up = mz_ar[up]
102
+ #if (mz_up.ceil - mz.ceil >= 2)
103
+ #break
104
+ #else
105
+ #if equal_after_rounding?(mz_up, mz)
106
+ #return_val = up
107
+ #return return_val
108
+ #end
109
+ #end
110
+ #end
111
+ #dn= ind
112
+ #loop do
113
+ #dn -= 1
114
+ #if dn < 0
115
+ #break
116
+ #end
117
+ #mz_dn = mz_ar[dn]
118
+ #if (mz.floor - mz_dn.floor >= 2)
119
+ #break
120
+ #else
121
+ #if equal_after_rounding?(mz_dn, mz)
122
+ #return_val = dn
123
+ #return return_val
124
+ #end
125
+ #end
126
+ #end
127
+ #end
128
+ #end
129
+ #return_val
130
+ #end
139
131
 
140
- # returns 1 for ones place, 10 for tenths, 100 for hundredths
141
- # to a precision exceeding 1e-6
142
- def precision_as_neg_int(float) # :nodoc:
143
- neg_exp10 = 1
144
- loop do
145
- over = float * neg_exp10
146
- rounded = over.round
147
- if (over - rounded).abs <= 1e-6
148
- break
149
- end
150
- neg_exp10 *= 10
151
- end
152
- neg_exp10
153
- end
132
+ ## less_precise should be a float
133
+ ## precise should be a float
134
+ #def equal_after_rounding?(precise, less_precise) # :nodoc:
135
+ ## determine the precision of less_precise
136
+ #exp10 = precision_as_neg_int(less_precise)
137
+ ##puts "EXP10: #{exp10}"
138
+ #answ = ((precise*exp10).round == (less_precise*exp10).round)
139
+ ##puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
140
+ ##puts answ
141
+ #(precise*exp10).round == (less_precise*exp10).round
142
+ #end
143
+
144
+ ## returns 1 for ones place, 10 for tenths, 100 for hundredths
145
+ ## to a precision exceeding 1e-6
146
+ #def precision_as_neg_int(float) # :nodoc:
147
+ #neg_exp10 = 1
148
+ #loop do
149
+ #over = float * neg_exp10
150
+ #rounded = over.round
151
+ #if (over - rounded).abs <= 1e-6
152
+ #break
153
+ #end
154
+ #neg_exp10 *= 10
155
+ #end
156
+ #neg_exp10
157
+ #end
154
158
 
155
159
 
156
160
  end
metadata CHANGED
@@ -1,18 +1,28 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ms-core
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
- - John Prince
8
7
  - Simon Chiang
8
+ - John Prince
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2009-05-22 00:00:00 -06:00
13
+ date: 2009-09-08 00:00:00 -06:00
14
14
  default_executable:
15
15
  dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: molecules
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">="
23
+ - !ruby/object:Gem::Version
24
+ version: 0.2.0
25
+ version:
16
26
  - !ruby/object:Gem::Dependency
17
27
  name: tap
18
28
  type: :development
@@ -29,42 +39,43 @@ dependencies:
29
39
  version_requirement:
30
40
  version_requirements: !ruby/object:Gem::Requirement
31
41
  requirements:
32
- - - "="
42
+ - - ">="
33
43
  - !ruby/object:Gem::Version
34
44
  version: 1.3.0
35
45
  version:
36
46
  description:
37
- email: jtprince@gmail.com
47
+ email:
48
+ - jtprince@gmail.com
38
49
  executables: []
39
50
 
40
51
  extensions: []
41
52
 
42
53
  extra_rdoc_files:
43
- - changelog.txt
44
- - LICENSE
45
- - README
54
+ - README.rdoc
55
+ - MIT-LICENSE
56
+ - History
46
57
  files:
58
+ - MIT-LICENSE
59
+ - README.rdoc
60
+ - History
61
+ - lib/ms.rb
62
+ - lib/ms/calc.rb
63
+ - lib/ms/data.rb
64
+ - lib/ms/data/interleaved.rb
65
+ - lib/ms/data/lazy_io.rb
66
+ - lib/ms/data/lazy_string.rb
67
+ - lib/ms/data/simple.rb
68
+ - lib/ms/data/transposed.rb
47
69
  - lib/ms/format/format_error.rb
48
- - lib/ms/id/search.rb
49
70
  - lib/ms/id/peptide.rb
50
71
  - lib/ms/id/protein.rb
72
+ - lib/ms/id/search.rb
73
+ - lib/ms/mass.rb
51
74
  - lib/ms/mass/aa.rb
52
- - lib/ms/data.rb
53
75
  - lib/ms/spectrum.rb
54
76
  - lib/ms/support/binary_search.rb
55
- - lib/ms/mass.rb
56
- - lib/ms/calc.rb
57
- - lib/ms/data/interleaved.rb
58
- - lib/ms/data/simple.rb
59
- - lib/ms/data/lazy_string.rb
60
- - lib/ms/data/transposed.rb
61
- - lib/ms/data/lazy_io.rb
62
- - lib/ms.rb
63
- - changelog.txt
64
- - LICENSE
65
- - README
66
77
  has_rdoc: true
67
- homepage: http://mspire.rubyforge.org/projects/ms-core/
78
+ homepage: http://mspire.rubyforge.org/ms-core/
68
79
  licenses: []
69
80
 
70
81
  post_install_message:
@@ -90,6 +101,6 @@ rubyforge_project: mspire
90
101
  rubygems_version: 1.3.2
91
102
  signing_key:
92
103
  specification_version: 3
93
- summary: the core, shared library for mspire
104
+ summary: basic, shared functionality for mspire libraries
94
105
  test_files: []
95
106
 
data/changelog.txt DELETED
@@ -1,196 +0,0 @@
1
-
2
- == version 0.1.7
3
-
4
- 1. A couple of scripts and subroutines were hashing peptides but not on the file
5
- basename. This would result in slightly incorrect results (any time there
6
- were overlapping scan numbers in multiple datasets, only the top one would be
7
- chosen). The results would be correct for single runs.
8
-
9
- Output files that could be affected:
10
- *.top_per_scan.txt
11
- *.all_peps_per_scan.txt
12
-
13
- Scripts that could be affected:
14
- script/top_hit_per_scan.rb
15
- bin/filter_spec_id.rb
16
- script/filter-peps.rb
17
- bin/id_precision.rb
18
-
19
- Subroutines that were affected:
20
- spec_id.rb (pep_probs_by_* )
21
- spec_id.rb (top_peps_prefilter!)
22
- proph.rb uniq_by_seqcharge
23
- align.rb called uniq_by_seqcharge
24
-
25
-
26
- 2. false_positive_rate.rb and protein_summary.rb (by extension) were using
27
- number of true positives on the x axis while in reality I was plotting the
28
- number of hits. I've updated x axis labels to reflect this change. In
29
- addition, since the term 'false positive rate' has such a distinct definition
30
- in classical ROC plots and binary statistics, I've decided to work primarily
31
- in terms of precision (TP/(TP+FP)). I've purged the terms 'False Positive
32
- Rate' and 'FPR' from the package. It's been suggested that FP/(TP+FP) be
33
- called the False Positive Predictive Rate (FPPR). I will probably implement
34
- this in a future release.
35
-
36
- == version 0.2.0
37
-
38
- Revamped the way SpecID works (it is now mixed-in).
39
- Added support for modifications to bioworks_to_pepxml.rb
40
- Can read .srf files (nearly interchangeable with bioworks files)
41
- Redid filter.rb
42
-
43
- == version 0.2.1
44
-
45
- minor bugfix
46
-
47
- == version 0.2.2
48
-
49
- made compatible with Bioworks fasta file reverser and updated tutorial.
50
- Killed classify_by_prefix routine in favor of classify_by_false_flag which has
51
- a prefix option
52
-
53
- == version 0.2.3
54
-
55
- in protein_summary.rb added handling for proteins with no annotation. (either
56
- display NA or use gi2annnot to grab them from NCBI)
57
-
58
- == version 0.2.5
59
-
60
- renamed prep_list in roc (potential breaks in code)
61
-
62
- == version 0.2.6
63
-
64
- 1. Massive refactorization of filtering and validation. Validation objects are
65
- created and then can be used to validate just about anything.
66
- 2. Massive redo of the parsing of MS runs. Can parse mzXML v1, v2.X
67
- (including readw broken output), and mzData (even Thermo's broken output).
68
- 4. Moved all tests to specs (rspec).
69
- 5. Can read gradient programs off of .meth or .RAW files (both Xcal 1.X and
70
- 2.X)
71
-
72
- Bugfixes:
73
- 1. The search_summary 'base_name' in pepxml output was incorrect (this did not
74
- appear to influence our analyses, however). Fixed.
75
- 2. Enzymes with no exceptions (e.g., cuts at KR) would report one too many
76
- missed cleavages if the last amino acid was a cut point. Fixed.
77
-
78
- == version 0.2.7
79
-
80
- 1. In conversion from bioworks to pepxml, the default was trypsin (KR/P).
81
- Now, the sample enzyme is set explicitly from the params file and the option
82
- is not available. This can give more accurate pepxml files than from
83
- previous depending on your enzyme.
84
-
85
- == version 0.2.9
86
-
87
- 1. Added support for phobius transmembrane predictions
88
- 2. have filter_and_validate.rb working well (multiple validators allowed).
89
- 3. Can read bioworks 3.3.1 .srf files (.srf version 3.5 files)
90
- 4. Added a bias validator
91
-
92
- == version 0.2.10
93
-
94
- 1. Fixed --hits_separate flag in spec_id/filter
95
-
96
- == version 0.2.11
97
-
98
- 1. Added prob precision support and reorganized filter_and_validate libs
99
-
100
- == version 0.2.12
101
-
102
- 1. Fixed bug in transmem for prob and others.
103
- 2. Can use axml (XMLParser based) or libxml depending on availability
104
-
105
- == version 0.2.13
106
-
107
- 1. Fixed issue with --hits_separate
108
- 2. filter_and_validate.rb requires decoy validator if decoy proteins
109
- (refactored code)
110
-
111
- == version 0.2.14
112
-
113
- 1. Can read PeptideProphet files (should be able to read pepxml files, too)
114
- 2. API change: Some slight modifications to the Sequest::PepXML object
115
- interfaces and implementations (using ArrayClass)
116
-
117
- == version 0.2.15
118
-
119
- 1. can convert srf files to sqt files
120
-
121
- == version 0.3.0
122
-
123
- 1. IMPORTANT BUG FIX: protein reporting in srf files is correct now (proteins after the first protein were being assigned to the last hit in an out file).
124
- 2. SQT export is correct and works at least on 3.2 and 3.3.1.
125
-
126
- == version 0.3.1
127
-
128
- 1. Bug fix in srf filtering (num_hits adjusted)
129
-
130
- == version 0.3.2
131
-
132
- 1. Uses sequest peptide_mass_tolerance filter on srf group files by default
133
- now.
134
-
135
- == version 0.3.3
136
-
137
- 1. Worked out minor kinks in prob_precision.rb
138
-
139
- == version 0.3.4
140
-
141
- 1. filters >= +3 charged ions now.
142
-
143
- == version 0.3.5
144
-
145
- 1. fixed creation of background distribution in validators (hash_by base_name,
146
- first_scan, charge now)
147
-
148
- == version 0.3.6
149
-
150
- 1. split off bad_aa_est from bad_aa
151
-
152
- == version 0.3.7
153
-
154
- 1. can deal with No_Enzyme searches now (while still capable of setting
155
- sample_enzyme)
156
-
157
- == version 0.3.8
158
-
159
- 1. can set a decoy to target ratio for decoy validation
160
- 2. added mass calculator in Mass::Calculator
161
-
162
- == version 0.3.9
163
-
164
- 1. doesn't clobber mzdata filename in ms_to_lmat.rb conversion
165
-
166
- == version 0.3.10
167
-
168
- 1. added run_percolator.rb script which makes running multiple files easy
169
-
170
- == version 0.3.11
171
-
172
- 1. faster sensing of bad scan tags in mzXML v. 2.0 files
173
- 2. implemented lazy evaluation of spectrum in 2 different ways allowing much
174
- larger files to be parsed
175
-
176
- == version 0.4.0
177
-
178
- 1. ** INTERFACE CHANGE: each scan can only have one precursor (used to be an array)
179
- 2. ** INTERFACE CHANGE: spectrum mz and intensity data accessed with mzs and intensities
180
- 3. lazy eval working on mzData
181
- 4. mzData not necessarily guaranteed to have precursor intensities on lazy
182
- eval methods (however, the method intensity_at_mz will still work (causing
183
- evaluation))
184
-
185
- == version 0.4.1
186
-
187
- 1. added support for reading mzXML version 3.0 (may fail in some cases)
188
-
189
- == version 0.4.2
190
-
191
- 1. added MS::MSRun.open method
192
- 2. added method to write dta files from SRF
193
-
194
- == version 0.4.3
195
-
196
- 1. added to_mfg_file from SRF