mspire 0.3.9 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. data/INSTALL +24 -7
  2. data/README +15 -13
  3. data/README.rdoc +18 -0
  4. data/Rakefile +50 -14
  5. data/bin/aafreqs.rb +0 -0
  6. data/bin/bioworks2excel.rb +0 -0
  7. data/bin/bioworks_to_pepxml.rb +2 -1
  8. data/bin/bioworks_to_pepxml_gui.rb +0 -0
  9. data/bin/fasta_shaker.rb +0 -0
  10. data/bin/filter_and_validate.rb +0 -0
  11. data/bin/gi2annot.rb +0 -0
  12. data/bin/id_class_anal.rb +0 -0
  13. data/bin/id_precision.rb +0 -0
  14. data/bin/ms_to_lmat.rb +0 -0
  15. data/bin/pepproph_filter.rb +0 -0
  16. data/bin/protein_summary.rb +0 -0
  17. data/bin/protxml2prots_peps.rb +0 -0
  18. data/bin/raw_to_mzXML.rb +3 -3
  19. data/bin/run_percolator.rb +122 -0
  20. data/bin/sqt_group.rb +0 -0
  21. data/bin/srf_group.rb +0 -0
  22. data/changelog.txt +29 -0
  23. data/lib/ms/gradient_program.rb +0 -1
  24. data/lib/ms/msrun.rb +62 -29
  25. data/lib/ms/parser/mzdata/axml.rb +55 -0
  26. data/lib/ms/parser/mzdata/dom.rb +51 -36
  27. data/lib/ms/parser/mzdata.rb +8 -2
  28. data/lib/ms/parser/mzxml/axml.rb +59 -0
  29. data/lib/ms/parser/mzxml/dom.rb +80 -57
  30. data/lib/ms/parser/mzxml/hpricot.rb +1 -1
  31. data/lib/ms/parser/mzxml/libxml.rb +6 -2
  32. data/lib/ms/parser/mzxml.rb +110 -3
  33. data/lib/ms/parser.rb +4 -4
  34. data/lib/ms/precursor.rb +19 -4
  35. data/lib/ms/scan.rb +7 -7
  36. data/lib/ms/spectrum.rb +249 -58
  37. data/lib/mspire.rb +1 -1
  38. data/lib/spec_id/bioworks.rb +2 -2
  39. data/lib/spec_id/precision/filter/cmdline.rb +8 -1
  40. data/lib/spec_id/precision/prob/cmdline.rb +2 -2
  41. data/lib/spec_id/precision/prob.rb +1 -0
  42. data/lib/spec_id/proph/pep_summary.rb +3 -4
  43. data/lib/spec_id/proph/prot_summary.rb +3 -3
  44. data/lib/spec_id/protein_summary.rb +1 -1
  45. data/lib/spec_id/sequest/pepxml.rb +5 -5
  46. data/lib/spec_id/sqt.rb +4 -4
  47. data/lib/spec_id/srf.rb +49 -8
  48. data/lib/spec_id.rb +5 -0
  49. data/lib/xml_style_parser.rb +16 -2
  50. data/script/compile_and_plot_smriti_final.rb +0 -0
  51. data/script/create_little_pepxml.rb +0 -0
  52. data/script/degenerate_peptides.rb +0 -0
  53. data/script/estimate_fpr_by_cysteine.rb +0 -0
  54. data/script/extract_gradient_programs.rb +1 -1
  55. data/script/find_cysteine_background.rb +0 -0
  56. data/script/genuine_tps_and_probs.rb +0 -0
  57. data/script/get_apex_values_rexml.rb +0 -0
  58. data/script/mascot_fix_pepxml.rb +123 -0
  59. data/script/msvis.rb +0 -0
  60. data/script/mzXML2timeIndex.rb +0 -0
  61. data/script/peps_per_bin.rb +0 -0
  62. data/script/prep_dir.rb +0 -0
  63. data/script/simple_protein_digestion.rb +0 -0
  64. data/script/smriti_final_analysis.rb +0 -0
  65. data/script/sqt_to_meta.rb +0 -0
  66. data/script/top_hit_per_scan.rb +0 -0
  67. data/script/toppred_to_yaml.rb +0 -0
  68. data/script/tpp_installer.rb +0 -0
  69. data/specs/bin/prob_validate_spec.rb +5 -2
  70. data/specs/bin/protein_summary_spec.rb +5 -1
  71. data/specs/ms/msrun_spec.rb +176 -133
  72. data/specs/ms/parser_spec.rb +3 -3
  73. data/specs/ms/spectrum_spec.rb +0 -2
  74. data/specs/spec_id/precision/filter_spec.rb +4 -1
  75. data/specs/spec_id/precision/prob_spec.rb +2 -2
  76. data/specs/spec_id/sequest/pepxml_spec.rb +1 -1
  77. data/specs/spec_id/sqt_spec.rb +5 -5
  78. data/specs/spec_id/srf_spec.rb +56 -93
  79. data/specs/spec_id/srf_spec_helper.rb +121 -284
  80. data/specs/spec_id_spec.rb +3 -0
  81. data/specs/transmem/toppred_spec.rb +1 -0
  82. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +683 -0
  83. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +382 -0
  84. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +683 -0
  85. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +382 -0
  86. data/test_files/opd1_2runs_2mods/data/README.txt +6 -0
  87. metadata +247 -229
data/lib/ms/parser.rb CHANGED
@@ -86,23 +86,23 @@ module MS::Parser
86
86
 
87
87
  # filetype_version is an example file to parse, or it is an array: [type, version].
88
88
  # parse_type is the information to be gleaned (as symbol).
89
- def self.new(filetype_version, parse_type)
89
+ def self.new(filetype_version, parse_type, opts={})
90
90
  unless filetype_version.is_a? Array
91
91
  filetype_version = filetype_and_version(filetype_version)
92
92
  end
93
- require_and_create_parser(filetype_version, parse_type)
93
+ require_and_create_parser(filetype_version, parse_type, opts)
94
94
  end
95
95
 
96
96
  private
97
97
 
98
98
  # returns a working parser.
99
- def self.require_and_create_parser(filetype_version, parse_type)
99
+ def self.require_and_create_parser(filetype_version, parse_type, opts)
100
100
  (filetype, version) = filetype_version
101
101
  #puts "FT: #{filetype} VERSION: #{version}"
102
102
  reply = require @@filetypes_to_require[filetype]
103
103
  @@filetypes_to_require[filetype]
104
104
  parser_class = MS::Parser.const_get(@@filetypes_to_upcase[filetype])
105
- parser_class.new(parse_type, version)
105
+ parser_class.new(parse_type, version, opts)
106
106
  end
107
107
 
108
108
  end
data/lib/ms/precursor.rb CHANGED
@@ -1,10 +1,25 @@
1
- require 'array_class'
1
+ require 'arrayclass'
2
2
 
3
3
  module MS; end
4
4
 
5
- # parent == spectrumRef references a scan
6
- # 0 1 2 3 4
7
- MS::Precursor = ArrayClass.new(%w(mz intensity parent ms_level charge_states))
5
+ # charge_states are the possible charge states of the precursor
6
+ # parent references a scan
7
+ # 0 1 2 3
8
+ MS::Precursor = Arrayclass.new(%w(mz intensity parent charge_states))
8
9
 
9
10
  class MS::Precursor
11
+
12
+ undef :intensity
13
+
14
+ def intensity
15
+ if self[1].nil?
16
+ if s = self[2].spectrum
17
+ self[1] = s.intensity_at_mz(self[0])
18
+ else
19
+ nil # if we didn't read in the spectra, we can't get this value!
20
+ end
21
+ end
22
+ self[1]
23
+ end
24
+
10
25
  end
data/lib/ms/scan.rb CHANGED
@@ -1,13 +1,13 @@
1
- require 'array_class'
1
+ require 'arrayclass'
2
2
  require 'ms/precursor'
3
3
 
4
4
  module MS ; end
5
5
 
6
6
  # 0 1 2 3 4 5 6
7
- MS::Scan = ArrayClass.new( %w(num ms_level time start_mz end_mz precursors spectrum) )
7
+ MS::Scan = Arrayclass.new( %w(num ms_level time start_mz end_mz precursor spectrum) )
8
8
 
9
9
  # time in seconds
10
- # everything else in float/int or as array (precursors)
10
+ # everything else in float/int
11
11
 
12
12
  class MS::Scan
13
13
  #@@order = %w(num ms_level time start_mz end_mz prec_mz prec_inten parent spectrum)
@@ -36,18 +36,18 @@ class MS::Scan
36
36
  display.compact!
37
37
  spec_display =
38
38
  if spectrum
39
- spectrum.mz.size
39
+ spectrum.mzs.size
40
40
  else
41
41
  'nil'
42
42
  end
43
- "<MS::Scan:#{__id__} " + display.join(", ") + " precursors=#{precursors.inspect}" + " spectrum(size)=#{spec_display}" + " >"
43
+ "<MS::Scan:#{__id__} " + display.join(", ") + " precursor=#{precursor.inspect}" + " spectrum(size)=#{spec_display}" + " >"
44
44
  end
45
45
 
46
46
  # returns the string (space delimited): "ms_level num time [prec_mz prec_inten]"
47
47
  def to_index_file_string
48
48
  arr = [ms_level, num, time]
49
- if precursors then arr << precursors.first.mz end
50
- if x = precursors.first.inten then arr << x end
49
+ if precursor then arr << precursor.mz end
50
+ if x = precursor.intensity then arr << x end
51
51
  arr.join(" ")
52
52
  end
53
53
 
data/lib/ms/spectrum.rb CHANGED
@@ -10,27 +10,16 @@ class MS::Spectrum
10
10
  Unpack_little_endian_double = 'E*'
11
11
 
12
12
  # m/z's
13
- attr_accessor :mz
13
+ attr_accessor :mzs
14
14
  # intensities
15
- attr_accessor :intensity
15
+ attr_accessor :intensities
16
16
 
17
- def initialize(mz=[], intensity=[])
18
- @mz = mz
19
- @intensity = intensity
20
- end
21
-
22
-
23
- def has_mz_data?
24
- (@mz.size > 0) && (@mz.first.is_a?(Numeric))
25
- end
26
-
27
- def has_intensity_data?
28
- (@intensity.size > 0) && (@intensity.first.is_a?(Numeric))
29
- end
30
-
31
- # takes a base64 string and returns an array
32
- def self.base64_to_array(string, precision=32, network_order=true)
33
- b64d = Base64.decode64(string)
17
+ #######################
18
+ ## CLASS METHODS:
19
+ #######################
20
+
21
+ # an already decoded string (ready to be unpacked as floating point numbers)
22
+ def self.string_to_array(string, precision=32, network_order=true)
34
23
  unpack_code =
35
24
  if network_order
36
25
  if precision == 32
@@ -45,57 +34,57 @@ class MS::Spectrum
45
34
  Unpack_little_endian_double
46
35
  end
47
36
  end
48
- b64d.unpack(unpack_code)
37
+ string.unpack(unpack_code)
49
38
  end
50
39
 
51
- def self.from_base64_pair(mz_string, mz_precision, mz_network_order, inten_string, inten_precision, inten_network_order)
52
- mz = base64_to_array(mz_string, mz_precision, mz_network_order)
53
- inten = base64_to_array(inten_string, inten_precision, inten_network_order)
54
- self.new(mz, inten)
40
+ # takes a base64 string and returns an array
41
+ def self.base64_to_array(b64_string, precision=32, network_order=true)
42
+ self.string_to_array(Base64.decode64(b64_string), precision, network_order)
55
43
  end
56
44
 
57
- # takes a base64 peaks string and sets spectrum
58
- # returns self for chaining
59
- def self.from_base64_peaks(string, precision=32, network_order=true)
60
- data = base64_to_array(string, precision, network_order)
45
+
46
+ def self.mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
47
+ data = base64_to_array(b64_string, precision, network_order)
61
48
  sz = data.size/2
62
- mz = Array.new(sz)
63
- intensity = Array.new(sz)
49
+ mz_ar = Array.new(sz)
50
+ intensity_ar = Array.new(sz)
64
51
  ndata = []
65
52
  my_ind = 0
66
53
  data.each_with_index do |dat,ind|
67
54
  if (ind % 2) == 0 # even
68
- mz[my_ind] = dat
55
+ mz_ar[my_ind] = dat
69
56
  else
70
- intensity[my_ind] = dat
57
+ intensity_ar[my_ind] = dat
71
58
  my_ind += 1
72
59
  end
73
60
  end
74
- self.new(mz, intensity)
61
+ [mz_ar, intensity_ar]
75
62
  end
76
63
 
64
+ # takes a base64 peaks string (interleaved m/z, intensity values) and
65
+ # returns a new spectrum built from the decoded arrays
66
+ def self.from_base64_peaks(b64_string, precision=32, network_order=true)
67
+ (mz_ar, intensity_ar) = self.mzs_and_intensities_from_base64_peaks(b64_string, precision, network_order)
68
+ self.new(mz_ar, intensity_ar)
69
+ end
77
70
 
71
+ def self.from_base64_pair(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
72
+ mz_ar = base64_to_array(mz_string, mz_precision, mz_network_order)
73
+ inten_ar = base64_to_array(intensity_string, intensity_precision, intensity_network_order)
74
+ self.new(mz_ar, inten_ar)
75
+ end
78
76
 
77
+ def initialize(mz_ar=[], intensity_ar=[])
78
+ @mzs = mz_ar
79
+ @intensities = intensity_ar
80
+ end
79
81
 
80
- ######
81
- # NOT REALLY USING RIGHT NOW:
82
- ######
82
+ def has_mz_data?
83
+ @mzs && (@mzs.size > 0) && (@mzs.first.is_a?(Numeric))
84
+ end
83
85
 
84
- # takes a base64 peaks string and returns an array of [m/z,intens] doublets
85
- # mzXML as network ordered
86
- def base64_peaks_to_pairs(string, precision=32)
87
- data = base64_peaks_to_array(string, precision)
88
- ndata = []
89
- data.each_with_index do |dat,ind|
90
- if (ind % 2) == 0 # even
91
- arr = Array.new(2)
92
- arr[0] = dat
93
- ndata.push( arr )
94
- else
95
- ndata.last[1] = dat
96
- end
97
- end
98
- ndata
86
+ def has_intensity_data?
87
+ @intensities && (@intensities.size > 0) && (@intensities.first.is_a?(Numeric))
99
88
  end
100
89
 
101
90
  # returns the index of the first value matching that m/z. the argument m/z
@@ -103,17 +92,18 @@ class MS::Spectrum
103
92
  # given) but must be at least integer precision (after rounding)
104
93
  # implemented as binary search (bsearch from the web)
105
94
  def index(mz)
95
+ mz_ar = mzs
106
96
  return_val = nil
107
- ind = @mz.bsearch_lower_boundary{|x| x <=> mz }
108
- if @mz[ind] == mz
97
+ ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
98
+ if mz_ar[ind] == mz
109
99
  return_val = ind
110
100
  else
111
101
  # do a rounding game to see which one is it, or nil
112
102
  # find all the values rounding to the same integer in the locale
113
103
  # test each one fully in turn
114
104
  mz = mz.to_f
115
- mz_size = @mz.size
116
- if ((ind < mz_size) and equal_after_rounding?(@mz[ind], mz))
105
+ mz_size = mz_ar.size
106
+ if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
117
107
  return_val = ind
118
108
  else # run the loop
119
109
  up = ind
@@ -122,7 +112,7 @@ class MS::Spectrum
122
112
  if up >= mz_size
123
113
  break
124
114
  end
125
- mz_up = @mz[up]
115
+ mz_up = mz_ar[up]
126
116
  if (mz_up.ceil - mz.ceil >= 2)
127
117
  break
128
118
  else
@@ -138,7 +128,7 @@ class MS::Spectrum
138
128
  if dn < 0
139
129
  break
140
130
  end
141
- mz_dn = @mz[dn]
131
+ mz_dn = mz_ar[dn]
142
132
  if (mz.floor - mz_dn.floor >= 2)
143
133
  break
144
134
  else
@@ -156,7 +146,7 @@ class MS::Spectrum
156
146
  # uses index function and returns the intensity at that value
157
147
  def intensity_at_mz(mz)
158
148
  if x = index(mz)
159
- @intensity[x]
149
+ intensities[x]
160
150
  else
161
151
  nil
162
152
  end
@@ -189,5 +179,206 @@ class MS::Spectrum
189
179
  neg_exp10
190
180
  end
191
181
 
182
+ ######
183
+ # NOT REALLY USING RIGHT NOW:
184
+ ######
185
+
186
+ # takes a base64 peaks string and returns an array of [m/z,intensity] doublets
187
+ # mzXML as network ordered
188
+ def base64_peaks_to_pairs(string, precision=32)
189
+ data = base64_peaks_to_array(string, precision)
190
+ ndata = []
191
+ data.each_with_index do |dat,ind|
192
+ if (ind % 2) == 0 # even
193
+ arr = Array.new(2)
194
+ arr[0] = dat
195
+ ndata.push( arr )
196
+ else
197
+ ndata.last[1] = dat
198
+ end
199
+ end
200
+ ndata
201
+ end
202
+
203
+ end
204
+
205
+ # This implements a spectrum that stores itself as string data and only
206
+ # evaluates the information when it is called
207
+ class MS::Spectrum::LazyString < MS::Spectrum
208
+
209
+ undef mzs=
210
+ undef intensities=
211
+
212
+ # beware that this converts the information in @mz_string every time it is
213
+ # called
214
+ def mzs
215
+ MS::Spectrum.string_to_array(@mz_string, @mz_precision, @mz_network_order)
216
+ end
217
+
218
+ # beware that this converts the information in @intensity_string every time
219
+ # it is called
220
+ def intensities
221
+ MS::Spectrum.string_to_array(@intensity_string, @intensity_precision, @intensity_network_order)
222
+ end
223
+
224
+ # this takes a decoded base64 string that is then interpreted when
225
+ # information is accessed
226
+ def initialize(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
227
+ @mz_string = mz_string
228
+ @mz_precision = mz_precision
229
+ @mz_network_order = mz_network_order
230
+ @intensity_string = intensity_string
231
+ @intensity_precision = intensity_precision
232
+ @intensity_network_order = intensity_network_order
233
+ end
234
+
235
+ # from mzXML files where information is held in peaks (m/z, intensity,
236
+ # m/z...)
237
+ def self.from_base64_peaks(b64_string, precision=32, network_order=true)
238
+ # decode
239
+ string = Base64.decode64(b64_string)
240
+ # split into two strings:
241
+ bytes_per_number = precision / 8
242
+ s_size = string.size
243
+ num_numbers = s_size / bytes_per_number
244
+ mz_pieces = Array.new(num_numbers)
245
+ intensity_pieces = Array.new(num_numbers)
246
+ index = 0
247
+ (0...string.size).step(bytes_per_number) do |i|
248
+ if index % 2 == 0
249
+ mz_pieces[index] = string[i,bytes_per_number]
250
+ else
251
+ intensity_pieces[index] = string[i,bytes_per_number]
252
+ end
253
+ index += 1
254
+ end
255
+ self.new(mz_pieces.join, precision, network_order, intensity_pieces.join, precision, network_order)
256
+ end
257
+
258
+ # from mzML and mzData style files where mz and intensity information are
259
+ # kept in different strings.
260
+ def self.from_base64_pair(b64_mz_string, mz_precision, mz_network_order, b64_intensity_string, intensity_precision, intensity_network_order)
261
+ self.new(Base64.decode64(b64_mz_string), mz_precision, mz_network_order, Base64.decode64(b64_intensity_string), intensity_precision, intensity_network_order)
262
+ end
263
+
264
+ def has_mz_data?
265
+ @mz_string.is_a?(String) && @mz_precision && !@mz_network_order.nil?
266
+ end
267
+
268
+ def has_intensity_data?
269
+ @intensity_string.is_a?(String) && @intensity_precision && !@intensity_network_order.nil?
270
+ end
271
+
272
+ end
273
+
274
+ module MS::Spectrum::LazyIO
275
+ def self.new(*args)
276
+ if args.size == 5 # mzXML
277
+ MS::Spectrum::LazyIO::Peaks.new(*args)
278
+ elsif args.size == 9 # other
279
+ MS::Spectrum::LazyIO::Pair.new(*args)
280
+ else
281
+ raise RunTimeError, "must give 5 or 7 args for peak data and pair data respectively"
282
+ end
283
+ end
192
284
  end
193
285
 
286
+
287
+ # stores an io object plus start indices and byte counts and only evaluates the
288
+ # spectrum when information is requested
289
+ class MS::Spectrum::LazyIO::Pair < MS::Spectrum
290
+ include MS::Spectrum::LazyIO
291
+
292
+ undef mzs=
293
+ undef intensities=
294
+
295
+ def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
296
+ @io = io
297
+
298
+ @mz_start_index = mz_start_index
299
+ @mz_num_bytes = mz_num_bytes
300
+ @mz_precision = mz_precision
301
+ @mz_network_order = mz_network_order
302
+
303
+ @intensity_start_index = intensity_start_index
304
+ @intensity_num_bytes = intensity_num_bytes
305
+ @intensity_precision = intensity_precision
306
+ @intensity_network_order = intensity_network_order
307
+
308
+ end
309
+
310
+ # beware that this converts the information on disk every time it is called.
311
+ def mzs
312
+ @io.pos = @mz_start_index
313
+ b64_string = @io.read(@mz_num_bytes)
314
+ MS::Spectrum.base64_to_array(b64_string, @mz_precision, @mz_network_order)
315
+ end
316
+
317
+ # beware that this converts the information on disk every time
318
+ # it is called.
319
+ def intensities
320
+ @io.pos = @intensity_start_index
321
+ b64_string = @io.read(@intensity_num_bytes)
322
+ MS::Spectrum.base64_to_array(b64_string, @intensity_precision, @intensity_network_order)
323
+ end
324
+
325
+ def has_mz_data?
326
+ (!@io.closed?) && @mz_start_index && @mz_num_bytes && @mz_precision && !@mz_network_order.nil?
327
+ end
328
+
329
+ def has_intensity_data?
330
+ (!@io.closed?) && @intensity_start_index && @intensity_num_bytes && @intensity_precision && !@intensity_network_order.nil?
331
+ end
332
+
333
+ end
334
+
335
+ class MS::Spectrum::LazyIO::Peaks < MS::Spectrum
336
+ include MS::Spectrum::LazyIO
337
+
338
+ undef mzs=
339
+ undef intensities=
340
+
341
+ def initialize(io, start_index, num_bytes, precision, network_order)
342
+ @io = io
343
+ @start_index = start_index
344
+ @num_bytes = num_bytes
345
+ @precision = precision
346
+ @network_order = network_order
347
+ end
348
+
349
+ # returns two arrays: an array of m/z values and an array of intensity
350
+ # values. This is the preferred way to access mzXML file information under
351
+ # lazy evaluation
352
+ def mzs_and_intensities
353
+ @io.pos = @start_index
354
+ b64_string = @io.read(@num_bytes)
355
+ MS::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
356
+ end
357
+
358
+ # when using 'io' lazy evaluation on files with m/z and intensity data
359
+ # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
360
+ # if you are using both mz and intensity data.
361
+ def mzs
362
+ # TODO: this can be made slightly faster
363
+ mzs_and_intensities.first
364
+ end
365
+
366
+ # when using 'io' lazy evaluation on files with m/z and intensity data
367
+ # interwoven (i.e., mzXML) it is more efficient to call
368
+ # 'mzs_and_intensities'
369
+ # if you are using both mz and intensity data.
370
+ def intensities
371
+ # TODO: this can be made slightly faster
372
+ mzs_and_intensities.last
373
+ end
374
+
375
+
376
+ def has_mz_data?
377
+ (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
378
+ end
379
+
380
+ def has_intensity_data?
381
+ (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
382
+ end
383
+
384
+ end
data/lib/mspire.rb CHANGED
@@ -1,4 +1,4 @@
1
1
 
2
2
  module Mspire
3
- Version = '0.3.9'
3
+ Version = '0.4.2'
4
4
  end
@@ -5,7 +5,7 @@ require 'xmlparser'
5
5
  require 'spec_id'
6
6
  require 'zlib'
7
7
  require 'hash_by'
8
- require 'array_class'
8
+ require 'arrayclass'
9
9
  require 'fasta'
10
10
 
11
11
  ## have to pre-declare some guys
@@ -377,7 +377,7 @@ class Bioworks::Prot
377
377
  end
378
378
  end
379
379
 
380
- Bioworks::Pep = ArrayClass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
380
+ Bioworks::Pep = Arrayclass.new( %w(sequence mass deltamass charge xcorr deltacn sp rsp ions count tic prots base_name first_scan last_scan peptide_probability file _num_prots _first_prot aaseq) )
381
381
  # 0=sequence 1=mass 2=deltamass 3=charge 4=xcorr 5=deltacn 6=sp 7=rsp 8=ions 9=count 10=tic 11=prots 12=base_name 13=first_scan 14=last_scan 15=peptide_probability 16=file 17=_num_prots 18=_first_prot 19=aaseq
382
382
 
383
383
  class Bioworks::Pep
@@ -145,7 +145,14 @@ module SpecID
145
145
 
146
146
  op.separator ""
147
147
  op.separator "OTHER OPTIONS: "
148
- op.opt(:interactive) {|v| opts[:interactive] = v }
148
+ op.opt(:interactive) do |v|
149
+ opts[:interactive] =
150
+ if v
151
+ v
152
+ else
153
+ true
154
+ end
155
+ end
149
156
  op.opt(:interactive_verbose) {|v| opts[:interactive_verbose] = v }
150
157
 
151
158
  op.opt(:top_hit_by) {|v| opts[:top_hit_by] = v.to_sym}
@@ -123,10 +123,10 @@ module SpecID
123
123
  end
124
124
  postfilter =
125
125
  if spec_id_obj.class == SQTGroup or spec_id_obj.class == Proph::PepSummary
126
- puts 'making background estimates with: top_per_scan'
126
+ #puts 'making background estimates with: top_per_scan'
127
127
  :top_per_scan
128
128
  else
129
- puts 'making background estimates with: top_per_aaseq_charge'
129
+ #puts 'making background estimates with: top_per_aaseq_charge'
130
130
  :top_per_aaseq_charge
131
131
  end
132
132
  opts[:validators] = Validator::Cmdline.prepare_validators(opts, !opts[:ties], opts[:interactive], postfilter, spec_id_obj)
@@ -216,6 +216,7 @@ class SpecID::Precision::Prob
216
216
  else
217
217
  out[:probabilities] = probabilities
218
218
  end
219
+ out[:pephits] = ordered_peps # just in case they want to see
219
220
  out[:count] = num_pephits
220
221
  out[:aaseqs] = pepstrings
221
222
  out[:charges] = pepcharges
@@ -1,5 +1,5 @@
1
1
 
2
- require 'array_class'
2
+ require 'arrayclass'
3
3
  require 'spec_id/sequest/pepxml'
4
4
  require 'spec_id/parser/proph'
5
5
 
@@ -61,11 +61,10 @@ module Proph
61
61
 
62
62
  # this is a SpecID::Pep (by interface: not including stuff yet)
63
63
  class PepSummary::Pep < Sequest::PepXML::SearchHit
64
-
65
64
  # aaseq is defined in SearchHit
66
65
 
67
66
  %w(probability fval ntt nmc massd prots).each do |guy|
68
- self.add_member(guy)
67
+ self.add_member(guy)
69
68
  end
70
69
 
71
70
  # returns self
@@ -91,7 +90,7 @@ module Proph
91
90
  end
92
91
  end
93
92
 
94
- ::Proph::PepSummary::Prot = ArrayClass.new(%w(name protein_descr peps))
93
+ ::Proph::PepSummary::Prot = Arrayclass.new(%w(name protein_descr peps))
95
94
 
96
95
  class PepSummary::Prot
97
96
  def first_entry ; self[0] end ## name
@@ -3,7 +3,7 @@ require 'hash_by'
3
3
  require 'instance_var_set_from_hash'
4
4
  require 'axml'
5
5
  require 'spec_id'
6
- require 'array_class'
6
+ require 'arrayclass'
7
7
 
8
8
  require 'spec_id/parser/proph'
9
9
 
@@ -122,7 +122,7 @@ end # Proph
122
122
 
123
123
 
124
124
 
125
- Proph::Prot = ArrayClass.new(%w(protein_name probability n_indistinguishable_proteins percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids description peps))
125
+ Proph::Prot = Arrayclass.new(%w(protein_name probability n_indistinguishable_proteins percent_coverage unique_stripped_peptides group_sibling_id total_number_peptides pct_spectrum_ids description peps))
126
126
 
127
127
  # note that 'description' is found in the element 'annotation', attribute 'protein_description'
128
128
  # NOTE!: unique_stripped peptides is an array rather than + joined string
@@ -142,7 +142,7 @@ end
142
142
 
143
143
  # this is a pep from a -prot.xml file
144
144
 
145
- Proph::Prot::Pep = ArrayClass.new(%w(peptide_sequence charge initial_probability nsp_adjusted_probability weight is_nondegenerate_evidence n_enzymatic_termini n_sibling_peptides n_sibling_peptides_bin n_instances is_contributing_evidence calc_neutral_pep_mass modification_info prots))
145
+ Proph::Prot::Pep = Arrayclass.new(%w(peptide_sequence charge initial_probability nsp_adjusted_probability weight is_nondegenerate_evidence n_enzymatic_termini n_sibling_peptides n_sibling_peptides_bin n_instances is_contributing_evidence calc_neutral_pep_mass modification_info prots))
146
146
 
147
147
  class Proph::Prot::Pep
148
148
  include SpecID::Pep
@@ -428,7 +428,7 @@ class ProteinSummary
428
428
  op.on("--#{PRECISION_PROGRAM_BASE}", "include output of #{PRECISION_PROGRAM_BASE}.rb,") {|v| opt.precision = v}
429
429
  op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
430
430
  op.separator ""
431
- op.separator "MSific to ProteinProphet (with no concatenated DB):"
431
+ op.separator "specific to ProteinProphet (with no concatenated DB):"
432
432
  op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
433
433
  op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
434
434
  op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
@@ -1206,7 +1206,7 @@ class Sequest::PepXML::SearchDatabase
1206
1206
 
1207
1207
  end
1208
1208
 
1209
- Sequest::PepXML::SpectrumQuery = ArrayClass.new(%w(spectrum start_scan end_scan precursor_neutral_mass index assumed_charge search_results pepxml_version))
1209
+ Sequest::PepXML::SpectrumQuery = Arrayclass.new(%w(spectrum start_scan end_scan precursor_neutral_mass index assumed_charge search_results pepxml_version))
1210
1210
 
1211
1211
  class Sequest::PepXML::SpectrumQuery
1212
1212
  include SpecIDXML
@@ -1292,7 +1292,7 @@ class Sequest::PepXML::SpectrumQuery
1292
1292
  end
1293
1293
 
1294
1294
 
1295
- Sequest::PepXML::SearchHit = ArrayClass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info spectrum_query) )
1295
+ Sequest::PepXML::SearchHit = Arrayclass.new( %w( hit_rank peptide peptide_prev_aa peptide_next_aa protein num_tot_proteins num_matched_ions tot_num_ions calc_neutral_pep_mass massdiff num_tol_term num_missed_cleavages is_rejected deltacnstar xcorr deltacn spscore sprank modification_info spectrum_query) )
1296
1296
 
1297
1297
  # 0=hit_rank 1=peptide 2=peptide_prev_aa 3=peptide_next_aa 4=protein 5=num_tot_proteins 6=num_matched_ions 7=tot_num_ions 8=calc_neutral_pep_mass 9=massdiff 10=num_tol_term 11=num_missed_cleavages 12=is_rejected 13=deltacnstar 14=xcorr 15=deltacn 16=spscore 17=sprank 18=modification_info 19=spectrum_query
1298
1298
 
@@ -1312,7 +1312,7 @@ class Sequest::PepXML::SearchHit
1312
1312
  tmp_verb = $VERBOSE
1313
1313
  $VERBOSE = nil
1314
1314
  def initialize(hash=nil)
1315
- super(@@arr_size)
1315
+ super(self.class.size)
1316
1316
  if hash
1317
1317
  self[0,20] = [hash[:hit_rank], hash[:peptide], hash[:peptide_prev_aa], hash[:peptide_next_aa], hash[:protein], hash[:num_tot_proteins], hash[:num_matched_ions], hash[:tot_num_ions], hash[:calc_neutral_pep_mass], hash[:massdiff], hash[:num_tol_term], hash[:num_missed_cleavages], hash[:is_rejected], hash[:deltacnstar], hash[:xcorr], hash[:deltacn], hash[:spscore], hash[:sprank], hash[:modification_info], hash[:spectrum_query]]
1318
1318
  end
@@ -1379,7 +1379,7 @@ class Sequest::PepXML::SearchHit
1379
1379
  end
1380
1380
 
1381
1381
 
1382
- Sequest::PepXML::SearchHit::ModificationInfo = ArrayClass.new(%w(modified_peptide mod_aminoacid_masses mod_nterm_mass mod_cterm_mass))
1382
+ Sequest::PepXML::SearchHit::ModificationInfo = Arrayclass.new(%w(modified_peptide mod_aminoacid_masses mod_nterm_mass mod_cterm_mass))
1383
1383
 
1384
1384
  # Positions and masses of modifications
1385
1385
  class Sequest::PepXML::SearchHit::ModificationInfo
@@ -1455,4 +1455,4 @@ class Sequest::PepXML::SearchHit::ModificationInfo
1455
1455
  # </modification_info>
1456
1456
  end
1457
1457
 
1458
- Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = ArrayClass.new(%w(position mass))
1458
+ Sequest::PepXML::SearchHit::ModificationInfo::ModAminoacidMass = Arrayclass.new(%w(position mass))