mspire 0.4.9 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/ms/spectrum.rb CHANGED
@@ -1,384 +1,25 @@
1
- require 'base64'
2
- require 'bsearch'
3
- require 'ms'
4
-
5
- class MS::Spectrum
6
-
7
- Unpack_network_float = 'g*'
8
- Unpack_network_double = 'G*'
9
- Unpack_little_endian_float = 'e*'
10
- Unpack_little_endian_double = 'E*'
11
-
12
- # m/z's
13
- attr_accessor :mzs
14
- # intensities
15
- attr_accessor :intensities
16
-
17
- #######################
18
- ## CLASS METHODS:
19
- #######################
20
-
21
- # an already decoded string (ready to be unpacked as floating point numbers)
22
- def self.string_to_array(string, precision=32, network_order=true)
23
- unpack_code =
24
- if network_order
25
- if precision == 32
26
- Unpack_network_float
27
- elsif precision == 64
28
- Unpack_network_double
29
- end
30
- else ## little endian
31
- if precision == 32
32
- Unpack_little_endian_float
33
- elsif precision == 64
34
- Unpack_little_endian_double
35
- end
36
- end
37
- string.unpack(unpack_code)
38
- end
39
-
40
- # takes a base64 string and returns an array
41
- def self.base64_to_array(b64_string, precision=32, network_order=true)
42
- self.string_to_array(Base64.decode64(b64_string), precision, network_order)
43
- end
44
-
45
-
46
- def self.mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
47
- data = base64_to_array(b64_string, precision, network_order)
48
- sz = data.size/2
49
- mz_ar = Array.new(sz)
50
- intensity_ar = Array.new(sz)
51
- ndata = []
52
- my_ind = 0
53
- data.each_with_index do |dat,ind|
54
- if (ind % 2) == 0 # even
55
- mz_ar[my_ind] = dat
56
- else
57
- intensity_ar[my_ind] = dat
58
- my_ind += 1
59
- end
60
- end
61
- [mz_ar, intensity_ar]
62
- end
63
-
64
- # takes a base64 peaks string and sets spectrum
65
- # returns self for chaining
66
- def self.from_base64_peaks(b64_string, precision=32, network_order=true)
67
- (mz_ar, intensity_ar) = self.mzs_and_intensities_from_base64_peaks(b64_string, precision, network_order)
68
- self.new(mz_ar, intensity_ar)
69
- end
70
-
71
- def self.from_base64_pair(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
72
- mz_ar = base64_to_array(mz_string, mz_precision, mz_network_order)
73
- inten_ar = base64_to_array(intensity_string, intensity_precision, intensity_network_order)
74
- self.new(mz_ar, inten_ar)
75
- end
76
-
77
- def initialize(mz_ar=[], intensity_ar=[])
78
- @mzs = mz_ar
79
- @intensities = intensity_ar
80
- end
81
-
82
- def has_mz_data?
83
- @mzs && (@mzs.size > 0) && (@mzs.first.is_a?(Numeric))
84
- end
85
-
86
- def has_intensity_data?
87
- @intensities && (@intensities.size > 0) && (@intensities.first.is_a?(Numeric))
88
- end
89
-
90
- # returns the index of the first value matching that m/z. the argument m/z
91
- # may be less precise than the actual m/z (rounding to the same precision
92
- # given) but must be at least integer precision (after rounding)
93
- # implemented as binary search (bsearch from the web)
94
- def index(mz)
95
- mz_ar = mzs
96
- return_val = nil
97
- ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
98
- if mz_ar[ind] == mz
99
- return_val = ind
100
- else
101
- # do a rounding game to see which one is it, or nil
102
- # find all the values rounding to the same integer in the locale
103
- # test each one fully in turn
104
- mz = mz.to_f
105
- mz_size = mz_ar.size
106
- if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
107
- return_val = ind
108
- else # run the loop
109
- up = ind
110
- loop do
111
- up += 1
112
- if up >= mz_size
113
- break
114
- end
115
- mz_up = mz_ar[up]
116
- if (mz_up.ceil - mz.ceil >= 2)
117
- break
118
- else
119
- if equal_after_rounding?(mz_up, mz)
120
- return_val = up
121
- return return_val
122
- end
123
- end
124
- end
125
- dn= ind
126
- loop do
127
- dn -= 1
128
- if dn < 0
129
- break
130
- end
131
- mz_dn = mz_ar[dn]
132
- if (mz.floor - mz_dn.floor >= 2)
133
- break
134
- else
135
- if equal_after_rounding?(mz_dn, mz)
136
- return_val = dn
137
- return return_val
138
- end
139
- end
140
- end
141
- end
142
- end
143
- return_val
144
- end
145
-
146
- # uses index function and returns the intensity at that value
147
- def intensity_at_mz(mz)
148
- if x = index(mz)
149
- intensities[x]
150
- else
151
- nil
152
- end
153
- end
154
-
155
- # less_precise should be a float
156
- # precise should be a float
157
- def equal_after_rounding?(precise, less_precise)
158
- # determine the precision of less_precise
159
- exp10 = precision_as_neg_int(less_precise)
160
- #puts "EXP10: #{exp10}"
161
- answ = ((precise*exp10).round == (less_precise*exp10).round)
162
- #puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
163
- #puts answ
164
- (precise*exp10).round == (less_precise*exp10).round
165
- end
166
-
167
- # returns 1 for ones place, 10 for tenths, 100 for hundredths
168
- # to a precision exceeding 1e-6
169
- def precision_as_neg_int(float)
170
- neg_exp10 = 1
171
- loop do
172
- over = float * neg_exp10
173
- rounded = over.round
174
- if (over - rounded).abs <= 1e-6
175
- break
176
- end
177
- neg_exp10 *= 10
178
- end
179
- neg_exp10
180
- end
181
-
182
- ######
183
- # NOT REALLY USING RIGHT NOW:
184
- ######
185
-
186
- # takes a base64 peaks string and returns an array of [m/z,intensity] doublets
187
- # mzXML as network ordered
188
- def base64_peaks_to_pairs(string, precision=32)
189
- data = base64_peaks_to_array(string, precision)
190
- ndata = []
191
- data.each_with_index do |dat,ind|
192
- if (ind % 2) == 0 # even
193
- arr = Array.new(2)
194
- arr[0] = dat
195
- ndata.push( arr )
196
- else
197
- ndata.last[1] = dat
198
- end
199
- end
200
- ndata
201
- end
202
-
203
- end
204
-
205
- # This implements a spectrum that stores itself as string data and only
206
- # evaluates the information when it is called
207
- class MS::Spectrum::LazyString < MS::Spectrum
208
-
209
- undef mzs=
210
- undef intensities=
211
-
212
- # beware that this converts the information in @mz_string every time it is
213
- # called
214
- def mzs
215
- MS::Spectrum.string_to_array(@mz_string, @mz_precision, @mz_network_order)
216
- end
217
-
218
- # beware that this converts the information in @intensity_string every time
219
- # it is
220
- def intensities
221
- MS::Spectrum.string_to_array(@intensity_string, @intensity_precision, @intensity_network_order)
222
- end
223
-
224
- # this takes a decoded base64 string that is then interpreted when
225
- # information is accessed
226
- def initialize(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
227
- @mz_string = mz_string
228
- @mz_precision = mz_precision
229
- @mz_network_order = mz_network_order
230
- @intensity_string = intensity_string
231
- @intensity_precision = intensity_precision
232
- @intensity_network_order = intensity_network_order
233
- end
234
-
235
- # from mzXML files where information is held in peaks (m/z, intensity,
236
- # m/z...)
237
- def self.from_base64_peaks(b64_string, precision=32, network_order=true)
238
- # decode
239
- string = Base64.decode64(b64_string)
240
- # split into two strings:
241
- bytes_per_number = precision / 8
242
- s_size = string.size
243
- num_numbers = s_size / bytes_per_number
244
- mz_pieces = Array.new(num_numbers)
245
- intensity_pieces = Array.new(num_numbers)
246
- index = 0
247
- (0...string.size).step(bytes_per_number) do |i|
248
- if index % 2 == 0
249
- mz_pieces[index] = string[i,bytes_per_number]
250
- else
251
- intensity_pieces[index] = string[i,bytes_per_number]
252
- end
253
- index += 1
254
- end
255
- self.new(mz_pieces.join, precision, network_order, intensity_pieces.join, precision, network_order)
256
- end
257
-
258
- # from mzML and mzData style files where mz and intensity information are
259
- # kept in different strings.
260
- def self.from_base64_pair(b64_mz_string, mz_precision, mz_network_order, b64_intensity_string, intensity_precision, intensity_network_order)
261
- self.new(Base64.decode64(b64_mz_string), mz_precision, mz_network_order, Base64.decode64(b64_intensity_string), intensity_precision, intensity_network_order)
262
- end
263
-
264
- def has_mz_data?
265
- @mz_string.is_a?(String) && @mz_precision && !@mz_network_order.nil?
266
- end
267
-
268
- def has_intensity_data?
269
- @intensity_string.is_a?(String) && @intensity_precision && !@intensity_network_order.nil?
270
- end
271
-
272
- end
273
-
274
- module MS::Spectrum::LazyIO
275
- def self.new(*args)
276
- if args.size == 5 # mzXMl
277
- MS::Spectrum::LazyIO::Peaks.new(*args)
278
- elsif args.size == 9 # other
279
- MS::Spectrum::LazyIO::Pair.new(*args)
280
- else
281
- raise RunTimeError, "must give 5 or 7 args for peak data and pair data respectively"
282
- end
283
- end
284
- end
285
-
286
-
287
- # stores an io object and the start and end indices and only evaluates the
288
- # spectrum when information is requested
289
- class MS::Spectrum::LazyIO::Pair < MS::Spectrum
290
- include MS::Spectrum::LazyIO
291
-
292
- undef mzs=
293
- undef intensities=
294
-
295
- def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
296
- @io = io
297
-
298
- @mz_start_index = mz_start_index
299
- @mz_num_bytes = mz_num_bytes
300
- @mz_precision = mz_precision
301
- @mz_network_order = mz_network_order
302
-
303
- @intensity_start_index = intensity_start_index
304
- @intensity_num_bytes = intensity_num_bytes
305
- @intensity_precision = intensity_precision
306
- @intensity_network_order = intensity_network_order
307
-
308
- end
309
-
310
- # beware that this converts the information on disk every time it is called.
311
- def mzs
312
- @io.pos = @mz_start_index
313
- b64_string = @io.read(@mz_num_bytes)
314
- MS::Spectrum.base64_to_array(b64_string, @mz_precision, @mz_network_order)
315
- end
316
-
317
- # beware that this converts the information in @intensity_string every time
318
- # it is called.
319
- def intensities
320
- @io.pos = @intensity_start_index
321
- b64_string = @io.read(@intensity_num_bytes)
322
- MS::Spectrum.base64_to_array(b64_string, @intensity_precision, @intensity_network_order)
323
- end
324
-
325
- def has_mz_data?
326
- (!@io.closed?) && @mz_start_index && @mz_num_bytes && @mz_precision && !@mz_network_order.nil?
327
- end
328
-
329
- def has_intensity_data?
330
- (!@io.closed?) && @intensity_start_index && @intensity_num_bytes && @intensity_precision && !@intensity_network_order.nil?
331
- end
332
-
333
- end
334
-
335
- class MS::Spectrum::LazyIO::Peaks < MS::Spectrum
336
- include MS::Spectrum::LazyIO
337
-
338
- undef mzs=
339
- undef intensities=
340
-
341
- def initialize(io, start_index, num_bytes, precision, network_order)
342
- @io = io
343
- @start_index = start_index
344
- @num_bytes = num_bytes
345
- @precision = precision
346
- @network_order = network_order
347
- end
348
-
349
- # returns two arrays: an array of m/z values and an array of intensity
350
- # values. This is the preferred way to access mzXML file information under
351
- # lazy evaluation
352
- def mzs_and_intensities
353
- @io.pos = @start_index
354
- b64_string = @io.read(@num_bytes)
355
- MS::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
356
- end
357
-
358
- # when using 'io' lazy evaluation on files with m/z and intensity data
359
- # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
360
- # if you are using both mz and intensity data.
361
- def mzs
362
- # TODO: this can be made slightly faster
363
- mzs_and_intensities.first
364
- end
365
-
366
- # when using 'io' lazy evaluation on files with m/z and intensity data
367
- # interwoven (i.e., mzXML) it is more efficient to call
368
- # 'mzs_and_intensities'
369
- # if you are using both mz and intensity data.
370
- def intensities
371
- # TODO: this can be made slightly faster
372
- mzs_and_intensities.last
373
- end
374
-
375
-
376
- def has_mz_data?
377
- (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
378
- end
379
-
380
- def has_intensity_data?
381
- (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
382
- end
383
-
384
- end
1
+ module Ms
2
+ class Spectrum
3
+ # The underlying data store.
4
+ attr_reader :data
5
+
6
+ # Associated headers
7
+ attr_reader :headers
8
+
9
+ def initialize(data, headers={})
10
+ @data = data
11
+ @headers = headers
12
+ end
13
+
14
+ # An array of the mz data.
15
+ def mzs
16
+ @data[0]
17
+ end
18
+
19
+ # An array of the intensities data, corresponding to mzs.
20
+ def intensities
21
+ @data[1]
22
+ end
23
+
24
+ end
25
+ end
data/lib/ms/support/binary_search.rb ADDED
@@ -0,0 +1,126 @@
1
+ module Ms
2
+ module Support
3
+
4
+ # A binary search library adapted from: http://0xcc.net/ruby-bsearch/
5
+ # ---
6
+ #
7
+ # Ruby/Bsearch - a binary search library for Ruby.
8
+ #
9
+ # Copyright (C) 2001 Satoru Takabayashi <satoru@namazu.org>
10
+ # All rights reserved.
11
+ # This is free software with ABSOLUTELY NO WARRANTY.
12
+ #
13
+ # You can redistribute it and/or modify it under the terms of
14
+ # the Ruby's licence.
15
+ #
16
+ # Example:
17
+ #
18
+ # % irb -r ./bsearch.rb
19
+ # >> %w(a b c c c d e f).bsearch_first {|x| x <=> "c"}
20
+ # => 2
21
+ # >> %w(a b c c c d e f).bsearch_last {|x| x <=> "c"}
22
+ # => 4
23
+ # >> %w(a b c e f).bsearch_first {|x| x <=> "c"}
24
+ # => 2
25
+ # >> %w(a b e f).bsearch_first {|x| x <=> "c"}
26
+ # => nil
27
+ # >> %w(a b e f).bsearch_last {|x| x <=> "c"}
28
+ # => nil
29
+ # >> %w(a b e f).bsearch_lower_boundary {|x| x <=> "c"}
30
+ # => 2
31
+ # >> %w(a b e f).bsearch_upper_boundary {|x| x <=> "c"}
32
+ # => 2
33
+ # >> %w(a b c c c d e f).bsearch_range {|x| x <=> "c"}
34
+ # => 2...5
35
+ # >> %w(a b c d e f).bsearch_range {|x| x <=> "c"}
36
+ # => 2...3
37
+ # >> %w(a b d e f).bsearch_range {|x| x <=> "c"}
38
+ # => 2...2
39
+ #
40
+ # The binary search algorithm is extracted from Jon Bentley's
41
+ # Programming Pearls 2nd ed. p.93
42
+ #
43
+ module BinarySearch
44
+ VERSION = '1.5'
45
+
46
+ module_function
47
+
48
+ #
49
+ # Return the lower boundary. (inside)
50
+ #
51
+ def search_lower_boundary(array, range=nil, &block)
52
+ range = 0 ... array.length if range == nil
53
+
54
+ lower = range.first() -1
55
+ upper = if range.exclude_end? then range.last else range.last + 1 end
56
+ while lower + 1 != upper
57
+ mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
58
+ if yield(array[mid]) < 0
59
+ lower = mid
60
+ else
61
+ upper = mid
62
+ end
63
+ end
64
+ return upper
65
+ end
66
+
67
+ #
68
+ # This method searches the FIRST occurrence which satisfies a
69
+ # condition given by a block in binary fashion and return the
70
+ # index of the first occurrence. Return nil if not found.
71
+ #
72
+ def search_first(array, range=nil, &block)
73
+ boundary = search_lower_boundary(array, range, &block)
74
+ if boundary >= array.length || yield(array[boundary]) != 0
75
+ return nil
76
+ else
77
+ return boundary
78
+ end
79
+ end
80
+
81
+ #
82
+ # Return the upper boundary. (outside)
83
+ #
84
+ def search_upper_boundary(array, range=nil, &block)
85
+ range = 0 ... array.length if range == nil
86
+
87
+ lower = range.first() -1
88
+ upper = if range.exclude_end? then range.last else range.last + 1 end
89
+ while lower + 1 != upper
90
+ mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
91
+ if yield(array[mid]) <= 0
92
+ lower = mid
93
+ else
94
+ upper = mid
95
+ end
96
+ end
97
+ return lower + 1 # outside of the matching range.
98
+ end
99
+
100
+ #
101
+ # This method searches the LAST occurrence which satisfies a
102
+ # condition given by a block in binary fashion and return the
103
+ # index of the last occurrence. Return nil if not found.
104
+ #
105
+ def search_last(array, range=nil, &block)
106
+ # `- 1' for canceling `lower + 1' in bsearch_upper_boundary.
107
+ boundary = search_upper_boundary(array, range, &block) - 1
108
+
109
+ if (boundary <= -1 || yield(array[boundary]) != 0)
110
+ return nil
111
+ else
112
+ return boundary
113
+ end
114
+ end
115
+
116
+ #
117
+ # Return the search result as a Range object.
118
+ #
119
+ def search_range(array, range=nil, &block)
120
+ lower = search_lower_boundary(array, range, &block)
121
+ upper = search_upper_boundary(array, range, &block)
122
+ return lower ... upper
123
+ end
124
+ end
125
+ end
126
+ end
data/lib/ms.rb CHANGED
@@ -1,10 +1,10 @@
1
-
2
-
3
- module MS
4
- attr_accessor :spectra
5
-
6
- # should
7
- def new(file=nil)
8
- end
9
-
10
- end
1
+ module Ms
2
+ module_function
3
+
4
+ # def parse(format, path)
5
+ # const = Tap::Env.instance.search(:formats, format)
6
+ # raise ArgumentError, "unknown format: #{format}" unless const
7
+ # const.constantize.parse(path)
8
+ # end
9
+
10
+ end