mspire 0.4.9 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (255)
  1. data/README +27 -17
  2. data/changelog.txt +31 -62
  3. data/lib/ms/calc.rb +32 -0
  4. data/lib/ms/data/interleaved.rb +60 -0
  5. data/lib/ms/data/lazy_io.rb +73 -0
  6. data/lib/ms/data/lazy_string.rb +15 -0
  7. data/lib/ms/data/simple.rb +59 -0
  8. data/lib/ms/data/transposed.rb +41 -0
  9. data/lib/ms/data.rb +57 -0
  10. data/lib/ms/format/format_error.rb +12 -0
  11. data/lib/ms/spectrum.rb +25 -384
  12. data/lib/ms/support/binary_search.rb +126 -0
  13. data/lib/ms.rb +10 -10
  14. metadata +38 -350
  15. data/INSTALL +0 -58
  16. data/README.rdoc +0 -18
  17. data/Rakefile +0 -330
  18. data/bin/aafreqs.rb +0 -23
  19. data/bin/bioworks2excel.rb +0 -14
  20. data/bin/bioworks_to_pepxml.rb +0 -148
  21. data/bin/bioworks_to_pepxml_gui.rb +0 -225
  22. data/bin/fasta_shaker.rb +0 -5
  23. data/bin/filter_and_validate.rb +0 -5
  24. data/bin/gi2annot.rb +0 -14
  25. data/bin/id_class_anal.rb +0 -112
  26. data/bin/id_precision.rb +0 -172
  27. data/bin/ms_to_lmat.rb +0 -67
  28. data/bin/pepproph_filter.rb +0 -16
  29. data/bin/prob_validate.rb +0 -6
  30. data/bin/protein_summary.rb +0 -6
  31. data/bin/protxml2prots_peps.rb +0 -32
  32. data/bin/raw_to_mzXML.rb +0 -55
  33. data/bin/run_percolator.rb +0 -122
  34. data/bin/sqt_group.rb +0 -26
  35. data/bin/srf_group.rb +0 -27
  36. data/bin/srf_to_sqt.rb +0 -40
  37. data/lib/align/chams.rb +0 -78
  38. data/lib/align.rb +0 -154
  39. data/lib/archive/targz.rb +0 -94
  40. data/lib/bsearch.rb +0 -120
  41. data/lib/core_extensions.rb +0 -16
  42. data/lib/fasta.rb +0 -626
  43. data/lib/gi.rb +0 -124
  44. data/lib/group_by.rb +0 -10
  45. data/lib/index_by.rb +0 -11
  46. data/lib/merge_deep.rb +0 -21
  47. data/lib/ms/converter/mzxml.rb +0 -77
  48. data/lib/ms/gradient_program.rb +0 -170
  49. data/lib/ms/msrun.rb +0 -244
  50. data/lib/ms/msrun_index.rb +0 -108
  51. data/lib/ms/parser/mzdata/axml.rb +0 -67
  52. data/lib/ms/parser/mzdata/dom.rb +0 -175
  53. data/lib/ms/parser/mzdata/libxml.rb +0 -7
  54. data/lib/ms/parser/mzdata.rb +0 -31
  55. data/lib/ms/parser/mzxml/axml.rb +0 -70
  56. data/lib/ms/parser/mzxml/dom.rb +0 -182
  57. data/lib/ms/parser/mzxml/hpricot.rb +0 -253
  58. data/lib/ms/parser/mzxml/libxml.rb +0 -19
  59. data/lib/ms/parser/mzxml/regexp.rb +0 -122
  60. data/lib/ms/parser/mzxml/rexml.rb +0 -72
  61. data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
  62. data/lib/ms/parser/mzxml.rb +0 -282
  63. data/lib/ms/parser.rb +0 -108
  64. data/lib/ms/precursor.rb +0 -25
  65. data/lib/ms/scan.rb +0 -81
  66. data/lib/mspire.rb +0 -4
  67. data/lib/pi_zero.rb +0 -244
  68. data/lib/qvalue.rb +0 -161
  69. data/lib/roc.rb +0 -187
  70. data/lib/sample_enzyme.rb +0 -160
  71. data/lib/scan_i.rb +0 -21
  72. data/lib/spec_id/aa_freqs.rb +0 -170
  73. data/lib/spec_id/bioworks.rb +0 -497
  74. data/lib/spec_id/digestor.rb +0 -138
  75. data/lib/spec_id/mass.rb +0 -179
  76. data/lib/spec_id/parser/proph.rb +0 -335
  77. data/lib/spec_id/precision/filter/cmdline.rb +0 -218
  78. data/lib/spec_id/precision/filter/interactive.rb +0 -134
  79. data/lib/spec_id/precision/filter/output.rb +0 -148
  80. data/lib/spec_id/precision/filter.rb +0 -637
  81. data/lib/spec_id/precision/output.rb +0 -60
  82. data/lib/spec_id/precision/prob/cmdline.rb +0 -160
  83. data/lib/spec_id/precision/prob/output.rb +0 -94
  84. data/lib/spec_id/precision/prob.rb +0 -249
  85. data/lib/spec_id/proph/pep_summary.rb +0 -104
  86. data/lib/spec_id/proph/prot_summary.rb +0 -484
  87. data/lib/spec_id/proph.rb +0 -4
  88. data/lib/spec_id/protein_summary.rb +0 -489
  89. data/lib/spec_id/sequest/params.rb +0 -316
  90. data/lib/spec_id/sequest/pepxml.rb +0 -1458
  91. data/lib/spec_id/sequest.rb +0 -33
  92. data/lib/spec_id/sqt.rb +0 -349
  93. data/lib/spec_id/srf.rb +0 -973
  94. data/lib/spec_id.rb +0 -778
  95. data/lib/spec_id_xml.rb +0 -99
  96. data/lib/transmem/phobius.rb +0 -147
  97. data/lib/transmem/toppred.rb +0 -368
  98. data/lib/transmem.rb +0 -157
  99. data/lib/validator/aa.rb +0 -48
  100. data/lib/validator/aa_est.rb +0 -112
  101. data/lib/validator/background.rb +0 -77
  102. data/lib/validator/bias.rb +0 -95
  103. data/lib/validator/cmdline.rb +0 -431
  104. data/lib/validator/decoy.rb +0 -107
  105. data/lib/validator/digestion_based.rb +0 -70
  106. data/lib/validator/probability.rb +0 -51
  107. data/lib/validator/prot_from_pep.rb +0 -234
  108. data/lib/validator/q_value.rb +0 -32
  109. data/lib/validator/transmem.rb +0 -272
  110. data/lib/validator/true_pos.rb +0 -46
  111. data/lib/validator.rb +0 -197
  112. data/lib/xml.rb +0 -38
  113. data/lib/xml_style_parser.rb +0 -119
  114. data/lib/xmlparser_wrapper.rb +0 -19
  115. data/release_notes.txt +0 -2
  116. data/script/compile_and_plot_smriti_final.rb +0 -97
  117. data/script/create_little_pepxml.rb +0 -61
  118. data/script/degenerate_peptides.rb +0 -47
  119. data/script/estimate_fpr_by_cysteine.rb +0 -226
  120. data/script/extract_gradient_programs.rb +0 -56
  121. data/script/find_cysteine_background.rb +0 -137
  122. data/script/genuine_tps_and_probs.rb +0 -136
  123. data/script/get_apex_values_rexml.rb +0 -44
  124. data/script/histogram_probs.rb +0 -61
  125. data/script/mascot_fix_pepxml.rb +0 -123
  126. data/script/msvis.rb +0 -42
  127. data/script/mzXML2timeIndex.rb +0 -25
  128. data/script/peps_per_bin.rb +0 -67
  129. data/script/prep_dir.rb +0 -121
  130. data/script/simple_protein_digestion.rb +0 -27
  131. data/script/smriti_final_analysis.rb +0 -103
  132. data/script/sqt_to_meta.rb +0 -24
  133. data/script/top_hit_per_scan.rb +0 -67
  134. data/script/toppred_to_yaml.rb +0 -47
  135. data/script/tpp_installer.rb +0 -249
  136. data/specs/align_spec.rb +0 -79
  137. data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
  138. data/specs/bin/fasta_shaker_spec.rb +0 -259
  139. data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
  140. data/specs/bin/filter_and_validate_spec.rb +0 -180
  141. data/specs/bin/ms_to_lmat_spec.rb +0 -34
  142. data/specs/bin/prob_validate_spec.rb +0 -86
  143. data/specs/bin/protein_summary_spec.rb +0 -14
  144. data/specs/fasta_spec.rb +0 -354
  145. data/specs/gi_spec.rb +0 -22
  146. data/specs/load_bin_path.rb +0 -7
  147. data/specs/merge_deep_spec.rb +0 -13
  148. data/specs/ms/gradient_program_spec.rb +0 -77
  149. data/specs/ms/msrun_spec.rb +0 -498
  150. data/specs/ms/parser_spec.rb +0 -92
  151. data/specs/ms/spectrum_spec.rb +0 -87
  152. data/specs/pi_zero_spec.rb +0 -115
  153. data/specs/qvalue_spec.rb +0 -39
  154. data/specs/roc_spec.rb +0 -251
  155. data/specs/rspec_autotest.rb +0 -149
  156. data/specs/sample_enzyme_spec.rb +0 -126
  157. data/specs/spec_helper.rb +0 -135
  158. data/specs/spec_id/aa_freqs_spec.rb +0 -52
  159. data/specs/spec_id/bioworks_spec.rb +0 -148
  160. data/specs/spec_id/digestor_spec.rb +0 -75
  161. data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
  162. data/specs/spec_id/precision/filter/output_spec.rb +0 -31
  163. data/specs/spec_id/precision/filter_spec.rb +0 -246
  164. data/specs/spec_id/precision/prob_spec.rb +0 -44
  165. data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
  166. data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
  167. data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
  168. data/specs/spec_id/protein_summary_spec.rb +0 -189
  169. data/specs/spec_id/sequest/params_spec.rb +0 -68
  170. data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
  171. data/specs/spec_id/sequest_spec.rb +0 -38
  172. data/specs/spec_id/sqt_spec.rb +0 -246
  173. data/specs/spec_id/srf_spec.rb +0 -172
  174. data/specs/spec_id/srf_spec_helper.rb +0 -139
  175. data/specs/spec_id_helper.rb +0 -33
  176. data/specs/spec_id_spec.rb +0 -366
  177. data/specs/spec_id_xml_spec.rb +0 -33
  178. data/specs/transmem/phobius_spec.rb +0 -425
  179. data/specs/transmem/toppred_spec.rb +0 -298
  180. data/specs/transmem_spec.rb +0 -60
  181. data/specs/transmem_spec_shared.rb +0 -64
  182. data/specs/validator/aa_est_spec.rb +0 -66
  183. data/specs/validator/aa_spec.rb +0 -40
  184. data/specs/validator/background_spec.rb +0 -67
  185. data/specs/validator/bias_spec.rb +0 -122
  186. data/specs/validator/decoy_spec.rb +0 -51
  187. data/specs/validator/fasta_helper.rb +0 -26
  188. data/specs/validator/prot_from_pep_spec.rb +0 -141
  189. data/specs/validator/transmem_spec.rb +0 -146
  190. data/specs/validator/true_pos_spec.rb +0 -58
  191. data/specs/validator_helper.rb +0 -33
  192. data/specs/xml_spec.rb +0 -12
  193. data/test_files/000_pepxml18_small.xml +0 -206
  194. data/test_files/020a.mzXML.timeIndex +0 -4710
  195. data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
  196. data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
  197. data/test_files/4-03-03_small-prot.xml +0 -321
  198. data/test_files/4-03-03_small.xml +0 -3876
  199. data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
  200. data/test_files/bioworks-3.3_10prots.xml +0 -5999
  201. data/test_files/bioworks31.params +0 -77
  202. data/test_files/bioworks32.params +0 -62
  203. data/test_files/bioworks33.params +0 -63
  204. data/test_files/bioworks_single_run_small.xml +0 -7237
  205. data/test_files/bioworks_small.fasta +0 -212
  206. data/test_files/bioworks_small.params +0 -63
  207. data/test_files/bioworks_small.phobius +0 -109
  208. data/test_files/bioworks_small.toppred.out +0 -2847
  209. data/test_files/bioworks_small.xml +0 -5610
  210. data/test_files/bioworks_with_INV_small.xml +0 -3753
  211. data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
  212. data/test_files/corrupted_900.srf +0 -0
  213. data/test_files/head_of_7MIX.srf +0 -0
  214. data/test_files/interact-opd1_mods_small-prot.xml +0 -304
  215. data/test_files/messups.fasta +0 -297
  216. data/test_files/opd1/000.my_answer.100lines.xml +0 -101
  217. data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
  218. data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
  219. data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
  220. data/test_files/opd1/000_020-prot.png +0 -0
  221. data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
  222. data/test_files/opd1/000_020_3prots-prot.xml +0 -62
  223. data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
  224. data/test_files/opd1/sequest.3.1.params +0 -77
  225. data/test_files/opd1/sequest.3.2.params +0 -62
  226. data/test_files/opd1/twenty_scans.mzXML +0 -418
  227. data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
  228. data/test_files/opd1/twenty_scans_answ.lmat +0 -0
  229. data/test_files/opd1/twenty_scans_answ.lmata +0 -9
  230. data/test_files/opd1_020_beginning.RAW +0 -0
  231. data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
  232. data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
  233. data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
  234. data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
  235. data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
  236. data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
  237. data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
  238. data/test_files/pepproph_small.xml +0 -4691
  239. data/test_files/phobius.small.noheader.txt +0 -50
  240. data/test_files/phobius.small.small.txt +0 -53
  241. data/test_files/s01_anC1_ld020mM.key.txt +0 -25
  242. data/test_files/s01_anC1_ld020mM.meth +0 -0
  243. data/test_files/small.fasta +0 -297
  244. data/test_files/small.sqt +0 -87
  245. data/test_files/smallraw.RAW +0 -0
  246. data/test_files/tf_bioworks2excel.bioXML +0 -14340
  247. data/test_files/tf_bioworks2excel.txt.actual +0 -1035
  248. data/test_files/toppred.small.out +0 -416
  249. data/test_files/toppred.xml.out +0 -318
  250. data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
  251. data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
  252. data/test_files/yeast_gly_small-prot.xml +0 -265
  253. data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
  254. data/test_files/yeast_gly_small.xml +0 -3807
  255. data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/ms/spectrum.rb CHANGED
@@ -1,384 +1,25 @@
1
- require 'base64'
2
- require 'bsearch'
3
- require 'ms'
4
-
5
- class MS::Spectrum
6
-
7
- Unpack_network_float = 'g*'
8
- Unpack_network_double = 'G*'
9
- Unpack_little_endian_float = 'e*'
10
- Unpack_little_endian_double = 'E*'
11
-
12
- # m/z's
13
- attr_accessor :mzs
14
- # intensities
15
- attr_accessor :intensities
16
-
17
- #######################
18
- ## CLASS METHODS:
19
- #######################
20
-
21
- # an already decoded string (ready to be unpacked as floating point numbers)
22
- def self.string_to_array(string, precision=32, network_order=true)
23
- unpack_code =
24
- if network_order
25
- if precision == 32
26
- Unpack_network_float
27
- elsif precision == 64
28
- Unpack_network_double
29
- end
30
- else ## little endian
31
- if precision == 32
32
- Unpack_little_endian_float
33
- elsif precision == 64
34
- Unpack_little_endian_double
35
- end
36
- end
37
- string.unpack(unpack_code)
38
- end
39
-
40
- # takes a base64 string and returns an array
41
- def self.base64_to_array(b64_string, precision=32, network_order=true)
42
- self.string_to_array(Base64.decode64(b64_string), precision, network_order)
43
- end
44
-
45
-
46
- def self.mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
47
- data = base64_to_array(b64_string, precision, network_order)
48
- sz = data.size/2
49
- mz_ar = Array.new(sz)
50
- intensity_ar = Array.new(sz)
51
- ndata = []
52
- my_ind = 0
53
- data.each_with_index do |dat,ind|
54
- if (ind % 2) == 0 # even
55
- mz_ar[my_ind] = dat
56
- else
57
- intensity_ar[my_ind] = dat
58
- my_ind += 1
59
- end
60
- end
61
- [mz_ar, intensity_ar]
62
- end
63
-
64
- # takes a base64 peaks string and sets spectrum
65
- # returns self for chaining
66
- def self.from_base64_peaks(b64_string, precision=32, network_order=true)
67
- (mz_ar, intensity_ar) = self.mzs_and_intensities_from_base64_peaks(b64_string, precision, network_order)
68
- self.new(mz_ar, intensity_ar)
69
- end
70
-
71
- def self.from_base64_pair(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
72
- mz_ar = base64_to_array(mz_string, mz_precision, mz_network_order)
73
- inten_ar = base64_to_array(intensity_string, intensity_precision, intensity_network_order)
74
- self.new(mz_ar, inten_ar)
75
- end
76
-
77
- def initialize(mz_ar=[], intensity_ar=[])
78
- @mzs = mz_ar
79
- @intensities = intensity_ar
80
- end
81
-
82
- def has_mz_data?
83
- @mzs && (@mzs.size > 0) && (@mzs.first.is_a?(Numeric))
84
- end
85
-
86
- def has_intensity_data?
87
- @intensities && (@intensities.size > 0) && (@intensities.first.is_a?(Numeric))
88
- end
89
-
90
- # returns the index of the first value matching that m/z. the argument m/z
91
- # may be less precise than the actual m/z (rounding to the same precision
92
- # given) but must be at least integer precision (after rounding)
93
- # implemented as binary search (bsearch from the web)
94
- def index(mz)
95
- mz_ar = mzs
96
- return_val = nil
97
- ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
98
- if mz_ar[ind] == mz
99
- return_val = ind
100
- else
101
- # do a rounding game to see which one is it, or nil
102
- # find all the values rounding to the same integer in the locale
103
- # test each one fully in turn
104
- mz = mz.to_f
105
- mz_size = mz_ar.size
106
- if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
107
- return_val = ind
108
- else # run the loop
109
- up = ind
110
- loop do
111
- up += 1
112
- if up >= mz_size
113
- break
114
- end
115
- mz_up = mz_ar[up]
116
- if (mz_up.ceil - mz.ceil >= 2)
117
- break
118
- else
119
- if equal_after_rounding?(mz_up, mz)
120
- return_val = up
121
- return return_val
122
- end
123
- end
124
- end
125
- dn= ind
126
- loop do
127
- dn -= 1
128
- if dn < 0
129
- break
130
- end
131
- mz_dn = mz_ar[dn]
132
- if (mz.floor - mz_dn.floor >= 2)
133
- break
134
- else
135
- if equal_after_rounding?(mz_dn, mz)
136
- return_val = dn
137
- return return_val
138
- end
139
- end
140
- end
141
- end
142
- end
143
- return_val
144
- end
145
-
146
- # uses index function and returns the intensity at that value
147
- def intensity_at_mz(mz)
148
- if x = index(mz)
149
- intensities[x]
150
- else
151
- nil
152
- end
153
- end
154
-
155
- # less_precise should be a float
156
- # precise should be a float
157
- def equal_after_rounding?(precise, less_precise)
158
- # determine the precision of less_precise
159
- exp10 = precision_as_neg_int(less_precise)
160
- #puts "EXP10: #{exp10}"
161
- answ = ((precise*exp10).round == (less_precise*exp10).round)
162
- #puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
163
- #puts answ
164
- (precise*exp10).round == (less_precise*exp10).round
165
- end
166
-
167
- # returns 1 for ones place, 10 for tenths, 100 for hundredths
168
- # to a precision exceeding 1e-6
169
- def precision_as_neg_int(float)
170
- neg_exp10 = 1
171
- loop do
172
- over = float * neg_exp10
173
- rounded = over.round
174
- if (over - rounded).abs <= 1e-6
175
- break
176
- end
177
- neg_exp10 *= 10
178
- end
179
- neg_exp10
180
- end
181
-
182
- ######
183
- # NOT REALLY USING RIGHT NOW:
184
- ######
185
-
186
- # takes a base64 peaks string and returns an array of [m/z,intensity] doublets
187
- # mzXML as network ordered
188
- def base64_peaks_to_pairs(string, precision=32)
189
- data = base64_peaks_to_array(string, precision)
190
- ndata = []
191
- data.each_with_index do |dat,ind|
192
- if (ind % 2) == 0 # even
193
- arr = Array.new(2)
194
- arr[0] = dat
195
- ndata.push( arr )
196
- else
197
- ndata.last[1] = dat
198
- end
199
- end
200
- ndata
201
- end
202
-
203
- end
204
-
205
- # This implements a spectrum that stores itself as string data and only
206
- # evaluates the information when it is called
207
- class MS::Spectrum::LazyString < MS::Spectrum
208
-
209
- undef mzs=
210
- undef intensities=
211
-
212
- # beware that this converts the information in @mz_string every time it is
213
- # called
214
- def mzs
215
- MS::Spectrum.string_to_array(@mz_string, @mz_precision, @mz_network_order)
216
- end
217
-
218
- # beware that this converts the information in @intensity_string every time
219
- # it is
220
- def intensities
221
- MS::Spectrum.string_to_array(@intensity_string, @intensity_precision, @intensity_network_order)
222
- end
223
-
224
- # this takes a decoded base64 string that is then interpreted when
225
- # information is accessed
226
- def initialize(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
227
- @mz_string = mz_string
228
- @mz_precision = mz_precision
229
- @mz_network_order = mz_network_order
230
- @intensity_string = intensity_string
231
- @intensity_precision = intensity_precision
232
- @intensity_network_order = intensity_network_order
233
- end
234
-
235
- # from mzXML files where information is held in peaks (m/z, intensity,
236
- # m/z...)
237
- def self.from_base64_peaks(b64_string, precision=32, network_order=true)
238
- # decode
239
- string = Base64.decode64(b64_string)
240
- # split into two strings:
241
- bytes_per_number = precision / 8
242
- s_size = string.size
243
- num_numbers = s_size / bytes_per_number
244
- mz_pieces = Array.new(num_numbers)
245
- intensity_pieces = Array.new(num_numbers)
246
- index = 0
247
- (0...string.size).step(bytes_per_number) do |i|
248
- if index % 2 == 0
249
- mz_pieces[index] = string[i,bytes_per_number]
250
- else
251
- intensity_pieces[index] = string[i,bytes_per_number]
252
- end
253
- index += 1
254
- end
255
- self.new(mz_pieces.join, precision, network_order, intensity_pieces.join, precision, network_order)
256
- end
257
-
258
- # from mzML and mzData style files where mz and intensity information are
259
- # kept in different strings.
260
- def self.from_base64_pair(b64_mz_string, mz_precision, mz_network_order, b64_intensity_string, intensity_precision, intensity_network_order)
261
- self.new(Base64.decode64(b64_mz_string), mz_precision, mz_network_order, Base64.decode64(b64_intensity_string), intensity_precision, intensity_network_order)
262
- end
263
-
264
- def has_mz_data?
265
- @mz_string.is_a?(String) && @mz_precision && !@mz_network_order.nil?
266
- end
267
-
268
- def has_intensity_data?
269
- @intensity_string.is_a?(String) && @intensity_precision && !@intensity_network_order.nil?
270
- end
271
-
272
- end
273
-
274
- module MS::Spectrum::LazyIO
275
- def self.new(*args)
276
- if args.size == 5 # mzXMl
277
- MS::Spectrum::LazyIO::Peaks.new(*args)
278
- elsif args.size == 9 # other
279
- MS::Spectrum::LazyIO::Pair.new(*args)
280
- else
281
- raise RunTimeError, "must give 5 or 7 args for peak data and pair data respectively"
282
- end
283
- end
284
- end
285
-
286
-
287
- # stores an io object and the start and end indices and only evaluates the
288
- # spectrum when information is requested
289
- class MS::Spectrum::LazyIO::Pair < MS::Spectrum
290
- include MS::Spectrum::LazyIO
291
-
292
- undef mzs=
293
- undef intensities=
294
-
295
- def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
296
- @io = io
297
-
298
- @mz_start_index = mz_start_index
299
- @mz_num_bytes = mz_num_bytes
300
- @mz_precision = mz_precision
301
- @mz_network_order = mz_network_order
302
-
303
- @intensity_start_index = intensity_start_index
304
- @intensity_num_bytes = intensity_num_bytes
305
- @intensity_precision = intensity_precision
306
- @intensity_network_order = intensity_network_order
307
-
308
- end
309
-
310
- # beware that this converts the information on disk every time it is called.
311
- def mzs
312
- @io.pos = @mz_start_index
313
- b64_string = @io.read(@mz_num_bytes)
314
- MS::Spectrum.base64_to_array(b64_string, @mz_precision, @mz_network_order)
315
- end
316
-
317
- # beware that this converts the information in @intensity_string every time
318
- # it is called.
319
- def intensities
320
- @io.pos = @intensity_start_index
321
- b64_string = @io.read(@intensity_num_bytes)
322
- MS::Spectrum.base64_to_array(b64_string, @intensity_precision, @intensity_network_order)
323
- end
324
-
325
- def has_mz_data?
326
- (!@io.closed?) && @mz_start_index && @mz_num_bytes && @mz_precision && !@mz_network_order.nil?
327
- end
328
-
329
- def has_intensity_data?
330
- (!@io.closed?) && @intensity_start_index && @intensity_num_bytes && @intensity_precision && !@intensity_network_order.nil?
331
- end
332
-
333
- end
334
-
335
- class MS::Spectrum::LazyIO::Peaks < MS::Spectrum
336
- include MS::Spectrum::LazyIO
337
-
338
- undef mzs=
339
- undef intensities=
340
-
341
- def initialize(io, start_index, num_bytes, precision, network_order)
342
- @io = io
343
- @start_index = start_index
344
- @num_bytes = num_bytes
345
- @precision = precision
346
- @network_order = network_order
347
- end
348
-
349
- # returns two arrays: an array of m/z values and an array of intensity
350
- # values. This is the preferred way to access mzXML file information under
351
- # lazy evaluation
352
- def mzs_and_intensities
353
- @io.pos = @start_index
354
- b64_string = @io.read(@num_bytes)
355
- MS::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
356
- end
357
-
358
- # when using 'io' lazy evaluation on files with m/z and intensity data
359
- # interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
360
- # if you are using both mz and intensity data.
361
- def mzs
362
- # TODO: this can be made slightly faster
363
- mzs_and_intensities.first
364
- end
365
-
366
- # when using 'io' lazy evaluation on files with m/z and intensity data
367
- # interwoven (i.e., mzXML) it is more efficient to call
368
- # 'mzs_and_intensities'
369
- # if you are using both mz and intensity data.
370
- def intensities
371
- # TODO: this can be made slightly faster
372
- mzs_and_intensities.last
373
- end
374
-
375
-
376
- def has_mz_data?
377
- (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
378
- end
379
-
380
- def has_intensity_data?
381
- (!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
382
- end
383
-
384
- end
1
+ module Ms
2
+ class Spectrum
3
+ # The underlying data store.
4
+ attr_reader :data
5
+
6
+ # Associated headers
7
+ attr_reader :headers
8
+
9
+ def initialize(data, headers={})
10
+ @data = data
11
+ @headers = headers
12
+ end
13
+
14
+ # An array of the mz data.
15
+ def mzs
16
+ @data[0]
17
+ end
18
+
19
+ # An array of the intensities data, corresponding to mzs.
20
+ def intensities
21
+ @data[1]
22
+ end
23
+
24
+ end
25
+ end
data/lib/ms/support/binary_search.rb ADDED
@@ -0,0 +1,126 @@
1
+ module Ms
2
+ module Support
3
+
4
+ # A binary search library adapted from: http://0xcc.net/ruby-bsearch/
5
+ # ---
6
+ #
7
+ # Ruby/Bsearch - a binary search library for Ruby.
8
+ #
9
+ # Copyright (C) 2001 Satoru Takabayashi <satoru@namazu.org>
10
+ # All rights reserved.
11
+ # This is free software with ABSOLUTELY NO WARRANTY.
12
+ #
13
+ # You can redistribute it and/or modify it under the terms of
14
+ # the Ruby's licence.
15
+ #
16
+ # Example:
17
+ #
18
+ # % irb -r ./bsearch.rb
19
+ # >> %w(a b c c c d e f).bsearch_first {|x| x <=> "c"}
20
+ # => 2
21
+ # >> %w(a b c c c d e f).bsearch_last {|x| x <=> "c"}
22
+ # => 4
23
+ # >> %w(a b c e f).bsearch_first {|x| x <=> "c"}
24
+ # => 2
25
+ # >> %w(a b e f).bsearch_first {|x| x <=> "c"}
26
+ # => nil
27
+ # >> %w(a b e f).bsearch_last {|x| x <=> "c"}
28
+ # => nil
29
+ # >> %w(a b e f).bsearch_lower_boundary {|x| x <=> "c"}
30
+ # => 2
31
+ # >> %w(a b e f).bsearch_upper_boundary {|x| x <=> "c"}
32
+ # => 2
33
+ # >> %w(a b c c c d e f).bsearch_range {|x| x <=> "c"}
34
+ # => 2...5
35
+ # >> %w(a b c d e f).bsearch_range {|x| x <=> "c"}
36
+ # => 2...3
37
+ # >> %w(a b d e f).bsearch_range {|x| x <=> "c"}
38
+ # => 2...2
39
+ #
40
+ # The binary search algorithm is extracted from Jon Bentley's
41
+ # Programming Pearls 2nd ed. p.93
42
+ #
43
+ module BinarySearch
44
+ VERSION = '1.5'
45
+
46
+ module_function
47
+
48
+ #
49
+ # Return the lower boundary. (inside)
50
+ #
51
+ def search_lower_boundary(array, range=nil, &block)
52
+ range = 0 ... array.length if range == nil
53
+
54
+ lower = range.first() -1
55
+ upper = if range.exclude_end? then range.last else range.last + 1 end
56
+ while lower + 1 != upper
57
+ mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
58
+ if yield(array[mid]) < 0
59
+ lower = mid
60
+ else
61
+ upper = mid
62
+ end
63
+ end
64
+ return upper
65
+ end
66
+
67
+ #
68
+ # This method searches the FIRST occurrence which satisfies a
69
+ # condition given by a block in binary fashion and return the
70
+ # index of the first occurrence. Return nil if not found.
71
+ #
72
+ def search_first(array, range=nil, &block)
73
+ boundary = search_lower_boundary(array, range, &block)
74
+ if boundary >= array.length || yield(array[boundary]) != 0
75
+ return nil
76
+ else
77
+ return boundary
78
+ end
79
+ end
80
+
81
+ #
82
+ # Return the upper boundary. (outside)
83
+ #
84
+ def search_upper_boundary(array, range=nil, &block)
85
+ range = 0 ... array.length if range == nil
86
+
87
+ lower = range.first() -1
88
+ upper = if range.exclude_end? then range.last else range.last + 1 end
89
+ while lower + 1 != upper
90
+ mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
91
+ if yield(array[mid]) <= 0
92
+ lower = mid
93
+ else
94
+ upper = mid
95
+ end
96
+ end
97
+ return lower + 1 # outside of the matching range.
98
+ end
99
+
100
+ #
101
+ # This method searches the LAST occurrence which satisfies a
102
+ # condition given by a block in binary fashion and return the
103
+ # index of the last occurrence. Return nil if not found.
104
+ #
105
+ def search_last(array, range=nil, &block)
106
+ # `- 1' for canceling `lower + 1' in bsearch_upper_boundary.
107
+ boundary = search_upper_boundary(array, range, &block) - 1
108
+
109
+ if (boundary <= -1 || yield(array[boundary]) != 0)
110
+ return nil
111
+ else
112
+ return boundary
113
+ end
114
+ end
115
+
116
+ #
117
+ # Return the search result as a Range object.
118
+ #
119
+ def search_range(array, range=nil, &block)
120
+ lower = search_lower_boundary(array, range, &block)
121
+ upper = search_upper_boundary(array, range, &block)
122
+ return lower ... upper
123
+ end
124
+ end
125
+ end
126
+ end
data/lib/ms.rb CHANGED
@@ -1,10 +1,10 @@
1
-
2
-
3
- module MS
4
- attr_accessor :spectra
5
-
6
- # should
7
- def new(file=nil)
8
- end
9
-
10
- end
1
+ module Ms
2
+ module_function
3
+
4
+ # def parse(format, path)
5
+ # const = Tap::Env.instance.search(:formats, format)
6
+ # raise ArgumentError, "unknown format: #{format}" unless const
7
+ # const.constantize.parse(path)
8
+ # end
9
+
10
+ end