mspire 0.4.9 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +27 -17
- data/changelog.txt +31 -62
- data/lib/ms/calc.rb +32 -0
- data/lib/ms/data/interleaved.rb +60 -0
- data/lib/ms/data/lazy_io.rb +73 -0
- data/lib/ms/data/lazy_string.rb +15 -0
- data/lib/ms/data/simple.rb +59 -0
- data/lib/ms/data/transposed.rb +41 -0
- data/lib/ms/data.rb +57 -0
- data/lib/ms/format/format_error.rb +12 -0
- data/lib/ms/spectrum.rb +25 -384
- data/lib/ms/support/binary_search.rb +126 -0
- data/lib/ms.rb +10 -10
- metadata +38 -350
- data/INSTALL +0 -58
- data/README.rdoc +0 -18
- data/Rakefile +0 -330
- data/bin/aafreqs.rb +0 -23
- data/bin/bioworks2excel.rb +0 -14
- data/bin/bioworks_to_pepxml.rb +0 -148
- data/bin/bioworks_to_pepxml_gui.rb +0 -225
- data/bin/fasta_shaker.rb +0 -5
- data/bin/filter_and_validate.rb +0 -5
- data/bin/gi2annot.rb +0 -14
- data/bin/id_class_anal.rb +0 -112
- data/bin/id_precision.rb +0 -172
- data/bin/ms_to_lmat.rb +0 -67
- data/bin/pepproph_filter.rb +0 -16
- data/bin/prob_validate.rb +0 -6
- data/bin/protein_summary.rb +0 -6
- data/bin/protxml2prots_peps.rb +0 -32
- data/bin/raw_to_mzXML.rb +0 -55
- data/bin/run_percolator.rb +0 -122
- data/bin/sqt_group.rb +0 -26
- data/bin/srf_group.rb +0 -27
- data/bin/srf_to_sqt.rb +0 -40
- data/lib/align/chams.rb +0 -78
- data/lib/align.rb +0 -154
- data/lib/archive/targz.rb +0 -94
- data/lib/bsearch.rb +0 -120
- data/lib/core_extensions.rb +0 -16
- data/lib/fasta.rb +0 -626
- data/lib/gi.rb +0 -124
- data/lib/group_by.rb +0 -10
- data/lib/index_by.rb +0 -11
- data/lib/merge_deep.rb +0 -21
- data/lib/ms/converter/mzxml.rb +0 -77
- data/lib/ms/gradient_program.rb +0 -170
- data/lib/ms/msrun.rb +0 -244
- data/lib/ms/msrun_index.rb +0 -108
- data/lib/ms/parser/mzdata/axml.rb +0 -67
- data/lib/ms/parser/mzdata/dom.rb +0 -175
- data/lib/ms/parser/mzdata/libxml.rb +0 -7
- data/lib/ms/parser/mzdata.rb +0 -31
- data/lib/ms/parser/mzxml/axml.rb +0 -70
- data/lib/ms/parser/mzxml/dom.rb +0 -182
- data/lib/ms/parser/mzxml/hpricot.rb +0 -253
- data/lib/ms/parser/mzxml/libxml.rb +0 -19
- data/lib/ms/parser/mzxml/regexp.rb +0 -122
- data/lib/ms/parser/mzxml/rexml.rb +0 -72
- data/lib/ms/parser/mzxml/xmlparser.rb +0 -248
- data/lib/ms/parser/mzxml.rb +0 -282
- data/lib/ms/parser.rb +0 -108
- data/lib/ms/precursor.rb +0 -25
- data/lib/ms/scan.rb +0 -81
- data/lib/mspire.rb +0 -4
- data/lib/pi_zero.rb +0 -244
- data/lib/qvalue.rb +0 -161
- data/lib/roc.rb +0 -187
- data/lib/sample_enzyme.rb +0 -160
- data/lib/scan_i.rb +0 -21
- data/lib/spec_id/aa_freqs.rb +0 -170
- data/lib/spec_id/bioworks.rb +0 -497
- data/lib/spec_id/digestor.rb +0 -138
- data/lib/spec_id/mass.rb +0 -179
- data/lib/spec_id/parser/proph.rb +0 -335
- data/lib/spec_id/precision/filter/cmdline.rb +0 -218
- data/lib/spec_id/precision/filter/interactive.rb +0 -134
- data/lib/spec_id/precision/filter/output.rb +0 -148
- data/lib/spec_id/precision/filter.rb +0 -637
- data/lib/spec_id/precision/output.rb +0 -60
- data/lib/spec_id/precision/prob/cmdline.rb +0 -160
- data/lib/spec_id/precision/prob/output.rb +0 -94
- data/lib/spec_id/precision/prob.rb +0 -249
- data/lib/spec_id/proph/pep_summary.rb +0 -104
- data/lib/spec_id/proph/prot_summary.rb +0 -484
- data/lib/spec_id/proph.rb +0 -4
- data/lib/spec_id/protein_summary.rb +0 -489
- data/lib/spec_id/sequest/params.rb +0 -316
- data/lib/spec_id/sequest/pepxml.rb +0 -1458
- data/lib/spec_id/sequest.rb +0 -33
- data/lib/spec_id/sqt.rb +0 -349
- data/lib/spec_id/srf.rb +0 -973
- data/lib/spec_id.rb +0 -778
- data/lib/spec_id_xml.rb +0 -99
- data/lib/transmem/phobius.rb +0 -147
- data/lib/transmem/toppred.rb +0 -368
- data/lib/transmem.rb +0 -157
- data/lib/validator/aa.rb +0 -48
- data/lib/validator/aa_est.rb +0 -112
- data/lib/validator/background.rb +0 -77
- data/lib/validator/bias.rb +0 -95
- data/lib/validator/cmdline.rb +0 -431
- data/lib/validator/decoy.rb +0 -107
- data/lib/validator/digestion_based.rb +0 -70
- data/lib/validator/probability.rb +0 -51
- data/lib/validator/prot_from_pep.rb +0 -234
- data/lib/validator/q_value.rb +0 -32
- data/lib/validator/transmem.rb +0 -272
- data/lib/validator/true_pos.rb +0 -46
- data/lib/validator.rb +0 -197
- data/lib/xml.rb +0 -38
- data/lib/xml_style_parser.rb +0 -119
- data/lib/xmlparser_wrapper.rb +0 -19
- data/release_notes.txt +0 -2
- data/script/compile_and_plot_smriti_final.rb +0 -97
- data/script/create_little_pepxml.rb +0 -61
- data/script/degenerate_peptides.rb +0 -47
- data/script/estimate_fpr_by_cysteine.rb +0 -226
- data/script/extract_gradient_programs.rb +0 -56
- data/script/find_cysteine_background.rb +0 -137
- data/script/genuine_tps_and_probs.rb +0 -136
- data/script/get_apex_values_rexml.rb +0 -44
- data/script/histogram_probs.rb +0 -61
- data/script/mascot_fix_pepxml.rb +0 -123
- data/script/msvis.rb +0 -42
- data/script/mzXML2timeIndex.rb +0 -25
- data/script/peps_per_bin.rb +0 -67
- data/script/prep_dir.rb +0 -121
- data/script/simple_protein_digestion.rb +0 -27
- data/script/smriti_final_analysis.rb +0 -103
- data/script/sqt_to_meta.rb +0 -24
- data/script/top_hit_per_scan.rb +0 -67
- data/script/toppred_to_yaml.rb +0 -47
- data/script/tpp_installer.rb +0 -249
- data/specs/align_spec.rb +0 -79
- data/specs/bin/bioworks_to_pepxml_spec.rb +0 -79
- data/specs/bin/fasta_shaker_spec.rb +0 -259
- data/specs/bin/filter_and_validate__multiple_vals_helper.yaml +0 -199
- data/specs/bin/filter_and_validate_spec.rb +0 -180
- data/specs/bin/ms_to_lmat_spec.rb +0 -34
- data/specs/bin/prob_validate_spec.rb +0 -86
- data/specs/bin/protein_summary_spec.rb +0 -14
- data/specs/fasta_spec.rb +0 -354
- data/specs/gi_spec.rb +0 -22
- data/specs/load_bin_path.rb +0 -7
- data/specs/merge_deep_spec.rb +0 -13
- data/specs/ms/gradient_program_spec.rb +0 -77
- data/specs/ms/msrun_spec.rb +0 -498
- data/specs/ms/parser_spec.rb +0 -92
- data/specs/ms/spectrum_spec.rb +0 -87
- data/specs/pi_zero_spec.rb +0 -115
- data/specs/qvalue_spec.rb +0 -39
- data/specs/roc_spec.rb +0 -251
- data/specs/rspec_autotest.rb +0 -149
- data/specs/sample_enzyme_spec.rb +0 -126
- data/specs/spec_helper.rb +0 -135
- data/specs/spec_id/aa_freqs_spec.rb +0 -52
- data/specs/spec_id/bioworks_spec.rb +0 -148
- data/specs/spec_id/digestor_spec.rb +0 -75
- data/specs/spec_id/precision/filter/cmdline_spec.rb +0 -20
- data/specs/spec_id/precision/filter/output_spec.rb +0 -31
- data/specs/spec_id/precision/filter_spec.rb +0 -246
- data/specs/spec_id/precision/prob_spec.rb +0 -44
- data/specs/spec_id/precision/prob_spec_helper.rb +0 -0
- data/specs/spec_id/proph/pep_summary_spec.rb +0 -98
- data/specs/spec_id/proph/prot_summary_spec.rb +0 -128
- data/specs/spec_id/protein_summary_spec.rb +0 -189
- data/specs/spec_id/sequest/params_spec.rb +0 -68
- data/specs/spec_id/sequest/pepxml_spec.rb +0 -374
- data/specs/spec_id/sequest_spec.rb +0 -38
- data/specs/spec_id/sqt_spec.rb +0 -246
- data/specs/spec_id/srf_spec.rb +0 -172
- data/specs/spec_id/srf_spec_helper.rb +0 -139
- data/specs/spec_id_helper.rb +0 -33
- data/specs/spec_id_spec.rb +0 -366
- data/specs/spec_id_xml_spec.rb +0 -33
- data/specs/transmem/phobius_spec.rb +0 -425
- data/specs/transmem/toppred_spec.rb +0 -298
- data/specs/transmem_spec.rb +0 -60
- data/specs/transmem_spec_shared.rb +0 -64
- data/specs/validator/aa_est_spec.rb +0 -66
- data/specs/validator/aa_spec.rb +0 -40
- data/specs/validator/background_spec.rb +0 -67
- data/specs/validator/bias_spec.rb +0 -122
- data/specs/validator/decoy_spec.rb +0 -51
- data/specs/validator/fasta_helper.rb +0 -26
- data/specs/validator/prot_from_pep_spec.rb +0 -141
- data/specs/validator/transmem_spec.rb +0 -146
- data/specs/validator/true_pos_spec.rb +0 -58
- data/specs/validator_helper.rb +0 -33
- data/specs/xml_spec.rb +0 -12
- data/test_files/000_pepxml18_small.xml +0 -206
- data/test_files/020a.mzXML.timeIndex +0 -4710
- data/test_files/4-03-03_mzXML/000.mzXML.timeIndex +0 -3973
- data/test_files/4-03-03_mzXML/020.mzXML.timeIndex +0 -3872
- data/test_files/4-03-03_small-prot.xml +0 -321
- data/test_files/4-03-03_small.xml +0 -3876
- data/test_files/7MIX_STD_110802_1.sequest_params_fragment.srf +0 -0
- data/test_files/bioworks-3.3_10prots.xml +0 -5999
- data/test_files/bioworks31.params +0 -77
- data/test_files/bioworks32.params +0 -62
- data/test_files/bioworks33.params +0 -63
- data/test_files/bioworks_single_run_small.xml +0 -7237
- data/test_files/bioworks_small.fasta +0 -212
- data/test_files/bioworks_small.params +0 -63
- data/test_files/bioworks_small.phobius +0 -109
- data/test_files/bioworks_small.toppred.out +0 -2847
- data/test_files/bioworks_small.xml +0 -5610
- data/test_files/bioworks_with_INV_small.xml +0 -3753
- data/test_files/bioworks_with_SHUFF_small.xml +0 -2503
- data/test_files/corrupted_900.srf +0 -0
- data/test_files/head_of_7MIX.srf +0 -0
- data/test_files/interact-opd1_mods_small-prot.xml +0 -304
- data/test_files/messups.fasta +0 -297
- data/test_files/opd1/000.my_answer.100lines.xml +0 -101
- data/test_files/opd1/000.tpp_1.2.3.first10.xml +0 -115
- data/test_files/opd1/000.tpp_2.9.2.first10.xml +0 -126
- data/test_files/opd1/000.v2.1.mzXML.timeIndex +0 -3748
- data/test_files/opd1/000_020-prot.png +0 -0
- data/test_files/opd1/000_020_3prots-prot.mod_initprob.xml +0 -62
- data/test_files/opd1/000_020_3prots-prot.xml +0 -62
- data/test_files/opd1/opd1_cat_inv_small-prot.xml +0 -139
- data/test_files/opd1/sequest.3.1.params +0 -77
- data/test_files/opd1/sequest.3.2.params +0 -62
- data/test_files/opd1/twenty_scans.mzXML +0 -418
- data/test_files/opd1/twenty_scans.v2.1.mzXML +0 -382
- data/test_files/opd1/twenty_scans_answ.lmat +0 -0
- data/test_files/opd1/twenty_scans_answ.lmata +0 -9
- data/test_files/opd1_020_beginning.RAW +0 -0
- data/test_files/opd1_2runs_2mods/data/020.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/020.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/040.mzData.xml +0 -683
- data/test_files/opd1_2runs_2mods/data/040.readw.mzXML +0 -382
- data/test_files/opd1_2runs_2mods/data/README.txt +0 -6
- data/test_files/opd1_2runs_2mods/interact-opd1_mods__small.xml +0 -753
- data/test_files/orbitrap_mzData/000_cut.xml +0 -1920
- data/test_files/pepproph_small.xml +0 -4691
- data/test_files/phobius.small.noheader.txt +0 -50
- data/test_files/phobius.small.small.txt +0 -53
- data/test_files/s01_anC1_ld020mM.key.txt +0 -25
- data/test_files/s01_anC1_ld020mM.meth +0 -0
- data/test_files/small.fasta +0 -297
- data/test_files/small.sqt +0 -87
- data/test_files/smallraw.RAW +0 -0
- data/test_files/tf_bioworks2excel.bioXML +0 -14340
- data/test_files/tf_bioworks2excel.txt.actual +0 -1035
- data/test_files/toppred.small.out +0 -416
- data/test_files/toppred.xml.out +0 -318
- data/test_files/validator_hits_separate/bias_bioworks_small_HS.fasta +0 -7
- data/test_files/validator_hits_separate/bioworks_small_HS.xml +0 -5651
- data/test_files/yeast_gly_small-prot.xml +0 -265
- data/test_files/yeast_gly_small.1.0_1.0_1.0.parentTimes +0 -6
- data/test_files/yeast_gly_small.xml +0 -3807
- data/test_files/yeast_gly_small2.parentTimes +0 -6
data/lib/ms/spectrum.rb
CHANGED
|
@@ -1,384 +1,25 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
Unpack_network_float
|
|
27
|
-
elsif precision == 64
|
|
28
|
-
Unpack_network_double
|
|
29
|
-
end
|
|
30
|
-
else ## little endian
|
|
31
|
-
if precision == 32
|
|
32
|
-
Unpack_little_endian_float
|
|
33
|
-
elsif precision == 64
|
|
34
|
-
Unpack_little_endian_double
|
|
35
|
-
end
|
|
36
|
-
end
|
|
37
|
-
string.unpack(unpack_code)
|
|
38
|
-
end
|
|
39
|
-
|
|
40
|
-
# takes a base64 string and returns an array
|
|
41
|
-
def self.base64_to_array(b64_string, precision=32, network_order=true)
|
|
42
|
-
self.string_to_array(Base64.decode64(b64_string), precision, network_order)
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def self.mzs_and_intensities_from_base64_peaks(b64_string, precision=32, network_order=true)
|
|
47
|
-
data = base64_to_array(b64_string, precision, network_order)
|
|
48
|
-
sz = data.size/2
|
|
49
|
-
mz_ar = Array.new(sz)
|
|
50
|
-
intensity_ar = Array.new(sz)
|
|
51
|
-
ndata = []
|
|
52
|
-
my_ind = 0
|
|
53
|
-
data.each_with_index do |dat,ind|
|
|
54
|
-
if (ind % 2) == 0 # even
|
|
55
|
-
mz_ar[my_ind] = dat
|
|
56
|
-
else
|
|
57
|
-
intensity_ar[my_ind] = dat
|
|
58
|
-
my_ind += 1
|
|
59
|
-
end
|
|
60
|
-
end
|
|
61
|
-
[mz_ar, intensity_ar]
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# takes a base64 peaks string and sets spectrum
|
|
65
|
-
# returns self for chaining
|
|
66
|
-
def self.from_base64_peaks(b64_string, precision=32, network_order=true)
|
|
67
|
-
(mz_ar, intensity_ar) = self.mzs_and_intensities_from_base64_peaks(b64_string, precision, network_order)
|
|
68
|
-
self.new(mz_ar, intensity_ar)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def self.from_base64_pair(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
|
|
72
|
-
mz_ar = base64_to_array(mz_string, mz_precision, mz_network_order)
|
|
73
|
-
inten_ar = base64_to_array(intensity_string, intensity_precision, intensity_network_order)
|
|
74
|
-
self.new(mz_ar, inten_ar)
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def initialize(mz_ar=[], intensity_ar=[])
|
|
78
|
-
@mzs = mz_ar
|
|
79
|
-
@intensities = intensity_ar
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
def has_mz_data?
|
|
83
|
-
@mzs && (@mzs.size > 0) && (@mzs.first.is_a?(Numeric))
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def has_intensity_data?
|
|
87
|
-
@intensities && (@intensities.size > 0) && (@intensities.first.is_a?(Numeric))
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
# returns the index of the first value matching that m/z. the argument m/z
|
|
91
|
-
# may be less precise than the actual m/z (rounding to the same precision
|
|
92
|
-
# given) but must be at least integer precision (after rounding)
|
|
93
|
-
# implemented as binary search (bsearch from the web)
|
|
94
|
-
def index(mz)
|
|
95
|
-
mz_ar = mzs
|
|
96
|
-
return_val = nil
|
|
97
|
-
ind = mz_ar.bsearch_lower_boundary{|x| x <=> mz }
|
|
98
|
-
if mz_ar[ind] == mz
|
|
99
|
-
return_val = ind
|
|
100
|
-
else
|
|
101
|
-
# do a rounding game to see which one is it, or nil
|
|
102
|
-
# find all the values rounding to the same integer in the locale
|
|
103
|
-
# test each one fully in turn
|
|
104
|
-
mz = mz.to_f
|
|
105
|
-
mz_size = mz_ar.size
|
|
106
|
-
if ((ind < mz_size) and equal_after_rounding?(mz_ar[ind], mz))
|
|
107
|
-
return_val = ind
|
|
108
|
-
else # run the loop
|
|
109
|
-
up = ind
|
|
110
|
-
loop do
|
|
111
|
-
up += 1
|
|
112
|
-
if up >= mz_size
|
|
113
|
-
break
|
|
114
|
-
end
|
|
115
|
-
mz_up = mz_ar[up]
|
|
116
|
-
if (mz_up.ceil - mz.ceil >= 2)
|
|
117
|
-
break
|
|
118
|
-
else
|
|
119
|
-
if equal_after_rounding?(mz_up, mz)
|
|
120
|
-
return_val = up
|
|
121
|
-
return return_val
|
|
122
|
-
end
|
|
123
|
-
end
|
|
124
|
-
end
|
|
125
|
-
dn= ind
|
|
126
|
-
loop do
|
|
127
|
-
dn -= 1
|
|
128
|
-
if dn < 0
|
|
129
|
-
break
|
|
130
|
-
end
|
|
131
|
-
mz_dn = mz_ar[dn]
|
|
132
|
-
if (mz.floor - mz_dn.floor >= 2)
|
|
133
|
-
break
|
|
134
|
-
else
|
|
135
|
-
if equal_after_rounding?(mz_dn, mz)
|
|
136
|
-
return_val = dn
|
|
137
|
-
return return_val
|
|
138
|
-
end
|
|
139
|
-
end
|
|
140
|
-
end
|
|
141
|
-
end
|
|
142
|
-
end
|
|
143
|
-
return_val
|
|
144
|
-
end
|
|
145
|
-
|
|
146
|
-
# uses index function and returns the intensity at that value
|
|
147
|
-
def intensity_at_mz(mz)
|
|
148
|
-
if x = index(mz)
|
|
149
|
-
intensities[x]
|
|
150
|
-
else
|
|
151
|
-
nil
|
|
152
|
-
end
|
|
153
|
-
end
|
|
154
|
-
|
|
155
|
-
# less_precise should be a float
|
|
156
|
-
# precise should be a float
|
|
157
|
-
def equal_after_rounding?(precise, less_precise)
|
|
158
|
-
# determine the precision of less_precise
|
|
159
|
-
exp10 = precision_as_neg_int(less_precise)
|
|
160
|
-
#puts "EXP10: #{exp10}"
|
|
161
|
-
answ = ((precise*exp10).round == (less_precise*exp10).round)
|
|
162
|
-
#puts "TESTING FOR EQUAL: #{precise} #{less_precise}"
|
|
163
|
-
#puts answ
|
|
164
|
-
(precise*exp10).round == (less_precise*exp10).round
|
|
165
|
-
end
|
|
166
|
-
|
|
167
|
-
# returns 1 for ones place, 10 for tenths, 100 for hundredths
|
|
168
|
-
# to a precision exceeding 1e-6
|
|
169
|
-
def precision_as_neg_int(float)
|
|
170
|
-
neg_exp10 = 1
|
|
171
|
-
loop do
|
|
172
|
-
over = float * neg_exp10
|
|
173
|
-
rounded = over.round
|
|
174
|
-
if (over - rounded).abs <= 1e-6
|
|
175
|
-
break
|
|
176
|
-
end
|
|
177
|
-
neg_exp10 *= 10
|
|
178
|
-
end
|
|
179
|
-
neg_exp10
|
|
180
|
-
end
|
|
181
|
-
|
|
182
|
-
######
|
|
183
|
-
# NOT REALLY USING RIGHT NOW:
|
|
184
|
-
######
|
|
185
|
-
|
|
186
|
-
# takes a base64 peaks string and returns an array of [m/z,intensity] doublets
|
|
187
|
-
# mzXML as network ordered
|
|
188
|
-
def base64_peaks_to_pairs(string, precision=32)
|
|
189
|
-
data = base64_peaks_to_array(string, precision)
|
|
190
|
-
ndata = []
|
|
191
|
-
data.each_with_index do |dat,ind|
|
|
192
|
-
if (ind % 2) == 0 # even
|
|
193
|
-
arr = Array.new(2)
|
|
194
|
-
arr[0] = dat
|
|
195
|
-
ndata.push( arr )
|
|
196
|
-
else
|
|
197
|
-
ndata.last[1] = dat
|
|
198
|
-
end
|
|
199
|
-
end
|
|
200
|
-
ndata
|
|
201
|
-
end
|
|
202
|
-
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
# This implements a spectrum that stores itself as string data and only
|
|
206
|
-
# evaluates the information when it is called
|
|
207
|
-
class MS::Spectrum::LazyString < MS::Spectrum
|
|
208
|
-
|
|
209
|
-
undef mzs=
|
|
210
|
-
undef intensities=
|
|
211
|
-
|
|
212
|
-
# beware that this converts the information in @mz_string every time it is
|
|
213
|
-
# called
|
|
214
|
-
def mzs
|
|
215
|
-
MS::Spectrum.string_to_array(@mz_string, @mz_precision, @mz_network_order)
|
|
216
|
-
end
|
|
217
|
-
|
|
218
|
-
# beware that this converts the information in @intensity_string every time
|
|
219
|
-
# it is
|
|
220
|
-
def intensities
|
|
221
|
-
MS::Spectrum.string_to_array(@intensity_string, @intensity_precision, @intensity_network_order)
|
|
222
|
-
end
|
|
223
|
-
|
|
224
|
-
# this takes a decoded base64 string that is then interpreted when
|
|
225
|
-
# information is accessed
|
|
226
|
-
def initialize(mz_string, mz_precision, mz_network_order, intensity_string, intensity_precision, intensity_network_order)
|
|
227
|
-
@mz_string = mz_string
|
|
228
|
-
@mz_precision = mz_precision
|
|
229
|
-
@mz_network_order = mz_network_order
|
|
230
|
-
@intensity_string = intensity_string
|
|
231
|
-
@intensity_precision = intensity_precision
|
|
232
|
-
@intensity_network_order = intensity_network_order
|
|
233
|
-
end
|
|
234
|
-
|
|
235
|
-
# from mzXML files where information is held in peaks (m/z, intensity,
|
|
236
|
-
# m/z...)
|
|
237
|
-
def self.from_base64_peaks(b64_string, precision=32, network_order=true)
|
|
238
|
-
# decode
|
|
239
|
-
string = Base64.decode64(b64_string)
|
|
240
|
-
# split into two strings:
|
|
241
|
-
bytes_per_number = precision / 8
|
|
242
|
-
s_size = string.size
|
|
243
|
-
num_numbers = s_size / bytes_per_number
|
|
244
|
-
mz_pieces = Array.new(num_numbers)
|
|
245
|
-
intensity_pieces = Array.new(num_numbers)
|
|
246
|
-
index = 0
|
|
247
|
-
(0...string.size).step(bytes_per_number) do |i|
|
|
248
|
-
if index % 2 == 0
|
|
249
|
-
mz_pieces[index] = string[i,bytes_per_number]
|
|
250
|
-
else
|
|
251
|
-
intensity_pieces[index] = string[i,bytes_per_number]
|
|
252
|
-
end
|
|
253
|
-
index += 1
|
|
254
|
-
end
|
|
255
|
-
self.new(mz_pieces.join, precision, network_order, intensity_pieces.join, precision, network_order)
|
|
256
|
-
end
|
|
257
|
-
|
|
258
|
-
# from mzML and mzData style files where mz and intensity information are
|
|
259
|
-
# kept in different strings.
|
|
260
|
-
def self.from_base64_pair(b64_mz_string, mz_precision, mz_network_order, b64_intensity_string, intensity_precision, intensity_network_order)
|
|
261
|
-
self.new(Base64.decode64(b64_mz_string), mz_precision, mz_network_order, Base64.decode64(b64_intensity_string), intensity_precision, intensity_network_order)
|
|
262
|
-
end
|
|
263
|
-
|
|
264
|
-
def has_mz_data?
|
|
265
|
-
@mz_string.is_a?(String) && @mz_precision && !@mz_network_order.nil?
|
|
266
|
-
end
|
|
267
|
-
|
|
268
|
-
def has_intensity_data?
|
|
269
|
-
@intensity_string.is_a?(String) && @intensity_precision && !@intensity_network_order.nil?
|
|
270
|
-
end
|
|
271
|
-
|
|
272
|
-
end
|
|
273
|
-
|
|
274
|
-
module MS::Spectrum::LazyIO
|
|
275
|
-
def self.new(*args)
|
|
276
|
-
if args.size == 5 # mzXMl
|
|
277
|
-
MS::Spectrum::LazyIO::Peaks.new(*args)
|
|
278
|
-
elsif args.size == 9 # other
|
|
279
|
-
MS::Spectrum::LazyIO::Pair.new(*args)
|
|
280
|
-
else
|
|
281
|
-
raise RunTimeError, "must give 5 or 7 args for peak data and pair data respectively"
|
|
282
|
-
end
|
|
283
|
-
end
|
|
284
|
-
end
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
# stores an io object and the start and end indices and only evaluates the
|
|
288
|
-
# spectrum when information is requested
|
|
289
|
-
class MS::Spectrum::LazyIO::Pair < MS::Spectrum
|
|
290
|
-
include MS::Spectrum::LazyIO
|
|
291
|
-
|
|
292
|
-
undef mzs=
|
|
293
|
-
undef intensities=
|
|
294
|
-
|
|
295
|
-
def initialize(io, mz_start_index, mz_num_bytes, mz_precision, mz_network_order, intensity_start_index, intensity_num_bytes, intensity_precision, intensity_network_order)
|
|
296
|
-
@io = io
|
|
297
|
-
|
|
298
|
-
@mz_start_index = mz_start_index
|
|
299
|
-
@mz_num_bytes = mz_num_bytes
|
|
300
|
-
@mz_precision = mz_precision
|
|
301
|
-
@mz_network_order = mz_network_order
|
|
302
|
-
|
|
303
|
-
@intensity_start_index = intensity_start_index
|
|
304
|
-
@intensity_num_bytes = intensity_num_bytes
|
|
305
|
-
@intensity_precision = intensity_precision
|
|
306
|
-
@intensity_network_order = intensity_network_order
|
|
307
|
-
|
|
308
|
-
end
|
|
309
|
-
|
|
310
|
-
# beware that this converts the information on disk every time it is called.
|
|
311
|
-
def mzs
|
|
312
|
-
@io.pos = @mz_start_index
|
|
313
|
-
b64_string = @io.read(@mz_num_bytes)
|
|
314
|
-
MS::Spectrum.base64_to_array(b64_string, @mz_precision, @mz_network_order)
|
|
315
|
-
end
|
|
316
|
-
|
|
317
|
-
# beware that this converts the information in @intensity_string every time
|
|
318
|
-
# it is called.
|
|
319
|
-
def intensities
|
|
320
|
-
@io.pos = @intensity_start_index
|
|
321
|
-
b64_string = @io.read(@intensity_num_bytes)
|
|
322
|
-
MS::Spectrum.base64_to_array(b64_string, @intensity_precision, @intensity_network_order)
|
|
323
|
-
end
|
|
324
|
-
|
|
325
|
-
def has_mz_data?
|
|
326
|
-
(!@io.closed?) && @mz_start_index && @mz_num_bytes && @mz_precision && !@mz_network_order.nil?
|
|
327
|
-
end
|
|
328
|
-
|
|
329
|
-
def has_intensity_data?
|
|
330
|
-
(!@io.closed?) && @intensity_start_index && @intensity_num_bytes && @intensity_precision && !@intensity_network_order.nil?
|
|
331
|
-
end
|
|
332
|
-
|
|
333
|
-
end
|
|
334
|
-
|
|
335
|
-
class MS::Spectrum::LazyIO::Peaks < MS::Spectrum
|
|
336
|
-
include MS::Spectrum::LazyIO
|
|
337
|
-
|
|
338
|
-
undef mzs=
|
|
339
|
-
undef intensities=
|
|
340
|
-
|
|
341
|
-
def initialize(io, start_index, num_bytes, precision, network_order)
|
|
342
|
-
@io = io
|
|
343
|
-
@start_index = start_index
|
|
344
|
-
@num_bytes = num_bytes
|
|
345
|
-
@precision = precision
|
|
346
|
-
@network_order = network_order
|
|
347
|
-
end
|
|
348
|
-
|
|
349
|
-
# returns two arrays: an array of m/z values and an array of intensity
|
|
350
|
-
# values. This is the preferred way to access mzXML file information under
|
|
351
|
-
# lazy evaluation
|
|
352
|
-
def mzs_and_intensities
|
|
353
|
-
@io.pos = @start_index
|
|
354
|
-
b64_string = @io.read(@num_bytes)
|
|
355
|
-
MS::Spectrum.mzs_and_intensities_from_base64_peaks(b64_string, @precision, @network_order)
|
|
356
|
-
end
|
|
357
|
-
|
|
358
|
-
# when using 'io' lazy evaluation on files with m/z and intensity data
|
|
359
|
-
# interwoven (i.e., mzXML) it is more efficient to call 'mzs_and_intensities'
|
|
360
|
-
# if you are using both mz and intensity data.
|
|
361
|
-
def mzs
|
|
362
|
-
# TODO: this can be made slightly faster
|
|
363
|
-
mzs_and_intensities.first
|
|
364
|
-
end
|
|
365
|
-
|
|
366
|
-
# when using 'io' lazy evaluation on files with m/z and intensity data
|
|
367
|
-
# interwoven (i.e., mzXML) it is more efficient to call
|
|
368
|
-
# 'mzs_and_intensities'
|
|
369
|
-
# if you are using both mz and intensity data.
|
|
370
|
-
def intensities
|
|
371
|
-
# TODO: this can be made slightly faster
|
|
372
|
-
mzs_and_intensities.last
|
|
373
|
-
end
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
def has_mz_data?
|
|
377
|
-
(!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
|
|
378
|
-
end
|
|
379
|
-
|
|
380
|
-
def has_intensity_data?
|
|
381
|
-
(!@io.closed?) && @start_index && @num_bytes && @precision && !@network_order.nil?
|
|
382
|
-
end
|
|
383
|
-
|
|
384
|
-
end
|
|
1
|
+
module Ms
  # A spectrum: a data store plus a set of headers describing it.
  #
  # The data store is indexed positionally: element 0 is read as the m/z
  # values and element 1 as the intensities (see #mzs and #intensities).
  class Spectrum
    # data::    the underlying data store
    # headers:: the headers associated with this spectrum
    attr_reader :data, :headers

    # Wraps +data+ and, optionally, a +headers+ object (an empty hash when
    # none is supplied).  Neither argument is copied.
    def initialize(data, headers={})
      @data, @headers = data, headers
    end

    # Returns the m/z values, i.e. element 0 of the data store.
    def mzs
      @data[0]
    end

    # Returns the intensity values, i.e. element 1 of the data store.
    # These correspond to the values returned by #mzs.
    def intensities
      @data[1]
    end
  end
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
module Ms
  module Support

    # A binary search library adapted from: http://0xcc.net/ruby-bsearch/
    # ---
    #
    # Ruby/Bsearch - a binary search library for Ruby.
    #
    # Copyright (C) 2001 Satoru Takabayashi <satoru@namazu.org>
    # All rights reserved.
    # This is free software with ABSOLUTELY NO WARRANTY.
    #
    # You can redistribute it and/or modify it under the terms of
    # the Ruby's licence.
    #
    # In this adaptation the searches are module functions that take the
    # array as their first argument (the upstream library adds bsearch_*
    # methods to Array instead).  The block receives an element and must
    # return a negative number, zero, or a positive number depending on
    # whether the element sorts before, at, or after the target; the array
    # must already be sorted consistently with that block.
    #
    # Example:
    #
    #   bs = Ms::Support::BinarySearch
    #   bs.search_first(%w(a b c c c d e f)) {|x| x <=> "c"}
    #   # => 2
    #   bs.search_last(%w(a b c c c d e f)) {|x| x <=> "c"}
    #   # => 4
    #   bs.search_first(%w(a b c e f)) {|x| x <=> "c"}
    #   # => 2
    #   bs.search_first(%w(a b e f)) {|x| x <=> "c"}
    #   # => nil
    #   bs.search_last(%w(a b e f)) {|x| x <=> "c"}
    #   # => nil
    #   bs.search_lower_boundary(%w(a b e f)) {|x| x <=> "c"}
    #   # => 2
    #   bs.search_upper_boundary(%w(a b e f)) {|x| x <=> "c"}
    #   # => 2
    #   bs.search_range(%w(a b c c c d e f)) {|x| x <=> "c"}
    #   # => 2...5
    #   bs.search_range(%w(a b c d e f)) {|x| x <=> "c"}
    #   # => 2...3
    #   bs.search_range(%w(a b d e f)) {|x| x <=> "c"}
    #   # => 2...2
    #
    # The binary search algorithm is extracted from Jon Bentley's
    # Programming Pearls 2nd ed. p.93
    #
    module BinarySearch
      VERSION = '1.5'

      module_function

      #
      # Normalizes the optional search range into [first, stop], where
      # +stop+ is exclusive.  A nil range means the whole array.  An
      # inverted range (stop before first) is treated as the empty range
      # starting at +first+, so the search loops below always terminate.
      #
      def search_bounds(array, range)
        range = 0 ... array.length if range.nil?

        first = range.first
        stop = range.exclude_end? ? range.last : range.last + 1
        stop = first if stop < first
        [first, stop]
      end
      private_class_method :search_bounds

      #
      # Return the lower boundary. (inside)
      #
      # This is the index of the first element in the searched range for
      # which the block returns a value >= 0, or the (exclusive) end of the
      # range when there is no such element.
      #
      def search_lower_boundary(array, range=nil, &block)
        first, stop = search_bounds(array, range)

        lower = first - 1
        upper = stop
        # `<' (not `!=') so a degenerate interval can never spin forever.
        while lower + 1 < upper
          mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
          if yield(array[mid]) < 0
            lower = mid
          else
            upper = mid
          end
        end
        return upper
      end

      #
      # This method searches the FIRST occurrence which satisfies a
      # condition given by a block in binary fashion and return the
      # index of the first occurrence. Return nil if not found.
      #
      # Only indices inside the searched range are ever returned.
      #
      def search_first(array, range=nil, &block)
        stop = search_bounds(array, range).last
        boundary = search_lower_boundary(array, range, &block)
        # boundary == stop means nothing in the range qualified; the element
        # sitting at +stop+ (if any) lies outside the range and must not be
        # reported as a hit.
        if boundary >= stop || boundary >= array.length || yield(array[boundary]) != 0
          return nil
        else
          return boundary
        end
      end

      #
      # Return the upper boundary. (outside)
      #
      # This is one past the index of the last element in the searched
      # range for which the block returns a value <= 0, or the start of the
      # range when there is no such element.
      #
      def search_upper_boundary(array, range=nil, &block)
        first, stop = search_bounds(array, range)

        lower = first - 1
        upper = stop
        # `<' (not `!=') so a degenerate interval can never spin forever.
        while lower + 1 < upper
          mid = ((lower + upper) / 2).to_i # for working with mathn.rb (Rational)
          if yield(array[mid]) <= 0
            lower = mid
          else
            upper = mid
          end
        end
        return lower + 1 # outside of the matching range.
      end

      #
      # This method searches the LAST occurrence which satisfies a
      # condition given by a block in binary fashion and return the
      # index of the last occurrence. Return nil if not found.
      #
      # Only indices inside the searched range are ever returned.
      #
      def search_last(array, range=nil, &block)
        first = search_bounds(array, range).first
        # `- 1' for canceling `lower + 1' in search_upper_boundary.
        boundary = search_upper_boundary(array, range, &block) - 1

        # boundary < first means nothing in the range qualified; the element
        # just before the range lies outside it and must not be reported.
        if boundary < first || boundary < 0 || yield(array[boundary]) != 0
          return nil
        else
          return boundary
        end
      end

      #
      # Return the search result as a Range object.
      #
      # The result is the half-open range lower_boundary ... upper_boundary;
      # it is empty when nothing matches.
      #
      def search_range(array, range=nil, &block)
        lower = search_lower_boundary(array, range, &block)
        upper = search_upper_boundary(array, range, &block)
        return lower ... upper
      end
    end
  end
end
|
data/lib/ms.rb
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
#
|
|
7
|
-
|
|
8
|
-
end
|
|
9
|
-
|
|
10
|
-
end
|
|
1
|
+
# Root namespace of the library.
module Ms
  module_function

  # Format-based parsing is currently disabled.  The sketch below is kept
  # for reference only; it would resolve +format+ through Tap::Env and
  # delegate to the resolved constant's own +parse+.
  #
  # def parse(format, path)
  #   const = Tap::Env.instance.search(:formats, format)
  #   raise ArgumentError, "unknown format: #{format}" unless const
  #   const.constantize.parse(path)
  # end

end
|