mspire 0.8.5 → 0.8.6
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/lib/hash/inverse.rb +15 -0
- data/lib/mspire/error_rate/qvalue.rb +5 -5
- data/lib/mspire/fasta.rb +2 -0
- data/lib/mspire/ident/peptide/db/creator.rb +48 -58
- data/lib/mspire/ident/peptide/db/io.rb +5 -0
- data/lib/mspire/ident/peptide_hit/qvalue.rb +2 -2
- data/lib/mspire/ident/peptide_hit.rb +2 -2
- data/lib/mspire/ident/protein_group.rb +4 -2
- data/lib/mspire/isotope/aa.rb +10 -10
- data/lib/mspire/mzml/instrument_configuration.rb +10 -3
- data/lib/mspire/quant/cmdline.rb +42 -0
- data/lib/mspire/quant/protein_group_comparison.rb +29 -0
- data/lib/mspire/quant/spectral_counts.rb +42 -0
- data/script/fasta_to_peptide_centric_db.rb +5 -0
- data/script/mascot_dat_to_peptide_hit_qvalues.rb +37 -45
- data/script/mass_correct.rb +118 -0
- data/script/minimal_protein_set.rb +345 -0
- data/script/mzml_to_mgf.rb +46 -0
- data/script/peptide_hit_qvalues_to_spectral_counts_table.rb +275 -0
- data/spec/mspire/ident/peptide/db/creator_spec.rb +11 -0
- data/spec/testfiles/mspire/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +157 -157
- metadata +11 -2
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rserve/simpler/R'
|
4
|
+
require 'runarray/narray'
|
5
|
+
|
6
|
+
# One lock-mass observation taken from a single spectrum: the m/z and
# intensity of the chosen peak, the id of the spectrum it came from, and the
# signed deviation (dev) recorded for that peak by the caller.
MzDiffs = Struct.new(:mz, :intensity, :spectrum_id, :dev) do
  # Magnitude of the deviation, ignoring its sign.
  def abs_dev
    dev.abs
  end
end
|
11
|
+
|
12
|
+
# Returns a Hash of spectrum_id => shift, where shift is the amount by which
# the observed m/z exceeds the theoretical one (observed - theoretical).
#
# mz_theor   - theoretical m/z of the reference peak
# mz_diffs   - objects responding to #mz, #spectrum_id and #abs_dev
#              (e.g. MzDiffs)
# dev_cutoff - observations whose abs_dev is not below this value are treated
#              as "reference peak not found" and only receive the global shift
#
# Observations inside the cutoff that are not flagged as outliers get their
# own deviation (mz - mz_theor); outliers and far observations get the global
# shift, i.e. the mean of the non-outlier observations minus mz_theor.
#
# If no observation lies inside the cutoff there is nothing to calibrate
# against, so an empty Hash is returned instead of computing statistics on an
# empty set.
def find_spectral_shifts(mz_theor, mz_diffs, dev_cutoff = 0.5)
  spec_id_to_shift = {}

  (close_diffs, far_diffs) = mz_diffs.partition {|diff| diff.abs_dev < dev_cutoff }
  return spec_id_to_shift if close_diffs.empty?

  close_mz_vals = close_diffs.map(&:mz)
  outlier_indices = Runarray::NArray.new(close_mz_vals).outliers_iteratively(3)

  # the global shift is estimated from the non-outlier observations only
  tight_mz_vals = close_mz_vals.reject.with_index {|_mz, i| outlier_indices.include?(i) }
  (mean, _sd) = Runarray::NArray.new(tight_mz_vals).sample_stats
  global_shift = mean - mz_theor

  close_diffs.each_with_index do |mz_diff, i|
    spec_id_to_shift[mz_diff.spectrum_id] =
      if outlier_indices.include?(i)
        global_shift
      else
        # equals mz_diff.mz - mz_theor: the spectrum's own deviation
        global_shift + (mz_diff.mz - mean)
      end
  end

  far_diffs.each {|mz_diff| spec_id_to_shift[mz_diff.spectrum_id] = global_shift }

  #pvalue = R.converse( mz_diffs: close_mz_vals ) do
  #  "shapiro.test(mz_diffs)$p.value"
  #end
  spec_id_to_shift
end
|
48
|
+
|
49
|
+
require 'optparse'
require 'mspire/mzml'

# Command line driver: for every mzML file given, locate the peak nearest to
# the requested m/z in each MS1 spectrum, derive a per-spectrum shift with
# find_spectral_shifts and write a copy of the file with shifted MS1 m/z
# values to <file>.massCorrected.mzML.
ext = ".massCorrected.mzML"
opt = {}
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename($0)} [OPTS] <m/z> <file>.mzML ..."
  op.separator "output: <file>#{ext}"
  op.separator "finds the nearest m/z to <m/z> and shifts m/z values"
  # NOTE(review): nothing below actually prints to stdout.
  op.separator "prints the corrected deviation to stdout"
  op.separator ""
  op.separator "options:"
  op.on("-t", "--threshold <Float>", Float, 'intensity must be above threshold') {|v| opt[:threshold] = v }
  # NOTE(review): Regexp.escape turns the argument into a literal substring
  # match, although the option is described as a regex -- confirm intent.
  op.on("-f", "--filter-string-regex <regex-no-slashes>", 'only match and calibrate if matches filter string') {|v| opt[:filter_string_regex] = Regexp.new(Regexp.escape(v)) }
end
opts.parse!

if ARGV.size == 0
  puts opts
  exit
end

# NOTE(review): the threshold is parsed but not applied anywhere below.
threshold = opt[:threshold] || 0.0
filter_string_regex = opt[:filter_string_regex]

mz_theor = ARGV.shift.to_f

ARGV.each do |file|
  base = file.chomp(File.extname(file))
  outfile = base + ext

  mz_diffs = []
  Mspire::Mzml.open(file) do |mzml|
    # Finding the deviation
    mzml.each do |spectrum|
      next unless spectrum.ms_level == 1
      if filter_string_regex
        next unless filter_string_regex.match(spectrum.scan_list.first.fetch_by_acc('MS:1000512'))
      end
      indices = spectrum.find_all_nearest_index(mz_theor)
      # among equally-near candidates keep the most intense one; max_by is
      # required here because a one-argument block given to #max is treated
      # as a comparator, not as a key
      best_index = indices.max_by {|i| spectrum.intensities[i] }
      next if best_index.nil? # no candidate peak in this spectrum
      closest_mz = spectrum.mzs[best_index]
      mz_diffs << MzDiffs.new(closest_mz, spectrum.intensities[best_index], spectrum.id, closest_mz - mz_theor)
    end

    spectral_shifts = find_spectral_shifts(mz_theor, mz_diffs)

    # correcting the masses
    spectra = mzml.map do |spectrum|
      if spectrum.ms_level == 1
        shift = spectral_shifts[spectrum.id]
        # the shift is (observed - theoretical), so it has to be removed from
        # the observed values to move them back onto the theoretical scale
        spectrum.mzs.map! {|mz| mz - shift } if shift
      end
      spectrum
    end

    data_processing = Mspire::Mzml::DataProcessing.new("Corrected_Mass")
    mzml.data_processing_list << data_processing
    mzml.run.spectrum_list = Mspire::Mzml::SpectrumList.new(data_processing, spectra)
    mzml.write(outfile)
  end
end
|
118
|
+
|
@@ -0,0 +1,345 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'set'
|
5
|
+
require 'optparse'
|
6
|
+
require 'mspire/fasta'
|
7
|
+
require 'mspire/ident/peptide/db/io'
|
8
|
+
|
9
|
+
# Recognizes a "Set <name>" header line (case-insensitive) in a
# sets_compare.txt file; the first capture is the set name.
# NOTE(review): the pattern is not anchored, so any line containing
# "set" followed by whitespace is treated as a header -- confirm intent.
SET_RE = /Set\s+(.*)/i
# Extension appended to each file name listed in sets_compare.txt.
QVALUE_EXT = ".phq.tsv".freeze
|
11
|
+
|
12
|
+
# Parses a sets_compare.txt file.
#
# file - path to the sets_compare.txt file
# ext  - extension appended to every listed file name
#
# Lines without any word character are ignored. A line matching SET_RE opens
# a new set; every other line is taken as a file name (without extension)
# located in the same directory as +file+.
#
# Returns [sets_to_paths_hash, sets_order]: a Hash of set name => array of
# full paths, and the set names in order of appearance.
#
# Raises ArgumentError if a file name appears before the first Set line, and
# RuntimeError if a referenced file does not exist.
def sets_compare_to_paths(file, ext=QVALUE_EXT)
  dirname = File.dirname(File.expand_path(file))
  sets = {}
  sets_order = []
  current_set = nil

  File.readlines(file).each do |raw_line|
    line = raw_line.chomp
    next unless line =~ /\w/

    if (header = SET_RE.match(line))
      current_set = header[1].dup
      sets[current_set] = []
      sets_order << current_set
    else
      if current_set.nil?
        raise ArgumentError, "#{file}: '#{line}' is listed before any 'Set <name>' line"
      end
      full_path = File.join(dirname, line + ext)
      raise RuntimeError, "file #{full_path} does not exist!!" unless File.exist?(full_path)
      sets[current_set] << full_path
    end
  end

  [sets, sets_order]
end
|
32
|
+
|
33
|
+
# Greedy minimal protein set.
#
# proteins_to_aaseqs - hash of protein => array of amino acid sequences
#
# Proteins are ranked by [number of sequences, summed sequence length], or by
# the value returned from the block (called with protein and its sequences)
# when a block is given. The ranking is then walked in REVERSE (highest key
# first): a protein is kept if it contributes at least one sequence not yet
# claimed by an earlier protein. A protein contributing nothing new is
# recorded as indistinguishable from the most recently kept protein when its
# sorted sequences equal that protein's sorted sequences.
#
# Returns [minimal_protein_to_uniq_peps_hash, indistinguishable_protein_hash];
# the second hash is keyed on the kept proteins and gives an array of other
# proteins.
def minimal_protein_set(proteins_to_aaseqs)
  custom_rank = block_given?

  ranked = proteins_to_aaseqs.sort_by { |prot, aaseqs|
    if custom_rank
      yield(prot, aaseqs)
    else
      [aaseqs.size, aaseqs.inject(0) { |total, seq| total + seq.size }]
    end
  }.reverse

  claimed = Set.new
  minimal = {}
  indistinguishable = {}
  anchor_prot = nil
  anchor_peps = nil

  ranked.each do |prot, aaseqs|
    ordered = aaseqs.sort
    # Set#add? is nil for an already-claimed sequence, so only new ones pass
    fresh = aaseqs.select { |seq| claimed.add?(seq) }

    if fresh.empty?
      indistinguishable[anchor_prot] << prot if ordered == anchor_peps
      next
    end

    minimal[prot] = fresh
    indistinguishable[prot] = []
    anchor_prot = prot
    anchor_peps = ordered
  end

  [minimal, indistinguishable]
end
|
88
|
+
|
89
|
+
# Converts command line cutoff strings to numbers.
#
# ar - array of strings
#
# Returns an array in which 'nil' and '-' become nil (meaning "no cutoff")
# and every other entry is converted with String#to_f.
def cutoffs_to_floats(ar)
  ar.map {|v| (v == 'nil' || v == '-') ? nil : v.to_f }
end
|
98
|
+
|
99
|
+
# Per-protein hit statistics.
#
# prot_to_peps - hash of protein id => array of peptide sequences
# seq_to_hits  - hash of peptide sequence => array of hits; each hit must
#                respond to #sequence and #charge
#
# Returns a hash keyed on protein id that yields an array:
#   [#aaseq, #aaseq_and_charge, #total_hits]
# Every protein in prot_to_peps gets an entry, including one whose peptide
# list is empty ([0, 0, 0]).
def stats_per_prot(prot_to_peps, seq_to_hits)
  per_protein_hash = {}
  prot_to_peps.each do |prot, uniq_pep_seqs|
    all = Set.new
    aaseqcharges = Set.new
    aaseqs = Set.new

    uniq_pep_seqs.each do |pep_seq|
      all_hits = seq_to_hits[pep_seq]
      all.merge( all_hits )
      all_hits.each do |hit|
        aaseq = hit.sequence
        aaseqs.add( aaseq )
        aaseqcharges.add( aaseq + '_' + hit.charge.to_s )
      end
    end

    # recorded once per protein, after all of its peptides were tallied
    per_protein_hash[prot] = [aaseqs.size, aaseqcharges.size, all.size]
  end
  per_protein_hash
end
|
122
|
+
|
123
|
+
# Defaults; entries are overwritten by the command line switches below.
opt = { :cutoffs => [nil], :outfile => "summary.yml" }

# Command line definition: usage text, the q-value cutoff list and three
# switches that only print a format description.
opts = OptionParser.new do |parser|
  parser.banner = "usage: #{File.basename(__FILE__)} pepcentric_db.yml sets_compare.txt"
  parser.separator "output: #{opt[:outfile]}"
  parser.separator ""
  parser.separator "input: "
  parser.separator "    each <file> referenced in sets_compare.txt should have a"
  parser.separator "    <file>.phq.tsv file"
  parser.separator ""
  parser.separator "options:"
  parser.on("-q", "--qvalue <0-1[,...]>", Array, "only take qvalues < given ['-' for no threshold]") do |list|
    opt[:cutoffs] = cutoffs_to_floats(list)
  end
  parser.separator ""
  parser.separator "formats:"
  parser.on("--output-format", "prints the output yaml scheme and exits") do |flag|
    opt[:output_format] = flag
  end
  parser.on("--input-format", "prints sets_compare.txt format and exits") do |flag|
    opt[:input_format] = flag
  end
  parser.on("--pepcentric-db-format", "prints peptide centric db format and exits") do |flag|
    opt[:pepcentric_db_format] = flag
  end
end
|
144
|
+
|
145
|
+
# later on we could implement full isoform resolution like IsoformResolver
|
146
|
+
# for now we will generate a report, realizing that some isoforms may not be
|
147
|
+
# reported
|
148
|
+
# it is implemented by using a pre-made map from sequence to protein groups
|
149
|
+
# then, a set of sequences allows one to deduce all the relationships from the
|
150
|
+
# protein groups.
|
151
|
+
|
152
|
+
# Parse the command line, print any requested format description and exit,
# otherwise make sure both positional arguments are present.
opts.parse!

pd = Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER
kvd = Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER

if opt[:pepcentric_db_format]
  puts "pepcentric_db.yml needs to be in the format:"
  puts "<PEPTIDE>#{kvd.inspect}<ID>#{pd.inspect}<ID>#{pd.inspect}<ID>"
  puts "(The delimiters are shown with #inspect)"
end

if opt[:output_format]
  # skeleton of the summary yaml written by this script
  yaml = <<SKEL
results:
- qvalue_cutoff: <Float>
  sets:
    <set_name>:
      num_uniq_aaseqs: <Integer>
      num_aaseqs_not_in_pep_db: <Integer>
      num_uniq_aaseqs_charge: <Integer>
      proteins:
        <protein_id>:
          num_hits_all:
          - <Integer> # total num aaseqs
          - <Integer> # total num aaseq+charge
          - <Integer> # total num hits
          num_hits_minimal:
          - <Integer> # total num aaseqs
          - <Integer> # total num aaseq+charge
          - <Integer> # total num hits
          indistinguishable:
          - <protein_id>
          - <protein_id>
          aaseqs:
          - <String>
          - <String>
sets_order:
- <String>
- <String>
SKEL
  print yaml
end

if opt[:input_format]
  string =<<EXPLANATION
# the sets_compare.txt format is very simple:

Set <some_name_for_set1>
filename1_no_ext
filename2_no_ext
Set <some_name_for_set2>
filename3_no_ext
filename4_no_ext
...
EXPLANATION
  puts string
end

# any of the --*-format switches only prints documentation
exit if opt.keys.any? {|key| key.to_s =~ /_format/ }

if ARGV.size != 2
  puts opts.to_s
  exit
end
|
218
|
+
|
219
|
+
# Main processing: for every q-value cutoff and every set, collect the
# passing peptide hits, map their sequences to proteins through the peptide
# centric db, reduce to a minimal protein set and write all numbers to the
# summary yaml file.
(pepcentric_fn, sets_compare_fn) = ARGV

results = {}
results['results'] = []

(sets_hash, sets_order) = sets_compare_to_paths(sets_compare_fn)
results['sets_order'] = sets_order

STDERR.print "Loading peptide centric DB (this takes about a minute)..."
start = Time.now
Mspire::Ident::Peptide::Db::IO.open(pepcentric_fn) do |pep_to_prots|
  STDERR.puts "#{Time.now - start} seconds."

  opt[:cutoffs].each do |cutoff|

    cutoff_results = {'qvalue_cutoff' => cutoff}
    results_sets_hash = {}
    cutoff_results['sets'] = results_sets_hash
    results['results'] << cutoff_results

    #########################
    # FOR EACH SET:
    #########################
    pep_klass = nil
    sets_hash.each do |set, files|
      set_results = {}
      results_sets_hash[set] = set_results

      # assumes the indices are the same into each data file

      # get the complete set of passing hits
      all_passing_hits = []
      files.each do |file|
        # NOTE(review): the files are read as YAML with 'headers' and 'data'
        # keys although they carry a .tsv extension -- confirm the format.
        hash = YAML.load_file(file)

        header_hash = hash['headers']
        pep_klass ||= Struct.new(*(header_hash.map {|v| v.to_sym }))
        hits = hash['data'].map {|v| pep_klass.new(*v) }

        passing_hits = cutoff ? hits.select {|hit| hit.qvalue <= cutoff } : hits
        # concat instead of push(*ary): no argument splat of a large array
        all_passing_hits.concat(passing_hits)
      end

      # create an index from aaseq to hits
      seq_to_hits = Hash.new {|h,k| h[k] = []}
      uniq_seqcharge = Set.new
      all_passing_hits.each do |hit|
        seq_to_hits[hit.sequence] << hit
        uniq_seqcharge.add( hit.sequence + '_' + hit.charge.to_s )
      end

      # determine the number of uniq aaseqs
      uniq_seqs = seq_to_hits.size

      num_uniq_seqcharges = uniq_seqcharge.size

      set_results.merge!( { 'num_peptide_hits' => all_passing_hits.size,
                            'num_uniq_aaseqs' => uniq_seqs,
                            'num_uniq_aaseqs_charge' => num_uniq_seqcharges,
      })

      # create an index from proteins to peptides
      # (the db handle yielded above is the only peptide db in scope; it is
      # presumed to answer #key? and #[] -- TODO confirm against Db::IO)
      prots_to_peps = Hash.new {|h,k| h[k] = [] }
      peptides_not_found = []
      seq_to_hits.keys.each do |seq|
        if pep_to_prots.key?(seq)
          pep_to_prots[seq].each do |prot|
            prots_to_peps[prot] << seq
          end
        else
          peptides_not_found << seq
        end
      end

      # Determine the number of 1) aaseqs, 2) aaseqcharges, 3) hits per protein BEFORE minimization
      stats_per_protein_before = stats_per_prot(prots_to_peps, seq_to_hits)

      # get the minimal protein set, ranked by number of peptides and their
      # total length. The former protein-length tie-breaker and the
      # protein_info bookkeeping referred to variables that are not defined
      # anywhere in this script (and protein_info was never written out), so
      # they are left out here.
      (prot_to_uniq_peps_hash, indistinguishable_protein_hash) = minimal_protein_set(prots_to_peps) do |prot,peps|
        [ peps.size, peps.inject(0){|m,s| m+s.size} ]
      end

      stats_per_protein_minimal = stats_per_prot(prot_to_uniq_peps_hash, seq_to_hits)

      # create a hash of data for each protein
      protein_data_hashes_hash = {}
      prot_to_uniq_peps_hash.each do |prot, peps|
        protein_data_hashes_hash[prot] = {
          'aaseqs' => peps,
          # this will be a triplet
          'num_hits_minimal' => stats_per_protein_minimal[prot],
          'indistinguishable' => indistinguishable_protein_hash[prot],
          'num_hits_all' => stats_per_protein_before[prot],
        }
      end

      set_results['proteins'] = protein_data_hashes_hash
      set_results['num_proteins'] = prot_to_uniq_peps_hash.size
      set_results['num_aaseqs_not_in_pep_db'] = peptides_not_found.size
      if peptides_not_found.size > 0
        warn "Did not find in peptide centric db: #{peptides_not_found.join(', ')}"
      end
    end
  end

  File.open(opt[:outfile], 'w') do |out|
    out.print results.to_yaml
  end

end
|
345
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mspire/mzml'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Command line driver: prints every spectrum with ms_level > 1 from the given
# mzML files as an MGF "BEGIN IONS ... END IONS" record on stdout.
opt = {
  # NOTE(review): not read anywhere below; its switch is commented out.
  filter_zero_intensity: true,
  retention_times: true,
}
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename($0)} <file>.mzML ..."
  # NOTE(review): the records are written to stdout, no <file>.mgf is
  # created by this script -- confirm which behaviour is intended.
  op.separator "outputs: <file>.mgf"
  #op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
  # the default is set in ms/msrun/search.rb -> set_opts
  op.on("--no-retention-times", "won't include RT even if available") {|v| opt[:retention_times] = false }
end

opts.parse!

if ARGV.size == 0
  puts opts
  exit
end

ARGV.each do |file|
  unless File.exist?(file)
    # diagnostics go to stderr so they cannot end up inside the MGF stream
    warn "missing file: #{file} [skipping]"
    next
  end

  Mspire::Mzml.foreach(file).with_index do |spectrum,i|
    next unless spectrum.ms_level > 1
    puts "BEGIN IONS"
    # id, spectrumid,
    rt = spectrum.retention_time
    title_parts = [i, "id_#{spectrum.id}"]
    # a spectrum without a retention time simply gets no rt_ part
    title_parts << "rt_#{rt.round}" if rt
    puts "TITLE=#{title_parts.join('.')}"
    puts "RTINSECONDS=#{rt}" if rt && opt[:retention_times]
    puts "PEPMASS=#{spectrum.precursor_mz}"
    charge = spectrum.precursor_charge
    # an unknown charge is omitted rather than printed as a bare "+"
    puts "CHARGE=#{charge}+" if charge
    spectrum.each do |mz,int|
      puts [mz, int].join(" ")
    end
    puts "END IONS"
    puts ""
  end
end
|