mspire 0.8.5 → 0.8.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/lib/hash/inverse.rb +15 -0
- data/lib/mspire/error_rate/qvalue.rb +5 -5
- data/lib/mspire/fasta.rb +2 -0
- data/lib/mspire/ident/peptide/db/creator.rb +48 -58
- data/lib/mspire/ident/peptide/db/io.rb +5 -0
- data/lib/mspire/ident/peptide_hit/qvalue.rb +2 -2
- data/lib/mspire/ident/peptide_hit.rb +2 -2
- data/lib/mspire/ident/protein_group.rb +4 -2
- data/lib/mspire/isotope/aa.rb +10 -10
- data/lib/mspire/mzml/instrument_configuration.rb +10 -3
- data/lib/mspire/quant/cmdline.rb +42 -0
- data/lib/mspire/quant/protein_group_comparison.rb +29 -0
- data/lib/mspire/quant/spectral_counts.rb +42 -0
- data/script/fasta_to_peptide_centric_db.rb +5 -0
- data/script/mascot_dat_to_peptide_hit_qvalues.rb +37 -45
- data/script/mass_correct.rb +118 -0
- data/script/minimal_protein_set.rb +345 -0
- data/script/mzml_to_mgf.rb +46 -0
- data/script/peptide_hit_qvalues_to_spectral_counts_table.rb +275 -0
- data/spec/mspire/ident/peptide/db/creator_spec.rb +11 -0
- data/spec/testfiles/mspire/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +157 -157
- metadata +11 -2
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'rserve/simpler/R'
|
4
|
+
require 'runarray/narray'
|
5
|
+
|
6
|
+
# One peak matched against the target m/z in a single spectrum.
#
# mz          - m/z of the matched peak
# intensity   - intensity of the matched peak
# spectrum_id - id of the spectrum the peak came from
# dev         - signed deviation of the peak from the target m/z
MzDiffs = Struct.new(:mz, :intensity, :spectrum_id, :dev) do
  # Returns the magnitude of the deviation, ignoring its sign.
  def abs_dev
    dev.abs
  end
end
|
11
|
+
|
12
|
+
# Works out an m/z shift for every spectrum from the peaks that were matched
# against a theoretical m/z.
#
# mz_theor   - the theoretical m/z the peaks were matched against
# mz_diffs   - objects responding to #mz, #spectrum_id and #abs_dev
#              (one per spectrum)
# dev_cutoff - a peak whose absolute deviation is not below this value is
#              treated as "far" and only receives the global shift
#
# Returns a Hash of spectrum_id => shift (a Hash, not an Array).
#
# "Close" peaks that Runarray does not flag as outliers define the global
# shift (their mean minus mz_theor) and additionally get their own offset
# from that mean.  Close outliers and far peaks get the global shift only.
def find_spectral_shifts(mz_theor, mz_diffs, dev_cutoff = 0.5)
  close_diffs, far_diffs = mz_diffs.partition { |diff| diff.abs_dev < dev_cutoff }
  close_mz_vals = close_diffs.map(&:mz)

  # indices into close_mz_vals (and therefore into close_diffs)
  outlier_indices = Runarray::NArray.new(close_mz_vals).outliers_iteratively(3)

  # need the global shift: computed from the non-outlier close values only
  tight_mz_vals = close_mz_vals.reject.with_index { |_mz, i| outlier_indices.include?(i) }
  mean, _sd = Runarray::NArray.new(tight_mz_vals).sample_stats
  global_shift = mean - mz_theor

  spec_id_to_shift = {}
  close_diffs.each_with_index do |mz_diff, i|
    spec_id_to_shift[mz_diff.spectrum_id] =
      if outlier_indices.include?(i)
        global_shift
      else
        global_shift + (mz_diff.mz - mean)
      end
  end

  far_diffs.each { |mz_diff| spec_id_to_shift[mz_diff.spectrum_id] = global_shift }

  #pvalue = R.converse( mz_diffs: close_mz_vals ) do
  #  "shapiro.test(mz_diffs)$p.value"
  #end
  spec_id_to_shift
end
|
48
|
+
|
49
|
+
require 'optparse'
|
50
|
+
require 'mspire/mzml'
|
51
|
+
# Script body.  For every mzML file given on the command line:
#   1. in each MS1 spectrum, pick the most intense of the peaks nearest to
#      the target <m/z> and record its deviation from the target;
#   2. turn those deviations into a per-spectrum shift (find_spectral_shifts);
#   3. apply the shift to every m/z value of each MS1 spectrum and write the
#      result to <file>.massCorrected.mzML.
ext = ".massCorrected.mzML"
opt = {}
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename($0)} [OPTS] <m/z> <file>.mzML ..."
  op.separator "output: <file>#{ext}"
  op.separator "finds the nearest m/z to <m/z> and shifts m/z values"
  op.separator "prints the corrected deviation to stdout"
  op.separator ""
  op.separator "options:"
  op.on("-t", "--threshold <Float>", Float, 'intensity must be above threshold') {|v| opt[:threshold] = v }
  # NOTE(review): the value is passed through Regexp.escape, so it is matched
  # literally even though the option is documented as a regex -- confirm
  # which behavior is intended before changing it.
  op.on("-f", "--filter-string-regex <regex-no-slashes>", 'only match and calibrate if matches filter string') {|v| opt[:filter_string_regex] = Regexp.new(Regexp.escape(v)) }
end
opts.parse!

# both the target m/z and at least one file are required
if ARGV.size < 2
  puts opts
  exit
end

# nil means "no intensity threshold" (the option was not given)
threshold = opt[:threshold]
filter_string_regex = opt[:filter_string_regex]

mz_theor = ARGV.shift.to_f

ARGV.each do |file|
  outfile = file.chomp(File.extname(file)) + ext

  Mspire::Mzml.open(file) do |mzml|
    # Finding the deviation
    mz_diffs = []
    mzml.each do |spectrum|
      next unless spectrum.ms_level == 1
      if filter_string_regex
        next unless filter_string_regex.match(spectrum.scan_list.first.fetch_by_acc('MS:1000512'))
      end

      intensities = spectrum.intensities
      # max_by (not max): choose the candidate index with the highest intensity
      best_index = spectrum.find_all_nearest_index(mz_theor).max_by {|i| intensities[i] }
      next if best_index.nil? # no candidate peak in this spectrum

      best_intensity = intensities[best_index]
      # NOTE(review): applies -t to the matched peak; confirm that is the
      # intensity the option is meant to test.
      next if threshold && best_intensity <= threshold

      closest_mz = spectrum.mzs[best_index]
      mz_diffs << MzDiffs.new(closest_mz, best_intensity, spectrum.id, closest_mz - mz_theor)
    end

    spectral_shifts =
      if mz_diffs.empty?
        warn "#{file}: no MS1 peak matched #{mz_theor}; writing m/z values unshifted"
        {}
      else
        find_spectral_shifts(mz_theor, mz_diffs)
      end

    # correcting the masses
    # NOTE(review): the shift is derived from (observed - theoretical) and is
    # ADDED to the observed m/z here; confirm the sign is what is intended.
    spectra = mzml.map do |spectrum|
      shift = spectral_shifts[spectrum.id] if spectrum.ms_level == 1
      spectrum.mzs.map! {|mz| mz + shift } if shift
      spectrum
    end

    data_processing = Mspire::Mzml::DataProcessing.new("Corrected_Mass")
    mzml.data_processing_list << data_processing
    mzml.run.spectrum_list = Mspire::Mzml::SpectrumList.new(data_processing, spectra)
    mzml.write(outfile)
  end
end
|
118
|
+
|
@@ -0,0 +1,345 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'set'
|
5
|
+
require 'optparse'
|
6
|
+
require 'mspire/fasta'
|
7
|
+
require 'mspire/ident/peptide/db/io'
|
8
|
+
|
9
|
+
# Matches a "Set <name>" header line; capture 1 is the set name.
SET_RE = /Set\s+(.*)/i
# Extension appended to every file name listed under a set.
QVALUE_EXT = ".phq.tsv"

# Parses a sets_compare.txt style file.
#
# file - path to the sets file; listed names are resolved relative to the
#        directory containing it
# ext  - extension appended to each listed name
#
# Lines without any word character are ignored.  A line matching SET_RE
# starts a new set; every other line names a file belonging to the current
# set.
#
# Returns [sets_to_paths_hash, sets_order] where sets_to_paths_hash maps a
# set name to an Array of full paths and sets_order lists the set names in
# the order they appeared.
#
# Raises RuntimeError if a listed file does not exist, or if a file is
# listed before the first "Set <name>" line.
def sets_compare_to_paths(file, ext=QVALUE_EXT)
  dirname = File.dirname(File.expand_path(file))
  sets = {}
  sets_order = []
  current_set = nil

  File.readlines(file).each do |raw_line|
    line = raw_line.chomp
    next unless line =~ /\w/

    if (header = SET_RE.match(line))
      current_set = header[1].dup
      sets[current_set] = []
      sets_order << current_set
    else
      full_path = File.join(dirname, line + ext)
      raise RuntimeError, "file #{full_path} does not exist!!" unless File.exist?(full_path)
      # without this the next line would die with NoMethodError on nil
      raise RuntimeError, "file #{full_path} is listed before any 'Set <name>' line in #{file}" if current_set.nil?
      sets[current_set] << full_path
    end
  end

  [sets, sets_order]
end
|
32
|
+
|
33
|
+
# returns [minimal_protein_to_uniq_peps_hash, indistinguishable_protein_hash]
#
# takes a hash of proteins to aaseqs (protein => Array of peptide sequences).
# Uses a greedy algorithm: proteins are sorted by the number of aaseqs and
# then by total aa length; if a block is given it is yielded the protein and
# its peptide array and the returned value is used as the sort key instead.
# The greedy algorithm acts on the REVERSE of the sorted proteins, so the
# protein with the largest key claims its peptides first.
#
# A protein is kept if it still has at least one peptide that no earlier
# protein claimed; its value in the first hash is just those newly claimed
# peptides.  indistinguishable_protein_hash is keyed on the kept proteins and
# gives an array of other proteins: those with no unclaimed peptides whose
# full peptide list (compared sorted) equals that of the most recently kept
# protein.
def minimal_protein_set(proteins_to_aaseqs)
  use_block = block_given?

  most_to_least = proteins_to_aaseqs.sort_by do |prot, peps|
    if use_block
      yield(prot, peps)
    else
      [peps.size, peps.inject(0) { |total, seq| total + seq.size }]
    end
  end.reverse

  claimed = Set.new
  prot_to_uniq_peps_hash = {}
  same_peptide_hits = {}
  last_peps = nil
  last_uniq_prot = nil

  most_to_least.each do |prot, peps|
    # sorted so that peptide lists compare equal regardless of order
    sorted_peps = peps.sort
    # Set#add? returns nil when the peptide was already claimed
    uniq_peps = peps.select { |pep| claimed.add?(pep) }

    if uniq_peps.empty?
      same_peptide_hits[last_uniq_prot] << prot if sorted_peps == last_peps
    else
      prot_to_uniq_peps_hash[prot] = uniq_peps
      same_peptide_hits[prot] = []
      last_peps = sorted_peps
      last_uniq_prot = prot
    end
  end

  [prot_to_uniq_peps_hash, same_peptide_hits]
end
|
88
|
+
|
89
|
+
# Converts command-line cutoff strings to numbers.
#
# ar - Array of Strings
#
# Returns an Array with one entry per input: nil for 'nil' or '-' (no
# threshold) and String#to_f of the value otherwise.
def cutoffs_to_floats(ar)
  ar.map do |v|
    %w[nil -].include?(v) ? nil : v.to_f
  end
end
|
98
|
+
|
99
|
+
# returns a hash keyed on protein id that yields an array:
#   [#aaseq, #aaseq_and_charge, #total_hits]
#
# prot_to_peps - Hash of protein id => Array of peptide sequences
# seq_to_hits  - Hash of peptide sequence => Array of hits; each hit must
#                respond to #sequence and #charge
#
# Counts are of distinct values: distinct hit sequences, distinct
# "<sequence>_<charge>" combinations and distinct hits.  The triplet is
# stored once per protein after all of its peptides were visited, so a
# protein with an empty peptide list yields [0, 0, 0].
def stats_per_prot(prot_to_peps, seq_to_hits)
  per_protein_hash = {}
  prot_to_peps.each do |prot, uniq_pep_seqs|
    all = Set.new
    aaseqcharges = Set.new
    aaseqs = Set.new

    uniq_pep_seqs.each do |pep_seq|
      all_hits = seq_to_hits[pep_seq]
      all.merge( all_hits )
      all_hits.each do |hit|
        aaseq = hit.sequence
        aaseqs.add( aaseq )
        aaseqcharges.add( aaseq + '_' + hit.charge.to_s )
      end
    end

    per_protein_hash[prot] = [aaseqs.size, aaseqcharges.size, all.size]
  end
  per_protein_hash
end
|
122
|
+
|
123
|
+
# Command-line handling: defaults, option definitions, the three
# "--*-format" help screens (each prints and then the script exits), and the
# check that exactly two positional arguments remain.
opt = {
  :cutoffs => [nil],
  :outfile => "summary.yml",
}

opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} pepcentric_db.yml sets_compare.txt"
  op.separator "output: #{opt[:outfile]}"
  op.separator ""
  op.separator "input: "
  op.separator "    each <file> referenced in sets_compare.txt should have a"
  op.separator "    <file>.phq.tsv file"
  op.separator ""
  op.separator "options:"
  # hits are kept when qvalue <= cutoff, so the help says "<="
  op.on("-q", "--qvalue <0-1[,...]>", Array, "only take qvalues <= given ['-' for no threshold]") {|v| opt[:cutoffs] = cutoffs_to_floats(v)}
  op.separator ""
  op.separator "formats:"
  op.on("--output-format", "prints the output yaml scheme and exits") {|v| opt[:output_format] = v }
  op.on("--input-format", "prints sets_compare.txt format and exits") {|v| opt[:input_format] = v }
  op.on("--pepcentric-db-format", "prints peptide centric db format and exits") {|v| opt[:pepcentric_db_format] = v }
end

# later on we could implement full isoform resolution like IsoformResolver
# for now we will generate a report, realizing that some isoforms may not be
# reported
# it is implemented by using a pre-made map from sequence to protein groups
# then, a set of sequences allows one to deduce all the relationships from the
# protein groups.

opts.parse!

pd = Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER
kvd = Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER

if opt[:pepcentric_db_format]
  puts "pepcentric_db.yml needs to be in the format:"
  puts "<PEPTIDE>#{kvd.inspect}<ID>#{pd.inspect}<ID>#{pd.inspect}<ID>"
  puts "(The delimiters are shown with #inspect)"
end

if opt[:output_format]
  # Skeleton of the YAML this script writes.  Fragments of option-parser
  # code that had been pasted into the middle of this text were removed, and
  # the two keys the script also emits (num_peptide_hits, num_proteins) were
  # added.
  yaml = <<SKEL
results:
- qvalue_cutoff: <Float>
  sets:
    <set_name>:
      num_peptide_hits: <Integer>
      num_uniq_aaseqs: <Integer>
      num_aaseqs_not_in_pep_db: <Integer>
      num_uniq_aaseqs_charge: <Integer>
      num_proteins: <Integer>
      proteins:
        <protein_id>:
          num_hits_all:
          - <Integer> # total num aaseqs
          - <Integer> # total num aaseq+charge
          - <Integer> # total num hits
          num_hits_minimal:
          - <Integer> # total num aaseqs
          - <Integer> # total num aaseq+charge
          - <Integer> # total num hits
          indistinguishable:
          - <protein_id>
          - <protein_id>
          aaseqs:
          - <String>
          - <String>
sets_order:
- <String>
- <String>
SKEL
  print yaml
end

if opt[:input_format]
  string =<<EXPLANATION
# the sets_compare.txt format is very simple:

Set <some_name_for_set1>
filename1_no_ext
filename2_no_ext
Set <some_name_for_set2>
filename3_no_ext
filename4_no_ext
...
EXPLANATION
  puts string
end

# any of the --*-format flags means "print help only"
exit if opt.keys.any? {|key| key.to_s =~ /_format/ }

if ARGV.size != 2
  puts opts.to_s
  exit
end
|
218
|
+
|
219
|
+
# Main processing.  For every q-value cutoff and every set listed in
# sets_compare.txt: gather the passing peptide hits, map their sequences to
# proteins through the peptide centric db, reduce to a minimal protein set,
# and collect per-set / per-protein statistics.  Everything is written as
# YAML to opt[:outfile].
(pepcentric_fn, sets_compare_fn) = ARGV

results = {}
results['results'] = []

(sets_hash, sets_order) = sets_compare_to_paths(sets_compare_fn)
results['sets_order'] = sets_order

STDERR.print "Loading peptide centric DB (this takes about a minute)..."
start = Time.now
Mspire::Ident::Peptide::Db::IO.open(pepcentric_fn) do |pep_to_prots|
  STDERR.puts "#{Time.now - start} seconds."

  opt[:cutoffs].each do |cutoff|
    results_sets_hash = {}
    results['results'] << {'qvalue_cutoff' => cutoff, 'sets' => results_sets_hash}

    #########################
    # FOR EACH SET:
    #########################
    # built from the headers of the first file read; assumes the indices are
    # the same into each data file
    pep_klass = nil
    sets_hash.each do |set, files|
      set_results = {}
      results_sets_hash[set] = set_results

      # get the complete set of passing hits
      # NOTE(review): the listed files carry the .phq.tsv extension but are
      # read with YAML.load_file and expected to hold 'headers' and 'data'
      # keys -- confirm the on-disk format.  YAML.load_file should only be
      # used on files you trust.
      all_passing_hits = files.inject([]) do |passing, file|
        hash = YAML.load_file(file)

        pep_klass ||= Struct.new(*(hash['headers'].map {|v| v.to_sym }))
        hits = hash['data'].map {|v| pep_klass.new(*v) }

        # concat instead of push(*hits): no argument splat of a large array
        passing.concat(cutoff ? hits.select {|hit| hit.qvalue <= cutoff } : hits)
      end

      # create an index from aaseq to hits
      seq_to_hits = Hash.new {|h,k| h[k] = []}
      uniq_seqcharge = Set.new
      all_passing_hits.each do |hit|
        seq_to_hits[hit.sequence] << hit
        uniq_seqcharge.add( hit.sequence + '_' + hit.charge.to_s )
      end

      set_results.merge!( { 'num_peptide_hits' => all_passing_hits.size,
                            'num_uniq_aaseqs' => seq_to_hits.size,
                            'num_uniq_aaseqs_charge' => uniq_seqcharge.size,
      })

      # create an index from proteins to peptides
      # (pep_to_prots is the db handle yielded above; the previous code
      # referred to an undefined local named pep_db here)
      prots_to_peps = Hash.new {|h,k| h[k] = [] }
      peptides_not_found = []
      seq_to_hits.keys.each do |seq|
        if pep_to_prots.key?(seq)
          pep_to_prots[seq].each {|prot| prots_to_peps[prot] << seq }
        else
          peptides_not_found << seq
        end
      end

      # Determine the number of 1) hits, 2) aaseqs, 3) aaseqcharges per protein BEFORE minimization
      stats_per_protein_before = stats_per_prot(prots_to_peps, seq_to_hits)

      # get the minimal protein set
      # NOTE(review): the previous code passed a sort block whose third key
      # was -(prot_sizes_hash[prot]), but prot_sizes_hash is never defined
      # in this script.  The default ordering of minimal_protein_set equals
      # the first two keys of that block, so it is used until protein sizes
      # are actually available.  The loop that filled an unused, undefined
      # protein_info hash from an undefined prot_header_hash was dropped for
      # the same reason.
      (prot_to_uniq_peps_hash, indistinguishable_protein_hash) = minimal_protein_set(prots_to_peps)

      stats_per_protein_minimal = stats_per_prot(prot_to_uniq_peps_hash, seq_to_hits)

      # create a hash of data for each protein
      protein_data_hashes_hash = {}
      prot_to_uniq_peps_hash.each do |prot, peps|
        protein_data_hashes_hash[prot] = {
          'aaseqs' => peps,
          # this will be a triplet
          'num_hits_minimal' => stats_per_protein_minimal[prot],
          'indistinguishable' => indistinguishable_protein_hash[prot],
          'num_hits_all' => stats_per_protein_before[prot],
        }
      end

      set_results['proteins'] = protein_data_hashes_hash
      set_results['num_proteins'] = prot_to_uniq_peps_hash.size
      set_results['num_aaseqs_not_in_pep_db'] = peptides_not_found.size
      if peptides_not_found.size > 0
        warn "Did not find in peptide centric db: #{peptides_not_found.join(', ')}"
      end
    end
  end

  File.open(opt[:outfile], 'w') do |out|
    out.print results.to_yaml
  end
end
|
345
|
+
|
@@ -0,0 +1,46 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'mspire/mzml'
|
4
|
+
require 'optparse'
|
5
|
+
|
6
|
+
# Script body: prints every spectrum with ms_level > 1 of each given mzML
# file to stdout as an MGF "BEGIN IONS ... END IONS" record.
opt = {
  filter_zero_intensity: true,
  retention_times: true,
}
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename($0)} <file>.mzML ..."
  op.separator "outputs: <file>.mgf"
  #op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
  # the default is set in ms/msrun/search.rb -> set_opts
  op.on("--no-retention-times", "won't include RT even if available") {|v| opt[:retention_times] = false }
end

opts.parse!

if ARGV.size == 0
  puts opts
  exit
end

ARGV.each do |file|
  unless File.exist?(file)
    # stderr, so the notice does not end up inside the MGF data on stdout
    warn "missing file: #{file} [skipping]"
    next
  end

  Mspire::Mzml.foreach(file).with_index do |spectrum,i|
    next unless spectrum.ms_level > 1

    # NOTE(review): presumably nil when the spectrum carries no retention
    # time or precursor charge -- confirm against Mspire::Mzml::Spectrum.
    rt = spectrum.retention_time
    charge = spectrum.precursor_charge

    # id, spectrumid, (rounded retention time when there is one)
    title_parts = [i, "id_#{spectrum.id}"]
    title_parts << "rt_#{rt.round}" if rt

    puts "BEGIN IONS"
    puts "TITLE=#{title_parts.join('.')}"
    puts "RTINSECONDS=#{rt}" if rt && opt[:retention_times]
    puts "PEPMASS=#{spectrum.precursor_mz}"
    puts "CHARGE=#{charge}+" if charge
    spectrum.each do |mz,int|
      puts [mz, int].join(" ")
    end
    puts "END IONS"
    puts ""
  end
end
|