mspire 0.8.6 → 0.8.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.6
1
+ 0.8.6.1
data/lib/mspire.rb CHANGED
@@ -3,4 +3,5 @@ require 'mspire/mass/aa' # requires mspire/mass & therefore mspire/molecular_for
3
3
 
4
4
  module Mspire
5
5
  VERSION = IO.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp
6
+ CITE = "Prince JT, Marcotte EM. mspire: mass spectrometry proteomics in Ruby. Bioinformatics. 2008. 24(23):2796-7."
6
7
  end
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ostruct'
5
+ require 'set'
6
+
7
+ require 'mspire/mzml'
8
+ require 'mspire/digester'
9
+ require 'mspire/mascot/dat'
10
+
11
+
12
# Numeric helpers added to Array for this script.
class Array

  # Sums the elements as floats.
  #
  # Called with no arguments, every element is converted with +to_f+ and
  # added up; an empty array yields nil (the long-standing behavior of this
  # helper).  Called with an initial value and/or a block -- the signature
  # of the core +sum+ -- the call is forwarded to the standard
  # implementation so that other code relying on it is not broken by this
  # override.
  def sum(*args, &block)
    return super if !args.empty? || block
    inject(nil) { |acc, x| acc ? acc + x.to_f : x.to_f }
  end

  # Returns the mean of the receiver's values weighted by +weights_array+
  # (weights are matched to values by position and converted with +to_f+).
  # The result is always a Float.
  def weighted_mean(weights_array)
    w_sum = weights_array.sum
    w_prod = 0
    each_with_index { |value, i| w_prod += value * weights_array[i].to_f }
    w_prod.to_f / w_sum.to_f
  end
end
25
+
26
+
27
# Default settings; each may be overridden from the command line below.
opt = OpenStruct.new( {
  max_rt_before: 60,
  max_rt_after: 60,
  mz_window: 0.01,
  scan_id_regex: Regexp.new("(.*)"),
  # the regex I use:
  #scan_id_regex: Regexp.new("id_([^\\.]+)"),
} )

# Command-line definition.  Every handler takes the parsed value as its
# block parameter |v| and stores it on +opt+.
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} [OPTS] <mzML> <dat> <accession> ..."
  op.separator "output: <TBD>"
  op.separator ""
  op.separator "options: "
  op.on("--max_rt_before <#{opt.max_rt_before}>", Float, "(sec) max RT to look before") {|v| opt.max_rt_before = v }
  op.on("--max_rt_after <#{opt.max_rt_after}>", Float, "(sec) max RT to look after") {|v| opt.max_rt_after = v }
  op.on("--mz_window <#{opt.mz_window}>", Float, "(Th) window around m/z value") {|v| opt.mz_window = v }
  op.on("--scan_id_regex <#{opt.scan_id_regex.source}>", "scan") {|v| opt.scan_id_regex = Regexp.new(v) }
  op.on("--add-filename", "adds the filename to output files") {|v| opt.add_filename = v }
end
# Consumes recognized switches from ARGV, leaving the positional arguments.
opts.parse!
49
+
50
+ if ARGV.size < 3
51
+ puts opts
52
+ exit
53
+ end
54
+
55
+ (mzml_file, dat_file, *accessions_array) = ARGV
56
+
57
+ accessions = Set.new(accessions_array)
58
+
59
# Builds a chromatogram (array of [retention_time, summed_intensity] pairs)
# for the m/z value +mz+ by walking spectra in the order given by
# +index_enum+.
#
# mzml::       object indexed by spectrum position (+mzml[i]+); a nil
#              result ends the walk
# index_enum:: external enumerator of spectrum positions; may be finite
#              (e.g. +i.downto(0)+) or unbounded
# mz::         center of the extraction window
# mz_window::  full width of the window (mz +/- mz_window/2)
# ms_level::   only spectra matching this level (via ===) are used
#
# The block is yielded the retention time of each candidate spectrum and
# stops the iteration when it returns nil/false.
#
# Spectra with no peak inside the window contribute no point.
# NOTE(review): assumes spectrum.mzs is sorted ascending -- confirm.
def create_chromatogram(mzml, index_enum, mz, mz_window, ms_level=1, &block)
  lwin_mz = mz - (mz_window/2.0)
  hwin_mz = mz + (mz_window/2.0)

  chromatogram = []
  # Kernel#loop ends quietly on StopIteration, so running off the end of a
  # finite enumerator simply finishes the chromatogram.
  loop do
    spec_index = index_enum.next
    break unless spec_index
    spectrum = mzml[spec_index]
    break unless spectrum
    next unless ms_level === spectrum.ms_level
    break unless block.call( spectrum.retention_time )

    mzs = spectrum.mzs
    ints = spectrum.intensities
    nearest = spectrum.find_nearest_index(mz)
    next unless nearest

    ints_in_range = []
    # walk up from the nearest peak, never past the last peak
    nearest.upto(mzs.size - 1) do |i|
      break if mzs[i] > hwin_mz
      # the nearest peak itself may lie below the window
      ints_in_range << ints[i] if mzs[i] >= lwin_mz
    end
    # walk down from just below the nearest peak
    (nearest - 1).downto(0) do |i|
      break if mzs[i] < lwin_mz
      ints_in_range << ints[i] if mzs[i] <= hwin_mz
    end

    if ints_in_range.size > 0
      chromatogram << [spectrum.retention_time, ints_in_range.reduce(:+)]
    end
  end
  chromatogram
end
95
+
96
+ Pephit = Struct.new(:spectrum_id, :exp_mz, :charge, :seq, :accessions, :var_mods_string, :chromatogram)
97
+
98
+ pephits = []
99
+ Mspire::Mascot::Dat.open(dat_file) do |dat|
100
+ dat.each_peptide(1) do |pephit|
101
+ intersecting_accessions = accessions & pephit.protein_hits_info.map(&:accession)
102
+ if intersecting_accessions.size > 0
103
+ query = dat.query(pephit.query_num)
104
+ z = query.charge
105
+ exp_mr = pephit.mr + pephit.delta
106
+ exp_mz = (exp_mr + (z * Mspire::Mass::H_PLUS)) / z
107
+ md=opt.scan_id_regex.match(query.title)
108
+ if md
109
+ spectrum_id = md[1]
110
+ end
111
+ pephits << Pephit.new(spectrum_id, exp_mz, z, pephit.seq, intersecting_accessions.to_a, pephit.var_mods_string)
112
+ end
113
+ end
114
+ end
115
+
116
+ puts "Found: #{pephits.size} pephits"
117
+ exit unless pephits.size > 0
118
+
119
+ Mspire::Mzml.open(mzml_file) do |mzml|
120
+ spec_index = mzml.index_list[:spectrum]
121
+
122
+ tic = mzml.map {|spec| spec.fetch_by_acc('MS:1000285').to_f }.reduce(:+)
123
+ divisor = tic.to_f/1e7
124
+
125
+ id_to_index = {}
126
+ spec_index.ids.each_with_index {|id,index| id_to_index[id] = index }
127
+
128
+
129
+ pephits.each do |pephit|
130
+ print "." ; $stdout.flush
131
+
132
+ ms1_spec_id = mzml[pephit.spectrum_id].precursors.first.spectrum_id
133
+ index = id_to_index[ms1_spec_id]
134
+ spectrum = mzml[index]
135
+
136
+ orig_rt = spectrum.retention_time
137
+ lo_rt = orig_rt - opt.max_rt_before
138
+ hi_rt = orig_rt + opt.max_rt_after
139
+
140
+ first_chunk = create_chromatogram(mzml, index.downto(0), pephit.exp_mz, opt.mz_window) {|rt| rt >= lo_rt }
141
+ last_chunk = create_chromatogram(mzml, (index+1).upto(Float::INFINITY), pephit.exp_mz, opt.mz_window) {|rt| rt <= hi_rt }
142
+
143
+ chromatogram = (first_chunk + last_chunk).sort
144
+ chromatogram.each {|pair| pair[1] /= divisor }
145
+
146
+ pephit.chromatogram = chromatogram
147
+ end
148
+ end
149
+ puts "finished with mzml"
150
+
151
+ pephits.group_by {|pephit| [pephit.seq, pephit.charge, pephit.var_mods_string] }.map do |group, sub_pephits|
152
+ puts "grouping: #{group.join(', ')}"
153
+ avg_exp_mz = sub_pephits.map(&:exp_mz).reduce(:+) / sub_pephits.size
154
+ new_chrom = sub_pephits.flat_map(&:chromatogram).uniq.sort
155
+ cpephit = Pephit.new("(#{sub_pephits.size})", avg_exp_mz, *[:charge, :seq, :accessions, :var_mods_string].map {|key| sub_pephits.first.send(key) }, new_chrom)
156
+
157
+ fileparts = [cpephit.seq, cpephit.charge, cpephit.var_mods_string]
158
+ if opt.add_filename
159
+ fileparts.unshift(dat_file.chomp(File.extname(dat_file)))
160
+ end
161
+ filename = fileparts.join(".") + ".tsv"
162
+
163
+ puts "writing: #{filename}"
164
+ File.open(filename, 'w') do |out|
165
+ cpephit.each_pair do |k,v|
166
+ out.puts "# #{k}: #{v}" unless k.to_sym == :chromatogram
167
+ end
168
+ out.puts
169
+ out.puts "rt(sec)\tnorm_intensity"
170
+ cpephit.chromatogram.each do |row|
171
+ out.puts row.join("\t")
172
+ end
173
+ end
174
+ end
175
+
176
+
@@ -3,6 +3,13 @@
3
3
  require 'mspire/mzml'
4
4
  require 'optparse'
5
5
 
6
+
7
# Formats a charge state the way Mascot expects it: the magnitude followed
# by the sign.  Returns '3+' for 3 or '2-' for -2.
def mascot_charge(val)
  # use the magnitude so a negative charge is not rendered as "-2-"
  "#{val.abs}#{val > 0 ? '+' : '-'}"
end
11
+
12
+
6
13
  opt = {
7
14
  filter_zero_intensity: true,
8
15
  retention_times: true,
@@ -10,7 +17,7 @@ opt = {
10
17
  opts = OptionParser.new do |op|
11
18
  op.banner = "usage: #{File.basename($0)} <file>.mzML ..."
12
19
  op.separator "outputs: <file>.mgf"
13
- #op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
20
+ op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
14
21
  # the default is set in ms/msrun/search.rb -> set_opts
15
22
  op.on("--no-retention-times", "won't include RT even if available") {|v| opt[:retention_times] = false }
16
23
  end
@@ -22,25 +29,35 @@ if ARGV.size == 0
22
29
  exit
23
30
  end
24
31
 
32
+ filter_zeros = opt[:filter_zero_intensity]
33
+
25
34
  ARGV.each do |file|
26
- if File.exist?(file)
35
+ basename = file.chomp(File.extname(file))
36
+ outfile = basename + ".mgf"
37
+
38
+ File.open(outfile, 'w') do |out|
27
39
  Mspire::Mzml.foreach(file).with_index do |spectrum,i|
28
40
  next unless spectrum.ms_level > 1
29
- puts "BEGIN IONS"
41
+ out.puts "BEGIN IONS"
30
42
  # id, spectrumid,
31
43
  rt = spectrum.retention_time
32
- title = [i, "id_#{spectrum.id}", "rt_#{rt.round}"].join('.')
33
- puts "TITLE=#{title}"
34
- puts "RTINSECONDS=#{rt}" if opt[:retention_times]
35
- puts "PEPMASS=#{spectrum.precursor_mz}"
36
- puts "CHARGE=#{spectrum.precursor_charge}+"
44
+ title_ar = [i, "id_#{spectrum.id}"]
45
+ title_ar.push("rt_#{rt.round}") if opt[:retention_times]
46
+ title = title_ar.join('.')
47
+ out.puts "TITLE=#{title}"
48
+ out.puts "RTINSECONDS=#{rt}" if opt[:retention_times]
49
+ out.puts "PEPMASS=#{spectrum.precursor_mz}"
50
+ if z=spectrum.precursor_charge
51
+ out.puts "CHARGE=#{mascot_charge(z)}"
52
+ end
53
+
37
54
  spectrum.each do |mz,int|
38
- puts [mz, int].join(" ")
55
+ unless filter_zeros && (int==0.0)
56
+ out.puts([mz, int].join(" "))
57
+ end
39
58
  end
40
- puts "END IONS"
41
- puts ""
59
+ out.puts "END IONS"
60
+ out.puts ""
42
61
  end
43
- else
44
- puts "missing file: #{file} [skipping]"
45
62
  end
46
63
  end
@@ -4,6 +4,7 @@ require 'andand'
4
4
  require 'set'
5
5
  require 'ruport'
6
6
 
7
+ require 'mspire'
7
8
  require 'mspire/ident/peptide_hit/qvalue'
8
9
  require 'mspire/ident/peptide_hit'
9
10
  require 'mspire/ident/protein_group'
@@ -96,11 +97,14 @@ group names can be arbitrarily defined
96
97
  opt :qspec_decibans, "report bayesfactor in decibans"
97
98
  opt :qspec_normalize, "normalize spectral counts per run", :default => false
98
99
  opt :qspec_keep_files, "keep a copy of the files submitted and returned from Qspec", :default => false
100
+ opt :version_tag, "pass in a version tag (e.g. pass in git describe --tags) for version record", :type => String
99
101
  opt :write_subset, "(dev use only) write subset db", :default => false
100
102
  end
101
103
 
104
+ commandline_incantation = __FILE__ + " " + ARGV.join(" ")
102
105
  opt = opts.parse(ARGV)
103
106
  opt[:count_type] = opt[:count_type].to_sym
107
+ outfile = opt[:outfile] || outfile
104
108
 
105
109
  $VERBOSE = opt.delete(:verbose)
106
110
 
@@ -271,5 +275,15 @@ if opt[:peptides]
271
275
  hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
272
276
  end
273
277
 
274
- intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
278
+ intro = [
279
+ "",
280
+ "ruby: #{RUBY_VERSION}",
281
+ "software: mspire #{Mspire::VERSION}",
282
+ "cite: #{Mspire::CITE}",
283
+ "samples: #{samplename_to_filename}",
284
+ "options: #{opt}",
285
+ "commandline: #{commandline_incantation}"
286
+ ]
287
+ intro.insert(3, "version_tag: #{opt[:version_tag]}") if opt[:version_tag]
288
+
275
289
  counts_table.to_tsv(outfile, :footer => intro)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.6
4
+ version: 0.8.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2013-04-03 00:00:00.000000000 Z
13
+ date: 2013-04-16 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -314,6 +314,7 @@ files:
314
314
  - obo/ms.obo
315
315
  - obo/unit.obo
316
316
  - schema/peptide_hit_qvalues.pqh.tsv
317
+ - script/accession_quantifier.rb
317
318
  - script/download_uniprotkb_db.rb
318
319
  - script/fasta_to_peptide_centric_db.rb
319
320
  - script/mascot_dat_to_peptide_hit_qvalues.rb