mspire 0.8.6 → 0.8.6.1

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.6
1
+ 0.8.6.1
data/lib/mspire.rb CHANGED
@@ -3,4 +3,5 @@ require 'mspire/mass/aa' # requires mspire/mass & therefore mspire/molecular_for
3
3
 
4
4
  module Mspire
5
5
  VERSION = IO.read(File.join(File.dirname(__FILE__), '..', 'VERSION')).chomp
6
+ CITE = "Prince JT, Marcotte EM. mspire: mass spectrometry proteomics in Ruby. Bioinformatics. 2008. 24(23):2796-7."
6
7
  end
@@ -0,0 +1,176 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'optparse'
4
+ require 'ostruct'
5
+ require 'set'
6
+
7
+ require 'mspire/mzml'
8
+ require 'mspire/digester'
9
+ require 'mspire/mascot/dat'
10
+
11
+
12
# Numeric helpers added to Array for this script.
class Array

  # Adds up the elements after converting each of them with #to_f.
  # Returns a Float, or nil when the array is empty.
  def sum
    total = nil
    each do |element|
      number = element.to_f
      total = total.nil? ? number : total + number
    end
    total
  end

  # Weighted mean of the receiver: each element is multiplied by the entry at
  # the same position in weights_array (converted with #to_f), and the total
  # is divided by the sum of the weights.  Returns a Float.
  def weighted_mean(weights_array)
    total_weight = weights_array.sum
    weighted_total = 0
    each_with_index do |element, position|
      weighted_total += element * weights_array[position].to_f
    end
    weighted_total.to_f / total_weight.to_f
  end
end
25
+
26
+
27
# Default option values; each can be overridden on the command line.
opt = OpenStruct.new( {
  max_rt_before: 60,
  max_rt_after: 60,
  mz_window: 0.01,
  scan_id_regex: Regexp.new("(.*)"),
  # the regex I use:
  #scan_id_regex: Regexp.new("id_([^\\.]+)"),
} )


# Command-line parser.  Every handler block takes the parsed value as |v|
# (the first two handlers were missing the opening pipe, which is a syntax
# error that prevents the script from loading).
opts = OptionParser.new do |op|
  op.banner = "usage: #{File.basename(__FILE__)} [OPTS] <mzML> <dat> <accession> ..."
  op.separator "output: <TBD>"
  op.separator ""
  op.separator "options: "
  op.on("--max_rt_before <#{opt.max_rt_before}>", Float, "(sec) max RT to look before") {|v| opt.max_rt_before = v }
  op.on("--max_rt_after <#{opt.max_rt_after}>", Float, "(sec) max RT to look after") {|v| opt.max_rt_after = v }
  op.on("--mz_window <#{opt.mz_window}>", Float, "(Th) window around m/z value") {|v| opt.mz_window = v }
  op.on("--scan_id_regex <#{opt.scan_id_regex.source}>", "scan") {|v| opt.scan_id_regex = Regexp.new(v) }
  op.on("--add-filename", "adds the filename to output files") {|v| opt.add_filename = v }
end
# parse! removes the recognized switches from ARGV, leaving positional args
opts.parse!

# need at least an mzML file, a dat file and one accession
if ARGV.size < 3
  puts opts
  exit
end

(mzml_file, dat_file, *accessions_array) = ARGV

# Set gives fast intersection with each hit's accessions later on
accessions = Set.new(accessions_array)
58
+
59
# Builds an extracted-ion chromatogram for one m/z value.
#
# mzml       - object indexed by spectrum position; iteration stops as soon
#              as mzml[index] returns nil/false
# index_enum - enumerator of spectrum positions to visit, in visiting order
#              (may be finite, e.g. 5.downto(0), or unbounded)
# mz         - target m/z value
# mz_window  - full width of the window centered on mz
# ms_level   - only spectra for which ms_level === spectrum.ms_level are used
#
# The block yields the retention time in seconds and stops iteration if the
# block returns nil/false.
#
# Returns an array of [retention_time, summed_intensity] pairs, one for each
# visited spectrum that has at least one peak inside the window.
def create_chromatogram(mzml, index_enum, mz, mz_window, ms_level=1, &block)
  lwin_mz = mz - (mz_window/2.0)
  hwin_mz = mz + (mz_window/2.0)

  chromatogram = []
  # Kernel#loop rescues StopIteration, so a finite enumerator that runs out
  # (e.g. walking back to spectrum 0) ends the scan instead of raising.
  loop do
    spec_index = index_enum.next
    break unless spec_index
    spectrum = mzml[spec_index]
    break unless spectrum
    next unless ms_level===spectrum.ms_level
    rt = spectrum.retention_time
    break unless block.call( rt )

    mzs = spectrum.mzs
    ints = spectrum.intensities
    nearest = spectrum.find_nearest_index(mz)
    # nothing to scan from if no nearest peak is reported
    next if nearest.nil?

    # Walk outward from the nearest peak in both directions.
    # NOTE(review): assumes mzs is sorted ascending -- confirm.
    # Both window bounds are tested on every peak so that a nearest peak
    # lying outside the window is not counted, and the upward walk is
    # bounded by the number of peaks.
    ints_in_range = []
    nearest.upto(mzs.size - 1) do |i|
      break if mzs[i] > hwin_mz
      ints_in_range << ints[i] if mzs[i] >= lwin_mz
    end
    (nearest - 1).downto(0) do |i|
      break if mzs[i] < lwin_mz
      ints_in_range << ints[i] if mzs[i] <= hwin_mz
    end

    if ints_in_range.size > 0
      chromatogram << [rt, ints_in_range.reduce(:+)]
    end
  end
  chromatogram
end
95
+
96
# One peptide hit of interest; chromatogram is filled in later.
Pephit = Struct.new(:spectrum_id, :exp_mz, :charge, :seq, :accessions, :var_mods_string, :chromatogram)

# Collect every peptide hit from the dat file that maps to at least one of
# the requested accessions.
pephits = []
Mspire::Mascot::Dat.open(dat_file) do |dat|
  # NOTE(review): the argument 1 presumably restricts to top-ranked hits -- confirm
  dat.each_peptide(1) do |pephit|
    hit_accessions = pephit.protein_hits_info.map(&:accession)
    intersecting_accessions = accessions & hit_accessions
    next unless intersecting_accessions.size > 0

    query = dat.query(pephit.query_num)
    z = query.charge
    # experimental m/z: (pephit.mr + pephit.delta) plus z * H_PLUS, divided by z
    exp_mr = pephit.mr + pephit.delta
    exp_mz = (exp_mr + (z * Mspire::Mass::H_PLUS)) / z

    # first capture group of the regex applied to the query title;
    # stays nil when the title does not match
    md = opt.scan_id_regex.match(query.title)
    spectrum_id = md ? md[1] : nil

    pephits << Pephit.new(spectrum_id, exp_mz, z, pephit.seq, intersecting_accessions.to_a, pephit.var_mods_string)
  end
end

puts "Found: #{pephits.size} pephits"
exit if pephits.empty?
118
+
119
# Attach a normalized chromatogram to every collected peptide hit.
Mspire::Mzml.open(mzml_file) do |mzml|
  spec_index = mzml.index_list[:spectrum]

  # sum of the 'MS:1000285' value over every spectrum; intensities are
  # divided by (that sum / 1e7) below
  per_spectrum_tic = mzml.map {|spec| spec.fetch_by_acc('MS:1000285').to_f }
  tic = per_spectrum_tic.reduce(:+)
  divisor = tic.to_f/1e7

  # spectrum id => position in the spectrum index
  id_to_index = {}
  spec_index.ids.each_with_index do |spectrum_id, position|
    id_to_index[spectrum_id] = position
  end

  pephits.each do |pephit|
    # progress indicator
    print "."
    $stdout.flush

    # spectrum referenced by the first precursor of the identified spectrum
    # (presumably the parent MS1 scan)
    identified_spectrum = mzml[pephit.spectrum_id]
    ms1_spec_id = identified_spectrum.precursors.first.spectrum_id
    index = id_to_index[ms1_spec_id]

    orig_rt = mzml[index].retention_time
    lo_rt = orig_rt - opt.max_rt_before
    hi_rt = orig_rt + opt.max_rt_after

    # walk backwards (including the starting spectrum), then forwards
    earlier = create_chromatogram(mzml, index.downto(0), pephit.exp_mz, opt.mz_window) {|rt| rt >= lo_rt }
    later = create_chromatogram(mzml, (index+1).upto(Float::INFINITY), pephit.exp_mz, opt.mz_window) {|rt| rt <= hi_rt }

    # sort the [rt, intensity] pairs, then normalize each intensity in place
    pephit.chromatogram = (earlier + later).sort.each {|pair| pair[1] /= divisor }
  end
end
puts "finished with mzml"
150
+
151
# Merge hits sharing sequence, charge and variable modifications, and write
# one .tsv file per group.
grouped_pephits = pephits.group_by {|pephit| [pephit.seq, pephit.charge, pephit.var_mods_string] }
grouped_pephits.each do |group, sub_pephits|
  puts "grouping: #{group.join(', ')}"

  avg_exp_mz = sub_pephits.map(&:exp_mz).reduce(:+) / sub_pephits.size
  # pool all chromatogram points of the group, dropping exact duplicates
  new_chrom = sub_pephits.flat_map(&:chromatogram).uniq.sort

  # combined hit: spectrum_id slot holds the group size in parentheses; the
  # remaining descriptive fields are taken from the first hit of the group
  representative = sub_pephits.first
  cpephit = Pephit.new(
    "(#{sub_pephits.size})",
    avg_exp_mz,
    representative.charge,
    representative.seq,
    representative.accessions,
    representative.var_mods_string,
    new_chrom
  )

  fileparts = [cpephit.seq, cpephit.charge, cpephit.var_mods_string]
  # optionally prefix with the dat file name minus its extension
  fileparts.unshift(dat_file.chomp(File.extname(dat_file))) if opt.add_filename
  filename = "#{fileparts.join(".")}.tsv"

  puts "writing: #{filename}"
  File.open(filename, 'w') do |out|
    # header: every field except the chromatogram as a '# key: value' line
    cpephit.each_pair do |k,v|
      next if k.to_sym == :chromatogram
      out.puts "# #{k}: #{v}"
    end
    out.puts
    out.puts "rt(sec)\tnorm_intensity"
    cpephit.chromatogram.each {|row| out.puts row.join("\t") }
  end
end
175
+
176
+
@@ -3,6 +3,13 @@
3
3
  require 'mspire/mzml'
4
4
  require 'optparse'
5
5
 
6
+
7
# Formats a signed charge state with the sign as a trailing character:
# returns '3+' for 3 or '2-' for -2.
#
# val - numeric charge; values that are not greater than zero get a
#       trailing '-'
#
# The magnitude is printed (val.abs) so that a negative charge does not
# come out with both a leading and a trailing minus sign ('-2-').
def mascot_charge(val)
  "#{val.abs}#{val > 0 ? '+' : '-'}"
end
11
+
12
+
6
13
  opt = {
7
14
  filter_zero_intensity: true,
8
15
  retention_times: true,
@@ -10,7 +17,7 @@ opt = {
10
17
  opts = OptionParser.new do |op|
11
18
  op.banner = "usage: #{File.basename($0)} <file>.mzML ..."
12
19
  op.separator "outputs: <file>.mgf"
13
- #op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
20
+ op.on("--no-filter-zeros", "won't remove values with zero intensity") {|v| opt[:filter_zero_intensity] = false }
14
21
  # the default is set in ms/msrun/search.rb -> set_opts
15
22
  op.on("--no-retention-times", "won't include RT even if available") {|v| opt[:retention_times] = false }
16
23
  end
@@ -22,25 +29,35 @@ if ARGV.size == 0
22
29
  exit
23
30
  end
24
31
 
32
+ filter_zeros = opt[:filter_zero_intensity]
33
+
25
34
  ARGV.each do |file|
26
- if File.exist?(file)
35
+ basename = file.chomp(File.extname(file))
36
+ outfile = basename + ".mgf"
37
+
38
+ File.open(outfile, 'w') do |out|
27
39
  Mspire::Mzml.foreach(file).with_index do |spectrum,i|
28
40
  next unless spectrum.ms_level > 1
29
- puts "BEGIN IONS"
41
+ out.puts "BEGIN IONS"
30
42
  # id, spectrumid,
31
43
  rt = spectrum.retention_time
32
- title = [i, "id_#{spectrum.id}", "rt_#{rt.round}"].join('.')
33
- puts "TITLE=#{title}"
34
- puts "RTINSECONDS=#{rt}" if opt[:retention_times]
35
- puts "PEPMASS=#{spectrum.precursor_mz}"
36
- puts "CHARGE=#{spectrum.precursor_charge}+"
44
+ title_ar = [i, "id_#{spectrum.id}"]
45
+ title_ar.push("rt_#{rt.round}") if opt[:retention_times]
46
+ title = title_ar.join('.')
47
+ out.puts "TITLE=#{title}"
48
+ out.puts "RTINSECONDS=#{rt}" if opt[:retention_times]
49
+ out.puts "PEPMASS=#{spectrum.precursor_mz}"
50
+ if z=spectrum.precursor_charge
51
+ out.puts "CHARGE=#{mascot_charge(z)}"
52
+ end
53
+
37
54
  spectrum.each do |mz,int|
38
- puts [mz, int].join(" ")
55
+ unless filter_zeros && (int==0.0)
56
+ out.puts([mz, int].join(" "))
57
+ end
39
58
  end
40
- puts "END IONS"
41
- puts ""
59
+ out.puts "END IONS"
60
+ out.puts ""
42
61
  end
43
- else
44
- puts "missing file: #{file} [skipping]"
45
62
  end
46
63
  end
@@ -4,6 +4,7 @@ require 'andand'
4
4
  require 'set'
5
5
  require 'ruport'
6
6
 
7
+ require 'mspire'
7
8
  require 'mspire/ident/peptide_hit/qvalue'
8
9
  require 'mspire/ident/peptide_hit'
9
10
  require 'mspire/ident/protein_group'
@@ -96,11 +97,14 @@ group names can be arbitrarily defined
96
97
  opt :qspec_decibans, "report bayesfactor in decibans"
97
98
  opt :qspec_normalize, "normalize spectral counts per run", :default => false
98
99
  opt :qspec_keep_files, "keep a copy of the files submitted and returned from Qspec", :default => false
100
+ opt :version_tag, "pass in a version tag (e.g. pass in git describe --tags) for version record", :type => String
99
101
  opt :write_subset, "(dev use only) write subset db", :default => false
100
102
  end
101
103
 
104
+ commandline_incantation = __FILE__ + " " + ARGV.join(" ")
102
105
  opt = opts.parse(ARGV)
103
106
  opt[:count_type] = opt[:count_type].to_sym
107
+ outfile = opt[:outfile] || outfile
104
108
 
105
109
  $VERBOSE = opt.delete(:verbose)
106
110
 
@@ -271,5 +275,15 @@ if opt[:peptides]
271
275
  hits_table.to_tsv(pephits_outfile, :footer => ["parallel to #{outfile}"])
272
276
  end
273
277
 
274
- intro = ["samples: #{samplename_to_filename}", "options: #{opt}"]
278
+ intro = [
279
+ "",
280
+ "ruby: #{RUBY_VERSION}",
281
+ "software: mspire #{Mspire::VERSION}",
282
+ "cite: #{Mspire::CITE}",
283
+ "samples: #{samplename_to_filename}",
284
+ "options: #{opt}",
285
+ "commandline: #{commandline_incantation}"
286
+ ]
287
+ intro.insert(3, "version_tag: #{opt[:version_tag]}") if opt[:version_tag]
288
+
275
289
  counts_table.to_tsv(outfile, :footer => intro)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mspire
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.6
4
+ version: 0.8.6.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2013-04-03 00:00:00.000000000 Z
13
+ date: 2013-04-16 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -314,6 +314,7 @@ files:
314
314
  - obo/ms.obo
315
315
  - obo/unit.obo
316
316
  - schema/peptide_hit_qvalues.pqh.tsv
317
+ - script/accession_quantifier.rb
317
318
  - script/download_uniprotkb_db.rb
318
319
  - script/fasta_to_peptide_centric_db.rb
319
320
  - script/mascot_dat_to_peptide_hit_qvalues.rb