mspire 0.8.5 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.5
1
+ 0.8.6
@@ -0,0 +1,15 @@
1
+
2
+
3
+ # inverse from Tilo Sloboda (now in facets)
4
+
5
class Hash
  # Returns a new hash that maps each value of the receiver back to its key.
  #
  # * A value that is an Array is expanded: every member becomes a key of the
  #   result.
  # * When a single original key produced a given value, the result stores
  #   that key directly (not wrapped in an Array).
  # * When several original keys produced the same value, the result stores
  #   an Array of those keys, most recently visited key first.
  #
  # Original keys are stored untouched, so keys that are themselves Arrays
  # are not flattened into the collision list.
  def inverse
    inverted = {}
    # values already seen more than once, i.e. whose entry in +inverted+ is a
    # list of original keys rather than a single original key
    collided = {}

    record = lambda do |value, key|
      if !inverted.key?(value)
        inverted[value] = key
      elsif collided.key?(value)
        inverted[value] = [key] + inverted[value]
      else
        inverted[value] = [key, inverted[value]]
        collided[value] = true
      end
    end

    each_pair do |key, value|
      if Array === value
        value.each { |member| record.call(member, key) }
      else
        record.call(value, key)
      end
    end

    inverted
  end
end
14
+
15
+
@@ -22,8 +22,8 @@ module Mspire
22
22
  # Proc.new doesn't do arity checking
23
23
  hit_with_qvalue_pairs = Proc.new do |hits|
24
24
  sorted_best_to_worst = (hits.sort_by(&sorting)).reverse
25
- (target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
26
- target_hits.zip(qvalues)
25
+ (sorted_target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
26
+ sorted_target_hits.zip(qvalues)
27
27
  end
28
28
 
29
29
  all_together = target_hits + decoy_hits
@@ -49,13 +49,13 @@ module Mspire
49
49
  opts = {:monotonic => true}.merge(opts)
50
50
  num_target = 0 ; num_decoy = 0
51
51
  monotonic = opts[:monotonic]
52
- target_hits = []
52
+ sorted_target_hits = []
53
53
  qvalues = []
54
54
  best_to_worst.each do |hit|
55
55
  if target_setlike.include?(hit)
56
56
  num_target += 1
57
57
  precision = Mspire::ErrorRate::Decoy.precision(num_target, num_decoy)
58
- target_hits << hit
58
+ sorted_target_hits << hit
59
59
  qvalues << (1.0 - precision)
60
60
  else
61
61
  num_decoy += 1
@@ -72,7 +72,7 @@ module Mspire
72
72
  end
73
73
  end.reverse
74
74
  end
75
- [target_hits, qvalues]
75
+ [sorted_target_hits, qvalues]
76
76
  end
77
77
 
78
78
 
data/lib/mspire/fasta.rb CHANGED
@@ -61,6 +61,8 @@ module Mspire
61
61
  # takes the header string and returns the uniprot id
62
62
  #
63
63
  # 'sp|Q04917|1433F_HUMAN' #=> 'Q04917'
64
+ # This can also be found with BioFastaFormat#accession (but it may be much
65
+ # slower)
64
66
  def self.uniprot_id(header)
65
67
  header[/^[^\|]+\|([^\|]+)\|/, 1]
66
68
  end
@@ -43,8 +43,11 @@ class Mspire::Ident::Peptide::Db::Creator
43
43
  op.on("--no-expand-x", "don't enumerate aa possibilities", "(removes these peptides)") { opt[:expand_aa] = false }
44
44
  op.on("--no-uniprot", "use entire protid section of fasta header", "for non-uniprot fasta files") { opt[:uniprot] = false }
45
45
  op.on("--trie", "use a trie (for very large uniprot files)", "must have fast_trie gem installed") {|v| opt[:trie] = v }
46
+
46
47
  op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Mspire::Insilico::Digester.const_get(v.upcase) }
48
+ op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
47
49
  op.on("--list-enzymes", "lists approved enzymes and exits") do
50
+ op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
48
51
  puts Mspire::Digester::ENZYMES.keys.join("\n")
49
52
  exit
50
53
  end
@@ -95,7 +98,7 @@ class Mspire::Ident::Peptide::Db::Creator
95
98
  (pep =~ letters_to_expand_re) ? expand_peptides(pep, EXPAND_AA) : pep
96
99
  end
97
100
  else
98
- peptides.map {|pep| pep =~ letters_to_expand_re }.compact
101
+ peptides.select {|pep| pep !~ letters_to_expand_re }
99
102
  end
100
103
  header = prot.header
101
104
  id = opts[:uniprot] ? Mspire::Fasta.uniprot_id(header) : header.split(/\s+/).first
@@ -118,15 +121,25 @@ class Mspire::Ident::Peptide::Db::Creator
118
121
  hash_like = hash_like_from_digestion_file(digestion_file, opts[:min_length], opts[:trie])
119
122
 
120
123
  base = digestion_file.chomp(File.extname(digestion_file))
121
- final_outfile = base + ".min_aaseq#{opts[:min_length]}" + ".yml"
124
+ final_outfile =
125
+ if opts[:trie]
126
+ base + ".min_aaseq#{opts[:min_length]}"
127
+ else
128
+ base + ".min_aaseq#{opts[:min_length]}" + ".yml"
129
+ end
122
130
 
123
131
  start_time = Time.now
124
132
  print "Writing #{hash_like.size} peptides to #{} ..." if $VERBOSE
125
133
 
126
- File.open(final_outfile, 'w') do |out|
127
- hash_like.each do |k,v|
128
- #out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
129
- out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
134
+ if opts[:trie]
135
+ trie = hash_like
136
+ trie.save(final_outfile)
137
+ else
138
+ File.open(final_outfile, 'w') do |out|
139
+ hash_like.each do |k,v|
140
+ out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
141
+ #out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
142
+ end
130
143
  end
131
144
  end
132
145
  puts "#{Time.now - start_time} sec" if $VERBOSE
@@ -137,71 +150,47 @@ class Mspire::Ident::Peptide::Db::Creator
137
150
  File.expand_path(final_outfile)
138
151
  end
139
152
 
140
- def hash_like_tree
141
- require 'trie'
142
- trie = Trie.new
143
- def trie.[](key)
144
- val = self.get(key)
145
- if val.nil?
146
- self.add(key,"")
147
- self.get(key)
148
- else
149
- val
150
- end
153
+ def get_a_trie
154
+ begin
155
+ require 'trie'
156
+ rescue
157
+ raise LoadError, "must first install fast_trie"
151
158
  end
152
- trie
159
+ Trie.new
153
160
  end
154
161
 
155
162
  def hash_like_from_digestion_file(digestion_file, min_length, use_trie=false)
156
- cnt = 0
157
163
  if use_trie
158
- raise NotImplementedError
159
- #puts "using trie" if $VERBOSE
160
- #trie = hash_like_tree
161
- #line_cnt = 0
162
- #::IO.foreach(digestion_file) do |line|
163
- #line_cnt += 1
164
- ##puts "LINE COUND"
165
- ##p line_cnt
166
- #(prot, *peps) = line.chomp!.split(/\s+/)
167
- ##p peps
168
- ##p peps.class
169
- ## prot is something like this: "P31946"
170
- #puts line
171
- #peps.each do |pep|
172
- #if pep.size >= min_length
173
- #to_set =
174
- #if val = trie.get(pep)
175
- #val + Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER + prot
176
- #else
177
- #prot
178
- #end
179
- #p to_set.size
180
- #trie.add(pep, to_set)
181
- #end
182
- #end
183
- #cnt += 1
184
- #puts cnt if (cnt % 1000) == 0
185
- #end
186
- #abort "HERE"
187
- #trie
188
- else
189
- hash = {}
164
+ trie = get_a_trie
190
165
  ::IO.foreach(digestion_file) do |line|
191
- (prot, *peps) = line.chomp!.split(/\s+/)
166
+ line.chomp!
167
+ (prot, *peps) = line.split(/\s+/)
192
168
  # prot is something like this: "P31946"
169
+ peps.uniq!
193
170
  peps.each do |pep|
194
171
  if pep.size >= min_length
195
- if val = hash[pep]
196
- val << Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER << prot
172
+ if trie.has_key?(pep)
173
+ ar = trie.get(pep)
174
+ ar << prot
197
175
  else
198
- val = prot
176
+ trie.add( pep, [prot] )
199
177
  end
200
- hash[pep] = val
201
178
  end
202
179
  end
203
- cnt += 1
204
- puts cnt if (cnt % 1000) == 0
180
+ end
181
+ trie
182
+ else
183
+ hash = Hash.new {|h,k| h[k] = [] }
184
+ ::IO.foreach(digestion_file) do |line|
185
+ line.chomp!
186
+ (prot, *peps) = line.split(/\s+/)
187
+ # prot is something like this: "P31946"
188
+ peps.uniq!
189
+ peps.each do |pep|
190
+ if pep.size >= min_length
191
+ hash[pep] << prot
192
+ end
193
+ end
205
194
  end
206
195
  hash
207
196
  end
@@ -215,6 +204,7 @@ class Mspire::Ident::Peptide::Db::Creator
215
204
  def create(fasta_file, opts={})
216
205
  opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)
217
206
  digestion_file = create_digestion_file(fasta_file, opts)
207
+ puts "created file of size: #{File.size(digestion_file)}" if $VERBOSE
218
208
  db_from_fasta_digestion_file(digestion_file, opts)
219
209
  end
220
210
 
@@ -6,6 +6,7 @@ class Mspire::Ident::Peptide::Db::IO
6
6
  # behaves like a hash once it is opened.
7
7
  include Enumerable
8
8
  def self.open(filename, &block)
9
+ #p filename
9
10
  raise ArgumentError unless block
10
11
  File.open(filename) do |io|
11
12
  block.call(self.new(io))
@@ -39,6 +40,10 @@ class Mspire::Ident::Peptide::Db::IO
39
40
  string.split(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)
40
41
  end
41
42
 
43
+ def key?(key)
44
+ @index[key]
45
+ end
46
+
42
47
  # number of entries
43
48
  def size ; @index.size end
44
49
  alias_method :length, :size
@@ -12,7 +12,7 @@ class Mspire::Ident::PeptideHit
12
12
 
13
13
  class << self
14
14
 
15
- # writes to the file, adding an extension
15
+ # writes to the file, adding an extension. returns the filename
16
16
  def to_phq(base, hits, qvalues=[])
17
17
  to_file(base + FILE_EXTENSION, hits, qvalues)
18
18
  end
@@ -20,7 +20,7 @@ class Mspire::Ident::PeptideHit
20
20
  # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
21
21
  # to hits that can provide qvalues if not inherent to the hits
22
22
  # returns the filename. Expects each hit to implement #search_id, #id,
23
- # #aaseq and #charge
23
+ # #aaseq and #charge. returns the filename
24
24
  def to_file(filename, hits, qvalues=[])
25
25
  File.open(filename,'w') do |out|
26
26
  out.puts HEADER.join(FILE_DELIMITER)
@@ -19,8 +19,8 @@ class Mspire::Ident::PeptideHit
19
19
  include Mspire::Ident::PeptideHitLike
20
20
  include Merge
21
21
 
22
- def initialize(hash)
23
- merge!(hash)
22
+ def initialize(hash=nil)
23
+ merge!(hash) if hash
24
24
  end
25
25
  end
26
26
 
@@ -38,8 +38,10 @@ module Mspire
38
38
  # note to self: I wrote this in 2011, so I think I know what I'm doing now
39
39
  protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
40
40
  peptide_hits.each do |peptide_hit|
41
- peptide_hit.proteins.each do |protein|
42
- protein_to_peptides[protein] << peptide_hit
41
+ if prots = peptide_hit.proteins
42
+ prots.each do |protein|
43
+ protein_to_peptides[protein] << peptide_hit
44
+ end
43
45
  end
44
46
  end
45
47
  peptides_to_protein_group = Hash.new {|h,k| h[k] = [] }
@@ -8,27 +8,27 @@ module Mspire
8
8
  # and OH on the ends)
9
9
  aa_to_el_hash = {
10
10
  'A' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
11
- 'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
12
- 'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
13
- 'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
14
11
  'C' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
12
+ 'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
15
13
  'E' => { :c =>5, :h =>7 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
16
- 'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
14
+ 'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
17
15
  'G' => { :c =>2, :h =>3 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
18
- 'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
19
16
  'I' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
20
- 'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
17
+ 'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
21
18
  'K' => { :c =>6, :h =>12 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
19
+ 'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
22
20
  'M' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
23
- 'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
21
+ 'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
22
+ 'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
24
23
  'P' => { :c =>5, :h =>7 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
24
+ 'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
25
+ 'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
25
26
  'S' => { :c =>3, :h =>5 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
26
27
  'T' => { :c =>4, :h =>7 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
28
+ 'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
29
+ 'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
27
30
  'W' => { :c =>11, :h =>10 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
28
31
  'Y' => { :c =>9, :h =>9 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
29
- 'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
30
- 'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
31
- 'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 }
32
32
  }
33
33
 
34
34
  #
@@ -35,10 +35,17 @@ module Mspire
35
35
  def self.from_xml(xml, link)
36
36
  obj = self.new(xml[:id])
37
37
  next_n = obj.describe_from_xml!(xml, link[:ref_hash])
38
- if next_n && next_n.name == 'componentList'
38
+ if next_n && (next_n.name == 'componentList')
39
39
  obj.components = next_n.children.map do |component_n|
40
- Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
41
- end
40
+ if component_n.is_a?(Nokogiri::XML::Text)
41
+ # TODO: this is a fix for when there is an empty component list but
42
+ # Nokogiri returns a text node. Really this needs to be fixed
43
+ # in our xml writer!
44
+ nil
45
+ else
46
+ Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
47
+ end
48
+ end.compact
42
49
  next_n = next_n.next
43
50
  end
44
51
  if next_n && next_n.name == 'softwareRef'
@@ -0,0 +1,42 @@
1
+ require 'hash/inverse'
2
+
3
module Mspire ; module Quant ; end ; end

# Helpers for interpreting quantitation command-line arguments.
module Mspire::Quant::Cmdline

  # expects arguments in one of two forms.  The first form is grouped by
  # condition as shown:
  #
  #     condition1=file1,file2,file3... condition2=file4,file5...
  #
  # The second is where each file is its own condition (1 replicate); the
  # condition name is the file's basename without directory or extension:
  #
  #     file1 file2 file3
  #
  # A condition with several files gets one samplename per file, built as
  # "<condition><replicate_postfix><1-based index>".  A condition with a
  # single file uses the condition name itself as the samplename.
  #
  # Returns three hashes, in this order (only ordered for ruby 1.9):
  #
  #     1) Samplename to the filename
  #     2) Condition to an array of samplenames
  #     3) Samplename to condition (the Hash#inverse of 2)
  def self.args_to_hashes(args, replicate_postfix="-rep")
    condition_to_samplenames = {}
    samplename_to_filename = {}
    args.each do |arg|
      (condition, files) =
        if arg.include?('=')
          # limit of 2: only the first '=' separates the condition from the
          # file list, and "cond=" yields an empty file list instead of nil
          (name, filestring) = arg.split('=', 2)
          [name, filestring.split(',')]
        else
          [File.basename(arg, File.extname(arg)), [arg]]
        end
      sample_to_file_pairs = files.each_with_index.map do |file,i|
        rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
        ["#{condition}#{rep_string}", file]
      end
      sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
      condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
    end
    [samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
  end
end
@@ -0,0 +1,29 @@
1
+
2
module Mspire
  module Quant
  end
end

# Mixin describing one protein group's measured values across a set of
# experiments.  Including classes get the three accessors and the
# constructor below.
module Mspire::Quant::ProteinGroupComparison

  # a protein group object
  attr_accessor :protein_group

  # an array of experiment names
  attr_accessor :experiments

  # parallel array to experiments with the measured values
  attr_accessor :values

  # Stores the three arguments in the attributes of the same name.
  def initialize(protein_group, experiments, values)
    @protein_group = protein_group
    # must be @experiments (plural) so the #experiments accessor sees it
    @experiments = experiments
    @values = values
  end
end

class Mspire::Quant::ProteinGroupComparison::SpectralCounts
  include Mspire::Quant::ProteinGroupComparison
end

class Mspire::Quant::ProteinGroupComparison::UniqAAzCounts
  include Mspire::Quant::ProteinGroupComparison
end
@@ -0,0 +1,42 @@
1
+ #require 'set'
2
+ #require 'mspire/ident/protein_group'
3
+
4
module Mspire
  module Quant
    module SpectralCounts
      # Three tallies for a set of peptide hits: every hit (spectral), unique
      # aaseq+charge combinations, and unique aaseqs.  Any member left nil at
      # construction defaults to 0.0.
      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq) do
        def initialize(*args)
          super(*args)
          members.each { |member| self[member] ||= 0.0 }
        end
      end

      # Returns a single Counts object summarizing peptide_hits, which must
      # respond to :aaseq and :charge.
      #
      # Each hit contributes a weight: the value of the block called with the
      # hit when a block is given (typically 1/#proteins sharing the hit),
      # otherwise 1.  The spectral count sums the weight of every hit; the
      # other two counts sum one weight per unique [aaseq, charge] pair and
      # per unique aaseq respectively.
      def self.counts(peptide_hits, &share_the_pephit)
        weight_by_aaseq = {}
        weight_by_aaseq_charge = {}
        hit_weights = []
        peptide_hits.each do |hit|
          hit_weight = share_the_pephit ? share_the_pephit.call(hit) : 1
          # later hits overwrite earlier ones with the same key; the weight
          # is expected to be consistent when the key is the same
          weight_by_aaseq_charge[[hit.aaseq, hit.charge]] = hit_weight
          weight_by_aaseq[hit.aaseq] = hit_weight
          hit_weights << hit_weight
        end
        Counts.new(
          hit_weights.reduce(:+),
          weight_by_aaseq_charge.values.reduce(:+),
          weight_by_aaseq.values.reduce(:+)
        )
      end
    end
  end
end
40
+
41
+
42
+
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mspire/ident/peptide/db/creator'
4
+
5
+ Mspire::Ident::Peptide::Db::Creator.cmdline(ARGV)
@@ -4,16 +4,7 @@ require 'trollop'
4
4
  require 'set'
5
5
  require 'mspire/ident/peptide_hit/qvalue'
6
6
  require 'mspire/error_rate/qvalue'
7
-
8
- begin
9
- require 'mascot/dat'
10
- rescue LoadError
11
- puts "You need the mascot-dat gem for this to work!"
12
- puts "AND IT MUST BE THE PRINCELAB GITHUB FORK until changes get incorporated upstream!"
13
- puts "> gem install mascot-dat"
14
- raise LoadError
15
- end
16
- raise "need princelab mascot-dat gem!" unless Mascot::DAT::VERSION == "0.3.1.1"
7
+ require 'mspire/mascot/dat'
17
8
 
18
9
  # target-decoy bundle
19
10
  SearchBundle = Struct.new(:target, :decoy) do
@@ -28,36 +19,35 @@ end
28
19
 
29
20
  PSM = Struct.new(:search_id, :id, :aaseq, :charge, :score)
30
21
 
31
- # turns 1+ into 1
32
- def charge_string_to_charge(st)
33
- md = st.match(/(\d)([\+\-])/)
34
- i = md[1].to_i
35
- i *= -1 if (md[2] == '-')
36
- i
37
- end
38
-
39
- def read_mascot_dat_hits(dat_file)
22
# Extracts the run name from the first line of dat_file that starts with
# "FILE=" (case-insensitive).
#
# Surrounding whitespace (including a trailing carriage return from CRLF
# files), a leading "File Name: " label and a trailing .mgf/.raw/.mzxml/.mzml
# extension (case-insensitive) are removed.  Returns nil when the file has no
# FILE= line.
def run_name_from_dat(dat_file)
  filename = nil
  IO.foreach(dat_file) do |line|
    if line =~ /^FILE=(.*)/i
      filename = $1.strip
      filename.sub!(/^File Name: /,'')
      # the dot is escaped so that only a genuine extension is stripped
      filename.sub!(/\.(mgf|raw|mzxml|mzml)$/i,'')
      break
    end
  end
  filename
end
34
+
35
# Reads the peptide hits of a Mascot .dat file into PSM structs.
#
# Hits are collected in two passes over dat.each_peptide, first with true and
# then with false as the first argument; the two resulting arrays become the
# target and decoy members of a SearchBundle, in that order.  Every PSM uses
# the run name from run_name_from_dat as its search_id and takes title and
# charge from the hit's query.
#
# Returns whatever Mspire::Mascot::Dat.open returns.
# NOTE(review): presumably that is the block's value (the SearchBundle) --
# confirm against Mspire::Mascot::Dat.open.
def read_mascot_dat_hits(dat_file)
  filename = run_name_from_dat(dat_file)

  Mspire::Mascot::Dat.open(dat_file) do |dat|
    target_and_decoy = [true, false].map do |target_or_decoy|
      # NOTE(review): the second argument (1) presumably restricts hits to
      # the top-ranked peptide per query -- confirm against Dat#each_peptide
      dat.each_peptide(target_or_decoy, 1).map do |pephit|
        query = dat.query(pephit.query_num)
        PSM.new(filename, query.title, pephit.seq, query.charge, pephit.ions_score)
      end
    end
    SearchBundle.new(*target_and_decoy)
  end
end
62
52
 
63
53
 
@@ -66,16 +56,18 @@ def putsv(*args)
66
56
  $stdout.flush
67
57
  end
68
58
 
69
- EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
70
59
  combine_base = "combined"
71
60
 
61
+ EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
62
+
72
63
  opts = Trollop::Parser.new do
73
- #banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
74
64
  banner %Q{usage: #{File.basename(__FILE__)} <mascot>.dat ...
75
- outputs: <mascot>.phq.tsv
76
- assumes a decoy search was run *with* the initial search
77
- phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
65
+ outputs: <mascot>#{EXT}
66
+
67
+ assumes a decoy search was run *with* the initial search
68
+ phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
78
69
  }
70
+ text ""
79
71
  opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
80
72
  opt :z_together, "do not group by charge state", :default => false
81
73
  opt :verbose, "be verbose", :default => false
@@ -100,19 +92,19 @@ to_run = {}
100
92
  if opt[:combine]
101
93
  putsv "combining all target hits together and all decoy hits together"
102
94
  bundle = SearchBundle.new.combine(bundles)
103
- to_run[combine_base + EXT] = bundle
95
+ to_run[combine_base] = bundle
104
96
  else
105
97
  files.zip(bundles) do |file, bundle|
106
- to_run[file.chomp(File.extname(file)) + EXT] = bundle
98
+ to_run[file.chomp(File.extname(file))] = bundle
107
99
  end
108
100
  end
109
101
 
110
- to_run.each do |file, bundle|
111
- putsv "calculating qvalues for #{file}"
112
- hit_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
113
- # {|hit| hit.search_scores[:ionscore] }
114
- outfile = Mspire::Ident::PeptideHit::Qvalue.to_file(file, *hit_qvalue_pairs.transpose)
102
+ to_run.each do |file_base, bundle|
103
+ putsv "calculating qvalues for #{file_base}"
104
+ hit_and_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
105
+
106
+ outfile = Mspire::Ident::PeptideHit::Qvalue.to_phq(file_base, *hit_and_qvalue_pairs.transpose)
107
+
115
108
  putsv "created: #{outfile}"
116
109
  end
117
110
 
118
-