RubyGems - mspire - Versions diffs - 0.8.5 → 0.8.6 - Mend

mspire 0.8.5 → 0.8.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

data/VERSION +1 -1
data/lib/hash/inverse.rb +15 -0
data/lib/mspire/error_rate/qvalue.rb +5 -5
data/lib/mspire/fasta.rb +2 -0
data/lib/mspire/ident/peptide/db/creator.rb +48 -58
data/lib/mspire/ident/peptide/db/io.rb +5 -0
data/lib/mspire/ident/peptide_hit/qvalue.rb +2 -2
data/lib/mspire/ident/peptide_hit.rb +2 -2
data/lib/mspire/ident/protein_group.rb +4 -2
data/lib/mspire/isotope/aa.rb +10 -10
data/lib/mspire/mzml/instrument_configuration.rb +10 -3
data/lib/mspire/quant/cmdline.rb +42 -0
data/lib/mspire/quant/protein_group_comparison.rb +29 -0
data/lib/mspire/quant/spectral_counts.rb +42 -0
data/script/fasta_to_peptide_centric_db.rb +5 -0
data/script/mascot_dat_to_peptide_hit_qvalues.rb +37 -45
data/script/mass_correct.rb +118 -0
data/script/minimal_protein_set.rb +345 -0
data/script/mzml_to_mgf.rb +46 -0
data/script/peptide_hit_qvalues_to_spectral_counts_table.rb +275 -0
data/spec/mspire/ident/peptide/db/creator_spec.rb +11 -0
data/spec/testfiles/mspire/ident/peptide/db/uni_11_sp_tr.msd_clvg2.min_aaseq4.yml +157 -157
metadata +11 -2

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.8.5
1	+ 0.8.6

data/lib/hash/inverse.rb ADDED Viewed

@@ -0,0 +1,15 @@
+# Hash#inverse, originally by Tilo Sloboda (now in facets)
+class Hash
+  # Builds a new hash that maps every value of this hash back to the key it
+  # was stored under.  A value that is an Array is treated as a list of
+  # values, each of which is mapped back to the key on its own.  When more
+  # than one key leads to the same value, that value maps to an array of
+  # those keys, most recently visited key first.  The array is built with
+  # #flatten, so keys that are themselves arrays get flattened into it.
+  #
+  #     {:a => 1, :b => [1, 2]}.inverse  #=> {1 => [:b, :a], 2 => :b}
+  def inverse
+    inverted = {}
+    each_pair do |key, value|
+      targets = (Array === value) ? value : [value]
+      targets.each do |target|
+        inverted[target] =
+          if inverted.has_key?(target)
+            [key, inverted[target]].flatten
+          else
+            key
+          end
+      end
+    end
+    inverted
+  end
+end

data/lib/mspire/error_rate/qvalue.rb CHANGED Viewed

@@ -22,8 +22,8 @@ module Mspire
         # Proc.new doesn't do arity checking
         hit_with_qvalue_pairs = Proc.new do |hits|
           sorted_best_to_worst = (hits.sort_by(&sorting)).reverse
-          (target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
-          target_hits.zip(qvalues)
+          (sorted_target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
+          sorted_target_hits.zip(qvalues)
         end
         all_together = target_hits + decoy_hits
@@ -49,13 +49,13 @@ module Mspire
         opts = {:monotonic => true}.merge(opts)
         num_target = 0 ; num_decoy = 0
         monotonic = opts[:monotonic]
-        target_hits = []
+        sorted_target_hits = []
         qvalues = []
         best_to_worst.each do |hit|
           if target_setlike.include?(hit)
             num_target += 1
             precision = Mspire::ErrorRate::Decoy.precision(num_target, num_decoy)
-            target_hits << hit
+            sorted_target_hits << hit
             qvalues << (1.0 - precision)
           else
             num_decoy += 1
@@ -72,7 +72,7 @@ module Mspire
             end
           end.reverse
         end
-        [target_hits, qvalues]
+        [sorted_target_hits, qvalues]
       end

data/lib/mspire/fasta.rb CHANGED Viewed

@@ -61,6 +61,8 @@ module Mspire
     # takes the header string and returns the uniprot id
     #
     #     'sp|Q04917|1433F_HUMAN' #=> 'Q04917'
+    # This can also be found with BioFastaFormat#accession (but it may be much
+    # slower)
     def self.uniprot_id(header)
       header[/^[^\|]+\|([^\|]+)\|/, 1]
     end

data/lib/mspire/ident/peptide/db/creator.rb CHANGED Viewed

@@ -43,8 +43,11 @@ class Mspire::Ident::Peptide::Db::Creator
       op.on("--no-expand-x", "don't enumerate aa possibilities", "(removes these peptides)") { opt[:expand_aa] = false }
       op.on("--no-uniprot", "use entire protid section of fasta header", "for non-uniprot fasta files") { opt[:uniprot] = false }
       op.on("--trie", "use a trie (for very large uniprot files)", "must have fast_trie gem installed") {|v| opt[:trie] = v }
       op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Mspire::Insilico::Digester.const_get(v.upcase) }
+      op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
       op.on("--list-enzymes", "lists approved enzymes and exits") do
+      op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
         puts Mspire::Digester::ENZYMES.keys.join("\n")
         exit
       end
@@ -95,7 +98,7 @@ class Mspire::Ident::Peptide::Db::Creator
                 (pep =~ letters_to_expand_re) ? expand_peptides(pep, EXPAND_AA) : pep
               end
             else
-              peptides.map {|pep| pep =~ letters_to_expand_re }.compact
+              peptides.select {|pep| pep !~ letters_to_expand_re }
             end
           header = prot.header
           id = opts[:uniprot] ? Mspire::Fasta.uniprot_id(header) : header.split(/\s+/).first
@@ -118,15 +121,25 @@ class Mspire::Ident::Peptide::Db::Creator
     hash_like = hash_like_from_digestion_file(digestion_file, opts[:min_length], opts[:trie])
     base = digestion_file.chomp(File.extname(digestion_file))
-    final_outfile = base + ".min_aaseq#{opts[:min_length]}" + ".yml"
+    final_outfile =
+      if opts[:trie]
+        base + ".min_aaseq#{opts[:min_length]}"
+      else
+        base + ".min_aaseq#{opts[:min_length]}" + ".yml"
+      end
     start_time = Time.now
     print "Writing #{hash_like.size} peptides to #{} ..." if $VERBOSE
-    File.open(final_outfile, 'w') do |out|
-      hash_like.each do |k,v|
-        #out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
-        out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
+      if opts[:trie]
+        trie = hash_like
+        trie.save(final_outfile)
+      else
+        File.open(final_outfile, 'w') do |out|
+        hash_like.each do |k,v|
+          out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
+          #out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
+        end
       end
     end
     puts "#{Time.now - start_time} sec" if $VERBOSE
@@ -137,71 +150,47 @@ class Mspire::Ident::Peptide::Db::Creator
     File.expand_path(final_outfile)
   end
-  def hash_like_tree
-    require 'trie'
-    trie = Trie.new
-    def trie.[](key)
-      val = self.get(key)
-      if val.nil?
-        self.add(key,"")
-        self.get(key)
-      else
-        val
-      end
+  # Loads the 'trie' library on demand and returns a new, empty Trie.
+  #
+  # Raises LoadError with an install hint when 'trie' cannot be required.
+  def get_a_trie
+    begin
+      require 'trie'
+    # LoadError descends from ScriptError, not StandardError, so a bare
+    # `rescue` never sees it; the class has to be named explicitly.
+    rescue LoadError
+      raise LoadError, "must first install fast_trie"
+    end
-    trie
+    Trie.new
   end
   def hash_like_from_digestion_file(digestion_file, min_length, use_trie=false)
-    cnt = 0
     if use_trie
-      raise NotImplementedError
-      #puts "using trie" if $VERBOSE
-      #trie = hash_like_tree
-      #line_cnt = 0
-      #::IO.foreach(digestion_file) do |line|
-        #line_cnt += 1
-        ##puts "LINE COUND"
-        ##p line_cnt
-        #(prot, *peps) = line.chomp!.split(/\s+/)
-        ##p peps
-        ##p peps.class
-        ## prot is something like this: "P31946"
-        #puts line
-        #peps.each do |pep|
-          #if pep.size >= min_length
-            #to_set =
-              #if val = trie.get(pep)
-                #val +  Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER + prot
-              #else
-                #prot
-              #end
-            #p to_set.size
-            #trie.add(pep, to_set)
-          #end
-        #end
-        #cnt += 1
-        #puts cnt if (cnt % 1000) == 0
-      #end
-      #abort "HERE"
-      #trie
-    else
-      hash = {}
+      trie = get_a_trie
       ::IO.foreach(digestion_file) do |line|
-        (prot, *peps) = line.chomp!.split(/\s+/)
+        line.chomp!
+        (prot, *peps) = line.split(/\s+/)
         # prot is something like this: "P31946"
+        peps.uniq!
         peps.each do |pep|
           if pep.size >= min_length
-            if val = hash[pep]
-              val << Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER << prot
+            if trie.has_key?(pep)
+              ar = trie.get(pep)
+              ar << prot
             else
-              val = prot
+              trie.add( pep, [prot] )
             end
-            hash[pep] = val
           end
         end
-        cnt += 1
-        puts cnt if (cnt % 1000) == 0
+      end
+      trie
+    else
+      hash = Hash.new {|h,k| h[k] = [] }
+      ::IO.foreach(digestion_file) do |line|
+        line.chomp!
+        (prot, *peps) = line.split(/\s+/)
+        # prot is something like this: "P31946"
+        peps.uniq!
+        peps.each do |pep|
+          if pep.size >= min_length
+            hash[pep] << prot
+          end
+        end
       end
       hash
     end
@@ -215,6 +204,7 @@ class Mspire::Ident::Peptide::Db::Creator
   def create(fasta_file, opts={})
     opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)
     digestion_file = create_digestion_file(fasta_file, opts)
+    puts "created file of size: #{File.size(digestion_file)}" if $VERBOSE
     db_from_fasta_digestion_file(digestion_file, opts)
   end

data/lib/mspire/ident/peptide/db/io.rb CHANGED Viewed

@@ -6,6 +6,7 @@ class Mspire::Ident::Peptide::Db::IO
   # behaves like a hash once it is opened.
   include Enumerable
   def self.open(filename, &block)
+    #p filename
     raise ArgumentError unless block
     File.open(filename) do |io|
       block.call(self.new(io))
@@ -39,6 +40,10 @@ class Mspire::Ident::Peptide::Db::IO
     string.split(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)
   end
+  def key?(key)
+    @index[key]
+  end
   # number of entries
   def size ; @index.size end
   alias_method :length, :size

data/lib/mspire/ident/peptide_hit/qvalue.rb CHANGED Viewed

@@ -12,7 +12,7 @@ class Mspire::Ident::PeptideHit
     class << self
-      # writes to the file, adding an extension
+      # writes to the file, adding an extension. returns the filename
       def to_phq(base, hits, qvalues=[])
         to_file(base + FILE_EXTENSION, hits, qvalues)
       end
@@ -20,7 +20,7 @@ class Mspire::Ident::PeptideHit
       # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
       # to hits that can provide qvalues if not inherent to the hits
       # returns the filename.  Expects each hit to implement #search_id, #id,
-      # #aaseq and #charge
+      # #aaseq and #charge. returns the filename
       def to_file(filename, hits, qvalues=[])
         File.open(filename,'w') do |out|
           out.puts HEADER.join(FILE_DELIMITER)

data/lib/mspire/ident/peptide_hit.rb CHANGED Viewed

@@ -19,8 +19,8 @@ class Mspire::Ident::PeptideHit
   include Mspire::Ident::PeptideHitLike
   include Merge
-  def initialize(hash)
-    merge!(hash)
+  def initialize(hash=nil)
+    merge!(hash) if hash
   end
 end

data/lib/mspire/ident/protein_group.rb CHANGED Viewed

@@ -38,8 +38,10 @@ module Mspire
         # note to self: I wrote this in 2011, so I think I know what I'm doing now
         protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
         peptide_hits.each do |peptide_hit|
-          peptide_hit.proteins.each do |protein|
-            protein_to_peptides[protein] << peptide_hit
+          if prots = peptide_hit.proteins
+            prots.each do |protein|
+              protein_to_peptides[protein] << peptide_hit
+            end
           end
         end
         peptides_to_protein_group = Hash.new {|h,k| h[k] = [] }

data/lib/mspire/isotope/aa.rb CHANGED Viewed

@@ -8,27 +8,27 @@ module Mspire
       # and OH on the ends)
       aa_to_el_hash = {
         'A' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
-        'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
-        'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'C' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
+        'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'E' => { :c =>5, :h =>7 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
+        'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'G' => { :c =>2, :h =>3 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
         'I' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
+        'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
         'K' => { :c =>6, :h =>12 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
+        'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'M' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
-        'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
+        'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
+        'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
         'P' => { :c =>5, :h =>7 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
+        'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
+        'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
         'S' => { :c =>3, :h =>5 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'T' => { :c =>4, :h =>7 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
+        'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
+        'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
         'W' => { :c =>11, :h =>10 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
         'Y' => { :c =>9, :h =>9 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
-        'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
-        'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 }
       }
       #

data/lib/mspire/mzml/instrument_configuration.rb CHANGED Viewed

@@ -35,10 +35,17 @@ module Mspire
       def self.from_xml(xml, link)
         obj = self.new(xml[:id])
         next_n = obj.describe_from_xml!(xml, link[:ref_hash])
-        if next_n && next_n.name == 'componentList'
+        if next_n && (next_n.name == 'componentList')
           obj.components = next_n.children.map do |component_n|
-            Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
-          end
+            if component_n.is_a?(Nokogiri::XML::Text)
+              # TODO: this is a fix for when there is an empty component list but
+              # Nokogiri returns a text node.  Really this needs to be fixed
+              # in our xml writer!
+              nil
+            else
+              Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
+            end
+          end.compact
           next_n = next_n.next
         end
         if next_n && next_n.name == 'softwareRef'

data/lib/mspire/quant/cmdline.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require 'hash/inverse'
+module Mspire ; module Quant ; end ; end
+module Mspire::Quant::Cmdline
+  # expects arguments in one of two forms.  The first form is grouped by
+  # condition as shown:
+  #
+  #     condition1=file1,file2,file3... condition2=file4,file5...
+  #
+  # The second is where each file is its own condition (1 replicate):
+  #
+  #     file1 file2 file3
+  #
+  # A condition with a single file uses the condition name as its sample
+  # name; with several files each sample is named
+  # "<condition><replicate_postfix><n>", n counting from 1.
+  #
+  # Returns three hashes (only ordered for ruby 1.9), in this order:
+  #
+  #     1) Samplename to the filename
+  #     2) Condition to an array of samplenames
+  #     3) Samplename to condition (hash 2 run through Hash#inverse)
+  #
+  # NOTE(review): the second form calls a bare `basename`, which this module
+  # does not define; presumably the calling script supplies it -- confirm.
+  def self.args_to_hashes(args, replicate_postfix="-rep")
+    # groupname => files
+    condition_to_samplenames = {}
+    samplename_to_filename = {}
+    args.each do |arg|
+      (condition, files) =
+        if arg.include?('=')
+          # split on the first '=' only, so a file list that itself contains
+          # '=' is kept whole instead of being cut at the second '='
+          (condition, filestring) = arg.split('=', 2)
+          [condition, filestring.split(',')]
+        else
+          [basename(arg), [arg]]
+        end
+      sample_to_file_pairs = files.each_with_index.map do |file,i|
+        rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
+        ["#{condition}#{rep_string}", file]
+      end
+      sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
+      condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
+    end
+    [samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
+  end
+end

data/lib/mspire/quant/protein_group_comparison.rb ADDED Viewed

@@ -0,0 +1,29 @@
+module Mspire
+  module Quant
+  end
+end
+# Mixin that holds one protein group together with the values measured for
+# it across a list of experiments.
+module Mspire::Quant::ProteinGroupComparison
+  # a protein group object
+  attr_accessor :protein_group
+  # an array of experiment names
+  attr_accessor :experiments
+  # parallel array to experiments with the measured values
+  attr_accessor :values
+  # Stores the three arguments in the attributes of the same names.
+  def initialize(protein_group, experiments, values)
+    # the ivar must be @experiments (plural) to match the accessor; the
+    # singular @experiment left #experiments returning nil
+    (@protein_group, @experiments, @values) = protein_group, experiments, values
+  end
+end
+# adds nothing of its own beyond the ProteinGroupComparison mixin
+class Mspire::Quant::ProteinGroupComparison::SpectralCounts
+  include Mspire::Quant::ProteinGroupComparison
+end
+# adds nothing of its own beyond the ProteinGroupComparison mixin
+class Mspire::Quant::ProteinGroupComparison::UniqAAzCounts
+  include Mspire::Quant::ProteinGroupComparison
+end

data/lib/mspire/quant/spectral_counts.rb ADDED Viewed

@@ -0,0 +1,42 @@
+#require 'set'
+#require 'mspire/ident/protein_group'
+module Mspire
+  module Quant
+    module SpectralCounts
+      # Three tallies -- spectral, aaseqcharge and aaseq.  Any of them that
+      # is not supplied (or is supplied as nil) defaults to 0.0.
+      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq) do
+        def initialize(*args)
+          super(*args)
+          # default is zero counts
+          members.each {|member| self[member] ||= 0.0 }
+        end
+      end
+      # Returns a single Counts object for the given peptide hits:
+      #
+      #     spectral    - sum of the weights of all hits
+      #     aaseqcharge - sum of one weight per distinct [aaseq, charge] pair
+      #     aaseq       - sum of one weight per distinct aaseq
+      #
+      # Every hit weighs 1 unless a block is given, in which case the block
+      # is called with the hit and its return value is used as the weight
+      # (typically this will be 1/#proteins sharing the hit).  peptide_hits
+      # must respond to :charge and :aaseq.  With no hits at all, every count
+      # falls back to the 0.0 default.
+      def self.counts(peptide_hits, &share_the_pephit)
+        weight_by_aaseq = {}
+        weight_by_aaseq_charge = {}
+        hit_weights = peptide_hits.map do |hit|
+          hit_weight = share_the_pephit ? share_the_pephit.call(hit) : 1
+          # hits with the same key overwrite one another (the last weight
+          # wins); the weight is expected to be the same for the same key
+          weight_by_aaseq_charge[[hit.aaseq, hit.charge]] = hit_weight
+          weight_by_aaseq[hit.aaseq] = hit_weight
+          hit_weight
+        end
+        spectral = hit_weights.reduce(:+)
+        aaseqcharge = weight_by_aaseq_charge.values.reduce(:+)
+        aaseq = weight_by_aaseq.values.reduce(:+)
+        Counts.new(spectral, aaseqcharge, aaseq)
+      end
+    end
+  end
+end

data/script/fasta_to_peptide_centric_db.rb ADDED Viewed

@@ -0,0 +1,5 @@
+#!/usr/bin/env ruby
+require 'mspire/ident/peptide/db/creator'
+Mspire::Ident::Peptide::Db::Creator.cmdline(ARGV)

data/script/mascot_dat_to_peptide_hit_qvalues.rb CHANGED Viewed

@@ -4,16 +4,7 @@ require 'trollop'
 require 'set'
 require 'mspire/ident/peptide_hit/qvalue'
 require 'mspire/error_rate/qvalue'
-begin
-  require 'mascot/dat'
-rescue LoadError
-  puts "You need the mascot-dat gem for this to work!"
-  puts "AND IT MUST BE THE PRINCELAB GITHUB FORK until changes get incorporated upstream!"
-  puts ">     gem install mascot-dat"
-  raise LoadError
-end
-raise "need princelab mascot-dat gem!" unless Mascot::DAT::VERSION == "0.3.1.1"
+require 'mspire/mascot/dat'
 # target-decoy bundle
 SearchBundle = Struct.new(:target, :decoy) do
@@ -28,36 +19,35 @@ end
 PSM = Struct.new(:search_id, :id, :aaseq, :charge, :score)
-# turns 1+ into 1
-def charge_string_to_charge(st)
-  md = st.match(/(\d)([\+\-])/)
-  i = md[1].to_i
-  i *= -1 if (md[2] == '-')
-  i
-end
-def read_mascot_dat_hits(dat_file)
+def run_name_from_dat(dat_file)
   filename =nil
   IO.foreach(dat_file) do |line|
-    if line =~ /^FILE=(.*?).mgf/i
+    if line =~ /^FILE=(.*)/i
       filename = $1.dup
+      filename.sub!(/^File Name: /,'')
+      filename.sub!(/.(mgf|raw|mzxml|mzml)$/i,'')
       break
     end
   end
-  dat = Mascot::DAT.open(dat_file)
-  data = [:peptides, :decoy_peptides].map do |mthd|
-    psms = []
-    dat.send(mthd).each do |psm|
-      next unless psm.query
-      query = dat.query(psm.query)
-      charge = charge_string_to_charge(query.charge)
-      psms << PSM.new(filename, query.title, psm.pep, charge, psm.score) if psm.score
+  filename
+end
+def read_mascot_dat_hits(dat_file)
+  filename = run_name_from_dat(dat_file)
+  reply = Mspire::Mascot::Dat.open(dat_file) do |dat|
+    # for some reason, I am getting diff results using the 'map' tagged onto the
+    # method. For now just going to collect old-fashioned.
+    cnt = 0
+    target_and_decoy = [true, false].map do |target_or_decoy|
+      dat.each_peptide(target_or_decoy, 1).map do |pephit|
+        cnt += 1
+        query = dat.query(pephit.query_num)
+        PSM.new(filename, query.title, pephit.seq, query.charge, pephit.ions_score)
+      end
     end
-    psms
+    SearchBundle.new(*target_and_decoy)
   end
-  dat.close
-  SearchBundle.new(*data)
 end
@@ -66,16 +56,18 @@ def putsv(*args)
   $stdout.flush
 end
-EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
 combine_base  = "combined"
+EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
 opts = Trollop::Parser.new do
-  #banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
   banner %Q{usage: #{File.basename(__FILE__)} <mascot>.dat ...
-outputs: <mascot>.phq.tsv
-assumes a decoy search was run *with* the initial search
-phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
+outputs: <mascot>#{EXT}
+    assumes a decoy search was run *with* the initial search
+    phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
 }
+  text ""
   opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
   opt :z_together, "do not group by charge state", :default => false
   opt :verbose, "be verbose", :default => false
@@ -100,19 +92,19 @@ to_run = {}
 if opt[:combine]
   putsv "combining all target hits together and all decoy hits together"
   bundle = SearchBundle.new.combine(bundles)
-  to_run[combine_base + EXT] = bundle
+  to_run[combine_base] = bundle
 else
   files.zip(bundles) do |file, bundle|
-    to_run[file.chomp(File.extname(file)) + EXT] = bundle
+    to_run[file.chomp(File.extname(file))] = bundle
   end
 end
-to_run.each do |file, bundle|
-  putsv "calculating qvalues for #{file}"
-  hit_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
-  # {|hit| hit.search_scores[:ionscore] }
-  outfile = Mspire::Ident::PeptideHit::Qvalue.to_file(file, *hit_qvalue_pairs.transpose)
+to_run.each do |file_base, bundle|
+  putsv "calculating qvalues for #{file_base}"
+  hit_and_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
+  outfile = Mspire::Ident::PeptideHit::Qvalue.to_phq(file_base, *hit_and_qvalue_pairs.transpose)
   putsv "created: #{outfile}"
 end