mspire 0.8.5 → 0.8.6

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.5
1
+ 0.8.6
@@ -0,0 +1,15 @@
1
+
2
+
3
+ # inverse from Tilo Sloboda (now in facets)
4
+
5
class Hash
  # Returns a new Hash that maps each value of the receiver back to its key.
  #
  # * An Array value is expanded: every element becomes its own key in the
  #   result.
  # * When the same value is reached from several keys, the result holds a
  #   flattened Array of those keys, most recently visited key first.
  #
  #   {:a => 1, :b => 1}.inverse    #=> {1 => [:b, :a]}
  #   {:a => [1, 2]}.inverse        #=> {1 => :a, 2 => :a}
  def inverse
    each_pair.with_object({}) do |(key, value), inverted|
      # treat a scalar value as a one-element list so both cases share a loop
      targets = value.is_a?(Array) ? value : [value]
      targets.each do |target|
        inverted[target] =
          if inverted.key?(target)
            [key, inverted[target]].flatten
          else
            key
          end
      end
    end
  end
end
14
+
15
+
@@ -22,8 +22,8 @@ module Mspire
22
22
  # Proc.new doesn't do arity checking
23
23
  hit_with_qvalue_pairs = Proc.new do |hits|
24
24
  sorted_best_to_worst = (hits.sort_by(&sorting)).reverse
25
- (target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
26
- target_hits.zip(qvalues)
25
+ (sorted_target_hits, qvalues) = Mspire::ErrorRate::Qvalue.mixed_target_decoy(sorted_best_to_worst, target_set, opts)
26
+ sorted_target_hits.zip(qvalues)
27
27
  end
28
28
 
29
29
  all_together = target_hits + decoy_hits
@@ -49,13 +49,13 @@ module Mspire
49
49
  opts = {:monotonic => true}.merge(opts)
50
50
  num_target = 0 ; num_decoy = 0
51
51
  monotonic = opts[:monotonic]
52
- target_hits = []
52
+ sorted_target_hits = []
53
53
  qvalues = []
54
54
  best_to_worst.each do |hit|
55
55
  if target_setlike.include?(hit)
56
56
  num_target += 1
57
57
  precision = Mspire::ErrorRate::Decoy.precision(num_target, num_decoy)
58
- target_hits << hit
58
+ sorted_target_hits << hit
59
59
  qvalues << (1.0 - precision)
60
60
  else
61
61
  num_decoy += 1
@@ -72,7 +72,7 @@ module Mspire
72
72
  end
73
73
  end.reverse
74
74
  end
75
- [target_hits, qvalues]
75
+ [sorted_target_hits, qvalues]
76
76
  end
77
77
 
78
78
 
data/lib/mspire/fasta.rb CHANGED
@@ -61,6 +61,8 @@ module Mspire
61
61
  # takes the header string and returns the uniprot id
62
62
  #
63
63
  # 'sp|Q04917|1433F_HUMAN' #=> 'Q04917'
64
+ # This can also be found with BioFastaFormat#accession (but it may be much
65
+ # slower)
64
66
  def self.uniprot_id(header)
65
67
  header[/^[^\|]+\|([^\|]+)\|/, 1]
66
68
  end
@@ -43,8 +43,11 @@ class Mspire::Ident::Peptide::Db::Creator
43
43
  op.on("--no-expand-x", "don't enumerate aa possibilities", "(removes these peptides)") { opt[:expand_aa] = false }
44
44
  op.on("--no-uniprot", "use entire protid section of fasta header", "for non-uniprot fasta files") { opt[:uniprot] = false }
45
45
  op.on("--trie", "use a trie (for very large uniprot files)", "must have fast_trie gem installed") {|v| opt[:trie] = v }
46
+
46
47
  op.on("-e", "--enzyme <name>", "enzyme for digestion") {|v| opt[:enzyme] = Mspire::Insilico::Digester.const_get(v.upcase) }
48
+ op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
47
49
  op.on("--list-enzymes", "lists approved enzymes and exits") do
50
+ op.on("-v", "--verbose", "talk about it") { $VERBOSE = 5 }
48
51
  puts Mspire::Digester::ENZYMES.keys.join("\n")
49
52
  exit
50
53
  end
@@ -95,7 +98,7 @@ class Mspire::Ident::Peptide::Db::Creator
95
98
  (pep =~ letters_to_expand_re) ? expand_peptides(pep, EXPAND_AA) : pep
96
99
  end
97
100
  else
98
- peptides.map {|pep| pep =~ letters_to_expand_re }.compact
101
+ peptides.select {|pep| pep !~ letters_to_expand_re }
99
102
  end
100
103
  header = prot.header
101
104
  id = opts[:uniprot] ? Mspire::Fasta.uniprot_id(header) : header.split(/\s+/).first
@@ -118,15 +121,25 @@ class Mspire::Ident::Peptide::Db::Creator
118
121
  hash_like = hash_like_from_digestion_file(digestion_file, opts[:min_length], opts[:trie])
119
122
 
120
123
  base = digestion_file.chomp(File.extname(digestion_file))
121
- final_outfile = base + ".min_aaseq#{opts[:min_length]}" + ".yml"
124
+ final_outfile =
125
+ if opts[:trie]
126
+ base + ".min_aaseq#{opts[:min_length]}"
127
+ else
128
+ base + ".min_aaseq#{opts[:min_length]}" + ".yml"
129
+ end
122
130
 
123
131
  start_time = Time.now
124
132
  print "Writing #{hash_like.size} peptides to #{} ..." if $VERBOSE
125
133
 
126
- File.open(final_outfile, 'w') do |out|
127
- hash_like.each do |k,v|
128
- #out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
129
- out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
134
+ if opts[:trie]
135
+ trie = hash_like
136
+ trie.save(final_outfile)
137
+ else
138
+ File.open(final_outfile, 'w') do |out|
139
+ hash_like.each do |k,v|
140
+ out.puts( [k, v.join(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)].join(Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER) )
141
+ #out.puts "#{k}#{Mspire::Ident::Peptide::Db::KEY_VALUE_DELIMITER}#{v}"
142
+ end
130
143
  end
131
144
  end
132
145
  puts "#{Time.now - start_time} sec" if $VERBOSE
@@ -137,71 +150,47 @@ class Mspire::Ident::Peptide::Db::Creator
137
150
  File.expand_path(final_outfile)
138
151
  end
139
152
 
140
- def hash_like_tree
141
- require 'trie'
142
- trie = Trie.new
143
- def trie.[](key)
144
- val = self.get(key)
145
- if val.nil?
146
- self.add(key,"")
147
- self.get(key)
148
- else
149
- val
150
- end
153
+ def get_a_trie
154
+ begin
155
+ require 'trie'
156
+ rescue
157
+ raise LoadError, "must first install fast_trie"
151
158
  end
152
- trie
159
+ Trie.new
153
160
  end
154
161
 
155
162
  def hash_like_from_digestion_file(digestion_file, min_length, use_trie=false)
156
- cnt = 0
157
163
  if use_trie
158
- raise NotImplementedError
159
- #puts "using trie" if $VERBOSE
160
- #trie = hash_like_tree
161
- #line_cnt = 0
162
- #::IO.foreach(digestion_file) do |line|
163
- #line_cnt += 1
164
- ##puts "LINE COUND"
165
- ##p line_cnt
166
- #(prot, *peps) = line.chomp!.split(/\s+/)
167
- ##p peps
168
- ##p peps.class
169
- ## prot is something like this: "P31946"
170
- #puts line
171
- #peps.each do |pep|
172
- #if pep.size >= min_length
173
- #to_set =
174
- #if val = trie.get(pep)
175
- #val + Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER + prot
176
- #else
177
- #prot
178
- #end
179
- #p to_set.size
180
- #trie.add(pep, to_set)
181
- #end
182
- #end
183
- #cnt += 1
184
- #puts cnt if (cnt % 1000) == 0
185
- #end
186
- #abort "HERE"
187
- #trie
188
- else
189
- hash = {}
164
+ trie = get_a_trie
190
165
  ::IO.foreach(digestion_file) do |line|
191
- (prot, *peps) = line.chomp!.split(/\s+/)
166
+ line.chomp!
167
+ (prot, *peps) = line.split(/\s+/)
192
168
  # prot is something like this: "P31946"
169
+ peps.uniq!
193
170
  peps.each do |pep|
194
171
  if pep.size >= min_length
195
- if val = hash[pep]
196
- val << Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER << prot
172
+ if trie.has_key?(pep)
173
+ ar = trie.get(pep)
174
+ ar << prot
197
175
  else
198
- val = prot
176
+ trie.add( pep, [prot] )
199
177
  end
200
- hash[pep] = val
201
178
  end
202
179
  end
203
- cnt += 1
204
- puts cnt if (cnt % 1000) == 0
180
+ end
181
+ trie
182
+ else
183
+ hash = Hash.new {|h,k| h[k] = [] }
184
+ ::IO.foreach(digestion_file) do |line|
185
+ line.chomp!
186
+ (prot, *peps) = line.split(/\s+/)
187
+ # prot is something like this: "P31946"
188
+ peps.uniq!
189
+ peps.each do |pep|
190
+ if pep.size >= min_length
191
+ hash[pep] << prot
192
+ end
193
+ end
205
194
  end
206
195
  hash
207
196
  end
@@ -215,6 +204,7 @@ class Mspire::Ident::Peptide::Db::Creator
215
204
  def create(fasta_file, opts={})
216
205
  opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts)
217
206
  digestion_file = create_digestion_file(fasta_file, opts)
207
+ puts "created file of size: #{File.size(digestion_file)}" if $VERBOSE
218
208
  db_from_fasta_digestion_file(digestion_file, opts)
219
209
  end
220
210
 
@@ -6,6 +6,7 @@ class Mspire::Ident::Peptide::Db::IO
6
6
  # behaves like a hash once it is opened.
7
7
  include Enumerable
8
8
  def self.open(filename, &block)
9
+ #p filename
9
10
  raise ArgumentError unless block
10
11
  File.open(filename) do |io|
11
12
  block.call(self.new(io))
@@ -39,6 +40,10 @@ class Mspire::Ident::Peptide::Db::IO
39
40
  string.split(Mspire::Ident::Peptide::Db::PROTEIN_DELIMITER)
40
41
  end
41
42
 
43
+ def key?(key)
44
+ @index[key]
45
+ end
46
+
42
47
  # number of entries
43
48
  def size ; @index.size end
44
49
  alias_method :length, :size
@@ -12,7 +12,7 @@ class Mspire::Ident::PeptideHit
12
12
 
13
13
  class << self
14
14
 
15
- # writes to the file, adding an extension
15
+ # writes to the file, adding an extension. returns the filename
16
16
  def to_phq(base, hits, qvalues=[])
17
17
  to_file(base + FILE_EXTENSION, hits, qvalues)
18
18
  end
@@ -20,7 +20,7 @@ class Mspire::Ident::PeptideHit
20
20
  # writes the peptide hits to a phq.tsv file. qvalues is a parallel array
21
21
  # to hits that can provide qvalues if not inherent to the hits
22
22
  # returns the filename. Expects each hit to implement #search_id, #id,
23
- # #aaseq and #charge
23
+ # #aaseq and #charge. returns the filename
24
24
  def to_file(filename, hits, qvalues=[])
25
25
  File.open(filename,'w') do |out|
26
26
  out.puts HEADER.join(FILE_DELIMITER)
@@ -19,8 +19,8 @@ class Mspire::Ident::PeptideHit
19
19
  include Mspire::Ident::PeptideHitLike
20
20
  include Merge
21
21
 
22
- def initialize(hash)
23
- merge!(hash)
22
+ def initialize(hash=nil)
23
+ merge!(hash) if hash
24
24
  end
25
25
  end
26
26
 
@@ -38,8 +38,10 @@ module Mspire
38
38
  # note to self: I wrote this in 2011, so I think I know what I'm doing now
39
39
  protein_to_peptides = Hash.new {|h,k| h[k] = Set.new }
40
40
  peptide_hits.each do |peptide_hit|
41
- peptide_hit.proteins.each do |protein|
42
- protein_to_peptides[protein] << peptide_hit
41
+ if prots = peptide_hit.proteins
42
+ prots.each do |protein|
43
+ protein_to_peptides[protein] << peptide_hit
44
+ end
43
45
  end
44
46
  end
45
47
  peptides_to_protein_group = Hash.new {|h,k| h[k] = [] }
@@ -8,27 +8,27 @@ module Mspire
8
8
  # and OH on the ends)
9
9
  aa_to_el_hash = {
10
10
  'A' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
11
- 'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
12
- 'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
13
- 'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
14
11
  'C' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
12
+ 'D' => { :c =>4, :h =>5 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
15
13
  'E' => { :c =>5, :h =>7 , :o =>3 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
16
- 'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
14
+ 'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
17
15
  'G' => { :c =>2, :h =>3 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
18
- 'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
19
16
  'I' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
20
- 'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
17
+ 'H' => { :c =>6, :h =>7 , :o =>1 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
21
18
  'K' => { :c =>6, :h =>12 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
19
+ 'L' => { :c =>6, :h =>11 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
22
20
  'M' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>1 , :p =>0, :se =>0 },
23
- 'F' => { :c =>9, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
21
+ 'N' => { :c =>4, :h =>6 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
22
+ 'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 },
24
23
  'P' => { :c =>5, :h =>7 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
24
+ 'Q' => { :c =>5, :h =>8 , :o =>2 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
25
+ 'R' => { :c =>6, :h =>12 , :o =>1 , :n =>4 , :s =>0 , :p =>0, :se =>0 },
25
26
  'S' => { :c =>3, :h =>5 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
26
27
  'T' => { :c =>4, :h =>7 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
28
+ 'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
29
+ 'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
27
30
  'W' => { :c =>11, :h =>10 , :o =>1 , :n =>2 , :s =>0 , :p =>0, :se =>0 },
28
31
  'Y' => { :c =>9, :h =>9 , :o =>2 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
29
- 'V' => { :c =>5, :h =>9 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>0 },
30
- 'U' => { :c =>3, :h =>5 , :o =>1 , :n =>1 , :s =>0 , :p =>0, :se =>1 },
31
- 'O' => { :c =>12, :h =>19 , :o =>2 , :n =>3 , :s =>0 , :p =>0, :se =>0 }
32
32
  }
33
33
 
34
34
  #
@@ -35,10 +35,17 @@ module Mspire
35
35
  def self.from_xml(xml, link)
36
36
  obj = self.new(xml[:id])
37
37
  next_n = obj.describe_from_xml!(xml, link[:ref_hash])
38
- if next_n && next_n.name == 'componentList'
38
+ if next_n && (next_n.name == 'componentList')
39
39
  obj.components = next_n.children.map do |component_n|
40
- Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
41
- end
40
+ if component_n.is_a?(Nokogiri::XML::Text)
41
+ # TODO: this is a fix for when there is an empty component list but
42
+ # Nokogiri returns a text node. Really this needs to be fixed
43
+ # in our xml writer!
44
+ nil
45
+ else
46
+ Mspire::Mzml.const_get(component_n.name.capitalize).new.describe_self_from_xml!(component_n, link[:ref_hash])
47
+ end
48
+ end.compact
42
49
  next_n = next_n.next
43
50
  end
44
51
  if next_n && next_n.name == 'softwareRef'
@@ -0,0 +1,42 @@
1
+ require 'hash/inverse'
2
+
3
module Mspire ; module Quant ; end ; end

# Command-line helpers for quantitation scripts.
module Mspire::Quant::Cmdline

  # expects arguments in one of two forms.  The first form is grouped by
  # condition as shown:
  #
  #     condition1=file1,file2,file3... condition2=file4,file5...
  #
  # The second is where each file is its own condition (1 replicate):
  #
  #     file1 file2 file3
  #
  # In the first form each file of a multi-file condition gets the sample
  # name "<condition><replicate_postfix><n>" (n counted from 1); a condition
  # with a single file uses the bare condition name as its sample name.
  #
  # Returns three hashes, in this order (only ordered for ruby 1.9):
  #
  #     1) Samplename to the filename
  #     2) Condition to an array of samplenames
  #     3) Samplename to condition (Hash#inverse of the second hash)
  #
  # Relies on Hash#inverse being loaded (the file requires 'hash/inverse').
  def self.args_to_hashes(args, replicate_postfix="-rep")
    condition_to_samplenames = {}
    samplename_to_filename = {}
    args.each do |arg|
      (condition, files) =
        if arg.include?('=')
          # a limit of 2 keeps any later '=' inside the file list, and
          # "cond=" yields an empty list instead of crashing on nil
          (name, filestring) = arg.split('=', 2)
          [name, filestring.split(',')]
        else
          # NOTE(review): the original called an undefined `basename` helper
          # here (NoMethodError). Assumed intent: the file name without its
          # directory and extension -- confirm against callers.
          [File.basename(arg, File.extname(arg)), [arg]]
        end
      sample_to_file_pairs = files.each_with_index.map do |file,i|
        rep_string = (files.size == 1) ? "" : "#{replicate_postfix}#{i+1}"
        ["#{condition}#{rep_string}", file]
      end
      sample_to_file_pairs.each {|name,file| samplename_to_filename[name] = file }
      condition_to_samplenames[condition] = sample_to_file_pairs.map(&:first)
    end
    [samplename_to_filename, condition_to_samplenames, condition_to_samplenames.inverse]
  end
end
@@ -0,0 +1,29 @@
1
+
2
module Mspire
  module Quant
  end
end

# Mixin holding one protein group together with per-experiment values.
# Included by the comparison classes defined below.
module Mspire::Quant::ProteinGroupComparison

  # a protein group object
  attr_accessor :protein_group

  # an array of experiment names
  attr_accessor :experiments

  # parallel array to experiments with the measured values
  attr_accessor :values

  # Stores the three arguments in the matching accessors.
  # (The ivar must be @experiments -- the previous @experiment left the
  # #experiments reader returning nil.)
  def initialize(protein_group, experiments, values)
    @protein_group = protein_group
    @experiments = experiments
    @values = values
  end
end

class Mspire::Quant::ProteinGroupComparison::SpectralCounts
  include Mspire::Quant::ProteinGroupComparison
end

class Mspire::Quant::ProteinGroupComparison::UniqAAzCounts
  include Mspire::Quant::ProteinGroupComparison
end
@@ -0,0 +1,42 @@
1
+ #require 'set'
2
+ #require 'mspire/ident/protein_group'
3
+
4
module Mspire
  module Quant
    # Weighted tallies computed over a collection of peptide hits.
    module SpectralCounts
      # Three tallies: per hit (spectral), per unique [aaseq, charge] pair
      # (aaseqcharge) and per unique aaseq (aaseq).
      Counts = Struct.new(:spectral, :aaseqcharge, :aaseq)
      class Counts
        # Any member left nil (or false) after construction becomes 0.0.
        def initialize(*args)
          super(*args)
          members.each_index {|idx| self[idx] ||= 0.0 }
        end
      end

      # Returns a single Counts object for peptide_hits.  Each hit must
      # respond to :aaseq and :charge.  Every hit weighs 1 unless a block is
      # given, in which case the block is called with the hit and its return
      # value is the weight (typically 1/#proteins sharing the hit).
      #
      # For the two "unique" tallies a later hit with the same key overwrites
      # the weight recorded by an earlier one, so the block is expected to
      # give the same weight to hits sharing a key.
      def self.counts(peptide_hits, &share_the_pephit)
        weight_by_aaseq = {}
        weight_by_aaseq_and_charge = {}
        per_hit_weights = []
        peptide_hits.each do |hit|
          weight = share_the_pephit ? share_the_pephit.call(hit) : 1
          per_hit_weights << weight
          weight_by_aaseq_and_charge[[hit.aaseq, hit.charge]] = weight
          weight_by_aaseq[hit.aaseq] = weight
        end
        weight_lists = [per_hit_weights, weight_by_aaseq_and_charge.values, weight_by_aaseq.values]
        # summing an empty list gives nil, which Counts turns into 0.0
        totals = weight_lists.map {|list| list.inject(:+) }
        Counts.new(*totals)
      end
    end
  end
end
40
+
41
+
42
+
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'mspire/ident/peptide/db/creator'
4
+
5
+ Mspire::Ident::Peptide::Db::Creator.cmdline(ARGV)
@@ -4,16 +4,7 @@ require 'trollop'
4
4
  require 'set'
5
5
  require 'mspire/ident/peptide_hit/qvalue'
6
6
  require 'mspire/error_rate/qvalue'
7
-
8
- begin
9
- require 'mascot/dat'
10
- rescue LoadError
11
- puts "You need the mascot-dat gem for this to work!"
12
- puts "AND IT MUST BE THE PRINCELAB GITHUB FORK until changes get incorporated upstream!"
13
- puts "> gem install mascot-dat"
14
- raise LoadError
15
- end
16
- raise "need princelab mascot-dat gem!" unless Mascot::DAT::VERSION == "0.3.1.1"
7
+ require 'mspire/mascot/dat'
17
8
 
18
9
  # target-decoy bundle
19
10
  SearchBundle = Struct.new(:target, :decoy) do
@@ -28,36 +19,35 @@ end
28
19
 
29
20
  PSM = Struct.new(:search_id, :id, :aaseq, :charge, :score)
30
21
 
31
- # turns 1+ into 1
32
- def charge_string_to_charge(st)
33
- md = st.match(/(\d)([\+\-])/)
34
- i = md[1].to_i
35
- i *= -1 if (md[2] == '-')
36
- i
37
- end
38
-
39
- def read_mascot_dat_hits(dat_file)
22
+ def run_name_from_dat(dat_file)
40
23
  filename =nil
41
24
  IO.foreach(dat_file) do |line|
42
- if line =~ /^FILE=(.*?).mgf/i
25
+ if line =~ /^FILE=(.*)/i
43
26
  filename = $1.dup
27
+ filename.sub!(/^File Name: /,'')
28
+ filename.sub!(/.(mgf|raw|mzxml|mzml)$/i,'')
44
29
  break
45
30
  end
46
31
  end
47
- dat = Mascot::DAT.open(dat_file)
48
-
49
- data = [:peptides, :decoy_peptides].map do |mthd|
50
- psms = []
51
- dat.send(mthd).each do |psm|
52
- next unless psm.query
53
- query = dat.query(psm.query)
54
- charge = charge_string_to_charge(query.charge)
55
- psms << PSM.new(filename, query.title, psm.pep, charge, psm.score) if psm.score
32
+ filename
33
+ end
34
+
35
+ def read_mascot_dat_hits(dat_file)
36
+ filename = run_name_from_dat(dat_file)
37
+
38
+ reply = Mspire::Mascot::Dat.open(dat_file) do |dat|
39
+ # for some reason, I am getting diff results using the 'map' tagged onto the
40
+ # method. For now just going to collect old-fashioned.
41
+ cnt = 0
42
+ target_and_decoy = [true, false].map do |target_or_decoy|
43
+ dat.each_peptide(target_or_decoy, 1).map do |pephit|
44
+ cnt += 1
45
+ query = dat.query(pephit.query_num)
46
+ PSM.new(filename, query.title, pephit.seq, query.charge, pephit.ions_score)
47
+ end
56
48
  end
57
- psms
49
+ SearchBundle.new(*target_and_decoy)
58
50
  end
59
- dat.close
60
- SearchBundle.new(*data)
61
51
  end
62
52
 
63
53
 
@@ -66,16 +56,18 @@ def putsv(*args)
66
56
  $stdout.flush
67
57
  end
68
58
 
69
- EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
70
59
  combine_base = "combined"
71
60
 
61
+ EXT = Mspire::Ident::PeptideHit::Qvalue::FILE_EXTENSION
62
+
72
63
  opts = Trollop::Parser.new do
73
- #banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
74
64
  banner %Q{usage: #{File.basename(__FILE__)} <mascot>.dat ...
75
- outputs: <mascot>.phq.tsv
76
- assumes a decoy search was run *with* the initial search
77
- phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
65
+ outputs: <mascot>#{EXT}
66
+
67
+ assumes a decoy search was run *with* the initial search
68
+ phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
78
69
  }
70
+ text ""
79
71
  opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
80
72
  opt :z_together, "do not group by charge state", :default => false
81
73
  opt :verbose, "be verbose", :default => false
@@ -100,19 +92,19 @@ to_run = {}
100
92
  if opt[:combine]
101
93
  putsv "combining all target hits together and all decoy hits together"
102
94
  bundle = SearchBundle.new.combine(bundles)
103
- to_run[combine_base + EXT] = bundle
95
+ to_run[combine_base] = bundle
104
96
  else
105
97
  files.zip(bundles) do |file, bundle|
106
- to_run[file.chomp(File.extname(file)) + EXT] = bundle
98
+ to_run[file.chomp(File.extname(file))] = bundle
107
99
  end
108
100
  end
109
101
 
110
- to_run.each do |file, bundle|
111
- putsv "calculating qvalues for #{file}"
112
- hit_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
113
- # {|hit| hit.search_scores[:ionscore] }
114
- outfile = Mspire::Ident::PeptideHit::Qvalue.to_file(file, *hit_qvalue_pairs.transpose)
102
+ to_run.each do |file_base, bundle|
103
+ putsv "calculating qvalues for #{file_base}"
104
+ hit_and_qvalue_pairs = Mspire::ErrorRate::Qvalue.target_decoy_qvalues(bundle.target, bundle.decoy, :z_together => opt[:z_together])
105
+
106
+ outfile = Mspire::Ident::PeptideHit::Qvalue.to_phq(file_base, *hit_and_qvalue_pairs.transpose)
107
+
115
108
  putsv "created: #{outfile}"
116
109
  end
117
110
 
118
-