ms-error_rate 0.0.10 → 0.0.11

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
@@ -4,15 +4,20 @@ require 'trollop'
4
4
  require 'nokogiri'
5
5
  require 'set'
6
6
 
7
+ require 'ms/ident/pepxml'
7
8
  require 'ms/ident/peptide_hit/qvalue'
8
- require 'ms/error_rate/qvalue/pepxml'
9
+
10
+ EXT = Ms::Ident::PeptideHit::Qvalue::FILE_EXTENSION
11
+ combine_base = "combined"
9
12
 
10
13
  opts = Trollop::Parser.new do
11
- banner %Q{usage: #{File.basename(__FILE__)} <fwd>.xml <decoy>.xml ...
12
- outputs: <fwd>.phq.csv
14
+ banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
15
+ outputs: <fwd>.phq.tsv
13
16
  phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
14
17
  }
18
+ opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
15
19
  opt :z_together, "do not group by charge state", :default => false
20
+ opt :decoy_first, "decoy files are before target files", :default => false
16
21
  opt :verbose, "be verbose", :default => false
17
22
  end
18
23
 
@@ -22,16 +27,37 @@ if ARGV.size == 0 || (ARGV.size%2 != 0)
22
27
  opts.educate
23
28
  exit
24
29
  end
30
+
25
31
  $VERBOSE = opt.delete(:verbose)
26
32
 
27
33
  files = ARGV.to_a
28
- files.each_slice(2).map do |target, decoy|
29
- hit_qvalue_pairs = Ms::ErrorRate::Qvalue::Pepxml.target_decoy_qvalues(target, decoy, opt, &:ionscore)
34
+
35
+ files.each_slice(2).map!(&:reverse) if opt[:decoy_first]
36
+
37
+ groups_of_search_hits = files.map do |file|
38
+ Ms::Ident::Pepxml.search_hits(file)
39
+ end
40
+
41
+ to_run = {}
42
+ if opt[:combine]
43
+ all_target = [] ; all_decoy = []
44
+ groups_of_search_hits.each_slice(2) do |target_hits,decoy_hits|
45
+ all_target.push(*target_hits) ; all_decoy.push(*decoy_hits)
46
+ end
47
+ to_run[combine_base] = [all_target, all_decoy]
48
+ else
49
+ groups_of_search_hits.each_slice(2).zip(files) do |target_hits_and_decoy_hits, file|
50
+ to_run[file.chomp(File.extname(file)) + EXT] = target_hits_and_decoy_hits
51
+ end
52
+ end
53
+
54
+ to_run.each do |file, target_decoy_pair|
55
+ hit_qvalue_pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_decoy_pair.first, target_decoy_pair.last, :z_together => opt[:z_together])
30
56
  hits = [] ; qvals = []
31
57
  hit_qvalue_pairs.each do |hit, qval|
32
58
  hits << hit ; qvals << qval
33
59
  end
34
- outfile = Ms::Ident::PeptideHit::Qvalue.to_phq(target.chomp(File.extname(target)), hits, qvals)
60
+ outfile = Ms::Ident::PeptideHit::Qvalue.to_file(file, hits, qvals)
35
61
  puts "created: #{outfile}" if $VERBOSE
36
62
  end
37
63
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 10
9
- version: 0.0.10
8
+ - 11
9
+ version: 0.0.11
10
10
  platform: ruby
11
11
  authors:
12
12
  - John Prince
@@ -127,7 +127,6 @@ files:
127
127
  - lib/ms/error_rate/qvalue.rb
128
128
  - lib/ms/error_rate/qvalue/mascot.rb
129
129
  - lib/ms/error_rate/qvalue/mascot/percolator.rb
130
- - lib/ms/error_rate/qvalue/pepxml.rb
131
130
  - lib/ms/error_rate/sbv.rb
132
131
  - lib/ms/error_rate/sbv/peptide_based.rb
133
132
  - lib/ms/error_rate/sbv/protein_based.rb
@@ -1,52 +0,0 @@
1
- require 'ms/error_rate/qvalue'
2
-
3
- module Ms ; end
4
- module Ms::ErrorRate ; end
5
- module Ms::ErrorRate::Qvalue ; end
6
-
7
- module Ms::ErrorRate::Qvalue::Pepxml
8
- module_function
9
-
10
- # returns an array of hit and qvalue pairs
11
- # retrieves the aaseq, charge, and all search_score keys and values for use
12
- # in the search_hit. caller must provide a sort_by block, where the best
13
- # hits are last. charge is an integer, and all other search scores are cast
14
- # as floats. returns the output filename.
15
- def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
16
-
17
- # this is a list of high quality peptide hits associated with each group
18
- fields = [:aaseq, :charge]
19
- ss_names = []
20
- have_ss_names = false
21
- (target_hits, decoy_hits) = [target_pepxml, decoy_pepxml].map do |file|
22
- # begin with aaseq, charge
23
- File.open(file) do |io|
24
- doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
25
- # we can work with namespaces, or just remove them ...
26
- doc.remove_namespaces!
27
- root = doc.root
28
- search_hits = root.xpath('//search_hit')
29
- search_hits.map do |search_hit|
30
- aaseq = search_hit['peptide']
31
- charge = search_hit.parent.parent['assumed_charge'].to_i
32
- search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
33
- ss_values = []
34
- search_score_nodes.each do |node|
35
- ss_names << node['name'].to_sym unless have_ss_names
36
- ss_values << node['value'].to_f
37
- end
38
- have_ss_names = true
39
- [aaseq, charge, *ss_values]
40
- end
41
- end
42
- end
43
-
44
- fields.push(*ss_names)
45
-
46
- peptide_hit_class = Struct.new(*fields)
47
- (t_hits, d_hits) = [target_hits, decoy_hits].map {|hits| hits.map {|hit_values| peptide_hit_class.new(*hit_values) } }
48
-
49
- # hit and qvalue pairs
50
- Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
51
- end
52
- end