ms-error_rate 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.10
1
+ 0.0.11
@@ -4,15 +4,20 @@ require 'trollop'
4
4
  require 'nokogiri'
5
5
  require 'set'
6
6
 
7
+ require 'ms/ident/pepxml'
7
8
  require 'ms/ident/peptide_hit/qvalue'
8
- require 'ms/error_rate/qvalue/pepxml'
9
+
10
+ EXT = Ms::Ident::PeptideHit::Qvalue::FILE_EXTENSION
11
+ combine_base = "combined"
9
12
 
10
13
  opts = Trollop::Parser.new do
11
- banner %Q{usage: #{File.basename(__FILE__)} <fwd>.xml <decoy>.xml ...
12
- outputs: <fwd>.phq.csv
14
+ banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
15
+ outputs: <target>.phq.tsv
13
16
  phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
14
17
  }
18
+ opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
15
19
  opt :z_together, "do not group by charge state", :default => false
20
+ opt :decoy_first, "decoy files are before target files", :default => false
16
21
  opt :verbose, "be verbose", :default => false
17
22
  end
18
23
 
@@ -22,16 +27,37 @@ if ARGV.size == 0 || (ARGV.size%2 != 0)
22
27
  opts.educate
23
28
  exit
24
29
  end
30
+
25
31
  $VERBOSE = opt.delete(:verbose)
26
32
 
27
33
  files = ARGV.to_a
28
- files.each_slice(2).map do |target, decoy|
29
- hit_qvalue_pairs = Ms::ErrorRate::Qvalue::Pepxml.target_decoy_qvalues(target, decoy, opt, &:ionscore)
34
+
35
+ files.replace(files.each_slice(2).flat_map(&:reverse)) if opt[:decoy_first] # each_slice without a block yields an Enumerator (no map!); swap every pair in place so the rest of the script sees the reordered list
36
+
37
+ groups_of_search_hits = files.map do |file|
38
+ Ms::Ident::Pepxml.search_hits(file)
39
+ end
40
+
41
+ to_run = {}
42
+ if opt[:combine]
43
+ all_target = [] ; all_decoy = []
44
+ groups_of_search_hits.each_slice(2) do |target_hits,decoy_hits|
45
+ all_target.push(*target_hits) ; all_decoy.push(*decoy_hits)
46
+ end
47
+ to_run[combine_base + EXT] = [all_target, all_decoy] # output name carries EXT, matching the --combine help text and the per-file branch
48
+ else
49
+ groups_of_search_hits.each_slice(2).zip(files) do |target_hits_and_decoy_hits, file|
50
+ to_run[file.chomp(File.extname(file)) + EXT] = target_hits_and_decoy_hits
51
+ end
52
+ end
53
+
54
+ to_run.each do |file, target_decoy_pair|
55
+ hit_qvalue_pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_decoy_pair.first, target_decoy_pair.last, :z_together => opt[:z_together])
30
56
  hits = [] ; qvals = []
31
57
  hit_qvalue_pairs.each do |hit, qval|
32
58
  hits << hit ; qvals << qval
33
59
  end
34
- outfile = Ms::Ident::PeptideHit::Qvalue.to_phq(target.chomp(File.extname(target)), hits, qvals)
60
+ outfile = Ms::Ident::PeptideHit::Qvalue.to_file(file, hits, qvals)
35
61
  puts "created: #{outfile}" if $VERBOSE
36
62
  end
37
63
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 10
9
- version: 0.0.10
8
+ - 11
9
+ version: 0.0.11
10
10
  platform: ruby
11
11
  authors:
12
12
  - John Prince
@@ -127,7 +127,6 @@ files:
127
127
  - lib/ms/error_rate/qvalue.rb
128
128
  - lib/ms/error_rate/qvalue/mascot.rb
129
129
  - lib/ms/error_rate/qvalue/mascot/percolator.rb
130
- - lib/ms/error_rate/qvalue/pepxml.rb
131
130
  - lib/ms/error_rate/sbv.rb
132
131
  - lib/ms/error_rate/sbv/peptide_based.rb
133
132
  - lib/ms/error_rate/sbv/protein_based.rb
@@ -1,52 +0,0 @@
1
- require 'ms/error_rate/qvalue'
2
-
3
- module Ms ; end
4
- module Ms::ErrorRate ; end
5
- module Ms::ErrorRate::Qvalue ; end
6
-
7
- module Ms::ErrorRate::Qvalue::Pepxml
8
- module_function
9
-
10
- # returns an array of hit and qvalue pairs
11
- # retrieves the aaseq, charge, and all search_score keys and values for use
12
- # in the search_hit. caller must provide a sort_by block, where the best
13
- # hits are last. charge is an integer, and all other search scores are cast
14
- # as floats. returns the output filename.
15
- def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
16
-
17
- # this is a list of high quality peptide hits associated with each group
18
- fields = [:aaseq, :charge]
19
- ss_names = []
20
- have_ss_names = false
21
- (target_hits, decoy_hits) = [target_pepxml, decoy_pepxml].map do |file|
22
- # begin with aaseq, charge
23
- File.open(file) do |io|
24
- doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
25
- # we can work with namespaces, or just remove them ...
26
- doc.remove_namespaces!
27
- root = doc.root
28
- search_hits = root.xpath('//search_hit')
29
- search_hits.map do |search_hit|
30
- aaseq = search_hit['peptide']
31
- charge = search_hit.parent.parent['assumed_charge'].to_i
32
- search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
33
- ss_values = []
34
- search_score_nodes.each do |node|
35
- ss_names << node['name'].to_sym unless have_ss_names
36
- ss_values << node['value'].to_f
37
- end
38
- have_ss_names = true
39
- [aaseq, charge, *ss_values]
40
- end
41
- end
42
- end
43
-
44
- fields.push(*ss_names)
45
-
46
- peptide_hit_class = Struct.new(*fields)
47
- (t_hits, d_hits) = [target_hits, decoy_hits].map {|hits| hits.map {|hit_values| peptide_hit_class.new(*hit_values) } }
48
-
49
- # hit and qvalue pairs
50
- Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
51
- end
52
- end