ms-error_rate 0.0.10 → 0.0.11
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/mascot_pepxml_to_peptide_hit_qvalues.rb +32 -6
- metadata +2 -3
- data/lib/ms/error_rate/qvalue/pepxml.rb +0 -52
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.10
|
1
|
+
0.0.11
|
@@ -4,15 +4,20 @@ require 'trollop'
|
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'set'
|
6
6
|
|
7
|
+
require 'ms/ident/pepxml'
|
7
8
|
require 'ms/ident/peptide_hit/qvalue'
|
8
|
-
|
9
|
+
|
10
|
+
EXT = Ms::Ident::PeptideHit::Qvalue::FILE_EXTENSION
|
11
|
+
combine_base = "combined"
|
9
12
|
|
10
13
|
opts = Trollop::Parser.new do
|
11
|
-
banner %Q{usage: #{File.basename(__FILE__)} <
|
12
|
-
outputs: <fwd>.phq.
|
14
|
+
banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
|
15
|
+
outputs: <fwd>.phq.tsv
|
13
16
|
phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
|
14
17
|
}
|
18
|
+
opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
|
15
19
|
opt :z_together, "do not group by charge state", :default => false
|
20
|
+
opt :decoy_first, "decoy files are before target files", :default => false
|
16
21
|
opt :verbose, "be verbose", :default => false
|
17
22
|
end
|
18
23
|
|
@@ -22,16 +27,37 @@ if ARGV.size == 0 || (ARGV.size%2 != 0)
|
|
22
27
|
opts.educate
|
23
28
|
exit
|
24
29
|
end
|
30
|
+
|
25
31
|
$VERBOSE = opt.delete(:verbose)
|
26
32
|
|
27
33
|
files = ARGV.to_a
|
28
|
-
|
29
|
-
|
34
|
+
|
35
|
+
files.each_slice(2).map!(&:reverse) if opt[:decoy_first]
|
36
|
+
|
37
|
+
groups_of_search_hits = files.map do |file|
|
38
|
+
Ms::Ident::Pepxml.search_hits(file)
|
39
|
+
end
|
40
|
+
|
41
|
+
to_run = {}
|
42
|
+
if opt[:combine]
|
43
|
+
all_target = [] ; all_decoy = []
|
44
|
+
groups_of_search_hits.each_slice(2) do |target_hits,decoy_hits|
|
45
|
+
all_target.push(*target_hits) ; all_decoy.push(*decoy_hits)
|
46
|
+
end
|
47
|
+
to_run[combine_base] = [all_target, all_decoy]
|
48
|
+
else
|
49
|
+
groups_of_search_hits.each_slice(2).zip(files) do |target_hits_and_decoy_hits, file|
|
50
|
+
to_run[file.chomp(File.extname(file)) + EXT] = target_hits_and_decoy_hits
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
to_run.each do |file, target_decoy_pair|
|
55
|
+
hit_qvalue_pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_decoy_pair.first, target_decoy_pair.last, :z_together => opt[:z_together])
|
30
56
|
hits = [] ; qvals = []
|
31
57
|
hit_qvalue_pairs.each do |hit, qval|
|
32
58
|
hits << hit ; qvals << qval
|
33
59
|
end
|
34
|
-
outfile = Ms::Ident::PeptideHit::Qvalue.
|
60
|
+
outfile = Ms::Ident::PeptideHit::Qvalue.to_file(file, hits, qvals)
|
35
61
|
puts "created: #{outfile}" if $VERBOSE
|
36
62
|
end
|
37
63
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 10
|
9
|
-
version: 0.0.10
|
8
|
+
- 11
|
9
|
+
version: 0.0.11
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John Prince
|
@@ -127,7 +127,6 @@ files:
|
|
127
127
|
- lib/ms/error_rate/qvalue.rb
|
128
128
|
- lib/ms/error_rate/qvalue/mascot.rb
|
129
129
|
- lib/ms/error_rate/qvalue/mascot/percolator.rb
|
130
|
-
- lib/ms/error_rate/qvalue/pepxml.rb
|
131
130
|
- lib/ms/error_rate/sbv.rb
|
132
131
|
- lib/ms/error_rate/sbv/peptide_based.rb
|
133
132
|
- lib/ms/error_rate/sbv/protein_based.rb
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'ms/error_rate/qvalue'
|
2
|
-
|
3
|
-
module Ms ; end
|
4
|
-
module Ms::ErrorRate ; end
|
5
|
-
module Ms::ErrorRate::Qvalue ; end
|
6
|
-
|
7
|
-
module Ms::ErrorRate::Qvalue::Pepxml
|
8
|
-
module_function
|
9
|
-
|
10
|
-
# returns an array of hit and qvalue pairs
|
11
|
-
# retrieves the aaseq, charge, and all search_score keys and values for use
|
12
|
-
# in the search_hit. caller must provide a sort_by block, where the best
|
13
|
-
# hits are last. charge is an integer, and all other search scores are cast
|
14
|
-
# as floats. returns the output filename.
|
15
|
-
def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
|
16
|
-
|
17
|
-
# this is a list of high quality peptide hits associated with each group
|
18
|
-
fields = [:aaseq, :charge]
|
19
|
-
ss_names = []
|
20
|
-
have_ss_names = false
|
21
|
-
(target_hits, decoy_hits) = [target_pepxml, decoy_pepxml].map do |file|
|
22
|
-
# begin with aaseq, charge
|
23
|
-
File.open(file) do |io|
|
24
|
-
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
25
|
-
# we can work with namespaces, or just remove them ...
|
26
|
-
doc.remove_namespaces!
|
27
|
-
root = doc.root
|
28
|
-
search_hits = root.xpath('//search_hit')
|
29
|
-
search_hits.map do |search_hit|
|
30
|
-
aaseq = search_hit['peptide']
|
31
|
-
charge = search_hit.parent.parent['assumed_charge'].to_i
|
32
|
-
search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
|
33
|
-
ss_values = []
|
34
|
-
search_score_nodes.each do |node|
|
35
|
-
ss_names << node['name'].to_sym unless have_ss_names
|
36
|
-
ss_values << node['value'].to_f
|
37
|
-
end
|
38
|
-
have_ss_names = true
|
39
|
-
[aaseq, charge, *ss_values]
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
fields.push(*ss_names)
|
45
|
-
|
46
|
-
peptide_hit_class = Struct.new(*fields)
|
47
|
-
(t_hits, d_hits) = [target_hits, decoy_hits].map {|hits| hits.map {|hit_values| peptide_hit_class.new(*hit_values) } }
|
48
|
-
|
49
|
-
# hit and qvalue pairs
|
50
|
-
Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
|
51
|
-
end
|
52
|
-
end
|