ms-error_rate 0.0.10 → 0.0.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/mascot_pepxml_to_peptide_hit_qvalues.rb +32 -6
- metadata +2 -3
- data/lib/ms/error_rate/qvalue/pepxml.rb +0 -52
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.10
|
1
|
+
0.0.11
|
@@ -4,15 +4,20 @@ require 'trollop'
|
|
4
4
|
require 'nokogiri'
|
5
5
|
require 'set'
|
6
6
|
|
7
|
+
require 'ms/ident/pepxml'
|
7
8
|
require 'ms/ident/peptide_hit/qvalue'
|
8
|
-
|
9
|
+
|
10
|
+
EXT = Ms::Ident::PeptideHit::Qvalue::FILE_EXTENSION
|
11
|
+
combine_base = "combined"
|
9
12
|
|
10
13
|
opts = Trollop::Parser.new do
|
11
|
-
banner %Q{usage: #{File.basename(__FILE__)} <
|
12
|
-
outputs: <fwd>.phq.
|
14
|
+
banner %Q{usage: #{File.basename(__FILE__)} <target>.xml <decoy>.xml ...
|
15
|
+
outputs: <fwd>.phq.tsv
|
13
16
|
phq.tsv?: see schema/peptide_hit_qvalues.phq.tsv
|
14
17
|
}
|
18
|
+
opt :combine, "groups target and decoy hits together from all files, writing to #{combine_base}#{EXT}", :default => false
|
15
19
|
opt :z_together, "do not group by charge state", :default => false
|
20
|
+
opt :decoy_first, "decoy files are before target files", :default => false
|
16
21
|
opt :verbose, "be verbose", :default => false
|
17
22
|
end
|
18
23
|
|
@@ -22,16 +27,37 @@ if ARGV.size == 0 || (ARGV.size%2 != 0)
|
|
22
27
|
opts.educate
|
23
28
|
exit
|
24
29
|
end
|
30
|
+
|
25
31
|
$VERBOSE = opt.delete(:verbose)
|
26
32
|
|
27
33
|
files = ARGV.to_a
|
28
|
-
|
29
|
-
|
34
|
+
|
35
|
+
files.each_slice(2).map!(&:reverse) if opt[:decoy_first]
|
36
|
+
|
37
|
+
groups_of_search_hits = files.map do |file|
|
38
|
+
Ms::Ident::Pepxml.search_hits(file)
|
39
|
+
end
|
40
|
+
|
41
|
+
to_run = {}
|
42
|
+
if opt[:combine]
|
43
|
+
all_target = [] ; all_decoy = []
|
44
|
+
groups_of_search_hits.each_slice(2) do |target_hits,decoy_hits|
|
45
|
+
all_target.push(*target_hits) ; all_decoy.push(*decoy_hits)
|
46
|
+
end
|
47
|
+
to_run[combine_base] = [all_target, all_decoy]
|
48
|
+
else
|
49
|
+
groups_of_search_hits.each_slice(2).zip(files) do |target_hits_and_decoy_hits, file|
|
50
|
+
to_run[file.chomp(File.extname(file)) + EXT] = target_hits_and_decoy_hits
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
to_run.each do |file, target_decoy_pair|
|
55
|
+
hit_qvalue_pairs = Ms::ErrorRate::Qvalue.target_decoy_qvalues(target_decoy_pair.first, target_decoy_pair.last, :z_together => opt[:z_together])
|
30
56
|
hits = [] ; qvals = []
|
31
57
|
hit_qvalue_pairs.each do |hit, qval|
|
32
58
|
hits << hit ; qvals << qval
|
33
59
|
end
|
34
|
-
outfile = Ms::Ident::PeptideHit::Qvalue.
|
60
|
+
outfile = Ms::Ident::PeptideHit::Qvalue.to_file(file, hits, qvals)
|
35
61
|
puts "created: #{outfile}" if $VERBOSE
|
36
62
|
end
|
37
63
|
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 10
|
9
|
-
version: 0.0.10
|
8
|
+
- 11
|
9
|
+
version: 0.0.11
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- John Prince
|
@@ -127,7 +127,6 @@ files:
|
|
127
127
|
- lib/ms/error_rate/qvalue.rb
|
128
128
|
- lib/ms/error_rate/qvalue/mascot.rb
|
129
129
|
- lib/ms/error_rate/qvalue/mascot/percolator.rb
|
130
|
-
- lib/ms/error_rate/qvalue/pepxml.rb
|
131
130
|
- lib/ms/error_rate/sbv.rb
|
132
131
|
- lib/ms/error_rate/sbv/peptide_based.rb
|
133
132
|
- lib/ms/error_rate/sbv/protein_based.rb
|
@@ -1,52 +0,0 @@
|
|
1
|
-
require 'ms/error_rate/qvalue'
|
2
|
-
|
3
|
-
module Ms ; end
|
4
|
-
module Ms::ErrorRate ; end
|
5
|
-
module Ms::ErrorRate::Qvalue ; end
|
6
|
-
|
7
|
-
module Ms::ErrorRate::Qvalue::Pepxml
|
8
|
-
module_function
|
9
|
-
|
10
|
-
# returns an array of hit and qvalue pairs
|
11
|
-
# retrieves the aaseq, charge, and all search_score keys and values for use
|
12
|
-
# in the search_hit. caller must provide a sort_by block, where the best
|
13
|
-
# hits are last. charge is an integer, and all other search scores are cast
|
14
|
-
# as floats. returns the output filename.
|
15
|
-
def target_decoy_qvalues(target_pepxml, decoy_pepxml, opt={}, &sort_by)
|
16
|
-
|
17
|
-
# this is a list of high quality peptide hits associated with each group
|
18
|
-
fields = [:aaseq, :charge]
|
19
|
-
ss_names = []
|
20
|
-
have_ss_names = false
|
21
|
-
(target_hits, decoy_hits) = [target_pepxml, decoy_pepxml].map do |file|
|
22
|
-
# begin with aaseq, charge
|
23
|
-
File.open(file) do |io|
|
24
|
-
doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS)
|
25
|
-
# we can work with namespaces, or just remove them ...
|
26
|
-
doc.remove_namespaces!
|
27
|
-
root = doc.root
|
28
|
-
search_hits = root.xpath('//search_hit')
|
29
|
-
search_hits.map do |search_hit|
|
30
|
-
aaseq = search_hit['peptide']
|
31
|
-
charge = search_hit.parent.parent['assumed_charge'].to_i
|
32
|
-
search_score_nodes = search_hit.children.select {|node| node.name == 'search_score' }
|
33
|
-
ss_values = []
|
34
|
-
search_score_nodes.each do |node|
|
35
|
-
ss_names << node['name'].to_sym unless have_ss_names
|
36
|
-
ss_values << node['value'].to_f
|
37
|
-
end
|
38
|
-
have_ss_names = true
|
39
|
-
[aaseq, charge, *ss_values]
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
fields.push(*ss_names)
|
45
|
-
|
46
|
-
peptide_hit_class = Struct.new(*fields)
|
47
|
-
(t_hits, d_hits) = [target_hits, decoy_hits].map {|hits| hits.map {|hit_values| peptide_hit_class.new(*hit_values) } }
|
48
|
-
|
49
|
-
# hit and qvalue pairs
|
50
|
-
Ms::ErrorRate::Qvalue.target_decoy_qvalues(t_hits, d_hits, :z_together => opt[:z_together], &sort_by)
|
51
|
-
end
|
52
|
-
end
|