mzid 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/convert_mzid_to_csv +53 -0
- data/bin/load_helper.rb +2 -0
- data/lib/mzid.rb +14 -0
- data/lib/mzid/base_parser.rb +45 -0
- data/lib/mzid/batch_parser.rb +148 -0
- data/lib/mzid/filtered_streaming_parser.rb +257 -0
- data/lib/mzid/parser_sax.rb +292 -0
- data/lib/mzid/peptide_evidence.rb +39 -0
- data/lib/mzid/psm.rb +61 -0
- data/lib/mzid/streaming_parser.rb +177 -0
- data/lib/mzid/streaming_parser_lines.rb +179 -0
- data/lib/mzid/version.rb +3 -0
- data/tests/data/example.mzid +71 -0
- data/tests/data/example_2.mzid +118 -0
- data/tests/data/example_mod.mzid +112 -0
- data/tests/load_helper.rb +1 -0
- data/tests/test_all.rb +6 -0
- data/tests/test_batch_parser.rb +86 -0
- data/tests/test_default_parser.rb +72 -0
- data/tests/test_helper.rb +8 -0
- data/tests/test_parser_sax.rb +47 -0
- data/tests/test_psm.rb +15 -0
- data/tests/test_streaming_parser.rb +87 -0
- data/tests/test_streaming_parser_lines.rb +104 -0
- metadata +162 -0
@@ -0,0 +1,53 @@
|
|
1
|
+
#!/usr/bin/env ruby

#
# command-line tool: parse an mzIdentML (.mzid) results file with
# MzID::ParserSax and write the result out as CSV
#
require_relative 'load_helper'
require 'mzid'
require 'csv'
require 'progressbar'
require 'optparse'

# defaults are set up front so every flag always has a value
options = {:verbose => false, :mods => false}
optparse = OptionParser.new do |opt|
  opt.banner = "Usage: results.mzid [OPTIONS]"
  opt.separator ""
  opt.separator "Options"

  opt.on("-v", "--verbose", "flag for verbose output or silent output") do |verbose|
    options[:verbose] = verbose
  end

  opt.on("-m", "--mods", "flag if the search contained modifications") do |ptm|
    options[:mods] = ptm
  end

  opt.on("-o","--output FILE","output file name, if unspecified will create a results.csv file") do |outFile|
    options[:output] = outFile
  end

  opt.on("-h","--help","help") do
    puts opt
    Process.exit(0)
  end
end

# unknown or malformed options: show the message plus usage, fail with non-zero status
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  $stderr.puts e.message
  $stderr.puts optparse.help
  Process.exit(1)
end
#
# basic checking: exactly one positional argument (the mzid file) is required
#
if ARGV.size != 1
  $stderr.puts optparse.help
  Process.exit(1)
end
#
# setup params
#
result_mzid_file = ARGV[0]
tda_flag = true
# default output name: swap a trailing ".mzid" extension for ".csv"
# (only the end of the name is touched, so ".mzid" elsewhere in the path is kept)
outfile = options.has_key?(:output) ? options[:output] : (result_mzid_file.sub(/\.mzid\z/, "") + ".csv")
#
# parse file and output
#
# the second argument is passed as nil (not false) when verbose output is off
parser = MzID::ParserSax.new(result_mzid_file, (options[:verbose] ? true : nil), tda_flag)
parser.write_to_csv(outfile, options[:mods])
|
data/bin/load_helper.rb
ADDED
data/lib/mzid.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'progressbar'
|
3
|
+
|
4
|
+
module MzID
  #
  # base class for parsing an mzIdentML file: remembers the file path and
  # provides helpers that read data out of a parsed Peptide node
  #
  class BaseParser

    #
    # file:: path of the mzIdentML file to parse
    #
    def initialize(file)
      @mzid_file = file
    end
    #
    # given an XML.parse output from the peptide block, extract peptide sequence
    #
    # returns the content of the first PeptideSequence descendant of +pnode+
    #
    def get_peptide_sequence(pnode)
      pnode.xpath('.//PeptideSequence')[0].content
    end
    #
    # given an XML.parse output from the peptide block, extract modifications
    #
    # returns a Hash mapping (location - 1) => monoisotopicMassDelta (Float)
    # with one entry per Modification descendant, or nil when there are none
    #
    def get_modifications(pep_node)
      mod_h = {}
      pep_node.xpath('.//Modification').each do |mod|
        # the 'location' attribute is shifted down by one before it is used as key
        mod_h[mod['location'].to_i - 1] = mod['monoisotopicMassDelta'].to_f
      end
      mod_h.empty? ? nil : mod_h
    end

    private :get_peptide_sequence, :get_modifications

  end

end
|
@@ -0,0 +1,148 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'progressbar'
|
3
|
+
require 'mzid/base_parser'
|
4
|
+
require 'mzid/peptide_evidence'
|
5
|
+
|
6
|
+
module MzID
|
7
|
+
#
|
8
|
+
# class to parse an mzIdentML file
|
9
|
+
#
|
10
|
+
class BatchParser < BaseParser
|
11
|
+
|
12
|
+
def initialize(file)
  # hand the file path to the base class, then create the empty lookup
  # tables that cache_ids fills in
  super(file)
  @pep_ev_h, @db_seq_h = {}, {}
  cache_ids
end
|
18
|
+
#
|
19
|
+
# store peptide sequences in hash for lookup
|
20
|
+
#
|
21
|
+
def cache_ids()
  # parses the whole document once and fills the lookup tables:
  #   @db_seq_h / @pep_ev_h (via cache_db_seq_entries / cache_pep_ev)
  #   @pep_h : peptide id => peptide sequence
  #   @mod_h : peptide id => modification hash (nil when there are none)
  File.open(@mzid_file) do |io|
    doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
    doc.remove_namespaces!
    root = doc.root

    # database sequences go first: cache_pep_ev looks accessions up in @db_seq_h
    cache_db_seq_entries(root)
    cache_pep_ev(root)

    @pep_h = Hash.new
    @mod_h = Hash.new
    root.xpath('//Peptide').each do |pnode|
      pep_id = pnode['id']
      @pep_h[pep_id] = get_peptide_sequence(pnode)
      @mod_h[pep_id] = get_modifications(pnode)
    end
  end
end
|
44
|
+
#
|
45
|
+
# store peptide evidence sequences in hash for lookup
|
46
|
+
#
|
47
|
+
def cache_pep_ev(root)
  # one PeptideEvidence object per PeptideEvidence node, stored in @pep_ev_h
  # under the node's id; the protein id is the accession cached in @db_seq_h
  # for the node's dBSequence_ref, converted to a symbol
  root.xpath('//PeptideEvidence').each do |ev_node|
    ev_id = ev_node["id"]
    @pep_ev_h[ev_id] = PeptideEvidence.new(
      :id => ev_node["id"],
      :db_seq_ref => ev_node["dBSequence_ref"],
      :pep_id => ev_node["peptide_ref"],
      :start_pos => ev_node["start"].to_i,
      :end_pos => ev_node["end"].to_i,
      :pre => ev_node["pre"],
      :post => ev_node["post"],
      :prot_id => @db_seq_h[ev_node["dBSequence_ref"]].to_sym
    )
  end
end
|
63
|
+
#
|
64
|
+
# store database sequence entries (ids)
|
65
|
+
#
|
66
|
+
def cache_db_seq_entries(root)
  # map every DBSequence id to its accession attribute in @db_seq_h
  root.xpath('//DBSequence').each do |seq_node|
    @db_seq_h[seq_node["id"]] = seq_node["accession"]
  end
end
|
74
|
+
#
|
75
|
+
# iterate through each psm
|
76
|
+
#
|
77
|
+
def each_psm(use_pbar=nil)
  # parses the file and yields one PSM object per SpectrumIdentificationItem,
  # grouped in document order by SpectrumIdentificationResult
  #
  # use_pbar:: when truthy, a progress bar is advanced once per result
  File.open(@mzid_file) do |io|
    doc = Nokogiri::XML.parse(io, nil, nil, Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT)
    doc.remove_namespaces!
    # get list of identifications
    spec_results = doc.root.xpath('//SpectrumIdentificationResult')
    pbar = ProgressBar.new("PSMs", spec_results.size) if use_pbar
    spec_results.each do |sres|
      # go over each PSM from the spectra
      sres.xpath('.//SpectrumIdentificationItem').each do |psm_node|
        # resolve the referenced peptide evidence through the cache
        pep_ev_lst = psm_node.xpath('.//PeptideEvidenceRef').map do |penode|
          @pep_ev_h[penode["peptideEvidence_ref"]]
        end
        # spectral probability: value of the first cvParam named MS-GF:SpecEValue
        # (a PSM without that cvParam raises NoMethodError here)
        spec_param = psm_node.xpath('.//cvParam').find{|v| v['name'] == "MS-GF:SpecEValue"}
        spec_prob = spec_param['value']
        pep_ref = psm_node['peptide_ref']
        # the item id is split on "_": field 1 is the spectrum number,
        # the last field the spectrum reference
        id_fields = psm_node['id'].split("_")
        yield PSM.new(:spec_num => id_fields[1].to_i,
                      :spec_ref => id_fields[-1].to_i,
                      :pep => @pep_h[pep_ref],
                      :spec_prob => spec_prob.to_f,
                      :mods => @mod_h[pep_ref],
                      :pep_ev => pep_ev_lst)
      end
      pbar.inc if use_pbar
    end
    pbar.finish if use_pbar
  end
end
|
123
|
+
#
|
124
|
+
# for each spectrum, return a list of PSM objects for that spectrum
|
125
|
+
#
|
126
|
+
def each_spectrum(use_pbar=nil)
  # groups consecutive PSMs from each_psm that share a spectrum number and
  # yields each group as an Array; nothing is yielded when there are no PSMs
  #
  # use_pbar:: forwarded to each_psm
  spec_lst = []
  each_psm(use_pbar) do |psm|
    # a different spectrum number closes the current group
    if !spec_lst.empty? && spec_lst[-1].get_spec_num != psm.get_spec_num
      yield spec_lst
      spec_lst = []
    end
    spec_lst.push(psm)
  end
  # flush the final group; skipped for empty input so callers never get []
  yield spec_lst unless spec_lst.empty?
end
|
142
|
+
|
143
|
+
|
144
|
+
private :cache_ids
|
145
|
+
|
146
|
+
end
|
147
|
+
|
148
|
+
end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'progressbar'
|
3
|
+
require 'mzid/base_parser'
|
4
|
+
require 'mzid/streaming_parser'
|
5
|
+
|
6
|
+
module MzID
|
7
|
+
#
|
8
|
+
# class to parse an mzIdentML file in a streaming (i.e., mem-efficient) manner
|
9
|
+
# performs multi-pass filtering so that only the smallest possible data structures are kept in memory
|
10
|
+
# 1) first collect counts of elements
|
11
|
+
# 2) get list of peptide evidence from PSMs that pass filter
|
12
|
+
# 3)
|
13
|
+
#
|
14
|
+
class FilteredStreamingParser < StreamingParser
|
15
|
+
|
16
|
+
def initialize(file, sp_thresh = 10.0**-10, use_pbar = nil)
  # the counters and lookup tables are created before the parent constructor runs
  # NOTE(review): sp_thresh is accepted but not referenced in this method
  @num_spec = 0
  @pep_ev_h_protID, @pep_ev_h_startPos = {}, {}
  @pep_ev_h_endPos, @pep_ev_h_dbseqRef = {}, {}
  super(file, use_pbar)
end
|
25
|
+
#
|
26
|
+
#
|
27
|
+
def cache_ids2(use_pbar = @use_pbar)
  # placeholder: has no body and returns nil
  nil
end
|
29
|
+
|
30
|
+
#def get_pep_ev_protID(pid) @pep_ev_h_protID[pid] end
|
31
|
+
|
32
|
+
def get_prot_id(pep_ev_id)
  # two-step lookup: peptide evidence id -> DBSequence ref -> protein id
  @db_seq_h[@pep_ev_h_dbseqRef[pep_ev_id]]
end
|
37
|
+
|
38
|
+
attr_accessor :pep_ev_h_dbseqRef
|
39
|
+
|
40
|
+
#
|
41
|
+
# store peptide sequences in hash for lookup
|
42
|
+
#
|
43
|
+
def cache_ids(use_pbar = @use_pbar)
  # fills the lookup tables in three passes over the file:
  #   1) @pep_h / @mod_h      : peptide id => sequence / modifications (XML reader)
  #   2) @db_seq_h            : DBSequence id => accession (line scan)
  #   3) @pep_ev_h_dbseqRef   : PeptideEvidence id => DBSequence id (line scan)
  # all keys and cached ids are symbols; element counts and table sizes are
  # printed to stdout
  #
  # use_pbar:: when truthy, one progress bar is shown per pass
  num_pep, num_db_seq, num_pep_ev = get_num_elements(nil)
  puts "SPEC:\t#{@num_spec}"
  puts "PEP:\t#{num_pep}"
  puts "DB:\t#{num_db_seq}"
  puts "PEPEV:\t#{num_pep_ev}"

  @pep_h = Hash.new
  @mod_h = Hash.new
  pbar1 = ProgressBar.new("peptides", num_pep/2) if use_pbar
  # the reader pulls from the IO lazily, so iterate inside the block and let
  # File.open close the handle afterwards
  File.open(@mzid_file) do |io|
    Nokogiri::XML::Reader(io).each do |node|
      next unless node.name == "Peptide"
      # parse local peptide entry
      tmp_node = Nokogiri::XML.parse(node.outer_xml)
      tmp_node.remove_namespaces!
      root = tmp_node.root
      pep_id = root["id"].to_sym
      # skip if already handled PepID
      next if @pep_h.has_key?(pep_id)
      # parse sequence/mods if haven't seen it yet
      @pep_h[pep_id] = get_peptide_sequence(root)
      @mod_h[pep_id] = get_modifications(root)
      pbar1.inc if use_pbar
    end
  end
  pbar1.finish if use_pbar
  #
  # DBSequence entries are read with a plain line scan
  # NOTE(review): only lines that start with whitespace followed by
  # "<DBSequence " match, and the accession capture stops at the first
  # character that is neither a word character nor "|" - confirm this
  # covers the accessions in use
  pbar2 = ProgressBar.new("db_seq", num_db_seq) if use_pbar
  IO.foreach(@mzid_file) do |line|
    next if !line.match(/^\s+<DBSequence\s/)

    prot_id = line.match(/accession=\"([\w|\|]+)/)[1]
    db_id = line.match(/id=\"(\w+)/)[1]

    @db_seq_h[db_id.to_sym] = prot_id.to_sym
    pbar2.inc if use_pbar
  end
  pbar2.finish if use_pbar
  #
  # PeptideEvidence entries: same line-scan approach
  pbar3 = ProgressBar.new("pep_ev", num_pep_ev) if use_pbar
  IO.foreach(@mzid_file) do |line|
    next if !line.match(/^\s+<PeptideEvidence\s/)

    db_id = line.match(/dBSequence_ref=\"(\w+)/)[1]
    pep_ev = line.match(/id=\"(\w+)/)[1]
    @pep_ev_h_dbseqRef[pep_ev.to_sym] = db_id.to_sym
    pbar3.inc if use_pbar
  end
  pbar3.finish if use_pbar
  puts "PEP_H SIZE:\t#{@pep_h.size}"
  puts "DBSEQ_H SIZE:\t#{@db_seq_h.size}"
  puts "PEP_EV_H SIZE:\t#{@pep_ev_h_dbseqRef.size}"
end
|
138
|
+
#
|
139
|
+
# store database sequence entries (ids)
|
140
|
+
#
|
141
|
+
def cache_db_seq_entries(root)
  # map every DBSequence id to its accession in @db_seq_h, both as symbols
  root.xpath('//DBSequence').each do |seq_node|
    seq_id = seq_node["id"].to_sym
    accession = seq_node["accession"]
    @db_seq_h[seq_id] = accession.to_sym
  end
end
|
149
|
+
#
|
150
|
+
# store peptide evidence sequences in hash for lookup
|
151
|
+
#
|
152
|
+
def cache_pep_ev(root)
  # record, for every PeptideEvidence node, which DBSequence it points at:
  # @pep_ev_h_dbseqRef maps evidence id => dBSequence_ref, both as symbols
  # (start/end positions and protein ids are not cached here)
  root.xpath('//PeptideEvidence').each do |ev_node|
    ev_id = ev_node["id"].to_sym
    @pep_ev_h_dbseqRef[ev_id] = ev_node["dBSequence_ref"].to_sym
  end
end
|
172
|
+
#
|
173
|
+
# iterate through each psm
|
174
|
+
#
|
175
|
+
def each_psm(use_pbar=@use_pbar)
  # parses the file and yields the PSM built by get_psm for every
  # SpectrumIdentificationItem, walking the SpectrumIdentificationResult
  # nodes in document order; the optional progress bar advances per result
  File.open(@mzid_file) do |io|
    parse_opts = Nokogiri::XML::ParseOptions::DEFAULT_XML | Nokogiri::XML::ParseOptions::NOBLANKS | Nokogiri::XML::ParseOptions::STRICT
    doc = Nokogiri::XML.parse(io, nil, nil, parse_opts)
    doc.remove_namespaces!
    results = doc.root.xpath('//SpectrumIdentificationResult')
    pbar = ProgressBar.new("PSMs", results.size) if use_pbar
    results.each do |result_node|
      result_node.xpath('.//SpectrumIdentificationItem').each do |item_node|
        yield get_psm(item_node)
      end
      pbar.inc if use_pbar
    end
    pbar.finish if use_pbar
  end
end
|
197
|
+
#
|
198
|
+
# given a xml node of a psm, return the PSM
|
199
|
+
#
|
200
|
+
def get_psm(psm_node)
  # builds the PSM object for one SpectrumIdentificationItem node
  #
  # psm_node:: node exposing the item's attributes via [] and its
  #            descendants via xpath
  # returns a PSM whose :pep_ev entry is the list of referenced peptide
  # evidence ids (symbols)
  #
  # get peptide evidence list
  pep_ev_lst = psm_node.xpath('.//PeptideEvidenceRef').map{|penode| penode["peptideEvidence_ref"].to_sym}
  # spectral probability: value of the first cvParam named MS-GF:SpecEValue
  # (a PSM without that cvParam raises NoMethodError here)
  spec_param = psm_node.xpath('.//cvParam').find{|v| v['name'] == "MS-GF:SpecEValue"}
  spec_prob = spec_param['value']
  # @pep_h and @mod_h are keyed by symbol, so the reference is converted
  # once and used for both lookups
  pep_ref = psm_node['peptide_ref'].to_sym
  # the item id is split on "_": field 1 is the spectrum number,
  # the last field the spectrum reference
  id_fields = psm_node['id'].split("_")
  #
  # store in object
  PSM.new(:spec_num => id_fields[1].to_i,
          :spec_ref => id_fields[-1].to_i,
          :pep => @pep_h[pep_ref],
          :spec_prob => spec_prob.to_f,
          :mods => @mod_h[pep_ref],
          :pep_ev => pep_ev_lst)
end
|
224
|
+
#
|
225
|
+
# load PSMs into memory, and go back to perform lookup for prot ids
|
226
|
+
#
|
227
|
+
def write_to_file(outfile, use_pbar=@use_pbar)
  # streams over the PeptideEvidence elements of the file and reads each
  # entry's id, start/end position and DBSequence reference
  #
  # NOTE(review): nothing is written to +outfile+ yet and the values read
  # below are discarded - the method looks unfinished; confirm the intended
  # output before relying on it
  #
  # outfile::  path of the output file (currently unused)
  # use_pbar:: when truthy, a progress bar is advanced once per element
  #
  # the progress total is the number of peptide evidence ids already cached
  # in @pep_ev_h_dbseqRef
  pbar3 = ProgressBar.new("Caching pep_ev", @pep_ev_h_dbseqRef.size) if use_pbar
  # the reader pulls from the IO lazily, so iterate inside the block and let
  # File.open close the handle afterwards
  File.open(@mzid_file) do |io|
    Nokogiri::XML::Reader(io).each do |node|
      next unless node.name == "PeptideEvidence"
      # parse local PeptideEvidence entry
      tmp_node = Nokogiri::XML.parse(node.outer_xml)
      tmp_node.remove_namespaces!
      tmp_node.root.xpath('//PeptideEvidence').each do |pnode|
        id = pnode["id"]
        start_pos = pnode["start"].to_i
        end_pos = pnode["end"].to_i
        db_seq_ref = pnode["dBSequence_ref"].to_sym
      end
      pbar3.inc if use_pbar
    end
  end
  pbar3.finish if use_pbar
end
|
253
|
+
|
254
|
+
|
255
|
+
end
|
256
|
+
|
257
|
+
end
|