mzid 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/convert_mzid_to_csv +53 -0
- data/bin/load_helper.rb +2 -0
- data/lib/mzid.rb +14 -0
- data/lib/mzid/base_parser.rb +45 -0
- data/lib/mzid/batch_parser.rb +148 -0
- data/lib/mzid/filtered_streaming_parser.rb +257 -0
- data/lib/mzid/parser_sax.rb +292 -0
- data/lib/mzid/peptide_evidence.rb +39 -0
- data/lib/mzid/psm.rb +61 -0
- data/lib/mzid/streaming_parser.rb +177 -0
- data/lib/mzid/streaming_parser_lines.rb +179 -0
- data/lib/mzid/version.rb +3 -0
- data/tests/data/example.mzid +71 -0
- data/tests/data/example_2.mzid +118 -0
- data/tests/data/example_mod.mzid +112 -0
- data/tests/load_helper.rb +1 -0
- data/tests/test_all.rb +6 -0
- data/tests/test_batch_parser.rb +86 -0
- data/tests/test_default_parser.rb +72 -0
- data/tests/test_helper.rb +8 -0
- data/tests/test_parser_sax.rb +47 -0
- data/tests/test_psm.rb +15 -0
- data/tests/test_streaming_parser.rb +87 -0
- data/tests/test_streaming_parser_lines.rb +104 -0
- metadata +162 -0
@@ -0,0 +1,179 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'progressbar'
|
3
|
+
require 'mzid/base_parser'
|
4
|
+
require 'mzid/streaming_parser'
|
5
|
+
require 'csv'
|
6
|
+
|
7
|
+
module MzID
|
8
|
+
#
|
9
|
+
# class to parse an mzIdentML file in a streaming (i.e., mem-efficient) manner
|
10
|
+
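# PSMs, DBSequence and PeptideEvidence entries are matched line by line with regular expressions, exploiting the one-element-per-line layout of mzIdentML files; only Peptide entries are read through Nokogiri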
|
11
|
+
#
|
12
|
+
class StreamingParserLines < StreamingParser
|
13
|
+
|
14
|
+
#
# Set up parser state and then delegate to the superclass constructor.
#
# file      - forwarded unchanged to the superclass constructor
# sp_thresh - accepted for callers that pass it; not read in this method
# use_pbar  - forwarded unchanged to the superclass constructor
# tda_flag  - kept in @tda_flag; write_to_csv drops the "decoy" column when it is falsy
#
def initialize(file, sp_thresh = 10.0**-10, use_pbar = nil, tda_flag = true)
  @tda_flag = tda_flag
  @num_spec = 0
  # empty per-attribute PeptideEvidence tables
  @pep_ev_h_dbseqRef = {}
  @pep_ev_h_endPos = {}
  @pep_ev_h_startPos = {}
  @pep_ev_h_protID = {}
  super(file, use_pbar)
end
|
24
|
+
#
|
25
|
+
# get a protein ID from a PeptideEvidenceID
|
26
|
+
#
|
27
|
+
# Resolve a PeptideEvidence id to a protein id: the evidence entry stored in
# @pep_ev_h supplies a DBSequence reference, which is then looked up in @db_seq_h.
def get_prot_id(pep_ev_id)
  @db_seq_h[@pep_ev_h[pep_ev_id].get_db_seq_ref]
end
|
33
|
+
#
|
34
|
+
#
|
35
|
+
#
|
36
|
+
# Start position reported by the PeptideEvidence entry stored under pep_ev_id.
def get_pep_start(pep_ev_id)
  evidence = @pep_ev_h[pep_ev_id]
  evidence.get_start_pos
end
|
37
|
+
# End position reported by the PeptideEvidence entry stored under pep_ev_id.
def get_pep_end(pep_ev_id)
  evidence = @pep_ev_h[pep_ev_id]
  evidence.get_end_pos
end
|
38
|
+
# Decoy marker reported by the PeptideEvidence entry stored under pep_ev_id.
def get_is_decoy(pep_ev_id)
  evidence = @pep_ev_h[pep_ev_id]
  evidence.get_is_decoy
end
|
39
|
+
#attr_accessor :pep_ev_h_dbseqRef
|
40
|
+
|
41
|
+
#
|
42
|
+
# store peptide sequences in hash for lookup
|
43
|
+
#
|
44
|
+
#
# Fill the id lookup tables from @mzid_file:
#   @pep_h    - Peptide id         => result of get_peptide_sequence
#   @mod_h    - Peptide id         => result of get_modifications
#   @db_seq_h - DBSequence id      => accession
#   @pep_ev_h - PeptideEvidence id => PeptideEvidence object
# All ids (and the accession) are stored as Symbols.
#
# The file is read three times: Peptide elements through Nokogiri's reader,
# then DBSequence and PeptideEvidence start tags line by line with regexes.
#
# use_pbar - when truthy, a ProgressBar is shown for each pass
#
# NOTE(review): @db_seq_h and @pep_ev_h are written here but never created in
# this method; presumably the superclass sets them up - confirm.
#
def cache_ids(use_pbar = @use_pbar)
  num_pep, num_db_seq, num_pep_ev = get_num_elements(nil)

  @pep_h = Hash.new
  @mod_h = Hash.new
  # NOTE(review): the bar is sized to num_pep/2; the reason is not visible here
  pbar1 = ProgressBar.new("peptides", num_pep/2) if use_pbar
  # block form of File.open: the handle is closed as soon as this pass ends,
  # also when parsing raises, instead of staying open until garbage collection
  File.open(@mzid_file) do |fh|
    Nokogiri::XML::Reader(fh).each do |node|
      # parse Peptide items only
      next unless node.name == "Peptide"
      # re-parse just this element as a small stand-alone document
      tmp_node = Nokogiri::XML.parse(node.outer_xml)
      tmp_node.remove_namespaces!
      root = tmp_node.root
      pep_id = root["id"].to_sym
      # skip if already handled PepID
      next if @pep_h.has_key?(pep_id)
      # parse sequence/mods if haven't seen it yet
      pep_seq = get_peptide_sequence(root)
      mod_line = get_modifications(root)
      @pep_h[pep_id] = pep_seq
      @mod_h[pep_id] = mod_line
      pbar1.inc if use_pbar
    end
  end
  pbar1.finish if use_pbar

  # now parse DBSequence items (start tag expected at the beginning of a line)
  dbseq_re = /^\s*<DBSequence\s/
  pbar2 = ProgressBar.new("db_seq", num_db_seq) if use_pbar
  IO.foreach(@mzid_file) do |line|
    next unless dbseq_re.match(line)

    prot_id = line.match(/accession=\"([\w|\|]+)/)[1]
    db_id = line.match(/id=\"(\w+)/)[1]

    @db_seq_h[db_id.to_sym] = prot_id.to_sym
    pbar2.inc if use_pbar
  end
  pbar2.finish if use_pbar

  # now parse PeptideEvidence items
  pepev_re = /^\s*<PeptideEvidence\s/
  pbar3 = ProgressBar.new("pep_ev", num_pep_ev) if use_pbar
  IO.foreach(@mzid_file) do |line|
    next unless pepev_re.match(line)

    db_id = line.match(/dBSequence_ref=\"(\w+)/)[1]
    start_pos = line.match(/start=\"(\d+)/)[1].to_i
    end_pos = line.match(/end=\"(\d+)/)[1].to_i
    pep_ev = line.match(/id=\"(\w+)/)[1]
    # kept as the literal attribute text (e.g. "false"), not converted to a boolean
    is_decoy = line.match(/isDecoy=\"(\w+)\"/)[1]
    @pep_ev_h[pep_ev.to_sym] = PeptideEvidence.new(:db_seq_ref => db_id.to_sym,
                                                   :start_pos => start_pos,
                                                   :end_pos => end_pos,
                                                   :is_decoy => is_decoy)
    pbar3.inc if use_pbar
  end
  pbar3.finish if use_pbar
end
|
103
|
+
#
|
104
|
+
# iterate through each psm by identifying them parsing the file
|
105
|
+
# one line at a time - faster than using XML parser
|
106
|
+
#
|
107
|
+
#
# Yield one PSM per SpectrumIdentificationItem found in @mzid_file, reading the
# file one line at a time instead of going through an XML parser.
#
# A PSM is created at the item's start tag (peptide and modifications are looked
# up in @pep_h / @mod_h by peptide_ref), receives every PeptideEvidenceRef and
# the MS-GF:SpecEValue seen before the closing tag, and is yielded at the
# closing tag.
#
# use_pbar - when truthy, show a ProgressBar that advances once per line read
#
# Returns an Enumerator when called without a block.
#
def each_psm(use_pbar=@use_pbar)
  return enum_for(:each_psm, use_pbar) unless block_given?

  pbar = nil
  if use_pbar
    # count lines in-process; shelling out to `wc -l` broke on paths containing
    # spaces or shell metacharacters and is not available on every platform
    pbar = ProgressBar.new("PSMs", File.foreach(@mzid_file).count)
  end
  specid_item_re = /^\s+<SpectrumIdentificationItem\s/
  pepevref_re = /^\s+<PeptideEvidenceRef\s/
  specprob_re = /name=\"MS-GF:SpecEValue\"\/>$/
  specid_item_end_re = /^\s+<\/SpectrumIdentificationItem>\s*$/

  curr_psm = nil
  IO.foreach(@mzid_file) do |line|
    pbar.inc if pbar
    # lines not pertaining to a spectrum ID item match none of the branches
    if specid_item_re.match(line)
      # beginning of spectrum ID item
      spec_id_id = line.match(/id=\"(\w+)/)[1]
      # the number after the first underscore, e.g. "SII_3591_1" -> 3591
      spec_num = spec_id_id.split("_")[1].to_i
      pep_ref = line.match(/peptide_ref=\"(\w+)/)[1].to_sym
      curr_psm = PSM.new(:spec_num => spec_num, :pep => @pep_h[pep_ref], :mods => @mod_h[pep_ref])
    elsif pepevref_re.match(line)
      pep_ev = line.match(/peptideEvidence_ref=\"(\w+)/)[1]
      curr_psm.add_pep_ev(pep_ev.to_sym) if curr_psm
    elsif specprob_re.match(line)
      sprob = line.match(/value=\"([\d|\w|\.|-]+)\"/)[1]
      curr_psm.set_spec_prob(sprob.to_f) if curr_psm
    elsif specid_item_end_re.match(line)
      # a closing tag without a preceding start tag has nothing to yield
      yield curr_psm if curr_psm
      curr_psm = nil # done with the current PSM object
    end
  end
  pbar.finish if pbar
end
|
144
|
+
#
|
145
|
+
# load PSMs into memory, and go back to perform lookup for prot ids
|
146
|
+
#
|
147
|
+
#
# Stream every PSM from each_psm into a tab-separated file, writing one row per
# PeptideEvidence reference of the PSM.
#
# Columns: #spec_num, peptide, spec_prob, decoy, prot_ids, start, end, num_prot.
# The "decoy" column is left out when @tda_flag is falsy. num_prot is the
# number of PeptideEvidence references of the PSM.
#
# outfile  - path of the file to (over)write
# use_pbar - passed on to each_psm
#
def write_to_csv(outfile="result.csv", use_pbar=@use_pbar)
  header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
  header.delete("decoy") unless @tda_flag

  # options go in as keywords: a positional options hash is rejected by
  # CSV.open on Ruby 3
  CSV.open(outfile, "w", :col_sep => "\t") do |csv|
    csv << header

    each_psm(use_pbar) do |psm|
      pep_seq = psm.get_pep
      spec_num = psm.get_spec_num
      sp_prob = psm.get_spec_prob
      pep_ev_refs = psm.get_pep_ev
      # number of proteins with matching peptide
      num_prot = pep_ev_refs.size

      # for each PeptideEvidence, write a different line
      pep_ev_refs.each do |pepev|
        row = [spec_num, pep_seq, sp_prob]
        row << get_is_decoy(pepev) if @tda_flag
        row << get_prot_id(pepev) << get_pep_start(pepev) << get_pep_end(pepev) << num_prot
        csv << row
      end
    end
  end
end
|
175
|
+
|
176
|
+
|
177
|
+
end
|
178
|
+
|
179
|
+
end
|
data/lib/mzid/version.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
|
3
|
+
<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
4
|
+
<cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
|
5
|
+
<cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
|
6
|
+
<cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
|
7
|
+
</cvList>
|
8
|
+
<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
9
|
+
<AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
|
10
|
+
<SoftwareName>
|
11
|
+
<cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
|
12
|
+
</SoftwareName>
|
13
|
+
</AnalysisSoftware>
|
14
|
+
</AnalysisSoftwareList>
|
15
|
+
<SequenceCollection>
|
16
|
+
<DBSequence accession="sp|Q9RXK5|EFG_DEIRA" searchDatabase_ref="SearchDB_1" length="698" id="DBSeq30696">
|
17
|
+
<cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|Q9RXK5|EFG_DEIRA Elongation factor G OS=Deinococcus radiodurans GN=fusA PE=3 SV=1" name="protein description"/>
|
18
|
+
</DBSequence>
|
19
|
+
<Peptide id="Pep1">
|
20
|
+
<PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
|
21
|
+
</Peptide>
|
22
|
+
<PeptideEvidence isDecoy="false" post="I" pre="K" end="591" start="573" peptide_ref="Pep1" dBSequence_ref="DBSeq30696" id="PepEv_31268_1_573"/>
|
23
|
+
</SequenceCollection>
|
24
|
+
<AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
25
|
+
<SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
|
26
|
+
<InputSpectra spectraData_ref="SID_1"/>
|
27
|
+
<SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
|
28
|
+
</SpectrumIdentification>
|
29
|
+
</AnalysisCollection>
|
30
|
+
<DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
31
|
+
<Inputs>
|
32
|
+
<SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
|
33
|
+
<FileFormat>
|
34
|
+
<cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
|
35
|
+
</FileFormat>
|
36
|
+
<DatabaseName>
|
37
|
+
<userParam name="041.fa"/>
|
38
|
+
</DatabaseName>
|
39
|
+
</SearchDatabase>
|
40
|
+
<SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
|
41
|
+
<FileFormat>
|
42
|
+
<cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
|
43
|
+
</FileFormat>
|
44
|
+
<SpectrumIDFormat>
|
45
|
+
<cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
|
46
|
+
</SpectrumIDFormat>
|
47
|
+
</SpectraData>
|
48
|
+
</Inputs>
|
49
|
+
<AnalysisData>
|
50
|
+
<SpectrumIdentificationList id="SI_LIST_1">
|
51
|
+
<FragmentationTable>
|
52
|
+
<Measure id="Measure_MZ">
|
53
|
+
<cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
|
54
|
+
</Measure>
|
55
|
+
</FragmentationTable>
|
56
|
+
<SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
|
57
|
+
<SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
|
58
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
|
59
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
|
60
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
|
61
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
|
62
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
|
63
|
+
<userParam value="0" name="IsotopeError"/>
|
64
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
65
|
+
</SpectrumIdentificationItem>
|
66
|
+
<cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
|
67
|
+
</SpectrumIdentificationResult>
|
68
|
+
</SpectrumIdentificationList>
|
69
|
+
</AnalysisData>
|
70
|
+
</DataCollection>
|
71
|
+
</MzIdentML>
|
@@ -0,0 +1,118 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
|
3
|
+
<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
4
|
+
<cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
|
5
|
+
<cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
|
6
|
+
<cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
|
7
|
+
</cvList>
|
8
|
+
<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
9
|
+
<AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
|
10
|
+
<SoftwareName>
|
11
|
+
<cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
|
12
|
+
</SoftwareName>
|
13
|
+
</AnalysisSoftware>
|
14
|
+
</AnalysisSoftwareList>
|
15
|
+
<SequenceCollection>
|
16
|
+
<Peptide id="Pep1">
|
17
|
+
<PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
|
18
|
+
</Peptide>
|
19
|
+
<Peptide id="Pep3183">
|
20
|
+
<PeptideSequence>PPEGIGGKQVAARLAEMGQR</PeptideSequence>
|
21
|
+
</Peptide>
|
22
|
+
<Peptide id="Pep3184">
|
23
|
+
<PeptideSequence>GVTVLLTTHDLGDVERLAR</PeptideSequence>
|
24
|
+
</Peptide>
|
25
|
+
<Peptide id="Pep3185">
|
26
|
+
<PeptideSequence>TLPPDAPSRHRLVHALER</PeptideSequence>
|
27
|
+
</Peptide>
|
28
|
+
<Peptide id="Pep3186">
|
29
|
+
<PeptideSequence>PRREARPRFPWELAQR</PeptideSequence>
|
30
|
+
</Peptide>
|
31
|
+
</SequenceCollection>
|
32
|
+
<AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
33
|
+
<SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
|
34
|
+
<InputSpectra spectraData_ref="SID_1"/>
|
35
|
+
<SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
|
36
|
+
</SpectrumIdentification>
|
37
|
+
</AnalysisCollection>
|
38
|
+
<DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
39
|
+
<Inputs>
|
40
|
+
<SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
|
41
|
+
<FileFormat>
|
42
|
+
<cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
|
43
|
+
</FileFormat>
|
44
|
+
<DatabaseName>
|
45
|
+
<userParam name="041.fa"/>
|
46
|
+
</DatabaseName>
|
47
|
+
</SearchDatabase>
|
48
|
+
<SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
|
49
|
+
<FileFormat>
|
50
|
+
<cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
|
51
|
+
</FileFormat>
|
52
|
+
<SpectrumIDFormat>
|
53
|
+
<cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
|
54
|
+
</SpectrumIDFormat>
|
55
|
+
</SpectraData>
|
56
|
+
</Inputs>
|
57
|
+
<AnalysisData>
|
58
|
+
<SpectrumIdentificationList id="SI_LIST_1">
|
59
|
+
<FragmentationTable>
|
60
|
+
<Measure id="Measure_MZ">
|
61
|
+
<cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
|
62
|
+
</Measure>
|
63
|
+
</FragmentationTable>
|
64
|
+
<SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
|
65
|
+
<SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
|
66
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
|
67
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
|
68
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
|
69
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
|
70
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
|
71
|
+
<userParam value="0" name="IsotopeError"/>
|
72
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
73
|
+
</SpectrumIdentificationItem>
|
74
|
+
<cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
|
75
|
+
</SpectrumIdentificationResult>
|
76
|
+
<SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=6064" id="SIR_6065">
|
77
|
+
<SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep3183" calculatedMassToCharge="1033.05224609375" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_1">
|
78
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_478892_3183_314"/>
|
79
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
|
80
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
|
81
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
|
82
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.95656" name="MS-GF:EValue"/>
|
83
|
+
<userParam value="0" name="IsotopeError"/>
|
84
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
85
|
+
</SpectrumIdentificationItem>
|
86
|
+
<SpectrumIdentificationItem passThreshold="true" rank="2" peptide_ref="Pep3184" calculatedMassToCharge="1033.07373046875" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_2">
|
87
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_674036_3184_229"/>
|
88
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
|
89
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
|
90
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
|
91
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.953312" name="MS-GF:EValue"/>
|
92
|
+
<userParam value="0" name="IsotopeError"/>
|
93
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
94
|
+
</SpectrumIdentificationItem>
|
95
|
+
<SpectrumIdentificationItem passThreshold="true" rank="3" peptide_ref="Pep3185" calculatedMassToCharge="1033.0743408203125" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_3">
|
96
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_611354_3185_2"/>
|
97
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
|
98
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
|
99
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
|
100
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.949913" name="MS-GF:EValue"/>
|
101
|
+
<userParam value="0" name="IsotopeError"/>
|
102
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
103
|
+
</SpectrumIdentificationItem>
|
104
|
+
<SpectrumIdentificationItem passThreshold="true" rank="4" peptide_ref="Pep3186" calculatedMassToCharge="1033.0694580078125" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_4">
|
105
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_548405_3186_110"/>
|
106
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
|
107
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
|
108
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
|
109
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.942093" name="MS-GF:EValue"/>
|
110
|
+
<userParam value="0" name="IsotopeError"/>
|
111
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
112
|
+
</SpectrumIdentificationItem>
|
113
|
+
<cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_11332" name="spectrum title"/>
|
114
|
+
</SpectrumIdentificationResult>
|
115
|
+
</SpectrumIdentificationList>
|
116
|
+
</AnalysisData>
|
117
|
+
</DataCollection>
|
118
|
+
</MzIdentML>
|
@@ -0,0 +1,112 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
|
3
|
+
<cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
4
|
+
<cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
|
5
|
+
<cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
|
6
|
+
<cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
|
7
|
+
</cvList>
|
8
|
+
<AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
9
|
+
<AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
|
10
|
+
<SoftwareName>
|
11
|
+
<cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
|
12
|
+
</SoftwareName>
|
13
|
+
</AnalysisSoftware>
|
14
|
+
</AnalysisSoftwareList>
|
15
|
+
<SequenceCollection>
|
16
|
+
<DBSequence accession="sp|Q9RXK5|EFG_DEIRA" searchDatabase_ref="SearchDB_1" length="698" id="DBSeq30696">
|
17
|
+
<cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|Q9RXK5|EFG_DEIRA Elongation factor G OS=Deinococcus radiodurans GN=fusA PE=3 SV=1" name="protein description"/>
|
18
|
+
</DBSequence>
|
19
|
+
<DBSequence accession="tr|Q9RXN7|Q9RXN7_DEIRA" searchDatabase_ref="SearchDB_1" length="193" id="DBSeq658964">
|
20
|
+
<cvParam accession="MS:1001088" cvRef="PSI-MS" value="tr|Q9RXN7|Q9RXN7_DEIRA Putative uncharacterized protein OS=Deinococcus radiodurans GN=DR_0273 PE=4 SV=1" name="protein description"/>
|
21
|
+
</DBSequence>
|
22
|
+
<DBSequence accession="tr|Q9RS55|Q9RS55_DEIRA" searchDatabase_ref="SearchDB_1" length="172" id="DBSeq183410">
|
23
|
+
<cvParam accession="MS:1001088" cvRef="PSI-MS" value="tr|Q9RS55|Q9RS55_DEIRA MutT/nudix family protein OS=Deinococcus radiodurans GN=DR_2272 PE=4 SV=1" name="protein description"/>
|
24
|
+
</DBSequence>
|
25
|
+
<Peptide id="Pep1">
|
26
|
+
<PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
|
27
|
+
</Peptide>
|
28
|
+
<Peptide id="Pep3181">
|
29
|
+
<PeptideSequence>RFQIGEVVLEGTGECHPCSR</PeptideSequence>
|
30
|
+
<Modification monoisotopicMassDelta="57.021463735" location="15">
|
31
|
+
<cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
|
32
|
+
</Modification>
|
33
|
+
<Modification monoisotopicMassDelta="57.021463735" location="18">
|
34
|
+
<cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
|
35
|
+
</Modification>
|
36
|
+
</Peptide>
|
37
|
+
<Peptide id="Pep3182">
|
38
|
+
<PeptideSequence>FFHWEGRERHEFGFFFR</PeptideSequence>
|
39
|
+
</Peptide>
|
40
|
+
<PeptideEvidence isDecoy="false" post="I" pre="K" end="591" start="573" peptide_ref="Pep1" dBSequence_ref="DBSeq30696" id="PepEv_31268_1_573"/>
|
41
|
+
<PeptideEvidence isDecoy="false" post="M" pre="R" end="151" start="132" peptide_ref="Pep3181" dBSequence_ref="DBSeq658964" id="PepEv_659095_3181_132"/>
|
42
|
+
<PeptideEvidence isDecoy="false" post="V" pre="R" end="115" start="99" peptide_ref="Pep3182" dBSequence_ref="DBSeq183410" id="PepEv_183508_3182_99"/>
|
43
|
+
</SequenceCollection>
|
44
|
+
<AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
45
|
+
<SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
|
46
|
+
<InputSpectra spectraData_ref="SID_1"/>
|
47
|
+
<SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
|
48
|
+
</SpectrumIdentification>
|
49
|
+
</AnalysisCollection>
|
50
|
+
<DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
|
51
|
+
<Inputs>
|
52
|
+
<SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
|
53
|
+
<FileFormat>
|
54
|
+
<cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
|
55
|
+
</FileFormat>
|
56
|
+
<DatabaseName>
|
57
|
+
<userParam name="041.fa"/>
|
58
|
+
</DatabaseName>
|
59
|
+
</SearchDatabase>
|
60
|
+
<SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
|
61
|
+
<FileFormat>
|
62
|
+
<cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
|
63
|
+
</FileFormat>
|
64
|
+
<SpectrumIDFormat>
|
65
|
+
<cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
|
66
|
+
</SpectrumIDFormat>
|
67
|
+
</SpectraData>
|
68
|
+
</Inputs>
|
69
|
+
<AnalysisData>
|
70
|
+
<SpectrumIdentificationList id="SI_LIST_1">
|
71
|
+
<FragmentationTable>
|
72
|
+
<Measure id="Measure_MZ">
|
73
|
+
<cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
|
74
|
+
</Measure>
|
75
|
+
</FragmentationTable>
|
76
|
+
<SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
|
77
|
+
<SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
|
78
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
|
79
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
|
80
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
|
81
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
|
82
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
|
83
|
+
<userParam value="0" name="IsotopeError"/>
|
84
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
85
|
+
</SpectrumIdentificationItem>
|
86
|
+
<cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
|
87
|
+
</SpectrumIdentificationResult>
|
88
|
+
<SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=8577" id="SIR_8578">
|
89
|
+
<SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep3181" calculatedMassToCharge="1166.0521240234375" experimentalMassToCharge="1166.0589599609375" chargeState="2" id="SII_8578_1">
|
90
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_659095_3181_132"/>
|
91
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-14" name="MS-GF:RawScore"/>
|
92
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="94" name="MS-GF:DeNovoScore"/>
|
93
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9070557E-5" name="MS-GF:SpecEValue"/>
|
94
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.935154" name="MS-GF:EValue"/>
|
95
|
+
<userParam value="0" name="IsotopeError"/>
|
96
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
97
|
+
</SpectrumIdentificationItem>
|
98
|
+
<SpectrumIdentificationItem passThreshold="true" rank="2" peptide_ref="Pep3182" calculatedMassToCharge="1166.0533447265625" experimentalMassToCharge="1166.0589599609375" chargeState="2" id="SII_8578_2">
|
99
|
+
<PeptideEvidenceRef peptideEvidence_ref="PepEv_183508_3182_99"/>
|
100
|
+
<cvParam accession="MS:1002049" cvRef="PSI-MS" value="-14" name="MS-GF:RawScore"/>
|
101
|
+
<cvParam accession="MS:1002050" cvRef="PSI-MS" value="94" name="MS-GF:DeNovoScore"/>
|
102
|
+
<cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9070557E-5" name="MS-GF:SpecEValue"/>
|
103
|
+
<cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.924725" name="MS-GF:EValue"/>
|
104
|
+
<userParam value="0" name="IsotopeError"/>
|
105
|
+
<userParam value="CID" name="AssumedDissociationMethod"/>
|
106
|
+
</SpectrumIdentificationItem>
|
107
|
+
<cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_14259" name="spectrum title"/>
|
108
|
+
</SpectrumIdentificationResult>
|
109
|
+
</SpectrumIdentificationList>
|
110
|
+
</AnalysisData>
|
111
|
+
</DataCollection>
|
112
|
+
</MzIdentML>
|