mzid 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,179 @@
1
+ require 'nokogiri'
2
+ require 'progressbar'
3
+ require 'mzid/base_parser'
4
+ require 'mzid/streaming_parser'
5
+ require 'csv'
6
+
7
module MzID
  #
  # Parses an mzIdentML file in a streaming (i.e., memory-efficient) manner.
  #
  # Peptide elements are read with Nokogiri's pull reader. DBSequence,
  # PeptideEvidence and SpectrumIdentificationItem elements are picked out
  # line by line with regular expressions, so each of those opening tags is
  # expected to sit on a single line with double-quoted attribute values.
  #
  class StreamingParserLines < StreamingParser
    # Line-level patterns for the elements that are handled without an XML parser.
    DB_SEQ_RE       = /^\s*<DBSequence\s/
    PEP_EV_RE       = /^\s*<PeptideEvidence\s/
    SPEC_ID_ITEM_RE = /^\s+<SpectrumIdentificationItem\s/
    PEP_EV_REF_RE   = /^\s+<PeptideEvidenceRef\s/
    SPEC_PROB_RE    = /name="MS-GF:SpecEValue"\/>$/
    SPEC_ID_END_RE  = /^\s+<\/SpectrumIdentificationItem>\s*$/

    # One precompiled pattern per attribute that is read from a line. The
    # leading \s keeps e.g. "id" from matching the tail of another attribute
    # name, and [^"]* captures the whole value (accessions may contain '.',
    # '-' or ':'; numbers may contain '+').
    ATTR_RE = Hash[
      %w[accession id dBSequence_ref start end isDecoy
         peptide_ref peptideEvidence_ref value].map { |name| [name, /\s#{name}="([^"]*)"/] }
    ].freeze

    #
    # file      - path of the mzIdentML file; forwarded to the parent initializer
    # sp_thresh - accepted for interface compatibility; not used by this class
    # use_pbar  - truthy to show progress bars; forwarded to the parent initializer
    # tda_flag  - when false, write_to_csv omits the "decoy" column
    #
    def initialize(file, sp_thresh = 10.0**-10, use_pbar = nil, tda_flag = true)
      @num_spec = 0
      @tda_flag = tda_flag
      # Not read anywhere in this class; kept in case other code relies on them.
      @pep_ev_h_protID = {}
      @pep_ev_h_startPos = {}
      @pep_ev_h_endPos = {}
      @pep_ev_h_dbseqRef = {}
      super(file, use_pbar)
    end

    #
    # get a protein ID from a PeptideEvidenceID
    #
    def get_prot_id(pep_ev_id)
      @db_seq_h[@pep_ev_h[pep_ev_id].get_db_seq_ref]
    end

    #
    # accessors for the cached PeptideEvidence entry of a PeptideEvidenceID
    #
    def get_pep_start(pep_ev_id)
      @pep_ev_h[pep_ev_id].get_start_pos
    end

    def get_pep_end(pep_ev_id)
      @pep_ev_h[pep_ev_id].get_end_pos
    end

    def get_is_decoy(pep_ev_id)
      @pep_ev_h[pep_ev_id].get_is_decoy
    end

    #
    # store peptide sequences, modifications, DBSequence accessions and
    # PeptideEvidence entries in hashes for lookup
    #
    def cache_ids(use_pbar = @use_pbar)
      num_pep, num_db_seq, num_pep_ev = get_num_elements(nil)

      @pep_h = {}
      @mod_h = {}
      # Normally set up elsewhere; initialised here only if still missing.
      @db_seq_h ||= {}
      @pep_ev_h ||= {}

      cache_peptides(num_pep, use_pbar)
      cache_db_sequences(num_db_seq, use_pbar)
      cache_peptide_evidence(num_pep_ev, use_pbar)
    end

    #
    # iterate through each psm by identifying them parsing the file
    # one line at a time - faster than using XML parser
    #
    # Yields one PSM per SpectrumIdentificationItem. Returns an Enumerator
    # when no block is given.
    #
    def each_psm(use_pbar = @use_pbar)
      return enum_for(:each_psm, use_pbar) unless block_given?

      # Lines are counted in Ruby rather than through a shell command, so
      # paths with spaces or shell metacharacters are handled correctly.
      total = use_pbar ? File.foreach(@mzid_file).count : nil
      with_progress("PSMs", total, use_pbar) do |pbar|
        curr_psm = nil
        File.foreach(@mzid_file) do |line|
          pbar.inc if pbar
          case line
          when SPEC_ID_ITEM_RE # beginning of spectrum ID item
            curr_psm = build_psm(line)
          when PEP_EV_REF_RE
            curr_psm.add_pep_ev(required_attr(line, "peptideEvidence_ref").to_sym) if curr_psm
          when SPEC_PROB_RE
            curr_psm.set_spec_prob(required_attr(line, "value").to_f) if curr_psm
          when SPEC_ID_END_RE
            yield curr_psm if curr_psm
            curr_psm = nil # kill current PSM object
          end
        end
      end
    end

    #
    # stream the PSMs into a tab-separated file, writing one row per
    # PeptideEvidence reference of each PSM
    #
    # outfile  - path of the file to write
    # use_pbar - truthy to show a progress bar while reading the PSMs
    #
    def write_to_csv(outfile = "result.csv", use_pbar = @use_pbar)
      header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
      header.delete("decoy") unless @tda_flag

      # Options are passed as keywords: a positional options hash is rejected
      # by CSV.open on Ruby 3.0 and later.
      CSV.open(outfile, "w", :col_sep => "\t") do |csv|
        csv << header
        each_psm(use_pbar) do |psm|
          pep_ev_refs = psm.get_pep_ev
          # number of proteins with matching peptide
          num_prot = pep_ev_refs.size
          pep_ev_refs.each do |pep_ev|
            row = [psm.get_spec_num, psm.get_pep, psm.get_spec_prob]
            row << get_is_decoy(pep_ev) if @tda_flag
            row.push(get_prot_id(pep_ev), get_pep_start(pep_ev), get_pep_end(pep_ev), num_prot)
            csv << row
          end
        end
      end
    end

    private

    # Creates a progress bar when use_pbar is truthy, yields it (or nil) to
    # the block, and finishes it afterwards.
    def with_progress(title, total, use_pbar)
      pbar = ProgressBar.new(title, total) if use_pbar
      yield pbar
      pbar.finish if use_pbar
    end

    # Returns the value of attribute +name+ on +line+, or nil when absent.
    def attr_value(line, name)
      line[ATTR_RE.fetch(name), 1]
    end

    # Like attr_value, but raises a descriptive error when the attribute is absent.
    def required_attr(line, name)
      value = attr_value(line, name)
      raise "missing '#{name}' attribute in: #{line.strip}" if value.nil?
      value
    end

    # Fills @pep_h (sequence) and @mod_h (modifications), keyed by Peptide id.
    def cache_peptides(num_pep, use_pbar)
      # The total is halved as in the original code; presumably
      # get_num_elements counts opening and closing Peptide tags - TODO confirm.
      with_progress("peptides", num_pep / 2, use_pbar) do |pbar|
        # Block form of File.open so the handle is closed once reading is done.
        File.open(@mzid_file) do |io|
          Nokogiri::XML::Reader(io).each do |node|
            # Only opening tags: the reader reports the end tag under the same name.
            next unless node.name == "Peptide" &&
                        node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT

            # parse local peptide entry
            doc = Nokogiri::XML.parse(node.outer_xml)
            doc.remove_namespaces!
            root = doc.root
            pep_id = root["id"].to_sym
            # skip if already handled PepID
            next if @pep_h.key?(pep_id)

            @pep_h[pep_id] = get_peptide_sequence(root)
            @mod_h[pep_id] = get_modifications(root)
            pbar.inc if pbar
          end
        end
      end
    end

    # Fills @db_seq_h: DBSequence id => accession (both as symbols).
    def cache_db_sequences(num_db_seq, use_pbar)
      with_progress("db_seq", num_db_seq, use_pbar) do |pbar|
        File.foreach(@mzid_file) do |line|
          next unless line =~ DB_SEQ_RE

          db_id = required_attr(line, "id")
          @db_seq_h[db_id.to_sym] = required_attr(line, "accession").to_sym
          pbar.inc if pbar
        end
      end
    end

    # Fills @pep_ev_h: PeptideEvidence id => PeptideEvidence.
    def cache_peptide_evidence(num_pep_ev, use_pbar)
      with_progress("pep_ev", num_pep_ev, use_pbar) do |pbar|
        File.foreach(@mzid_file) do |line|
          next unless line =~ PEP_EV_RE

          # start, end and isDecoy are optional in mzIdentML 1.1; isDecoy
          # defaults to "false" there.
          start_pos = attr_value(line, "start")
          end_pos = attr_value(line, "end")
          pep_ev_id = required_attr(line, "id").to_sym
          @pep_ev_h[pep_ev_id] =
            PeptideEvidence.new(:db_seq_ref => required_attr(line, "dBSequence_ref").to_sym,
                                :start_pos => start_pos && start_pos.to_i,
                                :end_pos => end_pos && end_pos.to_i,
                                :is_decoy => attr_value(line, "isDecoy") || "false")
          pbar.inc if pbar
        end
      end
    end

    # Builds the PSM for a SpectrumIdentificationItem opening tag.
    def build_psm(line)
      # the spectrum number is the second "_"-separated field of the item id
      spec_num = required_attr(line, "id").split("_")[1].to_i
      # peptide_ref is optional in mzIdentML 1.1
      pep_ref = attr_value(line, "peptide_ref")
      pep_key = pep_ref && pep_ref.to_sym
      PSM.new(:spec_num => spec_num, :pep => @pep_h[pep_key], :mods => @mod_h[pep_key])
    end
  end
end
@@ -0,0 +1,3 @@
1
module MzID
  # Release version of the mzid gem; frozen so it cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,71 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
3
+ <cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
4
+ <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
5
+ <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
6
+ <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
7
+ </cvList>
8
+ <AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
9
+ <AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
10
+ <SoftwareName>
11
+ <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
12
+ </SoftwareName>
13
+ </AnalysisSoftware>
14
+ </AnalysisSoftwareList>
15
+ <SequenceCollection>
16
+ <DBSequence accession="sp|Q9RXK5|EFG_DEIRA" searchDatabase_ref="SearchDB_1" length="698" id="DBSeq30696">
17
+ <cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|Q9RXK5|EFG_DEIRA Elongation factor G OS=Deinococcus radiodurans GN=fusA PE=3 SV=1" name="protein description"/>
18
+ </DBSequence>
19
+ <Peptide id="Pep1">
20
+ <PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
21
+ </Peptide>
22
+ <PeptideEvidence isDecoy="false" post="I" pre="K" end="591" start="573" peptide_ref="Pep1" dBSequence_ref="DBSeq30696" id="PepEv_31268_1_573"/>
23
+ </SequenceCollection>
24
+ <AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
25
+ <SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
26
+ <InputSpectra spectraData_ref="SID_1"/>
27
+ <SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
28
+ </SpectrumIdentification>
29
+ </AnalysisCollection>
30
+ <DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
31
+ <Inputs>
32
+ <SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
33
+ <FileFormat>
34
+ <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
35
+ </FileFormat>
36
+ <DatabaseName>
37
+ <userParam name="041.fa"/>
38
+ </DatabaseName>
39
+ </SearchDatabase>
40
+ <SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
41
+ <FileFormat>
42
+ <cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
43
+ </FileFormat>
44
+ <SpectrumIDFormat>
45
+ <cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
46
+ </SpectrumIDFormat>
47
+ </SpectraData>
48
+ </Inputs>
49
+ <AnalysisData>
50
+ <SpectrumIdentificationList id="SI_LIST_1">
51
+ <FragmentationTable>
52
+ <Measure id="Measure_MZ">
53
+ <cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
54
+ </Measure>
55
+ </FragmentationTable>
56
+ <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
57
+ <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
58
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
59
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
60
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
61
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
62
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
63
+ <userParam value="0" name="IsotopeError"/>
64
+ <userParam value="CID" name="AssumedDissociationMethod"/>
65
+ </SpectrumIdentificationItem>
66
+ <cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
67
+ </SpectrumIdentificationResult>
68
+ </SpectrumIdentificationList>
69
+ </AnalysisData>
70
+ </DataCollection>
71
+ </MzIdentML>
@@ -0,0 +1,118 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
3
+ <cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
4
+ <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
5
+ <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
6
+ <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
7
+ </cvList>
8
+ <AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
9
+ <AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
10
+ <SoftwareName>
11
+ <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
12
+ </SoftwareName>
13
+ </AnalysisSoftware>
14
+ </AnalysisSoftwareList>
15
+ <SequenceCollection>
16
+ <Peptide id="Pep1">
17
+ <PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
18
+ </Peptide>
19
+ <Peptide id="Pep3183">
20
+ <PeptideSequence>PPEGIGGKQVAARLAEMGQR</PeptideSequence>
21
+ </Peptide>
22
+ <Peptide id="Pep3184">
23
+ <PeptideSequence>GVTVLLTTHDLGDVERLAR</PeptideSequence>
24
+ </Peptide>
25
+ <Peptide id="Pep3185">
26
+ <PeptideSequence>TLPPDAPSRHRLVHALER</PeptideSequence>
27
+ </Peptide>
28
+ <Peptide id="Pep3186">
29
+ <PeptideSequence>PRREARPRFPWELAQR</PeptideSequence>
30
+ </Peptide>
31
+ </SequenceCollection>
32
+ <AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
33
+ <SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
34
+ <InputSpectra spectraData_ref="SID_1"/>
35
+ <SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
36
+ </SpectrumIdentification>
37
+ </AnalysisCollection>
38
+ <DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
39
+ <Inputs>
40
+ <SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
41
+ <FileFormat>
42
+ <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
43
+ </FileFormat>
44
+ <DatabaseName>
45
+ <userParam name="041.fa"/>
46
+ </DatabaseName>
47
+ </SearchDatabase>
48
+ <SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
49
+ <FileFormat>
50
+ <cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
51
+ </FileFormat>
52
+ <SpectrumIDFormat>
53
+ <cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
54
+ </SpectrumIDFormat>
55
+ </SpectraData>
56
+ </Inputs>
57
+ <AnalysisData>
58
+ <SpectrumIdentificationList id="SI_LIST_1">
59
+ <FragmentationTable>
60
+ <Measure id="Measure_MZ">
61
+ <cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
62
+ </Measure>
63
+ </FragmentationTable>
64
+ <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
65
+ <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
66
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
67
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
68
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
69
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
70
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
71
+ <userParam value="0" name="IsotopeError"/>
72
+ <userParam value="CID" name="AssumedDissociationMethod"/>
73
+ </SpectrumIdentificationItem>
74
+ <cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
75
+ </SpectrumIdentificationResult>
76
+ <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=6064" id="SIR_6065">
77
+ <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep3183" calculatedMassToCharge="1033.05224609375" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_1">
78
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_478892_3183_314"/>
79
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
80
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
81
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
82
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.95656" name="MS-GF:EValue"/>
83
+ <userParam value="0" name="IsotopeError"/>
84
+ <userParam value="CID" name="AssumedDissociationMethod"/>
85
+ </SpectrumIdentificationItem>
86
+ <SpectrumIdentificationItem passThreshold="true" rank="2" peptide_ref="Pep3184" calculatedMassToCharge="1033.07373046875" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_2">
87
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_674036_3184_229"/>
88
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
89
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
90
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
91
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.953312" name="MS-GF:EValue"/>
92
+ <userParam value="0" name="IsotopeError"/>
93
+ <userParam value="CID" name="AssumedDissociationMethod"/>
94
+ </SpectrumIdentificationItem>
95
+ <SpectrumIdentificationItem passThreshold="true" rank="3" peptide_ref="Pep3185" calculatedMassToCharge="1033.0743408203125" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_3">
96
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_611354_3185_2"/>
97
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
98
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
99
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
100
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.949913" name="MS-GF:EValue"/>
101
+ <userParam value="0" name="IsotopeError"/>
102
+ <userParam value="CID" name="AssumedDissociationMethod"/>
103
+ </SpectrumIdentificationItem>
104
+ <SpectrumIdentificationItem passThreshold="true" rank="4" peptide_ref="Pep3186" calculatedMassToCharge="1033.0694580078125" experimentalMassToCharge="1033.0670166015625" chargeState="2" id="SII_6065_4">
105
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_548405_3186_110"/>
106
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-56" name="MS-GF:RawScore"/>
107
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="71" name="MS-GF:DeNovoScore"/>
108
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9093196E-5" name="MS-GF:SpecEValue"/>
109
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.942093" name="MS-GF:EValue"/>
110
+ <userParam value="0" name="IsotopeError"/>
111
+ <userParam value="CID" name="AssumedDissociationMethod"/>
112
+ </SpectrumIdentificationItem>
113
+ <cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_11332" name="spectrum title"/>
114
+ </SpectrumIdentificationResult>
115
+ </SpectrumIdentificationList>
116
+ </AnalysisData>
117
+ </DataCollection>
118
+ </MzIdentML>
@@ -0,0 +1,112 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <MzIdentML id="MS-GF+" version="1.1.0" xmlns="http://psidev.info/psi/pi/mzIdentML/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://psidev.info/psi/pi/mzIdentML/1.1 http://www.psidev.info/files/mzIdentML1.1.0.xsd" creationDate="2014-09-20T12:27:24" >
3
+ <cvList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
4
+ <cv id="PSI-MS" uri="http://psidev.cvs.sourceforge.net/viewvc/*checkout*/psidev/psi/psi-ms/mzML/controlledVocabulary/psi-ms.obo" version="3.30.0" fullName="PSI-MS"/>
5
+ <cv id="UNIMOD" uri="http://www.unimod.org/obo/unimod.obo" fullName="UNIMOD"/>
6
+ <cv id="UO" uri="http://obo.cvs.sourceforge.net/*checkout*/obo/obo/ontology/phenotype/unit.obo" fullName="UNIT-ONTOLOGY"/>
7
+ </cvList>
8
+ <AnalysisSoftwareList xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
9
+ <AnalysisSoftware version="Beta (v9979)" name="MS-GF+" id="ID_software">
10
+ <SoftwareName>
11
+ <cvParam accession="MS:1002048" cvRef="PSI-MS" name="MS-GF+"/>
12
+ </SoftwareName>
13
+ </AnalysisSoftware>
14
+ </AnalysisSoftwareList>
15
+ <SequenceCollection>
16
+ <DBSequence accession="sp|Q9RXK5|EFG_DEIRA" searchDatabase_ref="SearchDB_1" length="698" id="DBSeq30696">
17
+ <cvParam accession="MS:1001088" cvRef="PSI-MS" value="sp|Q9RXK5|EFG_DEIRA Elongation factor G OS=Deinococcus radiodurans GN=fusA PE=3 SV=1" name="protein description"/>
18
+ </DBSequence>
19
+ <DBSequence accession="tr|Q9RXN7|Q9RXN7_DEIRA" searchDatabase_ref="SearchDB_1" length="193" id="DBSeq658964">
20
+ <cvParam accession="MS:1001088" cvRef="PSI-MS" value="tr|Q9RXN7|Q9RXN7_DEIRA Putative uncharacterized protein OS=Deinococcus radiodurans GN=DR_0273 PE=4 SV=1" name="protein description"/>
21
+ </DBSequence>
22
+ <DBSequence accession="tr|Q9RS55|Q9RS55_DEIRA" searchDatabase_ref="SearchDB_1" length="172" id="DBSeq183410">
23
+ <cvParam accession="MS:1001088" cvRef="PSI-MS" value="tr|Q9RS55|Q9RS55_DEIRA MutT/nudix family protein OS=Deinococcus radiodurans GN=DR_2272 PE=4 SV=1" name="protein description"/>
24
+ </DBSequence>
25
+ <Peptide id="Pep1">
26
+ <PeptideSequence>VVIYDGSYHEVDSSEMAFK</PeptideSequence>
27
+ </Peptide>
28
+ <Peptide id="Pep3181">
29
+ <PeptideSequence>RFQIGEVVLEGTGECHPCSR</PeptideSequence>
30
+ <Modification monoisotopicMassDelta="57.021463735" location="15">
31
+ <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
32
+ </Modification>
33
+ <Modification monoisotopicMassDelta="57.021463735" location="18">
34
+ <cvParam accession="UNIMOD:4" cvRef="UNIMOD" name="Carbamidomethyl"/>
35
+ </Modification>
36
+ </Peptide>
37
+ <Peptide id="Pep3182">
38
+ <PeptideSequence>FFHWEGRERHEFGFFFR</PeptideSequence>
39
+ </Peptide>
40
+ <PeptideEvidence isDecoy="false" post="I" pre="K" end="591" start="573" peptide_ref="Pep1" dBSequence_ref="DBSeq30696" id="PepEv_31268_1_573"/>
41
+ <PeptideEvidence isDecoy="false" post="M" pre="R" end="151" start="132" peptide_ref="Pep3181" dBSequence_ref="DBSeq658964" id="PepEv_659095_3181_132"/>
42
+ <PeptideEvidence isDecoy="false" post="V" pre="R" end="115" start="99" peptide_ref="Pep3182" dBSequence_ref="DBSeq183410" id="PepEv_183508_3182_99"/>
43
+ </SequenceCollection>
44
+ <AnalysisCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
45
+ <SpectrumIdentification spectrumIdentificationList_ref="SI_LIST_1" spectrumIdentificationProtocol_ref="SearchProtocol_1" id="SpecIdent_1">
46
+ <InputSpectra spectraData_ref="SID_1"/>
47
+ <SearchDatabaseRef searchDatabase_ref="SearchDB_1"/>
48
+ </SpectrumIdentification>
49
+ </AnalysisCollection>
50
+ <DataCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
51
+ <Inputs>
52
+ <SearchDatabase numDatabaseSequences="3085" location="/home/stef/data/PNNL_fastas/041.fa" id="SearchDB_1">
53
+ <FileFormat>
54
+ <cvParam accession="MS:1001348" cvRef="PSI-MS" name="FASTA format"/>
55
+ </FileFormat>
56
+ <DatabaseName>
57
+ <userParam name="041.fa"/>
58
+ </DatabaseName>
59
+ </SearchDatabase>
60
+ <SpectraData location="/tmp/org041/DS67179_Acq20060824_LTQ_4_dta.mgf" name="DS67179_Acq20060824_LTQ_4_dta.mgf" id="SID_1">
61
+ <FileFormat>
62
+ <cvParam accession="MS:1001062" cvRef="PSI-MS" name="Mascot MGF file"/>
63
+ </FileFormat>
64
+ <SpectrumIDFormat>
65
+ <cvParam accession="MS:1000774" cvRef="PSI-MS" name="multiple peak list nativeID format"/>
66
+ </SpectrumIDFormat>
67
+ </SpectraData>
68
+ </Inputs>
69
+ <AnalysisData>
70
+ <SpectrumIdentificationList id="SI_LIST_1">
71
+ <FragmentationTable>
72
+ <Measure id="Measure_MZ">
73
+ <cvParam accession="MS:1001225" cvRef="PSI-MS" unitCvRef="PSI-MS" unitName="m/z" unitAccession="MS:1000040" name="product ion m/z"/>
74
+ </Measure>
75
+ </FragmentationTable>
76
+ <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=3590" id="SIR_3591">
77
+ <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep1" calculatedMassToCharge="1088.498779296875" experimentalMassToCharge="1088.498046875" chargeState="2" id="SII_3591_1">
78
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_31268_1_573"/>
79
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="253" name="MS-GF:RawScore"/>
80
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="253" name="MS-GF:DeNovoScore"/>
81
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="1.6364497E-26" name="MS-GF:SpecEValue"/>
82
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="1.5468738E-20" name="MS-GF:EValue"/>
83
+ <userParam value="0" name="IsotopeError"/>
84
+ <userParam value="CID" name="AssumedDissociationMethod"/>
85
+ </SpectrumIdentificationItem>
86
+ <cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_8548" name="spectrum title"/>
87
+ </SpectrumIdentificationResult>
88
+ <SpectrumIdentificationResult spectraData_ref="SID_1" spectrumID="index=8577" id="SIR_8578">
89
+ <SpectrumIdentificationItem passThreshold="true" rank="1" peptide_ref="Pep3181" calculatedMassToCharge="1166.0521240234375" experimentalMassToCharge="1166.0589599609375" chargeState="2" id="SII_8578_1">
90
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_659095_3181_132"/>
91
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-14" name="MS-GF:RawScore"/>
92
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="94" name="MS-GF:DeNovoScore"/>
93
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9070557E-5" name="MS-GF:SpecEValue"/>
94
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.935154" name="MS-GF:EValue"/>
95
+ <userParam value="0" name="IsotopeError"/>
96
+ <userParam value="CID" name="AssumedDissociationMethod"/>
97
+ </SpectrumIdentificationItem>
98
+ <SpectrumIdentificationItem passThreshold="true" rank="2" peptide_ref="Pep3182" calculatedMassToCharge="1166.0533447265625" experimentalMassToCharge="1166.0589599609375" chargeState="2" id="SII_8578_2">
99
+ <PeptideEvidenceRef peptideEvidence_ref="PepEv_183508_3182_99"/>
100
+ <cvParam accession="MS:1002049" cvRef="PSI-MS" value="-14" name="MS-GF:RawScore"/>
101
+ <cvParam accession="MS:1002050" cvRef="PSI-MS" value="94" name="MS-GF:DeNovoScore"/>
102
+ <cvParam accession="MS:1002052" cvRef="PSI-MS" value="3.9070557E-5" name="MS-GF:SpecEValue"/>
103
+ <cvParam accession="MS:1002053" cvRef="PSI-MS" value="36.924725" name="MS-GF:EValue"/>
104
+ <userParam value="0" name="IsotopeError"/>
105
+ <userParam value="CID" name="AssumedDissociationMethod"/>
106
+ </SpectrumIdentificationItem>
107
+ <cvParam accession="MS:1000796" cvRef="PSI-MS" value="spectrum_200_14259" name="spectrum title"/>
108
+ </SpectrumIdentificationResult>
109
+ </SpectrumIdentificationList>
110
+ </AnalysisData>
111
+ </DataCollection>
112
+ </MzIdentML>