protk 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +32 -15
- data/bin/mzid_to_pepxml.rb +75 -0
- data/bin/mzid_to_protxml.rb +77 -0
- data/bin/protxml_to_gff.rb +1 -1
- data/bin/sixframe.rb +24 -5
- data/bin/spectrast_create.rb +125 -0
- data/bin/spectrast_filter.rb +108 -0
- data/lib/protk/command_runner.rb +1 -1
- data/lib/protk/data/template_pep.xml +34 -0
- data/lib/protk/data/template_prot.xml +39 -0
- data/lib/protk/mzidentml_doc.rb +140 -0
- data/lib/protk/mzml_parser.rb +9 -0
- data/lib/protk/peptide.rb +39 -5
- data/lib/protk/pepxml_writer.rb +24 -0
- data/lib/protk/physical_constants.rb +1 -0
- data/lib/protk/protein.rb +64 -1
- data/lib/protk/protein_group.rb +70 -0
- data/lib/protk/protxml_writer.rb +27 -0
- data/lib/protk/psm.rb +222 -0
- data/lib/protk/search_tool.rb +1 -6
- data/lib/protk/sniffer.rb +35 -0
- data/lib/protk/spectrum_query.rb +132 -0
- metadata +20 -2
@@ -0,0 +1,132 @@
|
|
1
|
+
|
2
|
+
require 'protk/mzidentml_doc'
|
3
|
+
require 'protk/psm'
|
4
|
+
require 'protk/physical_constants'
|
5
|
+
|
6
|
+
include LibXML
|
7
|
+
|
8
|
+
|
9
|
+
# <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8167.00003.00003.2" start_scan="3" end_scan="3"
|
10
|
+
#precursor_neutral_mass="1398.7082" assumed_charge="2" index="2" experiment_label="mr176">
|
11
|
+
# <search_result>
|
12
|
+
# <search_hit hit_rank="1" peptide="SQVFQLESTFDV" peptide_prev_aa="R" peptide_next_aa="K" protein="tr|Q90853|Q90853_CHICK" protein_descr="Homeobox protein OS=Gallus gallus GN=GH6 PE=2 SV=1" num_tot_proteins="1" num_matched_ions="9" tot_num_ions="22" calc_neutral_pep_mass="1380.6557" massdiff="18.053" num_tol_term="1" num_missed_cleavages="0" is_rejected="0">
|
13
|
+
# <search_score name="hyperscore" value="23.9"/>
|
14
|
+
# <search_score name="nextscore" value="19.3"/>
|
15
|
+
# <search_score name="bscore" value="9.6"/>
|
16
|
+
# <search_score name="yscore" value="7.6"/>
|
17
|
+
# <search_score name="cscore" value="0"/>
|
18
|
+
# <search_score name="zscore" value="0"/>
|
19
|
+
# <search_score name="ascore" value="0"/>
|
20
|
+
# <search_score name="xscore" value="0"/>
|
21
|
+
# <search_score name="expect" value="0.099"/>
|
22
|
+
# <analysis_result analysis="peptideprophet">
|
23
|
+
# <peptideprophet_result probability="0.9997" all_ntt_prob="(0.0000,0.9997,0.9999)">
|
24
|
+
# <search_score_summary>
|
25
|
+
# <parameter name="fval" value="2.3571"/>
|
26
|
+
# <parameter name="ntt" value="1"/>
|
27
|
+
# <parameter name="nmc" value="0"/>
|
28
|
+
# <parameter name="massd" value="18.053"/>
|
29
|
+
# </search_score_summary>
|
30
|
+
# </peptideprophet_result>
|
31
|
+
# </analysis_result>
|
32
|
+
# </search_hit>
|
33
|
+
# </search_result>
|
34
|
+
# </spectrum_query>
|
35
|
+
|
36
|
+
# Holds the identifications made for a single spectrum: a title, a retention
# time and the list of peptide-spectrum matches (PSMs) assigned to it.
#
# Instances are created with SpectrumQuery.from_mzid (the constructor itself
# is private) and can be serialised to a pepXML <spectrum_query> node with
# #as_pepxml.
class SpectrumQuery

  # String value of the "spectrum title" cvParam (MS:1000796)
  attr_accessor :spectrum_title
  # Float value of the "retention time" cvParam (MS:1000894)
  attr_accessor :retention_time
  # attr_accessor :precursor_neutral_mass
  # attr_accessor :assumed_charge

  # attr_accessor :index
  # Collection of PSM objects, one per SpectrumIdentificationItem
  attr_accessor :psms

  class << self

    # <SpectrumIdentificationResult spectraData_ref="ma201_Vp_1-10.mzML.mgf"
    #   spectrumID="index=3152" id="SIR_1">
    #   <SpectrumIdentificationItem passThreshold="false"
    #     rank="1" peptide_ref="KSPVYKVHFTR"
    #     calculatedMassToCharge="1360.7615466836999"
    #     experimentalMassToCharge="1362.805053710938"
    #     chargeState="1" id="SII_1_1">
    #     <PeptideEvidenceRef peptideEvidence_ref="PepEv_1" />
    #     <Fragmentation>
    #       <IonType charge="1" index="1 4">
    #         <FragmentArray measure_ref="Measure_MZ"
    #           values="175.2081208 560.3388993" />
    #         <FragmentArray measure_ref="Measure_Int"
    #           values="94.0459823608 116.2766723633" />
    #         <FragmentArray measure_ref="Measure_Error"
    #           values="0.08916864948798775 0.0449421494880653" />
    #         <cvParam cvRef="PSI-MS" accession="MS:1001220"
    #           name="frag: y ion" />
    #       </IonType>
    #     </Fragmentation>
    #     <cvParam cvRef="PSI-MS" accession="MS:1002466"
    #       name="PeptideShaker PSM score" value="0.0" />
    #     <cvParam cvRef="PSI-MS" accession="MS:1002467"
    #       name="PeptideShaker PSM confidence" value="0.0" />
    #     <cvParam cvRef="PSI-MS" accession="MS:1002052"
    #       name="MS-GF:SpecEValue" value="1.4757611E-6" />
    #     <cvParam cvRef="PSI-MS" accession="MS:1001117"
    #       name="theoretical mass" value="1360.7615466836999" />
    #     <cvParam cvRef="PSI-MS" accession="MS:1002543"
    #       name="PeptideShaker PSM confidence type"
    #       value="Not Validated" />
    #   </SpectrumIdentificationItem>
    #   <cvParam cvRef="PSI-MS" accession="MS:1000796"
    #     name="spectrum title"
    #     value="Suresh Vp 1 to 10_BAF.3535.3535.1" />
    #   <cvParam cvRef="PSI-MS" accession="MS:1000894"
    #     name="retention time" value="6855.00001" unitCvRef="UO"
    #     unitAccession="UO:0000010" unitName="seconds" />
    # </SpectrumIdentificationResult>

    # Builds a SpectrumQuery from an mzIdentML node shaped like the example
    # above.
    #
    # query_node - the SpectrumIdentificationResult node to read.
    #
    # Reads the MS:1000796 and MS:1000894 cvParams for the title and
    # retention time, and converts every SpectrumIdentificationItem found
    # under the node into a PSM via PSM.from_mzid.
    #
    # Returns the populated SpectrumQuery.
    def from_mzid(query_node)
      query = new()
      query.spectrum_title = MzIdentMLDoc.get_cvParam(query_node, "MS:1000796")['value'].to_s
      query.retention_time = MzIdentMLDoc.get_cvParam(query_node, "MS:1000894")['value'].to_f
      items = MzIdentMLDoc.find(query_node, "SpectrumIdentificationItem")
      query.psms = items.map { |item| PSM.from_mzid(item) }
      query
    end

    # Construction is only allowed through the factory method(s) above
    private :new
  end

  def initialize()

  end

  # <spectrum_query spectrum="SureshVp1to10_BAF.00833.00833.1" start_scan="833" end_scan="833"
  #   precursor_neutral_mass="1214.5937" assumed_charge="1" index="3222">
  #   <search_result>

  # Serialises this query as a pepXML 'spectrum_query' node.
  #
  # The 'spectrum' and 'retention_time_sec' attributes are always written.
  # 'precursor_neutral_mass' and 'assumed_charge' are derived from the first
  # PSM; when there are no PSMs (psms is nil or empty) those two attributes
  # are omitted instead of failing on a nil PSM. The pepXML form of every PSM
  # is appended as a child, in order.
  #
  # Returns the XML::Node.
  def as_pepxml()
    node = XML::Node.new('spectrum_query')
    node['spectrum'] = spectrum_title
    node['retention_time_sec'] = retention_time.to_s

    all_psms = psms || []

    # Use the first psm to populate spectrum level values
    first_psm = all_psms.first
    if first_psm
      c = first_psm.charge

      # Neutral mass = observed m/z * charge, minus one HYDROGEN_MASS per charge.
      # NOTE(review): strictly a proton mass is removed per charge; confirm the
      # value of HYDROGEN_MASS in physical_constants is the intended one.
      node['precursor_neutral_mass'] = (first_psm.experimental_mz*c-c*HYDROGEN_MASS).to_s
      node['assumed_charge'] = c.to_s
    end

    all_psms.each do |psm|
      node << psm.as_pepxml
    end
    node
  end

end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: protk
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ira Cooke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: open4
|
@@ -206,6 +206,10 @@ executables:
|
|
206
206
|
- protxml_to_table.rb
|
207
207
|
- swissprot_to_table.rb
|
208
208
|
- protxml_to_psql.rb
|
209
|
+
- mzid_to_protxml.rb
|
210
|
+
- mzid_to_pepxml.rb
|
211
|
+
- spectrast_create.rb
|
212
|
+
- spectrast_filter.rb
|
209
213
|
extensions:
|
210
214
|
- ext/decoymaker/extconf.rb
|
211
215
|
extra_rdoc_files: []
|
@@ -219,6 +223,8 @@ files:
|
|
219
223
|
- bin/mascot_search.rb
|
220
224
|
- bin/mascot_to_pepxml.rb
|
221
225
|
- bin/msgfplus_search.rb
|
226
|
+
- bin/mzid_to_pepxml.rb
|
227
|
+
- bin/mzid_to_protxml.rb
|
222
228
|
- bin/omssa_search.rb
|
223
229
|
- bin/peptide_prophet.rb
|
224
230
|
- bin/pepxml_to_table.rb
|
@@ -229,6 +235,8 @@ files:
|
|
229
235
|
- bin/protxml_to_table.rb
|
230
236
|
- bin/repair_run_summary.rb
|
231
237
|
- bin/sixframe.rb
|
238
|
+
- bin/spectrast_create.rb
|
239
|
+
- bin/spectrast_filter.rb
|
232
240
|
- bin/swissprot_to_table.rb
|
233
241
|
- bin/tandem_search.rb
|
234
242
|
- bin/tandem_to_pepxml.rb
|
@@ -261,6 +269,8 @@ files:
|
|
261
269
|
- lib/protk/data/tandem_isb_native_defaults.xml
|
262
270
|
- lib/protk/data/tandem_params.xml
|
263
271
|
- lib/protk/data/taxonomy_template.xml
|
272
|
+
- lib/protk/data/template_pep.xml
|
273
|
+
- lib/protk/data/template_prot.xml
|
264
274
|
- lib/protk/data/unimod.xml
|
265
275
|
- lib/protk/data/uniprot_accessions.loc
|
266
276
|
- lib/protk/data/uniprot_accessions_table.txt
|
@@ -274,20 +284,28 @@ files:
|
|
274
284
|
- lib/protk/manage_db_rakefile.rake
|
275
285
|
- lib/protk/manage_db_tool.rb
|
276
286
|
- lib/protk/mascot_util.rb
|
287
|
+
- lib/protk/mzidentml_doc.rb
|
277
288
|
- lib/protk/mzml_parser.rb
|
278
289
|
- lib/protk/omssa_util.rb
|
279
290
|
- lib/protk/openms_defaults.rb
|
280
291
|
- lib/protk/peptide.rb
|
281
292
|
- lib/protk/pepxml.rb
|
293
|
+
- lib/protk/pepxml_writer.rb
|
294
|
+
- lib/protk/physical_constants.rb
|
282
295
|
- lib/protk/plasmodb.rb
|
283
296
|
- lib/protk/prophet_tool.rb
|
284
297
|
- lib/protk/protein.rb
|
298
|
+
- lib/protk/protein_group.rb
|
285
299
|
- lib/protk/protein_to_genome_mapper.rb
|
286
300
|
- lib/protk/protxml_to_gff_tool.rb
|
301
|
+
- lib/protk/protxml_writer.rb
|
302
|
+
- lib/protk/psm.rb
|
287
303
|
- lib/protk/randomize.rb
|
288
304
|
- lib/protk/search_tool.rb
|
289
305
|
- lib/protk/setup_rakefile.rake
|
290
306
|
- lib/protk/setup_tool.rb
|
307
|
+
- lib/protk/sniffer.rb
|
308
|
+
- lib/protk/spectrum_query.rb
|
291
309
|
- lib/protk/swissprot_database.rb
|
292
310
|
- lib/protk/tandem_search_tool.rb
|
293
311
|
- lib/protk/tool.rb
|