protk 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +32 -15
- data/bin/mzid_to_pepxml.rb +75 -0
- data/bin/mzid_to_protxml.rb +77 -0
- data/bin/protxml_to_gff.rb +1 -1
- data/bin/sixframe.rb +24 -5
- data/bin/spectrast_create.rb +125 -0
- data/bin/spectrast_filter.rb +108 -0
- data/lib/protk/command_runner.rb +1 -1
- data/lib/protk/data/template_pep.xml +34 -0
- data/lib/protk/data/template_prot.xml +39 -0
- data/lib/protk/mzidentml_doc.rb +140 -0
- data/lib/protk/mzml_parser.rb +9 -0
- data/lib/protk/peptide.rb +39 -5
- data/lib/protk/pepxml_writer.rb +24 -0
- data/lib/protk/physical_constants.rb +1 -0
- data/lib/protk/protein.rb +64 -1
- data/lib/protk/protein_group.rb +70 -0
- data/lib/protk/protxml_writer.rb +27 -0
- data/lib/protk/psm.rb +222 -0
- data/lib/protk/search_tool.rb +1 -6
- data/lib/protk/sniffer.rb +35 -0
- data/lib/protk/spectrum_query.rb +132 -0
- metadata +20 -2
@@ -0,0 +1 @@
|
|
1
|
+
# Hydrogen mass constant.
# NOTE(review): 1.00794 looks like the average (standard) atomic weight of
# hydrogen rather than a monoisotopic or proton mass, presumably in unified
# atomic mass units (Da) - confirm this is the value callers expect.
HYDROGEN_MASS=1.00794
|
data/lib/protk/protein.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'protk/peptide'
|
2
|
+
require 'protk/mzidentml_doc'
|
2
3
|
|
3
4
|
include LibXML
|
4
5
|
|
@@ -14,6 +15,21 @@ class Protein
|
|
14
15
|
attr_accessor :percent_coverage
|
15
16
|
attr_accessor :peptides
|
16
17
|
|
18
|
+
# Builds the protXML <protein> element describing this protein.
#
# The identification attributes are copied onto the element as strings and
# one child node is appended per peptide (using each peptide's own
# as_protxml).
#
# @return [XML::Node] the populated <protein> node
def as_protxml
  node = XML::Node.new('protein')
  node['protein_name'] = protein_name.to_s
  node['n_indistinguishable_proteins'] = n_indistinguishable_proteins.to_s
  node['probability'] = probability.to_s
  node['percent_coverage'] = percent_coverage.to_s
  # The attribute advertises *unique* sequences, so a sequence carried by
  # more than one peptide entry is listed once (first-seen order is kept).
  # total_number_peptides below still counts every entry.
  node['unique_stripped_peptides'] = peptides.map { |p| p.sequence }.uniq.join("+")
  node['total_number_peptides'] = peptides.length.to_s
  peptides.each { |peptide| node << peptide.as_protxml }
  node
end
|
31
|
+
|
32
|
+
|
17
33
|
class << self
|
18
34
|
|
19
35
|
# <protein_group group_number="1" probability="1.0000">
|
@@ -46,6 +62,52 @@ class Protein
|
|
46
62
|
prot.peptides = peptide_nodes.collect { |e| Peptide.from_protxml(e) }
|
47
63
|
prot
|
48
64
|
end
|
65
|
+
|
66
|
+
|
67
|
+
# <ProteinAmbiguityGroup id="PAG_0">
|
68
|
+
# <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
|
69
|
+
# <PeptideHypothesis peptideEvidence_ref="PepEv_1">
|
70
|
+
# <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
|
71
|
+
# </PeptideHypothesis>
|
72
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
|
73
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
|
74
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
|
75
|
+
# </ProteinDetectionHypothesis>
|
76
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002470" name="PeptideShaker protein group score" value="0.0"/>
|
77
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002471" name="PeptideShaker protein group confidence" value="0.0"/>
|
78
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002545" name="PeptideShaker protein confidence type" value="Not Validated"/>
|
79
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002415" name="protein group passes threshold" value="false"/>
|
80
|
+
# </ProteinAmbiguityGroup>
|
81
|
+
|
82
|
+
|
83
|
+
# Note:
|
84
|
+
# This is hacked together to work for a specific PeptideShaker output type
|
85
|
+
# Refactor and properly respect cvParams for real conversion
|
86
|
+
#
|
87
|
+
# Builds a Protein from an mzIdentML <ProteinDetectionHypothesis> node.
#
# Written against one specific PeptideShaker export: values are looked up
# by fixed cvParam accession rather than by interpreting cvParams generally.
#
# @param xmlnode [XML::Node] the ProteinDetectionHypothesis element; its
#   parent is read as the enclosing ProteinAmbiguityGroup
# @return [Protein] the populated protein
def from_mzid(xmlnode)
  prot = new
  groupnode = xmlnode.parent

  # Group ids look like "PAG_0"; the group number is the trailing integer + 1.
  prot.group_number = groupnode.attributes['id'].split("_").last.to_i + 1

  dbsequence = MzIdentMLDoc.get_dbsequence(xmlnode, xmlnode.attributes['dBSequence_ref'])
  prot.protein_name = dbsequence.attributes['accession']
  prot.n_indistinguishable_proteins = MzIdentMLDoc.get_proteins_for_group(groupnode).length

  # MS:1002470 = "PeptideShaker protein group score"
  prot.group_probability = MzIdentMLDoc.get_cvParam(groupnode, "MS:1002470").attributes['value'].to_f

  # MS:1001093 = "sequence coverage"; left unset when the cvParam is absent.
  coverage_node = MzIdentMLDoc.get_cvParam(xmlnode, "MS:1001093")
  prot.percent_coverage = coverage_node.attributes['value'].to_f if coverage_node

  prot.probability = MzIdentMLDoc.get_protein_probability(xmlnode)

  peptide_nodes = MzIdentMLDoc.get_peptides_for_protein(xmlnode)
  prot.peptides = peptide_nodes.map { |e| Peptide.from_mzid(e) }
  prot
end
|
109
|
+
|
110
|
+
|
49
111
|
private :new
|
50
112
|
end
|
51
113
|
|
@@ -62,11 +124,12 @@ class Protein
|
|
62
124
|
if best_peptides[seq].nil?
|
63
125
|
best_peptides[seq]=peptide
|
64
126
|
else
|
65
|
-
best_peptides[seq]=peptide if peptide.
|
127
|
+
best_peptides[seq]=peptide if peptide.probability > best_peptides[seq].probability
|
66
128
|
end
|
67
129
|
end
|
68
130
|
|
69
131
|
best_peptides.values
|
70
132
|
end
|
71
133
|
|
134
|
+
|
72
135
|
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
require 'protk/peptide'
|
3
|
+
require 'protk/protein'
|
4
|
+
require 'protk/mzidentml_doc'
|
5
|
+
require 'protk/protxml_writer'
|
6
|
+
|
7
|
+
include LibXML
|
8
|
+
|
9
|
+
|
10
|
+
# A group of proteins read from an mzIdentML <ProteinAmbiguityGroup> and
# writable as a protXML <protein_group> element.
class ProteinGroup

  attr_accessor :group_number, :group_probability, :proteins

  # Builds a ProteinGroup from a <ProteinAmbiguityGroup> node such as:
  #
  #   <ProteinAmbiguityGroup id="PAG_0">
  #     <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
  #       <PeptideHypothesis peptideEvidence_ref="PepEv_1">
  #         <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
  #       </PeptideHypothesis>
  #       <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
  #       <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
  #       <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
  #     </ProteinDetectionHypothesis>
  #     <cvParam cvRef="PSI-MS" accession="MS:1002470" name="PeptideShaker protein group score" value="0.0"/>
  #     <cvParam cvRef="PSI-MS" accession="MS:1002471" name="PeptideShaker protein group confidence" value="0.0"/>
  #     <cvParam cvRef="PSI-MS" accession="MS:1002545" name="PeptideShaker protein confidence type" value="Not Validated"/>
  #     <cvParam cvRef="PSI-MS" accession="MS:1002415" name="protein group passes threshold" value="false"/>
  #   </ProteinAmbiguityGroup>
  #
  # Note: hacked together for one specific PeptideShaker output type.
  # Refactor to properly respect cvParams for a real conversion.
  #
  # @param groupnode [XML::Node] the ProteinAmbiguityGroup element
  # @return [ProteinGroup]
  def self.from_mzid(groupnode)
    new.tap do |group|
      # "PAG_0" -> 1: trailing integer of the id, plus one
      group_id = groupnode.attributes['id']
      group.group_number = group_id.split("_").last.to_i + 1

      # MS:1002470 = "PeptideShaker protein group score"
      score_param = MzIdentMLDoc.get_cvParam(groupnode, "MS:1002470")
      group.group_probability = score_param.attributes['value'].to_f

      group.proteins = MzIdentMLDoc.get_proteins_for_group(groupnode).map do |protein_node|
        Protein.from_mzid(protein_node)
      end
    end
  end

  # Instances are only created through from_mzid.
  private_class_method :new

  # Renders this group as a protXML <protein_group> element with one child
  # per protein.
  #
  # @return [XML::Node]
  def as_protxml
    group_node = XML::Node.new('protein_group')
    group_node["group_number"] = group_number.to_s
    group_node["group_probability"] = group_probability.to_s
    proteins.each do |prot|
      group_node << prot.as_protxml
    end
    group_node
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
include LibXML
|
2
|
+
|
3
|
+
# Accumulates protein_group nodes under the root of a bundled protXML
# template document and writes the result to disk.
class ProtXMLWriter

  PROTXML_NS_PREFIX="protxml"
  PROTXML_NS="http://regis-web.systemsbiology.net/protXML"

  # template_doc:         the parsed template document
  # protein_summary_node: root element of the template; groups are appended here
  attr_reader :template_doc, :protein_summary_node

  # Parses data/template_prot.xml (located relative to this source file) and
  # remembers its root element as the append target.
  def initialize
    template_path = "#{File.dirname(__FILE__)}/data/template_prot.xml"
    @template_doc = XML::Parser.file(template_path).parse
    @protein_summary_node = @template_doc.root
  end

  # Appends a protein_group node to the document root.
  #
  # @param pg_node [XML::Node] node to append
  def append_protein_group(pg_node)
    protein_summary_node << pg_node
  end

  # Saves the document, indented and UTF-8 encoded.
  #
  # @param file_path [String] destination path
  def save(file_path)
    template_doc.save(file_path, :indent => true, :encoding => XML::Encoding::UTF_8)
  end

end
|
data/lib/protk/psm.rb
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
|
2
|
+
require 'protk/mzidentml_doc'
|
3
|
+
require 'libxml'
|
4
|
+
|
5
|
+
include LibXML
|
6
|
+
|
7
|
+
|
8
|
+
class String
  # Interprets the string as a boolean flag (case-insensitive).
  #
  # Accepted truthy spellings: true, t, yes, y, 1.
  # Accepted falsy spellings:  false, f, no, n, 0.
  #
  # The whole string must be one of these words. It is anchored with \A and
  # \Z, so a single trailing newline is still tolerated, but a multi-line
  # string such as "yes\nno" is rejected instead of matching on one line.
  #
  # @return [Boolean]
  # @raise [ArgumentError] when the string is not a recognised spelling
  def to_bool
    return true if self =~ /\A(?:true|t|yes|y|1)\Z/i
    return false if self =~ /\A(?:false|f|no|n|0)\Z/i
    raise ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
|
15
|
+
|
16
|
+
# One placement of a peptide within a protein, read from an mzIdentML
# <PeptideEvidence> element and writable as a pepXML <alternative_protein>.
class PeptideEvidence
  attr_accessor :peptide_prev_aa, :peptide_next_aa, :protein, :protein_descr, :is_decoy

  # Builds a PeptideEvidence from a node such as:
  #
  #   <PeptideEvidence isDecoy="false" pre="K" post="G" start="712"
  #     end="722" peptide_ref="KSPVYKVHFTR"
  #     dBSequence_ref="JEMP01000193.1_rev_g3500.t1" id="PepEv_1" />
  #
  # The protein accession and description come from the referenced
  # DBSequence element:
  #
  #   <DBSequence id="JEMP01000193.1_rev_g3500.t1"
  #     accession="JEMP01000193.1_rev_g3500.t1"
  #     searchDatabase_ref="SearchDB_1">
  #     <cvParam cvRef="PSI-MS" accession="MS:1001088"
  #       name="protein description" value="280755|283436" />
  #   </DBSequence>
  #
  # @param pe_node [XML::Node] the PeptideEvidence element
  # @return [PeptideEvidence]
  def self.from_mzid(pe_node)
    attrs = pe_node.attributes

    evidence = new
    evidence.peptide_prev_aa = attrs['pre']
    evidence.peptide_next_aa = attrs['post']
    evidence.is_decoy = attrs['isDecoy'].to_bool

    prot_ref = attrs['dBSequence_ref']
    prot_node = MzIdentMLDoc.find(pe_node,"DBSequence[@id=\'#{prot_ref}\']",true)[0]

    evidence.protein = prot_node.attributes['accession']
    # MS:1001088 = "protein description"
    evidence.protein_descr = MzIdentMLDoc.get_cvParam(prot_node,"MS:1001088")['value']

    evidence
  end

  # Instances are only created through from_mzid.
  private_class_method :new

  # Renders this evidence as a pepXML element such as:
  #
  #   <alternative_protein protein="lcl|JEMP01000005.1_rev_g4624.t1"
  #     protein_descr="652491|654142" num_tol_term="2" peptide_prev_aa="K" peptide_next_aa="Y"/>
  #
  # Only used for alternative proteins: the first peptide evidence item is
  # baked into the attributes of the spectrum_query's search_hit instead.
  #
  # @return [XML::Node]
  def as_pepxml
    fields = %w[protein protein_descr peptide_prev_aa peptide_next_aa]
    fields.each_with_object(XML::Node.new('alternative_protein')) do |field, alt_node|
      alt_node[field] = public_send(field)
    end
  end

end
|
80
|
+
|
81
|
+
# <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8167.00003.00003.2" start_scan="3" end_scan="3"
|
82
|
+
#precursor_neutral_mass="1398.7082" assumed_charge="2" index="2" experiment_label="mr176">
|
83
|
+
# <search_result>
|
84
|
+
# <search_hit hit_rank="1" peptide="SQVFQLESTFDV" peptide_prev_aa="R" peptide_next_aa="K" protein="tr|Q90853|Q90853_CHICK"
|
85
|
+
# protein_descr="Homeobox protein OS=Gallus gallus GN=GH6 PE=2 SV=1" num_tot_proteins="1"
|
86
|
+
# num_matched_ions="9" tot_num_ions="22" calc_neutral_pep_mass="1380.6557" massdiff="18.053" num_tol_term="1"
|
87
|
+
# num_missed_cleavages="0" is_rejected="0">
|
88
|
+
# <search_score name="hyperscore" value="23.9"/>
|
89
|
+
# <search_score name="nextscore" value="19.3"/>
|
90
|
+
# <search_score name="bscore" value="9.6"/>
|
91
|
+
# <search_score name="yscore" value="7.6"/>
|
92
|
+
# <search_score name="cscore" value="0"/>
|
93
|
+
# <search_score name="zscore" value="0"/>
|
94
|
+
# <search_score name="ascore" value="0"/>
|
95
|
+
# <search_score name="xscore" value="0"/>
|
96
|
+
# <search_score name="expect" value="0.099"/>
|
97
|
+
# <analysis_result analysis="peptideprophet">
|
98
|
+
# <peptideprophet_result probability="0.9997" all_ntt_prob="(0.0000,0.9997,0.9999)">
|
99
|
+
# <search_score_summary>
|
100
|
+
# <parameter name="fval" value="2.3571"/>
|
101
|
+
# <parameter name="ntt" value="1"/>
|
102
|
+
# <parameter name="nmc" value="0"/>
|
103
|
+
# <parameter name="massd" value="18.053"/>
|
104
|
+
# </search_score_summary>
|
105
|
+
# </peptideprophet_result>
|
106
|
+
# </analysis_result>
|
107
|
+
# </search_hit>
|
108
|
+
# </search_result>
|
109
|
+
# </spectrum_query>
|
110
|
+
|
111
|
+
# A peptide-spectrum match read from an mzIdentML
# <SpectrumIdentificationItem> and writable as a pepXML <search_result>.
class PSM

  attr_accessor :peptide, :calculated_mz, :experimental_mz, :charge
  attr_accessor :scores, :peptide_evidence

  # Builds a PSM from a <SpectrumIdentificationItem> node, e.g. the inner
  # element of:
  #
  #   <SpectrumIdentificationResult spectraData_ref="ma201_Vp_1-10.mzML.mgf"
  #     spectrumID="index=3152" id="SIR_1">
  #     <SpectrumIdentificationItem passThreshold="false"
  #       rank="1" peptide_ref="KSPVYKVHFTR"
  #       calculatedMassToCharge="1360.7615466836999"
  #       experimentalMassToCharge="1362.805053710938"
  #       chargeState="1" id="SII_1_1">
  #       <PeptideEvidenceRef peptideEvidence_ref="PepEv_1" />
  #       <Fragmentation> ... </Fragmentation>
  #       <cvParam cvRef="PSI-MS" accession="MS:1002466"
  #         name="PeptideShaker PSM score" value="0.0" />
  #       <cvParam cvRef="PSI-MS" accession="MS:1002467"
  #         name="PeptideShaker PSM confidence" value="0.0" />
  #       <cvParam cvRef="PSI-MS" accession="MS:1002052"
  #         name="MS-GF:SpecEValue" value="1.4757611E-6" />
  #       <cvParam cvRef="PSI-MS" accession="MS:1001117"
  #         name="theoretical mass" value="1360.7615466836999" />
  #       <cvParam cvRef="PSI-MS" accession="MS:1002543"
  #         name="PeptideShaker PSM confidence type"
  #         value="Not Validated" />
  #     </SpectrumIdentificationItem>
  #     <cvParam cvRef="PSI-MS" accession="MS:1000796"
  #       name="spectrum title"
  #       value="Suresh Vp 1 to 10_BAF.3535.3535.1" />
  #     <cvParam cvRef="PSI-MS" accession="MS:1000894"
  #       name="retention time" value="6855.00001" unitCvRef="UO"
  #       unitAccession="UO:0000010" unitName="seconds" />
  #   </SpectrumIdentificationResult>
  #
  # Only the sequence, peptide evidence, m/z values and charge are read here.
  #
  # @param psm_node [XML::Node] the SpectrumIdentificationItem element
  # @return [PSM]
  def self.from_mzid(psm_node)
    psm = new
    psm.peptide = MzIdentMLDoc.get_sequence_for_psm(psm_node)
    psm.peptide_evidence = MzIdentMLDoc.get_peptide_evidence_from_psm(psm_node).map do |pe|
      PeptideEvidence.from_mzid(pe)
    end

    item_attrs = psm_node.attributes
    psm.calculated_mz = item_attrs['calculatedMassToCharge'].to_f
    psm.experimental_mz = item_attrs['experimentalMassToCharge'].to_f
    psm.charge = item_attrs['chargeState'].to_i

    psm
  end

  # Instances are only created through from_mzid.
  private_class_method :new

  # Renders this match as a pepXML <search_result> wrapping one <search_hit>:
  #
  #   <search_hit hit_rank="1" peptide="GGYNQDGGSGGGYQGGGGYSGGGGGYQGGQR"
  #     peptide_prev_aa="R" peptide_next_aa="N"
  #     protein="lcl|JEMP01000008.1_fwd_g5144.t1"
  #     num_tot_proteins="1"
  #     calc_neutral_pep_mass="2768.11967665812"
  #     massdiff="0.120361328125"
  #     protein_descr="4860|5785"
  #     num_tol_term="2"
  #     num_missed_cleavages="0">
  #
  # Only peptide, peptide_prev_aa, peptide_next_aa, protein, protein_descr
  # and num_tot_proteins are written by this method. The first peptide
  # evidence supplies the hit's own attributes; every further evidence
  # becomes an <alternative_protein> child.
  #
  # From what I can tell, search_hit is always trivially wrapped in
  # search_result 1:1.
  #
  # @return [XML::Node] the <search_result> node
  def as_pepxml
    hit = XML::Node.new('search_hit')
    hit['peptide'] = peptide.to_s

    primary = peptide_evidence.first
    %w[peptide_prev_aa peptide_next_aa protein protein_descr].each do |field|
      hit[field] = primary.public_send(field)
    end

    hit['num_tot_proteins'] = peptide_evidence.length.to_s

    peptide_evidence.drop(1).each do |alternative|
      hit << alternative.as_pepxml
    end

    wrapper = XML::Node.new('search_result')
    wrapper << hit
    wrapper
  end

end
|
data/lib/protk/search_tool.rb
CHANGED
@@ -5,8 +5,7 @@
|
|
5
5
|
# Provides common functionality used by all msms search tools.
|
6
6
|
#
|
7
7
|
# It allows:
|
8
|
-
# 1.
|
9
|
-
# 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
|
8
|
+
# 1. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
|
10
9
|
#
|
11
10
|
|
12
11
|
require 'optparse'
|
@@ -21,10 +20,6 @@ class SearchTool < Tool
|
|
21
20
|
def initialize(option_support=[])
|
22
21
|
super(option_support)
|
23
22
|
|
24
|
-
# if (option_support.include? :database)
|
25
|
-
# add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
|
26
|
-
# end
|
27
|
-
|
28
23
|
if ( option_support.include? :enzyme )
|
29
24
|
add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
|
30
25
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
# Guesses the format of a spectrum file by inspecting its first lines.
class Sniffer

  # Number of leading lines read from the file when sniffing.
  @sniff_lines = 100

  # An <mzML ...> tag followed by the PSI mzML namespace URI. Multiline mode
  # lets the namespace attribute sit on a later line than the tag itself.
  MZML_SIGNATURE = %r{<mzML.*http://psi\.hupo\.org/ms/mzml}m

  # A line starting with "BEGIN IONS".
  MGF_SIGNATURE = /^BEGIN IONS/

  # Detects the format of the file at +filepath+.
  #
  # @param filepath [String] path of the file to inspect
  # @return [String, nil] "mgf", "mzML", or nil if undetectable
  def self.sniff_format(filepath)
    return "mgf" if is_mgf_format(filepath)
    return "mzML" if is_mzml_format(filepath)
    nil
  end

  # @param filepath [String] path of the file to inspect
  # @return [Boolean] true when the head of the file contains an mzML root
  #   tag carrying the PSI mzML namespace
  def self.is_mzml_format(filepath)
    !(read_head(filepath) =~ MZML_SIGNATURE).nil?
  end

  # @param filepath [String] path of the file to inspect
  # @return [Boolean] true when a line in the head of the file starts with
  #   "BEGIN IONS"
  def self.is_mgf_format(filepath)
    !(read_head(filepath) =~ MGF_SIGNATURE).nil?
  end

  # Reads the first @sniff_lines lines of the file as one string.
  #
  # The file is opened in binary mode so arbitrary bytes (e.g. a non-text
  # file handed to the sniffer) cannot make the regex match raise an
  # "invalid byte sequence" ArgumentError; such files simply do not match.
  def self.read_head(filepath)
    File.open(filepath, "rb") { |io| io.each_line.first(@sniff_lines).join }
  end
  private_class_method :read_head

end
|