protk 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +32 -15
- data/bin/mzid_to_pepxml.rb +75 -0
- data/bin/mzid_to_protxml.rb +77 -0
- data/bin/protxml_to_gff.rb +1 -1
- data/bin/sixframe.rb +24 -5
- data/bin/spectrast_create.rb +125 -0
- data/bin/spectrast_filter.rb +108 -0
- data/lib/protk/command_runner.rb +1 -1
- data/lib/protk/data/template_pep.xml +34 -0
- data/lib/protk/data/template_prot.xml +39 -0
- data/lib/protk/mzidentml_doc.rb +140 -0
- data/lib/protk/mzml_parser.rb +9 -0
- data/lib/protk/peptide.rb +39 -5
- data/lib/protk/pepxml_writer.rb +24 -0
- data/lib/protk/physical_constants.rb +1 -0
- data/lib/protk/protein.rb +64 -1
- data/lib/protk/protein_group.rb +70 -0
- data/lib/protk/protxml_writer.rb +27 -0
- data/lib/protk/psm.rb +222 -0
- data/lib/protk/search_tool.rb +1 -6
- data/lib/protk/sniffer.rb +35 -0
- data/lib/protk/spectrum_query.rb +132 -0
- metadata +20 -2
@@ -0,0 +1 @@
|
|
1
|
+
HYDROGEN_MASS=1.00794
|
data/lib/protk/protein.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'protk/peptide'
|
2
|
+
require 'protk/mzidentml_doc'
|
2
3
|
|
3
4
|
include LibXML
|
4
5
|
|
@@ -14,6 +15,21 @@ class Protein
|
|
14
15
|
attr_accessor :percent_coverage
|
15
16
|
attr_accessor :peptides
|
16
17
|
|
18
|
+
# Serialises this protein as a protXML <protein> element.
#
# Scalar fields are written as string attributes, the peptide sequences are
# joined with "+" into unique_stripped_peptides, and each peptide contributes
# one child node via Peptide#as_protxml.
#
# Returns the newly built XML::Node.
def as_protxml
  protein_node = XML::Node.new('protein')

  # Attribute order is kept as-is because it determines the order in the output.
  {
    'protein_name' => protein_name,
    'n_indistinguishable_proteins' => n_indistinguishable_proteins,
    'probability' => probability,
    'percent_coverage' => percent_coverage
  }.each do |attribute_name, value|
    protein_node[attribute_name] = value.to_s
  end

  sequences = peptides.map { |pep| pep.sequence }
  protein_node['unique_stripped_peptides'] = sequences.join("+")
  protein_node['total_number_peptides'] = peptides.length.to_s

  peptides.each { |pep| protein_node << pep.as_protxml }
  protein_node
end
|
31
|
+
|
32
|
+
|
17
33
|
class << self
|
18
34
|
|
19
35
|
# <protein_group group_number="1" probability="1.0000">
|
@@ -46,6 +62,52 @@ class Protein
|
|
46
62
|
prot.peptides = peptide_nodes.collect { |e| Peptide.from_protxml(e) }
|
47
63
|
prot
|
48
64
|
end
|
65
|
+
|
66
|
+
|
67
|
+
# <ProteinAmbiguityGroup id="PAG_0">
|
68
|
+
# <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
|
69
|
+
# <PeptideHypothesis peptideEvidence_ref="PepEv_1">
|
70
|
+
# <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
|
71
|
+
# </PeptideHypothesis>
|
72
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
|
73
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
|
74
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
|
75
|
+
# </ProteinDetectionHypothesis>
|
76
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002470" name="PeptideShaker protein group score" value="0.0"/>
|
77
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002471" name="PeptideShaker protein group confidence" value="0.0"/>
|
78
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002545" name="PeptideShaker protein confidence type" value="Not Validated"/>
|
79
|
+
# <cvParam cvRef="PSI-MS" accession="MS:1002415" name="protein group passes threshold" value="false"/>
|
80
|
+
# </ProteinAmbiguityGroup>
|
81
|
+
|
82
|
+
|
83
|
+
# Note:
|
84
|
+
# This is hacked together to work for a specific PeptideShaker output type
|
85
|
+
# Refactor and properly respect cvParams for real conversion
|
86
|
+
#
|
87
|
+
# Builds a Protein from an mzIdentML ProteinDetectionHypothesis node.
#
# xmlnode - the ProteinDetectionHypothesis node; its parent is used as the
#           group node (presumably the enclosing ProteinAmbiguityGroup --
#           confirm against MzIdentMLDoc callers).
#
# Group-level values (group number, group score, number of proteins) come
# from the parent node; coverage, probability and peptides come from xmlnode.
# percent_coverage is left unset when the MS:1001093 cvParam is absent.
# NOTE(review): the MS:1002470 group score lookup is not nil-guarded and will
# raise NoMethodError if that cvParam is missing.
#
# Returns the populated Protein.
def from_mzid(xmlnode)
  prot = new
  groupnode = xmlnode.parent

  # Ids look like "PAG_0": take the trailing integer and add one.
  prot.group_number = groupnode.attributes['id'].split("_").last.to_i + 1
  prot.protein_name = MzIdentMLDoc.get_dbsequence(xmlnode, xmlnode.attributes['dBSequence_ref']).attributes['accession']
  prot.n_indistinguishable_proteins = MzIdentMLDoc.get_proteins_for_group(groupnode).length
  prot.group_probability = MzIdentMLDoc.get_cvParam(groupnode, "MS:1002470").attributes['value'].to_f

  coverage_node = MzIdentMLDoc.get_cvParam(xmlnode, "MS:1001093")
  prot.percent_coverage = coverage_node.attributes['value'].to_f if coverage_node
  prot.probability = MzIdentMLDoc.get_protein_probability(xmlnode)

  peptide_nodes = MzIdentMLDoc.get_peptides_for_protein(xmlnode)
  prot.peptides = peptide_nodes.collect { |e| Peptide.from_mzid(e) }
  prot
end
|
109
|
+
|
110
|
+
|
49
111
|
private :new
|
50
112
|
end
|
51
113
|
|
@@ -62,11 +124,12 @@ class Protein
|
|
62
124
|
if best_peptides[seq].nil?
|
63
125
|
best_peptides[seq]=peptide
|
64
126
|
else
|
65
|
-
best_peptides[seq]=peptide if peptide.
|
127
|
+
best_peptides[seq]=peptide if peptide.probability > best_peptides[seq].probability
|
66
128
|
end
|
67
129
|
end
|
68
130
|
|
69
131
|
best_peptides.values
|
70
132
|
end
|
71
133
|
|
134
|
+
|
72
135
|
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
|
2
|
+
require 'protk/peptide'
|
3
|
+
require 'protk/protein'
|
4
|
+
require 'protk/mzidentml_doc'
|
5
|
+
require 'protk/protxml_writer'
|
6
|
+
|
7
|
+
include LibXML
|
8
|
+
|
9
|
+
|
10
|
+
# A group of proteins together with its group number and group probability,
# built from an mzIdentML group node and serialisable as a protXML
# <protein_group> element.
class ProteinGroup

  attr_accessor :group_number, :group_probability, :proteins

  class << self

    # Example of the mzIdentML input this constructor reads:
    #
    # <ProteinAmbiguityGroup id="PAG_0">
    # <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
    # <PeptideHypothesis peptideEvidence_ref="PepEv_1">
    # <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
    # </PeptideHypothesis>
    # <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
    # <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
    # <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
    # </ProteinDetectionHypothesis>
    # <cvParam cvRef="PSI-MS" accession="MS:1002470" name="PeptideShaker protein group score" value="0.0"/>
    # <cvParam cvRef="PSI-MS" accession="MS:1002471" name="PeptideShaker protein group confidence" value="0.0"/>
    # <cvParam cvRef="PSI-MS" accession="MS:1002545" name="PeptideShaker protein confidence type" value="Not Validated"/>
    # <cvParam cvRef="PSI-MS" accession="MS:1002415" name="protein group passes threshold" value="false"/>
    # </ProteinAmbiguityGroup>
    #
    # Note:
    # This is hacked together to work for a specific PeptideShaker output type.
    # Refactor and properly respect cvParams for real conversion.
    #
    # groupnode - the group node; its 'id' attribute and its MS:1002470
    #             cvParam are read directly.
    #
    # Returns the populated ProteinGroup.
    def from_mzid(groupnode)
      new.tap do |pg|
        # "PAG_0" -> trailing integer plus one.
        pg.group_number = groupnode.attributes['id'].split("_").last.to_i + 1
        pg.group_probability = MzIdentMLDoc.get_cvParam(groupnode, "MS:1002470").attributes['value'].to_f

        member_nodes = MzIdentMLDoc.get_proteins_for_group(groupnode)
        pg.proteins = member_nodes.map { |member| Protein.from_mzid(member) }
      end
    end

    # Instances are only created through from_mzid.
    private :new
  end

  def initialize
  end

  # Serialises the group as a protXML <protein_group> element with one child
  # per protein (via Protein#as_protxml).
  #
  # Returns the newly built XML::Node.
  def as_protxml
    group_node = XML::Node.new('protein_group')
    group_node["group_number"] = group_number.to_s
    group_node["group_probability"] = group_probability.to_s
    proteins.each do |protein|
      group_node << protein.as_protxml
    end
    group_node
  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
include LibXML
|
2
|
+
|
3
|
+
# Accumulates protein_group nodes under the root of a protXML template
# document and writes the result to disk.
class ProtXMLWriter

  PROTXML_NS_PREFIX = "protxml"
  PROTXML_NS = "http://regis-web.systemsbiology.net/protXML"

  # The parsed template document and its root node (read-only).
  attr_reader :template_doc, :protein_summary_node

  # Parses data/template_prot.xml, located relative to this source file, and
  # keeps its root element as the node that protein groups are appended to.
  def initialize
    template_path = "#{File.dirname(__FILE__)}/data/template_prot.xml"
    @template_doc = XML::Parser.file(template_path).parse
    @protein_summary_node = @template_doc.root
  end

  # Appends an already-built protein_group node to the template's root node.
  def append_protein_group(pg_node)
    @protein_summary_node << pg_node
  end

  # Writes the document to file_path, indented and UTF-8 encoded.
  def save(file_path)
    @template_doc.save(file_path, indent: true, encoding: XML::Encoding::UTF_8)
  end

end
|
data/lib/protk/psm.rb
ADDED
@@ -0,0 +1,222 @@
|
|
1
|
+
|
2
|
+
require 'protk/mzidentml_doc'
|
3
|
+
require 'libxml'
|
4
|
+
|
5
|
+
include LibXML
|
6
|
+
|
7
|
+
|
8
|
+
class String
  # Interprets the whole string as a boolean flag (case-insensitive).
  #
  # Returns true for "true", "t", "yes", "y" or "1", and false for
  # "false", "f", "no", "n" or "0".
  #
  # The pattern is anchored with \A and \z so the entire string must be one
  # of the accepted tokens. Line anchors (^ and $) would accept any
  # multi-line string that merely contains such a token on one of its lines.
  #
  # Raises ArgumentError for any other value, including the empty string.
  def to_bool
    return true if self =~ /\A(true|t|yes|y|1)\z/i
    return false if self =~ /\A(false|f|no|n|0)\z/i
    raise ArgumentError, "invalid value for Boolean: \"#{self}\""
  end
end
|
15
|
+
|
16
|
+
# One placement of a peptide within a protein: the flanking residues, the
# protein accession and description, and whether the match is a decoy.
class PeptideEvidence

  attr_accessor :peptide_prev_aa, :peptide_next_aa
  attr_accessor :protein, :protein_descr
  # attr_accessor :peptide_sequence
  attr_accessor :is_decoy

  # Example mzIdentML input:
  #
  # <PeptideEvidence isDecoy="false" pre="K" post="G" start="712"
  # end="722" peptide_ref="KSPVYKVHFTR"
  # dBSequence_ref="JEMP01000193.1_rev_g3500.t1" id="PepEv_1" />
  class << self

    # Builds a PeptideEvidence from a PeptideEvidence node.
    #
    # The referenced DBSequence is looked up through MzIdentMLDoc.find and
    # supplies the accession and the MS:1001088 (protein description) value:
    #
    # <DBSequence id="JEMP01000193.1_rev_g3500.t1"
    # accession="JEMP01000193.1_rev_g3500.t1"
    # searchDatabase_ref="SearchDB_1">
    # <cvParam cvRef="PSI-MS" accession="MS:1001088"
    # name="protein description" value="280755|283436" />
    # </DBSequence>
    #
    # Returns the populated PeptideEvidence.
    def from_mzid(pe_node)
      new.tap do |evidence|
        evidence.peptide_prev_aa = pe_node.attributes['pre']
        evidence.peptide_next_aa = pe_node.attributes['post']
        evidence.is_decoy = pe_node.attributes['isDecoy'].to_bool

        prot_ref = pe_node.attributes['dBSequence_ref']
        db_sequence = MzIdentMLDoc.find(pe_node,"DBSequence[@id=\'#{prot_ref}\']",true)[0]

        evidence.protein = db_sequence.attributes['accession']
        evidence.protein_descr = MzIdentMLDoc.get_cvParam(db_sequence, "MS:1001088")['value']
      end
    end

    # Instances are only created through from_mzid.
    private :new
  end

  def initialize
  end

  # Serialises this evidence as a pepXML <alternative_protein> element, e.g.
  #
  # <alternative_protein protein="lcl|JEMP01000005.1_rev_g4624.t1"
  # protein_descr="652491|654142" num_tol_term="2" peptide_prev_aa="K" peptide_next_aa="Y"/>
  #
  # We use this only for alternative_proteins.
  # The first peptide_evidence item is baked into the attributes of a spectrum_query.
  #
  # Returns the newly built XML::Node.
  def as_pepxml
    node = XML::Node.new('alternative_protein')
    {
      'protein' => protein,
      'protein_descr' => protein_descr,
      'peptide_prev_aa' => peptide_prev_aa,
      'peptide_next_aa' => peptide_next_aa
    }.each { |attribute_name, value| node[attribute_name] = value }
    node
  end

end
|
80
|
+
|
81
|
+
# <spectrum_query spectrum="mr176-BSA100fmole_BA3_01_8167.00003.00003.2" start_scan="3" end_scan="3"
|
82
|
+
#precursor_neutral_mass="1398.7082" assumed_charge="2" index="2" experiment_label="mr176">
|
83
|
+
# <search_result>
|
84
|
+
# <search_hit hit_rank="1" peptide="SQVFQLESTFDV" peptide_prev_aa="R" peptide_next_aa="K" protein="tr|Q90853|Q90853_CHICK"
|
85
|
+
# protein_descr="Homeobox protein OS=Gallus gallus GN=GH6 PE=2 SV=1" num_tot_proteins="1"
|
86
|
+
# num_matched_ions="9" tot_num_ions="22" calc_neutral_pep_mass="1380.6557" massdiff="18.053" num_tol_term="1"
|
87
|
+
# num_missed_cleavages="0" is_rejected="0">
|
88
|
+
# <search_score name="hyperscore" value="23.9"/>
|
89
|
+
# <search_score name="nextscore" value="19.3"/>
|
90
|
+
# <search_score name="bscore" value="9.6"/>
|
91
|
+
# <search_score name="yscore" value="7.6"/>
|
92
|
+
# <search_score name="cscore" value="0"/>
|
93
|
+
# <search_score name="zscore" value="0"/>
|
94
|
+
# <search_score name="ascore" value="0"/>
|
95
|
+
# <search_score name="xscore" value="0"/>
|
96
|
+
# <search_score name="expect" value="0.099"/>
|
97
|
+
# <analysis_result analysis="peptideprophet">
|
98
|
+
# <peptideprophet_result probability="0.9997" all_ntt_prob="(0.0000,0.9997,0.9999)">
|
99
|
+
# <search_score_summary>
|
100
|
+
# <parameter name="fval" value="2.3571"/>
|
101
|
+
# <parameter name="ntt" value="1"/>
|
102
|
+
# <parameter name="nmc" value="0"/>
|
103
|
+
# <parameter name="massd" value="18.053"/>
|
104
|
+
# </search_score_summary>
|
105
|
+
# </peptideprophet_result>
|
106
|
+
# </analysis_result>
|
107
|
+
# </search_hit>
|
108
|
+
# </search_result>
|
109
|
+
# </spectrum_query>
|
110
|
+
|
111
|
+
# A peptide-spectrum match read from an mzIdentML SpectrumIdentificationItem
# and serialisable as a pepXML <search_result>.
class PSM

  attr_accessor :peptide, :calculated_mz, :experimental_mz, :charge
  attr_accessor :scores, :peptide_evidence

  class << self

    # Example mzIdentML input:
    #
    # <SpectrumIdentificationResult spectraData_ref="ma201_Vp_1-10.mzML.mgf"
    # spectrumID="index=3152" id="SIR_1">
    # <SpectrumIdentificationItem passThreshold="false"
    # rank="1" peptide_ref="KSPVYKVHFTR"
    # calculatedMassToCharge="1360.7615466836999"
    # experimentalMassToCharge="1362.805053710938"
    # chargeState="1" id="SII_1_1">
    # <PeptideEvidenceRef peptideEvidence_ref="PepEv_1" />
    # <Fragmentation>
    # <IonType charge="1" index="1 4">
    # <FragmentArray measure_ref="Measure_MZ"
    # values="175.2081208 560.3388993" />
    # <FragmentArray measure_ref="Measure_Int"
    # values="94.0459823608 116.2766723633" />
    # <FragmentArray measure_ref="Measure_Error"
    # values="0.08916864948798775 0.0449421494880653" />
    # <cvParam cvRef="PSI-MS" accession="MS:1001220"
    # name="frag: y ion" />
    # </IonType>
    # </Fragmentation>
    # <cvParam cvRef="PSI-MS" accession="MS:1002466"
    # name="PeptideShaker PSM score" value="0.0" />
    # <cvParam cvRef="PSI-MS" accession="MS:1002467"
    # name="PeptideShaker PSM confidence" value="0.0" />
    # <cvParam cvRef="PSI-MS" accession="MS:1002052"
    # name="MS-GF:SpecEValue" value="1.4757611E-6" />
    # <cvParam cvRef="PSI-MS" accession="MS:1001117"
    # name="theoretical mass" value="1360.7615466836999" />
    # <cvParam cvRef="PSI-MS" accession="MS:1002543"
    # name="PeptideShaker PSM confidence type"
    # value="Not Validated" />
    # </SpectrumIdentificationItem>
    # <cvParam cvRef="PSI-MS" accession="MS:1000796"
    # name="spectrum title"
    # value="Suresh Vp 1 to 10_BAF.3535.3535.1" />
    # <cvParam cvRef="PSI-MS" accession="MS:1000894"
    # name="retention time" value="6855.00001" unitCvRef="UO"
    # unitAccession="UO:0000010" unitName="seconds" />
    # </SpectrumIdentificationResult>
    #
    # psm_node - the node carrying calculatedMassToCharge,
    #            experimentalMassToCharge and chargeState attributes.
    #
    # Returns the populated PSM. The scores accessor is not filled in here.
    def from_mzid(psm_node)
      new.tap do |match|
        match.peptide = MzIdentMLDoc.get_sequence_for_psm(psm_node)

        evidence_nodes = MzIdentMLDoc.get_peptide_evidence_from_psm(psm_node)
        match.peptide_evidence = evidence_nodes.map do |evidence_node|
          PeptideEvidence.from_mzid(evidence_node)
        end

        match.calculated_mz = psm_node.attributes['calculatedMassToCharge'].to_f
        match.experimental_mz = psm_node.attributes['experimentalMassToCharge'].to_f
        match.charge = psm_node.attributes['chargeState'].to_i
      end
    end

    # Instances are only created through from_mzid.
    private :new
  end

  def initialize
  end

  # Serialises this match as a pepXML <search_result> wrapping one
  # <search_hit>, e.g.
  #
  # <search_hit hit_rank="1" peptide="GGYNQDGGSGGGYQGGGGYSGGGGGYQGGQR"
  # peptide_prev_aa="R" peptide_next_aa="N"
  # protein="lcl|JEMP01000008.1_fwd_g5144.t1"
  # num_tot_proteins="1"
  # calc_neutral_pep_mass="2768.11967665812"
  # massdiff="0.120361328125"
  # protein_descr="4860|5785"
  # num_tol_term="2"
  # num_missed_cleavages="0">
  #
  # From what I can tell, search_hit is always trivially wrapped in search_result 1:1
  #
  # The first peptide evidence supplies the hit's own protein attributes;
  # every remaining evidence is appended as an <alternative_protein> child.
  # Only peptide, flanking residues, protein, protein_descr and
  # num_tot_proteins are written by this method.
  #
  # Returns the <search_result> XML::Node.
  def as_pepxml
    search_hit = XML::Node.new('search_hit')
    search_hit['peptide'] = peptide.to_s

    primary = peptide_evidence.first
    search_hit['peptide_prev_aa'] = primary.peptide_prev_aa
    search_hit['peptide_next_aa'] = primary.peptide_next_aa
    search_hit['protein'] = primary.protein
    search_hit['protein_descr'] = primary.protein_descr

    search_hit['num_tot_proteins'] = peptide_evidence.length.to_s

    peptide_evidence.drop(1).each do |alternative|
      search_hit << alternative.as_pepxml
    end

    search_result = XML::Node.new('search_result')
    search_result << search_hit
    search_result
  end

end
|
data/lib/protk/search_tool.rb
CHANGED
@@ -5,8 +5,7 @@
|
|
5
5
|
# Provides common functionality used by all msms search tools.
|
6
6
|
#
|
7
7
|
# It allows;
|
8
|
-
# 1.
|
9
|
-
# 2. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
|
8
|
+
# 1. Output files to be specified via a prefix or suffix to be added to the name of the corresponding input file
|
10
9
|
#
|
11
10
|
|
12
11
|
require 'optparse'
|
@@ -21,10 +20,6 @@ class SearchTool < Tool
|
|
21
20
|
def initialize(option_support=[])
|
22
21
|
super(option_support)
|
23
22
|
|
24
|
-
# if (option_support.include? :database)
|
25
|
-
# add_value_option(:database,"sphuman",['-d', '--database dbname', 'Specify the database to use for this search. Can be a named protk database or the path to a fasta file'])
|
26
|
-
# end
|
27
|
-
|
28
23
|
if ( option_support.include? :enzyme )
|
29
24
|
add_value_option(:enzyme,"Trypsin",['--enzyme enz', 'Enzyme'])
|
30
25
|
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
# Guesses the format of a mass-spectrometry data file by inspecting its
# first lines.
class Sniffer

  # Number of leading lines inspected when sniffing.
  @sniff_lines = 100

  # "<mzML" followed by the mzML namespace URI. The m flag lets the match
  # span line breaks, so a tag whose attributes are wrapped onto following
  # lines is still recognised.
  MZML_PATTERN = %r{<mzML.*http://psi\.hupo\.org/ms/mzml}m

  # A line beginning with the MGF "BEGIN IONS" marker.
  MGF_PATTERN = /^BEGIN IONS/

  # Detects the format of the file at filepath.
  #
  # The head of the file is read once and tested for MGF first, then mzML.
  #
  # Returns "mgf" or "mzML", or nil if the format is undetectable.
  def self.sniff_format(filepath)
    head = read_head(filepath)
    return "mgf" if head =~ MGF_PATTERN
    return "mzML" if head =~ MZML_PATTERN
    nil
  end

  # Returns true if the head of the file contains an <mzML tag followed by
  # the mzML namespace URI, false otherwise.
  def self.is_mzml_format(filepath)
    read_head(filepath) =~ MZML_PATTERN ? true : false
  end

  # Returns true if a line in the head of the file starts with "BEGIN IONS",
  # false otherwise.
  def self.is_mgf_format(filepath)
    read_head(filepath) =~ MGF_PATTERN ? true : false
  end

  # Reads the first @sniff_lines lines of the file as one string.
  #
  # The file is opened in binary mode so that bytes which are not valid
  # UTF-8 cannot make the pattern match raise. Lines keep their own
  # terminators, so they are joined without adding another separator.
  def self.read_head(filepath)
    File.open(filepath, 'rb') { |io| io.each_line.first(@sniff_lines).join }
  end
  private_class_method :read_head

end
|