protk 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,108 @@
1
#!/usr/bin/env ruby
#
# This file is part of protk
# Created by Ira Cooke 30/4/2015
#
# A wrapper for SpectraST commands that manipulate splib files
#
# The script translates its own command-line options into SpectraST "-c" flags,
# builds a single shell command line and runs it. Every user-supplied value
# (predicate, operations, output path, input paths) is shell-escaped so that
# paths containing spaces or quotes reach SpectraST as a single argument.
#

require 'shellwords'

require 'protk/constants'
require 'protk/command_runner'
require 'protk/tool'
require 'protk/galaxy_util'

for_galaxy = GalaxyUtil.for_galaxy?

# Only referenced by the commented-out runner call near the end of the script
genv=Constants.instance

# Setup specific command-line options for this tool. Other options are inherited from Tool
#
spectrast_tool=Tool.new([:explicit_output])
spectrast_tool.option_parser.banner = "Manipulate splib files.\n\nUsage: spectrast_filter.rb [options] file1.splib file2.splib ..."
spectrast_tool.add_boolean_option(:binary_output,false,['-B','--binary-output','Produce spectral libraries in binary format rather than ASCII'])
spectrast_tool.add_value_option(:filter_predicate,nil,['--predicate pred','Keep only spectra satisfying predicate pred. Should be a C-style predicate'])
spectrast_tool.add_value_option(:merge_operation,"U",['--merge method',
  'How to combine multiple splib files (if provided). Options are U,S,H,A
  U: Union. Include all the peptide ions in all the files.
  S: Subtraction. Only include peptide ions in the first file
  that are not present in any of the other files.
  H: Subtraction of homologs. Only include peptide ions in the
  first file that do not have any homologs with
  same charge and similar m/z in any of the other files.
  A: Appending. Each peptide ion is added from only one library:
  the first file in the argument list that contains that peptide ion.
  Useful for keeping existing consensus spectra unchanged while adding
  only previously unseen peptide ions.'])
spectrast_tool.add_value_option(:spectrum_operation,"None",['--replicates method',
  'How to derive a single spectrum from replicates. Options are None, C,B
  C: Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
  B: Best replicate. Pick the best replicate of each peptide ion.'])

exit unless spectrast_tool.check_options(true)

spectrast_bin = %x[which spectrast].chomp

# Without this check an empty path would make the first flag look like the program name
abort "Unable to locate the spectrast executable on the PATH" if spectrast_bin.empty?

# LIBRARY MANIPULATION OPTIONS (Applicable with .splib files)
# -cf<pred> Filter library. Keep only those entries satisfying the predicate <pred>.
#           <pred> should be a C-style predicate in quotes.
# -cJU      Union. Include all the peptide ions in all the files.
# -cJI      Intersection. Only include peptide ions that are present in all the files.
# -cJS      Subtraction. Only include peptide ions in the first file that are not present in any of the other files.
# -cJH      Subtraction of homologs. Only include peptide ions in the first file
#           that do not have any homologs with same charge and similar m/z in any of the other files.
# -cJA      Appending. Each peptide ion is added from only one library: the first file in the argument list that contains that peptide ion.
#           Useful for keeping existing consensus spectra unchanged while adding only previously unseen peptide ions.
# -cAB      Best replicate. Pick the best replicate of each peptide ion.
# -cAC      Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
# -cAQ      Quality filter. Apply quality filters to library.
#           IMPORTANT: Quality filter can only be applied on a SINGLE .splib file with no peptide ion represented by more than one spectrum.
# -cAD      Create artificial decoy spectra.
# -cAN      Sort library entries by descending number of replicates used (tie-breaking by probability).
# -cAM      Create semi-empirical spectra based on allowable modifications specified by -cx option.
# -cQ<num>  Produce reduced spectra of at most <num> peaks. Inactive with -cAQ and -cAD.
# -cD<file> Refresh protein mappings of each library entry against the protein database <file> (Must be in .fasta format).
# -cu       Delete entries whose peptide sequences do not map to any protein during refreshing with -cD option.
#           When off, unmapped entries will be marked with Protein=0/UNMAPPED but retained in library. (Turn off with -cu!).
# -cd       Delete entries whose peptide sequences map to multiple proteins during refreshing with -cD option. (Turn off with -cd!).

input_stagers=[]
inputs=ARGV.map { |file_name| file_name.chomp }
if for_galaxy
  # Under Galaxy the inputs are staged with a .splib extension and the staged paths are used instead
  input_stagers = inputs.map { |ip| GalaxyStager.new(ip,{:extension=>".splib"}) }
  inputs = input_stagers.map { |sg| sg.staged_path }
end

# Each element is one already-escaped shell word; they are joined with spaces at the end
cmd_words=[Shellwords.escape(spectrast_bin)]

# "-c_BIN!" switches binary output off, so it is added when binary output was NOT requested
cmd_words << "-c_BIN!" unless spectrast_tool.binary_output

if spectrast_tool.filter_predicate
  cmd_words << "-cf#{Shellwords.escape(spectrast_tool.filter_predicate)}"
end

# The merge operation is only passed when there is more than one library to combine
if inputs.length > 1
  cmd_words << "-cJ#{Shellwords.escape(spectrast_tool.merge_operation)}"
end

if spectrast_tool.spectrum_operation!="None"
  cmd_words << "-cA#{Shellwords.escape(spectrast_tool.spectrum_operation)}"
end

if spectrast_tool.explicit_output.nil?
  output_file_name=Tool.default_output_path(inputs,"","","")
else
  output_file_name=spectrast_tool.explicit_output
end

cmd_words << "-cN#{Shellwords.escape(output_file_name)}"

inputs.each { |ip| cmd_words << Shellwords.escape(ip) }

cmd=cmd_words.join(" ")

# code = spectrast_tool.run(cmd,genv)
# throw "Command failed with exit code #{code}" unless code==0

# NOTE(review): the captured output and the exit status are discarded, exactly as before.
# Confirm what exit codes spectrast returns before turning a non-zero status into a failure.
%x[#{cmd}]
@@ -30,7 +30,7 @@ class CommandRunner
30
30
  def run_local(command_string)
31
31
  @env.log("Command: #{command_string} started",:info)
32
32
  status = Open4::popen4("#{command_string} ") do |pid, stdin, stdout, stderr|
33
- puts "PID #{pid}"
33
+ @env.log "PID #{pid}" , :info
34
34
 
35
35
  stdout.each { |line| @env.log(line.chomp,:info) }
36
36
 
@@ -0,0 +1,34 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xsl"?>
3
+ <msms_pipeline_analysis date="2014-06-22T15:28:36" summary_xml="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /Users/icooke/bin/tpp/schema/pepXML_v117.xsd">
4
+ <analysis_summary analysis="peptideprophet" time="2014-06-22T15:28:36">
5
+ </analysis_summary>
6
+ <analysis_summary analysis="database_refresh" time="2014-06-22T15:28:36"/>
7
+ <analysis_summary analysis="interact" time="2014-06-22T15:28:36">
8
+ <interact_summary filename="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" directory="">
9
+ <inputfile name="mr176-BSA100fmole_BA3_01_8167.d_tandem.pep.xml" directory="/Users/icooke/Sources/protk/spec/data"/>
10
+ </interact_summary>
11
+ </analysis_summary>
12
+ <dataset_derivation generation_no="0"/>
13
+ <msms_run_summary base_name="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" raw_data_type="raw" raw_data=".?">
14
+ <sample_enzyme name="trypsin">
15
+ <specificity cut="KR" no_cut="P" sense="C"/>
16
+ </sample_enzyme>
17
+ <search_summary base_name="mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
18
+ <search_database local_path="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" type="AA"/>
19
+ <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="1"/>
20
+ <aminoacid_modification aminoacid="E" massdiff="-18.0106" mass="111.0320" variable="Y" symbol="^"/>
21
+ <!--X! Tandem n-terminal AA variable modification-->
22
+ <aminoacid_modification aminoacid="M" massdiff="15.9949" mass="147.0354" variable="Y"/>
23
+ <aminoacid_modification aminoacid="Q" massdiff="-17.0265" mass="111.0321" variable="Y" symbol="^"/>
24
+ <!--X! Tandem n-terminal AA variable modification-->
25
+ <terminal_modification terminus="n" massdiff="42.0106" mass="43.0184" protein_terminus="N" variable="Y" symbol="^"/>
26
+
27
+ </search_summary>
28
+ <analysis_timestamp analysis="peptideprophet" time="2014-06-22T15:28:36" id="1"/>
29
+ <analysis_timestamp analysis="database_refresh" time="2014-06-22T15:28:36" id="1">
30
+ <database_refresh_timestamp database="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" min_num_enz_term="1"/>
31
+ </analysis_timestamp>
32
+
33
+ </msms_run_summary>
34
+ </msms_pipeline_analysis>
@@ -0,0 +1,39 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v6.xsd" summary_xml="">
3
+ <protein_summary_header reference_database="FULLPATH_TO_REFERENCE_DB" residue_substitution_list="I -&gt; L" source_files="FULLPATH_TO_SOURCE_PEPXML" source_files_alt="FULLPATH_TO_SOURCE_PEPXML" min_peptide_probability="" min_peptide_weight="" num_predicted_correct_prots="" num_input_1_spectra="" num_input_2_spectra="" num_input_3_spectra="" num_input_4_spectra="" num_input_5_spectra="" initial_min_peptide_prob="" total_no_spectrum_ids="" sample_enzyme="trypsin">
4
+ <program_details analysis="proteinprophet" time="2014-01-20T14:17:37" version=" Insilicos_LabKey_C++ (TPP v0.0 Development trunk rev 0, Build 201307090846 (linux))">
5
+ <proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="2" final_peptide_wt_iters="3">
6
+ <nsp_information neighboring_bin_smoothing="Y">
7
+ <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_incl="0.00" pos_freq="0.057" neg_freq="0.625" pos_to_neg_ratio="0.09"/>
8
+ <nsp_distribution bin_no="1" nsp_lower_bound_excl="0.00" nsp_upper_bound_incl="0.31" pos_freq="0.037" neg_freq="0.152" pos_to_neg_ratio="0.24"/>
9
+ <nsp_distribution bin_no="2" nsp_lower_bound_excl="0.31" nsp_upper_bound_incl="1.00" pos_freq="0.077" neg_freq="0.032" pos_to_neg_ratio="2.42"/>
10
+ <nsp_distribution bin_no="3" nsp_lower_bound_excl="1.00" nsp_upper_bound_incl="2.50" pos_freq="0.113" neg_freq="0.033" pos_to_neg_ratio="3.39"/>
11
+ <nsp_distribution bin_no="4" nsp_lower_bound_excl="2.50" nsp_upper_bound_incl="4.63" pos_freq="0.123" neg_freq="0.032" pos_to_neg_ratio="3.91"/>
12
+ <nsp_distribution bin_no="5" nsp_lower_bound_excl="4.63" nsp_upper_bound_incl="7.90" pos_freq="0.143" neg_freq="0.032" pos_to_neg_ratio="4.50"/>
13
+ <nsp_distribution bin_no="6" nsp_lower_bound_excl="7.90" nsp_upper_bound_incl="14.92" pos_freq="0.196" neg_freq="0.041" pos_to_neg_ratio="4.78"/>
14
+ <nsp_distribution bin_no="7" nsp_lower_bound_excl="14.92" nsp_upper_bound_excl="inf" pos_freq="0.254" neg_freq="0.054" pos_to_neg_ratio="4.72" alt_pos_to_neg_ratio="4.78"/>
15
+ </nsp_information>
16
+ <ni_information>
17
+ </ni_information>
18
+ <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.835" predicted_num_correct="1787" predicted_num_incorrect="9044"/>
19
+ <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
20
+ <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
21
+ <protein_summary_data_filter min_probability="0.30" sensitivity="0.956" false_positive_error_rate="0.151" predicted_num_correct="1709" predicted_num_incorrect="305"/>
22
+ <protein_summary_data_filter min_probability="0.40" sensitivity="0.916" false_positive_error_rate="0.095" predicted_num_correct="1638" predicted_num_incorrect="171"/>
23
+ <protein_summary_data_filter min_probability="0.50" sensitivity="0.887" false_positive_error_rate="0.063" predicted_num_correct="1585" predicted_num_incorrect="106"/>
24
+ <protein_summary_data_filter min_probability="0.60" sensitivity="0.853" false_positive_error_rate="0.036" predicted_num_correct="1525" predicted_num_incorrect="58"/>
25
+ <protein_summary_data_filter min_probability="0.70" sensitivity="0.826" false_positive_error_rate="0.020" predicted_num_correct="1477" predicted_num_incorrect="31"/>
26
+ <protein_summary_data_filter min_probability="0.80" sensitivity="0.805" false_positive_error_rate="0.012" predicted_num_correct="1438" predicted_num_incorrect="18"/>
27
+ <protein_summary_data_filter min_probability="0.90" sensitivity="0.773" false_positive_error_rate="0.006" predicted_num_correct="1381" predicted_num_incorrect="8"/>
28
+ <protein_summary_data_filter min_probability="0.95" sensitivity="0.749" false_positive_error_rate="0.004" predicted_num_correct="1339" predicted_num_incorrect="5"/>
29
+ <protein_summary_data_filter min_probability="0.96" sensitivity="0.738" false_positive_error_rate="0.003" predicted_num_correct="1318" predicted_num_incorrect="4"/>
30
+ <protein_summary_data_filter min_probability="0.97" sensitivity="0.728" false_positive_error_rate="0.002" predicted_num_correct="1302" predicted_num_incorrect="3"/>
31
+ <protein_summary_data_filter min_probability="0.98" sensitivity="0.711" false_positive_error_rate="0.002" predicted_num_correct="1272" predicted_num_incorrect="2"/>
32
+ <protein_summary_data_filter min_probability="0.99" sensitivity="0.609" false_positive_error_rate="0.000" predicted_num_correct="1088" predicted_num_incorrect="0"/>
33
+ <protein_summary_data_filter min_probability="1.00" sensitivity="0.164" false_positive_error_rate="0.000" predicted_num_correct="294" predicted_num_incorrect="0"/>
34
+ </proteinprophet_details>
35
+ </program_details>
36
+ </protein_summary_header>
37
+ <dataset_derivation generation_no="0">
38
+ </dataset_derivation>
39
+ </protein_summary>
@@ -0,0 +1,140 @@
1
+ require 'libxml'
2
+
3
+ include LibXML
4
+
5
# Read-only helper around a parsed mzIdentML document.
#
# Instance methods run document-wide XPath searches for the main mzIdentML
# element types. Class methods perform the same kind of search starting from
# an arbitrary node, so they can be used without holding on to the wrapper.
# Every search goes through MzIdentMLDoc.find so the namespace handling lives
# in exactly one place.
class MzIdentMLDoc

  MZID_NS_PREFIX="mzidentml"
  MZID_NS='http://psidev.info/psi/pi/mzIdentML/1.1'

  # Namespace declaration in the "prefix:uri" form handed to the node's find method
  MZID_NS_SPEC="#{MZID_NS_PREFIX}:#{MZID_NS}"

  # path:: location of the mzIdentML file to parse
  def initialize(path)
    parser=XML::Parser.file(path)
    @document=parser.parse
  end


  # All SpectrumIdentificationResult elements in the document
  def spectrum_queries
    find_in_document("SpectrumIdentificationResult")
  end

  # All PeptideEvidence elements in the document
  def peptide_evidence
    find_in_document("PeptideEvidence")
  end

  # All SpectrumIdentificationItem elements in the document
  def psms
    find_in_document("SpectrumIdentificationItem")
  end

  # All ProteinAmbiguityGroup elements in the document
  def protein_groups
    find_in_document("ProteinAmbiguityGroup")
  end


  # All ProteinDetectionHypothesis elements in the document
  def proteins
    find_in_document("ProteinDetectionHypothesis")
  end

  # Peptides are referenced in many ways in mzidentml.
  # We define a "Peptide" as a peptide supporting a particular protein
  # Such peptides may encompass several PSM's
  #
  def peptides
    find_in_document("PeptideHypothesis")
  end



  # -----------------------------------------------------------
  #
  # Class Level Utility methods for searching from a given node
  #
  # -----------------------------------------------------------

  # Run a namespaced XPath search starting at node.
  #
  # node::       any object with a find(xpath, namespace) method
  # expression:: element name (optionally followed by a predicate), without namespace prefix
  # root::       when true the search uses "//" (anywhere in the document),
  #              otherwise "./" (direct children of node)
  #
  # Returns whatever node.find returns.
  def self.find(node,expression,root=false)
    axis = root ? "//" : "./"
    node.find("#{axis}#{MZID_NS_PREFIX}:#{expression}",MZID_NS_SPEC)
  end


  # First cvParam child of mzidnode with the given accession, or nil if there is none
  def self.get_cvParam(mzidnode,accession)
    self.find(mzidnode,"cvParam[@accession='#{accession}']")[0]
  end

  # First DBSequence anywhere in the document with the given accession, or nil if there is none
  def self.get_dbsequence(mzidnode,accession)
    self.find(mzidnode,"DBSequence[@accession='#{accession}']",true)[0]
  end

  # As per PeptideShaker. Assume group probability used for protein if it is group rep otherwise 0
  #
  # A protein counts as group representative when it carries the MS:1002403 cvParam.
  # In that case the value of MS:1002470 on the parent node is scaled by 0.01.
  def self.get_protein_probability(protein_node)
    is_group_representative=!self.get_cvParam(protein_node,"MS:1002403").nil?
    return 0 unless is_group_representative

    self.get_cvParam(protein_node.parent,"MS:1002470").attributes['value'].to_f*0.01
  end

  # ProteinDetectionHypothesis children of a group node
  def self.get_proteins_for_group(group_node)
    self.find(group_node,"ProteinDetectionHypothesis")
  end

  # def self.get_sister_proteins(protein_node)
  #   self.find(protein_node.parent,"ProteinDetectionHypothesis")
  # end

  # PeptideHypothesis children of a protein node
  def self.get_peptides_for_protein(protein_node)
    self.find(protein_node,"PeptideHypothesis")
  end

  # <PeptideHypothesis peptideEvidence_ref="PepEv_1">
  #   <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
  # </PeptideHypothesis>
  #
  # Returns the referenced SpectrumIdentificationItem with the highest MS:1002466
  # value (the first one wins on ties), or nil when no reference yields a score
  # greater than -1. References that cannot be resolved, and PSMs without an
  # MS:1002466 cvParam, are skipped instead of raising on nil.
  def self.get_best_psm_for_peptide(peptide_node)
    best_score=-1
    best_psm=nil
    self.find(peptide_node,"SpectrumIdentificationItemRef").each do |id_ref_node|
      id_ref = id_ref_node.attributes['spectrumIdentificationItem_ref']
      psm_node = self.find(peptide_node,"SpectrumIdentificationItem[@id='#{id_ref}']",true)[0]
      next if psm_node.nil?

      score_param = self.get_cvParam(psm_node,"MS:1002466")
      next if score_param.nil?

      score = score_param['value'].to_f
      if score>best_score
        best_psm=psm_node
        best_score=score
      end
    end
    best_psm
  end

  # Sequence text for a PeptideHypothesis node, resolved via
  # peptideEvidence_ref -> PeptideEvidence@peptide_ref -> Peptide/PeptideSequence
  def self.get_sequence_for_peptide(peptide_node)
    evidence_ref = peptide_node.attributes['peptideEvidence_ref']
    pep_ref = self.find(peptide_node,"PeptideEvidence[@id='#{evidence_ref}']",true)[0].attributes['peptide_ref']
    sequence_for_peptide_ref(peptide_node,pep_ref)
  end

  # Sequence text for a PSM node, resolved via peptide_ref -> Peptide/PeptideSequence
  def self.get_sequence_for_psm(psm_node)
    sequence_for_peptide_ref(psm_node,psm_node.attributes['peptide_ref'])
  end

  # PeptideEvidence nodes referenced by the PeptideEvidenceRef children of a PSM node.
  # The result has one entry per reference; an entry is nil when the reference does not resolve.
  def self.get_peptide_evidence_from_psm(psm_node)
    pe_nodes = []
    self.find(psm_node,"PeptideEvidenceRef").each do |pe_node|
      ev_id=pe_node.attributes['peptideEvidence_ref']
      pe_nodes << self.find(pe_node,"PeptideEvidence[@id='#{ev_id}']",true)[0]
    end
    pe_nodes
  end

  # Content of the PeptideSequence child of the Peptide element whose id is pep_ref.
  # The Peptide element is searched for document-wide, starting from node.
  def self.sequence_for_peptide_ref(node,pep_ref)
    peptide=self.find(node,"Peptide[@id='#{pep_ref}']",true)[0]
    self.find(peptide,"PeptideSequence")[0].content
  end
  private_class_method :sequence_for_peptide_ref

  private

  # Document-wide search for every element with the given name
  def find_in_document(element_name)
    self.class.find(@document,element_name,true)
  end

end
@@ -14,6 +14,15 @@ class MzMLParser < Object
14
14
  @file_reader=XML::Reader.document(doc)
15
15
  end
16
16
 
17
# Moves the reader forward until it sits on a node named "run" and returns
# that node's 'id' attribute. Returns nil as soon as the reader reports that
# there is nothing more to read. If the reader is already on a "run" node it
# is not advanced.
def next_runid
  while @file_reader.name != "run"
    return nil unless @file_reader.read
  end
  @file_reader.get_attribute('id')
end
25
+
17
26
  def next_spectrum()
18
27
 
19
28
  until @file_reader.name=="spectrum"
data/lib/protk/peptide.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'libxml'
2
2
  require 'bio'
3
3
  require 'protk/bio_gff3_extensions'
4
+ require 'protk/mzidentml_doc'
4
5
  require 'protk/error'
5
6
 
6
7
  include LibXML
@@ -10,22 +11,55 @@ end
10
11
 
11
12
  class Peptide
12
13
 
14
+ # Stripped sequence (no modifications)
13
15
  attr_accessor :sequence
14
16
  attr_accessor :protein_name
15
17
  attr_accessor :charge
16
- attr_accessor :nsp_adjusted_probability
17
-
18
-
18
+ attr_accessor :probability
19
+ attr_accessor :theoretical_neutral_mass
20
+
21
# Build a new 'peptide' XML node describing this peptide.
#
# The node receives four attributes, each the string form of one accessor:
# peptide_sequence (sequence), charge (charge), nsp_adjusted_probability
# (probability) and calc_neutral_pep_mass (theoretical_neutral_mass).
# Returns the node; nothing else is modified.
def as_protxml
  attribute_values = [
    ['peptide_sequence', sequence],
    ['charge', charge],
    ['nsp_adjusted_probability', probability],
    ['calc_neutral_pep_mass', theoretical_neutral_mass]
  ]
  XML::Node.new('peptide').tap do |peptide_node|
    attribute_values.each { |attr_name, value| peptide_node[attr_name] = value.to_s }
  end
end
19
29
 
20
30
  class << self
21
31
  def from_protxml(xmlnode)
22
32
  pep=new()
23
33
  pep.sequence=xmlnode['peptide_sequence']
24
- pep.nsp_adjusted_probability=xmlnode['nsp_adjusted_probability'].to_f
34
+ pep.probability=xmlnode['nsp_adjusted_probability'].to_f
25
35
  pep.charge=xmlnode['charge'].to_i
26
36
  pep
27
37
  end
28
38
 
39
+ # <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
40
+ # <PeptideHypothesis peptideEvidence_ref="PepEv_1">
41
+ # <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
42
+ # </PeptideHypothesis>
43
+ # <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
44
+ # <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
45
+ # <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
46
+ # </ProteinDetectionHypothesis>
47
+
48
# Create a peptide from a PeptideHypothesis node (see the example element above).
#
# Uses MzIdentMLDoc helpers throughout:
# * sequence comes from get_sequence_for_peptide
# * probability and theoretical_neutral_mass are the to_f of the 'value' of the
#   MS:1002466 and MS:1001117 cvParams of the best PSM (get_best_psm_for_peptide)
# * charge is the to_i of the best PSM's 'chargeState' attribute
# * protein_name is the 'accession' attribute of the DBSequence found by passing
#   the parent node's 'dBSequence_ref' to get_dbsequence
#
# Returns the new peptide.
def from_mzid(xmlnode)
  peptide = new
  peptide.sequence = MzIdentMLDoc.get_sequence_for_peptide(xmlnode)

  top_psm = MzIdentMLDoc.get_best_psm_for_peptide(xmlnode)
  cv_value = lambda { |accession| MzIdentMLDoc.get_cvParam(top_psm, accession)['value'].to_f }

  peptide.probability = cv_value.call("MS:1002466")
  peptide.theoretical_neutral_mass = cv_value.call("MS:1001117")
  peptide.charge = top_psm.attributes['chargeState'].to_i

  db_sequence = MzIdentMLDoc.get_dbsequence(xmlnode.parent, xmlnode.parent.attributes['dBSequence_ref'])
  peptide.protein_name = db_sequence.attributes['accession']

  peptide
end
62
+
29
63
  def from_sequence(seq,charge=nil)
30
64
  pep=new()
31
65
  pep.sequence=seq
@@ -146,7 +180,7 @@ class Peptide
146
180
  cds_id = parent_record.id
147
181
  this_id = "#{cds_id}.#{self.sequence}"
148
182
  this_id << ".#{self.charge}" unless self.charge.nil?
149
- score = self.nsp_adjusted_probability.nil? ? "." : self.nsp_adjusted_probability.to_s
183
+ score = self.probability.nil? ? "." : self.probability.to_s
150
184
  gff_string = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\tID=#{this_id};Parent=#{cds_id}"
151
185
  Bio::GFF::GFF3::Record.new(gff_string)
152
186
  end
@@ -0,0 +1,24 @@
1
+ include LibXML
2
+
3
# Builds a pepXML document by appending nodes to a template that is loaded
# from data/template_pep.xml next to this source file, and writes the result
# to disk.
class PepXMLWriter

  PEPXML_NS_PREFIX="pepxml"
  PEPXML_NS="http://regis-web.systemsbiology.net/pepXML"

  # The parsed template document that nodes are appended to
  attr_reader :template_doc

  # Parse the bundled template; the path is resolved relative to this file
  def initialize
    template_path = File.dirname(__FILE__) + "/data/template_pep.xml"
    @template_doc = XML::Parser.file(template_path).parse
  end

  # Append query_node to the root element of the template document.
  # NOTE(review): nodes are added directly under the root; pepXML usually nests
  # spectrum_query inside msms_run_summary - confirm this is the intended parent.
  def append_spectrum_query(query_node)
    template_doc.root << query_node
  end

  # Write the document to file_path, indented and UTF-8 encoded
  def save(file_path)
    save_options = { :indent => true, :encoding => XML::Encoding::UTF_8 }
    template_doc.save(file_path, save_options)
  end

end