protk 1.4.1 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,108 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 30/4/2015
5
+ #
6
+ # A wrapper for SpectraST commands that manipulate splib files
7
+ #
8
+ #
9
+
10
+ require 'protk/constants'
11
+ require 'protk/command_runner'
12
+ require 'protk/tool'
13
+ require 'protk/galaxy_util'
14
+
15
+ for_galaxy = GalaxyUtil.for_galaxy?
16
+
17
+ genv=Constants.instance
18
+
19
+ # Set up command-line options specific to this tool. Other options are inherited from Tool
20
+ #
21
+ spectrast_tool=Tool.new([:explicit_output])
22
+ spectrast_tool.option_parser.banner = "Manipulate splib files.\n\nUsage: spectrast_filter.rb [options] file1.splib file2.splib ..."
23
+ spectrast_tool.add_boolean_option(:binary_output,false,['-B','--binary-output','Produce spectral libraries in binary format rather than ASCII'])
24
+ spectrast_tool.add_value_option(:filter_predicate,nil,['--predicate pred','Keep only spectra satisfying predicate pred. Should be a C-style predicate'])
25
+ spectrast_tool.add_value_option(:merge_operation,"U",['--merge method',
26
+ 'How to combine multiple splib files (if provided). Options are U,S,H,A
27
+ U: Union. Include all the peptide ions in all the files.
28
+ S: Subtraction. Only include peptide ions in the first file
29
+ that are not present in any of the other files.
30
+ H: Subtraction of homologs. Only include peptide ions in the
31
+ first file that do not have any homologs with
32
+ same charge and similar m/z in any of the other files.
33
+ A: Appending. Each peptide ion is added from only one library:
34
+ the first file in the argument list that contains that peptide ion.
35
+ Useful for keeping existing consensus spectra unchanged while adding
36
+ only previously unseen peptide ions.'])
37
+ spectrast_tool.add_value_option(:spectrum_operation,"None",['--replicates method',
38
+ 'How to derive a single spectrum from replicates. Options are None, C,B
39
+ C: Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
40
+ B: Best replicate. Pick the best replicate of each peptide ion.'])
41
+
42
+ exit unless spectrast_tool.check_options(true)
43
+
44
+ spectrast_bin = %x[which spectrast].chomp
45
+
46
+ # LIBRARY MANIPULATION OPTIONS (Applicable with .splib files)
47
+ # -cf<pred> Filter library. Keep only those entries satisfying the predicate <pred>.
48
+ # <pred> should be a C-style predicate in quotes.
49
+ # -cJU Union. Include all the peptide ions in all the files.
50
+ # -cJI Intersection. Only include peptide ions that are present in all the files.
51
+ # -cJS Subtraction. Only include peptide ions in the first file that are not present in any of the other files.
52
+ # -cJH Subtraction of homologs. Only include peptide ions in the first file
53
+ # that do not have any homologs with same charge and similar m/z in any of the other files.
54
+ # -cJA Appending. Each peptide ion is added from only one library: the first file in the argument list that contains that peptide ion.
55
+ # Useful for keeping existing consensus spectra unchanged while adding only previously unseen peptide ions.
56
+ # -cAB Best replicate. Pick the best replicate of each peptide ion.
57
+ # -cAC Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
58
+ # -cAQ Quality filter. Apply quality filters to library.
59
+ # IMPORTANT: Quality filter can only be applied on a SINGLE .splib file with no peptide ion represented by more than one spectrum.
60
+ # -cAD Create artificial decoy spectra.
61
+ # -cAN Sort library entries by descending number of replicates used (tie-breaking by probability).
62
+ # -cAM Create semi-empirical spectra based on allowable modifications specified by -cx option.
63
+ # -cQ<num> Produce reduced spectra of at most <num> peaks. Inactive with -cAQ and -cAD.
64
+ # -cD<file> Refresh protein mappings of each library entry against the protein database <file> (Must be in .fasta format).
65
+ # -cu Delete entries whose peptide sequences do not map to any protein during refreshing with -cD option.
66
+ # When off, unmapped entries will be marked with Protein=0/UNMAPPED but retained in library. (Turn off with -cu!).
67
+ # -cd Delete entries whose peptide sequences map to multiple proteins during refreshing with -cD option. (Turn off with -cd!).
68
+
69
+ input_stagers=[]
70
+ inputs=ARGV.collect { |file_name| file_name.chomp}
71
+ if for_galaxy
72
+ input_stagers = inputs.collect {|ip| GalaxyStager.new(ip,{:extension=>".splib"}) }
73
+ inputs=input_stagers.collect { |sg| sg.staged_path }
74
+ end
75
+
76
+
77
+ cmd="#{spectrast_bin} "
78
+
79
+ unless spectrast_tool.binary_output
80
+ cmd << " -c_BIN!"
81
+ end
82
+
83
+ if spectrast_tool.filter_predicate
84
+ cmd << " -cf'#{spectrast_tool.filter_predicate}'"
85
+ end
86
+
87
+ if inputs.length > 1
88
+ cmd << " -cJ#{spectrast_tool.merge_operation}"
89
+ end
90
+
91
+ if spectrast_tool.spectrum_operation!="None"
92
+ cmd << " -cA#{spectrast_tool.spectrum_operation}"
93
+ end
94
+
95
+ if spectrast_tool.explicit_output==nil
96
+ output_file_name=Tool.default_output_path(inputs,"","","")
97
+ else
98
+ output_file_name=spectrast_tool.explicit_output
99
+ end
100
+
101
+ cmd << " -cN#{output_file_name}"
102
+
103
+ inputs.each { |ip| cmd << " #{ip}" }
104
+
105
+ # code = spectrast_tool.run(cmd,genv)
106
+ # throw "Command failed with exit code #{code}" unless code==0
107
+
108
+ %x[#{cmd}]
@@ -30,7 +30,7 @@ class CommandRunner
30
30
  def run_local(command_string)
31
31
  @env.log("Command: #{command_string} started",:info)
32
32
  status = Open4::popen4("#{command_string} ") do |pid, stdin, stdout, stderr|
33
- puts "PID #{pid}"
33
+ @env.log "PID #{pid}" , :info
34
34
 
35
35
  stdout.each { |line| @env.log(line.chomp,:info) }
36
36
 
@@ -0,0 +1,34 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <?xml-stylesheet type="text/xsl" href="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xsl"?>
3
+ <msms_pipeline_analysis date="2014-06-22T15:28:36" summary_xml="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /Users/icooke/bin/tpp/schema/pepXML_v117.xsd">
4
+ <analysis_summary analysis="peptideprophet" time="2014-06-22T15:28:36">
5
+ </analysis_summary>
6
+ <analysis_summary analysis="database_refresh" time="2014-06-22T15:28:36"/>
7
+ <analysis_summary analysis="interact" time="2014-06-22T15:28:36">
8
+ <interact_summary filename="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" directory="">
9
+ <inputfile name="mr176-BSA100fmole_BA3_01_8167.d_tandem.pep.xml" directory="/Users/icooke/Sources/protk/spec/data"/>
10
+ </interact_summary>
11
+ </analysis_summary>
12
+ <dataset_derivation generation_no="0"/>
13
+ <msms_run_summary base_name="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" raw_data_type="raw" raw_data=".?">
14
+ <sample_enzyme name="trypsin">
15
+ <specificity cut="KR" no_cut="P" sense="C"/>
16
+ </sample_enzyme>
17
+ <search_summary base_name="mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
18
+ <search_database local_path="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" type="AA"/>
19
+ <enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="1"/>
20
+ <aminoacid_modification aminoacid="E" massdiff="-18.0106" mass="111.0320" variable="Y" symbol="^"/>
21
+ <!--X! Tandem n-terminal AA variable modification-->
22
+ <aminoacid_modification aminoacid="M" massdiff="15.9949" mass="147.0354" variable="Y"/>
23
+ <aminoacid_modification aminoacid="Q" massdiff="-17.0265" mass="111.0321" variable="Y" symbol="^"/>
24
+ <!--X! Tandem n-terminal AA variable modification-->
25
+ <terminal_modification terminus="n" massdiff="42.0106" mass="43.0184" protein_terminus="N" variable="Y" symbol="^"/>
26
+
27
+ </search_summary>
28
+ <analysis_timestamp analysis="peptideprophet" time="2014-06-22T15:28:36" id="1"/>
29
+ <analysis_timestamp analysis="database_refresh" time="2014-06-22T15:28:36" id="1">
30
+ <database_refresh_timestamp database="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" min_num_enz_term="1"/>
31
+ </analysis_timestamp>
32
+
33
+ </msms_run_summary>
34
+ </msms_pipeline_analysis>
@@ -0,0 +1,39 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v6.xsd" summary_xml="">
3
+ <protein_summary_header reference_database="FULLPATH_TO_REFERENCE_DB" residue_substitution_list="I -&gt; L" source_files="FULLPATH_TO_SOURCE_PEPXML" source_files_alt="FULLPATH_TO_SOURCE_PEPXML" min_peptide_probability="" min_peptide_weight="" num_predicted_correct_prots="" num_input_1_spectra="" num_input_2_spectra="" num_input_3_spectra="" num_input_4_spectra="" num_input_5_spectra="" initial_min_peptide_prob="" total_no_spectrum_ids="" sample_enzyme="trypsin">
4
+ <program_details analysis="proteinprophet" time="2014-01-20T14:17:37" version=" Insilicos_LabKey_C++ (TPP v0.0 Development trunk rev 0, Build 201307090846 (linux))">
5
+ <proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="2" final_peptide_wt_iters="3">
6
+ <nsp_information neighboring_bin_smoothing="Y">
7
+ <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_incl="0.00" pos_freq="0.057" neg_freq="0.625" pos_to_neg_ratio="0.09"/>
8
+ <nsp_distribution bin_no="1" nsp_lower_bound_excl="0.00" nsp_upper_bound_incl="0.31" pos_freq="0.037" neg_freq="0.152" pos_to_neg_ratio="0.24"/>
9
+ <nsp_distribution bin_no="2" nsp_lower_bound_excl="0.31" nsp_upper_bound_incl="1.00" pos_freq="0.077" neg_freq="0.032" pos_to_neg_ratio="2.42"/>
10
+ <nsp_distribution bin_no="3" nsp_lower_bound_excl="1.00" nsp_upper_bound_incl="2.50" pos_freq="0.113" neg_freq="0.033" pos_to_neg_ratio="3.39"/>
11
+ <nsp_distribution bin_no="4" nsp_lower_bound_excl="2.50" nsp_upper_bound_incl="4.63" pos_freq="0.123" neg_freq="0.032" pos_to_neg_ratio="3.91"/>
12
+ <nsp_distribution bin_no="5" nsp_lower_bound_excl="4.63" nsp_upper_bound_incl="7.90" pos_freq="0.143" neg_freq="0.032" pos_to_neg_ratio="4.50"/>
13
+ <nsp_distribution bin_no="6" nsp_lower_bound_excl="7.90" nsp_upper_bound_incl="14.92" pos_freq="0.196" neg_freq="0.041" pos_to_neg_ratio="4.78"/>
14
+ <nsp_distribution bin_no="7" nsp_lower_bound_excl="14.92" nsp_upper_bound_excl="inf" pos_freq="0.254" neg_freq="0.054" pos_to_neg_ratio="4.72" alt_pos_to_neg_ratio="4.78"/>
15
+ </nsp_information>
16
+ <ni_information>
17
+ </ni_information>
18
+ <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.835" predicted_num_correct="1787" predicted_num_incorrect="9044"/>
19
+ <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
20
+ <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
21
+ <protein_summary_data_filter min_probability="0.30" sensitivity="0.956" false_positive_error_rate="0.151" predicted_num_correct="1709" predicted_num_incorrect="305"/>
22
+ <protein_summary_data_filter min_probability="0.40" sensitivity="0.916" false_positive_error_rate="0.095" predicted_num_correct="1638" predicted_num_incorrect="171"/>
23
+ <protein_summary_data_filter min_probability="0.50" sensitivity="0.887" false_positive_error_rate="0.063" predicted_num_correct="1585" predicted_num_incorrect="106"/>
24
+ <protein_summary_data_filter min_probability="0.60" sensitivity="0.853" false_positive_error_rate="0.036" predicted_num_correct="1525" predicted_num_incorrect="58"/>
25
+ <protein_summary_data_filter min_probability="0.70" sensitivity="0.826" false_positive_error_rate="0.020" predicted_num_correct="1477" predicted_num_incorrect="31"/>
26
+ <protein_summary_data_filter min_probability="0.80" sensitivity="0.805" false_positive_error_rate="0.012" predicted_num_correct="1438" predicted_num_incorrect="18"/>
27
+ <protein_summary_data_filter min_probability="0.90" sensitivity="0.773" false_positive_error_rate="0.006" predicted_num_correct="1381" predicted_num_incorrect="8"/>
28
+ <protein_summary_data_filter min_probability="0.95" sensitivity="0.749" false_positive_error_rate="0.004" predicted_num_correct="1339" predicted_num_incorrect="5"/>
29
+ <protein_summary_data_filter min_probability="0.96" sensitivity="0.738" false_positive_error_rate="0.003" predicted_num_correct="1318" predicted_num_incorrect="4"/>
30
+ <protein_summary_data_filter min_probability="0.97" sensitivity="0.728" false_positive_error_rate="0.002" predicted_num_correct="1302" predicted_num_incorrect="3"/>
31
+ <protein_summary_data_filter min_probability="0.98" sensitivity="0.711" false_positive_error_rate="0.002" predicted_num_correct="1272" predicted_num_incorrect="2"/>
32
+ <protein_summary_data_filter min_probability="0.99" sensitivity="0.609" false_positive_error_rate="0.000" predicted_num_correct="1088" predicted_num_incorrect="0"/>
33
+ <protein_summary_data_filter min_probability="1.00" sensitivity="0.164" false_positive_error_rate="0.000" predicted_num_correct="294" predicted_num_incorrect="0"/>
34
+ </proteinprophet_details>
35
+ </program_details>
36
+ </protein_summary_header>
37
+ <dataset_derivation generation_no="0">
38
+ </dataset_derivation>
39
+ </protein_summary>
@@ -0,0 +1,140 @@
1
+ require 'libxml'
2
+
3
+ include LibXML
4
+
5
+ class MzIdentMLDoc < Object
6
+
7
+ MZID_NS_PREFIX="mzidentml"
8
+ MZID_NS='http://psidev.info/psi/pi/mzIdentML/1.1'
9
+
10
+ def initialize(path)
11
+ parser=XML::Parser.file(path)
12
+ @document=parser.parse
13
+ end
14
+
15
+
16
+ def spectrum_queries
17
+ @document.find("//#{MZID_NS_PREFIX}:SpectrumIdentificationResult","#{MZID_NS_PREFIX}:#{MZID_NS}")
18
+ end
19
+
20
+ def peptide_evidence
21
+ @document.find("//#{MZID_NS_PREFIX}:PeptideEvidence","#{MZID_NS_PREFIX}:#{MZID_NS}")
22
+ end
23
+
24
+ def psms
25
+ @document.find("//#{MZID_NS_PREFIX}:SpectrumIdentificationItem","#{MZID_NS_PREFIX}:#{MZID_NS}")
26
+ end
27
+
28
+ def protein_groups
29
+ @document.find("//#{MZID_NS_PREFIX}:ProteinAmbiguityGroup","#{MZID_NS_PREFIX}:#{MZID_NS}")
30
+ end
31
+
32
+
33
+ def proteins
34
+ @document.find("//#{MZID_NS_PREFIX}:ProteinDetectionHypothesis","#{MZID_NS_PREFIX}:#{MZID_NS}")
35
+ end
36
+
37
+ # Peptides are referenced in many ways in mzidentml.
38
+ # We define a "Peptide" as a peptide supporting a particular protein
39
+ # Such peptides may encompass several PSMs
40
+ #
41
+ def peptides
42
+ @document.find("//#{MZID_NS_PREFIX}:PeptideHypothesis","#{MZID_NS_PREFIX}:#{MZID_NS}")
43
+ end
44
+
45
+
46
+
47
+ # -----------------------------------------------------------
48
+ #
49
+ # Class Level Utility methods for searching from a given node
50
+ #
51
+ # -----------------------------------------------------------
52
+
53
+ def self.find(node,expression,root=false)
54
+ pp = root ? "//" : "./"
55
+ node.find("#{pp}#{MZID_NS_PREFIX}:#{expression}","#{MZID_NS_PREFIX}:#{MZID_NS}")
56
+ end
57
+
58
+
59
+ def self.get_cvParam(mzidnode,accession)
60
+ self.find(mzidnode,"cvParam[@accession=\'#{accession}\']")[0]
61
+ end
62
+
63
+ def self.get_dbsequence(mzidnode,accession)
64
+ self.find(mzidnode,"DBSequence[@accession=\'#{accession}\']",true)[0]
65
+ end
66
+
67
+ # As per PeptideShaker: a protein flagged as group representative (cvParam MS:1002403) takes the value of cvParam MS:1002470 on its parent node, scaled by 0.01; any other protein gets 0
68
+ def self.get_protein_probability(protein_node)
69
+
70
+ #MS:1002403
71
+ is_group_representative=(self.get_cvParam(protein_node,"MS:1002403")!=nil)
72
+ if is_group_representative
73
+ return self.get_cvParam(protein_node.parent,"MS:1002470").attributes['value'].to_f*0.01
74
+ else
75
+ return 0
76
+ end
77
+ end
78
+
79
+ def self.get_proteins_for_group(group_node)
80
+ self.find(group_node,"ProteinDetectionHypothesis")
81
+ end
82
+
83
+ # def self.get_sister_proteins(protein_node)
84
+ # self.find(protein_node.parent,"ProteinDetectionHypothesis")
85
+ # end
86
+
87
+ def self.get_peptides_for_protein(protein_node)
88
+ self.find(protein_node,"PeptideHypothesis")
89
+ end
90
+
91
+ # <PeptideHypothesis peptideEvidence_ref="PepEv_1">
92
+ # <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
93
+ # </PeptideHypothesis>
94
+ def self.get_best_psm_for_peptide(peptide_node)
95
+
96
+ best_score=-1
97
+ best_psm=nil
98
+ self.find(peptide_node,"SpectrumIdentificationItemRef").each do |id_ref_node|
99
+ id_ref = id_ref_node.attributes['spectrumIdentificationItem_ref']
100
+ psm_node = self.find(peptide_node,"SpectrumIdentificationItem[@id=\'#{id_ref}\']",true)[0]
101
+ score = self.get_cvParam(psm_node,"MS:1002466")['value'].to_f
102
+ if score>best_score
103
+ best_psm=psm_node
104
+ best_score=score
105
+ end
106
+ end
107
+ best_psm
108
+ end
109
+
110
+ def self.get_sequence_for_peptide(peptide_node)
111
+ evidence_ref = peptide_node.attributes['peptideEvidence_ref']
112
+ pep_ref = peptide_node.find("//#{MZID_NS_PREFIX}:PeptideEvidence[@id=\'#{evidence_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].attributes['peptide_ref']
113
+ peptide=peptide_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{pep_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0]
114
+ # require 'byebug';byebug
115
+ peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].content
116
+ end
117
+
118
+ def self.get_sequence_for_psm(psm_node)
119
+ pep_ref = psm_node.attributes['peptide_ref']
120
+ peptide=psm_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{pep_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0]
121
+ peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].content
122
+ end
123
+
124
+ def self.get_peptide_evidence_from_psm(psm_node)
125
+ pe_nodes = []
126
+ self.find(psm_node,"PeptideEvidenceRef").each do |pe_node|
127
+ ev_id=pe_node.attributes['peptideEvidence_ref']
128
+ pe_nodes << self.find(pe_node,"PeptideEvidence[@id=\'#{ev_id}\']",true)[0]
129
+ end
130
+ pe_nodes
131
+ end
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+ end
@@ -14,6 +14,15 @@ class MzMLParser < Object
14
14
  @file_reader=XML::Reader.document(doc)
15
15
  end
16
16
 
17
+ def next_runid()
18
+ until @file_reader.name=="run"
19
+ if !@file_reader.read()
20
+ return nil
21
+ end
22
+ end
23
+ return @file_reader.get_attribute('id')
24
+ end
25
+
17
26
  def next_spectrum()
18
27
 
19
28
  until @file_reader.name=="spectrum"
data/lib/protk/peptide.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'libxml'
2
2
  require 'bio'
3
3
  require 'protk/bio_gff3_extensions'
4
+ require 'protk/mzidentml_doc'
4
5
  require 'protk/error'
5
6
 
6
7
  include LibXML
@@ -10,22 +11,55 @@ end
10
11
 
11
12
  class Peptide
12
13
 
14
+ # Stripped sequence (no modifications)
13
15
  attr_accessor :sequence
14
16
  attr_accessor :protein_name
15
17
  attr_accessor :charge
16
- attr_accessor :nsp_adjusted_probability
17
-
18
-
18
+ attr_accessor :probability
19
+ attr_accessor :theoretical_neutral_mass
20
+
21
+ def as_protxml
22
+ node = XML::Node.new('peptide')
23
+ node['peptide_sequence']=self.sequence.to_s
24
+ node['charge']=self.charge.to_s
25
+ node['nsp_adjusted_probability']=self.probability.to_s
26
+ node['calc_neutral_pep_mass']=self.theoretical_neutral_mass.to_s
27
+ node
28
+ end
19
29
 
20
30
  class << self
21
31
  def from_protxml(xmlnode)
22
32
  pep=new()
23
33
  pep.sequence=xmlnode['peptide_sequence']
24
- pep.nsp_adjusted_probability=xmlnode['nsp_adjusted_probability'].to_f
34
+ pep.probability=xmlnode['nsp_adjusted_probability'].to_f
25
35
  pep.charge=xmlnode['charge'].to_i
26
36
  pep
27
37
  end
28
38
 
39
+ # <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
40
+ # <PeptideHypothesis peptideEvidence_ref="PepEv_1">
41
+ # <SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
42
+ # </PeptideHypothesis>
43
+ # <cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
44
+ # <cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
45
+ # <cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
46
+ # </ProteinDetectionHypothesis>
47
+
48
+ def from_mzid(xmlnode)
49
+ pep=new()
50
+ pep.sequence=MzIdentMLDoc.get_sequence_for_peptide(xmlnode)
51
+ best_psm = MzIdentMLDoc.get_best_psm_for_peptide(xmlnode)
52
+ # require 'byebug';byebug
53
+ pep.probability = MzIdentMLDoc.get_cvParam(best_psm,"MS:1002466")['value'].to_f
54
+ pep.theoretical_neutral_mass = MzIdentMLDoc.get_cvParam(best_psm,"MS:1001117")['value'].to_f
55
+ pep.charge = best_psm.attributes['chargeState'].to_i
56
+ pep.protein_name = MzIdentMLDoc.get_dbsequence(xmlnode.parent,xmlnode.parent.attributes['dBSequence_ref']).attributes['accession']
57
+
58
+ # pep.charge = MzIdentMLDoc.get_charge_for_psm(best_psm)
59
+
60
+ pep
61
+ end
62
+
29
63
  def from_sequence(seq,charge=nil)
30
64
  pep=new()
31
65
  pep.sequence=seq
@@ -146,7 +180,7 @@ class Peptide
146
180
  cds_id = parent_record.id
147
181
  this_id = "#{cds_id}.#{self.sequence}"
148
182
  this_id << ".#{self.charge}" unless self.charge.nil?
149
- score = self.nsp_adjusted_probability.nil? ? "." : self.nsp_adjusted_probability.to_s
183
+ score = self.probability.nil? ? "." : self.probability.to_s
150
184
  gff_string = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\tID=#{this_id};Parent=#{cds_id}"
151
185
  Bio::GFF::GFF3::Record.new(gff_string)
152
186
  end
@@ -0,0 +1,24 @@
1
+ include LibXML
2
+
3
+ class PepXMLWriter < Object
4
+
5
+ PEPXML_NS_PREFIX="pepxml"
6
+ PEPXML_NS="http://regis-web.systemsbiology.net/pepXML"
7
+
8
+ attr :template_doc
9
+
10
+ def initialize
11
+ template_path="#{File.dirname(__FILE__)}/data/template_pep.xml"
12
+ template_parser=XML::Parser.file(template_path)
13
+ @template_doc=template_parser.parse
14
+ end
15
+
16
+ def append_spectrum_query(query_node)
17
+ @template_doc.root << query_node
18
+ end
19
+
20
+ def save(file_path)
21
+ @template_doc.save(file_path,:indent=>true,:encoding => XML::Encoding::UTF_8)
22
+ end
23
+
24
+ end