RubyGems - protk - Versions diffs - 1.4.1 → 1.4.2 - Mend

protk 1.4.1 → 1.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

checksums.yaml +4 -4
data/README.md +32 -15
data/bin/mzid_to_pepxml.rb +75 -0
data/bin/mzid_to_protxml.rb +77 -0
data/bin/protxml_to_gff.rb +1 -1
data/bin/sixframe.rb +24 -5
data/bin/spectrast_create.rb +125 -0
data/bin/spectrast_filter.rb +108 -0
data/lib/protk/command_runner.rb +1 -1
data/lib/protk/data/template_pep.xml +34 -0
data/lib/protk/data/template_prot.xml +39 -0
data/lib/protk/mzidentml_doc.rb +140 -0
data/lib/protk/mzml_parser.rb +9 -0
data/lib/protk/peptide.rb +39 -5
data/lib/protk/pepxml_writer.rb +24 -0
data/lib/protk/physical_constants.rb +1 -0
data/lib/protk/protein.rb +64 -1
data/lib/protk/protein_group.rb +70 -0
data/lib/protk/protxml_writer.rb +27 -0
data/lib/protk/psm.rb +222 -0
data/lib/protk/search_tool.rb +1 -6
data/lib/protk/sniffer.rb +35 -0
data/lib/protk/spectrum_query.rb +132 -0
metadata +20 -2

data/bin/spectrast_filter.rb ADDED Viewed

@@ -0,0 +1,108 @@
+#!/usr/bin/env ruby
+#
+# This file is part of protk
+# Created by Ira Cooke 30/4/2015
+#
+# A wrapper for SpectraST commands that manipulate splib files
+#
+#
+require 'protk/constants'
+require 'protk/command_runner'
+require 'protk/tool'
+require 'protk/galaxy_util'
+for_galaxy = GalaxyUtil.for_galaxy?
+genv=Constants.instance
+# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
+#
+spectrast_tool=Tool.new([:explicit_output])
+spectrast_tool.option_parser.banner = "Manipulate splib files.\n\nUsage: spectrast_filter.rb [options] file1.splib file1.splib ..."
+spectrast_tool.add_boolean_option(:binary_output,false,['-B','--binary-output','Produce spectral libraries in binary format rather than ASCII'])
+spectrast_tool.add_value_option(:filter_predicate,nil,['--predicate pred','Keep only spectra satifying predicate pred. Should be a C-style predicate'])
+spectrast_tool.add_value_option(:merge_operation,"U",['--merge method',
+								'How to combine multiple splib files (if provided). Options are U,S,H
+				     U: Union. Include all the peptide ions in all the files.
+				     S: Subtraction. Only include peptide ions in the first file
+				     	that are not present in any of the other files.
+				     H: Subtraction of homologs. Only include peptide ions in the
+				     	first file that do not have any homologs with
+				     	same charge and similar m/z in any of the other files.
+				     A: Appending. Each peptide ion is added from only one library:
+				     	the first file in the argument list that contains that peptide ion.
+				     	Useful for keeping existing consensus spectra unchanged while adding
+				     	only previously unseen peptide ions.'])
+spectrast_tool.add_value_option(:spectrum_operation,"None",['--replicates method',
+								'How to derive a single spectrum from replicates. Options are None, C,B
+				     C: Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
+				     B: Best replicate. Pick the best replicate of each peptide ion.'])
+exit unless spectrast_tool.check_options(true)
+spectrast_bin = %x[which spectrast].chomp
+        # LIBRARY MANIPULATION OPTIONS (Applicable with .splib files)
+        #  -cf<pred>    Filter library. Keep only those entries satisfying the predicate <pred>.
+        #                    <pred> should be a C-style predicate in quotes.
+        #  -cJU         Union. Include all the peptide ions in all the files.
+        #  -cJI         Intersection. Only include peptide ions that are present in all the files.
+        #  -cJS         Subtraction. Only include peptide ions in the first file that are not present in any of the other files.
+        #  -cJH         Subtraction of homologs. Only include peptide ions in the first file
+        #                    that do not have any homologs with same charge and similar m/z in any of the other files.
+        #  -cJA         Appending. Each peptide ion is added from only one library: the first file in the argument list that contains that peptide ion.
+        #                    Useful for keeping existing consensus spectra unchanged while adding only previously unseen peptide ions.
+        #  -cAB         Best replicate. Pick the best replicate of each peptide ion.
+        #  -cAC         Consensus. Create the consensus spectrum of all replicate spectra of each peptide ion.
+        #  -cAQ         Quality filter. Apply quality filters to library.
+        #                    IMPORTANT: Quality filter can only be applied on a SINGLE .splib file with no peptide ion represented by more than one spectrum.
+        #  -cAD         Create artificial decoy spectra.
+        #  -cAN         Sort library entries by descending number of replicates used (tie-breaking by probability).
+        #  -cAM         Create semi-empirical spectra based on allowable modifications specified by -cx option.
+        #  -cQ<num>     Produce reduced spectra of at most <num> peaks. Inactive with -cAQ and -cAD.
+        #  -cD<file>    Refresh protein mappings of each library entry against the protein database <file> (Must be in .fasta format).
+        #  -cu          Delete entries whose peptide sequences do not map to any protein during refreshing with -cD option.
+        #                    When off, unmapped entries will be marked with Protein=0/UNMAPPED but retained in library. (Turn off with -cu!).
+        #  -cd          Delete entries whose peptide sequences map to multiple proteins during refreshing with -cD option. (Turn off with -cd!).
+input_stagers=[]
+inputs=ARGV.collect { |file_name| file_name.chomp}
+if for_galaxy
+  input_stagers = inputs.collect {|ip| GalaxyStager.new(ip,{:extension=>".splib"}) }
+  inputs=input_stagers.collect { |sg| sg.staged_path }
+end
+cmd="#{spectrast_bin} "
+unless spectrast_tool.binary_output
+	cmd << " -c_BIN!"
+end
+if spectrast_tool.filter_predicate
+	cmd << "  -cf'#{spectrast_tool.filter_predicate}'"
+end
+if inputs.length > 1
+	cmd << " -cJ#{spectrast_tool.merge_operation}"
+end
+if spectrast_tool.spectrum_operation!="None"
+	cmd << " -cA#{spectrast_tool.spectrum_operation}"
+end
+if spectrast_tool.explicit_output==nil
+    output_file_name=Tool.default_output_path(inputs,"","","")
+else
+    output_file_name=spectrast_tool.explicit_output
+end
+cmd << " -cN#{output_file_name}"
+inputs.each { |ip| cmd << " #{ip}" }
+# code = spectrast_tool.run(cmd,genv)
+# throw "Command failed with exit code #{code}" unless code==0
+%x[#{cmd}]

data/lib/protk/command_runner.rb CHANGED Viewed

@@ -30,7 +30,7 @@ class CommandRunner
   def run_local(command_string)
     @env.log("Command: #{command_string} started",:info)
     status = Open4::popen4("#{command_string} ") do |pid, stdin, stdout, stderr|
-      puts "PID #{pid}"
+      @env.log "PID #{pid}" , :info
       stdout.each { |line| @env.log(line.chomp,:info) }

data/lib/protk/data/template_pep.xml ADDED Viewed

@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<?xml-stylesheet type="text/xsl" href="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xsl"?>
+<msms_pipeline_analysis date="2014-06-22T15:28:36" summary_xml="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" xmlns="http://regis-web.systemsbiology.net/pepXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://regis-web.systemsbiology.net/pepXML /Users/icooke/bin/tpp/schema/pepXML_v117.xsd">
+<analysis_summary analysis="peptideprophet" time="2014-06-22T15:28:36">
+</analysis_summary>
+<analysis_summary analysis="database_refresh" time="2014-06-22T15:28:36"/>
+<analysis_summary analysis="interact" time="2014-06-22T15:28:36">
+<interact_summary filename="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem_pproph.pep.xml" directory="">
+<inputfile name="mr176-BSA100fmole_BA3_01_8167.d_tandem.pep.xml" directory="/Users/icooke/Sources/protk/spec/data"/>
+</interact_summary>
+</analysis_summary>
+<dataset_derivation generation_no="0"/>
+<msms_run_summary base_name="/Users/icooke/Sources/protk/spec/data/mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" raw_data_type="raw" raw_data=".?">
+<sample_enzyme name="trypsin">
+<specificity cut="KR" no_cut="P" sense="C"/>
+</sample_enzyme>
+<search_summary base_name="mr176-BSA100fmole_BA3_01_8167.d_tandem.tandem" search_engine="X! Tandem" precursor_mass_type="monoisotopic" fragment_mass_type="monoisotopic" search_id="1">
+<search_database local_path="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" type="AA"/>
+<enzymatic_search_constraint enzyme="trypsin" max_num_internal_cleavages="2" min_number_termini="1"/>
+<aminoacid_modification aminoacid="E" massdiff="-18.0106" mass="111.0320" variable="Y" symbol="^"/>
+<!--X! Tandem n-terminal AA variable modification-->
+<aminoacid_modification aminoacid="M" massdiff="15.9949" mass="147.0354" variable="Y"/>
+<aminoacid_modification aminoacid="Q" massdiff="-17.0265" mass="111.0321" variable="Y" symbol="^"/>
+<!--X! Tandem n-terminal AA variable modification-->
+<terminal_modification terminus="n" massdiff="42.0106" mass="43.0184" protein_terminus="N" variable="Y" symbol="^"/>
+</search_summary>
+<analysis_timestamp analysis="peptideprophet" time="2014-06-22T15:28:36" id="1"/>
+<analysis_timestamp analysis="database_refresh" time="2014-06-22T15:28:36" id="1">
+<database_refresh_timestamp database="/Users/icooke/Sources/protk/spec/data/AASequences.fasta" min_num_enz_term="1"/>
+</analysis_timestamp>
+</msms_run_summary>
+</msms_pipeline_analysis>

data/lib/protk/data/template_prot.xml ADDED Viewed

@@ -0,0 +1,39 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<protein_summary xmlns="http://regis-web.systemsbiology.net/protXML" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/protXML/protXML_v6.xsd" summary_xml="">
+<protein_summary_header reference_database="FULLPATH_TO_REFERENCE_DB" residue_substitution_list="I -&gt; L" source_files="FULLPATH_TO_SOURCE_PEPXML" source_files_alt="FULLPATH_TO_SOURCE_PEPXML" min_peptide_probability="" min_peptide_weight="" num_predicted_correct_prots="" num_input_1_spectra="" num_input_2_spectra="" num_input_3_spectra="" num_input_4_spectra="" num_input_5_spectra="" initial_min_peptide_prob="" total_no_spectrum_ids="" sample_enzyme="trypsin">
+<program_details analysis="proteinprophet" time="2014-01-20T14:17:37" version=" Insilicos_LabKey_C++ (TPP v0.0 Development trunk rev 0, Build 201307090846 (linux))">
+<proteinprophet_details occam_flag="Y" groups_flag="Y" degen_flag="Y" nsp_flag="Y" initial_peptide_wt_iters="2" nsp_distribution_iters="2" final_peptide_wt_iters="3">
+      <nsp_information neighboring_bin_smoothing="Y">
+         <nsp_distribution bin_no="0" nsp_lower_bound_incl="0.00" nsp_upper_bound_incl="0.00" pos_freq="0.057" neg_freq="0.625" pos_to_neg_ratio="0.09"/>
+         <nsp_distribution bin_no="1" nsp_lower_bound_excl="0.00" nsp_upper_bound_incl="0.31" pos_freq="0.037" neg_freq="0.152" pos_to_neg_ratio="0.24"/>
+         <nsp_distribution bin_no="2" nsp_lower_bound_excl="0.31" nsp_upper_bound_incl="1.00" pos_freq="0.077" neg_freq="0.032" pos_to_neg_ratio="2.42"/>
+         <nsp_distribution bin_no="3" nsp_lower_bound_excl="1.00" nsp_upper_bound_incl="2.50" pos_freq="0.113" neg_freq="0.033" pos_to_neg_ratio="3.39"/>
+         <nsp_distribution bin_no="4" nsp_lower_bound_excl="2.50" nsp_upper_bound_incl="4.63" pos_freq="0.123" neg_freq="0.032" pos_to_neg_ratio="3.91"/>
+         <nsp_distribution bin_no="5" nsp_lower_bound_excl="4.63" nsp_upper_bound_incl="7.90" pos_freq="0.143" neg_freq="0.032" pos_to_neg_ratio="4.50"/>
+         <nsp_distribution bin_no="6" nsp_lower_bound_excl="7.90" nsp_upper_bound_incl="14.92" pos_freq="0.196" neg_freq="0.041" pos_to_neg_ratio="4.78"/>
+         <nsp_distribution bin_no="7" nsp_lower_bound_excl="14.92" nsp_upper_bound_excl="inf" pos_freq="0.254" neg_freq="0.054" pos_to_neg_ratio="4.72" alt_pos_to_neg_ratio="4.78"/>
+      </nsp_information>
+      <ni_information>
+      </ni_information>
+      <protein_summary_data_filter min_probability="0.00" sensitivity="1.000" false_positive_error_rate="0.835" predicted_num_correct="1787" predicted_num_incorrect="9044"/>
+      <protein_summary_data_filter min_probability="0.10" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
+      <protein_summary_data_filter min_probability="0.20" sensitivity="1.000" false_positive_error_rate="0.235" predicted_num_correct="1787" predicted_num_incorrect="548"/>
+      <protein_summary_data_filter min_probability="0.30" sensitivity="0.956" false_positive_error_rate="0.151" predicted_num_correct="1709" predicted_num_incorrect="305"/>
+      <protein_summary_data_filter min_probability="0.40" sensitivity="0.916" false_positive_error_rate="0.095" predicted_num_correct="1638" predicted_num_incorrect="171"/>
+      <protein_summary_data_filter min_probability="0.50" sensitivity="0.887" false_positive_error_rate="0.063" predicted_num_correct="1585" predicted_num_incorrect="106"/>
+      <protein_summary_data_filter min_probability="0.60" sensitivity="0.853" false_positive_error_rate="0.036" predicted_num_correct="1525" predicted_num_incorrect="58"/>
+      <protein_summary_data_filter min_probability="0.70" sensitivity="0.826" false_positive_error_rate="0.020" predicted_num_correct="1477" predicted_num_incorrect="31"/>
+      <protein_summary_data_filter min_probability="0.80" sensitivity="0.805" false_positive_error_rate="0.012" predicted_num_correct="1438" predicted_num_incorrect="18"/>
+      <protein_summary_data_filter min_probability="0.90" sensitivity="0.773" false_positive_error_rate="0.006" predicted_num_correct="1381" predicted_num_incorrect="8"/>
+      <protein_summary_data_filter min_probability="0.95" sensitivity="0.749" false_positive_error_rate="0.004" predicted_num_correct="1339" predicted_num_incorrect="5"/>
+      <protein_summary_data_filter min_probability="0.96" sensitivity="0.738" false_positive_error_rate="0.003" predicted_num_correct="1318" predicted_num_incorrect="4"/>
+      <protein_summary_data_filter min_probability="0.97" sensitivity="0.728" false_positive_error_rate="0.002" predicted_num_correct="1302" predicted_num_incorrect="3"/>
+      <protein_summary_data_filter min_probability="0.98" sensitivity="0.711" false_positive_error_rate="0.002" predicted_num_correct="1272" predicted_num_incorrect="2"/>
+      <protein_summary_data_filter min_probability="0.99" sensitivity="0.609" false_positive_error_rate="0.000" predicted_num_correct="1088" predicted_num_incorrect="0"/>
+      <protein_summary_data_filter min_probability="1.00" sensitivity="0.164" false_positive_error_rate="0.000" predicted_num_correct="294" predicted_num_incorrect="0"/>
+</proteinprophet_details>
+</program_details>
+</protein_summary_header>
+<dataset_derivation generation_no="0">
+</dataset_derivation>
+</protein_summary>

data/lib/protk/mzidentml_doc.rb ADDED Viewed

@@ -0,0 +1,140 @@
+require 'libxml'
+include LibXML
+class MzIdentMLDoc < Object
+	MZID_NS_PREFIX="mzidentml"
+	MZID_NS='http://psidev.info/psi/pi/mzIdentML/1.1'
+	def initialize(path)
+		parser=XML::Parser.file(path)
+		@document=parser.parse
+	end
+	def spectrum_queries
+		@document.find("//#{MZID_NS_PREFIX}:SpectrumIdentificationResult","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	def peptide_evidence
+		@document.find("//#{MZID_NS_PREFIX}:PeptideEvidence","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	def psms
+		@document.find("//#{MZID_NS_PREFIX}:SpectrumIdentificationItem","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	def protein_groups
+		@document.find("//#{MZID_NS_PREFIX}:ProteinAmbiguityGroup","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	def proteins
+		@document.find("//#{MZID_NS_PREFIX}:ProteinDetectionHypothesis","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	# Peptides are referenced in many ways in mzidentml.
+	# We define a "Peptide" as a peptide supporting a particular protein
+	# Such peptides may encompass several PSM's
+	#
+	def peptides
+		@document.find("//#{MZID_NS_PREFIX}:PeptideHypothesis","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	# -----------------------------------------------------------
+	#
+	# Class Level Utility methods for searching from a given node
+	#
+	# -----------------------------------------------------------
+	def self.find(node,expression,root=false)
+		pp = root ? "//" : "./"
+		node.find("#{pp}#{MZID_NS_PREFIX}:#{expression}","#{MZID_NS_PREFIX}:#{MZID_NS}")
+	end
+	def self.get_cvParam(mzidnode,accession)
+		self.find(mzidnode,"cvParam[@accession=\'#{accession}\']")[0]
+	end
+	def self.get_dbsequence(mzidnode,accession)
+		self.find(mzidnode,"DBSequence[@accession=\'#{accession}\']",true)[0]
+	end
+	# As per PeptideShaker. Assume group probability used for protein if it is group rep otherwise 0
+	def self.get_protein_probability(protein_node)
+		#MS:1002403
+		is_group_representative=(self.get_cvParam(protein_node,"MS:1002403")!=nil)
+		if is_group_representative
+			return 	self.get_cvParam(protein_node.parent,"MS:1002470").attributes['value'].to_f*0.01
+		else
+			return 0
+		end
+	end
+	def self.get_proteins_for_group(group_node)
+		self.find(group_node,"ProteinDetectionHypothesis")
+	end
+	# def self.get_sister_proteins(protein_node)
+	# 	self.find(protein_node.parent,"ProteinDetectionHypothesis")
+	# end
+	def self.get_peptides_for_protein(protein_node)
+		self.find(protein_node,"PeptideHypothesis")
+	end
+	# <PeptideHypothesis peptideEvidence_ref="PepEv_1">
+	# 	<SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
+	# </PeptideHypothesis>
+	def self.get_best_psm_for_peptide(peptide_node)
+		best_score=-1
+		best_psm=nil
+		self.find(peptide_node,"SpectrumIdentificationItemRef").each do |id_ref_node|
+			id_ref = id_ref_node.attributes['spectrumIdentificationItem_ref']
+			psm_node = self.find(peptide_node,"SpectrumIdentificationItem[@id=\'#{id_ref}\']",true)[0]
+			score = self.get_cvParam(psm_node,"MS:1002466")['value'].to_f
+			if score>best_score
+				best_psm=psm_node
+				best_score=score
+			end
+		end
+		best_psm
+	end
+	def self.get_sequence_for_peptide(peptide_node)
+		evidence_ref = peptide_node.attributes['peptideEvidence_ref']
+		pep_ref = peptide_node.find("//#{MZID_NS_PREFIX}:PeptideEvidence[@id=\'#{evidence_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].attributes['peptide_ref']
+		peptide=peptide_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{pep_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0]
+		# require 'byebug';byebug
+		peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].content
+	end
+	def self.get_sequence_for_psm(psm_node)
+		pep_ref = psm_node.attributes['peptide_ref']
+		peptide=psm_node.find("//#{MZID_NS_PREFIX}:Peptide[@id=\'#{pep_ref}\']","#{MZID_NS_PREFIX}:#{MZID_NS}")[0]
+		peptide.find("./#{MZID_NS_PREFIX}:PeptideSequence","#{MZID_NS_PREFIX}:#{MZID_NS}")[0].content
+	end
+	def self.get_peptide_evidence_from_psm(psm_node)
+		pe_nodes = []
+		self.find(psm_node,"PeptideEvidenceRef").each do |pe_node|
+			ev_id=pe_node.attributes['peptideEvidence_ref']
+			pe_nodes << self.find(pe_node,"PeptideEvidence[@id=\'#{ev_id}\']",true)[0]
+		end
+		pe_nodes
+	end
+end

data/lib/protk/mzml_parser.rb CHANGED Viewed

@@ -14,6 +14,15 @@ class MzMLParser < Object
 		@file_reader=XML::Reader.document(doc)
 	end
+	def next_runid()
+		until @file_reader.name=="run"
+			if !@file_reader.read()
+				return nil
+			end
+		end
+		return @file_reader.get_attribute('id')
+	end
 	def next_spectrum()
 		until @file_reader.name=="spectrum"

data/lib/protk/peptide.rb CHANGED Viewed

@@ -1,6 +1,7 @@
 require 'libxml'
 require 'bio'
 require 'protk/bio_gff3_extensions'
+require 'protk/mzidentml_doc'
 require 'protk/error'
 include LibXML
@@ -10,22 +11,55 @@ end
 class Peptide
+	# Stripped sequence (no modifications)
 	attr_accessor :sequence
 	attr_accessor :protein_name
 	attr_accessor :charge
-	attr_accessor :nsp_adjusted_probability
+	attr_accessor :probability
+	attr_accessor :theoretical_neutral_mass
+	def as_protxml
+		node = XML::Node.new('peptide')
+		node['peptide_sequence']=self.sequence.to_s
+		node['charge']=self.charge.to_s
+		node['nsp_adjusted_probability']=self.probability.to_s
+		node['calc_neutral_pep_mass']=self.theoretical_neutral_mass.to_s
+		node
+	end
 	class << self
 		def from_protxml(xmlnode)
 			pep=new()
 			pep.sequence=xmlnode['peptide_sequence']
-			pep.nsp_adjusted_probability=xmlnode['nsp_adjusted_probability'].to_f
+			pep.probability=xmlnode['nsp_adjusted_probability'].to_f
 			pep.charge=xmlnode['charge'].to_i
 			pep
 		end
+		# <ProteinDetectionHypothesis id="PAG_0_1" dBSequence_ref="JEMP01000193.1_rev_g3500.t1 280755" passThreshold="false">
+		# 	<PeptideHypothesis peptideEvidence_ref="PepEv_1">
+		# 		<SpectrumIdentificationItemRef spectrumIdentificationItem_ref="SII_1_1"/>
+		# 	</PeptideHypothesis>
+		# 	<cvParam cvRef="PSI-MS" accession="MS:1002403" name="group representative"/>
+		# 	<cvParam cvRef="PSI-MS" accession="MS:1002401" name="leading protein"/>
+		# 	<cvParam cvRef="PSI-MS" accession="MS:1001093" name="sequence coverage" value="0.0"/>
+		# </ProteinDetectionHypothesis>
+		def from_mzid(xmlnode)
+			pep=new()
+			pep.sequence=MzIdentMLDoc.get_sequence_for_peptide(xmlnode)
+			best_psm = MzIdentMLDoc.get_best_psm_for_peptide(xmlnode)
+			# require 'byebug';byebug
+			pep.probability = MzIdentMLDoc.get_cvParam(best_psm,"MS:1002466")['value'].to_f
+			pep.theoretical_neutral_mass = MzIdentMLDoc.get_cvParam(best_psm,"MS:1001117")['value'].to_f
+			pep.charge = best_psm.attributes['chargeState'].to_i
+			pep.protein_name = MzIdentMLDoc.get_dbsequence(xmlnode.parent,xmlnode.parent.attributes['dBSequence_ref']).attributes['accession']
+			# pep.charge = MzIdentMLDoc.get_charge_for_psm(best_psm)
+			pep
+		end
 		def from_sequence(seq,charge=nil)
 			pep=new()
 			pep.sequence=seq
@@ -146,7 +180,7 @@ class Peptide
 		cds_id = parent_record.id
 		this_id = "#{cds_id}.#{self.sequence}"
 		this_id << ".#{self.charge}" unless self.charge.nil?
-		score = self.nsp_adjusted_probability.nil? ? "." : self.nsp_adjusted_probability.to_s
+		score = self.probability.nil? ? "." : self.probability.to_s
 		gff_string = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\tID=#{this_id};Parent=#{cds_id}"
 		Bio::GFF::GFF3::Record.new(gff_string)
 	end

data/lib/protk/pepxml_writer.rb ADDED Viewed

@@ -0,0 +1,24 @@
+include LibXML
+class PepXMLWriter < Object
+	PEPXML_NS_PREFIX="pepxml"
+	PEPXML_NS="http://regis-web.systemsbiology.net/pepXML"
+	attr :template_doc
+	def initialize
+		template_path="#{File.dirname(__FILE__)}/data/template_pep.xml"
+		template_parser=XML::Parser.file(template_path)
+		@template_doc=template_parser.parse
+	end
+	def append_spectrum_query(query_node)
+		@template_doc.root << query_node
+	end
+	def save(file_path)
+		@template_doc.save(file_path,:indent=>true,:encoding => XML::Encoding::UTF_8)
+	end
+end