protk 1.1.0.pre → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +19 -17
- data/bin/annotate_ids.rb +1 -1
- data/bin/asapratio.rb +27 -0
- data/bin/file_convert.rb +3 -3
- data/bin/libra.rb +70 -0
- data/bin/msgfplus_search.rb +41 -35
- data/bin/omssa_search.rb +33 -1
- data/bin/peptide_prophet.rb +17 -4
- data/bin/pepxml_to_table.rb +17 -6
- data/bin/protein_prophet.rb +1 -1
- data/bin/tandem_search.rb +49 -5
- data/bin/uniprot_mapper.rb +77 -0
- data/bin/xpress.rb +27 -0
- data/lib/protk/constants.rb +47 -1
- data/lib/protk/convert_util.rb +27 -0
- data/lib/protk/data/apt-get_packages.yaml +4 -1
- data/lib/protk/data/default_config.yml +1 -0
- data/lib/protk/data/make_uniprot_table.rb +29 -0
- data/lib/protk/data/predefined_db.sphuman.yaml +1 -1
- data/lib/protk/data/tandem_params.xml +17 -3
- data/lib/protk/data/uniprot_accessions.loc +96 -0
- data/lib/protk/data/uniprot_accessions_table.txt +97 -0
- data/lib/protk/data/uniprot_input_accessions.loc +95 -0
- data/lib/protk/data/yum_packages.yaml +65 -0
- data/lib/protk/galaxy_stager.rb +18 -5
- data/lib/protk/galaxy_util.rb +39 -2
- data/lib/protk/manage_db_rakefile.rake +43 -30
- data/lib/protk/pepxml.rb +22 -0
- data/lib/protk/protxml.rb +5 -1
- data/lib/protk/setup_rakefile.rake +55 -8
- data/lib/protk/swissprot_database.rb +1 -1
- data/lib/protk/uniprot_mapper.rb +47 -0
- data/lib/protk.rb +1 -0
- metadata +20 -4
data/README.md
CHANGED
@@ -4,11 +4,7 @@
 ***
 ## What is it?
 
-Protk is a wrapper for various proteomics tools.
-
-## Why do we need a wrapper around these tools
-
-The aim of protk is present a consistent interface to numerous proteomics tools that is as uniform as possible. Protk also provides built-in support for managing protein databases.
+Protk is a wrapper for various proteomics tools. It aims to present a consistent interface to a wide variety of tools and provides support for managing protein databases.
 
 ***
 
@@ -16,27 +12,33 @@ The aim of protk is present a consistent interface to numerous proteomics tools
 
 ## Basic Installation
 
-1. Install rvm
-
+Protk depends on ruby 1.9. The recommended way to install ruby and manage ruby gems is with rvm. Install rvm using this command.
+
+curl -L https://get.rvm.io | bash -s stable
+
+Next install ruby and protk's dependencies
 
 On OSX
-
-
-
-
+
+rvm install 1.9.3 --with-gcc=clang
+rvm use 1.9.3
+gem install protk
+protk_setup.rb all
 
 On Linux
-
-
-
-
-
+
+rvm install 1.9.3
+rvm use 1.9.3
+gem install protk
+sudo protk_setup.rb system_dependencies
+protk_setup all
 
 
 ## Sequence databases
 
-After running the setup.sh script you should run manage_db.rb to install specific sequence databases for use by the search engines. Protk comes with several predefined database configurations. For example, to install a database consisting of human entries from Swissprot plus known contaminants use the following
+After running the setup.sh script you should run manage_db.rb to install specific sequence databases for use by the search engines. Protk comes with several predefined database configurations. For example, to install a database consisting of human entries from Swissprot plus known contaminants use the following commands;
 
+manage_db.rb add crap
 manage_db.rb add sphuman
 
 You should now be able to run database searches, specifying this database by using the -d sphuman flag. Every month or so swissprot will release a new database version. You can keep your database up to date using;
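The updated README points users at the -d flag for selecting an installed database. As a hedged illustration only (the spectrum file name below is hypothetical, and each search wrapper documents its full option set via --help), a search against the sphuman database installed above would look roughly like:

    tandem_search.rb -d sphuman sample.mzML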
data/bin/annotate_ids.rb
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env ruby
 #
-# This file is part of
+# This file is part of Protk
 # Created by Ira Cooke 21/7/2011
 #
 # Takes an input file with a list of identified proteins and creates a table with swissprot/uniprot database details in various columns for each protein in the input file.
data/bin/asapratio.rb
ADDED
@@ -0,0 +1,27 @@
+#!/usr/bin/env ruby
+#
+# Created by John Chilton
+#
+# Run ASAPRatio against protein prophet results.
+#
+#
+
+require 'protk/constants'
+require 'protk/protxml'
+require 'protk/galaxy_util'
+
+for_galaxy = GalaxyUtil.for_galaxy?
+
+protxml_path = ARGV.shift
+
+if for_galaxy
+  protxml_path = GalaxyUtil.stage_protxml(protxml_path)
+end
+
+protxml = ProtXML.new(protxml_path)
+pepxml_path = protxml.find_pep_xml()
+
+genv=Constants.new
+
+command="#{genv.asapratiopeptideparser} '#{pepxml_path}' #{ARGV.join(" ")} ; #{genv.asapratioproteinparser} '#{protxml_path}'; #{genv.asaprationpvalueparser} '#{protxml_path}' "
+%x[#{command}]
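For orientation, the new asapratio.rb wrapper takes a protXML file as its first argument, locates the matching pepXML via ProtXML#find_pep_xml, and shells out to the three TPP ASAPRatio parsers. A minimal hedged usage sketch (the file name is hypothetical; any extra arguments are passed straight through to ASAPRatioPeptideParser):

    asapratio.rb interact.prot.xml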
data/bin/file_convert.rb
CHANGED
@@ -126,14 +126,14 @@ basedir=Pathname.new(filename).dirname.to_s #Where we run the tool
 
 if ( convert_tool.maldi )
   #For MALDI we know the charge is 1 so set it explicitly. Sometimes it is missing from the data
-  runner.run_local("cd #{basedir}; #{genv.
+  runner.run_local("cd #{basedir}; #{genv.msconvert} #{input_relative_filename} --filter \"titleMaker <RunId>.<ScanNumber>.<ScanNumber>.1\" --#{convert_tool.output_format} -o #{output_dir}")
 else
   if ( has_charge_information(filename) )
-    runner.run_local("cd #{basedir}; #{genv.
+    runner.run_local("cd #{basedir}; #{genv.msconvert} #{input_relative_filename} --filter \"titleMaker <RunId>.<ScanNumber>.<ScanNumber>.<ChargeState>\" --#{convert_tool.output_format} -o #{output_dir}")
   else
     # If input file is missing charges the best we can do is just assign charge=1. Search engines can choose to ignore this value anyway.
     #
-    runner.run_local("cd #{basedir}; #{genv.
+    runner.run_local("cd #{basedir}; #{genv.msconvert} #{input_relative_filename} --filter \"titleMaker <RunId>.<ScanNumber>.<ScanNumber>.1\" --#{convert_tool.output_format} -o #{output_dir}")
   end
 end
 
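The rewritten calls above delegate to msconvert with a titleMaker filter so that every spectrum title carries run id, scan number and charge. After interpolation the command resolves to an ordinary msconvert invocation of roughly this shape; this is a hedged sketch only, and the input file, output directory and mgf output format shown here are illustrative rather than taken from the diff:

    msconvert sample.raw --filter "titleMaker <RunId>.<ScanNumber>.<ScanNumber>.<ChargeState>" --mgf -o ./converted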
data/bin/libra.rb
ADDED
@@ -0,0 +1,70 @@
+#!/usr/bin/env ruby
+#
+# Created by John Chilton
+#
+# Run libra quantification against protein prophet results.
+#
+#
+
+require 'protk/constants'
+require 'protk/protxml'
+require 'protk/galaxy_util'
+require 'optparse'
+
+for_galaxy = GalaxyUtil.for_galaxy?
+
+protxml_path = ARGV.shift
+
+if for_galaxy
+  protxml_path = GalaxyUtil.stage_protxml(protxml_path)
+end
+
+protxml = ProtXML.new(protxml_path)
+pepxml_path = protxml.find_pep_xml()
+
+genv=Constants.new
+
+option_parser=OptionParser.new()
+
+reagents = []
+mass_tolerance = "0.2"
+option_parser.on( '--mass-tolerance TOL',"Specifies the mass tolerance (window libra will search for the most intense m/z value in)." ) do |tol|
+  mass_tolerance = tol
+end
+
+option_parser.on( '--reagent MZ', "Specify a reagent (via m/z values).") do |reagent|
+  reagents << reagent
+end
+
+minimum_threshold_string = ""
+option_parser.on( '--minimum-threshold THRESH', "Minimum threshhold intensity (not required).") do |thresh|
+  minimum_threshold_string = "<minimumThreshhold value=\"#{thresh}\"/>"
+end
+
+option_parser.parse!
+
+
+reagent_strings = reagents.map do |reagent|
+  "<reagent mz=\"#{reagent}\" />"
+end
+reagents_string = reagent_strings.join(" ")
+
+isotopic_contributions = ""
+
+condition_contents = "<SUMmOnCondition description=\"libra_galaxy_run\">
+<fragmentMasses>
+#{reagents_string}
+</fragmentMasses>
+#{isotopic_contributions}
+<massTolerance value=\"#{mass_tolerance}\"/>
+<centroiding type=\"2\" iterations=\"1\"/>
+<normalization type=\"4\"/>
+<targetMs level=\"2\"/>
+<output type=\"1\"/>
+<quantitationFile name=\"quantitation.tsv\"/>
+#{minimum_threshold_string}
+</SUMmOnCondition>"
+File.open("condition.xml", "w") { |f| f.write(condition_contents) }
+print condition_contents
+command="#{genv.librapeptideparser} '#{pepxml_path}' -ccondition.xml; #{genv.libraproteinratioparser} '#{protxml_path}' -c#{condition_file}"
+%x[#{command}]
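A hedged sketch of how the new libra.rb wrapper might be invoked for a 4-plex run; the protXML name and reagent masses below are illustrative. Each --reagent flag contributes one <reagent> element to the generated condition.xml, and --mass-tolerance fills the <massTolerance> value:

    libra.rb interact.prot.xml --mass-tolerance 0.2 --reagent 114.1 --reagent 115.1 --reagent 116.1 --reagent 117.1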
data/bin/msgfplus_search.rb
CHANGED
@@ -9,11 +9,15 @@ $VERBOSE=nil
 require 'protk/constants'
 require 'protk/command_runner'
 require 'protk/search_tool'
+require 'protk/galaxy_stager'
+require 'protk/galaxy_util'
 
+for_galaxy = GalaxyUtil.for_galaxy
+input_stager = nil
 
 # Setup specific command-line options for this tool. Other options are inherited from SearchTool
 #
-search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>
+search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>false,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
 search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
 search_tool.options.output_suffix="_msgfplus"
 
@@ -92,16 +96,26 @@ ARGV.each do |filename|
 if ( search_tool.explicit_output!=nil)
   output_path=search_tool.explicit_output
 else
-  output_path="#{search_tool.output_base_path(filename.chomp)}.
+  output_path="#{search_tool.output_base_path(filename.chomp)}.pepXML"
 end
-
+
+
 # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
 # Get the input file extension
 ext = Pathname.new(filename).extname
+input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
 
+mzid_output_path="#{search_tool.input_base_path(filename.chomp)}.mzid"
+
+
+if for_galaxy
+  original_input_file = input_path
+  original_input_path = Pathname.new("#{original_input_file}")
+  input_stager = GalaxyStager.new("#{original_input_file}", :extension => '.mzML')
+  input_path = input_stager.staged_path
+end
 
 
-input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
 
 # Only proceed if the output file is not present or we have opted to over-write it
 #
@@ -109,11 +123,10 @@ ARGV.each do |filename|
 
 # The basic command
 #
-cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{
-
+cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{mzid_output_path} "
 #Missed cleavages
 #
-throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages > 2)
+throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages.to_i > 2)
 cmd << " -ntt #{search_tool.missed_cleavages}"
 
 # Precursor tolerance
@@ -121,8 +134,7 @@ ARGV.each do |filename|
 cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
 
 # Instrument type
-#
-cmd << " -inst 2"
+cmd << " -inst #{search_tool.instrument}"
 
 # cmd << " -m 4"
 
@@ -134,55 +146,49 @@ ARGV.each do |filename|
 # cmd << " -e #{search_tool.enzyme}"
 # end
 
-
-mods_file=File.open(mods_path,'w+')
+mods_file_content = ""
 
 # Variable Modifications
 #
 if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
-  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join("
+  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join("\n")
   if ( var_mods !="" )
-
-  end
-else
-  # Add options related to peptide modifications
-  #
-  if ( search_tool.glyco )
-    cmd << " -mv 119 "
+    mods_file_content << "#{var_mods}\n"
   end
 end
 
 # Fixed modifications
 #
 if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
-  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join("
+  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join("\n")
   if ( fix_mods !="")
-
+    mods_file_content << "#{fix_mods}"
   end
-
-  if ( search_tool.has_modifications )
-    cmd << " -mf "
-    if ( search_tool.carbamidomethyl )
-      cmd<<"3 "
-    end
-
-    if ( search_tool.methionine_oxidation )
-      cmd<<"1 "
-    end
+end
 
-
+if ( mods_file_content != "")
+  mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
+  mods_file=File.open(mods_path,'w+')
+  mods_file.write "NumMods=2\n#{mods_file_content}"
+  mods_file.close
+  cmd << " -mod #{mods_path}"
 end
 
-#
+# As a final part of the command we convert to pepxml
+cmd << "; #{genv.idconvert} #{mzid_output_path} --pepXML -o #{Pathname.new(mzid_output_path).dirname}"
+
+#Then copy the pepxml to the final output path
+cmd << "; cp #{mzid_output_path.chomp('.mzid')}.pepXML #{output_path}"
+
+# Up to here we've formulated the command. The rest is cleanup
 p "Running:#{cmd}"
 
 # Run the search
 #
 job_params= {:jobid => search_tool.jobid_from_filename(filename) }
-job_params[:queue]="lowmem"
-job_params[:vmem]="900mb"
 search_tool.run(cmd,genv,job_params)
 
+input_stager.restore_references(output_path)
 
 else
 genv.log("Skipping search on existing file #{output_path}",:warn)
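Taken together, the msgfplus_search.rb changes make the wrapper emit one shell pipeline per input file: an MSGF+ search writing mzIdentML, an idconvert step to pepXML, and a copy to the requested output path. A hedged sketch of the expanded pipeline; the jar location, memory size, database and file names are illustrative, and additional flags such as the precursor tolerance are omitted:

    java -Xmx4G -jar MSGFPlus.jar -d sphuman.fasta -s sample.mzML -o sample.mzid -ntt 2 ; idconvert sample.mzid --pepXML -o . ; cp sample.pepXML sample_msgfplus.pepXML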
data/bin/omssa_search.rb
CHANGED
@@ -10,7 +10,9 @@ $VERBOSE=nil
 require 'protk/constants'
 require 'protk/command_runner'
 require 'protk/search_tool'
+require 'protk/galaxy_util'
 
+for_galaxy = GalaxyUtil.for_galaxy?
 
 # Setup specific command-line options for this tool. Other options are inherited from SearchTool
 #
@@ -33,6 +35,10 @@ search_tool.option_parser.on( '--intensity-cut-off co', 'Peak intensity cut-off
   search_tool.options.intensity_cut_off=co
 end
 
+search_tool.options.galaxy_index_dir=nil
+search_tool.option_parser.on( '--galaxy-index-dir dir', 'Specify galaxy index directory, will search for mods file there.' ) do |dir|
+  search_tool.options.galaxy_index_dir=dir
+end
 
 search_tool.option_parser.parse!
 
@@ -45,9 +51,14 @@ genv=Constants.new
 rt_correct_bin="#{File.dirname(__FILE__)}/correct_omssa_retention_times.rb"
 repair_script_bin="#{File.dirname(__FILE__)}/repair_run_summary.rb"
 
+make_blastdb_cmd=""
+
 case
 when Pathname.new(search_tool.database).exist? # It's an explicitly named db
   current_db=Pathname.new(search_tool.database).realpath.to_s
+  if(not FileTest.exists?("#{current_db}.phr"))
+    make_blastdb_cmd << "#{@genv.makeblastdb} -dbtype prot -parse_seqids -in #{current_db}; "
+  end
 else
   current_db=search_tool.current_database :fasta
 end
@@ -85,12 +96,29 @@ ARGV.each do |filename|
 
 # The basic command
 #
-cmd= "#{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
+cmd = "#{make_blastdb_cmd} #{genv.omssacl} -d #{current_db} -fm #{input_path} -op #{output_path} -w"
 
 #Missed cleavages
 #
 cmd << " -v #{search_tool.missed_cleavages}"
 
+# If this is for Galaxy and a data directory has been specified
+# look for a common unimod.xml file.
+if for_galaxy
+  galaxy_index_dir = search_tool.galaxy_index_dir
+  if galaxy_index_dir
+    galaxy_mods = File.join(galaxy_index_dir, "mods.xml")
+    if( FileTest.exists?(galaxy_mods) )
+      cmd << " -mx #{galaxy_mods}"
+    end
+    galaxy_usermods = File.join(galaxy_index_dir, "usermods.xml")
+    if( FileTest.exists?(galaxy_usermods) )
+      cmd << " -mux #{galaxy_usermods}"
+    end
+  end
+end
+
+
 # Precursor tolerance
 #
 if ( search_tool.precursor_tolu=="ppm")
@@ -202,4 +230,8 @@ ARGV.each do |filename|
 genv.log("Skipping search on existing file #{output_path}",:warn)
 end
 
+# Reset this. We only want to index the database at most once
+#
+make_blastdb_cmd=""
+
 end
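When an explicitly named FASTA database has no .phr index, the updated omssa_search.rb now prepends a one-off indexing step to the first search command. Expanded, that prefix is roughly the following; the database path is illustrative, while the flags are exactly those built in the diff above:

    makeblastdb -dbtype prot -parse_seqids -in /path/to/mydb.fasta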
data/bin/peptide_prophet.rb
CHANGED
@@ -82,6 +82,15 @@ prophet_tool.option_parser.on( '-F', '--one-ata-time', 'Create a separate pproph
   prophet_tool.options.one_ata_time = true
 end
 
+prophet_tool.options.decoy_prefix="decoy"
+prophet_tool.option_parser.on( '--decoy-prefix prefix', 'Prefix for decoy sequences') do |prefix|
+  prophet_tool.options.decoy_prefix = prefix
+end
+
+prophet_tool.options.override_database=nil
+prophet_tool.option_parser.on( '--override-database database', 'Manually specify database') do |database|
+  prophet_tool.options.override_database = database
+end
 
 prophet_tool.option_parser.parse!
 
@@ -99,7 +108,11 @@ ARGV.each {|file_name|
 name=file_name.chomp
 
 engine=prophet_tool.extract_engine(name)
-
+if prophet_tool.override_database
+  db_path = prophet_tool.override_database
+else
+  db_path=prophet_tool.extract_db(name)
+end
 
 
 file_info[name]={:engine=>engine , :database=>db_path }
@@ -130,7 +143,7 @@ end
 
 def generate_command(genv,prophet_tool,inputs,output,database,engine)
 
-  cmd="#{genv.xinteract} -N#{output} -l7 -eT -D#{database} "
+  cmd="#{genv.xinteract} -N#{output} -l7 -eT -D'#{database}' "
 
   if prophet_tool.glyco
     cmd << " -Og "
@@ -189,9 +202,9 @@ def generate_command(genv,prophet_tool,inputs,output,database,engine)
   end
 
   if engine=="omssa" || engine=="phenyx"
-    cmd << "-Op -P -
+    cmd << " -Op -P -d#{prophet_tool.decoy_prefix} "
   else
-    cmd << "-
+    cmd << " -d#{prophet_tool.decoy_prefix} "
   end
 
 
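A hedged example of the two new peptide_prophet.rb options in use (file and database names here are hypothetical): --decoy-prefix is forwarded to xinteract as its -d argument, and --override-database bypasses the database path extracted from the input pepXML:

    peptide_prophet.rb --decoy-prefix decoy_ --override-database /path/to/sphuman.fasta sample_tandem.pep.xml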
data/bin/pepxml_to_table.rb
CHANGED
@@ -34,10 +34,20 @@ output_fh=File.new("#{output_file}",'w')
 
 output_fh.write "protein\tpeptide\tassumed_charge\tcalc_neutral_pep_mass\tneutral_mass\tretention_time\tstart_scan\tend_scan\tsearch_engine\tpeptideprophet_prob\tinterprophet_prob\n"
 
+XML::Error.set_handler(&XML::Error::QUIET_HANDLER)
+
 pepxml_parser=XML::Parser.file("#{input_file}")
-pepxml_doc=pepxml_parser.parse
 
-
+pepxml_ns_prefix="xmlns:"
+pepxml_ns="xmlns:http://regis-web.systemsbiology.net/pepXML"
+pepxml_doc=pepxml_parser.parse
+if not pepxml_doc.root.namespaces.default
+  pepxml_ns_prefix=""
+  pepxml_ns=nil
+end
+
+
+spectrum_queries=pepxml_doc.find("//#{pepxml_ns_prefix}spectrum_query", pepxml_ns)
 
 spectrum_queries.each do |query|
 
@@ -45,7 +55,7 @@ spectrum_queries.each do |query|
 neutral_mass=query.attributes['precursor_neutral_mass']
 assumed_charge=query.attributes['assumed_charge']
 
-top_search_hit=query.find(
+top_search_hit=query.find("./#{pepxml_ns_prefix}search_result/#{pepxml_ns_prefix}search_hit",pepxml_ns)[0]
 peptide=top_search_hit.attributes['peptide']
 protein=top_search_hit.attributes['protein']
 calc_neutral_pep_mass=top_search_hit.attributes['calc_neutral_pep_mass']
@@ -53,7 +63,7 @@ spectrum_queries.each do |query|
 end_scan=query.attributes['end_scan']
 
 search_engine=""
-search_score_names=top_search_hit.find(
+search_score_names=top_search_hit.find("./#{pepxml_ns_prefix}search_score/@name",pepxml_ns).collect {|s| s.to_s}
 
 if ( search_score_names.length==2 && search_score_names.grep(/^name.*=.*pvalue/))
   search_engine="omssa"
@@ -63,9 +73,10 @@ spectrum_queries.each do |query|
   search_engine="x!tandem"
 end
 
-pp_result=top_search_hit.find('./xmlns:analysis_result/xmlns:peptideprophet_result/@probability','xmlns:http://regis-web.systemsbiology.net/pepXML')
-ip_result=top_search_hit.find('./xmlns:analysis_result/xmlns:interprophet_result/@probability','xmlns:http://regis-web.systemsbiology.net/pepXML')
 
+pp_result=top_search_hit.find("./#{pepxml_ns_prefix}analysis_result/#{pepxml_ns_prefix}peptideprophet_result/@probability",pepxml_ns)
+ip_result=top_search_hit.find("./#{pepxml_ns_prefix}analysis_result/#{pepxml_ns_prefix}interprophet_result/@probability",pepxml_ns)
+
 peptide_prophet_prob=""
 interprophet_prob=""
 peptide_prophet_prob=pp_result[0].value if ( pp_result.length>0 )
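The namespace handling added above follows one pattern: prefix the XPath and pass the pepXML namespace only when the document actually declares a default namespace. A minimal, self-contained sketch of the same idea; it reuses the libxml-ruby calls that appear in the diff, while the require line and the input file name are assumptions of this sketch:

    require 'xml'

    doc = XML::Parser.file("sample.pep.xml").parse
    prefix = "xmlns:"
    ns = "xmlns:http://regis-web.systemsbiology.net/pepXML"
    if not doc.root.namespaces.default
      prefix = ""
      ns = nil
    end
    # Count spectrum_query elements whether or not the file declares its namespace
    puts doc.find("//#{prefix}spectrum_query", ns).length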
data/bin/protein_prophet.rb
CHANGED
data/bin/tandem_search.rb
CHANGED
@@ -41,6 +41,47 @@ search_tool.option_parser.on( '-K', '--keep-params-files', 'Keep X!Tandem parame
   search_tool.options.keep_params_files = true
 end
 
+# In case want pepXML, but still want tandem output also.
+search_tool.options.tandem_output=nil
+search_tool.option_parser.on( '--tandem-output tandem_output', 'Keep X! Tandem Output') do |tandem_output|
+  search_tool.options.tandem_output=tandem_output
+end
+
+search_tool.options.thresholds_type = 'isb_kscore'
+search_tool.option_parser.on( '--thresholds-type thresholds_type', 'Threshold Type (tandem_default, isb_native, isb_kscore, scaffold)' ) do |thresholds_type|
+  search_tool.options.thresholds_type = thresholds_type
+end
+
+search_tool.options.algorithm = "kscore"
+search_tool.option_parser.on( '--algorithm algorithm', "Scoring algorithm (kscore or native)" ) do |algorithm|
+  search_tool.options.algorithm = algorithm
+end
+
+search_tool.options.cleavage_semi = false
+search_tool.option_parser.on( '--cleavage-semi' ) do
+  search_tool.options.cleavage_semi = true
+end
+
+search_tool.options.n_terminal_mod_mass=nil
+search_tool.option_parser.on('--n-terminal-mod-mass mass') do |mass|
+  search_tool.options.n_terminal_mod_mass = mass
+end
+
+search_tool.options.c_terminal_mod_mass=nil
+search_tool.option_parser.on('--c-terminal-mod-mass mass') do |mass|
+  search_tool.options.c_terminal_mod_mass = mass
+end
+
+search_tool.options.cleavage_n_terminal_mod_mass=nil
+search_tool.option_parser.on('--cleavage-n-terminal-mod-mass mass') do |mass|
+  search_tool.options.cleavage_n_terminal_mod_mass = mass
+end
+
+search_tool.options.cleavage_c_terminal_mod_mass=nil
+search_tool.option_parser.on('--cleavage-c-terminal-mod-mass mass') do |mass|
+  search_tool.options.cleavage_c_terminal_mod_mass = mass
+end
+
 search_tool.option_parser.parse!
 
 
@@ -60,8 +101,6 @@ else
 end
 
 
-
-
 # Parse options from a parameter file (if provided), or from the default parameter file
 #
 params_parser=XML::Parser.file(tandem_params)
@@ -100,7 +139,7 @@ def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_d
 #
 scoring_notes=std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
 throw "Exactly one list path, default parameters note is required in the parameter file" unless scoring_notes.length==1
-scoring_notes[0].content="#{genv.tpp_root}/bin/
+scoring_notes[0].content="#{genv.tpp_root}/bin/isb_default_input_#{search_tool.algorithm}.xml"
 
 # Taxonomy and Database
 #
@@ -264,8 +303,13 @@ ARGV.each do |filename|
 # pepXML conversion and repair
 #
 unless search_tool.no_pepxml
-  repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
-  cmd << "; #{genv.tandem2xml} #{output_path} #{pepxml_path}; #{repair_script} #{pepxml_path}
+  repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
+  cmd << "; #{genv.tandem2xml} #{output_path} #{pepxml_path}; #{repair_script} #{pepxml_path}"
+  if search_tool.tandem_output
+    cmd << "; cp #{output_path} #{search_tool.tandem_output}"
+  else
+    cmd << "; rm #{output_path}"
+  end
 end
 
 # Add a cleanup command unless the user wants to keep params files