protk 1.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +85 -0
 - data/bin/annotate_ids.rb +59 -0
 - data/bin/big_search.rb +41 -0
 - data/bin/correct_omssa_retention_times.rb +27 -0
 - data/bin/feature_finder.rb +76 -0
 - data/bin/file_convert.rb +157 -0
 - data/bin/generate_omssa_loc.rb +42 -0
 - data/bin/interprophet.rb +91 -0
 - data/bin/make_decoy.rb +64 -0
 - data/bin/manage_db.rb +123 -0
 - data/bin/mascot_search.rb +187 -0
 - data/bin/mascot_to_pepxml.rb +44 -0
 - data/bin/msgfplus_search.rb +191 -0
 - data/bin/omssa_search.rb +205 -0
 - data/bin/peptide_prophet.rb +245 -0
 - data/bin/pepxml_to_table.rb +78 -0
 - data/bin/protein_prophet.rb +140 -0
 - data/bin/protk_setup.rb +31 -0
 - data/bin/repair_run_summary.rb +113 -0
 - data/bin/tandem_search.rb +292 -0
 - data/bin/template_search.rb +144 -0
 - data/bin/unimod_to_loc.rb +118 -0
 - data/bin/xls_to_table.rb +46 -0
 - data/ext/protk/extconf.rb +3 -0
 - data/ext/protk/protk.c +235 -0
 - data/lib/protk/big_search_rakefile.rake +16 -0
 - data/lib/protk/big_search_tool.rb +23 -0
 - data/lib/protk/bio_sptr_extensions.rb +210 -0
 - data/lib/protk/biotools_excel_converter.rb +60 -0
 - data/lib/protk/command_runner.rb +84 -0
 - data/lib/protk/constants.rb +296 -0
 - data/lib/protk/data/FeatureFinderCentroided.ini +63 -0
 - data/lib/protk/data/apt-get_packages.yaml +47 -0
 - data/lib/protk/data/brew_packages.yaml +10 -0
 - data/lib/protk/data/default_config.yml +20 -0
 - data/lib/protk/data/predefined_db.crap.yaml +19 -0
 - data/lib/protk/data/predefined_db.sphuman.yaml +25 -0
 - data/lib/protk/data/predefined_db.swissprot_annotation.yaml +20 -0
 - data/lib/protk/data/predefined_db.swissprot_fasta_annotation.yaml +20 -0
 - data/lib/protk/data/tandem_params.xml +56 -0
 - data/lib/protk/data/taxonomy_template.xml +9 -0
 - data/lib/protk/data/unimod.xml +16780 -0
 - data/lib/protk/eupathdb_gene_information_table.rb +158 -0
 - data/lib/protk/galaxy_stager.rb +24 -0
 - data/lib/protk/galaxy_util.rb +9 -0
 - data/lib/protk/manage_db_rakefile.rake +484 -0
 - data/lib/protk/manage_db_tool.rb +181 -0
 - data/lib/protk/mascot_util.rb +63 -0
 - data/lib/protk/omssa_util.rb +57 -0
 - data/lib/protk/plasmodb.rb +50 -0
 - data/lib/protk/prophet_tool.rb +85 -0
 - data/lib/protk/protein_annotator.rb +646 -0
 - data/lib/protk/protxml.rb +137 -0
 - data/lib/protk/randomize.rb +7 -0
 - data/lib/protk/search_tool.rb +182 -0
 - data/lib/protk/setup_rakefile.rake +245 -0
 - data/lib/protk/setup_tool.rb +19 -0
 - data/lib/protk/spreadsheet_extensions.rb +78 -0
 - data/lib/protk/swissprot_database.rb +38 -0
 - data/lib/protk/tool.rb +182 -0
 - data/lib/protk/xtandem_defaults.rb +11 -0
 - data/lib/protk.rb +18 -0
 - metadata +256 -0
 
| 
         @@ -0,0 +1,113 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # This file is part of protk
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Created by Ira Cooke 2/12/2011
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Repairs the msms_run_summary tag in a pepXML document to contain a specified file and datatype
         
     | 
| 
      
 7 
     | 
    
         
            +
            # This tool should only be used on pepXML files that contain a single msms_run_summary (eg not interprophet results)
         
     | 
| 
      
 8 
     | 
    
         
            +
            #
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            require 'protk/constants'
         
     | 
| 
      
 12 
     | 
    
         
            +
            require 'protk/command_runner'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require 'protk/tool'
         
     | 
| 
      
 14 
     | 
    
         
            +
            require 'libxml'
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            include LibXML
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            # Environment with global constants
         
     | 
| 
      
 19 
     | 
    
         
            +
            #
         
     | 
| 
      
 20 
     | 
    
         
            +
            genv = Constants.new

            # Set up the command-line options specific to this tool.
            # Any other options come from Tool.
            #
            tool = Tool.new
            tool.option_parser.banner = "Repair msms_run_summary tag in a pepXML file.\n\nUsage: repair_run_summary.rb [options] file1.pepXML"

            tool.options.new_base_name = nil
            tool.option_parser.on('-N', '--base-name mzmlfile', 'Original MSMS spectrum file used for search') do |file|
              tool.options.new_base_name = file
            end

            tool.options.raw_data_type = nil
            tool.option_parser.on('-R', '--raw-type type', 'Raw data type used for search') do |type|
              tool.options.raw_data_type = type
            end

            tool.options.omssa_ion_tolerance = nil
            tool.option_parser.on('--omssa-itol fitol', 'Add a fragment ion tolerance parameter to the omssa search summary') do |fitol|
              tool.options.omssa_ion_tolerance = fitol
            end

            tool.option_parser.parse!

            # The single positional argument is the pepXML file, which is repaired in place.
            # Without it there is nothing to parse, so show the usage text and stop.
            pepxml_file = ARGV[0]
            abort tool.option_parser.to_s if pepxml_file.nil?

            # Namespace binding used for every namespaced XPath query below.
            pepxml_ns = 'xmlns:http://regis-web.systemsbiology.net/pepXML'

            # Read the input file
            #
            doc = XML::Parser.file(pepxml_file).parse

            # Read the values through tool.options, the same object the option callbacks write to.
            new_base_name = tool.options.new_base_name
            raw_data_type = tool.options.raw_data_type

            genv.log("Repairing #{pepxml_file} to #{new_base_name} format #{raw_data_type}", :info)

            if new_base_name.nil?
              # Try X!Tandem first.
              # It records the spectrum file in the "spectrum, path" search parameter.
              #
              spectrum_path = doc.find('//xmlns:msms_run_summary/xmlns:search_summary/xmlns:parameter[@name="spectrum, path"]', pepxml_ns)[0]
              if spectrum_path
                new_base_name = spectrum_path.attributes['value']
                raw_data_type = "mzML" # Always is for X!Tandem
              end
            end

            if new_base_name.nil?
              # Try Mascot.
              # It records only a file name, eg <parameter name="FILE" value="dataset_2.dat"/>,
              # so the directory is taken from the existing base_name attribute.
              #
              file_path = doc.find('//xmlns:msms_run_summary/xmlns:search_summary/xmlns:parameter[@name="FILE"]', pepxml_ns)[0]
              if file_path
                mascot_run_summary = doc.find('//xmlns:msms_run_summary', pepxml_ns)[0]
                old_base_name = mascot_run_summary.attributes['base_name']
                base_dir_path = Pathname.new(old_base_name).dirname.to_s

                new_base_name = "#{base_dir_path}/#{file_path.attributes['value']}"
                raw_data_type = "mgf" # Always is for Mascot
              end
            end

            # raise (not throw) so the failure surfaces as an ordinary error with this message.
            raise "Could not find original spectrum filename in pepXML and none provided" if new_base_name.nil?

            run_summary = doc.find('//xmlns:msms_run_summary', pepxml_ns)[0]
            if run_summary.nil?
              # Try without namespace (OMSSA)
              run_summary = doc.find('//msms_run_summary')[0]

              unless tool.options.omssa_ion_tolerance.nil?
                search_summary = doc.find('//search_summary')[0]
                raise "No search_summary found to receive the fragment ion tolerance" if search_summary.nil?

                # Append <parameter name="to" value="..."/> carrying the requested tolerance.
                pmnode = XML::Node.new('parameter')
                pmnode["name"] = "to"
                pmnode["value"] = tool.options.omssa_ion_tolerance.to_s
                search_summary << pmnode
              end

              # Documents that reach this fallback are always labelled mgf, overriding any -R value.
              raw_data_type = "mgf"
            end

            raise "No run summary found" if run_summary.nil?

            run_summary.attributes['base_name'] = new_base_name
            run_summary.attributes['raw_data'] = raw_data_type

            # Overwrite the input file with the repaired document.
            doc.save(pepxml_file)
         
     | 
| 
         @@ -0,0 +1,292 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # This file is part of protk
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Created by Ira Cooke 17/12/2010
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Runs an MS/MS search using the X!Tandem search engine
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            require 'protk/constants'
         
     | 
| 
      
 10 
     | 
    
         
            +
            require 'protk/command_runner'
         
     | 
| 
      
 11 
     | 
    
         
            +
            require 'protk/search_tool'
         
     | 
| 
      
 12 
     | 
    
         
            +
            require 'protk/xtandem_defaults'
         
     | 
| 
      
 13 
     | 
    
         
            +
            require 'libxml'
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            include LibXML
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            # Environment with global constants
         
     | 
| 
      
 18 
     | 
    
         
            +
            #
         
     | 
| 
      
 19 
     | 
    
         
            +
            genv = Constants.new

            # Set up the command-line options specific to this tool.
            # Other options are inherited from SearchTool.
            #
            search_tool = SearchTool.new({:msms_search=>true,:background=>true,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
            search_tool.jobid_prefix = "x"
            search_tool.option_parser.banner = "Run an X!Tandem msms search on a set of mzML input files.\n\nUsage: tandem_search.rb [options] file1.mzML file2.mzML ..."
            search_tool.options.output_suffix = "_tandem"

            # The bundled default parameter file is used unless -T names another one.
            tandem_defaults = XTandemDefaults.new.path
            search_tool.options.tandem_params = tandem_defaults
            search_tool.option_parser.on('-T', '--tandem-params tandem', 'XTandem parameters to use') do |parms|
              search_tool.options.tandem_params = parms
            end

            search_tool.options.no_pepxml = false
            search_tool.option_parser.on('-P', '--no-pepxml', 'Dont convert to pepXML after running the search') do
              search_tool.options.no_pepxml = true
            end

            search_tool.options.keep_params_files = false
            search_tool.option_parser.on('-K', '--keep-params-files', 'Keep X!Tandem parameter files') do
              search_tool.options.keep_params_files = true
            end

            search_tool.option_parser.parse!


            # Set search engine specific parameters on the SearchTool object
            #
            tandem_bin = genv.xtandem.to_s

            # File.exist? is the non-deprecated spelling of the existence check.
            # raise (not throw) reports the problem as an ordinary error with this message.
            raise "Could not find X!Tandem executable" unless File.exist?(tandem_bin)

            # Read the value through options, the same object the -T callback writes to.
            tandem_params = search_tool.options.tandem_params

            # The database option is either a path to an existing file or the name of a configured database.
            if Pathname.new(search_tool.database).exist? # It's an explicitly named db
              current_db = Pathname.new(search_tool.database).realpath.to_s
            else
              current_db = search_tool.current_database :fasta
            end

            # Parse options from a parameter file (if provided), or from the default parameter file
            #
            params_parser = XML::Parser.file(tandem_params)
            std_params = params_parser.parse

            # Parse taxonomy template file
            #
            taxo_parser = XML::Parser.file(XTandemDefaults.new.taxonomy_path)
            taxo_doc = taxo_parser.parse
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
            # Galaxy changes things like @ to __at__ we need to change it back
         
     | 
| 
      
 76 
     | 
    
         
            +
            #
         
     | 
| 
      
 77 
     | 
    
         
            +
            # Replaces the escaped tokens __at__, __oc__, __cc__, __ob__ and __cb__ in
            # +mstring+ with "@", "{", "}", "[" and "]" respectively.
            #
            # The substitutions are applied one after another in the order listed, and the
            # string is modified in place; the same (mutated) object is returned.
            def decode_modification_string(mstring)
              substitutions = [
                ["__at__", "@"],
                ["__oc__", "{"],
                ["__cc__", "}"],
                ["__ob__", "["],
                ["__cb__", "]"]
              ]
              substitutions.each do |escaped, literal|
                mstring.gsub!(escaped, literal)
              end
              mstring
            end
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
      
 86 
     | 
    
         
            +
            def generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_db,search_tool,genv)
         
     | 
| 
      
 87 
     | 
    
         
            +
              
         
     | 
| 
      
 88 
     | 
    
         
            +
              
         
     | 
| 
      
 89 
     | 
    
         
            +
              # Set the input and output paths 
         
     | 
| 
      
 90 
     | 
    
         
            +
              #
         
     | 
| 
      
 91 
     | 
    
         
            +
              input_notes=std_params.find('/bioml/note[@type="input" and @label="spectrum, path"]')
         
     | 
| 
      
 92 
     | 
    
         
            +
              throw "Exactly one spectrum, path note is required in the parameter file" unless input_notes.length==1
         
     | 
| 
      
 93 
     | 
    
         
            +
              input_notes[0].content=input_path
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
              output_notes=std_params.find('/bioml/note[@type="input" and @label="output, path"]')
         
     | 
| 
      
 96 
     | 
    
         
            +
              throw "Exactly one output, path note is required in the parameter file" unless output_notes.length==1
         
     | 
| 
      
 97 
     | 
    
         
            +
              output_notes[0].content=output_path
         
     | 
| 
      
 98 
     | 
    
         
            +
              
         
     | 
| 
      
 99 
     | 
    
         
            +
              # Set the path to the scoring algorithm default params. We use one from ISB
         
     | 
| 
      
 100 
     | 
    
         
            +
              #
         
     | 
| 
      
 101 
     | 
    
         
            +
              scoring_notes=std_params.find('/bioml/note[@type="input" and @label="list path, default parameters"]')
         
     | 
| 
      
 102 
     | 
    
         
            +
              throw "Exactly one list path, default parameters note is required in the parameter file" unless scoring_notes.length==1
         
     | 
| 
      
 103 
     | 
    
         
            +
              scoring_notes[0].content="#{genv.tpp_root}/bin/isb_default_input_kscore.xml"
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
              # Taxonomy and Database
         
     | 
| 
      
 106 
     | 
    
         
            +
              #  
         
     | 
| 
      
 107 
     | 
    
         
            +
              db_notes=std_params.find('/bioml/note[@type="input" and @label="protein, taxon"]')
         
     | 
| 
      
 108 
     | 
    
         
            +
              throw "Exactly one protein, taxon note is required in the parameter file" unless db_notes.length==1
         
     | 
| 
      
 109 
     | 
    
         
            +
              db_notes[0].content=search_tool.database.downcase
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
              taxo_notes=std_params.find('/bioml/note[@type="input" and @label="list path, taxonomy information"]')
         
     | 
| 
      
 112 
     | 
    
         
            +
              throw "Exactly one list path, taxonomy information note is required in the parameter file" unless taxo_notes.length==1
         
     | 
| 
      
 113 
     | 
    
         
            +
              taxo_notes[0].content=taxo_path
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
              fragment_tol = search_tool.fragment_tol
         
     | 
| 
      
 116 
     | 
    
         
            +
              
         
     | 
| 
      
 117 
     | 
    
         
            +
              fmass=std_params.find('/bioml/note[@type="input" and @label="spectrum, fragment monoisotopic mass error"]')
         
     | 
| 
      
 118 
     | 
    
         
            +
              p fmass
         
     | 
| 
      
 119 
     | 
    
         
            +
              throw "Exactly one spectrum, fragment monoisotopic mass error note is required in the parameter file" unless fmass.length==1
         
     | 
| 
      
 120 
     | 
    
         
            +
              fmass[0].content=fragment_tol.to_s
         
     | 
| 
      
 121 
     | 
    
         
            +
              
         
     | 
| 
      
 122 
     | 
    
         
            +
              precursor_tol = search_tool.precursor_tol
         
     | 
| 
      
 123 
     | 
    
         
            +
              ptol_plus=precursor_tol*0.5
         
     | 
| 
      
 124 
     | 
    
         
            +
              ptol_minus=precursor_tol*0.5
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
              # Precursor mass matching 
         
     | 
| 
      
 127 
     | 
    
         
            +
              #
         
     | 
| 
      
 128 
     | 
    
         
            +
              pmass_minus=std_params.find('/bioml/note[@type="input" and @label="spectrum, parent monoisotopic mass error minus"]')
         
     | 
| 
      
 129 
     | 
    
         
            +
              throw "Exactly one spectrum, parent monoisotopic mass error minus note is required in the parameter file" unless pmass_minus.length==1
         
     | 
| 
      
 130 
     | 
    
         
            +
              pmass_minus[0].content=ptol_minus.to_s
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
              pmass_plus=std_params.find('/bioml/note[@type="input" and @label="spectrum, parent monoisotopic mass error plus"]')
         
     | 
| 
      
 133 
     | 
    
         
            +
              throw "Exactly one spectrum, parent monoisotopic mass error plus note is required in the parameter file" unless pmass_plus.length==1
         
     | 
| 
      
 134 
     | 
    
         
            +
              pmass_plus[0].content=ptol_plus.to_s
         
     | 
| 
      
 135 
     | 
    
         
            +
             
     | 
| 
      
 136 
     | 
    
         
            +
              pmass_err_units=std_params.find('/bioml/note[@type="input" and @label="spectrum, parent monoisotopic mass error units"]')
         
     | 
| 
      
 137 
     | 
    
         
            +
              throw "Exactly one spectrum, parent monoisotopic mass error units note is required in the parameter file. Got #{pmass_err_units.length}" unless pmass_err_units.length==1
         
     | 
| 
      
 138 
     | 
    
         
            +
              
         
     | 
| 
      
 139 
     | 
    
         
            +
              
         
     | 
| 
      
 140 
     | 
    
         
            +
              pmass_err_units[0].content=search_tool.precursor_tolu
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
              if search_tool.strict_monoisotopic_mass
         
     | 
| 
      
 143 
     | 
    
         
            +
                isotopic_error=std_params.find('/bioml/note[@type="input" and @label="spectrum, parent monoisotopic mass isotope error"]')
         
     | 
| 
      
 144 
     | 
    
         
            +
                throw "Exactly one spectrum, parent monoisotopic mass isotope error is required in the parameter file" unless isotopic_error.length==1
         
     | 
| 
      
 145 
     | 
    
         
            +
                isotopic_error[0].content="no"
         
     | 
| 
      
 146 
     | 
    
         
            +
              end
         
     | 
| 
      
 147 
     | 
    
         
            +
              
         
     | 
| 
      
 148 
     | 
    
         
            +
              
         
     | 
| 
      
 149 
     | 
    
         
            +
              # Fixed and Variable Modifications
         
     | 
| 
      
 150 
     | 
    
         
            +
              #
         
     | 
| 
      
 151 
     | 
    
         
            +
              unless search_tool.carbamidomethyl 
         
     | 
| 
      
 152 
     | 
    
         
            +
                mods=std_params.find('/bioml/note[@type="input" and @id="carbamidomethyl-fixed"]')
         
     | 
| 
      
 153 
     | 
    
         
            +
                mods.each{ |node| node.remove!}
         
     | 
| 
      
 154 
     | 
    
         
            +
              end
         
     | 
| 
      
 155 
     | 
    
         
            +
              
         
     | 
| 
      
 156 
     | 
    
         
            +
              unless search_tool.glyco
         
     | 
| 
      
 157 
     | 
    
         
            +
                mods=std_params.find('/bioml/note[@type="input" and @id="glyco-variable"]')
         
     | 
| 
      
 158 
     | 
    
         
            +
                mods.each{ |node| node.remove!}    
         
     | 
| 
      
 159 
     | 
    
         
            +
              end
         
     | 
| 
      
 160 
     | 
    
         
            +
              
         
     | 
| 
      
 161 
     | 
    
         
            +
              unless search_tool.methionine_oxidation
         
     | 
| 
      
 162 
     | 
    
         
            +
                mods=std_params.find('/bioml/note[@type="input" and @id="methionine-oxidation-variable"]')
         
     | 
| 
      
 163 
     | 
    
         
            +
                mods.each{ |node| node.remove!}        
         
     | 
| 
      
 164 
     | 
    
         
            +
              end  
         
     | 
| 
      
 165 
     | 
    
         
            +
              
         
     | 
| 
      
 166 
     | 
    
         
            +
              var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }
         
     | 
| 
      
 167 
     | 
    
         
            +
              var_mods=var_mods.collect {|mod| decode_modification_string(mod) }
         
     | 
| 
      
 168 
     | 
    
         
            +
              fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }
         
     | 
| 
      
 169 
     | 
    
         
            +
              fix_mods=fix_mods.collect {|mod| decode_modification_string(mod)}
         
     | 
| 
      
 170 
     | 
    
         
            +
              
         
     | 
| 
      
 171 
     | 
    
         
            +
              root_bioml_node=std_params.find('/bioml')[0]
         
     | 
| 
      
 172 
     | 
    
         
            +
              
         
     | 
| 
      
 173 
     | 
    
         
            +
              mod_id=1
         
     | 
| 
      
 174 
     | 
    
         
            +
              var_mods.each do |vm|
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
                mod_type="potential modification mass"
         
     | 
| 
      
 177 
     | 
    
         
            +
                mod_type = "potential modification motif" if ( vm=~/[\[\]\(\)\{\}\!]/ )      
         
     | 
| 
      
 178 
     | 
    
         
            +
                mod_id_label = "custom-variable-mod-#{mod_id.to_s}"
         
     | 
| 
      
 179 
     | 
    
         
            +
                mod_id=mod_id+1
         
     | 
| 
      
 180 
     | 
    
         
            +
                # X!Tandem parameters are <note type="input" label="..."> elements (the same
                # element name queried via '/bioml/note[...]' throughout this function), so
                # the custom variable modification must be a 'note' element, not 'node'.
                mnode=XML::Node.new('note')
         
     | 
| 
      
 181 
     | 
    
         
            +
                mnode["id"]=mod_id_label
         
     | 
| 
      
 182 
     | 
    
         
            +
                mnode["type"]="input"
         
     | 
| 
      
 183 
     | 
    
         
            +
                mnode["label"]="residue, #{mod_type}"
         
     | 
| 
      
 184 
     | 
    
         
            +
                mnode.content=vm
         
     | 
| 
      
 185 
     | 
    
         
            +
                
         
     | 
| 
      
 186 
     | 
    
         
            +
                root_bioml_node << mnode
         
     | 
| 
      
 187 
     | 
    
         
            +
              end
         
     | 
| 
      
 188 
     | 
    
         
            +
              
         
     | 
| 
      
 189 
     | 
    
         
            +
              mod_id=1
         
     | 
| 
      
 190 
     | 
    
         
            +
              fix_mods.each do |fm|
         
     | 
| 
      
 191 
     | 
    
         
            +
                mod_type="modification mass"
         
     | 
| 
      
 192 
     | 
    
         
            +
                mod_type = "modification motif" if ( fm=~/[\[\]\(\)\{\}\!]/ )      
         
     | 
| 
      
 193 
     | 
    
         
            +
                mod_id_label = "custom-fixed-mod-#{mod_id.to_s}"
         
     | 
| 
      
 194 
     | 
    
         
            +
                mod_id=mod_id+1
         
     | 
| 
      
 195 
     | 
    
         
            +
                # X!Tandem parameters are <note type="input" label="..."> elements (the same
                # element name queried via '/bioml/note[...]' throughout this function), so
                # the custom fixed modification must be a 'note' element, not 'node'.
                mnode=XML::Node.new('note')
         
     | 
| 
      
 196 
     | 
    
         
            +
                mnode["id"]=mod_id_label
         
     | 
| 
      
 197 
     | 
    
         
            +
                mnode["type"]="input"
         
     | 
| 
      
 198 
     | 
    
         
            +
                mnode["label"]="residue, #{mod_type}"
         
     | 
| 
      
 199 
     | 
    
         
            +
                mnode.content=fm
         
     | 
| 
      
 200 
     | 
    
         
            +
                
         
     | 
| 
      
 201 
     | 
    
         
            +
                root_bioml_node << mnode
         
     | 
| 
      
 202 
     | 
    
         
            +
              end
         
     | 
| 
      
 203 
     | 
    
         
            +
             
     | 
| 
      
 204 
     | 
    
         
            +
              #p root_bioml_node
         
     | 
| 
      
 205 
     | 
    
         
            +
              std_params
         
     | 
| 
      
 206 
     | 
    
         
            +
              
         
     | 
| 
      
 207 
     | 
    
         
            +
            end
         
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
      
 209 
     | 
    
         
            +
            def generate_taxonomy_doc(taxo_doc,current_db,search_tool)
         
     | 
| 
      
 210 
     | 
    
         
            +
             
     | 
| 
      
 211 
     | 
    
         
            +
              taxon_label=taxo_doc.find('/bioml/taxon')
         
     | 
| 
      
 212 
     | 
    
         
            +
              throw "Exactly one taxon label is required in the taxonomy_template file" unless taxon_label.length==1
         
     | 
| 
      
 213 
     | 
    
         
            +
              taxon_label[0].attributes['label']=search_tool.database.downcase
         
     | 
| 
      
 214 
     | 
    
         
            +
             
     | 
| 
      
 215 
     | 
    
         
            +
              db_file=taxo_doc.find('/bioml/taxon/file')
         
     | 
| 
      
 216 
     | 
    
         
            +
              throw "Exactly one database file is required in the taxonomy_template file" unless db_file.length==1
         
     | 
| 
      
 217 
     | 
    
         
            +
              db_file[0].attributes['URL']=current_db
         
     | 
| 
      
 218 
     | 
    
         
            +
             
     | 
| 
      
 219 
     | 
    
         
            +
              taxo_doc
         
     | 
| 
      
 220 
     | 
    
         
            +
            end
         
     | 
| 
      
 221 
     | 
    
         
            +
             
     | 
| 
      
 222 
     | 
    
         
            +
            # Run the search engine on each input file
         
     | 
| 
      
 223 
     | 
    
         
            +
            #
         
     | 
| 
      
 224 
     | 
    
         
            +
            ARGV.each do |filename|
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
              input_path=Pathname.new(filename.chomp).realpath.to_s
         
     | 
| 
      
 227 
     | 
    
         
            +
              output_path="#{search_tool.output_base_path(filename.chomp)}.tandem"
         
     | 
| 
      
 228 
     | 
    
         
            +
             
     | 
| 
      
 229 
     | 
    
         
            +
              if ( search_tool.explicit_output==nil )
         
     | 
| 
      
 230 
     | 
    
         
            +
                pepxml_path="#{output_path.match(/(.*)\.tandem$/)[1]}.pep.xml"
         
     | 
| 
      
 231 
     | 
    
         
            +
              else
         
     | 
| 
      
 232 
     | 
    
         
            +
                pepxml_path=search_tool.explicit_output
         
     | 
| 
      
 233 
     | 
    
         
            +
              end
         
     | 
| 
      
 234 
     | 
    
         
            +
              
         
     | 
| 
      
 235 
     | 
    
         
            +
              output_exists=false
         
     | 
| 
      
 236 
     | 
    
         
            +
              if ( !search_tool.no_pepxml && Pathname.new(pepxml_path).exist?)
         
     | 
| 
      
 237 
     | 
    
         
            +
                output_exists=true
         
     | 
| 
      
 238 
     | 
    
         
            +
              end
         
     | 
| 
      
 239 
     | 
    
         
            +
              
         
     | 
| 
      
 240 
     | 
    
         
            +
              if ( search_tool.no_pepxml && Pathname.new(output_path).exist? )
         
     | 
| 
      
 241 
     | 
    
         
            +
                output_exists=true
         
     | 
| 
      
 242 
     | 
    
         
            +
              end
         
     | 
| 
      
 243 
     | 
    
         
            +
                
         
     | 
| 
      
 244 
     | 
    
         
            +
              # Only proceed if the output file is not present or we have opted to over-write it
         
     | 
| 
      
 245 
     | 
    
         
            +
              #
         
     | 
| 
      
 246 
     | 
    
         
            +
              if ( search_tool.over_write || !output_exists )
         
     | 
| 
      
 247 
     | 
    
         
            +
             
     | 
| 
      
 248 
     | 
    
         
            +
                # Create the taxonomy file in the same directory as the params file
         
     | 
| 
      
 249 
     | 
    
         
            +
                # 
         
     | 
| 
      
 250 
     | 
    
         
            +
                taxo_path="#{search_tool.input_base_path(filename.chomp)}.taxonomy.xml"
         
     | 
| 
      
 251 
     | 
    
         
            +
                mod_taxo_doc=generate_taxonomy_doc(taxo_doc,current_db,search_tool)
         
     | 
| 
      
 252 
     | 
    
         
            +
                mod_taxo_doc.save(taxo_path)
         
     | 
| 
      
 253 
     | 
    
         
            +
             
     | 
| 
      
 254 
     | 
    
         
            +
                # Modify the default XML document to contain search specific details and save it so it can be used in the search
         
     | 
| 
      
 255 
     | 
    
         
            +
                #    
         
     | 
| 
      
 256 
     | 
    
         
            +
                mod_params=generate_parameter_doc(std_params,output_path,input_path,taxo_path,current_db,search_tool,genv)
         
     | 
| 
      
 257 
     | 
    
         
            +
                params_path="#{search_tool.input_base_path(filename.chomp)}.tandem.params"
         
     | 
| 
      
 258 
     | 
    
         
            +
                mod_params.save(params_path)
         
     | 
| 
      
 259 
     | 
    
         
            +
             
     | 
| 
      
 260 
     | 
    
         
            +
                # The basic command
         
     | 
| 
      
 261 
     | 
    
         
            +
                #
         
     | 
| 
      
 262 
     | 
    
         
            +
                cmd= "#{tandem_bin} #{params_path}"
         
     | 
| 
      
 263 
     | 
    
         
            +
             
     | 
| 
      
 264 
     | 
    
         
            +
                # pepXML conversion and repair
         
     | 
| 
      
 265 
     | 
    
         
            +
                #
         
     | 
| 
      
 266 
     | 
    
         
            +
                unless search_tool.no_pepxml
         
     | 
| 
      
 267 
     | 
    
         
            +
                  repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"      
         
     | 
| 
      
 268 
     | 
    
         
            +
                  cmd << "; #{genv.tandem2xml} #{output_path} #{pepxml_path}; #{repair_script} #{pepxml_path}; rm #{output_path}"
         
     | 
| 
      
 269 
     | 
    
         
            +
                end
         
     | 
| 
      
 270 
     | 
    
         
            +
             
     | 
| 
      
 271 
     | 
    
         
            +
                # Add a cleanup command unless the user wants to keep params files
         
     | 
| 
      
 272 
     | 
    
         
            +
                #
         
     | 
| 
      
 273 
     | 
    
         
            +
                unless search_tool.keep_params_files 
         
     | 
| 
      
 274 
     | 
    
         
            +
                  cmd << "; rm #{params_path}; rm #{taxo_path}"
         
     | 
| 
      
 275 
     | 
    
         
            +
                end
         
     | 
| 
      
 276 
     | 
    
         
            +
             
     | 
| 
      
 277 
     | 
    
         
            +
                # In case the user specified background running we need to create a jobscript path
         
     | 
| 
      
 278 
     | 
    
         
            +
                #
         
     | 
| 
      
 279 
     | 
    
         
            +
                jobscript_path="#{output_path}.pbs.sh"
         
     | 
| 
      
 280 
     | 
    
         
            +
             
         
     | 
| 
      
 281 
     | 
    
         
            +
                # Run the search
         
     | 
| 
      
 282 
     | 
    
         
            +
                #
         
     | 
| 
      
 283 
     | 
    
         
            +
                job_params= {:jobid => search_tool.jobid_from_filename(filename)}
         
     | 
| 
      
 284 
     | 
    
         
            +
                job_params[:queue]="lowmem"
         
     | 
| 
      
 285 
     | 
    
         
            +
                job_params[:vmem]="900mb"
         
     | 
| 
      
 286 
     | 
    
         
            +
                code = search_tool.run(cmd,genv,job_params,jobscript_path)
         
     | 
| 
      
 287 
     | 
    
         
            +
                throw "Command failed with exit code #{code}" unless code==0
         
     | 
| 
      
 288 
     | 
    
         
            +
              else
         
     | 
| 
      
 289 
     | 
    
         
            +
                genv.log("Skipping search on existing file #{output_path}",:warn)        
         
     | 
| 
      
 290 
     | 
    
         
            +
              end
         
     | 
| 
      
 291 
     | 
    
         
            +
             
     | 
| 
      
 292 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,144 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # This file is part of protk
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Created by Ira Cooke 14/12/2010
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Runs an MS/MS search using the MSGFPlus search engine
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            require 'protk/constants'
            require 'protk/search_tool'
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            # Setup specific command-line options for this tool. Other options are inherited from SearchTool
         
     | 
| 
      
 12 
     | 
    
         
            +
            #
         
     | 
| 
      
 13 
     | 
    
         
            +
            search_tool=SearchTool.new({:msms_search=>true,:background=>false,:glyco=>true,:database=>true,:explicit_output=>true,:over_write=>true,:msms_search_detailed_options=>true})
         
     | 
| 
      
 14 
     | 
    
         
            +
            search_tool.option_parser.banner = "Run an msms search on a set of msms spectrum input files.\n\nUsage: template_search.rb [options] file1.mzML file2.mzML ..."
         
     | 
| 
      
 15 
     | 
    
         
            +
            search_tool.options.output_suffix="_template"
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            search_tool.options.custom_option="default"
         
     | 
| 
      
 18 
     | 
    
         
            +
            search_tool.option_parser.on('--custom-opt value','Custom option relevant to this tool only (Default default)') do |val|
         
     | 
| 
      
 19 
     | 
    
         
            +
              search_tool.options.custom_option=val
         
     | 
| 
      
 20 
     | 
    
         
            +
            end
         
     | 
| 
      
 21 
     | 
    
         
            +
              
         
     | 
| 
      
 22 
     | 
    
         
            +
            search_tool.option_parser.parse!
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            # Set search engine specific parameters on the SearchTool object
         
     | 
| 
      
 25 
     | 
    
         
            +
            #
         
     | 
| 
      
 26 
     | 
    
         
            +
            # genv is referenced here and again when running/logging below, but was never
            # assigned in this script (NameError at startup).
            # NOTE(review): assumes Constants (protk/constants) is the environment object
            # exposing msgf_bin and log -- confirm against the other search scripts.
            genv=Constants.new
            msgf_bin="#{genv.msgf_bin}/MSGFPlus.jar"
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
            case 
         
     | 
| 
      
 29 
     | 
    
         
            +
            when Pathname.new(search_tool.database).exist? # It's an explicitly named db
         
     | 
| 
      
 30 
     | 
    
         
            +
              current_db=Pathname.new(search_tool.database).realpath.to_s
         
     | 
| 
      
 31 
     | 
    
         
            +
            else
         
     | 
| 
      
 32 
     | 
    
         
            +
              current_db=search_tool.current_database :fasta
         
     | 
| 
      
 33 
     | 
    
         
            +
            end
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
            fragment_tol = search_tool.fragment_tol
         
     | 
| 
      
 36 
     | 
    
         
            +
            precursor_tol = search_tool.precursor_tol
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
             
     | 
| 
      
 39 
     | 
    
         
            +
            throw "When --output is set only one file at a time can be run" if  ( ARGV.length> 1 ) && ( search_tool.explicit_output!=nil ) 
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            # Run the search engine on each input file
         
     | 
| 
      
 42 
     | 
    
         
            +
            #
         
     | 
| 
      
 43 
     | 
    
         
            +
            ARGV.each do |filename|
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
              if ( search_tool.explicit_output!=nil)
         
     | 
| 
      
 46 
     | 
    
         
            +
                output_path=search_tool.explicit_output
         
     | 
| 
      
 47 
     | 
    
         
            +
              else
         
     | 
| 
      
 48 
     | 
    
         
            +
                output_path="#{search_tool.output_base_path(filename.chomp)}.mzid"
         
     | 
| 
      
 49 
     | 
    
         
            +
              end
         
     | 
| 
      
 50 
     | 
    
         
            +
              
         
     | 
| 
      
 51 
     | 
    
         
            +
              # (*.mzML, *.mzXML, *.mgf, *.ms2, *.pkl or *_dta.txt)
         
     | 
| 
      
 52 
     | 
    
         
            +
              # Get the input file extension
         
     | 
| 
      
 53 
     | 
    
         
            +
              ext = Pathname.new(filename).extname
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
              input_path="#{search_tool.input_base_path(filename.chomp)}#{ext}"
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
              # Only proceed if the output file is not present or we have opted to over-write it
         
     | 
| 
      
 60 
     | 
    
         
            +
              #
         
     | 
| 
      
 61 
     | 
    
         
            +
              if ( search_tool.over_write || !Pathname.new(output_path).exist? )
         
     | 
| 
      
 62 
     | 
    
         
            +
              
         
     | 
| 
      
 63 
     | 
    
         
            +
                # The basic command
         
     | 
| 
      
 64 
     | 
    
         
            +
                #
         
     | 
| 
      
 65 
     | 
    
         
            +
                cmd= "java -Xmx#{search_tool.java_mem} -jar #{msgf_bin} -d #{current_db} -s #{input_path} -o #{output_path} "
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
                #Missed cleavages
         
     | 
| 
      
 68 
     | 
    
         
            +
                #
         
     | 
| 
      
 69 
     | 
    
         
            +
                throw "Maximum value for missed cleavages is 2" if ( search_tool.missed_cleavages > 2)
         
     | 
| 
      
 70 
     | 
    
         
            +
                cmd << " -ntt #{search_tool.missed_cleavages}"
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                # Precursor tolerance
         
     | 
| 
      
 73 
     | 
    
         
            +
                #
         
     | 
| 
      
 74 
     | 
    
         
            +
                cmd << " -t #{search_tool.precursor_tol}#{search_tool.precursor_tolu}"
         
     | 
| 
      
 75 
     | 
    
         
            +
                
         
     | 
| 
      
 76 
     | 
    
         
            +
                # Instrument type
         
     | 
| 
      
 77 
     | 
    
         
            +
                #
         
     | 
| 
      
 78 
     | 
    
         
            +
                cmd << " -inst 2"
         
     | 
| 
      
 79 
     | 
    
         
            +
                
         
     | 
| 
      
 80 
     | 
    
         
            +
            #    cmd << " -m 4"
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
                cmd << " -addFeatures 1"
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
                # Enzyme
         
     | 
| 
      
 85 
     | 
    
         
            +
                #
         
     | 
| 
      
 86 
     | 
    
         
            +
              #    if ( search_tool.enzyme!="Trypsin")
         
     | 
| 
      
 87 
     | 
    
         
            +
              #      cmd << " -e #{search_tool.enzyme}"
         
     | 
| 
      
 88 
     | 
    
         
            +
              #    end
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
              mods_path="#{search_tool.input_base_path(filename.chomp)}.msgfplus_mods.txt"
         
     | 
| 
      
 91 
     | 
    
         
            +
              # Create/truncate the mods file as before, but use the block form so the
              # handle is closed immediately; it was previously left open and never used.
              File.open(mods_path,'w+') {}
         
     | 
| 
      
 92 
     | 
    
         
            +
             
     | 
| 
      
 93 
     | 
    
         
            +
                # Variable Modifications
         
     | 
| 
      
 94 
     | 
    
         
            +
                #
         
     | 
| 
      
 95 
     | 
    
         
            +
                if ( search_tool.var_mods !="" && !search_tool.var_mods =~/None/) # Checking for none is to cope with galaxy input
         
     | 
| 
      
 96 
     | 
    
         
            +
                  var_mods = search_tool.var_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject {|e| e.empty? }.join(",")
         
     | 
| 
      
 97 
     | 
    
         
            +
                  if ( var_mods !="" )
         
     | 
| 
      
 98 
     | 
    
         
            +
                    cmd << " -mv #{var_mods}"
         
     | 
| 
      
 99 
     | 
    
         
            +
                  end
         
     | 
| 
      
 100 
     | 
    
         
            +
                else 
         
     | 
| 
      
 101 
     | 
    
         
            +
                  # Add options related to peptide modifications
         
     | 
| 
      
 102 
     | 
    
         
            +
                  #
         
     | 
| 
      
 103 
     | 
    
         
            +
                  if ( search_tool.glyco )
         
     | 
| 
      
 104 
     | 
    
         
            +
                    cmd << " -mv 119 "
         
     | 
| 
      
 105 
     | 
    
         
            +
                  end
         
     | 
| 
      
 106 
     | 
    
         
            +
                end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
              # Fixed modifications
         
     | 
| 
      
 109 
     | 
    
         
            +
              #
         
     | 
| 
      
 110 
     | 
    
         
            +
                if ( search_tool.fix_mods !="" && !search_tool.fix_mods=~/None/)
         
     | 
| 
      
 111 
     | 
    
         
            +
                  fix_mods = search_tool.fix_mods.split(",").collect { |mod| mod.lstrip.rstrip }.reject { |e| e.empty? }.join(",")
         
     | 
| 
      
 112 
     | 
    
         
            +
                  if ( fix_mods !="")
         
     | 
| 
      
 113 
     | 
    
         
            +
                    cmd << " -mf #{fix_mods}"    
         
     | 
| 
      
 114 
     | 
    
         
            +
                  end
         
     | 
| 
      
 115 
     | 
    
         
            +
                else
         
     | 
| 
      
 116 
     | 
    
         
            +
                  if ( search_tool.has_modifications )
         
     | 
| 
      
 117 
     | 
    
         
            +
                    cmd << " -mf "
         
     | 
| 
      
 118 
     | 
    
         
            +
                    if ( search_tool.carbamidomethyl )
         
     | 
| 
      
 119 
     | 
    
         
            +
                      cmd<<"3 "
         
     | 
| 
      
 120 
     | 
    
         
            +
                    end
         
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
      
 122 
     | 
    
         
            +
                    if ( search_tool.methionine_oxidation )
         
     | 
| 
      
 123 
     | 
    
         
            +
                      cmd<<"1 "
         
     | 
| 
      
 124 
     | 
    
         
            +
                    end
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
                  end
         
     | 
| 
      
 127 
     | 
    
         
            +
                end
         
     | 
| 
      
 128 
     | 
    
         
            +
                
         
     | 
| 
      
 129 
     | 
    
         
            +
                # The omssa command line is complete at this point; echo it before running
                p "Running:#{cmd}"

                # Run the search
                #
                # The job id is derived from the input filename; queue and vmem are fixed.
                job_params = {
                  :jobid => search_tool.jobid_from_filename(filename),
                  :queue => "lowmem",
                  :vmem => "900mb"
                }
                search_tool.run(cmd, genv, job_params)
         
     | 
| 
      
 138 
     | 
    
         
            +
             
     | 
| 
      
 139 
     | 
    
         
            +
             
     | 
| 
      
 140 
     | 
    
         
            +
              else
         
     | 
| 
      
 141 
     | 
    
         
            +
                genv.log("Skipping search on existing file #{output_path}",:warn)       
         
     | 
| 
      
 142 
     | 
    
         
            +
              end
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,118 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # This file is part of MSLIMS
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Created by Ira Cooke 12/4/2010
         
     | 
| 
      
 5 
     | 
    
         
            +
            #
         
     | 
| 
      
 6 
     | 
    
         
            +
            # Reads a unimod xml file (eg from a Mascot installation) and produces a loc file with names of allowable chemical modifications
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            require 'libxml'
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            include LibXML
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            # The path to the unimod xml file is the first command-line argument.
            # Stop with a usage message instead of handing nil to the XML parser.
            unimod_path = ARGV[0]
            abort "Usage: unimod_to_loc.rb <unimod.xml>" if unimod_path.nil?

            # Parse the whole document up front
            unimod_parser = XML::Parser.file(unimod_path)
            unimod_doc = unimod_parser.parse

            # Accumulates the display name of every modification found
            all_mods = []

            # Every umod:mod element under umod:unimod/umod:modifications
            umd = unimod_doc.find('//umod:unimod/umod:modifications/umod:mod')
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
      
 23 
     | 
    
         
            +
            # Build the list of modification names, one or more per umod:mod element.
            umd.each do |mod|
              title = mod.attributes['title']

              case title
              # Special Cases
              #
              # These three titles get a fixed set of names regardless of the
              # specificities listed in the file.
              when "Oxidation"
                all_mods.push("Oxidation (HW)", "Oxidation (M)")
              when "Phospho"
                all_mods.push("Phospho (ST)", "Phospho (Y)")
              when "Sulfo"
                all_mods.push("Sulfo (S)", "Sulfo (T)", "Sulfo (Y)")
              else
                # Visible specificities that apply at any position
                anywhere_sites = mod.find('./umod:specificity[@hidden="0" and @position="Anywhere"]')

                if ( title =~ /^iTRAQ/ || title =~ /^mTRAQ/ )
                  # iTRAQ and mTRAQ get one name per anywhere site
                  anywhere_sites.each do |s|
                    all_mods.push("#{title} (#{s.attributes['site']})")
                  end
                elsif ( anywhere_sites.length > 0 )
                  # Everything else gets a single name with its anywhere sites
                  # sorted and concatenated, eg "Title (KR)"
                  site_letters = anywhere_sites.map { |s| "#{s.attributes['site']}" }.sort
                  all_mods.push("#{title} (#{site_letters.join})")
                end

                # Visible specificities tied to a particular position get one name each
                mod.find('./umod:specificity[@hidden="0" and @position!="Anywhere"]').each do |specific_mod|
                  position = specific_mod.attributes['position']
                  site = specific_mod.attributes['site']

                  # Start from the site and apply the overrides in order; a later
                  # override replaces an earlier one
                  specificity = site
                  specificity = position if position =~ /^Protein/
                  specificity = "N-term #{site}" if (position =~ /Any N-term/) && (site =~ /^[CQEM]$/)
                  specificity = "C-term #{site}" if (position =~ /Any C-term/) && (site =~ /^[M]$/)

                  all_mods.push("#{title} (#{specificity})")
                end
              end
            end
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
      
 101 
     | 
    
         
            +
             
     | 
| 
      
 102 
     | 
    
         
            +
            # Order the names without regard to case
            all_mods = all_mods.sort { |a, b| a.downcase <=> b.downcase }

            # Write mascot_mods.loc relative to the current working directory. The block
            # form of File.open closes the file even if one of the writes raises.
            File.open("mascot_mods.loc", 'w') do |loc_output|
              loc_output << "#This file lists the names of chemical modifications acceptable for proteomics search engines\n"
              loc_output << "#\n"
              loc_output << "#So, unimod_names.loc could look something like this:\n"
              loc_output << "#\n"

              # One row per modification with four tab separated columns: name, key, name, key
              all_mods.each do |am|
                # The key is the lowercased name with spaces removed and each of
                # "(", ")", ":" and "->" replaced by an underscore
                key = am.downcase.delete(" ").gsub(/[():]|->/, "_")
                loc_output << "#{am}\t#{key}\t#{am}\t#{key}\n"
              end
            end
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
             
     |