protk 1.2.6.pre5 → 1.3.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/README.md +84 -45
 - data/bin/add_retention_times.rb +9 -5
 - data/bin/augustus_to_proteindb.rb +7 -11
 - data/bin/interprophet.rb +28 -46
 - data/bin/make_decoy.rb +16 -48
 - data/bin/mascot_search.rb +57 -71
 - data/bin/mascot_to_pepxml.rb +13 -26
 - data/bin/msgfplus_search.rb +70 -107
 - data/bin/omssa_search.rb +52 -109
 - data/bin/peptide_prophet.rb +44 -119
 - data/bin/pepxml_to_table.rb +24 -27
 - data/bin/protein_prophet.rb +22 -82
 - data/bin/protxml_to_gff.rb +22 -519
 - data/bin/protxml_to_table.rb +2 -16
 - data/bin/sixframe.rb +10 -32
 - data/bin/tandem_search.rb +30 -403
 - data/bin/tandem_to_pepxml.rb +43 -0
 - data/bin/unimod_to_loc.rb +1 -1
 - data/ext/{protk/decoymaker → decoymaker}/decoymaker.c +74 -21
 - data/ext/decoymaker/extconf.rb +3 -0
 - data/lib/protk/constants.rb +16 -2
 - data/lib/protk/data/default_config.yml +2 -1
 - data/lib/protk/data/tandem_gpm_defaults.xml +175 -0
 - data/lib/protk/data/tandem_isb_kscore_defaults.xml +123 -0
 - data/lib/protk/data/tandem_isb_native_defaults.xml +123 -0
 - data/lib/protk/data/tandem_params.xml +17 -54
 - data/lib/protk/fastadb.rb +2 -2
 - data/lib/protk/prophet_tool.rb +1 -1
 - data/lib/protk/protxml_to_gff_tool.rb +474 -0
 - data/lib/protk/search_tool.rb +58 -103
 - data/lib/protk/setup_rakefile.rake +9 -5
 - data/lib/protk/tandem_search_tool.rb +256 -0
 - data/lib/protk/tool.rb +85 -104
 - data/lib/protk.rb +1 -6
 - metadata +24 -103
 - data/bin/annotate_ids.rb +0 -59
 - data/bin/asapratio.rb +0 -27
 - data/bin/blastxml_to_table.rb +0 -119
 - data/bin/correct_omssa_retention_times.rb +0 -27
 - data/bin/feature_finder.rb +0 -95
 - data/bin/file_convert.rb +0 -164
 - data/bin/generate_omssa_loc.rb +0 -42
 - data/bin/gffmerge.rb +0 -208
 - data/bin/libra.rb +0 -70
 - data/bin/toppas_pipeline.rb +0 -84
 - data/bin/uniprot_annotation.rb +0 -141
 - data/bin/xls_to_table.rb +0 -52
 - data/bin/xpress.rb +0 -27
 - data/ext/protk/decoymaker/extconf.rb +0 -3
 - data/ext/protk/simplealign/extconf.rb +0 -3
 - data/lib/protk/biotools_excel_converter.rb +0 -60
 - data/lib/protk/eupathdb_gene_information_table.rb +0 -158
 - data/lib/protk/gapped_aligner.rb +0 -264
 - data/lib/protk/protein_annotator.rb +0 -646
 - data/lib/protk/spreadsheet_extensions.rb +0 -79
 - data/lib/protk/xtandem_defaults.rb +0 -11
 
    
        data/bin/libra.rb
    DELETED
    
    | 
         @@ -1,70 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #!/usr/bin/env ruby
         
     | 
| 
       2 
     | 
    
         
            -
            #
         
     | 
| 
       3 
     | 
    
         
            -
            # Created by John Chilton
         
     | 
| 
       4 
     | 
    
         
            -
            #
         
     | 
| 
       5 
     | 
    
         
            -
            # Run libra quantification against protein prophet results.
         
     | 
| 
       6 
     | 
    
         
            -
            #
         
     | 
| 
       7 
     | 
    
         
            -
            #
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
            require 'protk/constants'
         
     | 
| 
       10 
     | 
    
         
            -
            require 'protk/protxml'
         
     | 
| 
       11 
     | 
    
         
            -
            require 'protk/galaxy_util'
         
     | 
| 
       12 
     | 
    
         
            -
            require 'optparse'
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
            for_galaxy = GalaxyUtil.for_galaxy?
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            protxml_path = ARGV.shift
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
            if for_galaxy
         
     | 
| 
       19 
     | 
    
         
            -
              protxml_path = GalaxyUtil.stage_protxml(protxml_path)
         
     | 
| 
       20 
     | 
    
         
            -
            end
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
            protxml = ProtXML.new(protxml_path)
         
     | 
| 
       23 
     | 
    
         
            -
            pepxml_path = protxml.find_pep_xml()
         
     | 
| 
       24 
     | 
    
         
            -
             
     | 
| 
       25 
     | 
    
         
            -
            genv=Constants.new
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
            option_parser=OptionParser.new()
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
            reagents = []
         
     | 
| 
       30 
     | 
    
         
            -
            mass_tolerance = "0.2"
         
     | 
| 
       31 
     | 
    
         
            -
            option_parser.on( '--mass-tolerance TOL',"Specifies the mass tolerance (window libra will search for the most intense m/z value in)." ) do |tol|
         
     | 
| 
       32 
     | 
    
         
            -
              mass_tolerance = tol
         
     | 
| 
       33 
     | 
    
         
            -
            end
         
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
       35 
     | 
    
         
            -
            option_parser.on( '--reagent MZ', "Specify a reagent (via m/z values).") do |reagent|
         
     | 
| 
       36 
     | 
    
         
            -
              reagents << reagent
         
     | 
| 
       37 
     | 
    
         
            -
            end
         
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
            minimum_threshold_string = ""
         
     | 
| 
       40 
     | 
    
         
            -
            option_parser.on( '--minimum-threshold THRESH', "Minimum threshhold intensity (not required).") do |thresh|
         
     | 
| 
       41 
     | 
    
         
            -
              minimum_threshold_string = "<minimumThreshhold value=\"#{thresh}\"/>"
         
     | 
| 
       42 
     | 
    
         
            -
            end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
            option_parser.parse!
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
            reagent_strings = reagents.map do |reagent|
         
     | 
| 
       48 
     | 
    
         
            -
              "<reagent mz=\"#{reagent}\" />"
         
     | 
| 
       49 
     | 
    
         
            -
            end
         
     | 
| 
       50 
     | 
    
         
            -
            reagents_string = reagent_strings.join(" ")
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
            isotopic_contributions = ""
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
            condition_contents = "<SUMmOnCondition description=\"libra_galaxy_run\">
         
     | 
| 
       55 
     | 
    
         
            -
              <fragmentMasses>
         
     | 
| 
       56 
     | 
    
         
            -
                #{reagents_string}
         
     | 
| 
       57 
     | 
    
         
            -
              </fragmentMasses>
         
     | 
| 
       58 
     | 
    
         
            -
              #{isotopic_contributions}
         
     | 
| 
       59 
     | 
    
         
            -
              <massTolerance value=\"#{mass_tolerance}\"/>
         
     | 
| 
       60 
     | 
    
         
            -
              <centroiding type=\"2\" iterations=\"1\"/>
         
     | 
| 
       61 
     | 
    
         
            -
              <normalization type=\"4\"/>
         
     | 
| 
       62 
     | 
    
         
            -
              <targetMs level=\"2\"/>
         
     | 
| 
       63 
     | 
    
         
            -
              <output type=\"1\"/>
         
     | 
| 
       64 
     | 
    
         
            -
              <quantitationFile name=\"quantitation.tsv\"/>
         
     | 
| 
       65 
     | 
    
         
            -
              #{minimum_threshold_string}
         
     | 
| 
       66 
     | 
    
         
            -
            </SUMmOnCondition>"
         
     | 
| 
       67 
     | 
    
         
            -
            File.open("condition.xml", "w") { |f| f.write(condition_contents) }
         
     | 
| 
       68 
     | 
    
         
            -
            print condition_contents
         
     | 
| 
       69 
     | 
    
         
            -
            command="#{genv.librapeptideparser} '#{pepxml_path}' -ccondition.xml; #{genv.libraproteinratioparser} '#{protxml_path}' -c#{condition_file}"
         
     | 
| 
       70 
     | 
    
         
            -
            %x[#{command}]
         
     | 
    
        data/bin/toppas_pipeline.rb
    DELETED
    
    | 
         @@ -1,84 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #!/usr/bin/env ruby
         
     | 
| 
       2 
     | 
    
         
            -
            #
         
     | 
| 
       3 
     | 
    
         
            -
            # This file is part of protk
         
     | 
| 
       4 
     | 
    
         
            -
            # Created by Ira Cooke 30/01/13
         
     | 
| 
       5 
     | 
    
         
            -
            #
         
     | 
| 
       6 
     | 
    
         
            -
            # A wrapper for the OpenMS tool ExecutePipeline. 
         
     | 
| 
       7 
     | 
    
         
            -
            # Executes simple toppas pipelines, automatically creating the trf file.
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
            require 'protk/constants'
         
     | 
| 
       10 
     | 
    
         
            -
            require 'protk/command_runner'
         
     | 
| 
       11 
     | 
    
         
            -
            require 'protk/tool'
         
     | 
| 
       12 
     | 
    
         
            -
            require 'protk/openms_defaults'
         
     | 
| 
       13 
     | 
    
         
            -
            require 'tempfile'
         
     | 
| 
       14 
     | 
    
         
            -
            require 'libxml'
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            include LibXML
         
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
            tool=Tool.new([:background,:over_write])
         
     | 
| 
       19 
     | 
    
         
            -
            tool.option_parser.banner = "Execute a toppas pipeline with a single inputs node\n\nUsage: toppas_pipeline.rb [options] input1 input2 ..."
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
            tool.options.outdir = ""
         
     | 
| 
       22 
     | 
    
         
            -
            tool.option_parser.on( '--outdir dir',"save outputs to dir" ) do |dir|
         
     | 
| 
       23 
     | 
    
         
            -
              tool.options.outdir = dir
         
     | 
| 
       24 
     | 
    
         
            -
            end
         
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
            tool.options.toppas_file = ""
         
     | 
| 
       27 
     | 
    
         
            -
            tool.option_parser.on( '--toppas-file f',"the toppas file to run" ) do |file|
         
     | 
| 
       28 
     | 
    
         
            -
              tool.options.toppas_file = file
         
     | 
| 
       29 
     | 
    
         
            -
            end
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
            tool.options.threads = "1"
         
     | 
| 
       32 
     | 
    
         
            -
            tool.option_parser.on( '--threads t',"Number of threads to use" ) do |tr|
         
     | 
| 
       33 
     | 
    
         
            -
              tool.options.threads=tr
         
     | 
| 
       34 
     | 
    
         
            -
            end
         
     | 
| 
       35 
     | 
    
         
            -
             
     | 
| 
       36 
     | 
    
         
            -
            exit unless tool.check_options 
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
            if ( ARGV[0].nil? )
         
     | 
| 
       39 
     | 
    
         
            -
                puts "You must supply an input file"
         
     | 
| 
       40 
     | 
    
         
            -
                puts tool.option_parser 
         
     | 
| 
       41 
     | 
    
         
            -
                exit
         
     | 
| 
       42 
     | 
    
         
            -
            end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
            # Obtain a global environment object
         
     | 
| 
       45 
     | 
    
         
            -
            genv=Constants.new
         
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
       47 
     | 
    
         
            -
            def run_pipeline(genv,tool,cmd,output_path,jobid)
         
     | 
| 
       48 
     | 
    
         
            -
              jobscript_path="#{output_path}.pbs.sh"
         
     | 
| 
       49 
     | 
    
         
            -
              job_params={:jobid=>jobid, :vmem=>"14Gb", :queue => "sixteen"}
         
     | 
| 
       50 
     | 
    
         
            -
              code=tool.run(cmd,genv,job_params,jobscript_path)
         
     | 
| 
       51 
     | 
    
         
            -
              throw "Command failed with exit code #{code}" unless code==0
         
     | 
| 
       52 
     | 
    
         
            -
            end
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
            def generate_trf(input_files,out_path)
         
     | 
| 
       55 
     | 
    
         
            -
              p OpenMSDefaults.new.trf_path
         
     | 
| 
       56 
     | 
    
         
            -
              parser=XML::Parser.file(OpenMSDefaults.new.trf_path)
         
     | 
| 
       57 
     | 
    
         
            -
              doc=parser.parse
         
     | 
| 
       58 
     | 
    
         
            -
              itemlist_node=doc.find('/PARAMETERS/NODE/ITEMLIST')[0]
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
              input_files.each do |f|
         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
                mnode=XML::Node.new('LISTITEM')
         
     | 
| 
       63 
     | 
    
         
            -
                mnode["value"]="file://#{Pathname.new(f).realpath.to_s}"
         
     | 
| 
       64 
     | 
    
         
            -
                
         
     | 
| 
       65 
     | 
    
         
            -
                itemlist_node << mnode
         
     | 
| 
       66 
     | 
    
         
            -
              end
         
     | 
| 
       67 
     | 
    
         
            -
              p out_path
         
     | 
| 
       68 
     | 
    
         
            -
              doc.save(out_path)
         
     | 
| 
       69 
     | 
    
         
            -
            end
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
            throw "outdir is a required parameter" if tool.outdir==""
         
     | 
| 
       72 
     | 
    
         
            -
            throw "toppas-file is a required parameter" if tool.toppas_file==""
         
     | 
| 
       73 
     | 
    
         
            -
            throw "outdir must exist" unless Dir.exist?(tool.outdir)
         
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
            trf_path = "#{Pathname.new(Tempfile.new(tool.toppas_file).path).basename.to_s}.trf"
         
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
            generate_trf(ARGV,trf_path)
         
     | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
            cmd=""
         
     | 
| 
       80 
     | 
    
         
            -
            cmd<<"export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:#{genv.openms_root}/lib;
         
     | 
| 
       81 
     | 
    
         
            -
            #{genv.executepipeline} -in #{Pathname.new(tool.toppas_file).realpath.to_s} -out_dir #{Pathname.new(tool.outdir).realpath.to_s} -resource_file #{Pathname.new(trf_path).realpath.to_s} -threads #{tool.threads}"
         
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
            run_pipeline(genv,tool,cmd,tool.outdir,tool.jobid_from_filename(tool.toppas_file))
         
     | 
| 
       84 
     | 
    
         
            -
             
     | 
    
        data/bin/uniprot_annotation.rb
    DELETED
    
    | 
         @@ -1,141 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #!/usr/bin/env ruby
         
     | 
| 
       2 
     | 
    
         
            -
            #
         
     | 
| 
       3 
     | 
    
         
            -
            # This file is part of Protk
         
     | 
| 
       4 
     | 
    
         
            -
            # Created by Ira Cooke 24/3/2013
         
     | 
| 
       5 
     | 
    
         
            -
            #
         
     | 
| 
       6 
     | 
    
         
            -
            # Retrieve annotation information for proteins from the Uniprot Swissprot database
         
     | 
| 
       7 
     | 
    
         
            -
            #
         
     | 
| 
       8 
     | 
    
         
            -
            # 
         
     | 
| 
       9 
     | 
    
         
            -
            require 'protk/constants'
         
     | 
| 
       10 
     | 
    
         
            -
            require 'protk/command_runner'
         
     | 
| 
       11 
     | 
    
         
            -
            require 'protk/tool'
         
     | 
| 
       12 
     | 
    
         
            -
            require 'protk/swissprot_database'
         
     | 
| 
       13 
     | 
    
         
            -
            require 'protk/bio_sptr_extensions'
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            # Setup specific command-line options for this tool. Other options are inherited from Tool
         
     | 
| 
       17 
     | 
    
         
            -
            #
         
     | 
| 
       18 
     | 
    
         
            -
            tool=Tool.new([:explicit_output])
         
     | 
| 
       19 
     | 
    
         
            -
            tool.option_parser.banner = "Retrieve information from the Uniprot database given a list of ID's.\n\n\
         
     | 
| 
       20 
     | 
    
         
            -
            Usage: uniprot_annotation.rb [options] input.tsv"
         
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
            tool.options.id_column=1
         
     | 
| 
       23 
     | 
    
         
            -
            tool.option_parser.on(  '--id-column num', 'Specify a column for ids (default is column 1)' ) do |col|
         
     | 
| 
       24 
     | 
    
         
            -
              tool.options.id_column=col.to_i
         
     | 
| 
       25 
     | 
    
         
            -
            end
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
            tool.options.flatfiledb="swissprot"
         
     | 
| 
       28 
     | 
    
         
            -
            tool.option_parser.on(  '--flatfiledb dbname', 'Specify path to a Uniprot flatfile' ) do |dbname|
         
     | 
| 
       29 
     | 
    
         
            -
              tool.options.flatfiledb=dbname
         
     | 
| 
       30 
     | 
    
         
            -
            end
         
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
            tool.options.fields=nil
         
     | 
| 
       33 
     | 
    
         
            -
            tool.option_parser.on(  '--fields flds', 'A comma separated list of fields to extract' ) do |flds|
         
     | 
| 
       34 
     | 
    
         
            -
              tool.options.fields=flds
         
     | 
| 
       35 
     | 
    
         
            -
            end
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
       37 
     | 
    
         
            -
            exit unless tool.check_options 
         
     | 
| 
       38 
     | 
    
         
            -
             
     | 
| 
       39 
     | 
    
         
            -
            if ( ARGV[0].nil? )
         
     | 
| 
       40 
     | 
    
         
            -
                puts "You must supply an input file"
         
     | 
| 
       41 
     | 
    
         
            -
                puts tool.option_parser 
         
     | 
| 
       42 
     | 
    
         
            -
                exit
         
     | 
| 
       43 
     | 
    
         
            -
            end
         
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
            # Obtain a global environment object
         
     | 
| 
       46 
     | 
    
         
            -
            genv=Constants.new
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
       48 
     | 
    
         
            -
            input_file=ARGV[0]
         
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
            swissprotdb=SwissprotDatabase.new(genv,tool.flatfiledb)
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
            output_file=nil
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
            if ( tool.explicit_output==nil)
         
     | 
| 
       55 
     | 
    
         
            -
              output_file=$stdout
         
     | 
| 
       56 
     | 
    
         
            -
            else
         
     | 
| 
       57 
     | 
    
         
            -
              output_file=File.open(tool.explicit_output,'w+')
         
     | 
| 
       58 
     | 
    
         
            -
            end
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
       60 
     | 
    
         
            -
            ac_column = tool.id_column-1
         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
            db_fields = {
         
     | 
| 
       63 
     | 
    
         
            -
              'recname'=>"Primary Name",
         
     | 
| 
       64 
     | 
    
         
            -
              'cd'=>"CD Antigen Name",
         
     | 
| 
       65 
     | 
    
         
            -
              'altnames'=>"Alternate Names", 
         
     | 
| 
       66 
     | 
    
         
            -
              'location' => "Subcellular Location",
         
     | 
| 
       67 
     | 
    
         
            -
              'function' => "Known Function",
         
     | 
| 
       68 
     | 
    
         
            -
              'similarity' => "Similarity",
         
     | 
| 
       69 
     | 
    
         
            -
              'tissues' => "Tissue Specificity",
         
     | 
| 
       70 
     | 
    
         
            -
              'disease' => "Disease Association",
         
     | 
| 
       71 
     | 
    
         
            -
              'domain' => "Domain",
         
     | 
| 
       72 
     | 
    
         
            -
              'subunit' => "Sub Unit",
         
     | 
| 
       73 
     | 
    
         
            -
              'nextbio' => "NextBio",
         
     | 
| 
       74 
     | 
    
         
            -
              'ipi' => "IPI",
         
     | 
| 
       75 
     | 
    
         
            -
              'intact' => "Interactions",
         
     | 
| 
       76 
     | 
    
         
            -
              'pride' => 'Pride',
         
     | 
| 
       77 
     | 
    
         
            -
              'ensembl'=> 'Ensembl',
         
     | 
| 
       78 
     | 
    
         
            -
              'num_transmem'=>"Transmembrane Regions",
         
     | 
| 
       79 
     | 
    
         
            -
              'signalp'=>'Signal Peptide',
         
     | 
| 
       80 
     | 
    
         
            -
              'ref_dump'=>'References',
         
     | 
| 
       81 
     | 
    
         
            -
              'tax_dump'=>'Taxonomy Cross Ref',
         
     | 
| 
       82 
     | 
    
         
            -
              'species_dump'=>'Species',
         
     | 
| 
       83 
     | 
    
         
            -
              'feature_dump'=>'Feature Table',
         
     | 
| 
       84 
     | 
    
         
            -
              'seq_dump' => 'AA Sequence'
         
     | 
| 
       85 
     | 
    
         
            -
              }
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
            hyperlink_fields = {
         
     | 
| 
       88 
     | 
    
         
            -
              'uniprot_link'=>"Uniprot Link",
         
     | 
| 
       89 
     | 
    
         
            -
              'nextbio_link'=>'NextBio Link',
         
     | 
| 
       90 
     | 
    
         
            -
              'intact_link'=>"Interactions Link",
         
     | 
| 
       91 
     | 
    
         
            -
              'pride_link'=>"Pride Link",
         
     | 
| 
       92 
     | 
    
         
            -
              'ensembl_link'=>"Ensembl Link"
         
     | 
| 
       93 
     | 
    
         
            -
            }
         
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
       95 
     | 
    
         
            -
            if tool.fields !=nil
         
     | 
| 
       96 
     | 
    
         
            -
              fields = tool.fields.split(",").collect { |f| f.lstrip.rstrip }.reject {|e| e.empty? }
         
     | 
| 
       97 
     | 
    
         
            -
              db_fields = db_fields.select { |k| fields.include? k }
         
     | 
| 
       98 
     | 
    
         
            -
              hyperlink_fields = hyperlink_fields.select { |k| fields.include? k}
         
     | 
| 
       99 
     | 
    
         
            -
            end
         
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
            output_file.write db_fields.values.join("\t")
         
     | 
| 
       102 
     | 
    
         
            -
            if ( hyperlink_fields.count > 0 )
         
     | 
| 
       103 
     | 
    
         
            -
              output_file.write("\t")
         
     | 
| 
       104 
     | 
    
         
            -
              output_file.write hyperlink_fields.values.join("\t")
         
     | 
| 
       105 
     | 
    
         
            -
            end
         
     | 
| 
       106 
     | 
    
         
            -
            output_file.write("\n")
         
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
            line_num=0
         
     | 
| 
       109 
     | 
    
         
            -
            File.foreach(input_file) { |line|  
         
     | 
| 
       110 
     | 
    
         
            -
              input_cols=line.split("\t")
         
     | 
| 
       111 
     | 
    
         
            -
              throw "Not enough columns in line #{line_num}" unless input_cols.count > ac_column
         
     | 
| 
       112 
     | 
    
         
            -
              accession=input_cols[ac_column].chomp
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
              sptr_entry=swissprotdb.get_entry_for_name(accession)
         
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
              if ( sptr_entry==nil)
         
     | 
| 
       117 
     | 
    
         
            -
                genv.log("No entry for #{accession} in uniprot database",:warn)  
         
     | 
| 
       118 
     | 
    
         
            -
              else
         
     | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
                db_values = db_fields.collect { |key,value|     
         
     | 
| 
       121 
     | 
    
         
            -
                  sptr_entry.send(key)
         
     | 
| 
       122 
     | 
    
         
            -
                }
         
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
                hyperlink_values = hyperlink_fields.collect { |key,value|
         
     | 
| 
       125 
     | 
    
         
            -
                  sptr_entry.send(key)
         
     | 
| 
       126 
     | 
    
         
            -
                }
         
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
                output_file.write db_values.join("\t")
         
     | 
| 
       129 
     | 
    
         
            -
                if ( hyperlink_fields.count > 0 )
         
     | 
| 
       130 
     | 
    
         
            -
                  output_file.write("\t")
         
     | 
| 
       131 
     | 
    
         
            -
                  output_file.write hyperlink_values.join("\t")
         
     | 
| 
       132 
     | 
    
         
            -
                end
         
     | 
| 
       133 
     | 
    
         
            -
                output_file.write "\n"
         
     | 
| 
       134 
     | 
    
         
            -
              end
         
     | 
| 
       135 
     | 
    
         
            -
             
     | 
| 
       136 
     | 
    
         
            -
              line_num+=1
         
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
            }
         
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
       140 
     | 
    
         
            -
             
     | 
| 
       141 
     | 
    
         
            -
             
     | 
    
        data/bin/xls_to_table.rb
    DELETED
    
    | 
         @@ -1,52 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #!/usr/bin/env ruby
         
     | 
| 
       2 
     | 
    
         
            -
            #
         
     | 
| 
       3 
     | 
    
         
            -
            # This file is part of protk
         
     | 
| 
       4 
     | 
    
         
            -
            # Created by Ira Cooke 18/1/2011
         
     | 
| 
       5 
     | 
    
         
            -
            #
         
     | 
| 
       6 
     | 
    
         
            -
            # Converts an Excel Spreadsheet to a tab delimited table
         
     | 
| 
       7 
     | 
    
         
            -
            #
         
     | 
| 
       8 
     | 
    
         
            -
            #
         
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
            require 'protk/constants'
         
     | 
| 
       11 
     | 
    
         
            -
            require 'protk/command_runner'
         
     | 
| 
       12 
     | 
    
         
            -
            require 'protk/tool'
         
     | 
| 
       13 
     | 
    
         
            -
            require 'spreadsheet'
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
            # Setup command-line options for this tool.
            #
            tool = Tool.new([:explicit_output])
            tool.option_parser.banner = "Convert an xls file to a tab delimited table.\n\nUsage: xls_to_table.rb [options] file1.xls"

            exit unless tool.check_options

            # The spreadsheet to convert is the first positional argument.
            if ARGV[0].nil?
              puts "You must supply an input file"
              puts tool.option_parser
              exit
            end

            input_file = ARGV[0]

            # Fall back to "<input>.csv" when no explicit output path was given.
            output_file = tool.explicit_output
            output_file = "#{input_file}.csv" if output_file.nil?

            # Open the original excel workbook for reading. This happens before the
            # output file is created so that an unreadable input does not leave an
            # empty output file behind.
            Spreadsheet.client_encoding = 'UTF-8'
            input_book = Spreadsheet.open "#{input_file}"
            input_sheet = input_book.worksheet 0

            # The block form guarantees the output handle is closed even if writing a
            # row raises part-way through.
            File.open(output_file, 'w') do |output_fh|
              input_sheet.each do |row|
                # One tab-separated line per row; nil cells become empty fields.
                output_fh.write "#{row.map { |cell| cell.to_s }.join("\t")}\n"
              end
            end
         
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
             
     | 
    
        data/bin/xpress.rb
    DELETED
    
    | 
         @@ -1,27 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            #!/usr/bin/env ruby
         
     | 
| 
       2 
     | 
    
         
            -
            #
         
     | 
| 
       3 
     | 
    
         
            -
            # Created by John Chilton
         
     | 
| 
       4 
     | 
    
         
            -
            #
         
     | 
| 
       5 
     | 
    
         
            -
            # Run XPRESS against protein prophet results.
         
     | 
| 
       6 
     | 
    
         
            -
            #
         
     | 
| 
       7 
     | 
    
         
            -
            #
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
            require 'protk/constants'
         
     | 
| 
       10 
     | 
    
         
            -
            require 'protk/protxml'
         
     | 
| 
       11 
     | 
    
         
            -
            require 'protk/galaxy_util'
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
            require 'shellwords'

            for_galaxy = GalaxyUtil.for_galaxy?

            # First argument is the protXML file; everything left in ARGV afterwards is
            # handed to the XPRESS peptide parser.
            protxml_path = ARGV.shift

            protxml_path = GalaxyUtil.stage_protxml(protxml_path) if for_galaxy

            protxml = ProtXML.new(protxml_path)
            pepxml_path = protxml.find_pep_xml()

            genv = Constants.new

            # Paths are shell-escaped rather than wrapped in '...' so that a path
            # containing a single quote can neither break the command nor inject into it.
            # The remaining ARGV options are passed through verbatim, exactly as before.
            peptide_step = "#{genv.xpresspeptideparser} #{Shellwords.escape(pepxml_path)} #{ARGV.join(" ")}"
            protein_step = "#{genv.xpressproteinratioparser} #{Shellwords.escape(protxml_path)}"

            # Run the peptide parser, then the protein ratio parser, in one shell.
            command = "#{peptide_step} ; #{protein_step}"
            %x[#{command}]
         
     | 
| 
         @@ -1,60 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require 'rubygems'
         
     | 
| 
       2 
     | 
    
         
            -
            require 'spreadsheet'
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
            class BioToolsExcelConverter

              # Matches a Biotools "Digest Matches ... Score: <score>)" cell and
              # captures the score text.
              DIGEST_MATCH_PATTERN = /Digest Matches.*?Score:\s(.*)\)/

              # Opens the workbook at +filename+ for later use by #get_rows.
              def initialize(filename)
                @inputBook = Spreadsheet.open File.new("#{filename}")
              end

              # Returns true when any row of the first worksheet of +filename+ has a
              # String first cell matching the "Digest Matches" pattern, false otherwise.
              def self.isBiotools(filename)
                testBook = Spreadsheet.open File.new("#{filename}")
                testSheet = testBook.worksheet 0

                # Plain true/false literals: the TRUE/FALSE constants are not defined
                # in modern Ruby.
                testSheet.each do |row|
                  return true if row[0].is_a?(String) && row[0].match(DIGEST_MATCH_PATTERN)
                end

                false
              end

              # Extracts one [accession, score] pair per "Digest Matches" row of the
              # first worksheet. The accession is the last whitespace-delimited token
              # of the first cell in the row directly above the match.
              #
              # Returns an Array of two-element Arrays, preceded by the header row
              # ["Accession","Ion Scores"].
              #
              # Raises ArgumentError when a "Digest Matches" row has no parseable
              # protein line above it.
              def get_rows
                sheet = @inputBook.worksheet 0
                n_rows = sheet.dimensions[1]

                protein_rows = (0...n_rows).collect do |row_i|
                  first_cell = sheet.row(row_i)[0]
                  # Skip empty and non-text cells (same guard as isBiotools).
                  next unless first_cell.is_a?(String)

                  digmatch = first_cell.match(DIGEST_MATCH_PATTERN)
                  next unless digmatch

                  # The protein description sits on the previous row; there is none
                  # when the match is on the very first row.
                  text = row_i.zero? ? nil : sheet.row(row_i - 1)[0]
                  m = text.is_a?(String) ? text.match(/\s(\S*)\s*$/) : nil
                  raise ArgumentError, "Badly formed protein line in biotools file ... could not parse protein name from #{text}" unless m

                  [m[1], digmatch[1]]
                end

                protein_rows.compact!
                protein_rows.insert(0, ["Accession", "Ion Scores"])

                protein_rows
              end

            end
         
     | 
| 
         @@ -1,158 +0,0 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            # Code for interacting with EuPathDB gene information files e.g. http://cryptodb.org/common/downloads/release-4.3/Cmuris/txt/CmurisGene_CryptoDB-4.3.txt
         
     | 
| 
       2 
     | 
    
         
            -
            # These gene information files contain a large amount of information about individual genes/proteins in EuPathDBs.
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            require 'tempfile'
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
            # A class for extracting gene info from a particular gene from the information file
         
     | 
| 
       7 
     | 
    
         
            -
            class EuPathDBGeneInformationFileExtractor
              # A filename path to the gene information file
              attr_accessor :filename

              def initialize(filename = nil)
                @filename = filename
              end

              # Returns the first gene in the file whose 'Gene Id' info entry equals
              # +wanted_gene_id+, or nil when no gene matches.
              #
              # If grep_hack_lines is a non-zero Integer, a shortcut is applied to
              # speed things up: instead of parsing the whole file, grep that many
              # lines of context after the "Gene Id: .." line and parse only that.
              #
              # Raises ArgumentError when grep_hack_lines is non-zero but not an Integer.
              def extract_gene_info(wanted_gene_id, grep_hack_lines = nil)
                if grep_hack_lines and grep_hack_lines.to_i != 0
                  raise ArgumentError, "grep_hack_lines should be an integer" unless grep_hack_lines.is_a?(Integer)

                  tempfile = Tempfile.new('reubypathdb_grep_hack')
                  begin
                    # grep however many lines from past the point. Rather dodgy, but faster.
                    # The argument-array form of popen bypasses the shell, so neither
                    # the gene id nor the filename needs quoting.
                    grep_command = ['grep', '-A', grep_hack_lines.to_s, "Gene Id: #{wanted_gene_id}", @filename.to_s]
                    tempfile.write(IO.popen(grep_command, &:read))
                    tempfile.rewind
                    find_gene(tempfile, wanted_gene_id)
                  ensure
                    # Close and delete the temporary file whatever happens.
                    tempfile.close!
                  end
                else
                  # no grep hack. Parse the whole gene information file
                  File.open(@filename) { |io| find_gene(io, wanted_gene_id) }
                end
              end

              private

              # Scans every gene readable from +io+ and returns the first one whose
              # 'Gene Id' matches, or nil if none does.
              def find_gene(io, wanted_gene_id)
                EuPathDBGeneInformationTable.new(io).each do |gene|
                  return gene if wanted_gene_id == gene.info['Gene Id']
                end
                nil
              end
            end
         
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
            # A class for parsing the 'gene information table' files from EuPathDB, such
         
     | 
| 
       45 
     | 
    
         
            -
            # as http://cryptodb.org/common/downloads/release-4.3/Cmuris/txt/CmurisGene_CryptoDB-4.3.txt
         
     | 
| 
       46 
     | 
    
         
            -
            #
         
     | 
| 
       47 
     | 
    
         
            -
            # The usual way of interacting with these is the use of the each method, 
         
     | 
| 
       48 
     | 
    
         
            -
            # which returns a EuPathDBGeneInformation object with all of the recorded
         
     | 
| 
       49 
     | 
    
         
            -
            # information in it.
         
     | 
| 
       50 
     | 
    
         
            -
            class EuPathDBGeneInformationTable
              include Enumerable

              # Line that terminates each gene record.
              RECORD_SEPARATOR = '------------------------------------------------------------'

              # Initialise using an IO object, say File.open('/path/to/CmurisGene_CryptoDB-4.3.txt'). After opening, the #each method can be used to iterate over the genes that are present in the file
              def initialize(io)
                @io = io
              end

              # Yields one EuPathDBGeneInformation object per gene record, in the
              # order they are read from the IO, until next_gene returns nil.
              def each
                while g = next_gene
                  yield g
                end
              end

              # Reads the next gene record from the IO and returns it as a
              # EuPathDBGeneInformation object, or nil when no record is left.
              #
              # A record is a run of "key: value" lines, a blank line, then zero or
              # more tables ("TABLE: name", a "[header]" line, tab-separated data
              # rows), closed by the separator line.
              #
              # Raises Exception when a line in the "key: value" section cannot be
              # parsed. EOFError propagates if the IO ends in the middle of a record.
              def next_gene
                # Nothing left at all (e.g. the file ends right after a separator).
                return nil if @io.eof?

                info = EuPathDBGeneInformation.new

                # first, read the table, which should start with the ID column
                line = @io.readline.strip
                while line == ''
                  return nil if @io.eof?
                  line = @io.readline.strip
                end

                while line != ''
                  if matches = line.match(/^(.*?)\: (.*)$/)
                    info.add_information(matches[1], matches[2])
                  else
                    raise Exception, "EuPathDBGeneInformationTable Couldn't parse this line: #{line}"
                  end

                  line = @io.readline.strip
                end

                # now read each of the tables, which should start with the
                # 'TABLE: <name>' entry
                line = @io.readline.strip
                table_name = nil
                headers = nil
                data = []
                while line != RECORD_SEPARATOR
                  if line == ''
                    # add it to the stack unless we are just starting out
                    info.add_table(table_name, headers, data) unless table_name.nil?

                    # reset things
                    table_name = nil
                    headers = nil
                    data = []
                  elsif matches = line.match(/^TABLE\: (.*)$/)
                    # name of a table
                    table_name = matches[1]
                  elsif line.match(/^\[.*\]/)
                    # headings of the table
                    headers = line.split("\t").collect do |header|
                      header.gsub(/^\[/,'').gsub(/\]$/,'')
                    end
                  else
                    # a proper data row
                    data.push line.split("\t")
                  end
                  line = @io.readline.strip
                end

                # A table followed directly by the separator (no blank line in
                # between) is still pending here; store it rather than dropping it.
                info.add_table(table_name, headers, data) unless table_name.nil?

                # return the object that has been created
                return info
              end
            end
         
     | 
| 
       122 
     | 
    
         
            -
             
     | 
| 
       123 
     | 
    
         
            -
            # One gene record from a gene information table.
            #
            # A record holds two kinds of data:
            # * info   - single-value entries, stored as key => value
            # * tables - named tables, each stored as an Array of row Hashes
            #            keyed by the column header
            #
            # Both stores are created lazily by the first add_* call, so the
            # lookup methods must cope with a store that does not exist yet.
            class EuPathDBGeneInformation
              # The raw info Hash, or nil if add_information was never called.
              attr_reader :info

              # Look up a single info value.
              #
              # Returns the stored value, or nil when the key is unknown or no
              # info has been added to this record yet.
              def get_info(key)
                @info ? @info[key] : nil
              end
              alias_method :[], :get_info

              # Look up a table by name.
              #
              # Returns an Array of row Hashes (header => cell), or nil when
              # the table is unknown or no table has been added yet.
              def get_table(table_name)
                @tables ? @tables[table_name] : nil
              end

              # Store a single info value under +key+, replacing any previous
              # value for that key.
              #
              # Returns a human-readable status String describing the entry.
              def add_information(key, value)
                @info ||= {}
                @info[key] = value
                "Added info #{key}, now is #{@info[key]}"
              end

              # Store a table under +name+, replacing any previous table of
              # the same name.
              #
              # headers - column names, matched to cells by position
              # data    - rows; each row is an Array of cells
              #
              # Returns +data+ unchanged.
              def add_table(name, headers, data)
                @tables ||= {}
                rows = data.map do |row|
                  record = {}
                  # Cells are paired with headers by index; a cell beyond the
                  # last header is stored under the nil key.
                  row.each_with_index { |cell, i| record[headers[i]] = cell }
                  record
                end
                @tables[name] = rows
                data
              end
            end
         
     |