protk 1.3.0 → 1.3.1.pre2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e8f8a571cb43ed61984a34b6e1fb51caf979593
4
- data.tar.gz: b53857f75c1ff6ca850859c3985aee36533e437f
3
+ metadata.gz: 888f8ebff75c2c33497c9bf4f7aeec182311b7e3
4
+ data.tar.gz: 4102a91afbee688babe093df8a53b84b097ba0c3
5
5
  SHA512:
6
- metadata.gz: 9450fccc4a5ce59f064927d62fbc6a4342a1710c3b82707e0908dea52af7d0b50f215e64073bb067a506d204701acea11b6d28f302447494b8a30b1e7af2df2d
7
- data.tar.gz: 1b8bc78fc09b4c81eee72fad169a6aee7145a16312c01bc95a5dd590f08cb98194b26115a166a759b9c52c7c67204a767747642e5e9331de4562d52f31eb1e11
6
+ metadata.gz: 3e67189a07c6ac237a4def19ad90043ab8919d5492fd43b67cfa5fc3285819b2fd62671375283c5d9dd05618c746603ba829c70225b140b3a52ccba9fafb24f8
7
+ data.tar.gz: 354a9eb2499d3f8b194ccdef82f06692672435a0e47b5b49197b9f1fba2c27181275d38c98fe6f644750b554b4b1601b873f6a51a0318853b86b17d7783f2e57
data/bin/make_decoy.rb CHANGED
@@ -49,10 +49,9 @@ if (tool.reverse_only)
49
49
  Bio::FastaFormat.open(input_file).each do |seq|
50
50
  id=nil
51
51
  begin
52
- # require 'debugger';debugger
53
52
  id=seq.definition.chomp.scan(/#{tool.id_regex}/)[0][0]
54
53
  revdef=seq.definition.sub(id,"#{tool.prefix_string}#{id}")
55
- decoys_out.write ">#{revdef}\n#{seq.aaseq}\n"
54
+ decoys_out.write ">#{revdef}\n#{seq.aaseq.reverse}\n"
56
55
  rescue
57
56
  puts "Unable to parse id for #{seq.definition}. Skipping" if (id==nil)
58
57
  end
data/bin/mascot_search.rb CHANGED
@@ -103,6 +103,7 @@ def search_params_dictionary(search_tool,input_file)
103
103
  postdict[:FILE]=File.new(input_file)
104
104
  postdict[:FORMVER]='1.01'
105
105
  postdict[:INTERMEDIATE]=''
106
+ postdict[:QUANTITATION]=search_tool.quantitation
106
107
 
107
108
  postdict
108
109
  end
@@ -134,6 +135,7 @@ search_tool.options.output_suffix="_mascot"
134
135
 
135
136
  search_tool.add_value_option(:mascot_server,"#{$genv.default_mascot_server}/mascot/cgi",['-S', '--server url', 'The url to the cgi directory of the mascot server'])
136
137
  search_tool.add_value_option(:allowed_charges,"1+,2+,3+",['--allowed-charges ac', 'Allowed precursor ion charges.'])
138
+ search_tool.add_value_option(:quantitation,"",['--quantitation method','Mascot quant method'])
137
139
  search_tool.add_value_option(:email,"",['--email em', 'User email.'])
138
140
  search_tool.add_value_option(:username,"",['--username un', 'Username.'])
139
141
  search_tool.add_value_option(:httpproxy,nil,['--proxy url', 'The url to a proxy server'])
@@ -170,7 +170,7 @@ ARGV.each do |filename|
170
170
 
171
171
  # Num Threads
172
172
  #
173
- cmd << " -thread #{search_tool.threads}" if search_tool.threads > 0
173
+ cmd << " -thread #{search_tool.threads}" if search_tool.threads.to_i > 0
174
174
 
175
175
  mods_file_content = ""
176
176
 
@@ -1,44 +1,51 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
3
  # This file is part of protk
4
- # Original python version created by Max Grant
5
- # Translated to ruby by Ira Cooke 29/1/2013
4
+ # Created by Ira Cooke 3/8/2014
6
5
  #
7
6
  #
8
7
 
9
8
  require 'protk/constants'
10
- require 'protk/protxml_to_gff_tool'
11
9
  require 'protk/fastadb'
10
+ require 'protk/gffdb'
11
+ require 'protk/protein'
12
+ require 'protk/peptide'
13
+ require 'protk/tool'
12
14
  require 'libxml'
13
15
  require 'bio'
14
16
 
15
17
  include LibXML
16
18
 
17
- tool=ProtXMLToGFFTool.new()
18
19
 
19
- @output_extension=".gff"
20
- @output_suffix=""
21
-
22
- exit unless tool.check_options(true,[:database])
23
-
24
- input_proxml=ARGV[0]
25
-
26
- if ( tool.explicit_output!=nil)
27
- gff_out_file=tool.explicit_output
28
- else
29
- gff_out_file=Tool.default_output_path(input_proxml,@output_extension,tool.output_prefix,@output_suffix)
20
+ class NoGFFEntryFoundError < StandardError
30
21
  end
31
22
 
32
- gff_db = Bio::GFF.new()
33
- f = open(gff_out_file,'w+')
23
+ class ProteinNotInDBError < StandardError
24
+ end
34
25
 
26
+ class MultipleGFFEntriesForProteinError < StandardError
27
+ end
35
28
 
36
29
  def parse_proteins(protxml_file)
37
- puts "Parsing proteins from protxml"
38
30
  protxml_parser=XML::Parser.file(protxml_file)
39
31
  protxml_doc=protxml_parser.parse
40
32
  proteins = protxml_doc.find('.//protxml:protein','protxml:http://regis-web.systemsbiology.net/protXML')
41
- proteins
33
+ proteins.collect { |node| Protein.from_protxml(node) }
34
+ end
35
+
36
+ def protein_id_to_gffid(protein_id,gff_idregex)
37
+ return protein_id if gff_idregex.nil?
38
+ return protein_id.match(/#{gff_idregex}/)[1]
39
+ end
40
+
41
+ def protein_id_to_genomeid(protein_id,genome_idregex)
42
+ return protein_id if genome_idregex.nil?
43
+ return protein_id.match(/#{genome_idregex}/)[1]
44
+ end
45
+
46
+ def protein_id_to_protdbid(protein_id)
47
+ # return protein_id.sub(/^lcl\|/,"")
48
+ return protein_id
42
49
  end
43
50
 
44
51
  def prepare_fasta(database_path,type)
@@ -50,134 +57,106 @@ def prepare_fasta(database_path,type)
50
57
  db_filename=Constants.new.current_database_for_name(database_path)
51
58
  end
52
59
 
53
- db_indexfilename = "#{db_filename}.pin"
60
+
61
+ db_indexfilename = type=='prot' ? "#{db_filename}.pin" : "#{db_filename}.nhr"
54
62
 
55
63
  if File.exist?(db_indexfilename)
56
- puts "Using existing indexed database"
57
64
  orf_lookup = FastaDB.new(db_filename)
58
65
  else
59
- puts "Indexing database"
60
66
  orf_lookup = FastaDB.create(db_filename,db_filename,type)
61
67
  end
62
68
  orf_lookup
63
69
  end
64
70
 
65
- proteins = parse_proteins(input_proxml)
66
- fastadb = prepare_fasta(tool.database,'prot')
67
- genomedb = nil
68
- if tool.genome
69
- genomedb = prepare_fasta(tool.genome,'nucl')
70
- end
71
-
72
- puts "Aligning peptides and writing GFF data..."
73
-
74
- low_prob = 0
75
- skipped = 0
76
- peptide_count = 0
77
- protein_count = 0
78
- total_peptides = 0
79
-
80
- peptides_covered_genome={}
81
-
82
- for prot in proteins
83
- prot_prob = prot['probability']
84
- if ( prot_prob.to_f < tool.protein_probability_threshold )
85
- next
86
- end
87
-
88
- # Gets identifiers of all proteins (includeing indistinguishable ones)
89
- prot_names=tool.protein_names(prot)
90
71
 
91
72
 
92
- if tool.protein_find!=nil
93
- prot_names=prot_names.keep_if { |pname| pname.include? tool.protein_find }
94
- end
73
+ tool=Tool.new([:explicit_output,:debug])
74
+ tool.option_parser.banner = "Map proteins and peptides to genomic coordinates.\n\nUsage: protxml_to_gff.rb [options] proteins.<protXML>"
95
75
 
76
+ tool.add_value_option(:database,nil,['-d filename','--database filename','Database used for ms/ms searches (Fasta Format)'])
77
+ # tool.add_value_option(:genome,nil,['-g filename','--genome filename', 'Nucleotide sequences for scaffolds (Fasta Format)'])
78
+ tool.add_value_option(:coords_file,nil,['-c filename','--coords-file filename.gff3', 'A file containing genomic coordinates for predicted proteins and/or 6-frame translations'])
79
+ tool.add_boolean_option(:stack_charge_states,false,['--stack-charge-states','Different peptide charge states get separate gff entries'])
80
+ tool.add_value_option(:peptide_probability_threshold,0.95,['--threshold prob','Peptide Probability Threshold (Default 0.95)'])
81
+ tool.add_value_option(:protein_probability_threshold,0.99,['--prot-threshold prob','Protein Probability Threshold (Default 0.99)'])
82
+ tool.add_value_option(:gff_idregex,nil,['--gff-idregex pre','Regex with capture group for parsing gff ids from protein ids'])
83
+ tool.add_value_option(:genome_idregex,nil,['--genome-idregex pre','Regex with capture group for parsing genomic ids from protein ids'])
96
84
 
97
- peptides=tool.peptide_nodes(prot)
98
- entries_covered=[]
99
- for protein_name in prot_names
100
- protein_count += 1
101
- prot_id = "pr#{protein_count.to_s}"
102
- begin
85
+ exit unless tool.check_options(true,[:database,:coords_file])
103
86
 
104
- protein_fasta_entry = tool.get_fasta_record(protein_name,fastadb)
105
- protein_info = tool.cds_info_from_fasta(protein_fasta_entry)
87
+ $protk = Constants.new
88
+ log_level = tool.debug ? "info" : "warn"
89
+ $protk.info_level= log_level
106
90
 
107
- unless (tool.collapse_redundant_proteins && !tool.is_new_genome_location(protein_info,entries_covered) )
108
91
 
109
- protein_gff = tool.generate_protein_gff(protein_name,protein_info,prot_prob,protein_count)
92
+ input_file=ARGV[0]
110
93
 
111
- gff_db.records += ["##gff-version 3\n","##sequence-region #{protein_info.scaffold} 1 160\n",protein_gff]
94
+ if tool.explicit_output
95
+ output_fh=File.new("#{tool.explicit_output}",'w')
96
+ else
97
+ output_fh=$stdout
98
+ end
112
99
 
113
- prot_seq = protein_fasta_entry.aaseq.to_s
114
- throw "Not amino_acids" if prot_seq != protein_fasta_entry.seq.to_s
100
+ should_ = tool.debug || (output_fh!=$stdout)
115
101
 
116
- peptides_covered_protein=[]
117
- peptide_count=1
118
- for peptide in peptides
102
+ input_protxml=ARGV[0]
119
103
 
120
- pprob = peptide['nsp_adjusted_probability'].to_f
121
- # puts peptide
122
- # puts pprob
123
- pep_seq = peptide['peptide_sequence']
104
+ gffdb = GFFDB.create(tool.coords_file) if tool.coords_file
124
105
 
125
- if ( pprob >= tool.peptide_probability_threshold && (!peptides_covered_protein.include?(pep_seq) || tool.stack_charge_states))
106
+ # genome_db = prepare_fasta(tool.genome,'nucl')
107
+ prot_db = prepare_fasta(tool.database,'prot')
126
108
 
127
- dna_sequence=nil
128
- if !protein_info.is_sixframe
129
- throw "A genome is required if predicted transcripts are to be mapped" unless genomedb!=nil
130
- dna_sequence = tool.get_dna_sequence(protein_info,genomedb)
131
- end
109
+ proteins = parse_proteins(input_protxml)
132
110
 
111
+ num_missing_gff_entries = 0
133
112
 
134
- peptide_gff = tool.generate_gff_for_peptide_mapped_to_protein(prot_seq,pep_seq,protein_info,prot_id,pprob,peptide_count,dna_sequence,genomedb)
113
+ proteins.each do |protein|
135
114
 
136
- unless (peptide_gff.length==0 || tool.peptide_gff_is_duplicate(peptide_gff[0],peptides_covered_genome))
115
+ begin
116
+ # Get the full protein sequence
117
+ #
118
+ parsed_name_for_protdb = protein_id_to_protdbid(protein.protein_name)
119
+ protein_entry = prot_db.get_by_id parsed_name_for_protdb
120
+ raise ProteinNotInDBError if ( protein_entry == nil)
137
121
 
138
- tool.add_putative_nterm_to_gff(peptide_gff,pep_seq,prot_seq,protein_info,prot_id,peptide_count,dna_sequence,genomedb)
122
+ protein.sequence = protein_entry.aaseq
139
123
 
140
- gff_db.records += peptide_gff
124
+ # Get the CDS and parent entries from the gff file
125
+ #
126
+ parsed_name_for_gffid = protein_id_to_gffid(protein.protein_name,tool.gff_idregex)
127
+ gff_parent_entries = gffdb.get_by_id(parsed_name_for_gffid)
128
+ raise NoGFFEntryFoundError if gff_parent_entries.nil? || gff_parent_entries.length==0
129
+ raise MultipleGFFEntriesForProteinError if gff_parent_entries.length > 1
141
130
 
142
- peptides_covered_protein << pep_seq unless tool.stack_charge_states
143
- peptides_covered_genome[pep_seq] = peptide_gff[0].start
131
+ gff_parent_entry = gff_parent_entries.first
132
+ gff_cds_entries = gffdb.get_cds_by_parent_id(parsed_name_for_gffid)
144
133
 
145
- total_peptides += 1
146
- peptide_count+=1
147
- else
148
- puts "Duplicate peptide #{peptide_gff[0]}"
149
- end
150
- # puts gff_db.records.last
151
- end
152
- end
153
- else
154
- puts "Skipping redundant entry #{protein_name}"
155
- protein_count-=1 # To counter +1 prior to begin rescue end block
156
- end
134
+ # Account for sixframe case. Parent is CDS and there are no children
135
+ #
136
+ gff_cds_entries=[gff_parent_entry] if gff_cds_entries.nil? && gff_parent_entry.feature=="CDS"
157
137
 
158
- entries_covered<<protein_info
138
+ peptides = tool.stack_charge_states ? protein.peptides : protein.representative_peptides
159
139
 
160
- # puts protein_gff
161
- # puts gff_db.records
162
- rescue KeyError,EncodingError
163
- skipped+=0
164
- end
140
+ peptides.each do |peptide|
141
+ peptide_entries = peptide.to_gff3_records(protein_entry.aaseq,gff_parent_entry,gff_cds_entries)
142
+ peptide_entries.each do |peptide_entry|
143
+ output_fh.write peptide_entry.to_s
144
+ end
145
+ end
165
146
 
166
- # exit
167
- end
147
+ rescue NoGFFEntryFoundError
148
+ $protk.log "No gff entry for #{parsed_name_for_gffid}", :info
149
+ num_missing_gff_entries+=1
150
+ rescue ProteinNotInDBError
151
+ $protk.log "No entry for #{parsed_name_for_protdb}", :info
152
+ rescue MultipleGFFEntriesForProteinError
153
+ $protk.log "Multiple entries in gff file for #{parsed_name_for_gffid}", :info
154
+ rescue PeptideNotInProteinError
155
+ $protk.log "A peptide was not found in its parent protein #{protein.protein_name}" , :warn
156
+ end
157
+ end
168
158
 
159
+ if num_missing_gff_entries>0
160
+ $protk.log "Failed to lookup gff entries. Try setting --gff-idregex" if tool.gff_idregex.nil?
169
161
  end
170
162
 
171
- f = open(gff_out_file,'w+')
172
- gff_db.records.each { |rec|
173
- f.write(rec.to_s)
174
- }
175
- f.close
176
-
177
- p "Finished."
178
- p "Proteins: #{protein_count}"
179
- p "Skipped Decoys: #{skipped}"
180
- p "Total Peptides: #{total_peptides}"
181
- p "Peptides Written: #{total_peptides - low_prob}"
182
- p "Peptides Culled: #{low_prob}"
183
- exit(0)
@@ -242,7 +242,7 @@ def insert_psms_from_file(filepath)
242
242
 
243
243
  spectrum_queries.each do |query|
244
244
 
245
- spectrum_name = query.attributes['spectrum'].chomp.gsub("0","").sub(/\.\d+$/,"")
245
+ spectrum_name = query.attributes['spectrum'].chomp.gsub(/\.0+/,"\.").sub(/\.\d+$/,"")
246
246
 
247
247
  start_scan=query.attributes['start_scan'].to_i
248
248
  end_scan=query.attributes['end_scan'].to_i
@@ -318,7 +318,8 @@ def lookup_spectra_from_files(file_list,matched_spectra)
318
318
  SQL
319
319
 
320
320
  else
321
-
321
+ # require 'debugger';debugger
322
+ # puts "Unmatched spectrum #{spec[:title]}"
322
323
  end
323
324
  spec = mzml_parser.next_spectrum
324
325
  end
data/bin/sixframe.rb CHANGED
@@ -29,6 +29,7 @@ tool.option_parser.banner = "Create a sixframe translation of a genome.\n\nUsage
29
29
  tool.add_boolean_option(:print_coords,false,['--coords', 'Write genomic coordinates in the fasta header'])
30
30
  tool.add_boolean_option(:keep_header,true,['--strip-header', 'Dont write sequence definition'])
31
31
  tool.add_value_option(:min_len,20,['--min-len','Minimum ORF length to keep'])
32
+ tool.add_boolean_option(:write_gff,false,['--gff3','Output gff3 instead of fasta'])
32
33
 
33
34
  exit unless tool.check_options(true)
34
35
 
@@ -38,6 +39,9 @@ output_file = tool.explicit_output!=nil ? tool.explicit_output : nil
38
39
 
39
40
  output_fh = output_file!=nil ? File.new("#{output_file}",'w') : $stdout
40
41
 
42
+ if tool.write_gff
43
+ output_fh.write "##gff-version 3\n"
44
+ end
41
45
 
42
46
  file = Bio::FastaFormat.open(input_file)
43
47
 
@@ -66,13 +70,11 @@ file.each do |entry|
66
70
  position_end=forward_position_end
67
71
  end
68
72
 
69
-
70
-
71
-
72
73
  # Create accession compliant with NCBI naming standard
73
74
  # See http://www.ncbi.nlm.nih.gov/books/NBK7183/?rendertype=table&id=ch_demo.T5
74
75
  ncbi_scaffold_id = entry.entry_id.gsub('|','_').gsub(' ','_')
75
76
  ncbi_accession = "lcl|#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
77
+ gff_id = "#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
76
78
 
77
79
  defline=">#{ncbi_accession}"
78
80
 
@@ -84,11 +86,16 @@ file.each do |entry|
84
86
  defline << " #{entry.definition}"
85
87
  end
86
88
 
87
- # Output in fasta format
88
- # start and end positions are always relative to the forward strand
89
-
90
- output_fh.write("#{defline}\n#{orf}\n")
91
-
89
+ if tool.write_gff
90
+ strand = frame>3 ? "-" : "+"
91
+ # score = self.nsp_adjusted_probability.nil? ? "." : self.nsp_adjusted_probability.to_s
92
+ # gff_string = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\tID=#{this_id};Parent=#{cds_id}"
93
+ output_fh.write("#{ncbi_scaffold_id}\tsixframe\tCDS\t#{position_start}\t#{position_end}\t.\t#{strand}\t0\tID=#{gff_id}\n")
94
+ else
95
+ # Output in fasta format
96
+ # start and end positions are always relative to the forward strand
97
+ output_fh.write("#{defline}\n#{orf}\n")
98
+ end
92
99
  end
93
100
  position += orf.length*3+3
94
101
  end
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # Convert a pepXML file to a tab delimited table
7
+ #
8
+ #
9
+ require 'protk/tool'
10
+ require 'protk/swissprot_database'
11
+ require 'protk/bio_sptr_extensions'
12
+ require 'protk/fastadb'
13
+
14
+
15
+
16
+ columns={'recname'=>"Primary Name",'cd'=>"CD Antigen Name",'altnames'=>"Alternate Names",
17
+ 'location' => "Subcellular Location",
18
+ 'function' => "Known Function",
19
+ 'similarity' => "Similarity",
20
+ 'tissues' => "Tissue Specificity",
21
+ 'disease' => "Disease Association",
22
+ 'domain' => "Domain",
23
+ 'subunit' => "Sub Unit",
24
+ 'nextbio' => "NextBio",
25
+ 'ipi' => "IPI",
26
+ 'intact' => "Interactions",
27
+ 'pride' => 'Pride',
28
+ 'ensembl'=> 'Ensembl',
29
+ 'num_transmem'=>"Transmembrane Regions",
30
+ 'signalp'=>'Signal Peptide',
31
+ 'go_terms'=>"GO Terms",
32
+ 'go_entries'=>"GO Entries",
33
+ 'accessions'=>"Uniprot Accessions",
34
+ 'ncbi_taxon_id'=>"NCBI Taxon ID"
35
+ }
36
+
37
+
38
+
39
+
40
+
41
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
42
+ #
43
+ tool=Tool.new([:explicit_output,:debug])
44
+ tool.option_parser.banner = "Query a swissprot flat file and output to tab delimited table.\n\nUsage: swissprot_to_table.rb [options] -d flatfile.dat queries.txt"
45
+
46
+ tool.add_value_option(:database,nil,['-d','--database file','Uniprot flatfile database containing full records for proteins'])
47
+ tool.add_value_option(:output_keys,nil,['-K','--keys keys','Filter output to only the specified keys (comma separated)'])
48
+ tool.add_boolean_option(:show_keys,false,['--show-keys','Print a list of possible values for the keys field and exit'])
49
+ tool.add_value_option(:separator,"\t",['-S','--separator sep','Separator character for output, default (tab)'])
50
+ tool.add_value_option(:array_separator,",",['-A','--array-separator sep','Array Separator character, default ,'])
51
+ tool.add_value_option(:query_separator,"\t",['--query-separator sep','Separator character for queries.txt, default is tab'])
52
+ tool.add_value_option(:id_column,1,['--id-column num','Column in queries.txt in which Uniprot Accessions are found'])
53
+
54
+
55
+ if ARGV.include? "--show-keys"
56
+ columns.each_pair { |name, val| $stdout.write "#{name} (#{val})\n" }
57
+ exit
58
+ end
59
+
60
+
61
+ exit unless tool.check_options(true,[:database])
62
+
63
+
64
+ $protk = Constants.new
65
+ log_level = tool.debug ? :debug : :fatal
66
+ $protk.info_level= log_level
67
+
68
+
69
+ if tool.explicit_output
70
+ output_fh=File.new("#{tool.explicit_output}",'w')
71
+ else
72
+ output_fh=$stdout
73
+ end
74
+
75
+
76
+ if tool.output_keys
77
+ output_keys=tool.output_keys.split(",").collect { |k| k.strip }
78
+ columns.delete_if { |key, value| !output_keys.include? key }
79
+ end
80
+
81
+
82
+ db_info=tool.database_info
83
+ database_path=db_info.path
84
+
85
+ database_index_path = "#{Pathname.new(database_path).dirname}/config.dat"
86
+
87
+ skip_index = File.exists?(database_index_path) ? true : false
88
+
89
+
90
+ swissprotdb=SwissprotDatabase.new(database_path,skip_index)
91
+
92
+
93
+ def write_entry(item_name,item,columns,tool,output_fh)
94
+ row=[item_name]
95
+ row << columns.keys.collect do |name|
96
+ colvalue = item.send(name)
97
+ colvalue = "" unless colvalue
98
+ colvalue = colvalue.join(tool.array_separator) if colvalue.class==Array
99
+ colvalue
100
+ end
101
+ output_fh.write "#{row.join(tool.separator)}\n"
102
+ end
103
+
104
+ File.open(ARGV[0]).each_line do |line|
105
+
106
+ begin
107
+ query_id = line.chomp.split(tool.query_separator)[tool.id_column.to_i-1]
108
+ rescue
109
+ query_id = line.chomp
110
+ end
111
+
112
+ begin
113
+ item = swissprotdb.get_entry_for_name(query_id)
114
+ write_entry(query_id,item,columns,tool,output_fh)
115
+ rescue
116
+ $protk.log "Unable to retrieve entry for #{query_id}" , :debug
117
+ end
118
+
119
+ end
120
+