protk 1.3.0 → 1.3.1.pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5e8f8a571cb43ed61984a34b6e1fb51caf979593
4
- data.tar.gz: b53857f75c1ff6ca850859c3985aee36533e437f
3
+ metadata.gz: 888f8ebff75c2c33497c9bf4f7aeec182311b7e3
4
+ data.tar.gz: 4102a91afbee688babe093df8a53b84b097ba0c3
5
5
  SHA512:
6
- metadata.gz: 9450fccc4a5ce59f064927d62fbc6a4342a1710c3b82707e0908dea52af7d0b50f215e64073bb067a506d204701acea11b6d28f302447494b8a30b1e7af2df2d
7
- data.tar.gz: 1b8bc78fc09b4c81eee72fad169a6aee7145a16312c01bc95a5dd590f08cb98194b26115a166a759b9c52c7c67204a767747642e5e9331de4562d52f31eb1e11
6
+ metadata.gz: 3e67189a07c6ac237a4def19ad90043ab8919d5492fd43b67cfa5fc3285819b2fd62671375283c5d9dd05618c746603ba829c70225b140b3a52ccba9fafb24f8
7
+ data.tar.gz: 354a9eb2499d3f8b194ccdef82f06692672435a0e47b5b49197b9f1fba2c27181275d38c98fe6f644750b554b4b1601b873f6a51a0318853b86b17d7783f2e57
data/bin/make_decoy.rb CHANGED
@@ -49,10 +49,9 @@ if (tool.reverse_only)
49
49
  Bio::FastaFormat.open(input_file).each do |seq|
50
50
  id=nil
51
51
  begin
52
- # require 'debugger';debugger
53
52
  id=seq.definition.chomp.scan(/#{tool.id_regex}/)[0][0]
54
53
  revdef=seq.definition.sub(id,"#{tool.prefix_string}#{id}")
55
- decoys_out.write ">#{revdef}\n#{seq.aaseq}\n"
54
+ decoys_out.write ">#{revdef}\n#{seq.aaseq.reverse}\n"
56
55
  rescue
57
56
  puts "Unable to parse id for #{seq.definition}. Skipping" if (id==nil)
58
57
  end
data/bin/mascot_search.rb CHANGED
@@ -103,6 +103,7 @@ def search_params_dictionary(search_tool,input_file)
103
103
  postdict[:FILE]=File.new(input_file)
104
104
  postdict[:FORMVER]='1.01'
105
105
  postdict[:INTERMEDIATE]=''
106
+ postdict[:QUANTITATION]=search_tool.quantitation
106
107
 
107
108
  postdict
108
109
  end
@@ -134,6 +135,7 @@ search_tool.options.output_suffix="_mascot"
134
135
 
135
136
  search_tool.add_value_option(:mascot_server,"#{$genv.default_mascot_server}/mascot/cgi",['-S', '--server url', 'The url to the cgi directory of the mascot server'])
136
137
  search_tool.add_value_option(:allowed_charges,"1+,2+,3+",['--allowed-charges ac', 'Allowed precursor ion charges.'])
138
+ search_tool.add_value_option(:quantitation,"",['--quantitation method','Mascot quant method'])
137
139
  search_tool.add_value_option(:email,"",['--email em', 'User email.'])
138
140
  search_tool.add_value_option(:username,"",['--username un', 'Username.'])
139
141
  search_tool.add_value_option(:httpproxy,nil,['--proxy url', 'The url to a proxy server'])
@@ -170,7 +170,7 @@ ARGV.each do |filename|
170
170
 
171
171
  # Num Threads
172
172
  #
173
- cmd << " -thread #{search_tool.threads}" if search_tool.threads > 0
173
+ cmd << " -thread #{search_tool.threads}" if search_tool.threads.to_i > 0
174
174
 
175
175
  mods_file_content = ""
176
176
 
@@ -1,44 +1,51 @@
1
1
  #!/usr/bin/env ruby
2
2
  #
3
3
  # This file is part of protk
4
- # Original python version created by Max Grant
5
- # Translated to ruby by Ira Cooke 29/1/2013
4
+ # Created by Ira Cooke 3/8/2014
6
5
  #
7
6
  #
8
7
 
9
8
  require 'protk/constants'
10
- require 'protk/protxml_to_gff_tool'
11
9
  require 'protk/fastadb'
10
+ require 'protk/gffdb'
11
+ require 'protk/protein'
12
+ require 'protk/peptide'
13
+ require 'protk/tool'
12
14
  require 'libxml'
13
15
  require 'bio'
14
16
 
15
17
  include LibXML
16
18
 
17
- tool=ProtXMLToGFFTool.new()
18
19
 
19
- @output_extension=".gff"
20
- @output_suffix=""
21
-
22
- exit unless tool.check_options(true,[:database])
23
-
24
- input_proxml=ARGV[0]
25
-
26
- if ( tool.explicit_output!=nil)
27
- gff_out_file=tool.explicit_output
28
- else
29
- gff_out_file=Tool.default_output_path(input_proxml,@output_extension,tool.output_prefix,@output_suffix)
20
+ class NoGFFEntryFoundError < StandardError
30
21
  end
31
22
 
32
- gff_db = Bio::GFF.new()
33
- f = open(gff_out_file,'w+')
23
+ class ProteinNotInDBError < StandardError
24
+ end
34
25
 
26
+ class MultipleGFFEntriesForProteinError < StandardError
27
+ end
35
28
 
36
29
  def parse_proteins(protxml_file)
37
- puts "Parsing proteins from protxml"
38
30
  protxml_parser=XML::Parser.file(protxml_file)
39
31
  protxml_doc=protxml_parser.parse
40
32
  proteins = protxml_doc.find('.//protxml:protein','protxml:http://regis-web.systemsbiology.net/protXML')
41
- proteins
33
+ proteins.collect { |node| Protein.from_protxml(node) }
34
+ end
35
+
36
+ def protein_id_to_gffid(protein_id,gff_idregex)
37
+ return protein_id if gff_idregex.nil?
38
+ return protein_id.match(/#{gff_idregex}/)[1]
39
+ end
40
+
41
+ def protein_id_to_genomeid(protein_id,genome_idregex)
42
+ return protein_id if genome_idregex.nil?
43
+ return protein_id.match(/#{genome_idregex}/)[1]
44
+ end
45
+
46
+ def protein_id_to_protdbid(protein_id)
47
+ # return protein_id.sub(/^lcl\|/,"")
48
+ return protein_id
42
49
  end
43
50
 
44
51
  def prepare_fasta(database_path,type)
@@ -50,134 +57,106 @@ def prepare_fasta(database_path,type)
50
57
  db_filename=Constants.new.current_database_for_name(database_path)
51
58
  end
52
59
 
53
- db_indexfilename = "#{db_filename}.pin"
60
+
61
+ db_indexfilename = type=='prot' ? "#{db_filename}.pin" : "#{db_filename}.nhr"
54
62
 
55
63
  if File.exist?(db_indexfilename)
56
- puts "Using existing indexed database"
57
64
  orf_lookup = FastaDB.new(db_filename)
58
65
  else
59
- puts "Indexing database"
60
66
  orf_lookup = FastaDB.create(db_filename,db_filename,type)
61
67
  end
62
68
  orf_lookup
63
69
  end
64
70
 
65
- proteins = parse_proteins(input_proxml)
66
- fastadb = prepare_fasta(tool.database,'prot')
67
- genomedb = nil
68
- if tool.genome
69
- genomedb = prepare_fasta(tool.genome,'nucl')
70
- end
71
-
72
- puts "Aligning peptides and writing GFF data..."
73
-
74
- low_prob = 0
75
- skipped = 0
76
- peptide_count = 0
77
- protein_count = 0
78
- total_peptides = 0
79
-
80
- peptides_covered_genome={}
81
-
82
- for prot in proteins
83
- prot_prob = prot['probability']
84
- if ( prot_prob.to_f < tool.protein_probability_threshold )
85
- next
86
- end
87
-
88
- # Gets identifiers of all proteins (includeing indistinguishable ones)
89
- prot_names=tool.protein_names(prot)
90
71
 
91
72
 
92
- if tool.protein_find!=nil
93
- prot_names=prot_names.keep_if { |pname| pname.include? tool.protein_find }
94
- end
73
+ tool=Tool.new([:explicit_output,:debug])
74
+ tool.option_parser.banner = "Map proteins and peptides to genomic coordinates.\n\nUsage: protxml_to_gff.rb [options] proteins.<protXML>"
95
75
 
76
+ tool.add_value_option(:database,nil,['-d filename','--database filename','Database used for ms/ms searches (Fasta Format)'])
77
+ # tool.add_value_option(:genome,nil,['-g filename','--genome filename', 'Nucleotide sequences for scaffolds (Fasta Format)'])
78
+ tool.add_value_option(:coords_file,nil,['-c filename','--coords-file filename.gff3', 'A file containing genomic coordinates for predicted proteins and/or 6-frame translations'])
79
+ tool.add_boolean_option(:stack_charge_states,false,['--stack-charge-states','Different peptide charge states get separate gff entries'])
80
+ tool.add_value_option(:peptide_probability_threshold,0.95,['--threshold prob','Peptide Probability Threshold (Default 0.95)'])
81
+ tool.add_value_option(:protein_probability_threshold,0.99,['--prot-threshold prob','Protein Probability Threshold (Default 0.99)'])
82
+ tool.add_value_option(:gff_idregex,nil,['--gff-idregex pre','Regex with capture group for parsing gff ids from protein ids'])
83
+ tool.add_value_option(:genome_idregex,nil,['--genome-idregex pre','Regex with capture group for parsing genomic ids from protein ids'])
96
84
 
97
- peptides=tool.peptide_nodes(prot)
98
- entries_covered=[]
99
- for protein_name in prot_names
100
- protein_count += 1
101
- prot_id = "pr#{protein_count.to_s}"
102
- begin
85
+ exit unless tool.check_options(true,[:database,:coords_file])
103
86
 
104
- protein_fasta_entry = tool.get_fasta_record(protein_name,fastadb)
105
- protein_info = tool.cds_info_from_fasta(protein_fasta_entry)
87
+ $protk = Constants.new
88
+ log_level = tool.debug ? "info" : "warn"
89
+ $protk.info_level= log_level
106
90
 
107
- unless (tool.collapse_redundant_proteins && !tool.is_new_genome_location(protein_info,entries_covered) )
108
91
 
109
- protein_gff = tool.generate_protein_gff(protein_name,protein_info,prot_prob,protein_count)
92
+ input_file=ARGV[0]
110
93
 
111
- gff_db.records += ["##gff-version 3\n","##sequence-region #{protein_info.scaffold} 1 160\n",protein_gff]
94
+ if tool.explicit_output
95
+ output_fh=File.new("#{tool.explicit_output}",'w')
96
+ else
97
+ output_fh=$stdout
98
+ end
112
99
 
113
- prot_seq = protein_fasta_entry.aaseq.to_s
114
- throw "Not amino_acids" if prot_seq != protein_fasta_entry.seq.to_s
100
+ should_ = tool.debug || (output_fh!=$stdout)
115
101
 
116
- peptides_covered_protein=[]
117
- peptide_count=1
118
- for peptide in peptides
102
+ input_protxml=ARGV[0]
119
103
 
120
- pprob = peptide['nsp_adjusted_probability'].to_f
121
- # puts peptide
122
- # puts pprob
123
- pep_seq = peptide['peptide_sequence']
104
+ gffdb = GFFDB.create(tool.coords_file) if tool.coords_file
124
105
 
125
- if ( pprob >= tool.peptide_probability_threshold && (!peptides_covered_protein.include?(pep_seq) || tool.stack_charge_states))
106
+ # genome_db = prepare_fasta(tool.genome,'nucl')
107
+ prot_db = prepare_fasta(tool.database,'prot')
126
108
 
127
- dna_sequence=nil
128
- if !protein_info.is_sixframe
129
- throw "A genome is required if predicted transcripts are to be mapped" unless genomedb!=nil
130
- dna_sequence = tool.get_dna_sequence(protein_info,genomedb)
131
- end
109
+ proteins = parse_proteins(input_protxml)
132
110
 
111
+ num_missing_gff_entries = 0
133
112
 
134
- peptide_gff = tool.generate_gff_for_peptide_mapped_to_protein(prot_seq,pep_seq,protein_info,prot_id,pprob,peptide_count,dna_sequence,genomedb)
113
+ proteins.each do |protein|
135
114
 
136
- unless (peptide_gff.length==0 || tool.peptide_gff_is_duplicate(peptide_gff[0],peptides_covered_genome))
115
+ begin
116
+ # Get the full protein sequence
117
+ #
118
+ parsed_name_for_protdb = protein_id_to_protdbid(protein.protein_name)
119
+ protein_entry = prot_db.get_by_id parsed_name_for_protdb
120
+ raise ProteinNotInDBError if ( protein_entry == nil)
137
121
 
138
- tool.add_putative_nterm_to_gff(peptide_gff,pep_seq,prot_seq,protein_info,prot_id,peptide_count,dna_sequence,genomedb)
122
+ protein.sequence = protein_entry.aaseq
139
123
 
140
- gff_db.records += peptide_gff
124
+ # Get the CDS and parent entries from the gff file
125
+ #
126
+ parsed_name_for_gffid = protein_id_to_gffid(protein.protein_name,tool.gff_idregex)
127
+ gff_parent_entries = gffdb.get_by_id(parsed_name_for_gffid)
128
+ raise NoGFFEntryFoundError if gff_parent_entries.nil? || gff_parent_entries.length==0
129
+ raise MultipleGFFEntriesForProteinError if gff_parent_entries.length > 1
141
130
 
142
- peptides_covered_protein << pep_seq unless tool.stack_charge_states
143
- peptides_covered_genome[pep_seq] = peptide_gff[0].start
131
+ gff_parent_entry = gff_parent_entries.first
132
+ gff_cds_entries = gffdb.get_cds_by_parent_id(parsed_name_for_gffid)
144
133
 
145
- total_peptides += 1
146
- peptide_count+=1
147
- else
148
- puts "Duplicate peptide #{peptide_gff[0]}"
149
- end
150
- # puts gff_db.records.last
151
- end
152
- end
153
- else
154
- puts "Skipping redundant entry #{protein_name}"
155
- protein_count-=1 # To counter +1 prior to begin rescue end block
156
- end
134
+ # Account for sixframe case. Parent is CDS and there are no children
135
+ #
136
+ gff_cds_entries=[gff_parent_entry] if gff_cds_entries.nil? && gff_parent_entry.feature=="CDS"
157
137
 
158
- entries_covered<<protein_info
138
+ peptides = tool.stack_charge_states ? protein.peptides : protein.representative_peptides
159
139
 
160
- # puts protein_gff
161
- # puts gff_db.records
162
- rescue KeyError,EncodingError
163
- skipped+=0
164
- end
140
+ peptides.each do |peptide|
141
+ peptide_entries = peptide.to_gff3_records(protein_entry.aaseq,gff_parent_entry,gff_cds_entries)
142
+ peptide_entries.each do |peptide_entry|
143
+ output_fh.write peptide_entry.to_s
144
+ end
145
+ end
165
146
 
166
- # exit
167
- end
147
+ rescue NoGFFEntryFoundError
148
+ $protk.log "No gff entry for #{parsed_name_for_gffid}", :info
149
+ num_missing_gff_entries+=1
150
+ rescue ProteinNotInDBError
151
+ $protk.log "No entry for #{parsed_name_for_protdb}", :info
152
+ rescue MultipleGFFEntriesForProteinError
153
+ $protk.log "Multiple entries in gff file for #{parsed_name_for_gffid}", :info
154
+ rescue PeptideNotInProteinError
155
+ $protk.log "A peptide was not found in its parent protein #{protein.protein_name}" , :warn
156
+ end
157
+ end
168
158
 
159
+ if num_missing_gff_entries>0
160
+ $protk.log "Failed to lookup gff entries. Try setting --gff-idregex" if tool.gff_idregex.nil?
169
161
  end
170
162
 
171
- f = open(gff_out_file,'w+')
172
- gff_db.records.each { |rec|
173
- f.write(rec.to_s)
174
- }
175
- f.close
176
-
177
- p "Finished."
178
- p "Proteins: #{protein_count}"
179
- p "Skipped Decoys: #{skipped}"
180
- p "Total Peptides: #{total_peptides}"
181
- p "Peptides Written: #{total_peptides - low_prob}"
182
- p "Peptides Culled: #{low_prob}"
183
- exit(0)
@@ -242,7 +242,7 @@ def insert_psms_from_file(filepath)
242
242
 
243
243
  spectrum_queries.each do |query|
244
244
 
245
- spectrum_name = query.attributes['spectrum'].chomp.gsub("0","").sub(/\.\d+$/,"")
245
+ spectrum_name = query.attributes['spectrum'].chomp.gsub(/\.0+/,"\.").sub(/\.\d+$/,"")
246
246
 
247
247
  start_scan=query.attributes['start_scan'].to_i
248
248
  end_scan=query.attributes['end_scan'].to_i
@@ -318,7 +318,8 @@ def lookup_spectra_from_files(file_list,matched_spectra)
318
318
  SQL
319
319
 
320
320
  else
321
-
321
+ # require 'debugger';debugger
322
+ # puts "Unmatched spectrum #{spec[:title]}"
322
323
  end
323
324
  spec = mzml_parser.next_spectrum
324
325
  end
data/bin/sixframe.rb CHANGED
@@ -29,6 +29,7 @@ tool.option_parser.banner = "Create a sixframe translation of a genome.\n\nUsage
29
29
  tool.add_boolean_option(:print_coords,false,['--coords', 'Write genomic coordinates in the fasta header'])
30
30
  tool.add_boolean_option(:keep_header,true,['--strip-header', 'Dont write sequence definition'])
31
31
  tool.add_value_option(:min_len,20,['--min-len','Minimum ORF length to keep'])
32
+ tool.add_boolean_option(:write_gff,false,['--gff3','Output gff3 instead of fasta'])
32
33
 
33
34
  exit unless tool.check_options(true)
34
35
 
@@ -38,6 +39,9 @@ output_file = tool.explicit_output!=nil ? tool.explicit_output : nil
38
39
 
39
40
  output_fh = output_file!=nil ? File.new("#{output_file}",'w') : $stdout
40
41
 
42
+ if tool.write_gff
43
+ output_fh.write "##gff-version 3\n"
44
+ end
41
45
 
42
46
  file = Bio::FastaFormat.open(input_file)
43
47
 
@@ -66,13 +70,11 @@ file.each do |entry|
66
70
  position_end=forward_position_end
67
71
  end
68
72
 
69
-
70
-
71
-
72
73
  # Create accession compliant with NCBI naming standard
73
74
  # See http://www.ncbi.nlm.nih.gov/books/NBK7183/?rendertype=table&id=ch_demo.T5
74
75
  ncbi_scaffold_id = entry.entry_id.gsub('|','_').gsub(' ','_')
75
76
  ncbi_accession = "lcl|#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
77
+ gff_id = "#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
76
78
 
77
79
  defline=">#{ncbi_accession}"
78
80
 
@@ -84,11 +86,16 @@ file.each do |entry|
84
86
  defline << " #{entry.definition}"
85
87
  end
86
88
 
87
- # Output in fasta format
88
- # start and end positions are always relative to the forward strand
89
-
90
- output_fh.write("#{defline}\n#{orf}\n")
91
-
89
+ if tool.write_gff
90
+ strand = frame>3 ? "-" : "+"
91
+ # score = self.nsp_adjusted_probability.nil? ? "." : self.nsp_adjusted_probability.to_s
92
+ # gff_string = "#{parent_record.seqid}\tMSMS\tpolypeptide\t#{start_i}\t#{end_i}\t#{score}\t#{parent_record.strand}\t0\tID=#{this_id};Parent=#{cds_id}"
93
+ output_fh.write("#{ncbi_scaffold_id}\tsixframe\tCDS\t#{position_start}\t#{position_end}\t.\t#{strand}\t0\tID=#{gff_id}\n")
94
+ else
95
+ # Output in fasta format
96
+ # start and end positions are always relative to the forward strand
97
+ output_fh.write("#{defline}\n#{orf}\n")
98
+ end
92
99
  end
93
100
  position += orf.length*3+3
94
101
  end
@@ -0,0 +1,120 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file is part of protk
4
+ # Created by Ira Cooke 18/1/2011
5
+ #
6
+ # Convert a pepXML file to a tab delimited table
7
+ #
8
+ #
9
+ require 'protk/tool'
10
+ require 'protk/swissprot_database'
11
+ require 'protk/bio_sptr_extensions'
12
+ require 'protk/fastadb'
13
+
14
+
15
+
16
+ columns={'recname'=>"Primary Name",'cd'=>"CD Antigen Name",'altnames'=>"Alternate Names",
17
+ 'location' => "Subcellular Location",
18
+ 'function' => "Known Function",
19
+ 'similarity' => "Similarity",
20
+ 'tissues' => "Tissue Specificity",
21
+ 'disease' => "Disease Association",
22
+ 'domain' => "Domain",
23
+ 'subunit' => "Sub Unit",
24
+ 'nextbio' => "NextBio",
25
+ 'ipi' => "IPI",
26
+ 'intact' => "Interactions",
27
+ 'pride' => 'Pride',
28
+ 'ensembl'=> 'Ensembl',
29
+ 'num_transmem'=>"Transmembrane Regions",
30
+ 'signalp'=>'Signal Peptide',
31
+ 'go_terms'=>"GO Terms",
32
+ 'go_entries'=>"GO Entries",
33
+ 'accessions'=>"Uniprot Accessions",
34
+ 'ncbi_taxon_id'=>"NCBI Taxon ID"
35
+ }
36
+
37
+
38
+
39
+
40
+
41
+ # Setup specific command-line options for this tool. Other options are inherited from ProphetTool
42
+ #
43
+ tool=Tool.new([:explicit_output,:debug])
44
+ tool.option_parser.banner = "Query a swissprot flat file and output to tab delimited table.\n\nUsage: swissprot_to_table.rb [options] -d flatfile.dat queries.txt"
45
+
46
+ tool.add_value_option(:database,nil,['-d','--database file','Uniprot flatfile database containing full records for proteins'])
47
+ tool.add_value_option(:output_keys,nil,['-K','--keys keys','Filter output to only the specified keys (comma separated)'])
48
+ tool.add_boolean_option(:show_keys,false,['--show-keys','Print a list of possible values for the keys field and exit'])
49
+ tool.add_value_option(:separator,"\t",['-S','--separator sep','Separator character for output, default (tab)'])
50
+ tool.add_value_option(:array_separator,",",['-A','--array-separator sep','Array Separator character, default ,'])
51
+ tool.add_value_option(:query_separator,"\t",['--query-separator sep','Separator character for queries.txt, default is tab'])
52
+ tool.add_value_option(:id_column,1,['--id-column num','Column in queries.txt in which Uniprot Accessions are found'])
53
+
54
+
55
+ if ARGV.include? "--show-keys"
56
+ columns.each_pair { |name, val| $stdout.write "#{name} (#{val})\n" }
57
+ exit
58
+ end
59
+
60
+
61
+ exit unless tool.check_options(true,[:database])
62
+
63
+
64
+ $protk = Constants.new
65
+ log_level = tool.debug ? :debug : :fatal
66
+ $protk.info_level= log_level
67
+
68
+
69
+ if tool.explicit_output
70
+ output_fh=File.new("#{tool.explicit_output}",'w')
71
+ else
72
+ output_fh=$stdout
73
+ end
74
+
75
+
76
+ if tool.output_keys
77
+ output_keys=tool.output_keys.split(",").collect { |k| k.strip }
78
+ columns.delete_if { |key, value| !output_keys.include? key }
79
+ end
80
+
81
+
82
+ db_info=tool.database_info
83
+ database_path=db_info.path
84
+
85
+ database_index_path = "#{Pathname.new(database_path).dirname}/config.dat"
86
+
87
+ skip_index = File.exists?(database_index_path) ? true : false
88
+
89
+
90
+ swissprotdb=SwissprotDatabase.new(database_path,skip_index)
91
+
92
+
93
+ def write_entry(item_name,item,columns,tool,output_fh)
94
+ row=[item_name]
95
+ row << columns.keys.collect do |name|
96
+ colvalue = item.send(name)
97
+ colvalue = "" unless colvalue
98
+ colvalue = colvalue.join(tool.array_separator) if colvalue.class==Array
99
+ colvalue
100
+ end
101
+ output_fh.write "#{row.join(tool.separator)}\n"
102
+ end
103
+
104
+ File.open(ARGV[0]).each_line do |line|
105
+
106
+ begin
107
+ query_id = line.chomp.split(tool.query_separator)[tool.id_column.to_i-1]
108
+ rescue
109
+ query_id = line.chomp
110
+ end
111
+
112
+ begin
113
+ item = swissprotdb.get_entry_for_name(query_id)
114
+ write_entry(query_id,item,columns,tool,output_fh)
115
+ rescue
116
+ $protk.log "Unable to retrieve entry for #{query_id}" , :debug
117
+ end
118
+
119
+ end
120
+