protk 1.2.1 → 1.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/annotate_ids.rb +2 -2
- data/bin/feature_finder.rb +8 -2
- data/bin/file_convert.rb +8 -2
- data/bin/gffmerge.rb +15 -20
- data/bin/interprophet.rb +7 -3
- data/bin/make_decoy.rb +9 -2
- data/bin/mascot2xml.rb +87 -0
- data/bin/mascot_search.rb +126 -187
- data/bin/mascot_to_pepxml.rb +32 -3
- data/bin/msgfplus_search.rb +58 -12
- data/bin/omssa_search.rb +13 -2
- data/bin/peptide_prophet.rb +8 -2
- data/bin/pepxml_to_table.rb +8 -2
- data/bin/protein_prophet.rb +8 -2
- data/bin/protxml_to_table.rb +82 -0
- data/bin/repair_run_summary.rb +7 -1
- data/bin/sixframe.rb +48 -2
- data/bin/tandem_search.rb +11 -2
- data/bin/toppas_pipeline.rb +8 -2
- data/bin/uniprot_annotation.rb +8 -2
- data/bin/uniprot_mapper.rb +8 -2
- data/bin/xls_to_table.rb +8 -2
- data/lib/protk/constants.rb +2 -0
- data/lib/protk/data/pepxml_mascot_template.xml +29 -0
- data/lib/protk/mascot_util.rb +5 -0
- data/lib/protk/prophet_tool.rb +1 -3
- data/lib/protk/search_tool.rb +75 -86
- data/lib/protk/setup_rakefile.rake +12 -5
- data/lib/protk/tool.rb +26 -12
- metadata +23 -9
- data/bin/big_search.rb +0 -41
- data/bin/template_search.rb +0 -144
- data/lib/convert_util.rb +0 -27
- data/lib/pepxml.rb +0 -22
- data/lib/protk/big_search_rakefile.rake +0 -16
- data/lib/protk/big_search_tool.rb +0 -23
data/bin/mascot_to_pepxml.rb
CHANGED
@@ -15,10 +15,36 @@ require 'protk/mascot_util'
|
|
15
15
|
#
|
16
16
|
genv=Constants.new
|
17
17
|
|
18
|
-
tool=SearchTool.new(
|
18
|
+
tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme])
|
19
19
|
tool.option_parser.banner = "Convert mascot dat files to pep.xml files.\n\nUsage: mascot_to_pepxml.rb [options] file1.dat file2.dat ... "
|
20
|
+
|
21
|
+
tool.options.enzyme="trypsin"
|
22
|
+
|
23
|
+
tool.options.shortid=false
|
24
|
+
tool.option_parser.on( '--shortid', 'Use short protein id as per Mascot result (default uses full protein ids in fasta file)' ) do
|
25
|
+
tool.options.shortid=true
|
26
|
+
end
|
27
|
+
|
20
28
|
tool.option_parser.parse!
|
21
29
|
|
30
|
+
exit unless tool.check_options
|
31
|
+
|
32
|
+
if ( ARGV[0].nil? )
|
33
|
+
puts "You must supply an input file"
|
34
|
+
puts tool.option_parser
|
35
|
+
exit
|
36
|
+
end
|
37
|
+
|
38
|
+
current_db=""
|
39
|
+
|
40
|
+
case
|
41
|
+
when Pathname.new(tool.database).exist? # It's an explicitly named db
|
42
|
+
current_db=Pathname.new(tool.database).realpath.to_s
|
43
|
+
else
|
44
|
+
current_db=tool.current_database :fasta
|
45
|
+
end
|
46
|
+
|
47
|
+
|
22
48
|
|
23
49
|
ARGV.each do |file_name|
|
24
50
|
name=file_name.chomp
|
@@ -28,12 +54,15 @@ ARGV.each do |file_name|
|
|
28
54
|
if ( tool.explicit_output==nil )
|
29
55
|
new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
|
30
56
|
cmd="cp #{name} #{new_basename}.dat"
|
31
|
-
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{tool.
|
57
|
+
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
|
32
58
|
|
59
|
+
cmd << " -shortid" if tool.shortid
|
60
|
+
|
33
61
|
else #Mascot2XML doesn't support explicitly named output files so we move the file to an appropriate output filename after finishing
|
34
62
|
new_basename="#{this_dir}/#{MascotUtil.input_basename(name)}_mascot2xml"
|
35
63
|
cmd="cp #{name} #{new_basename}.dat"
|
36
|
-
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{tool.
|
64
|
+
cmd << "; #{genv.mascot2xml} #{new_basename}.dat -D#{current_db} -E#{tool.enzyme}"
|
65
|
+
cmd << " -shortid" if tool.shortid
|
37
66
|
cmd << "; mv #{new_basename}.pep.xml #{tool.explicit_output}; rm #{new_basename}.dat"
|
38
67
|
repair_script="#{File.dirname(__FILE__)}/repair_run_summary.rb"
|
39
68
|
cmd << "; #{repair_script} #{tool.explicit_output}"
|
data/bin/msgfplus_search.rb
CHANGED
@@ -17,17 +17,32 @@ input_stager = nil
|
|
17
17
|
|
18
18
|
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
19
19
|
#
|
20
|
-
search_tool=SearchTool.new(
|
20
|
+
search_tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme,
|
21
|
+
:modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:missed_cleavages])
|
22
|
+
|
21
23
|
search_tool.option_parser.banner = "Run an MSGFPlus msms search on a set of msms spectrum input files.\n\nUsage: msgfplus_search.rb [options] file1.mzML file2.mzML ..."
|
22
24
|
search_tool.options.output_suffix="_msgfplus"
|
23
25
|
|
26
|
+
search_tool.options.enzyme=1
|
27
|
+
search_tool.options.instrument=0
|
28
|
+
|
29
|
+
search_tool.options.no_pepxml=false
|
30
|
+
search_tool.option_parser.on( '--no-pepxml', 'Dont convert results to pepxml. Keep native mzidentml format' ) do
|
31
|
+
search_tool.options.no_pepxml=true
|
32
|
+
end
|
33
|
+
|
34
|
+
search_tool.options.isotope_error_range="0,1"
|
35
|
+
search_tool.option_parser.on( '--isotope-error-range range', 'Takes into account of the error introduced by chooosing a non-monoisotopic peak for fragmentation.(Default 0,1)' ) do |range|
|
36
|
+
search_tool.options.isotope_error_range=range
|
37
|
+
end
|
38
|
+
|
24
39
|
search_tool.options.fragment_method=0
|
25
40
|
search_tool.option_parser.on( '--fragment-method method', 'Fragment method 0: As written in the spectrum or CID if no info (Default), 1: CID, 2: ETD, 3: HCD, 4: Merge spectra from the same precursor' ) do |method|
|
26
41
|
search_tool.options.fragment_method=method
|
27
42
|
end
|
28
43
|
|
29
44
|
search_tool.options.protocol=0
|
30
|
-
search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation' ) do |p|
|
45
|
+
search_tool.option_parser.on( '--protocol p', '0: NoProtocol (Default), 1: Phosphorylation, 2: iTRAQ, 3: iTRAQPhospho' ) do |p|
|
31
46
|
search_tool.options.protocol=p
|
32
47
|
end
|
33
48
|
|
@@ -61,12 +76,23 @@ search_tool.option_parser.on( '--add-features', 'output additional features' )
|
|
61
76
|
search_tool.options.add_features=true
|
62
77
|
end
|
63
78
|
|
79
|
+
search_tool.options.num_threads=nil
|
80
|
+
search_tool.option_parser.on('--threads NumThreads','Number of processing threads to use') do |nt|
|
81
|
+
search_tool.options.num_threads=nt
|
82
|
+
end
|
83
|
+
|
64
84
|
search_tool.options.java_mem="3500M"
|
65
85
|
search_tool.option_parser.on('--java-mem mem','Java memory limit when running the search (Default 3.5Gb)') do |mem|
|
66
86
|
search_tool.options.java_mem=mem
|
67
87
|
end
|
68
88
|
|
69
|
-
search_tool.
|
89
|
+
exit unless search_tool.check_options
|
90
|
+
|
91
|
+
if ( ARGV[0].nil? )
|
92
|
+
puts "You must supply an input file"
|
93
|
+
puts search_tool.option_parser
|
94
|
+
exit
|
95
|
+
end
|
70
96
|
|
71
97
|
# Environment with global constants
|
72
98
|
#
|
@@ -149,17 +175,33 @@ ARGV.each do |filename|
|
|
149
175
|
# Instrument type
|
150
176
|
cmd << " -inst #{search_tool.instrument}"
|
151
177
|
|
152
|
-
|
178
|
+
cmd << " -m #{search_tool.fragment_method}"
|
153
179
|
|
154
180
|
cmd << " -addFeatures 1"
|
155
181
|
|
182
|
+
cmd << " -protocol #{search_tool.protocol}"
|
183
|
+
|
184
|
+
cmd << " -minLength #{search_tool.min_pep_length}"
|
185
|
+
|
186
|
+
cmd << " -maxLength #{search_tool.max_pep_length}"
|
187
|
+
|
188
|
+
cmd << " -minCharge #{search_tool.min_pep_charge}"
|
189
|
+
|
190
|
+
cmd << " -maxCharge #{search_tool.max_pep_charge}"
|
191
|
+
|
192
|
+
cmd << " -ti #{search_tool.isotope_error_range}"
|
193
|
+
|
194
|
+
cmd << " -n #{search_tool.num_reported_matches}"
|
195
|
+
|
156
196
|
# Enzyme
|
157
197
|
#
|
158
|
-
|
159
|
-
|
160
|
-
|
198
|
+
cmd << " -e #{search_tool.enzyme}"
|
199
|
+
|
200
|
+
# Num Threads
|
201
|
+
#
|
202
|
+
cmd << " -thread #{search_tool.num_threads}" if search_tool.num_threads
|
161
203
|
|
162
|
-
|
204
|
+
mods_file_content = ""
|
163
205
|
|
164
206
|
# Variable Modifications
|
165
207
|
#
|
@@ -188,10 +230,14 @@ ARGV.each do |filename|
|
|
188
230
|
end
|
189
231
|
|
190
232
|
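# As a final part of the command we optionally convert to pepxml. NOTE(review): the guard below runs the conversion when --no-pepxml IS set, which looks inverted - confirm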
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
233
|
+
if search_tool.no_pepxml
|
234
|
+
cmd << "; #{genv.idconvert} #{mzid_output_path} --pepXML -o #{Pathname.new(mzid_output_path).dirname}"
|
235
|
+
#Then copy the pepxml to the final output path
|
236
|
+
cmd << "; cp #{mzid_output_path.chomp('.mzid')}.pepXML #{output_path}"
|
237
|
+
elsif search_tool.explicit_output
|
238
|
+
cmd << "; cp #{mzid_output_path} #{output_path}"
|
239
|
+
end
|
240
|
+
|
195
241
|
|
196
242
|
# Up to here we've formulated the command. The rest is cleanup
|
197
243
|
p "Running:#{cmd}"
|
data/bin/omssa_search.rb
CHANGED
@@ -16,7 +16,12 @@ for_galaxy = GalaxyUtil.for_galaxy?
|
|
16
16
|
|
17
17
|
# Setup specific command-line options for this tool. Other options are inherited from SearchTool
|
18
18
|
#
|
19
|
-
search_tool=SearchTool.new(
|
19
|
+
search_tool=SearchTool.new([:database,:explicit_output,:over_write,:enzyme,
|
20
|
+
:modifications,:instrument,:mass_tolerance_units,:mass_tolerance,:missed_cleavages,
|
21
|
+
:precursor_search_type,:respect_precursor_charges,:num_peaks_for_multi_isotope_search,:searched_ions
|
22
|
+
])
|
23
|
+
|
24
|
+
|
20
25
|
search_tool.option_parser.banner = "Run an OMSSA msms search on a set of mgf input files.\n\nUsage: omssa_search.rb [options] file1.mgf file2.mgf ..."
|
21
26
|
search_tool.options.output_suffix="_omssa"
|
22
27
|
|
@@ -54,7 +59,13 @@ search_tool.option_parser.on( '--nthreads num', 'Number of search threads to use
|
|
54
59
|
search_tool.options.nthreads=num
|
55
60
|
end
|
56
61
|
|
57
|
-
search_tool.
|
62
|
+
exit unless search_tool.check_options
|
63
|
+
|
64
|
+
if ( ARGV[0].nil? )
|
65
|
+
puts "You must supply an input file"
|
66
|
+
puts search_tool.option_parser
|
67
|
+
exit
|
68
|
+
end
|
58
69
|
|
59
70
|
# Environment with global constants
|
60
71
|
#
|
data/bin/peptide_prophet.rb
CHANGED
@@ -13,7 +13,7 @@ require 'protk/prophet_tool'
|
|
13
13
|
|
14
14
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
15
15
|
#
|
16
|
-
prophet_tool=ProphetTool.new(
|
16
|
+
prophet_tool=ProphetTool.new([:glyco,:explicit_output,:maldi])
|
17
17
|
prophet_tool.option_parser.banner = "Run PeptideProphet on a set of pep.xml input files.\n\nUsage: peptide_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
18
18
|
prophet_tool.options.output_suffix="_pproph"
|
19
19
|
|
@@ -92,7 +92,13 @@ prophet_tool.option_parser.on( '--override-database database', 'Manually specify
|
|
92
92
|
prophet_tool.options.override_database = database
|
93
93
|
end
|
94
94
|
|
95
|
-
prophet_tool.
|
95
|
+
exit unless prophet_tool.check_options
|
96
|
+
|
97
|
+
if ( ARGV[0].nil? )
|
98
|
+
puts "You must supply an input file"
|
99
|
+
puts prophet_tool.option_parser
|
100
|
+
exit
|
101
|
+
end
|
96
102
|
|
97
103
|
throw "When --output and -F options are set only one file at a time can be run" if ( ARGV.length> 1 ) && ( prophet_tool.explicit_output!=nil ) && (prophet_tool.one_ata_time!=nil)
|
98
104
|
|
data/bin/pepxml_to_table.rb
CHANGED
@@ -16,10 +16,16 @@ include LibXML
|
|
16
16
|
|
17
17
|
# Setup specific command-line options for this tool. Other options are inherited from Tool
|
18
18
|
#
|
19
|
-
tool=Tool.new(
|
19
|
+
tool=Tool.new([:explicit_output])
|
20
20
|
tool.option_parser.banner = "Convert a pepXML file to a tab delimited table.\n\nUsage: pepxml_to_table.rb [options] file1.pep.xml"
|
21
21
|
|
22
|
-
tool.
|
22
|
+
exit unless tool.check_options
|
23
|
+
|
24
|
+
if ( ARGV[0].nil? )
|
25
|
+
puts "You must supply an input file"
|
26
|
+
puts tool.option_parser
|
27
|
+
exit
|
28
|
+
end
|
23
29
|
|
24
30
|
# Obtain a global environment object
|
25
31
|
#genv=Constants.new
|
data/bin/protein_prophet.rb
CHANGED
@@ -26,7 +26,7 @@ end
|
|
26
26
|
|
27
27
|
# Setup specific command-line options for this tool. Other options are inherited from ProphetTool
|
28
28
|
#
|
29
|
-
prophet_tool=ProphetTool.new(
|
29
|
+
prophet_tool=ProphetTool.new([:glyco,:explicit_output])
|
30
30
|
prophet_tool.option_parser.banner = "Run ProteinProphet on a set of pep.xml input files.\n\nUsage: protein_prophet.rb [options] file1.pep.xml file2.pep.xml ..."
|
31
31
|
prophet_tool.options.output_suffix="_protproph"
|
32
32
|
|
@@ -90,7 +90,13 @@ prophet_tool.option_parser.on( '--minindep mp',"Minimum percentage of independen
|
|
90
90
|
prophet_tool.options.minindep = mp
|
91
91
|
end
|
92
92
|
|
93
|
-
prophet_tool.
|
93
|
+
exit unless prophet_tool.check_options
|
94
|
+
|
95
|
+
if ( ARGV[0].nil? )
|
96
|
+
puts "You must supply an input file"
|
97
|
+
puts prophet_tool.option_parser
|
98
|
+
exit
|
99
|
+
end
|
94
100
|
|
95
101
|
|
96
102
|
# Obtain a global environment object
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file is part of protk
|
4
|
+
# Created by Ira Cooke 18/1/2011
|
5
|
+
#
|
6
|
+
# Convert a protXML file to a tab delimited table
|
7
|
+
#
|
8
|
+
#
|
9
|
+
|
10
|
+
require 'libxml'
|
11
|
+
require 'protk/constants'
|
12
|
+
require 'protk/command_runner'
|
13
|
+
require 'protk/tool'
|
14
|
+
|
15
|
+
include LibXML
|
16
|
+
|
17
|
+
# Setup specific command-line options for this tool. Other options are inherited from Tool
|
18
|
+
#
|
19
|
+
tool=Tool.new([:explicit_output])
|
20
|
+
tool.option_parser.banner = "Convert a protXML file to a tab delimited table.\n\nUsage: protxml_to_table.rb [options] file1.protXML"
|
21
|
+
|
22
|
+
exit unless tool.check_options
|
23
|
+
|
24
|
+
if ( ARGV[0].nil? )
|
25
|
+
puts "You must supply an input file"
|
26
|
+
puts tool.option_parser
|
27
|
+
exit
|
28
|
+
end
|
29
|
+
|
30
|
+
input_file=ARGV[0]
|
31
|
+
|
32
|
+
output_file = tool.explicit_output!=nil ? tool.explicit_output : nil
|
33
|
+
|
34
|
+
output_fh = output_file!=nil ? File.new("#{output_file}",'w') : $stdout
|
35
|
+
|
36
|
+
|
37
|
+
XML::Error.set_handler(&XML::Error::QUIET_HANDLER)
|
38
|
+
|
39
|
+
protxml_parser=XML::Parser.file("#{input_file}")
|
40
|
+
|
41
|
+
protxml_ns_prefix="xmlns:"
|
42
|
+
protxml_ns="xmlns:http://regis-web.systemsbiology.net/protXML"
|
43
|
+
protxml_doc=protxml_parser.parse
|
44
|
+
if not protxml_doc.root.namespaces.default
|
45
|
+
protxml_ns_prefix=""
|
46
|
+
protxml_ns=nil
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
column_headers=[
|
51
|
+
"group_number","group_probability","protein_name",
|
52
|
+
"protein_probability","coverage","peptides",
|
53
|
+
"num_peptides","confidence"
|
54
|
+
]
|
55
|
+
|
56
|
+
output_fh.write "#{column_headers.join("\t")}\n"
|
57
|
+
|
58
|
+
|
59
|
+
protein_groups=protxml_doc.find("//#{protxml_ns_prefix}protein_group", protxml_ns)
|
60
|
+
|
61
|
+
protein_groups.each do |protein_group|
|
62
|
+
|
63
|
+
proteins=protein_group.find("./#{protxml_ns_prefix}protein", protxml_ns)
|
64
|
+
|
65
|
+
proteins.each do |protein|
|
66
|
+
column_values=[]
|
67
|
+
|
68
|
+
column_values << protein_group.attributes['group_number']
|
69
|
+
column_values << protein_group.attributes['probability']
|
70
|
+
|
71
|
+
column_values << protein.attributes['protein_name']
|
72
|
+
column_values << protein.attributes['probability']
|
73
|
+
column_values << protein.attributes['percent_coverage']
|
74
|
+
column_values << protein.attributes['unique_stripped_peptides']
|
75
|
+
column_values << protein.attributes['total_number_peptides']
|
76
|
+
column_values << protein.attributes['confidence']
|
77
|
+
output_fh.write(column_values.join("\t"))
|
78
|
+
output_fh.write("\n")
|
79
|
+
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
data/bin/repair_run_summary.rb
CHANGED
@@ -40,7 +40,13 @@ tool.option_parser.on('--omssa-itol fitol','Add a fragment ion tolerance paramet
|
|
40
40
|
tool.options.omssa_ion_tolerance=fitol
|
41
41
|
end
|
42
42
|
|
43
|
-
tool.
|
43
|
+
exit unless tool.check_options
|
44
|
+
|
45
|
+
if ( ARGV[0].nil? )
|
46
|
+
puts "You must supply an input file"
|
47
|
+
puts tool.option_parser
|
48
|
+
exit
|
49
|
+
end
|
44
50
|
|
45
51
|
pepxml_file=ARGV[0]
|
46
52
|
|
data/bin/sixframe.rb
CHANGED
@@ -10,10 +10,34 @@ require 'protk/constants'
|
|
10
10
|
require 'protk/tool'
|
11
11
|
require 'bio'
|
12
12
|
|
13
|
-
|
13
|
+
def check_coords(naseq,aaseq,frame,pstart,pend)
|
14
|
+
orf_from_coords=""
|
15
|
+
if ( frame<=3)
|
16
|
+
orf_from_coords=naseq[pstart-1..pend-1].translate(1)
|
17
|
+
else
|
18
|
+
orf_from_coords=naseq[pstart-1..pend-1].reverse_complement.translate(1)
|
19
|
+
# current coords give
|
20
|
+
# naseq.reverse_complement[pstart-1..pend-1].translate(1)
|
21
|
+
# naseq[350368-pend..(350367-pstart+1)].reverse_complement.translate(1)
|
22
|
+
# orf_from_coords=naseq[naseq.length-pend..naseq.length-pstart].reverse_complement.translate(1)
|
23
|
+
end
|
24
|
+
if ( orf_from_coords!=aaseq)
|
25
|
+
require 'debugger'; debugger
|
26
|
+
end
|
27
|
+
# p "#{aaseq} #{frame}"
|
28
|
+
end
|
29
|
+
|
30
|
+
|
31
|
+
tool=Tool.new([:explicit_output])
|
14
32
|
tool.option_parser.banner = "Create a sixframe translation of a genome.\n\nUsage: sixframe.rb [options] genome.fasta"
|
15
33
|
|
16
|
-
tool.
|
34
|
+
exit unless tool.check_options
|
35
|
+
|
36
|
+
if ( ARGV[0].nil? )
|
37
|
+
puts "You must supply an input file"
|
38
|
+
puts tool.option_parser
|
39
|
+
exit
|
40
|
+
end
|
17
41
|
|
18
42
|
inname=ARGV.shift
|
19
43
|
|
@@ -26,7 +50,11 @@ end
|
|
26
50
|
file = Bio::FastaFormat.open(inname)
|
27
51
|
|
28
52
|
file.each do |entry|
|
53
|
+
|
54
|
+
puts entry.entry_id
|
55
|
+
|
29
56
|
length = entry.naseq.length
|
57
|
+
|
30
58
|
(1...7).each do |frame|
|
31
59
|
translated_seq= entry.naseq.translate(frame)
|
32
60
|
orfs=translated_seq.split("*")
|
@@ -37,15 +65,30 @@ file.each do |entry|
|
|
37
65
|
orfs.each do |orf|
|
38
66
|
oi+=1
|
39
67
|
if ( orf.length > 20 )
|
68
|
+
|
40
69
|
position_start = position
|
41
70
|
position_end = position_start + orf.length*3 -1
|
42
71
|
|
72
|
+
if ( frame>3) #On reverse strand. Coordinates need translating to forward strand
|
73
|
+
forward_position_start=length-position_end+1
|
74
|
+
forward_position_end = length-position_start+1
|
75
|
+
position_start=forward_position_start
|
76
|
+
position_end=forward_position_end
|
77
|
+
end
|
78
|
+
|
79
|
+
|
80
|
+
|
81
|
+
|
43
82
|
# Create accession compliant with NCBI naming standard
|
44
83
|
# See http://www.ncbi.nlm.nih.gov/books/NBK7183/?rendertype=table&id=ch_demo.T5
|
45
84
|
ncbi_scaffold_id = entry.entry_id.gsub('|','_').gsub(' ','_')
|
46
85
|
ncbi_accession = "lcl|#{ncbi_scaffold_id}_frame_#{frame}_orf_#{oi}"
|
47
86
|
|
87
|
+
# check_coords(entry.naseq,orf,frame,position_start,position_end)
|
88
|
+
|
48
89
|
# Output in fasta format
|
90
|
+
# start and end positions are always relative to the forward strand
|
91
|
+
|
49
92
|
outfile.write(">#{ncbi_accession} #{position_start}|#{position_end}\n#{orf}\n")
|
50
93
|
|
51
94
|
end
|
@@ -54,3 +97,6 @@ file.each do |entry|
|
|
54
97
|
|
55
98
|
end
|
56
99
|
end
|
100
|
+
|
101
|
+
|
102
|
+
|