ORForise 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +2 -4
- ORForise/Annotation_Compare.py +16 -53
- ORForise/Annotation_Intersector.py +726 -0
- ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
- ORForise/Convert_To_GFF.py +139 -0
- ORForise/GFF_Adder.py +454 -179
- ORForise/List_Tools.py +63 -0
- ORForise/StORForise.py +8 -4
- ORForise/Tools/EasyGene/EasyGene.py +13 -1
- ORForise/Tools/{GLIMMER_3/GLIMMER_3.py → GLIMMER3/GLIMMER3.py} +2 -2
- ORForise/Tools/GLIMMER3/__init__.py +0 -0
- ORForise/Tools/{GeneMark_HA/GeneMark_HA.py → GeneMarkHA/GeneMarkHA.py} +1 -1
- ORForise/Tools/GeneMarkHA/__init__.py +0 -0
- ORForise/Tools/Prodigal/Prodigal.py +13 -1
- ORForise/utils.py +4 -1
- orforise-1.6.1.dist-info/METADATA +1038 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/RECORD +29 -24
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/entry_points.txt +6 -2
- ORForise/GFF_Intersector.py +0 -192
- orforise-1.5.1.dist-info/METADATA +0 -427
- /ORForise/{Tools → Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- /ORForise/{Tools/GLIMMER_3 → Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
- /ORForise/{Tools → Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
- /ORForise/{Tools/GeneMark_HA → Aux/StORF_Undetected}/__init__.py +0 -0
- /ORForise/{Tools/StORF_Undetected/Completely_Undetected → Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
- /ORForise/{Tools → Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- /ORForise/{Tools/StORF_Undetected → Aux/TabToGFF}/__init__.py +0 -0
- /ORForise/{Tools/StORF_Undetected/unvitiated_Genes → Aux}/__init__.py +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/WHEEL +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/top_level.txt +0 -0
ORForise/Aggregate_Compare.py
CHANGED
|
@@ -339,9 +339,7 @@ def comparator(options):
|
|
|
339
339
|
|
|
340
340
|
|
|
341
341
|
def main():
|
|
342
|
-
print(
|
|
343
|
-
"Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
|
|
344
|
-
"#####")
|
|
342
|
+
print(WELCOME)
|
|
345
343
|
|
|
346
344
|
parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Aggregate-Compare Run Parameters.')
|
|
347
345
|
parser._action_groups.pop()
|
|
@@ -350,7 +348,7 @@ def main():
|
|
|
350
348
|
|
|
351
349
|
required.add_argument('-dna', dest='genome_dna', required=True, help='Genome DNA file (.fa) which both annotations '
|
|
352
350
|
'are based on')
|
|
353
|
-
required.add_argument('-t', dest='tools', required=True, help='Which tools to analyse?
|
|
351
|
+
required.add_argument('-t', dest='tools', required=True, help='Which tools to analyse?')
|
|
354
352
|
required.add_argument('-tp', dest='tool_predictions', required=True, help='Tool genome prediction file (.gff) - Provide'
|
|
355
353
|
'file locations for each tool comma separated')
|
|
356
354
|
required.add_argument('-ref', dest='reference_annotation', required=True,
|
ORForise/Annotation_Compare.py
CHANGED
|
@@ -7,15 +7,11 @@ from datetime import datetime
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
try:
|
|
10
|
+
from utils import *
|
|
10
11
|
from Comparator import tool_comparison
|
|
11
12
|
except ImportError:
|
|
12
13
|
from .Comparator import tool_comparison
|
|
13
|
-
|
|
14
|
-
try:
|
|
15
|
-
from utils import *
|
|
16
|
-
except ImportError:
|
|
17
|
-
from ORForise.utils import *
|
|
18
|
-
|
|
14
|
+
from .utils import *
|
|
19
15
|
|
|
20
16
|
##########################
|
|
21
17
|
|
|
@@ -131,16 +127,6 @@ def comparator(options):
|
|
|
131
127
|
print(full_msg)
|
|
132
128
|
options.output_logger.info(full_msg)
|
|
133
129
|
|
|
134
|
-
# print("These are the results for: " + dna_region + '\n')
|
|
135
|
-
# print('Current Contig: ' + str(dna_region))
|
|
136
|
-
# print('Number of Genes: ' + str(num_current_genes))
|
|
137
|
-
# print('Number of ORFs: ' + str(result['pred_metrics']['Number_of_ORFs']))
|
|
138
|
-
# print('Perfect Matches: ' + str(result['pred_metrics']['Number_of_Perfect_Matches']) + ' [' + str(num_current_genes)+ '] - '+ format(100 * result['pred_metrics']['Number_of_Perfect_Matches']/num_current_genes,'.2f')+'%')
|
|
139
|
-
# print('Partial Matches: ' + str(len(result['pred_metrics']['partial_Hits'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['partial_Hits'])/num_current_genes,'.2f')+'%')
|
|
140
|
-
# print('Missed Genes: ' + str(len(result['rep_metrics']['genes_Undetected'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['rep_metrics']['genes_Undetected'])/num_current_genes,'.2f')+'%')
|
|
141
|
-
# print('Unmatched ORFs: ' + str(len(result['pred_metrics']['unmatched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['unmatched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
142
|
-
# print('Multi-matched ORFs: ' + str(len(result['pred_metrics']['multi_Matched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['multi_Matched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
143
|
-
|
|
144
130
|
# Prepare output directory and file names for each contig
|
|
145
131
|
contig_save = dna_region.replace('/', '_').replace('\\', '_')
|
|
146
132
|
contig_dir = os.path.join(options.outdir, contig_save)
|
|
@@ -176,8 +162,8 @@ def comparator(options):
|
|
|
176
162
|
|
|
177
163
|
|
|
178
164
|
# Write metrics to CSV
|
|
179
|
-
with open(csv_file, 'w', newline='\n', encoding='utf-8') as out_file:
|
|
180
165
|
tool_out = csv.writer(out_file, quoting=csv.QUOTE_NONE, escapechar=" ")
|
|
166
|
+
tool_out = csv.writer(out_file, quoting=csv.QUOTE_NONE, escapechar=" ") # type: ignore[arg-type]
|
|
181
167
|
tool_out.writerow(['Representative_Metrics:'])
|
|
182
168
|
tool_out.writerow(rep_metric_description.split(','))
|
|
183
169
|
tool_out.writerow([*rep_metrics])
|
|
@@ -190,24 +176,6 @@ def comparator(options):
|
|
|
190
176
|
tool_out.writerow([''.join(map(str, result['pred_metrics']['orf_Coverage_Genome']))])
|
|
191
177
|
tool_out.writerow(['Matched_Predicted_CDS_Coverage_of_Genome'])
|
|
192
178
|
tool_out.writerow([''.join(map(str, result['pred_metrics']['matched_ORF_Coverage_Genome']))])
|
|
193
|
-
# tool_out.writerow(['Start_Position_Difference:'])
|
|
194
|
-
# tool_out.writerow(result.get('start_Difference', []))
|
|
195
|
-
# tool_out.writerow(['Stop_Position_Difference:'])
|
|
196
|
-
# tool_out.writerow(result.get('stop_Difference', []))
|
|
197
|
-
# tool_out.writerow(['Alternative_Starts_Predicted:'])
|
|
198
|
-
# tool_out.writerow(result.get('other_Starts', []))
|
|
199
|
-
# tool_out.writerow(['Alternative_Stops_Predicted:'])
|
|
200
|
-
# tool_out.writerow(result.get('other_Stops', []))
|
|
201
|
-
# tool_out.writerow(['Undetected_Gene_Metrics:'])
|
|
202
|
-
# tool_out.writerow([
|
|
203
|
-
# 'ATG_Start,GTG_Start,TTG_Start,ATT_Start,CTG_Start,Alternative_Start_Codon,TGA_Stop,TAA_Stop,TAG_Stop,Alternative_Stop_Codon,Median_Length,ORFs_on_Positive_Strand,ORFs_on_Negative_Strand'
|
|
204
|
-
# ])
|
|
205
|
-
# tool_out.writerow(result.get('undetected_Gene_Metrics', []))
|
|
206
|
-
# tool_out.writerow(['\nPredicted_CDSs_Without_Corresponding_Gene_In_Reference_Metrics:'])
|
|
207
|
-
# tool_out.writerow([
|
|
208
|
-
# 'ATG_Start,GTG_Start,TTG_Start,ATT_Start,CTG_Start,Alternative_Start_Codon,TGA_Stop,TAA_Stop,TAG_Stop,Alternative_Stop_Codon,Median_Length,ORFs_on_Positive_Strand,ORFs_on_Negative_Strand'
|
|
209
|
-
# ])
|
|
210
|
-
# tool_out.writerow(result.get('unmatched_ORF_Metrics', []))
|
|
211
179
|
|
|
212
180
|
# Write perfect matches to FASTA
|
|
213
181
|
with open(perfect_fasta, 'w', encoding='utf-8') as f:
|
|
@@ -266,26 +234,21 @@ def comparator(options):
|
|
|
266
234
|
out_file.write('\nOverall Summary:\n')
|
|
267
235
|
out_file.write(f'Number of Genes: {total_genes}\n')
|
|
268
236
|
out_file.write(f'Number of ORFs: {total_orfs}\n')
|
|
269
|
-
out_file.write(
|
|
270
|
-
|
|
271
|
-
out_file.write(
|
|
272
|
-
|
|
273
|
-
out_file.write(
|
|
274
|
-
f'Missed Genes: {total_missed} [{total_genes}] - {format(100 * total_missed / total_genes, ".2f")}%\n')
|
|
275
|
-
out_file.write(
|
|
276
|
-
f'Unmatched ORFs: {total_unmatched} [{total_genes}] - {format(100 * total_unmatched / total_genes, ".2f")}%\n')
|
|
277
|
-
out_file.write(
|
|
278
|
-
f'Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%\n')
|
|
237
|
+
out_file.write(f'Perfect Matches: {total_perfect} [{total_genes}] - {100 * total_perfect / total_genes:.2f}%\n')
|
|
238
|
+
out_file.write(f'Partial Matches: {total_partial} [{total_genes}] - {100 * total_partial / total_genes:.2f}%\n')
|
|
239
|
+
out_file.write(f'Missed Genes: {total_missed} [{total_genes}] - {100 * total_missed / total_genes:.2f}%\n')
|
|
240
|
+
out_file.write(f'Unmatched ORFs: {total_unmatched} [{total_genes}] - {100 * total_unmatched / total_genes:.2f}%\n')
|
|
241
|
+
out_file.write(f'Multi-matched ORFs: {total_multi} [{total_genes}] - {100 * total_multi / total_genes:.2f}%\n')
|
|
279
242
|
|
|
280
243
|
lines = [
|
|
281
244
|
f"Combined metrics for all contigs:",
|
|
282
245
|
f"Number of Genes: {total_genes}",
|
|
283
246
|
f"Number of ORFs: {total_orfs}",
|
|
284
|
-
f"Perfect Matches: {total_perfect} [{total_genes}] - {
|
|
285
|
-
f"Partial Matches: {total_partial} [{total_genes}] - {
|
|
286
|
-
f"Missed Genes: {total_missed} [{total_genes}] - {
|
|
287
|
-
f"Unmatched ORFs: {total_unmatched} [{total_genes}] - {
|
|
288
|
-
f"Multi-matched ORFs: {total_multi} [{total_genes}] - {
|
|
247
|
+
f"Perfect Matches: {total_perfect} [{total_genes}] - {100 * total_perfect / total_genes:.2f}%",
|
|
248
|
+
f"Partial Matches: {total_partial} [{total_genes}] - {100 * total_partial / total_genes:.2f}%",
|
|
249
|
+
f"Missed Genes: {total_missed} [{total_genes}] - {100 * total_missed / total_genes:.2f}%",
|
|
250
|
+
f"Unmatched ORFs: {total_unmatched} [{total_genes}] - {100 * total_unmatched / total_genes:.2f}%",
|
|
251
|
+
f"Multi-matched ORFs: {total_multi} [{total_genes}] - {100 * total_multi / total_genes:.2f}%"
|
|
289
252
|
]
|
|
290
253
|
|
|
291
254
|
full_msg = '\n'.join(lines) + '\n'
|
|
@@ -295,7 +258,7 @@ def comparator(options):
|
|
|
295
258
|
|
|
296
259
|
|
|
297
260
|
def main():
|
|
298
|
-
print(
|
|
261
|
+
print(WELCOME)
|
|
299
262
|
|
|
300
263
|
parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Annotatione-Compare Run Parameters.')
|
|
301
264
|
parser._action_groups.pop()
|
|
@@ -305,7 +268,7 @@ def main():
|
|
|
305
268
|
'are based on')
|
|
306
269
|
required.add_argument('-ref', dest='reference_annotation', required=True,
|
|
307
270
|
help='Which reference annotation file to use as reference?')
|
|
308
|
-
required.add_argument('-t', dest='tool', required=True, help='Which tool to analyse?
|
|
271
|
+
required.add_argument('-t', dest='tool', required=True, help='Which tool to analyse?')
|
|
309
272
|
required.add_argument('-tp', dest='tool_prediction', required=True,
|
|
310
273
|
help='Tool genome prediction file (.gff) - Different Tool Parameters'
|
|
311
274
|
' are compared individually via separate files')
|
|
@@ -350,4 +313,4 @@ def main():
|
|
|
350
313
|
|
|
351
314
|
if __name__ == "__main__":
|
|
352
315
|
main()
|
|
353
|
-
print("Complete")
|
|
316
|
+
print("Complete")
|