kssdtree 2.0.4__tar.gz → 2.0.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kssdtree-2.0.4/kssdtree.egg-info → kssdtree-2.0.6}/PKG-INFO +3 -6
- {kssdtree-2.0.4 → kssdtree-2.0.6/kssdtree.egg-info}/PKG-INFO +3 -6
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.py +96 -12
- {kssdtree-2.0.4 → kssdtree-2.0.6}/pykssd.c +22 -3
- {kssdtree-2.0.4 → kssdtree-2.0.6}/setup.py +1 -1
- {kssdtree-2.0.4 → kssdtree-2.0.6}/toolutils.py +81 -13
- {kssdtree-2.0.4 → kssdtree-2.0.6}/MANIFEST.in +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/README.md +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/align.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/buildtree.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/bytescale.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/cluster.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/co2mco.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/command_composite.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/command_dist.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/command_dist_wrapper.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/command_set.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/command_shuffle.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/distancemat.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnj.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/bytescale.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/dnj.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/filebuff.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/hclust.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/matrix.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/mman.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/nj.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/nwck.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/pherror.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/phy.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/qseqs.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/str.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/threader.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/tmp.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/vector.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/filebuff.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/global_basic.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/hclust.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/iseq2comem.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/co2mco.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_composite.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_dist.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_dist_wrapper.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_set.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_shuffle.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/global_basic.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/iseq2comem.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/mman.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/mytime.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/SOURCES.txt +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/dependency_links.txt +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/not-zip-safe +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/requires.txt +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/top_level.txt +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/matrix.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/mman.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/mytime.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/nj.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/align.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/buildtree.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/cluster.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/distancemat.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/sequence.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/tree.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/util.h +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/nwck.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/pherror.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/phy.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/pydnj.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/pynj.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/qseqs.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/sequence.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/setup.cfg +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/str.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/tmp.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/tree.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/util.c +0 -0
- {kssdtree-2.0.4 → kssdtree-2.0.6}/vector.c +0 -0
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
Metadata-Version:
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: kssdtree
|
|
3
|
-
Version: 2.0.4
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
|
|
5
5
|
Home-page: https://github.com/yhlink/kssdtree
|
|
6
|
+
Download-URL: https://pypi.org/project/kssdtree
|
|
6
7
|
Author: Hang Yang
|
|
7
8
|
Author-email: yhlink1207@gmail.com
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Download-URL: https://pypi.org/project/kssdtree
|
|
10
|
-
Description: UNKNOWN
|
|
11
|
-
Platform: UNKNOWN
|
|
@@ -1,11 +1,8 @@
|
|
|
1
|
-
Metadata-Version:
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: kssdtree
|
|
3
|
-
Version: 2.0.4
|
|
3
|
+
Version: 2.0.6
|
|
4
4
|
Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
|
|
5
5
|
Home-page: https://github.com/yhlink/kssdtree
|
|
6
|
+
Download-URL: https://pypi.org/project/kssdtree
|
|
6
7
|
Author: Hang Yang
|
|
7
8
|
Author-email: yhlink1207@gmail.com
|
|
8
|
-
License: UNKNOWN
|
|
9
|
-
Download-URL: https://pypi.org/project/kssdtree
|
|
10
|
-
Description: UNKNOWN
|
|
11
|
-
Platform: UNKNOWN
|
|
@@ -28,6 +28,7 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
|
|
|
28
28
|
if not os.path.exists(shuf_file):
|
|
29
29
|
if shuf_file in ['L3K9.shuf', './L3K9.shuf', 'L3K10.shuf', './L3K10.shuf']:
|
|
30
30
|
print('Downloading...', shuf_file)
|
|
31
|
+
start_time = time.time()
|
|
31
32
|
if shuf_file == 'L3K9.shuf' or shuf_file == './L3K9.shuf':
|
|
32
33
|
url = 'https://zenodo.org/records/12699159/files/L3K9.shuf?download=1'
|
|
33
34
|
else:
|
|
@@ -59,9 +60,9 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
|
|
|
59
60
|
print('Sketching...')
|
|
60
61
|
start = time.time()
|
|
61
62
|
if set_opt:
|
|
62
|
-
kssd.dist_dispatch(shuf_file, genome_files, output, 1, 0, 0, '')
|
|
63
|
+
kssd.dist_dispatch(shuf_file, genome_files, output, 1, 0, 0, '', '')
|
|
63
64
|
else:
|
|
64
|
-
kssd.dist_dispatch(shuf_file, genome_files, output, 0, 0, 0, '')
|
|
65
|
+
kssd.dist_dispatch(shuf_file, genome_files, output, 0, 0, 0, '', '')
|
|
65
66
|
end = time.time()
|
|
66
67
|
print('Sketch spend time:%.2fs' % (end - start))
|
|
67
68
|
print('Sketch finished!')
|
|
@@ -71,19 +72,16 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
|
|
|
71
72
|
return False
|
|
72
73
|
|
|
73
74
|
|
|
74
|
-
def dist(genome_sketch=None, output=None, metric = None, flag=None):
|
|
75
|
+
def dist(genome_sketch=None, output=None, metric=None, flag=None):
|
|
75
76
|
if genome_sketch is not None and output is not None:
|
|
76
77
|
if not os.path.exists(genome_sketch):
|
|
77
78
|
print('No such file or directory: ', genome_sketch)
|
|
78
79
|
return False
|
|
79
|
-
# if not os.path.exists(qry_sketch):
|
|
80
|
-
# print('No such file or directory: ', qry_sketch)
|
|
81
|
-
# return False
|
|
82
80
|
if flag is None:
|
|
83
81
|
flag = 0
|
|
84
82
|
if metric is None:
|
|
85
83
|
metric = 'mash'
|
|
86
|
-
|
|
84
|
+
|
|
87
85
|
print('Disting...')
|
|
88
86
|
start = time.time()
|
|
89
87
|
if '/' in output:
|
|
@@ -99,7 +97,7 @@ def dist(genome_sketch=None, output=None, metric = None, flag=None):
|
|
|
99
97
|
print('Metric type error, only supports mash or aaf distance')
|
|
100
98
|
return False
|
|
101
99
|
else:
|
|
102
|
-
kssd.dist_dispatch(genome_sketch, output, genome_sketch, 2, 0, flag, metric)
|
|
100
|
+
kssd.dist_dispatch(genome_sketch, output, genome_sketch, 2, 0, flag, metric, '')
|
|
103
101
|
end = time.time()
|
|
104
102
|
print('Dist spend time:%.2fs' % (end - start))
|
|
105
103
|
print('Dist finished!')
|
|
@@ -112,6 +110,27 @@ def dist(genome_sketch=None, output=None, metric = None, flag=None):
|
|
|
112
110
|
return False
|
|
113
111
|
|
|
114
112
|
|
|
113
|
+
def combine(genome_sketch1=None, genome_sketch2=None, output=None):
|
|
114
|
+
if genome_sketch1 is not None and genome_sketch2 is not None and output is not None:
|
|
115
|
+
if not os.path.exists(genome_sketch1):
|
|
116
|
+
print('No such file or directory: ', genome_sketch1)
|
|
117
|
+
return False
|
|
118
|
+
if not os.path.exists(genome_sketch2):
|
|
119
|
+
print('No such file or directory: ', genome_sketch2)
|
|
120
|
+
return False
|
|
121
|
+
kssd.dist_dispatch(output, genome_sketch1, genome_sketch2, 3, 0, 0, '', '')
|
|
122
|
+
return True
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def getlist(genome_sketch=None, output=None):
|
|
126
|
+
if genome_sketch is not None and output is not None:
|
|
127
|
+
if not os.path.exists(genome_sketch):
|
|
128
|
+
print('No such file or directory: ', genome_sketch)
|
|
129
|
+
return False
|
|
130
|
+
kssd.print_gnames(genome_sketch, output)
|
|
131
|
+
return True
|
|
132
|
+
|
|
133
|
+
|
|
115
134
|
def retrieve(database=None, genome_sketch=None, output=None, N=None, method=None):
|
|
116
135
|
if database is not None and genome_sketch is not None and output is not None:
|
|
117
136
|
if method is None:
|
|
@@ -145,7 +164,7 @@ def retrieve(database=None, genome_sketch=None, output=None, N=None, method=None
|
|
|
145
164
|
print('Retrieve finished!')
|
|
146
165
|
return True
|
|
147
166
|
else:
|
|
148
|
-
print(
|
|
167
|
+
print('Only support gtdbr214 database!!!')
|
|
149
168
|
return False
|
|
150
169
|
else:
|
|
151
170
|
print('Args error!!!')
|
|
@@ -317,7 +336,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
|
|
|
317
336
|
if not s3:
|
|
318
337
|
return False
|
|
319
338
|
print('Step4...')
|
|
320
|
-
print('Tree visualization finished!')
|
|
321
339
|
visualize(newick=output, mode=mode)
|
|
322
340
|
if platform.system() == 'Linux':
|
|
323
341
|
current_directory = os.getcwd()
|
|
@@ -348,7 +366,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
|
|
|
348
366
|
s2 = retrieve(database=database, genome_sketch=qry_sketch, output=output, N=N, method=method)
|
|
349
367
|
if not s2:
|
|
350
368
|
return False
|
|
351
|
-
print('Tree visualization finished!')
|
|
352
369
|
visualize(newick=os.path.join(output, 'output.newick'),
|
|
353
370
|
taxonomy=os.path.join(output, 'output_accession_taxonomy.txt'), mode=None)
|
|
354
371
|
if platform.system() == 'Linux':
|
|
@@ -362,6 +379,74 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
|
|
|
362
379
|
else:
|
|
363
380
|
print('Args error, please see https://kssdtree.readthedocs.io/en/latest!!!')
|
|
364
381
|
return False
|
|
382
|
+
elif reference is None and database != 'gtdbr214':
|
|
383
|
+
if shuf_file is not None and genome_files is not None and output is not None:
|
|
384
|
+
if toolutils.is_positive_integer(N) or toolutils.is_negative_integer(N):
|
|
385
|
+
print("N must = 0 !!!")
|
|
386
|
+
return False
|
|
387
|
+
if not os.path.exists(database):
|
|
388
|
+
print('No such file or directory: ', database)
|
|
389
|
+
return False
|
|
390
|
+
if '/' in output:
|
|
391
|
+
output_dir = os.path.dirname(output)
|
|
392
|
+
output_name = output.split('/')[-1]
|
|
393
|
+
if not os.path.exists(output_dir):
|
|
394
|
+
os.makedirs(output_dir)
|
|
395
|
+
print("Created directory:", output_dir)
|
|
396
|
+
else:
|
|
397
|
+
output_name = output
|
|
398
|
+
if output_name.endswith(".newick"):
|
|
399
|
+
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
400
|
+
qry_sketch = toolutils.rs() + '_sketch_' + str(timeStamp)
|
|
401
|
+
temp_combine_sketch = toolutils.rs() + '_combine_sketch_' + str(timeStamp)
|
|
402
|
+
temp_phy = toolutils.rs() + '.phy'
|
|
403
|
+
s1 = sketch(shuf_file=shuf_file, genome_files=genome_files, output=qry_sketch, set_opt=True)
|
|
404
|
+
if not s1:
|
|
405
|
+
return False
|
|
406
|
+
print('Step2...')
|
|
407
|
+
combine(genome_sketch1=database, genome_sketch2=qry_sketch, output=temp_combine_sketch)
|
|
408
|
+
if method == 'nj':
|
|
409
|
+
s2 = dist(genome_sketch=temp_combine_sketch, output=temp_phy, flag=0)
|
|
410
|
+
else:
|
|
411
|
+
s2 = dist(genome_sketch=temp_combine_sketch, output=temp_phy, flag=1)
|
|
412
|
+
if not s2:
|
|
413
|
+
return False
|
|
414
|
+
print('Step3...')
|
|
415
|
+
s3 = build(phylip=temp_phy, output=output, method=method)
|
|
416
|
+
if not s3:
|
|
417
|
+
return False
|
|
418
|
+
print('Step4...')
|
|
419
|
+
getlist(genome_sketch=database, output='ref.txt')
|
|
420
|
+
getlist(genome_sketch=qry_sketch, output='qry.txt')
|
|
421
|
+
with open('ref.txt', 'r') as ref_file:
|
|
422
|
+
ref_lines = ref_file.readlines()
|
|
423
|
+
with open('qry.txt', 'r') as qry_file:
|
|
424
|
+
qry_lines = qry_file.readlines()
|
|
425
|
+
with open('ref_qry.txt', 'w') as result_file:
|
|
426
|
+
for line in ref_lines:
|
|
427
|
+
new_name = toolutils.rename_genome(line.strip())
|
|
428
|
+
result_file.write(new_name + '\tReference\n')
|
|
429
|
+
for line in qry_lines:
|
|
430
|
+
new_name = toolutils.rename_genome(line.strip())
|
|
431
|
+
result_file.write(new_name + '\tUnknown\n')
|
|
432
|
+
os.remove('ref.txt')
|
|
433
|
+
os.remove('qry.txt')
|
|
434
|
+
os.remove(temp_phy)
|
|
435
|
+
visualize(newick=output, taxonomy='ref_qry.txt', mode='r')
|
|
436
|
+
if platform.system() == 'Linux':
|
|
437
|
+
current_directory = os.getcwd()
|
|
438
|
+
temp_dir1 = os.path.join(current_directory, qry_sketch)
|
|
439
|
+
if os.path.exists(temp_dir1):
|
|
440
|
+
shutil.rmtree(temp_dir1)
|
|
441
|
+
temp_dir2 = os.path.join(current_directory, temp_combine_sketch)
|
|
442
|
+
if os.path.exists(temp_dir2):
|
|
443
|
+
shutil.rmtree(temp_dir2)
|
|
444
|
+
else:
|
|
445
|
+
print('Output type error, only supports .newick format:', output_name)
|
|
446
|
+
return False
|
|
447
|
+
else:
|
|
448
|
+
print('Args error, please see https://kssdtree.readthedocs.io/en/latest!!!')
|
|
449
|
+
return False
|
|
365
450
|
elif reference is not None and database is None:
|
|
366
451
|
if shuf_file is not None and genome_files is not None and output is not None and method in ['nj', 'dnj']:
|
|
367
452
|
if toolutils.is_positive_integer(N) or toolutils.is_negative_integer(N):
|
|
@@ -419,7 +504,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
|
|
|
419
504
|
if not s6:
|
|
420
505
|
return False
|
|
421
506
|
print('Step5...')
|
|
422
|
-
print('Tree visualization finished!')
|
|
423
507
|
visualize(newick=output, mode=mode)
|
|
424
508
|
if platform.system() == 'Linux':
|
|
425
509
|
current_directory = os.getcwd()
|
|
@@ -252,7 +252,6 @@ static PyObject *py_write_dim_shuffle_file(PyObject *self, PyObject *args) {
|
|
|
252
252
|
return Py_BuildValue("i", state);
|
|
253
253
|
}
|
|
254
254
|
|
|
255
|
-
|
|
256
255
|
static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
257
256
|
char *str1;
|
|
258
257
|
char *str2;
|
|
@@ -260,8 +259,9 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
260
259
|
int flag1;
|
|
261
260
|
int flag2;
|
|
262
261
|
char *str4;
|
|
262
|
+
char *str5;
|
|
263
263
|
int N;
|
|
264
|
-
if (!PyArg_ParseTuple(args, "sssiiis", &str1, &str2, &str3, &flag1, &N, &flag2, &str4)) {
|
|
264
|
+
if (!PyArg_ParseTuple(args, "sssiiiss", &str1, &str2, &str3, &flag1, &N, &flag2, &str4, &str5)) {
|
|
265
265
|
return NULL;
|
|
266
266
|
}
|
|
267
267
|
if (flag1 == 0) {
|
|
@@ -308,6 +308,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
308
308
|
strcpy(dist_opt_val1.refpath, str2);
|
|
309
309
|
strcpy(dist_opt_val1.outdir, str3);
|
|
310
310
|
dist_opt_val1.num_remaining_args = 0;
|
|
311
|
+
if (strcmp(str5, "abundance") == 0) {
|
|
312
|
+
dist_opt_val1.abundance = true;
|
|
313
|
+
} else {
|
|
314
|
+
dist_opt_val1.abundance = false;
|
|
315
|
+
}
|
|
311
316
|
dist_opt_val1.remaining_args = NULL;
|
|
312
317
|
#ifdef _OPENMP
|
|
313
318
|
if(dist_opt_val1.p == 0)
|
|
@@ -366,6 +371,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
366
371
|
strcpy(dist_opt_val2.outdir, str3);
|
|
367
372
|
dist_opt_val2.num_remaining_args = 1;
|
|
368
373
|
dist_opt_val2.remaining_args = &str2;
|
|
374
|
+
if (strcmp(str5, "abundance") == 0) {
|
|
375
|
+
dist_opt_val2.abundance = true;
|
|
376
|
+
} else {
|
|
377
|
+
dist_opt_val2.abundance = false;
|
|
378
|
+
}
|
|
369
379
|
#ifdef _OPENMP
|
|
370
380
|
if(dist_opt_val2.p == 0)
|
|
371
381
|
dist_opt_val2.p = omp_get_num_procs();
|
|
@@ -424,6 +434,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
424
434
|
} else {
|
|
425
435
|
dist_opt_val3.metric = 1;
|
|
426
436
|
}
|
|
437
|
+
if (strcmp(str5, "abundance") == 0) {
|
|
438
|
+
dist_opt_val3.abundance = true;
|
|
439
|
+
} else {
|
|
440
|
+
dist_opt_val3.abundance = false;
|
|
441
|
+
}
|
|
427
442
|
#ifdef _OPENMP
|
|
428
443
|
if(dist_opt_val3.p == 0)
|
|
429
444
|
dist_opt_val3.p = omp_get_num_procs();
|
|
@@ -490,6 +505,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
490
505
|
dist_opt_val4.num_remaining_args = 2;
|
|
491
506
|
dist_opt_val4.remaining_args[0] = str2;
|
|
492
507
|
dist_opt_val4.remaining_args[1] = str3;
|
|
508
|
+
if (strcmp(str5, "abundance") == 0) {
|
|
509
|
+
dist_opt_val4.abundance = true;
|
|
510
|
+
} else {
|
|
511
|
+
dist_opt_val4.abundance = false;
|
|
512
|
+
}
|
|
493
513
|
#ifdef _OPENMP
|
|
494
514
|
if(dist_opt_val4.p == 0)
|
|
495
515
|
dist_opt_val4.p = omp_get_num_procs();
|
|
@@ -515,7 +535,6 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
|
|
|
515
535
|
return Py_BuildValue("i", 1);
|
|
516
536
|
}
|
|
517
537
|
|
|
518
|
-
|
|
519
538
|
static PyObject *py_sketch_union(PyObject *self, PyObject *args) {
|
|
520
539
|
char *i;
|
|
521
540
|
char *o;
|
|
@@ -87,7 +87,7 @@ require_pakages = [
|
|
|
87
87
|
|
|
88
88
|
setup(
|
|
89
89
|
name='kssdtree',
|
|
90
|
-
version='2.0.4',
|
|
90
|
+
version='2.0.6',
|
|
91
91
|
author='Hang Yang',
|
|
92
92
|
author_email='yhlink1207@gmail.com',
|
|
93
93
|
description="Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not. ",
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import random
|
|
2
2
|
import operator
|
|
3
3
|
import os
|
|
4
|
+
import re
|
|
5
|
+
|
|
4
6
|
import requests
|
|
5
7
|
import json
|
|
6
8
|
import zipfile
|
|
@@ -91,6 +93,62 @@ def upload_request(qry_sketch, method, N):
|
|
|
91
93
|
else:
|
|
92
94
|
return None, None
|
|
93
95
|
|
|
96
|
+
|
|
97
|
+
def rename_genome(name):
|
|
98
|
+
if '.fq' in name:
|
|
99
|
+
if '/' in name:
|
|
100
|
+
qry = name.split('/')[-1].split('.fq')[0]
|
|
101
|
+
else:
|
|
102
|
+
qry = name.split('.fq')[0]
|
|
103
|
+
elif '.fastq' in name:
|
|
104
|
+
if '/' in name:
|
|
105
|
+
qry = name.split('/')[-1].split('.fastq')[0]
|
|
106
|
+
else:
|
|
107
|
+
qry = name.split('.fastq')[0]
|
|
108
|
+
elif '.fq.gz' in name:
|
|
109
|
+
if '/' in name:
|
|
110
|
+
qry = name.split('/')[-1].split('.fq.gz')[0]
|
|
111
|
+
else:
|
|
112
|
+
qry = name.split('.fq.gz')[0]
|
|
113
|
+
elif '.fastq.gz' in name:
|
|
114
|
+
if '/' in name:
|
|
115
|
+
qry = name.split('/')[-1].split('.fastq.gz')[0]
|
|
116
|
+
else:
|
|
117
|
+
qry = name.split('.fastq.gz')[0]
|
|
118
|
+
elif '.fa' in name:
|
|
119
|
+
if '/' in name:
|
|
120
|
+
qry = name.split('/')[-1].split('.fa')[0]
|
|
121
|
+
else:
|
|
122
|
+
qry = name.split('.fa')[0]
|
|
123
|
+
elif '.fna' in name:
|
|
124
|
+
if '/' in name:
|
|
125
|
+
qry = name.split('/')[-1].split('.fna')[0]
|
|
126
|
+
else:
|
|
127
|
+
qry = name.split('.fna')[0]
|
|
128
|
+
elif '.fasta' in name:
|
|
129
|
+
if '/' in name:
|
|
130
|
+
qry = name.split('/')[-1].split('.fasta')[0]
|
|
131
|
+
else:
|
|
132
|
+
qry = name.split('.fasta')[0]
|
|
133
|
+
elif '.fa.gz' in name:
|
|
134
|
+
if '/' in name:
|
|
135
|
+
qry = name.split('/')[-1].split('.fa.gz')[0]
|
|
136
|
+
else:
|
|
137
|
+
qry = name.split('.fa.gz')[0]
|
|
138
|
+
elif '.fna.gz' in name:
|
|
139
|
+
if '/' in name:
|
|
140
|
+
qry = name.split('/')[-1].split('.fna.gz')[0]
|
|
141
|
+
else:
|
|
142
|
+
qry = name.split('.fna.gz')[0]
|
|
143
|
+
elif '.fasta.gz' in name:
|
|
144
|
+
if '/' in name:
|
|
145
|
+
qry = name.split('/')[-1].split('.fasta.gz')[0]
|
|
146
|
+
else:
|
|
147
|
+
qry = name.split('.fasta.gz')[0]
|
|
148
|
+
else:
|
|
149
|
+
qry = name
|
|
150
|
+
return qry
|
|
151
|
+
|
|
94
152
|
def view_tree(newick, taxonomy, mode):
|
|
95
153
|
if taxonomy is None:
|
|
96
154
|
from ete3 import PhyloTree, TreeStyle, NodeStyle, TextFace
|
|
@@ -118,6 +176,7 @@ def view_tree(newick, taxonomy, mode):
|
|
|
118
176
|
branch_name_face = TextFace(node.dist, fsize=8, fgcolor='black', tight_text=False,
|
|
119
177
|
bold=False)
|
|
120
178
|
node.add_face(branch_name_face, column=0, position='branch-top')
|
|
179
|
+
print('Tree visualization finished!')
|
|
121
180
|
t.show(tree_style=ts)
|
|
122
181
|
else:
|
|
123
182
|
mode = 'r'
|
|
@@ -147,35 +206,43 @@ def view_tree(newick, taxonomy, mode):
|
|
|
147
206
|
current_directory = os.getcwd()
|
|
148
207
|
taxonomy_path = os.path.join(current_directory, taxonomy)
|
|
149
208
|
if not os.path.exists(taxonomy_path):
|
|
150
|
-
print('
|
|
151
|
-
return
|
|
209
|
+
print('The file taxonomy txt does not exist!')
|
|
152
210
|
|
|
153
211
|
all_accessions = []
|
|
154
212
|
accession_taxonomy = {}
|
|
155
213
|
with open(taxonomy_path, 'r') as file:
|
|
156
214
|
for line in file:
|
|
157
215
|
columns = line.split()
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
216
|
+
if len(columns) == 2:
|
|
217
|
+
column_1 = columns[0]
|
|
218
|
+
column_2 = columns[1:]
|
|
219
|
+
tempfile = ''
|
|
220
|
+
for x in column_2:
|
|
221
|
+
tempfile = tempfile + x + ' '
|
|
222
|
+
tempfile = tempfile[:-1]
|
|
223
|
+
if column_1 in all_accessions:
|
|
224
|
+
print('Tree visualization failed!')
|
|
225
|
+
print('Tip: There are same genome names between the query genome and the reference genome.')
|
|
226
|
+
return False
|
|
227
|
+
all_accessions.append(column_1)
|
|
228
|
+
accession_taxonomy[column_1] = tempfile
|
|
167
229
|
known_species = []
|
|
168
230
|
for x in all_accessions:
|
|
169
231
|
if accession_taxonomy[x] != 'Unknown':
|
|
170
232
|
known_species.append(accession_taxonomy[x])
|
|
171
|
-
|
|
172
233
|
temp_nwk = 'temp_kssdtree.newick'
|
|
173
234
|
|
|
174
235
|
with open(newick, 'r') as f:
|
|
175
236
|
lines = f.readlines()[0]
|
|
176
237
|
for x in all_accessions:
|
|
238
|
+
# print(x)
|
|
239
|
+
pattern = r'\b{}\b'.format(re.escape(x))
|
|
240
|
+
# print(pattern)
|
|
241
|
+
match = re.search(pattern, lines)
|
|
242
|
+
if match:
|
|
243
|
+
x_index = match.start()
|
|
177
244
|
x_len = len(x)
|
|
178
|
-
|
|
245
|
+
# print(x_index)
|
|
179
246
|
loc_index = x_index + x_len + 8
|
|
180
247
|
if x in accession_taxonomy.keys():
|
|
181
248
|
lines = str_insert(lines, loc_index, '[&&NHX:species=' + accession_taxonomy.get(x) + ']')
|
|
@@ -234,6 +301,7 @@ def view_tree(newick, taxonomy, mode):
|
|
|
234
301
|
node.img_style = nst
|
|
235
302
|
node.set_style(nst)
|
|
236
303
|
# t.render("bubble_map.png", w=600, dpi=300, tree_style=ts)
|
|
304
|
+
print('Tree visualization finished!')
|
|
237
305
|
t.show(tree_style=ts)
|
|
238
306
|
|
|
239
307
|
def deal_gtdb_txt(temp_dist_output):
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|