kssdtree 2.0.4__tar.gz → 2.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. {kssdtree-2.0.4/kssdtree.egg-info → kssdtree-2.0.6}/PKG-INFO +3 -6
  2. {kssdtree-2.0.4 → kssdtree-2.0.6/kssdtree.egg-info}/PKG-INFO +3 -6
  3. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.py +96 -12
  4. {kssdtree-2.0.4 → kssdtree-2.0.6}/pykssd.c +22 -3
  5. {kssdtree-2.0.4 → kssdtree-2.0.6}/setup.py +1 -1
  6. {kssdtree-2.0.4 → kssdtree-2.0.6}/toolutils.py +81 -13
  7. {kssdtree-2.0.4 → kssdtree-2.0.6}/MANIFEST.in +0 -0
  8. {kssdtree-2.0.4 → kssdtree-2.0.6}/README.md +0 -0
  9. {kssdtree-2.0.4 → kssdtree-2.0.6}/align.c +0 -0
  10. {kssdtree-2.0.4 → kssdtree-2.0.6}/buildtree.c +0 -0
  11. {kssdtree-2.0.4 → kssdtree-2.0.6}/bytescale.c +0 -0
  12. {kssdtree-2.0.4 → kssdtree-2.0.6}/cluster.c +0 -0
  13. {kssdtree-2.0.4 → kssdtree-2.0.6}/co2mco.c +0 -0
  14. {kssdtree-2.0.4 → kssdtree-2.0.6}/command_composite.c +0 -0
  15. {kssdtree-2.0.4 → kssdtree-2.0.6}/command_dist.c +0 -0
  16. {kssdtree-2.0.4 → kssdtree-2.0.6}/command_dist_wrapper.c +0 -0
  17. {kssdtree-2.0.4 → kssdtree-2.0.6}/command_set.c +0 -0
  18. {kssdtree-2.0.4 → kssdtree-2.0.6}/command_shuffle.c +0 -0
  19. {kssdtree-2.0.4 → kssdtree-2.0.6}/distancemat.c +0 -0
  20. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnj.c +0 -0
  21. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/bytescale.h +0 -0
  22. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/dnj.h +0 -0
  23. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/filebuff.h +0 -0
  24. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/hclust.h +0 -0
  25. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/matrix.h +0 -0
  26. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/mman.h +0 -0
  27. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/nj.h +0 -0
  28. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/nwck.h +0 -0
  29. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/pherror.h +0 -0
  30. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/phy.h +0 -0
  31. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/qseqs.h +0 -0
  32. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/str.h +0 -0
  33. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/threader.h +0 -0
  34. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/tmp.h +0 -0
  35. {kssdtree-2.0.4 → kssdtree-2.0.6}/dnjheaders/vector.h +0 -0
  36. {kssdtree-2.0.4 → kssdtree-2.0.6}/filebuff.c +0 -0
  37. {kssdtree-2.0.4 → kssdtree-2.0.6}/global_basic.c +0 -0
  38. {kssdtree-2.0.4 → kssdtree-2.0.6}/hclust.c +0 -0
  39. {kssdtree-2.0.4 → kssdtree-2.0.6}/iseq2comem.c +0 -0
  40. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/co2mco.h +0 -0
  41. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_composite.h +0 -0
  42. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_dist.h +0 -0
  43. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_dist_wrapper.h +0 -0
  44. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_set.h +0 -0
  45. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/command_shuffle.h +0 -0
  46. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/global_basic.h +0 -0
  47. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/iseq2comem.h +0 -0
  48. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/mman.h +0 -0
  49. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdheaders/mytime.h +0 -0
  50. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/SOURCES.txt +0 -0
  51. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/dependency_links.txt +0 -0
  52. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/not-zip-safe +0 -0
  53. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/requires.txt +0 -0
  54. {kssdtree-2.0.4 → kssdtree-2.0.6}/kssdtree.egg-info/top_level.txt +0 -0
  55. {kssdtree-2.0.4 → kssdtree-2.0.6}/matrix.c +0 -0
  56. {kssdtree-2.0.4 → kssdtree-2.0.6}/mman.c +0 -0
  57. {kssdtree-2.0.4 → kssdtree-2.0.6}/mytime.c +0 -0
  58. {kssdtree-2.0.4 → kssdtree-2.0.6}/nj.c +0 -0
  59. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/align.h +0 -0
  60. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/buildtree.h +0 -0
  61. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/cluster.h +0 -0
  62. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/distancemat.h +0 -0
  63. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/sequence.h +0 -0
  64. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/tree.h +0 -0
  65. {kssdtree-2.0.4 → kssdtree-2.0.6}/njheaders/util.h +0 -0
  66. {kssdtree-2.0.4 → kssdtree-2.0.6}/nwck.c +0 -0
  67. {kssdtree-2.0.4 → kssdtree-2.0.6}/pherror.c +0 -0
  68. {kssdtree-2.0.4 → kssdtree-2.0.6}/phy.c +0 -0
  69. {kssdtree-2.0.4 → kssdtree-2.0.6}/pydnj.c +0 -0
  70. {kssdtree-2.0.4 → kssdtree-2.0.6}/pynj.c +0 -0
  71. {kssdtree-2.0.4 → kssdtree-2.0.6}/qseqs.c +0 -0
  72. {kssdtree-2.0.4 → kssdtree-2.0.6}/sequence.c +0 -0
  73. {kssdtree-2.0.4 → kssdtree-2.0.6}/setup.cfg +0 -0
  74. {kssdtree-2.0.4 → kssdtree-2.0.6}/str.c +0 -0
  75. {kssdtree-2.0.4 → kssdtree-2.0.6}/tmp.c +0 -0
  76. {kssdtree-2.0.4 → kssdtree-2.0.6}/tree.c +0 -0
  77. {kssdtree-2.0.4 → kssdtree-2.0.6}/util.c +0 -0
  78. {kssdtree-2.0.4 → kssdtree-2.0.6}/vector.c +0 -0
@@ -1,11 +1,8 @@
1
- Metadata-Version: 1.1
1
+ Metadata-Version: 2.1
2
2
  Name: kssdtree
3
- Version: 2.0.4
3
+ Version: 2.0.6
4
4
  Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
5
5
  Home-page: https://github.com/yhlink/kssdtree
6
+ Download-URL: https://pypi.org/project/kssdtree
6
7
  Author: Hang Yang
7
8
  Author-email: yhlink1207@gmail.com
8
- License: UNKNOWN
9
- Download-URL: https://pypi.org/project/kssdtree
10
- Description: UNKNOWN
11
- Platform: UNKNOWN
@@ -1,11 +1,8 @@
1
- Metadata-Version: 1.1
1
+ Metadata-Version: 2.1
2
2
  Name: kssdtree
3
- Version: 2.0.4
3
+ Version: 2.0.6
4
4
  Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
5
5
  Home-page: https://github.com/yhlink/kssdtree
6
+ Download-URL: https://pypi.org/project/kssdtree
6
7
  Author: Hang Yang
7
8
  Author-email: yhlink1207@gmail.com
8
- License: UNKNOWN
9
- Download-URL: https://pypi.org/project/kssdtree
10
- Description: UNKNOWN
11
- Platform: UNKNOWN
@@ -28,6 +28,7 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
28
28
  if not os.path.exists(shuf_file):
29
29
  if shuf_file in ['L3K9.shuf', './L3K9.shuf', 'L3K10.shuf', './L3K10.shuf']:
30
30
  print('Downloading...', shuf_file)
31
+ start_time = time.time()
31
32
  if shuf_file == 'L3K9.shuf' or shuf_file == './L3K9.shuf':
32
33
  url = 'https://zenodo.org/records/12699159/files/L3K9.shuf?download=1'
33
34
  else:
@@ -59,9 +60,9 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
59
60
  print('Sketching...')
60
61
  start = time.time()
61
62
  if set_opt:
62
- kssd.dist_dispatch(shuf_file, genome_files, output, 1, 0, 0, '')
63
+ kssd.dist_dispatch(shuf_file, genome_files, output, 1, 0, 0, '', '')
63
64
  else:
64
- kssd.dist_dispatch(shuf_file, genome_files, output, 0, 0, 0, '')
65
+ kssd.dist_dispatch(shuf_file, genome_files, output, 0, 0, 0, '', '')
65
66
  end = time.time()
66
67
  print('Sketch spend time:%.2fs' % (end - start))
67
68
  print('Sketch finished!')
@@ -71,19 +72,16 @@ def sketch(shuf_file=None, genome_files=None, output=None, set_opt=None):
71
72
  return False
72
73
 
73
74
 
74
- def dist(genome_sketch=None, output=None, metric = None, flag=None):
75
+ def dist(genome_sketch=None, output=None, metric=None, flag=None):
75
76
  if genome_sketch is not None and output is not None:
76
77
  if not os.path.exists(genome_sketch):
77
78
  print('No such file or directory: ', genome_sketch)
78
79
  return False
79
- # if not os.path.exists(qry_sketch):
80
- # print('No such file or directory: ', qry_sketch)
81
- # return False
82
80
  if flag is None:
83
81
  flag = 0
84
82
  if metric is None:
85
83
  metric = 'mash'
86
-
84
+
87
85
  print('Disting...')
88
86
  start = time.time()
89
87
  if '/' in output:
@@ -99,7 +97,7 @@ def dist(genome_sketch=None, output=None, metric = None, flag=None):
99
97
  print('Metric type error, only supports mash or aaf distance')
100
98
  return False
101
99
  else:
102
- kssd.dist_dispatch(genome_sketch, output, genome_sketch, 2, 0, flag, metric)
100
+ kssd.dist_dispatch(genome_sketch, output, genome_sketch, 2, 0, flag, metric, '')
103
101
  end = time.time()
104
102
  print('Dist spend time:%.2fs' % (end - start))
105
103
  print('Dist finished!')
@@ -112,6 +110,27 @@ def dist(genome_sketch=None, output=None, metric = None, flag=None):
112
110
  return False
113
111
 
114
112
 
113
+ def combine(genome_sketch1=None, genome_sketch2=None, output=None):
114
+ if genome_sketch1 is not None and genome_sketch2 is not None and output is not None:
115
+ if not os.path.exists(genome_sketch1):
116
+ print('No such file or directory: ', genome_sketch1)
117
+ return False
118
+ if not os.path.exists(genome_sketch2):
119
+ print('No such file or directory: ', genome_sketch2)
120
+ return False
121
+ kssd.dist_dispatch(output, genome_sketch1, genome_sketch2, 3, 0, 0, '', '')
122
+ return True
123
+
124
+
125
+ def getlist(genome_sketch=None, output=None):
126
+ if genome_sketch is not None and output is not None:
127
+ if not os.path.exists(genome_sketch):
128
+ print('No such file or directory: ', genome_sketch)
129
+ return False
130
+ kssd.print_gnames(genome_sketch, output)
131
+ return True
132
+
133
+
115
134
  def retrieve(database=None, genome_sketch=None, output=None, N=None, method=None):
116
135
  if database is not None and genome_sketch is not None and output is not None:
117
136
  if method is None:
@@ -145,7 +164,7 @@ def retrieve(database=None, genome_sketch=None, output=None, N=None, method=None
145
164
  print('Retrieve finished!')
146
165
  return True
147
166
  else:
148
- print("database only support 'gtdbr214'")
167
+ print('Only support gtdbr214 database!!!')
149
168
  return False
150
169
  else:
151
170
  print('Args error!!!')
@@ -317,7 +336,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
317
336
  if not s3:
318
337
  return False
319
338
  print('Step4...')
320
- print('Tree visualization finished!')
321
339
  visualize(newick=output, mode=mode)
322
340
  if platform.system() == 'Linux':
323
341
  current_directory = os.getcwd()
@@ -348,7 +366,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
348
366
  s2 = retrieve(database=database, genome_sketch=qry_sketch, output=output, N=N, method=method)
349
367
  if not s2:
350
368
  return False
351
- print('Tree visualization finished!')
352
369
  visualize(newick=os.path.join(output, 'output.newick'),
353
370
  taxonomy=os.path.join(output, 'output_accession_taxonomy.txt'), mode=None)
354
371
  if platform.system() == 'Linux':
@@ -362,6 +379,74 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
362
379
  else:
363
380
  print('Args error, please see https://kssdtree.readthedocs.io/en/latest!!!')
364
381
  return False
382
+ elif reference is None and database != 'gtdbr214':
383
+ if shuf_file is not None and genome_files is not None and output is not None:
384
+ if toolutils.is_positive_integer(N) or toolutils.is_negative_integer(N):
385
+ print("N must = 0 !!!")
386
+ return False
387
+ if not os.path.exists(database):
388
+ print('No such file or directory: ', database)
389
+ return False
390
+ if '/' in output:
391
+ output_dir = os.path.dirname(output)
392
+ output_name = output.split('/')[-1]
393
+ if not os.path.exists(output_dir):
394
+ os.makedirs(output_dir)
395
+ print("Created directory:", output_dir)
396
+ else:
397
+ output_name = output
398
+ if output_name.endswith(".newick"):
399
+ timeStamp = int(time.mktime(time.localtime(time.time())))
400
+ qry_sketch = toolutils.rs() + '_sketch_' + str(timeStamp)
401
+ temp_combine_sketch = toolutils.rs() + '_combine_sketch_' + str(timeStamp)
402
+ temp_phy = toolutils.rs() + '.phy'
403
+ s1 = sketch(shuf_file=shuf_file, genome_files=genome_files, output=qry_sketch, set_opt=True)
404
+ if not s1:
405
+ return False
406
+ print('Step2...')
407
+ combine(genome_sketch1=database, genome_sketch2=qry_sketch, output=temp_combine_sketch)
408
+ if method == 'nj':
409
+ s2 = dist(genome_sketch=temp_combine_sketch, output=temp_phy, flag=0)
410
+ else:
411
+ s2 = dist(genome_sketch=temp_combine_sketch, output=temp_phy, flag=1)
412
+ if not s2:
413
+ return False
414
+ print('Step3...')
415
+ s3 = build(phylip=temp_phy, output=output, method=method)
416
+ if not s3:
417
+ return False
418
+ print('Step4...')
419
+ getlist(genome_sketch=database, output='ref.txt')
420
+ getlist(genome_sketch=qry_sketch, output='qry.txt')
421
+ with open('ref.txt', 'r') as ref_file:
422
+ ref_lines = ref_file.readlines()
423
+ with open('qry.txt', 'r') as qry_file:
424
+ qry_lines = qry_file.readlines()
425
+ with open('ref_qry.txt', 'w') as result_file:
426
+ for line in ref_lines:
427
+ new_name = toolutils.rename_genome(line.strip())
428
+ result_file.write(new_name + '\tReference\n')
429
+ for line in qry_lines:
430
+ new_name = toolutils.rename_genome(line.strip())
431
+ result_file.write(new_name + '\tUnknown\n')
432
+ os.remove('ref.txt')
433
+ os.remove('qry.txt')
434
+ os.remove(temp_phy)
435
+ visualize(newick=output, taxonomy='ref_qry.txt', mode='r')
436
+ if platform.system() == 'Linux':
437
+ current_directory = os.getcwd()
438
+ temp_dir1 = os.path.join(current_directory, qry_sketch)
439
+ if os.path.exists(temp_dir1):
440
+ shutil.rmtree(temp_dir1)
441
+ temp_dir2 = os.path.join(current_directory, temp_combine_sketch)
442
+ if os.path.exists(temp_dir2):
443
+ shutil.rmtree(temp_dir2)
444
+ else:
445
+ print('Output type error, only supports .newick format:', output_name)
446
+ return False
447
+ else:
448
+ print('Args error, please see https://kssdtree.readthedocs.io/en/latest!!!')
449
+ return False
365
450
  elif reference is not None and database is None:
366
451
  if shuf_file is not None and genome_files is not None and output is not None and method in ['nj', 'dnj']:
367
452
  if toolutils.is_positive_integer(N) or toolutils.is_negative_integer(N):
@@ -419,7 +504,6 @@ def quick(shuf_file=None, genome_files=None, output=None, reference=None, databa
419
504
  if not s6:
420
505
  return False
421
506
  print('Step5...')
422
- print('Tree visualization finished!')
423
507
  visualize(newick=output, mode=mode)
424
508
  if platform.system() == 'Linux':
425
509
  current_directory = os.getcwd()
@@ -252,7 +252,6 @@ static PyObject *py_write_dim_shuffle_file(PyObject *self, PyObject *args) {
252
252
  return Py_BuildValue("i", state);
253
253
  }
254
254
 
255
-
256
255
  static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
257
256
  char *str1;
258
257
  char *str2;
@@ -260,8 +259,9 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
260
259
  int flag1;
261
260
  int flag2;
262
261
  char *str4;
262
+ char *str5;
263
263
  int N;
264
- if (!PyArg_ParseTuple(args, "sssiiis", &str1, &str2, &str3, &flag1, &N, &flag2, &str4)) {
264
+ if (!PyArg_ParseTuple(args, "sssiiiss", &str1, &str2, &str3, &flag1, &N, &flag2, &str4, &str5)) {
265
265
  return NULL;
266
266
  }
267
267
  if (flag1 == 0) {
@@ -308,6 +308,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
308
308
  strcpy(dist_opt_val1.refpath, str2);
309
309
  strcpy(dist_opt_val1.outdir, str3);
310
310
  dist_opt_val1.num_remaining_args = 0;
311
+ if (strcmp(str5, "abundance") == 0) {
312
+ dist_opt_val1.abundance = true;
313
+ } else {
314
+ dist_opt_val1.abundance = false;
315
+ }
311
316
  dist_opt_val1.remaining_args = NULL;
312
317
  #ifdef _OPENMP
313
318
  if(dist_opt_val1.p == 0)
@@ -366,6 +371,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
366
371
  strcpy(dist_opt_val2.outdir, str3);
367
372
  dist_opt_val2.num_remaining_args = 1;
368
373
  dist_opt_val2.remaining_args = &str2;
374
+ if (strcmp(str5, "abundance") == 0) {
375
+ dist_opt_val2.abundance = true;
376
+ } else {
377
+ dist_opt_val2.abundance = false;
378
+ }
369
379
  #ifdef _OPENMP
370
380
  if(dist_opt_val2.p == 0)
371
381
  dist_opt_val2.p = omp_get_num_procs();
@@ -424,6 +434,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
424
434
  } else {
425
435
  dist_opt_val3.metric = 1;
426
436
  }
437
+ if (strcmp(str5, "abundance") == 0) {
438
+ dist_opt_val3.abundance = true;
439
+ } else {
440
+ dist_opt_val3.abundance = false;
441
+ }
427
442
  #ifdef _OPENMP
428
443
  if(dist_opt_val3.p == 0)
429
444
  dist_opt_val3.p = omp_get_num_procs();
@@ -490,6 +505,11 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
490
505
  dist_opt_val4.num_remaining_args = 2;
491
506
  dist_opt_val4.remaining_args[0] = str2;
492
507
  dist_opt_val4.remaining_args[1] = str3;
508
+ if (strcmp(str5, "abundance") == 0) {
509
+ dist_opt_val4.abundance = true;
510
+ } else {
511
+ dist_opt_val4.abundance = false;
512
+ }
493
513
  #ifdef _OPENMP
494
514
  if(dist_opt_val4.p == 0)
495
515
  dist_opt_val4.p = omp_get_num_procs();
@@ -515,7 +535,6 @@ static PyObject *py_dist_dispatch(PyObject *self, PyObject *args) {
515
535
  return Py_BuildValue("i", 1);
516
536
  }
517
537
 
518
-
519
538
  static PyObject *py_sketch_union(PyObject *self, PyObject *args) {
520
539
  char *i;
521
540
  char *o;
@@ -87,7 +87,7 @@ require_pakages = [
87
87
 
88
88
  setup(
89
89
  name='kssdtree',
90
- version='2.0.4',
90
+ version='2.0.6',
91
91
  author='Hang Yang',
92
92
  author_email='yhlink1207@gmail.com',
93
93
  description="Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not. ",
@@ -1,6 +1,8 @@
1
1
  import random
2
2
  import operator
3
3
  import os
4
+ import re
5
+
4
6
  import requests
5
7
  import json
6
8
  import zipfile
@@ -91,6 +93,62 @@ def upload_request(qry_sketch, method, N):
91
93
  else:
92
94
  return None, None
93
95
 
96
+
97
+ def rename_genome(name):
98
+ if '.fq' in name:
99
+ if '/' in name:
100
+ qry = name.split('/')[-1].split('.fq')[0]
101
+ else:
102
+ qry = name.split('.fq')[0]
103
+ elif '.fastq' in name:
104
+ if '/' in name:
105
+ qry = name.split('/')[-1].split('.fastq')[0]
106
+ else:
107
+ qry = name.split('.fastq')[0]
108
+ elif '.fq.gz' in name:
109
+ if '/' in name:
110
+ qry = name.split('/')[-1].split('.fq.gz')[0]
111
+ else:
112
+ qry = name.split('.fq.gz')[0]
113
+ elif '.fastq.gz' in name:
114
+ if '/' in name:
115
+ qry = name.split('/')[-1].split('.fastq.gz')[0]
116
+ else:
117
+ qry = name.split('.fastq.gz')[0]
118
+ elif '.fa' in name:
119
+ if '/' in name:
120
+ qry = name.split('/')[-1].split('.fa')[0]
121
+ else:
122
+ qry = name.split('.fa')[0]
123
+ elif '.fna' in name:
124
+ if '/' in name:
125
+ qry = name.split('/')[-1].split('.fna')[0]
126
+ else:
127
+ qry = name.split('.fna')[0]
128
+ elif '.fasta' in name:
129
+ if '/' in name:
130
+ qry = name.split('/')[-1].split('.fasta')[0]
131
+ else:
132
+ qry = name.split('.fasta')[0]
133
+ elif '.fa.gz' in name:
134
+ if '/' in name:
135
+ qry = name.split('/')[-1].split('.fa.gz')[0]
136
+ else:
137
+ qry = name.split('.fa.gz')[0]
138
+ elif '.fna.gz' in name:
139
+ if '/' in name:
140
+ qry = name.split('/')[-1].split('.fna.gz')[0]
141
+ else:
142
+ qry = name.split('.fna.gz')[0]
143
+ elif '.fasta.gz' in name:
144
+ if '/' in name:
145
+ qry = name.split('/')[-1].split('.fasta.gz')[0]
146
+ else:
147
+ qry = name.split('.fasta.gz')[0]
148
+ else:
149
+ qry = name
150
+ return qry
151
+
94
152
  def view_tree(newick, taxonomy, mode):
95
153
  if taxonomy is None:
96
154
  from ete3 import PhyloTree, TreeStyle, NodeStyle, TextFace
@@ -118,6 +176,7 @@ def view_tree(newick, taxonomy, mode):
118
176
  branch_name_face = TextFace(node.dist, fsize=8, fgcolor='black', tight_text=False,
119
177
  bold=False)
120
178
  node.add_face(branch_name_face, column=0, position='branch-top')
179
+ print('Tree visualization finished!')
121
180
  t.show(tree_style=ts)
122
181
  else:
123
182
  mode = 'r'
@@ -147,35 +206,43 @@ def view_tree(newick, taxonomy, mode):
147
206
  current_directory = os.getcwd()
148
207
  taxonomy_path = os.path.join(current_directory, taxonomy)
149
208
  if not os.path.exists(taxonomy_path):
150
- print('"The file taxonomy txt does not exist."')
151
- return
209
+ print('The file taxonomy txt does not exist!')
152
210
 
153
211
  all_accessions = []
154
212
  accession_taxonomy = {}
155
213
  with open(taxonomy_path, 'r') as file:
156
214
  for line in file:
157
215
  columns = line.split()
158
- column_1 = columns[0]
159
- column_2 = columns[1:]
160
- tempfile = ''
161
- for x in column_2:
162
- tempfile = tempfile + x + ' '
163
- tempfile = tempfile[:-1]
164
- all_accessions.append(column_1)
165
- accession_taxonomy[column_1] = tempfile
166
-
216
+ if len(columns) == 2:
217
+ column_1 = columns[0]
218
+ column_2 = columns[1:]
219
+ tempfile = ''
220
+ for x in column_2:
221
+ tempfile = tempfile + x + ' '
222
+ tempfile = tempfile[:-1]
223
+ if column_1 in all_accessions:
224
+ print('Tree visualization failed!')
225
+ print('Tip: There are same genome names between the query genome and the reference genome.')
226
+ return False
227
+ all_accessions.append(column_1)
228
+ accession_taxonomy[column_1] = tempfile
167
229
  known_species = []
168
230
  for x in all_accessions:
169
231
  if accession_taxonomy[x] != 'Unknown':
170
232
  known_species.append(accession_taxonomy[x])
171
-
172
233
  temp_nwk = 'temp_kssdtree.newick'
173
234
 
174
235
  with open(newick, 'r') as f:
175
236
  lines = f.readlines()[0]
176
237
  for x in all_accessions:
238
+ # print(x)
239
+ pattern = r'\b{}\b'.format(re.escape(x))
240
+ # print(pattern)
241
+ match = re.search(pattern, lines)
242
+ if match:
243
+ x_index = match.start()
177
244
  x_len = len(x)
178
- x_index = lines.index(x)
245
+ # print(x_index)
179
246
  loc_index = x_index + x_len + 8
180
247
  if x in accession_taxonomy.keys():
181
248
  lines = str_insert(lines, loc_index, '[&&NHX:species=' + accession_taxonomy.get(x) + ']')
@@ -234,6 +301,7 @@ def view_tree(newick, taxonomy, mode):
234
301
  node.img_style = nst
235
302
  node.set_style(nst)
236
303
  # t.render("bubble_map.png", w=600, dpi=300, tree_style=ts)
304
+ print('Tree visualization finished!')
237
305
  t.show(tree_style=ts)
238
306
 
239
307
  def deal_gtdb_txt(temp_dist_output):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes