kssdtree 1.1.1__tar.gz → 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kssdtree-1.1.1/kssdtree.egg-info → kssdtree-1.1.2}/PKG-INFO +1 -1
- {kssdtree-1.1.1 → kssdtree-1.1.2/kssdtree.egg-info}/PKG-INFO +1 -1
- kssdtree-1.1.2/kssdtree.py +402 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/setup.py +1 -1
- {kssdtree-1.1.1 → kssdtree-1.1.2}/toolutils.py +12 -6
- kssdtree-1.1.1/kssdtree.py +0 -390
- {kssdtree-1.1.1 → kssdtree-1.1.2}/MANIFEST.in +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/README.md +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/align.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/buildtree.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/bytescale.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/cluster.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/co2mco.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/command_composite.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/command_dist.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/command_dist_wrapper.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/command_set.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/command_shuffle.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/distancemat.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnj.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/bytescale.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/dnj.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/filebuff.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/hclust.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/matrix.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/mman.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/nj.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/nwck.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/pherror.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/phy.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/qseqs.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/str.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/threader.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/tmp.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/dnjheaders/vector.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/filebuff.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/global_basic.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/hclust.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/iseq2comem.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/co2mco.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/command_composite.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/command_dist.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/command_dist_wrapper.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/command_set.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/command_shuffle.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/global_basic.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/iseq2comem.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/mman.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdheaders/mytime.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdtree.egg-info/SOURCES.txt +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdtree.egg-info/dependency_links.txt +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdtree.egg-info/not-zip-safe +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdtree.egg-info/requires.txt +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/kssdtree.egg-info/top_level.txt +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/matrix.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/mman.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/mytime.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/nj.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/align.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/buildtree.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/cluster.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/distancemat.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/sequence.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/tree.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/njheaders/util.h +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/nwck.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/pherror.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/phy.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/pydnj.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/pykssd.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/pynj.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/qseqs.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/sequence.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/setup.cfg +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/str.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/tmp.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/tree.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/util.c +0 -0
- {kssdtree-1.1.1 → kssdtree-1.1.2}/vector.c +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 1.1
|
|
2
2
|
Name: kssdtree
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
|
|
5
5
|
Home-page: https://github.com/yhlink/kssdtree
|
|
6
6
|
Author: Hang Yang
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 1.1
|
|
2
2
|
Name: kssdtree
|
|
3
|
-
Version: 1.1.1
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not.
|
|
5
5
|
Home-page: https://github.com/yhlink/kssdtree
|
|
6
6
|
Author: Hang Yang
|
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
import kssd
|
|
2
|
+
import nj
|
|
3
|
+
import dnj
|
|
4
|
+
import toolutils
|
|
5
|
+
import os
|
|
6
|
+
import platform
|
|
7
|
+
import shutil
|
|
8
|
+
import time
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def shuffle(k=None, s=None, l=None, o=None):
|
|
13
|
+
kssd.write_dim_shuffle_file(k, s, l, o)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def sketch(shuf_file=None, genomes_file=None, output=None, set_opt=None):
|
|
17
|
+
if shuf_file is not None and genomes_file is not None and output is not None:
|
|
18
|
+
if not os.path.exists(genomes_file):
|
|
19
|
+
print('No such file or directory: ', genomes_file)
|
|
20
|
+
return False
|
|
21
|
+
if set_opt is None:
|
|
22
|
+
set_opt = False
|
|
23
|
+
if not toolutils.allowed_file(genomes_file):
|
|
24
|
+
for filename in os.listdir(genomes_file):
|
|
25
|
+
if not toolutils.allowed_file(filename):
|
|
26
|
+
print('Genome format error for file:', filename)
|
|
27
|
+
return False
|
|
28
|
+
if not os.path.exists(shuf_file):
|
|
29
|
+
if shuf_file in ['L3K9.shuf']:
|
|
30
|
+
print('Downloading...', shuf_file)
|
|
31
|
+
import http.client
|
|
32
|
+
http.client.HTTPConnection._http_vsn = 10
|
|
33
|
+
http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
|
|
34
|
+
url = 'http://www.metakssdcoabundance.link/kssdtree/shuffle/' + shuf_file
|
|
35
|
+
start_time = time.time()
|
|
36
|
+
response = requests.get(url, stream=True)
|
|
37
|
+
with open(shuf_file, 'wb') as file:
|
|
38
|
+
for chunk in response.iter_content(chunk_size=1024):
|
|
39
|
+
if chunk:
|
|
40
|
+
file.write(chunk)
|
|
41
|
+
end_time = time.time()
|
|
42
|
+
if end_time - start_time > 120:
|
|
43
|
+
print(
|
|
44
|
+
"Network timeout, please manually download from github (https://github.com/yhlink/kssdtree/tree/master/shuffle_file)")
|
|
45
|
+
return False
|
|
46
|
+
print('Download finished: ', shuf_file)
|
|
47
|
+
elif shuf_file in ['L2K8.shuf', 'L3K10.shuf', 'L2K9.shuf', 'L3K11.shuf']:
|
|
48
|
+
print('Shuffling...', shuf_file)
|
|
49
|
+
file_name = shuf_file.split('.')[0]
|
|
50
|
+
k = int(file_name[3:])
|
|
51
|
+
if k == 11 or k == 10:
|
|
52
|
+
s = 6
|
|
53
|
+
else:
|
|
54
|
+
s = 5
|
|
55
|
+
l = int(file_name[1])
|
|
56
|
+
shuffle(k, s, l, file_name)
|
|
57
|
+
print('Shuffle finished: ', shuf_file)
|
|
58
|
+
else:
|
|
59
|
+
print('No such file or directory: ', shuf_file)
|
|
60
|
+
return False
|
|
61
|
+
print('Sketching...')
|
|
62
|
+
start = time.time()
|
|
63
|
+
if set_opt:
|
|
64
|
+
kssd.dist_dispatch(shuf_file, genomes_file, output, 1, 0, 0)
|
|
65
|
+
else:
|
|
66
|
+
kssd.dist_dispatch(shuf_file, genomes_file, output, 0, 0, 0)
|
|
67
|
+
end = time.time()
|
|
68
|
+
print('Sketch spend time:%.2fs' % (end - start))
|
|
69
|
+
print('Sketch finished!')
|
|
70
|
+
return True
|
|
71
|
+
else:
|
|
72
|
+
print('Args error!!!')
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def dist(ref_sketch=None, qry_sketch=None, output=None, flag=None):
|
|
77
|
+
if ref_sketch is not None and qry_sketch is not None and output is not None:
|
|
78
|
+
if not os.path.exists(ref_sketch):
|
|
79
|
+
print('No such file or directory: ', ref_sketch)
|
|
80
|
+
return False
|
|
81
|
+
if not os.path.exists(qry_sketch):
|
|
82
|
+
print('No such file or directory: ', qry_sketch)
|
|
83
|
+
return False
|
|
84
|
+
if flag is None:
|
|
85
|
+
flag = 0
|
|
86
|
+
print('Disting...')
|
|
87
|
+
start = time.time()
|
|
88
|
+
if '/' in output:
|
|
89
|
+
output_dir = os.path.dirname(output)
|
|
90
|
+
output_name = output.split('/')[-1]
|
|
91
|
+
if not os.path.exists(output_dir):
|
|
92
|
+
os.makedirs(output_dir)
|
|
93
|
+
print("Created directory:", output_dir)
|
|
94
|
+
else:
|
|
95
|
+
output_name = output
|
|
96
|
+
if output_name.endswith(".phy") or output_name.endswith(".phylip"):
|
|
97
|
+
kssd.dist_dispatch(ref_sketch, output, qry_sketch, 2, 0, flag)
|
|
98
|
+
end = time.time()
|
|
99
|
+
print('Dist spend time:%.2fs' % (end - start))
|
|
100
|
+
print('Dist finished!')
|
|
101
|
+
return True
|
|
102
|
+
else:
|
|
103
|
+
print('Output type error, only supports .phylip (.phy) format:', output_name)
|
|
104
|
+
return False
|
|
105
|
+
else:
|
|
106
|
+
print('Args error!!!')
|
|
107
|
+
return False
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def retrieve(ref_sketch=None, qry_sketch=None, output=None, N=None, method=None):
|
|
111
|
+
if ref_sketch is not None and qry_sketch is not None and output is not None:
|
|
112
|
+
if method is None:
|
|
113
|
+
method = 'nj'
|
|
114
|
+
if method not in ['nj', 'dnj']:
|
|
115
|
+
print('Only support nj and dnj methods!!!')
|
|
116
|
+
return
|
|
117
|
+
if not os.path.exists(qry_sketch):
|
|
118
|
+
print('No such file or directory: ', qry_sketch)
|
|
119
|
+
return False
|
|
120
|
+
if ref_sketch == 'gtdbr214_sketch':
|
|
121
|
+
print('Retrieving...')
|
|
122
|
+
start = time.time()
|
|
123
|
+
if not os.path.exists(output):
|
|
124
|
+
os.makedirs(output)
|
|
125
|
+
print("Created directory:", output)
|
|
126
|
+
newick, accession_taxonomy = toolutils.upload_request(qry_sketch=qry_sketch, method=method, N=N)
|
|
127
|
+
with open(os.path.join(output, 'output.newick'), 'w') as f:
|
|
128
|
+
f.write(newick)
|
|
129
|
+
with open(os.path.join(output, 'output_accession_taxonomy.txt'), 'w') as f:
|
|
130
|
+
for key, value in accession_taxonomy.items():
|
|
131
|
+
f.write("%s %s\n" % (key, value))
|
|
132
|
+
end = time.time()
|
|
133
|
+
print('Retrieve spend time:%.2fs' % (end - start))
|
|
134
|
+
print('Retrieve finished!')
|
|
135
|
+
return True
|
|
136
|
+
else:
|
|
137
|
+
print("ref_sketch must be set to 'gtdbr214_sketc'")
|
|
138
|
+
return False
|
|
139
|
+
else:
|
|
140
|
+
print('Args error!!!')
|
|
141
|
+
return False
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def build(phylip=None, output=None, method=None):
|
|
145
|
+
if phylip is not None:
|
|
146
|
+
if not os.path.exists(phylip):
|
|
147
|
+
print('No such file or directory: ', phylip)
|
|
148
|
+
return False
|
|
149
|
+
if method is None:
|
|
150
|
+
method = 'nj'
|
|
151
|
+
if method not in ['nj', 'dnj']:
|
|
152
|
+
print('Only support nj and dnj methods!!!')
|
|
153
|
+
return False
|
|
154
|
+
print('Building...')
|
|
155
|
+
if '/' in output:
|
|
156
|
+
output_dir = os.path.dirname(output)
|
|
157
|
+
output_name = output.split('/')[-1]
|
|
158
|
+
if not os.path.exists(output_dir):
|
|
159
|
+
os.makedirs(output_dir)
|
|
160
|
+
print("Created directory:", output_dir)
|
|
161
|
+
else:
|
|
162
|
+
output_name = output
|
|
163
|
+
if output_name.endswith(".nwk") or output_name.endswith(".newick"):
|
|
164
|
+
start = time.time()
|
|
165
|
+
if method == 'nj':
|
|
166
|
+
state = nj.build(phylip, output)
|
|
167
|
+
else:
|
|
168
|
+
if platform.system() == 'Linux':
|
|
169
|
+
state = dnj.build(phylip, output, method)
|
|
170
|
+
else:
|
|
171
|
+
state = nj.build(phylip, output)
|
|
172
|
+
if state == 1:
|
|
173
|
+
with open(output, 'r') as f:
|
|
174
|
+
lines = f.readlines()
|
|
175
|
+
newick = ''.join(lines)
|
|
176
|
+
newick = newick.replace('\n', '')
|
|
177
|
+
with open(output, 'w') as f:
|
|
178
|
+
f.write(newick)
|
|
179
|
+
end = time.time()
|
|
180
|
+
print('Build spend time:%.2fs' % (end - start))
|
|
181
|
+
print('Build finished!')
|
|
182
|
+
return True
|
|
183
|
+
else:
|
|
184
|
+
print('Args error!!!')
|
|
185
|
+
return False
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def visualize(newick=None, taxonomy=None, mode=None):
|
|
189
|
+
if newick is not None:
|
|
190
|
+
if not os.path.exists(newick):
|
|
191
|
+
print('No such file or directory: ', newick)
|
|
192
|
+
return False
|
|
193
|
+
if mode is None:
|
|
194
|
+
mode = 'r'
|
|
195
|
+
toolutils.view_tree(newick, taxonomy, mode=mode)
|
|
196
|
+
else:
|
|
197
|
+
print('Args error!!!')
|
|
198
|
+
return False
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def union(ref_sketch=None, output=None):
|
|
202
|
+
if ref_sketch is not None and output is not None:
|
|
203
|
+
if not os.path.exists(ref_sketch):
|
|
204
|
+
print('No such file or directory: ', ref_sketch)
|
|
205
|
+
return False
|
|
206
|
+
kssd.sketch_union(ref_sketch, output)
|
|
207
|
+
return True
|
|
208
|
+
else:
|
|
209
|
+
return False
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def subtract(ref_sketch=None, genomes_sketch=None, output=None, flag=None):
|
|
213
|
+
if ref_sketch is not None and genomes_sketch is not None and output is not None:
|
|
214
|
+
if not os.path.exists(ref_sketch):
|
|
215
|
+
print('No such file or directory: ', ref_sketch)
|
|
216
|
+
return False
|
|
217
|
+
if not os.path.exists(genomes_sketch):
|
|
218
|
+
print('No such file or directory: ', genomes_sketch)
|
|
219
|
+
return False
|
|
220
|
+
if flag == 1:
|
|
221
|
+
print('Subtracting...')
|
|
222
|
+
start = time.time()
|
|
223
|
+
kssd.sketch_operate(ref_sketch, output, genomes_sketch)
|
|
224
|
+
end = time.time()
|
|
225
|
+
print('Subtract spend time:%.2fs' % (end - start))
|
|
226
|
+
print('Subtract finished!')
|
|
227
|
+
return True
|
|
228
|
+
else:
|
|
229
|
+
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
230
|
+
print('Subtracting...')
|
|
231
|
+
start = time.time()
|
|
232
|
+
temp_txt = 'ref.txt'
|
|
233
|
+
kssd.print_gnames(ref_sketch, temp_txt)
|
|
234
|
+
nums = 0
|
|
235
|
+
with open(temp_txt, 'r') as file:
|
|
236
|
+
for line in file:
|
|
237
|
+
nums += 1
|
|
238
|
+
if nums == 1:
|
|
239
|
+
temp_union_sketch = ref_sketch
|
|
240
|
+
else:
|
|
241
|
+
temp_union_sketch = 'ref_union_sketch_' + str(timeStamp)
|
|
242
|
+
r = union(ref_sketch=ref_sketch, output=temp_union_sketch)
|
|
243
|
+
if not r:
|
|
244
|
+
print('Union error!!!')
|
|
245
|
+
return False
|
|
246
|
+
kssd.sketch_operate(temp_union_sketch, output, genomes_sketch)
|
|
247
|
+
end = time.time()
|
|
248
|
+
current_directory = os.getcwd()
|
|
249
|
+
temp_dir = os.path.join(current_directory, temp_union_sketch)
|
|
250
|
+
if platform.system() == 'Linux':
|
|
251
|
+
if os.path.exists(temp_dir):
|
|
252
|
+
shutil.rmtree(temp_dir)
|
|
253
|
+
if os.path.exists(temp_txt):
|
|
254
|
+
os.remove(temp_txt)
|
|
255
|
+
else:
|
|
256
|
+
pass
|
|
257
|
+
print('Subtract spend time:%.2fs' % (end - start))
|
|
258
|
+
print('Subtract finished!')
|
|
259
|
+
return True
|
|
260
|
+
else:
|
|
261
|
+
print('Args error!!!')
|
|
262
|
+
return False
|
|
263
|
+
|
|
264
|
+
|
|
265
|
+
def quick(shuf_file=None, genomes_file=None, output=None, reference=None, taxonomy=None, method='nj', mode='r', N=0):
|
|
266
|
+
if reference is None and taxonomy is None:
|
|
267
|
+
if shuf_file is not None and genomes_file is not None and output is not None:
|
|
268
|
+
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
269
|
+
temp_sketch = genomes_file + '_sketch_' + str(timeStamp)
|
|
270
|
+
temp_phy = 'temp.phy'
|
|
271
|
+
print('Step1...')
|
|
272
|
+
s1 = sketch(shuf_file=shuf_file, genomes_file=genomes_file, output=temp_sketch, set_opt=False)
|
|
273
|
+
if not s1:
|
|
274
|
+
return False
|
|
275
|
+
print('Step2...')
|
|
276
|
+
if method == 'nj':
|
|
277
|
+
s2 = dist(ref_sketch=temp_sketch, qry_sketch=temp_sketch, output=temp_phy, flag=0)
|
|
278
|
+
else:
|
|
279
|
+
s2 = dist(ref_sketch=temp_sketch, qry_sketch=temp_sketch, output=temp_phy, flag=1)
|
|
280
|
+
if not s2:
|
|
281
|
+
return False
|
|
282
|
+
print('Step3...')
|
|
283
|
+
s3 = build(phylip=temp_phy, output=output, method=method)
|
|
284
|
+
if not s3:
|
|
285
|
+
return False
|
|
286
|
+
if platform.system() == 'Linux':
|
|
287
|
+
current_directory = os.getcwd()
|
|
288
|
+
temp_dir1 = os.path.join(current_directory, temp_sketch)
|
|
289
|
+
temp_dir2 = os.path.join(current_directory, 'distout')
|
|
290
|
+
if os.path.exists(temp_dir1):
|
|
291
|
+
shutil.rmtree(temp_dir1)
|
|
292
|
+
if os.path.exists(temp_dir2):
|
|
293
|
+
shutil.rmtree(temp_dir2)
|
|
294
|
+
if os.path.exists(temp_phy):
|
|
295
|
+
os.remove(temp_phy)
|
|
296
|
+
return True
|
|
297
|
+
else:
|
|
298
|
+
print('Step4...')
|
|
299
|
+
print('Tree visualization finished!')
|
|
300
|
+
visualize(newick=output, taxonomy=taxonomy, mode=mode)
|
|
301
|
+
else:
|
|
302
|
+
print('Args error!!!')
|
|
303
|
+
return False
|
|
304
|
+
|
|
305
|
+
elif reference == 'gtdbr214_sketch' and taxonomy is None:
|
|
306
|
+
if shuf_file is not None and genomes_file is not None and output is not None:
|
|
307
|
+
if not toolutils.is_positive_integer(N):
|
|
308
|
+
print("N must >0 !!!")
|
|
309
|
+
return False
|
|
310
|
+
if shuf_file != 'L3K9.shuf':
|
|
311
|
+
print("shuffle file must be set to 'L3K9.shuf'")
|
|
312
|
+
return False
|
|
313
|
+
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
314
|
+
qry_sketch = genomes_file + '_sketch_' + str(timeStamp)
|
|
315
|
+
s1 = sketch(shuf_file=shuf_file, genomes_file=genomes_file, output=qry_sketch, set_opt=True)
|
|
316
|
+
if not s1:
|
|
317
|
+
return False
|
|
318
|
+
s2 = retrieve(ref_sketch=reference, qry_sketch=qry_sketch, output=output, N=N, method=method)
|
|
319
|
+
if not s2:
|
|
320
|
+
return False
|
|
321
|
+
if platform.system() == 'Linux':
|
|
322
|
+
return True
|
|
323
|
+
else:
|
|
324
|
+
print('Tree visualization finished!')
|
|
325
|
+
visualize(newick=os.path.join(output, 'output.newick'),
|
|
326
|
+
taxonomy=os.path.join(output, 'output_accession_taxonomy.txt'), mode=None)
|
|
327
|
+
else:
|
|
328
|
+
print('Args error!!!')
|
|
329
|
+
return False
|
|
330
|
+
else:
|
|
331
|
+
if shuf_file is not None and genomes_file is not None and output is not None and method in ['nj', 'dnj']:
|
|
332
|
+
if shuf_file is not None and genomes_file is not None and output is not None and method in ['nj', 'dnj']:
|
|
333
|
+
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
334
|
+
temp_reference_sketch = 'ref_sketch_' + str(timeStamp)
|
|
335
|
+
temp_genomes_sketch = genomes_file + '_sketch_' + str(timeStamp)
|
|
336
|
+
if not toolutils.allowed_file(reference):
|
|
337
|
+
cur_path = os.getcwd()
|
|
338
|
+
ref_path = os.path.join(cur_path, reference)
|
|
339
|
+
num = toolutils.get_file_num(ref_path)
|
|
340
|
+
if num == 1:
|
|
341
|
+
temp_union_sketch = temp_reference_sketch
|
|
342
|
+
else:
|
|
343
|
+
temp_union_sketch = 'ref_union_sketch_' + str(timeStamp)
|
|
344
|
+
else:
|
|
345
|
+
temp_union_sketch = temp_reference_sketch
|
|
346
|
+
temp_subtract_sketch = genomes_file + '_subtract_sketch_' + str(timeStamp)
|
|
347
|
+
temp_phy = 'temp.phy'
|
|
348
|
+
print('Step1...')
|
|
349
|
+
s1 = sketch(shuf_file=shuf_file, genomes_file=reference, output=temp_reference_sketch, set_opt=True)
|
|
350
|
+
if not s1:
|
|
351
|
+
return False
|
|
352
|
+
s2 = sketch(shuf_file=shuf_file, genomes_file=genomes_file, output=temp_genomes_sketch, set_opt=True)
|
|
353
|
+
if not s2:
|
|
354
|
+
return False
|
|
355
|
+
print('Step2...')
|
|
356
|
+
s3 = union(ref_sketch=temp_reference_sketch, output=temp_union_sketch)
|
|
357
|
+
if not s3:
|
|
358
|
+
return False
|
|
359
|
+
s4 = subtract(ref_sketch=temp_union_sketch, genomes_sketch=temp_genomes_sketch,
|
|
360
|
+
output=temp_subtract_sketch, flag=1)
|
|
361
|
+
if not s4:
|
|
362
|
+
return False
|
|
363
|
+
print('Step3...')
|
|
364
|
+
if method == 'nj':
|
|
365
|
+
s5 = dist(ref_sketch=temp_subtract_sketch, qry_sketch=temp_subtract_sketch, output=temp_phy,
|
|
366
|
+
flag=0)
|
|
367
|
+
else:
|
|
368
|
+
s5 = dist(ref_sketch=temp_subtract_sketch, qry_sketch=temp_subtract_sketch, output=temp_phy,
|
|
369
|
+
flag=1)
|
|
370
|
+
if not s5:
|
|
371
|
+
return False
|
|
372
|
+
print('Step4...')
|
|
373
|
+
s6 = build(phylip=temp_phy, output=output, method=method)
|
|
374
|
+
if not s6:
|
|
375
|
+
return False
|
|
376
|
+
if platform.system() == 'Linux':
|
|
377
|
+
current_directory = os.getcwd()
|
|
378
|
+
temp_dir1 = os.path.join(current_directory, temp_reference_sketch)
|
|
379
|
+
temp_dir2 = os.path.join(current_directory, temp_genomes_sketch)
|
|
380
|
+
temp_dir3 = os.path.join(current_directory, temp_union_sketch)
|
|
381
|
+
temp_dir4 = os.path.join(current_directory, temp_subtract_sketch)
|
|
382
|
+
temp_dir5 = os.path.join(current_directory, 'distout')
|
|
383
|
+
if os.path.exists(temp_dir1):
|
|
384
|
+
shutil.rmtree(temp_dir1)
|
|
385
|
+
if os.path.exists(temp_dir2):
|
|
386
|
+
shutil.rmtree(temp_dir2)
|
|
387
|
+
if os.path.exists(temp_dir3):
|
|
388
|
+
shutil.rmtree(temp_dir3)
|
|
389
|
+
if os.path.exists(temp_dir4):
|
|
390
|
+
shutil.rmtree(temp_dir4)
|
|
391
|
+
if os.path.exists(temp_dir5):
|
|
392
|
+
shutil.rmtree(temp_dir5)
|
|
393
|
+
if os.path.exists(temp_phy):
|
|
394
|
+
os.remove(temp_phy)
|
|
395
|
+
return True
|
|
396
|
+
else:
|
|
397
|
+
print('Step5...')
|
|
398
|
+
print('Tree visualization finished!')
|
|
399
|
+
visualize(newick=output, taxonomy=taxonomy, mode=mode)
|
|
400
|
+
else:
|
|
401
|
+
print('Args error!!!')
|
|
402
|
+
return False
|
|
@@ -93,7 +93,7 @@ require_pakages = [
|
|
|
93
93
|
|
|
94
94
|
setup(
|
|
95
95
|
name='kssdtree',
|
|
96
|
-
version='1.1.1',
|
|
96
|
+
version='1.1.2',
|
|
97
97
|
author='Hang Yang',
|
|
98
98
|
author_email='yhlink1207@gmail.com',
|
|
99
99
|
description="Kssdtree is a versatile Python package for phylogenetic analysis. It also provides one-stop tree construction and visualization. It can handle DNA sequences of both fasta or fastq format, whether gzipped or not. ",
|
|
@@ -47,13 +47,20 @@ def get_file_num(cwd):
|
|
|
47
47
|
return len(res)
|
|
48
48
|
|
|
49
49
|
|
|
50
|
-
def
|
|
51
|
-
|
|
50
|
+
def decompress_zip(zip_path, dir_path):
|
|
51
|
+
f = zipfile.ZipFile(zip_path, 'r')
|
|
52
|
+
for file in f.namelist():
|
|
53
|
+
f.extract(file, dir_path)
|
|
54
|
+
f.close()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def upload_request(qry_sketch, method, N):
|
|
58
|
+
zip_file = qry_sketch + '.zip'
|
|
52
59
|
zip = zipfile.ZipFile(zip_file, 'w', zipfile.ZIP_DEFLATED)
|
|
53
|
-
for item in os.listdir(
|
|
54
|
-
zip.write(
|
|
60
|
+
for item in os.listdir(qry_sketch):
|
|
61
|
+
zip.write(qry_sketch + os.sep + item)
|
|
55
62
|
zip.close()
|
|
56
|
-
url = "http://
|
|
63
|
+
url = "http://www.metakssdcoabundance.link/kssdtree/upload"
|
|
57
64
|
header = {
|
|
58
65
|
"kssdtree": 'upload'
|
|
59
66
|
}
|
|
@@ -219,7 +226,6 @@ def view_tree(newick, taxonomy, mode):
|
|
|
219
226
|
# t.render("bubble_map.png", w=600, dpi=300, tree_style=ts)
|
|
220
227
|
t.show(tree_style=ts)
|
|
221
228
|
|
|
222
|
-
|
|
223
229
|
def deal_gtdb_txt(temp_dist_output):
|
|
224
230
|
data = pd.read_csv(temp_dist_output, delimiter='\t', header=None, skiprows=1)
|
|
225
231
|
column_2 = data.iloc[:, 1]
|
kssdtree-1.1.1/kssdtree.py
DELETED
|
@@ -1,390 +0,0 @@
|
|
|
1
|
-
import kssd
|
|
2
|
-
import nj
|
|
3
|
-
import dnj
|
|
4
|
-
import toolutils
|
|
5
|
-
import os
|
|
6
|
-
import platform
|
|
7
|
-
import shutil
|
|
8
|
-
import time
|
|
9
|
-
import requests
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def shuffle(k=8, s=5, l=2, o='default'):
|
|
13
|
-
print('shuffling...')
|
|
14
|
-
kssd.write_dim_shuffle_file(k, s, l, o)
|
|
15
|
-
print('shuffle finished!')
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def sketch(shuffle=None, genomes=None, output=None, set_opt=None):
|
|
19
|
-
"""
|
|
20
|
-
sketch: sketching genomes into sketch and generating sketch files.
|
|
21
|
-
:param shuffle: Kssdtree provide 'L3K9.shuf' and 'L3K10.shuf' files as input for genome sketching or decomposition. The default is 'L3K10.shuf'.
|
|
22
|
-
:param genomes: The folder path for genome files. It supports the input of genome files in fasta/fastq formats.
|
|
23
|
-
:param output: The output folder path for sketch result files of genome files.
|
|
24
|
-
:param set_opt: Whether to do the set operation, default is False, if you want to do the set operation, you can set set_opt=True.
|
|
25
|
-
:return: null
|
|
26
|
-
"""
|
|
27
|
-
if set_opt is None:
|
|
28
|
-
set_opt = False
|
|
29
|
-
if shuffle is not None and genomes is not None and output is not None:
|
|
30
|
-
current_directory = os.getcwd()
|
|
31
|
-
shuf_file_path = os.path.join(current_directory, shuffle)
|
|
32
|
-
if not os.path.exists(shuf_file_path):
|
|
33
|
-
if shuffle == 'L3K9.shuf':
|
|
34
|
-
print('downloading...', shuffle)
|
|
35
|
-
import http.client
|
|
36
|
-
http.client.HTTPConnection._http_vsn = 10
|
|
37
|
-
http.client.HTTPConnection._http_vsn_str = 'HTTP/1.0'
|
|
38
|
-
url = 'http://18.205.53.149:8000/kssdtree/shuffle/' + shuffle
|
|
39
|
-
headers = {
|
|
40
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'
|
|
41
|
-
}
|
|
42
|
-
r = requests.get(url, headers=headers, stream=True)
|
|
43
|
-
with open(os.getcwd() + "\\" + shuffle, mode="wb") as f:
|
|
44
|
-
f.write(r.content)
|
|
45
|
-
print('download finished!', shuffle)
|
|
46
|
-
else:
|
|
47
|
-
file_name = shuffle.split('.')[0]
|
|
48
|
-
k = int(file_name[3:])
|
|
49
|
-
if k == 10:
|
|
50
|
-
s = 6
|
|
51
|
-
else:
|
|
52
|
-
s = 5
|
|
53
|
-
l = int(file_name[1])
|
|
54
|
-
shuffle(k=k, s=s, l=l, o=file_name)
|
|
55
|
-
print('sketching...')
|
|
56
|
-
start = time.time()
|
|
57
|
-
if set_opt:
|
|
58
|
-
kssd.dist_dispatch(shuffle, genomes, output, 1, 0, 0)
|
|
59
|
-
else:
|
|
60
|
-
kssd.dist_dispatch(shuffle, genomes, output, 0, 0, 0)
|
|
61
|
-
end = time.time()
|
|
62
|
-
print('sketch spend time:%.2fs' % (end - start))
|
|
63
|
-
print('sketch finished!')
|
|
64
|
-
else:
|
|
65
|
-
print('args error!!!')
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def dist(ref_sketch=None, qry_sketch=None, output=None, flag=None):
|
|
69
|
-
"""
|
|
70
|
-
computing pairwise distances between reference and query genomes, and then generating a distance matrix in phylip format.
|
|
71
|
-
:param ref_sketch: The folder path for sketch result files of reference genome files.
|
|
72
|
-
:param qry_sketch: The folder path for sketch result files of query genome files.
|
|
73
|
-
:param output: The output filename of distance matrix in phylip format.
|
|
74
|
-
:param flag: 0 or 1. 0,1 is used to generate the distance matrix required by NJ (0 for diagonal elements) and DNJ (no diagonal elements) respectively.
|
|
75
|
-
:return: null
|
|
76
|
-
"""
|
|
77
|
-
if flag is None:
|
|
78
|
-
flag = 0
|
|
79
|
-
if ref_sketch is not None and qry_sketch is not None and output is not None:
|
|
80
|
-
print('disting...')
|
|
81
|
-
start = time.time()
|
|
82
|
-
kssd.dist_dispatch(ref_sketch, output, qry_sketch, 2, 0, flag)
|
|
83
|
-
end = time.time()
|
|
84
|
-
print('dist spend time:%.2fs' % (end - start))
|
|
85
|
-
print('dist finished!')
|
|
86
|
-
else:
|
|
87
|
-
print('args error!!!')
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def retrieve(ref_sketch=None, qry_sketch=None, output=None, N=None):
|
|
91
|
-
"""
|
|
92
|
-
retrieving N closest sketches from reference or GTDB (R214) sketches and combining query sketch files.
|
|
93
|
-
:param ref_sketch: The folder path for sketch result files of reference genome files.
|
|
94
|
-
:param qry_sketch: The folder path for sketch result files of query genome files.
|
|
95
|
-
:param output: The output folder path for retrieve sketch result files of genome files.
|
|
96
|
-
:param N: Max number of nearest reference genomes.
|
|
97
|
-
:return: null
|
|
98
|
-
"""
|
|
99
|
-
if ref_sketch is not None and qry_sketch is not None and output is not None:
|
|
100
|
-
if ref_sketch == 'gtdbr214_sketch':
|
|
101
|
-
print('retrieving...')
|
|
102
|
-
start = time.time()
|
|
103
|
-
temp_related_sketch = 'related_sketch'
|
|
104
|
-
reference = 'static/gtdbr214_sketch'
|
|
105
|
-
kssd.dist_dispatch(reference, output, qry_sketch, 2, N, 3)
|
|
106
|
-
kssd.print_gnames(reference, 'gtdb.txt')
|
|
107
|
-
file_path1 = os.path.join(os.getcwd(), 'distout', 'distance.out')
|
|
108
|
-
toolutils.deal_gtdb_txt(file_path1)
|
|
109
|
-
kssd.grouping_genomes('new_gtdb.txt', reference, temp_related_sketch)
|
|
110
|
-
kssd.dist_dispatch(output, qry_sketch, temp_related_sketch, 3, 0, 0)
|
|
111
|
-
end = time.time()
|
|
112
|
-
print('retrieve spend time:%.2fs' % (end - start))
|
|
113
|
-
print('retrieve finished!')
|
|
114
|
-
else:
|
|
115
|
-
print('retrieving...')
|
|
116
|
-
start = time.time()
|
|
117
|
-
timeStamp = int(time.mktime(time.localtime(time.time())))
|
|
118
|
-
temp_related_sketch = 'related_sketch_' + str(timeStamp)
|
|
119
|
-
kssd.dist_dispatch(ref_sketch, output, qry_sketch, 2, N, 3)
|
|
120
|
-
kssd.print_gnames(ref_sketch, 'gtdb.txt')
|
|
121
|
-
file_path1 = os.path.join(os.getcwd(), 'distout', 'distance.out')
|
|
122
|
-
toolutils.deal_gtdb_txt(file_path1)
|
|
123
|
-
kssd.grouping_genomes('new_gtdb.txt', ref_sketch, temp_related_sketch)
|
|
124
|
-
kssd.dist_dispatch(output, qry_sketch, temp_related_sketch, 3, 0, 0)
|
|
125
|
-
end = time.time()
|
|
126
|
-
file_path1 = 'new.txt'
|
|
127
|
-
file_path2 = 'gtdb.txt'
|
|
128
|
-
file_path3 = 'new_gtdb.txt'
|
|
129
|
-
file_path4 = 'related_genomes_values.txt'
|
|
130
|
-
file_path5 = 'modified_file.txt'
|
|
131
|
-
file_path6 = 'new_accession_taxonomy.txt'
|
|
132
|
-
if os.path.exists(file_path1):
|
|
133
|
-
os.remove(file_path1)
|
|
134
|
-
if os.path.exists(file_path2):
|
|
135
|
-
os.remove(file_path2)
|
|
136
|
-
if os.path.exists(file_path3):
|
|
137
|
-
os.remove(file_path3)
|
|
138
|
-
if os.path.exists(file_path4):
|
|
139
|
-
os.remove(file_path4)
|
|
140
|
-
if os.path.exists(file_path5):
|
|
141
|
-
os.remove(file_path5)
|
|
142
|
-
if os.path.exists(file_path6):
|
|
143
|
-
os.remove(file_path6)
|
|
144
|
-
print('retrieve spend time:%.2fs' % (end - start))
|
|
145
|
-
print('retrieve finished!')
|
|
146
|
-
else:
|
|
147
|
-
print('args error!!!')
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
def build(phylip=None, output=None, method=None):
    """
    Construct a tree with NJ or DNJ and write it in newick format.

    :param phylip: The distance matrix in phylip format.
    :param output: The output filename of tree in newick format. The default is 'kssdtree.newick'.
    :param method: 'nj'(NJ) or 'dnj'(DNJ) method for constructing tree. The default is 'nj'.
    :return: null
    """
    if method is None:
        method = 'nj'
    # The method is validated before phylip, so an unsupported method is
    # reported even when no distance matrix was supplied.
    if method not in ('nj', 'dnj'):
        print('method only support nj and dnj!!!')
        return
    if phylip is None:
        print('args error!!!')
        return

    print('building...')
    start = time.time()
    if output is None:
        output = 'kssdtree.newick'

    # dnj.build is only called on Linux; on any other platform a 'dnj'
    # request falls back to nj.build.
    if method == 'dnj' and platform.system() == 'Linux':
        state = dnj.build(phylip, output, method)
    else:
        state = nj.build(phylip, output)

    if state == 1:
        # Rewrite the tree file with every newline removed so that the
        # newick string sits on a single line.
        nwk_path = os.path.join(os.getcwd(), output)
        with open(nwk_path, 'r') as f:
            newick = f.read().replace('\n', '')
        with open(nwk_path, 'w') as f:
            f.write(newick)

    end = time.time()
    print('build spend time:%.2fs' % (end - start))
    print('build finished!')
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def visualize(newick=None, taxonomy=None, mode=None):
    """
    Display a tree by delegating to toolutils.view_tree (ETE3 toolkit).

    :param newick: The tree in newick format.
    :param taxonomy: The taxonomy information in txt format, which records the name (accession) of genome and its taxonomy. The default is None.
    :param mode: 'r'(rectangle) or 'c'(circle) mode for visualizing tree. The default is 'r'.
    :return: null
    """
    if newick is None:
        print('args error!!!')
        return
    # A missing mode means the rectangular layout.
    layout = 'r' if mode is None else mode
    toolutils.view_tree(newick, taxonomy, mode=layout)
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
def union(ref_sketch=None, output=None):
    """
    Build the union sketch of a reference sketch via kssd.sketch_union.

    :param ref_sketch: The folder path for reference sketch result files.
    :param output: The output path handed to kssd.sketch_union for the union sketch.
    :return: null
    """
    if ref_sketch is None or output is None:
        print('args error!!!')
        return
    # Forward the caller's ref_sketch. The bare name `sketch` is not a
    # parameter of this function (it resolves to the module-level sketch()
    # function), so passing it ignored the argument entirely.
    kssd.sketch_union(ref_sketch, output)
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def subtract(ref_sketch=None, genomes_sketch=None, output=None, flag=0):
    """
    Subtract the reference sketch from the genome sketches and write the remainder sketch files.

    :param ref_sketch: The folder path for reference sketch result files.
    :param genomes_sketch: The folder path for sketch result files of genome files.
    :param output: The output folder path for remainder sketch result files.
    :param flag: 1 subtracts ref_sketch as given; any other value first merges ref_sketch into a
        temporary union sketch (through union) and subtracts that instead. The default is 0.
    :return: null
    """
    if ref_sketch is None or genomes_sketch is None or output is None:
        print('args error!!!')
        return

    subtract_directly = flag == 1
    union_name = None
    if not subtract_directly:
        # Timestamped name for the temporary union sketch.
        stamp = int(time.mktime(time.localtime(time.time())))
        union_name = 'ref_union_sketch_' + str(stamp)

    print('subtracting...')
    began = time.time()
    if subtract_directly:
        kssd.sketch_operate(ref_sketch, output, genomes_sketch)
    else:
        union(ref_sketch=ref_sketch, output=union_name)
        kssd.sketch_operate(union_name, output, genomes_sketch)
    finished = time.time()

    if not subtract_directly:
        # The temporary union sketch is only removed on Linux.
        union_dir = os.path.join(os.getcwd(), union_name)
        if platform.system() == 'Linux' and os.path.exists(union_dir):
            shutil.rmtree(union_dir)

    print('subtract spend time:%.2fs' % (finished - began))
    print('subtract finished!')
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def quick(shuffle=None, genomes=None, output=None, reference=None, taxonomy=None, method='nj', mode='r', N=0):
    """
    Run a whole pipeline in one call, removing the need to organise intermediate files.

    Three workflows are selected from the arguments:
      * reference is None: routine workflow (sketch, distances, tree).
      * reference == 'gtdbr214' and taxonomy is None: phylogenetic placement.
      * anything else: reference subtraction workflow.

    :param shuffle: Kssdtree provide frequently-used 'L3K9.shuf' and 'L3K10.shuf' files as input for genome sketching or decomposition. The default is 'L3K10.shuf'. If you want to perform phylogenetic placement, you must use 'L3K9.shuf' file.
    :param genomes: The folder path for genome files. It supports the input of genome files in fasta/fastq formats.
    :param output: The output filename of tree in newick format.
    :param reference: The default is None, will perform the routine workflow. If you want to perform the reference subtraction workflow, you can set reference to the reference genome file or folder path. If you want to perform the phylogenetic placement, you must set reference to 'gtdbr214'.
    :param taxonomy: The filename of taxonomy information in txt format, which records the name (accession) of genome and its taxonomy. The default is None.
    :param method: 'nj'(NJ) or 'dnj'(DNJ) method for constructing tree. The default is 'nj'.
    :param mode: 'r'(rectangle) or 'c'(circle) mode for visualizing tree. The default is 'r'.
    :param N: Max number of nearest reference genomes. The default is 0 for computing pairwise distances between genomes on routine and reference subtraction workflows. If you want to perform the phylogenetic placement, you can set N > 0.
    :return: 0 when a genome file is rejected by toolutils.allowed_file or when placement is requested with a shuffle other than 'L3K9.shuf'; otherwise null.
    """
    # Only the absence of a reference selects the routine workflow. Also
    # requiring taxonomy to be None sent "taxonomy without reference" calls
    # into the subtraction workflow with reference=None.
    if reference is None:
        return _quick_routine(shuffle, genomes, output, taxonomy, method, mode)
    if reference == 'gtdbr214' and taxonomy is None:
        return _quick_placement(shuffle, genomes, output, method, mode, N)
    return _quick_subtraction(shuffle, genomes, output, reference, taxonomy, method, mode)


def _quick_timestamp():
    """Return the current local time in whole seconds as a string, used to name temporary sketches."""
    return str(int(time.mktime(time.localtime(time.time()))))


def _quick_genomes_allowed(genomes):
    """Report the first file in `genomes` rejected by toolutils.allowed_file; return True when none is."""
    for filename in os.listdir(genomes):
        if not toolutils.allowed_file(filename):
            print('Genome format error for file:', filename)
            return False
    return True


def _quick_remove_dirs_on_linux(names):
    """Delete the named directories under the current directory; does nothing off Linux."""
    if platform.system() != 'Linux':
        return
    current_directory = os.getcwd()
    for name in names:
        path = os.path.join(current_directory, name)
        if os.path.exists(path):
            shutil.rmtree(path)


def _quick_routine(shuffle, genomes, output, taxonomy, method, mode):
    """Routine workflow: sketch the genomes, compute pairwise distances, build the tree."""
    if shuffle is None or genomes is None or output is None:
        print('args error!!!')
        return None
    if not _quick_genomes_allowed(genomes):
        return 0
    temp_sketch = genomes + '_sketch_' + _quick_timestamp()
    temp_phy = 'temp.phy'
    print('step1...')
    sketch(shuffle=shuffle, genomes=genomes, output=temp_sketch, set_opt=False)
    print('step2...')
    # dist receives flag=0 for 'nj' and flag=1 for any other method.
    dist(ref_sketch=temp_sketch, qry_sketch=temp_sketch, output=temp_phy, flag=0 if method == 'nj' else 1)
    print('step3...')
    build(phylip=temp_phy, output=output, method=method)
    if platform.system() != 'Linux':
        # Visualization is skipped on Linux. The message is printed before
        # the call, as in the original -- presumably because the viewer
        # blocks; TODO confirm.
        print('step4...')
        print('tree visualization finished!')
        visualize(newick=output, taxonomy=taxonomy, mode=mode)
    _quick_remove_dirs_on_linux([temp_sketch, 'distout'])
    return None


def _quick_placement(shuffle, genomes, output, method, mode, N):
    """Placement workflow: sketch the genomes and request the tree through toolutils.upload_request."""
    if shuffle is None or genomes is None or output is None or not toolutils.is_positive_integer(N):
        print('args error or N<=0!!!')
        return None
    if shuffle != 'L3K9.shuf':
        print("shuffle must be set to 'L3K9.shuf'")
        return 0
    if not _quick_genomes_allowed(genomes):
        return 0
    temp_sketch = genomes + '_sketch_' + _quick_timestamp()
    sketch(shuffle=shuffle, genomes=genomes, output=temp_sketch, set_opt=True)
    newick, accession_taxonomy = toolutils.upload_request(dir_name=temp_sketch, method=method, N=N)
    with open(output, 'w') as f:
        f.write(newick)
    # The returned taxonomy mapping is written as "<key> <value>" lines and
    # used as the taxonomy file for visualization.
    with open('accession_taxonomy.txt', 'w') as f:
        for key, value in accession_taxonomy.items():
            f.write("%s %s\n" % (key, value))
    if platform.system() != 'Linux':
        print('tree visualization finished!')
        # Honour the caller's mode; the hard-coded None always produced the
        # default rectangular layout.
        visualize(newick=output, taxonomy='accession_taxonomy.txt', mode=mode)
    return None


def _quick_subtraction(shuffle, genomes, output, reference, taxonomy, method, mode):
    """Reference subtraction workflow: sketch, union, subtract, distances, tree."""
    if shuffle is None or genomes is None or output is None or method not in ('nj', 'dnj'):
        print('args error!!!')
        return None
    stamp = _quick_timestamp()
    temp_reference_sketch = 'ref_sketch_' + stamp
    temp_genomes_sketch = genomes + '_sketch_' + stamp
    # A separate union sketch name is only used when `reference` is not an
    # allowed genome file and toolutils.get_file_num reports a count other
    # than 1 for it; otherwise the reference sketch name is reused.
    temp_union_sketch = temp_reference_sketch
    if not toolutils.allowed_file(reference):
        ref_path = os.path.join(os.getcwd(), reference)
        if toolutils.get_file_num(ref_path) != 1:
            temp_union_sketch = 'ref_union_sketch_' + stamp
    temp_subtract_sketch = genomes + '_subtract_sketch_' + stamp
    temp_phy = 'temp.phy'
    print('step1...')
    sketch(shuffle=shuffle, genomes=reference, output=temp_reference_sketch, set_opt=True)
    sketch(shuffle=shuffle, genomes=genomes, output=temp_genomes_sketch, set_opt=True)
    print('step2...')
    union(ref_sketch=temp_reference_sketch, output=temp_union_sketch)
    subtract(ref_sketch=temp_union_sketch, genomes_sketch=temp_genomes_sketch,
             output=temp_subtract_sketch, flag=1)
    print('step3...')
    dist(ref_sketch=temp_subtract_sketch, qry_sketch=temp_subtract_sketch, output=temp_phy,
         flag=0 if method == 'nj' else 1)
    print('step4...')
    build(phylip=temp_phy, output=output, method=method)
    if platform.system() != 'Linux':
        print('step5...')
        print('tree visualization finished!')
        visualize(newick=output, taxonomy=taxonomy, mode=mode)
    _quick_remove_dirs_on_linux([
        temp_reference_sketch,
        temp_genomes_sketch,
        temp_union_sketch,
        temp_subtract_sketch,
        'distout',
    ])
    return None
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|