cvmcore 0.2.2__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cvmcore-0.3.0/.gitignore +2 -0
- cvmcore-0.3.0/.python-version +1 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/PKG-INFO +13 -18
- cvmcore-0.3.0/pyproject.toml +34 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0/src}/cvmcore/__init__.py +1 -1
- {cvmcore-0.2.2 → cvmcore-0.3.0/src}/cvmcore/cvmcore.py +112 -17
- cvmcore-0.3.0/uv.lock +608 -0
- cvmcore-0.2.2/cvmcore.egg-info/PKG-INFO +0 -736
- cvmcore-0.2.2/cvmcore.egg-info/SOURCES.txt +0 -42
- cvmcore-0.2.2/cvmcore.egg-info/dependency_links.txt +0 -1
- cvmcore-0.2.2/cvmcore.egg-info/requires.txt +0 -7
- cvmcore-0.2.2/cvmcore.egg-info/top_level.txt +0 -1
- cvmcore-0.2.2/requirements.txt +0 -8
- cvmcore-0.2.2/setup.cfg +0 -4
- cvmcore-0.2.2/setup.py +0 -76
- {cvmcore-0.2.2 → cvmcore-0.3.0}/README.md +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/demo.ipynb +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/gene_feature.csv +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_color_label.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_openangle.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_startangle.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_tippoints.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_heatmap_cmap.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_heatmap_minimumvalue.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_with_heatmap.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_all.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_bottom.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_top.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/multiple_heatmap.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylogenetic_tree.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_heatmap_withgenes.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_multiple_heatmap.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_with_heatmap-remove_tiplabel.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_with_heatmap.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_11_1.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_13_1.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_15_1.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_20_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_23_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_25_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_27_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_29_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_35_0.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_39_1.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_41_1.png +0 -0
- {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_7_0.png +0 -0
cvmcore-0.3.0/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.10
|
|
@@ -1,22 +1,17 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: cvmcore
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary: SZQ lab
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
Author-email: cqp@cau.edu.cn
|
|
8
|
-
License: MIT
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.7
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Major plot function used by SZQ lab from China Agricultural University
|
|
5
|
+
Project-URL: Homepage, https://github.com/hbucqp/cvmcore
|
|
6
|
+
Project-URL: Repository, https://github.com/hbucqp/cvmcore
|
|
7
|
+
Author-email: Qingpo Cui <cqp@cau.edu.cn>
|
|
8
|
+
License: MIT
|
|
9
|
+
Requires-Python: >=3.10
|
|
10
|
+
Requires-Dist: biopython>=1.87
|
|
11
|
+
Requires-Dist: numba>=0.65.1
|
|
12
|
+
Requires-Dist: numpy>=2.2.6
|
|
13
|
+
Requires-Dist: pandas>=2.3.3
|
|
14
|
+
Requires-Dist: scipy>=1.15.3
|
|
20
15
|
Description-Content-Type: text/markdown
|
|
21
16
|
|
|
22
17
|
# cvmcore
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.20"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "cvmcore"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Major plot function used by SZQ lab from China Agricultural University"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
authors = [
|
|
12
|
+
{ name = "Qingpo Cui", email = "cqp@cau.edu.cn" },
|
|
13
|
+
]
|
|
14
|
+
license = { text = "MIT" }
|
|
15
|
+
dependencies = [
|
|
16
|
+
"biopython>=1.87",
|
|
17
|
+
"numba>=0.65.1",
|
|
18
|
+
"numpy>=2.2.6",
|
|
19
|
+
"pandas>=2.3.3",
|
|
20
|
+
"scipy>=1.15.3",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/hbucqp/cvmcore"
|
|
25
|
+
Repository = "https://github.com/hbucqp/cvmcore"
|
|
26
|
+
|
|
27
|
+
[tool.hatch.build.targets.wheel]
|
|
28
|
+
packages = ["src/cvmcore"]
|
|
29
|
+
|
|
30
|
+
[[tool.uv.index]]
|
|
31
|
+
url = "https://mirrors.aliyun.com/pypi/simple"
|
|
32
|
+
default = true
|
|
33
|
+
|
|
34
|
+
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
__title__ = 'cvmplot'
|
|
4
4
|
__description__ = 'Major plot function used by SZQ lab from China Agricultural University'
|
|
5
5
|
__url__ = 'https://github.com/hbucqp/cvmplot'
|
|
6
|
-
__version__ = "0.
|
|
6
|
+
__version__ = "0.3.0"
|
|
7
7
|
__author__ = 'Qingpo Cui'
|
|
8
8
|
__author_email__ = 'cqp@cau.edu.cn'
|
|
9
9
|
__license__ = 'MIT'
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import sys
|
|
3
2
|
import time
|
|
4
3
|
|
|
5
4
|
# data process
|
|
@@ -7,8 +6,6 @@ import numpy as np
|
|
|
7
6
|
import pandas as pd
|
|
8
7
|
from scipy.cluster.hierarchy import linkage, dendrogram, complete, to_tree
|
|
9
8
|
from scipy.spatial.distance import squareform
|
|
10
|
-
from tabulate import tabulate
|
|
11
|
-
from io import StringIO
|
|
12
9
|
import warnings
|
|
13
10
|
# from Bio.Blast import NCBIWWW
|
|
14
11
|
with warnings.catch_warnings():
|
|
@@ -20,21 +17,44 @@ with warnings.catch_warnings():
|
|
|
20
17
|
from Bio.Blast import NCBIXML
|
|
21
18
|
|
|
22
19
|
|
|
23
|
-
#
|
|
24
|
-
import
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
20
|
+
# import numba for multi-threaded computation
|
|
21
|
+
from numba import njit, prange, set_num_threads
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@njit(parallel=True)
def _diff_matrix_numba(array):
    """
    Compute the pairwise cgMLST allelic distance between samples.

    array:
        n_samples x n_loci
        int32 dtype is recommended
        missing values are encoded as -1

    rule:
        a locus adds 1 to the distance of a sample pair only when both
        alleles are present (not -1) and differ from each other;
        a locus where either sample is missing is skipped

    returns:
        n_samples x n_samples symmetric matrix (dtype=uint16) with a zero
        diagonal
    """
    n_samples, n_loci = array.shape
    dist = np.zeros((n_samples, n_samples), dtype=np.uint16)

    # The outer loop is the parallel one (prange). Each pair (p, q) with
    # p < q is visited exactly once and mirrored into both triangles.
    for p in prange(n_samples):
        for q in range(p + 1, n_samples):
            mismatches = 0
            for locus in range(n_loci):
                left = array[p, locus]
                right = array[q, locus]
                if left != right:
                    # only count the locus when neither side is missing
                    if left != -1 and right != -1:
                        mismatches += 1
            dist[p, q] = mismatches
            dist[q, p] = mismatches

    return dist
|
|
34
57
|
|
|
35
|
-
plt.rcParams['font.family'] = 'sans-serif'
|
|
36
|
-
plt.rcParams['font.sans-serif'] = ['Arial']
|
|
37
|
-
plt.rcParams['svg.fonttype'] = 'none'
|
|
38
58
|
|
|
39
59
|
|
|
40
60
|
class cfunc():
|
|
@@ -139,3 +159,78 @@ class cfunc():
|
|
|
139
159
|
# Write the modified sequences to the new fasta file
|
|
140
160
|
with open(output_file, "w") as output_handle:
|
|
141
161
|
SeqIO.write(new_records, output_handle, "fasta")
|
|
162
|
+
|
|
163
|
+
@staticmethod
|
|
164
|
+
def get_diff_matrix(array, threads=None):
|
|
165
|
+
"""
|
|
166
|
+
Optimized Function for Distance Matrix Calculation
|
|
167
|
+
|
|
168
|
+
input:
|
|
169
|
+
array: numpy array or pandas values
|
|
170
|
+
the input arraly could contains np.nan
|
|
171
|
+
threads: number of threads
|
|
172
|
+
|
|
173
|
+
out:
|
|
174
|
+
n x n pairwise diff matrix, dtype=uint16
|
|
175
|
+
|
|
176
|
+
note:
|
|
177
|
+
If the number of input array columns exceeds 65,535, change uint16 to uint32
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
# 1. 转成 numpy array
|
|
181
|
+
array = np.asarray(array)
|
|
182
|
+
|
|
183
|
+
# 2. 把 float/NaN 转成 int32/-1
|
|
184
|
+
# cgMLST allele 本质是整数,没必要用 float
|
|
185
|
+
if np.issubdtype(array.dtype, np.floating):
|
|
186
|
+
array = np.where(np.isnan(array), -1, array).astype(np.int32)
|
|
187
|
+
else:
|
|
188
|
+
array = array.astype(np.int32, copy=False)
|
|
189
|
+
|
|
190
|
+
# 3. 如果有线程参数,设置 numba 线程数
|
|
191
|
+
if threads is not None:
|
|
192
|
+
set_num_threads(int(threads))
|
|
193
|
+
|
|
194
|
+
# 4. 计算距离矩阵
|
|
195
|
+
return _diff_matrix_numba(array)
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@staticmethod
|
|
199
|
+
def df2matrix(df):
|
|
200
|
+
"""
|
|
201
|
+
convert np.nan to -1 in the input dataframe and return as numpy array
|
|
202
|
+
input:
|
|
203
|
+
pandas dataframe
|
|
204
|
+
output:
|
|
205
|
+
numpy array
|
|
206
|
+
"""
|
|
207
|
+
numeric_df = df.apply(pd.to_numeric, errors="coerce")
|
|
208
|
+
matrix = numeric_df.to_numpy(dtype=np.float64, copy=False)
|
|
209
|
+
matrix = np.where(np.isnan(matrix), -1, matrix).astype(np.int32)
|
|
210
|
+
return matrix
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@staticmethod
def get_diff_df(df, threads=None):
    """
    DataFrame front end for the pairwise difference computation.

    input:
        df: input dataframe, such as the cgMLST results
        threads: forwarded unchanged to cfunc.get_diff_matrix

    output:
        n x n pairwise distance dataframe, labelled on both axes with the
        index of the input dataframe
    """
    labels = df.index

    # Coerce every column to numeric first (invalid cells become NaN);
    # this is more robust than df.astype('float').values.
    coerced = df.apply(pd.to_numeric, errors="coerce")
    values = coerced.to_numpy(dtype=np.float64, copy=False)

    distances = cfunc.get_diff_matrix(values, threads=threads)
    return pd.DataFrame(distances, index=labels, columns=labels)
|
|
236
|
+
|