cvmcore 0.2.2__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. cvmcore-0.3.0/.gitignore +2 -0
  2. cvmcore-0.3.0/.python-version +1 -0
  3. {cvmcore-0.2.2 → cvmcore-0.3.0}/PKG-INFO +13 -18
  4. cvmcore-0.3.0/pyproject.toml +34 -0
  5. {cvmcore-0.2.2 → cvmcore-0.3.0/src}/cvmcore/__init__.py +1 -1
  6. {cvmcore-0.2.2 → cvmcore-0.3.0/src}/cvmcore/cvmcore.py +112 -17
  7. cvmcore-0.3.0/uv.lock +608 -0
  8. cvmcore-0.2.2/cvmcore.egg-info/PKG-INFO +0 -736
  9. cvmcore-0.2.2/cvmcore.egg-info/SOURCES.txt +0 -42
  10. cvmcore-0.2.2/cvmcore.egg-info/dependency_links.txt +0 -1
  11. cvmcore-0.2.2/cvmcore.egg-info/requires.txt +0 -7
  12. cvmcore-0.2.2/cvmcore.egg-info/top_level.txt +0 -1
  13. cvmcore-0.2.2/requirements.txt +0 -8
  14. cvmcore-0.2.2/setup.cfg +0 -4
  15. cvmcore-0.2.2/setup.py +0 -76
  16. {cvmcore-0.2.2 → cvmcore-0.3.0}/README.md +0 -0
  17. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/demo.ipynb +0 -0
  18. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/gene_feature.csv +0 -0
  19. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram.png +0 -0
  20. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_color_label.png +0 -0
  21. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_openangle.png +0 -0
  22. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_startangle.png +0 -0
  23. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/circular_dendrogram_tippoints.png +0 -0
  24. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram.png +0 -0
  25. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_heatmap_cmap.png +0 -0
  26. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_heatmap_minimumvalue.png +0 -0
  27. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/dendrogram_with_heatmap.png +0 -0
  28. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_all.png +0 -0
  29. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_bottom.png +0 -0
  30. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/gene_arrow_top.png +0 -0
  31. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/multiple_heatmap.png +0 -0
  32. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylogenetic_tree.png +0 -0
  33. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_heatmap_withgenes.png +0 -0
  34. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_multiple_heatmap.png +0 -0
  35. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_with_heatmap-remove_tiplabel.png +0 -0
  36. {cvmcore-0.2.2 → cvmcore-0.3.0}/demodata/screenshots/phylotree_with_heatmap.png +0 -0
  37. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_11_1.png +0 -0
  38. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_13_1.png +0 -0
  39. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_15_1.png +0 -0
  40. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_20_0.png +0 -0
  41. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_23_0.png +0 -0
  42. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_25_0.png +0 -0
  43. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_27_0.png +0 -0
  44. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_29_0.png +0 -0
  45. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_35_0.png +0 -0
  46. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_39_1.png +0 -0
  47. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_41_1.png +0 -0
  48. {cvmcore-0.2.2 → cvmcore-0.3.0}/screenshots/output_7_0.png +0 -0
@@ -0,0 +1,2 @@
1
+ # 忽略mac文件
2
+ .DS_Store
@@ -0,0 +1 @@
1
+ 3.10
@@ -1,22 +1,17 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: cvmcore
3
- Version: 0.2.2
4
- Summary: SZQ lab data analysis core function
5
- Home-page: https://github.com/hbucqp/cvmplot
6
- Author: Qingpo Cui
7
- Author-email: cqp@cau.edu.cn
8
- License: MIT Licence
9
- Keywords: pip,mlst,cgmlst,plot
10
- Platform: any
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: Topic :: Software Development :: Build Tools
14
- Classifier: License :: OSI Approved :: MIT License
15
- Classifier: Programming Language :: Python :: 3.7
16
- Classifier: Programming Language :: Python :: 3.8
17
- Classifier: Programming Language :: Python :: 3.9
18
- Classifier: Programming Language :: Python :: 3.10
19
- Classifier: Programming Language :: Python :: 3.11
3
+ Version: 0.3.0
4
+ Summary: Major plot function used by SZQ lab from China Agricultural University
5
+ Project-URL: Homepage, https://github.com/hbucqp/cvmcore
6
+ Project-URL: Repository, https://github.com/hbucqp/cvmcore
7
+ Author-email: Qingpo Cui <cqp@cau.edu.cn>
8
+ License: MIT
9
+ Requires-Python: >=3.10
10
+ Requires-Dist: biopython>=1.87
11
+ Requires-Dist: numba>=0.65.1
12
+ Requires-Dist: numpy>=2.2.6
13
+ Requires-Dist: pandas>=2.3.3
14
+ Requires-Dist: scipy>=1.15.3
20
15
  Description-Content-Type: text/markdown
21
16
 
22
17
  # cvmcore
@@ -0,0 +1,34 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.20"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "cvmcore"
7
+ version = "0.3.0"
8
+ description = "Major plot function used by SZQ lab from China Agricultural University"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ authors = [
12
+ { name = "Qingpo Cui", email = "cqp@cau.edu.cn" },
13
+ ]
14
+ license = { text = "MIT" }
15
+ dependencies = [
16
+ "biopython>=1.87",
17
+ "numba>=0.65.1",
18
+ "numpy>=2.2.6",
19
+ "pandas>=2.3.3",
20
+ "scipy>=1.15.3",
21
+ ]
22
+
23
+ [project.urls]
24
+ Homepage = "https://github.com/hbucqp/cvmcore"
25
+ Repository = "https://github.com/hbucqp/cvmcore"
26
+
27
+ [tool.hatch.build.targets.wheel]
28
+ packages = ["src/cvmcore"]
29
+
30
+ [[tool.uv.index]]
31
+ url = "https://mirrors.aliyun.com/pypi/simple"
32
+ default = true
33
+
34
+
@@ -3,7 +3,7 @@
3
3
  __title__ = 'cvmplot'
4
4
  __description__ = 'Major plot function used by SZQ lab from China Agricultural University'
5
5
  __url__ = 'https://github.com/hbucqp/cvmplot'
6
- __version__ = "0.2.2"
6
+ __version__ = "0.3.0"
7
7
  __author__ = 'Qingpo Cui'
8
8
  __author_email__ = 'cqp@cau.edu.cn'
9
9
  __license__ = 'MIT'
@@ -1,5 +1,4 @@
1
1
  import os
2
- import sys
3
2
  import time
4
3
 
5
4
  # data process
@@ -7,8 +6,6 @@ import numpy as np
7
6
  import pandas as pd
8
7
  from scipy.cluster.hierarchy import linkage, dendrogram, complete, to_tree
9
8
  from scipy.spatial.distance import squareform
10
- from tabulate import tabulate
11
- from io import StringIO
12
9
  import warnings
13
10
  # from Bio.Blast import NCBIWWW
14
11
  with warnings.catch_warnings():
@@ -20,21 +17,44 @@ with warnings.catch_warnings():
20
17
  from Bio.Blast import NCBIXML
21
18
 
22
19
 
23
- # matplotlib
24
- import matplotlib
25
- import matplotlib as mpl
26
- from matplotlib import pyplot as plt
27
- from matplotlib.lines import Line2D
28
- from matplotlib.patches import Patch, FancyArrow
29
- from matplotlib.transforms import Affine2D
30
- from matplotlib.colors import LinearSegmentedColormap
31
- import matplotlib.collections as mpcollections
32
- from mpl_toolkits.axes_grid1.inset_locator import inset_axes
33
- from typing import Optional, List, Dict, Union, Tuple
20
+ # numba: JIT compilation and multi-threaded loops for the distance computation
21
+ from numba import njit, prange, set_num_threads
22
+
23
+
24
+
25
+
26
@njit(parallel=True)
def _diff_matrix_numba(array):
    """
    Compute the pairwise cgMLST allelic distance between all samples.

    input:
        array: 2-D array of shape n_samples x n_loci; int32 is the
               recommended dtype and missing alleles are encoded as -1

    out:
        n_samples x n_samples symmetric matrix (dtype=uint16) whose
        diagonal is zero

    rule:
        a locus adds 1 to the distance of a pair only when neither sample
        is missing (-1) at that locus and the two alleles differ;
        a locus where either sample is missing is skipped
    """
    n_samples = array.shape[0]
    n_loci = array.shape[1]
    distances = np.zeros((n_samples, n_samples), dtype=np.uint16)

    # The outer loop is a numba prange; each pair (i, j) with i < j is
    # visited exactly once, so every output cell has a single writer.
    for i in prange(n_samples):
        for j in range(i + 1, n_samples):
            mismatches = 0
            for k in range(n_loci):
                allele_i = array[i, k]
                allele_j = array[j, k]
                if not (allele_i == -1 or allele_j == -1 or allele_i == allele_j):
                    mismatches += 1
            # Mirror the count so the matrix stays symmetric
            distances[i, j] = mismatches
            distances[j, i] = mismatches

    return distances
34
57
 
35
- plt.rcParams['font.family'] = 'sans-serif'
36
- plt.rcParams['font.sans-serif'] = ['Arial']
37
- plt.rcParams['svg.fonttype'] = 'none'
38
58
 
39
59
 
40
60
  class cfunc():
@@ -139,3 +159,78 @@ class cfunc():
139
159
  # Write the modified sequences to the new fasta file
140
160
  with open(output_file, "w") as output_handle:
141
161
  SeqIO.write(new_records, output_handle, "fasta")
162
+
163
@staticmethod
def get_diff_matrix(array, threads=None):
    """
    Compute the pairwise allelic-difference matrix of an allele table.

    input:
        array: 2-D array-like (numpy array or pandas values) of shape
               n_samples x n_loci; missing calls may be given as np.nan
               or directly as -1
        threads: number of numba threads to use; when None the numba
                 thread setting is left untouched

    out:
        n x n pairwise diff matrix, dtype=uint16

    raises:
        ValueError: if the input is not 2-dimensional

    note:
        Distances are stored as uint16, so a count above 65,535 would
        wrap around. A RuntimeWarning is emitted when the number of
        columns makes that possible; switch the kernel to uint32 for
        such inputs.
    """

    # 1. Convert to a numpy array
    array = np.asarray(array)

    # Fail early with a clear message instead of an opaque error from the
    # compiled kernel, which unpacks exactly two dimensions.
    if array.ndim != 2:
        raise ValueError(
            "get_diff_matrix expects a 2-D array (n_samples x n_loci), "
            f"got an array with {array.ndim} dimension(s)"
        )

    # 2. Normalise to int32 with -1 for missing values.
    #    cgMLST alleles are integers, so there is no need to keep floats.
    #    Anything that is not already an integer dtype (float, object, ...)
    #    goes through float64 first so that NaN can be detected and mapped
    #    to -1 rather than crashing the integer cast.
    if np.issubdtype(array.dtype, np.integer):
        array = array.astype(np.int32, copy=False)
    else:
        as_float = array.astype(np.float64, copy=False)
        array = np.where(np.isnan(as_float), -1, as_float).astype(np.int32)

    # The distance between two samples is at most the number of loci;
    # warn when that bound no longer fits into the uint16 result.
    uint16_max = np.iinfo(np.uint16).max
    if array.shape[1] > uint16_max:
        warnings.warn(
            f"input has {array.shape[1]} columns; distances above "
            f"{uint16_max} overflow the uint16 result matrix",
            RuntimeWarning,
            stacklevel=2,
        )

    # 3. Apply the requested numba thread count, if any
    if threads is not None:
        set_num_threads(int(threads))

    # 4. Compute the distance matrix
    return _diff_matrix_numba(array)
196
+
197
+
198
+ @staticmethod
199
+ def df2matrix(df):
200
+ """
201
+ convert np.nan to -1 in the input dataframe and return as numpy array
202
+ input:
203
+ pandas dataframe
204
+ output:
205
+ numpy array
206
+ """
207
+ numeric_df = df.apply(pd.to_numeric, errors="coerce")
208
+ matrix = numeric_df.to_numpy(dtype=np.float64, copy=False)
209
+ matrix = np.where(np.isnan(matrix), -1, matrix).astype(np.int32)
210
+ return matrix
211
+
212
+
213
+ @staticmethod
214
+ def get_diff_df(df, threads=None):
215
+ """
216
+ 优化版 DataFrame 输入函数。
217
+
218
+ input:
219
+ df: input dataframe, such as the cgMLST results
220
+
221
+ output:
222
+ n x n pairwise distance dataframe
223
+ """
224
+
225
+ # 比 df.astype('float').values 更稳
226
+ # 先强制转数值,非法值变成 NaN
227
+ numeric_df = df.apply(pd.to_numeric, errors="coerce")
228
+ matrix = numeric_df.to_numpy(dtype=np.float64, copy=False)
229
+ diff_matrix = cfunc.get_diff_matrix(matrix, threads=threads)
230
+
231
+ return pd.DataFrame(
232
+ diff_matrix,
233
+ index=df.index,
234
+ columns=df.index,
235
+ )
236
+