cvmcore 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cvmcore-0.2.0 → cvmcore-0.2.2}/PKG-INFO +2 -2
- cvmcore-0.2.2/cvmcore/__init__.py +10 -0
- cvmcore-0.2.2/cvmcore/cvmcore.py +141 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/cvmcore.egg-info/PKG-INFO +2 -2
- {cvmcore-0.2.0 → cvmcore-0.2.2}/cvmcore.egg-info/SOURCES.txt +0 -4
- {cvmcore-0.2.0 → cvmcore-0.2.2}/cvmcore.egg-info/requires.txt +0 -1
- {cvmcore-0.2.0 → cvmcore-0.2.2}/setup.py +7 -1
- cvmcore-0.2.0/cvmcore/__init__.py +0 -10
- cvmcore-0.2.0/cvmcore/cvmcore.py +0 -1455
- cvmcore-0.2.0/cvmcore/inputdir/CPF_RS14355.fasta +0 -260
- cvmcore-0.2.0/cvmcore/inputdir/CPF_RS14360.fasta +0 -449
- cvmcore-0.2.0/cvmcore/inputdir/CPF_RS14365.fasta +0 -491
- cvmcore-0.2.0/cvmcore/test.py +0 -4
- {cvmcore-0.2.0 → cvmcore-0.2.2}/README.md +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/cvmcore.egg-info/dependency_links.txt +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/cvmcore.egg-info/top_level.txt +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/demo.ipynb +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/gene_feature.csv +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/circular_dendrogram.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/circular_dendrogram_color_label.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/circular_dendrogram_openangle.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/circular_dendrogram_startangle.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/circular_dendrogram_tippoints.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/dendrogram.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/dendrogram_heatmap_cmap.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/dendrogram_heatmap_minimumvalue.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/dendrogram_with_heatmap.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/gene_arrow_all.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/gene_arrow_bottom.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/gene_arrow_top.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/multiple_heatmap.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/phylogenetic_tree.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/phylotree_heatmap_withgenes.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/phylotree_multiple_heatmap.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/phylotree_with_heatmap-remove_tiplabel.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/demodata/screenshots/phylotree_with_heatmap.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/requirements.txt +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_11_1.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_13_1.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_15_1.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_20_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_23_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_25_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_27_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_29_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_35_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_39_1.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_41_1.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/screenshots/output_7_0.png +0 -0
- {cvmcore-0.2.0 → cvmcore-0.2.2}/setup.cfg +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cvmcore
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: SZQ lab data analysis core function
|
|
5
|
-
Home-page: https://github.com/hbucqp/
|
|
5
|
+
Home-page: https://github.com/hbucqp/cvmplot
|
|
6
6
|
Author: Qingpo Cui
|
|
7
7
|
Author-email: cqp@cau.edu.cn
|
|
8
8
|
License: MIT Licence
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# -*- coding:utf-8 -*-
|
|
2
|
+
|
|
3
|
+
__title__ = 'cvmplot'
|
|
4
|
+
__description__ = 'Major plot function used by SZQ lab from China Agricultural University'
|
|
5
|
+
__url__ = 'https://github.com/hbucqp/cvmplot'
|
|
6
|
+
__version__ = "0.2.2"
|
|
7
|
+
__author__ = 'Qingpo Cui'
|
|
8
|
+
__author_email__ = 'cqp@cau.edu.cn'
|
|
9
|
+
__license__ = 'MIT'
|
|
10
|
+
__copyright__ = 'Copyright 2023 Qingpo Cui'
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import time
|
|
4
|
+
|
|
5
|
+
# data process
|
|
6
|
+
import numpy as np
|
|
7
|
+
import pandas as pd
|
|
8
|
+
from scipy.cluster.hierarchy import linkage, dendrogram, complete, to_tree
|
|
9
|
+
from scipy.spatial.distance import squareform
|
|
10
|
+
from tabulate import tabulate
|
|
11
|
+
from io import StringIO
|
|
12
|
+
import warnings
|
|
13
|
+
# from Bio.Blast import NCBIWWW
|
|
14
|
+
with warnings.catch_warnings():
|
|
15
|
+
warnings.simplefilter('ignore', category=DeprecationWarning)
|
|
16
|
+
from Bio import SeqIO
|
|
17
|
+
from Bio import Phylo
|
|
18
|
+
from Bio.Seq import Seq
|
|
19
|
+
from Bio.SeqRecord import SeqRecord
|
|
20
|
+
from Bio.Blast import NCBIXML
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# matplotlib
|
|
24
|
+
import matplotlib
|
|
25
|
+
import matplotlib as mpl
|
|
26
|
+
from matplotlib import pyplot as plt
|
|
27
|
+
from matplotlib.lines import Line2D
|
|
28
|
+
from matplotlib.patches import Patch, FancyArrow
|
|
29
|
+
from matplotlib.transforms import Affine2D
|
|
30
|
+
from matplotlib.colors import LinearSegmentedColormap
|
|
31
|
+
import matplotlib.collections as mpcollections
|
|
32
|
+
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
|
|
33
|
+
from typing import Optional, List, Dict, Union, Tuple
|
|
34
|
+
|
|
35
|
+
plt.rcParams['font.family'] = 'sans-serif'
|
|
36
|
+
plt.rcParams['font.sans-serif'] = ['Arial']
|
|
37
|
+
plt.rcParams['svg.fonttype'] = 'none'
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class cfunc():
    """
    Namespace class grouping small sequence-file helper functions.

    Every helper is a static method, so each can be called either on the
    class (``cfunc.is_fasta(path)``) or on an instance.
    """

    @staticmethod
    def is_fasta(file):
        """
        Check if the input file is in fasta format.

        Parameters
        ----------
        file :
            file path string

        Returns
        ----------
        bool
            True when at least one fasta record can be parsed from the file.
            False when no record is found or when opening/parsing fails
            (a message is printed in the failure case).
        """
        try:
            with open(file, "r") as handle:
                fasta = SeqIO.parse(handle, "fasta")
                # False when `fasta` is empty, i.e. wasn't a FASTA file
                return any(fasta)
        except Exception:
            # Deliberately best-effort: any failure means "not a fasta file".
            # `Exception` (not a bare except) keeps Ctrl-C / SystemExit working.
            print(f'The input file {file} is not a valid fasta file.')
            return False

    @staticmethod
    def get_mod_time(file):
        """
        Return the last modified time of file as YYYY-MM-DD string format.

        Parameters
        ----------
        file :
            file path string

        Returns
        ----------
        str
            Modification date of the file, formatted in local time.

        Raises
        ----------
        OSError
            Propagated from ``os.stat`` when the file cannot be accessed.
        """
        file = os.path.abspath(file)
        md_time = os.stat(file).st_mtime
        return time.strftime("%Y-%m-%d", time.localtime(md_time))

    @staticmethod
    def check_sequence_type(file_path):
        """
        Check the input file type (DNA or Amino Acid).

        Only the first record of the fasta file is examined.

        Parameters
        ----------
        file_path :
            path of the fasta file

        Returns
        ----------
        str
            "DNA" when the first sequence uses only A/T/C/G,
            "Amino Acid" when it uses only the 20 standard residue letters,
            "Unknown" when the file has no records or other letters occur,
            "Error: <message>" when reading the file raises an exception.

        Notes
        ----------
        The DNA alphabet is strict: a nucleotide sequence containing N is
        reported as "Amino Acid", because N is a valid residue letter.
        An empty first sequence is reported as "DNA", because the empty set
        is a subset of the DNA alphabet.
        """
        try:
            # Read the sequences from the file
            records = list(SeqIO.parse(file_path, "fasta"))
            if not records:
                return "Unknown"

            # Upper-case so that soft-masked (lower-case) sequences are handled
            letters = set(str(records[0].seq).upper())

            # Define sets of characters for DNA and amino acids
            dna_chars = set("ATCG")
            amino_acid_chars = set("ACDEFGHIKLMNPQRSTVWY")

            # DNA is tested first because its alphabet is a subset of the
            # amino acid alphabet
            if letters.issubset(dna_chars):
                return "DNA"
            if letters.issubset(amino_acid_chars):
                return "Amino Acid"
            return "Unknown"
        except Exception as e:
            # Errors are reported through the return value, not raised
            return f"Error: {e}"

    @staticmethod
    def alleles2ref(files_dir: str, outpath: str, outname: str):
        """
        Create cgMLST reference sequences using fasta files downloaded from "https://www.cgmlst.org/"

        Every record of every ``*.fasta`` file found in ``files_dir`` is
        renamed to ``<file base>_<record id>`` and all records are written
        into a single fasta file ``<outpath>/<outname>.fsa``.

        Parameters
        ----------
        files_dir :
            directory holding the per-locus ``.fasta`` files
        outpath :
            output directory, created when it does not exist
        outname :
            output file name without the ``.fsa`` extension
        """
        files_dir = os.path.abspath(files_dir)
        new_records = []
        # Sorted so the record order of the output file is reproducible;
        # os.listdir returns entries in arbitrary order.
        for file_name in sorted(os.listdir(files_dir)):
            if not file_name.endswith('.fasta'):
                continue
            # Text before the first dot is used as the locus prefix
            file_base = file_name.split('.')[0]
            fasta_path = os.path.join(files_dir, file_name)
            for record in SeqIO.parse(fasta_path, 'fasta'):
                record.id = file_base + "_" + record.id
                record.name = file_base + "_" + record.name
                # Empty description keeps the header line down to the id
                record.description = ''
                new_records.append(record)

        # Make sure the output directory exists
        outdir = os.path.abspath(outpath)
        os.makedirs(outdir, exist_ok=True)

        # Specify the output file name
        output_file = os.path.join(outdir, f'{outname}.fsa')
        # Write the modified sequences to the new fasta file
        with open(output_file, "w") as output_handle:
            SeqIO.write(new_records, output_handle, "fasta")
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: cvmcore
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: SZQ lab data analysis core function
|
|
5
|
-
Home-page: https://github.com/hbucqp/
|
|
5
|
+
Home-page: https://github.com/hbucqp/cvmplot
|
|
6
6
|
Author: Qingpo Cui
|
|
7
7
|
Author-email: cqp@cau.edu.cn
|
|
8
8
|
License: MIT Licence
|
|
@@ -3,15 +3,11 @@ requirements.txt
|
|
|
3
3
|
setup.py
|
|
4
4
|
cvmcore/__init__.py
|
|
5
5
|
cvmcore/cvmcore.py
|
|
6
|
-
cvmcore/test.py
|
|
7
6
|
cvmcore.egg-info/PKG-INFO
|
|
8
7
|
cvmcore.egg-info/SOURCES.txt
|
|
9
8
|
cvmcore.egg-info/dependency_links.txt
|
|
10
9
|
cvmcore.egg-info/requires.txt
|
|
11
10
|
cvmcore.egg-info/top_level.txt
|
|
12
|
-
cvmcore/inputdir/CPF_RS14355.fasta
|
|
13
|
-
cvmcore/inputdir/CPF_RS14360.fasta
|
|
14
|
-
cvmcore/inputdir/CPF_RS14365.fasta
|
|
15
11
|
demodata/demo.ipynb
|
|
16
12
|
demodata/gene_feature.csv
|
|
17
13
|
demodata/screenshots/circular_dendrogram.png
|
|
@@ -18,7 +18,13 @@ except ImportError:
|
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
requirements = [
|
|
21
|
-
'Bio',
|
|
21
|
+
'Bio',
|
|
22
|
+
'pandas',
|
|
23
|
+
'setuptools',
|
|
24
|
+
'matplotlib',
|
|
25
|
+
'numpy',
|
|
26
|
+
'scipy',
|
|
27
|
+
'tabulate'
|
|
22
28
|
]
|
|
23
29
|
|
|
24
30
|
about = {}
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
# -*- coding:utf-8 -*-
|
|
2
|
-
|
|
3
|
-
__title__ = 'cvmbcore'
|
|
4
|
-
__description__ = 'core function of data analysis used by SZQ lab from China Agricultural University'
|
|
5
|
-
__url__ = 'https://github.com/hbucqp/cvmcore'
|
|
6
|
-
__version__ = "0.2.0"
|
|
7
|
-
__author__ = 'Qingpo Cui'
|
|
8
|
-
__author_email__ = 'cqp@cau.edu.cn'
|
|
9
|
-
__license__ = 'MIT'
|
|
10
|
-
__copyright__ = 'Copyright 2023 Qingpo Cui'
|