PyamilySeq 1.1.2__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- PyamilySeq/Seq_Combiner.py +2 -2
- PyamilySeq/Seq_Extractor.py +6 -1
- PyamilySeq/constants.py +1 -1
- PyamilySeq/utils.py +1 -3
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/METADATA +6 -6
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/RECORD +10 -10
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/WHEEL +1 -1
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/entry_points.txt +3 -0
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {pyamilyseq-1.1.2.dist-info → pyamilyseq-1.2.0.dist-info}/top_level.txt +0 -0
PyamilySeq/Seq_Combiner.py
CHANGED
|
@@ -59,7 +59,7 @@ def main():
|
|
|
59
59
|
exit(1)
|
|
60
60
|
if options.input_type == 'fasta' and options.name_split_fasta is None:
|
|
61
61
|
print("Please provide a substring to split the filename and extract the genome name.")
|
|
62
|
-
exit
|
|
62
|
+
exit(1)
|
|
63
63
|
|
|
64
64
|
output_path = os.path.abspath(options.output_dir)
|
|
65
65
|
if not os.path.exists(output_path):
|
|
@@ -77,7 +77,7 @@ def main():
|
|
|
77
77
|
elif options.input_type == 'combined':
|
|
78
78
|
read_combined_files(options.input_dir, options.name_split_gff, options.gene_ident, combined_out_file, options.translate, True)
|
|
79
79
|
elif options.input_type == 'fasta':
|
|
80
|
-
read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate)
|
|
80
|
+
read_fasta_files(options.input_dir, options.name_split_fasta, combined_out_file, options.translate, True)
|
|
81
81
|
|
|
82
82
|
if __name__ == "__main__":
|
|
83
83
|
main()
|
PyamilySeq/Seq_Extractor.py
CHANGED
|
@@ -9,8 +9,13 @@ def find_gene_ids_in_csv(csv_file, group_name):
|
|
|
9
9
|
cells = line.strip().split(',')
|
|
10
10
|
if cells[0].replace('"','') == group_name:
|
|
11
11
|
# Collect gene IDs from column 14 onward
|
|
12
|
+
# for cell in cells[14:]:
|
|
13
|
+
# gene_ids.extend(cell.strip().replace('"','').split()) # Splitting by spaces if there are multiple IDs in a cell break
|
|
12
14
|
for cell in cells[14:]:
|
|
13
|
-
|
|
15
|
+
for gene in cell.strip().replace('"', '').split(';'):
|
|
16
|
+
if gene:
|
|
17
|
+
gene_ids.append(gene)
|
|
18
|
+
|
|
14
19
|
return gene_ids
|
|
15
20
|
|
|
16
21
|
def extract_sequences(fasta_file, gene_ids):
|
PyamilySeq/constants.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
PyamilySeq_Version = 'v1.1.2'
|
|
1
|
+
PyamilySeq_Version = 'v1.2.0'
|
|
2
2
|
|
PyamilySeq/utils.py
CHANGED
|
@@ -7,7 +7,6 @@ from tempfile import NamedTemporaryFile
|
|
|
7
7
|
import sys
|
|
8
8
|
import re
|
|
9
9
|
import math
|
|
10
|
-
#from config import config_params
|
|
11
10
|
|
|
12
11
|
####
|
|
13
12
|
# Placeholder for the distance function
|
|
@@ -15,11 +14,10 @@ levenshtein_distance_cal = None
|
|
|
15
14
|
# Check for Levenshtein library once
|
|
16
15
|
try:
|
|
17
16
|
import Levenshtein as LV
|
|
18
|
-
# Assign the
|
|
17
|
+
# Assign the optimised function
|
|
19
18
|
def levenshtein_distance_calc(seq1, seq2):
|
|
20
19
|
return LV.distance(seq1, seq2)
|
|
21
20
|
except (ModuleNotFoundError, ImportError):
|
|
22
|
-
#if config_params.verbose == True: - Not implemented yet
|
|
23
21
|
print("Levenshtein package not installed - Will fallback to slower Python implementation.")
|
|
24
22
|
# Fallback implementation
|
|
25
23
|
def levenshtein_distance_calc(seq1, seq2):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PyamilySeq
|
|
3
|
-
Version: 1.1.2
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: PyamilySeq - A a tool to investigate sequence-based gene groups identified by clustering methods such as CD-HIT, DIAMOND, BLAST or MMseqs2.
|
|
5
5
|
Home-page: https://github.com/NickJD/PyamilySeq
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -46,7 +46,7 @@ To update to the newest version add '-U' to end of the pip install command.
|
|
|
46
46
|
```commandline
|
|
47
47
|
usage: PyamilySeq.py [-h] {Full,Partial} ...
|
|
48
48
|
|
|
49
|
-
PyamilySeq v1.
|
|
49
|
+
PyamilySeq v1.2.0: A tool for gene clustering and analysis.
|
|
50
50
|
|
|
51
51
|
positional arguments:
|
|
52
52
|
{Full,Partial} Choose a mode: 'Full' or 'Partial'.
|
|
@@ -76,7 +76,7 @@ Escherichia_coli_110957|ENSB_TIZS9kbTvShDvyX Escherichia_coli_110957|ENSB_TIZS9k
|
|
|
76
76
|
```
|
|
77
77
|
### Example output:
|
|
78
78
|
```
|
|
79
|
-
Running PyamilySeq v1.1.2
|
|
79
|
+
Running PyamilySeq v1.2.0
|
|
80
80
|
Calculating Groups
|
|
81
81
|
Number of Genomes: 10
|
|
82
82
|
Gene Groups
|
|
@@ -221,7 +221,7 @@ Seq-Combiner -input_dir .../test_data/genomes -name_split_gff .gff3 -output_dir
|
|
|
221
221
|
```
|
|
222
222
|
usage: Seq_Combiner.py [-h] -input_dir INPUT_DIR -input_type {separate,combined,fasta} [-name_split_gff NAME_SPLIT_GFF] [-name_split_fasta NAME_SPLIT_FASTA] -output_dir OUTPUT_DIR -output_name OUTPUT_FILE [-gene_ident GENE_IDENT] [-translate] [-v]
|
|
223
223
|
|
|
224
|
-
PyamilySeq v1.
|
|
224
|
+
PyamilySeq v1.2.0: Seq-Combiner - A tool to extract sequences from GFF/FASTA files and prepare them for PyamilySeq.
|
|
225
225
|
|
|
226
226
|
options:
|
|
227
227
|
-h, --help show this help message and exit
|
|
@@ -264,7 +264,7 @@ usage: Group_Splitter.py [-h] -input_fasta INPUT_FASTA -sequence_type {AA,DNA}
|
|
|
264
264
|
[-M CLUSTERING_MEMORY] [-no_delete_temp_files]
|
|
265
265
|
[-verbose] [-v]
|
|
266
266
|
|
|
267
|
-
PyamilySeq v1.
|
|
267
|
+
PyamilySeq v1.2.0: Group-Splitter - A tool to split multi-copy gene groups
|
|
268
268
|
identified by PyamilySeq.
|
|
269
269
|
|
|
270
270
|
options:
|
|
@@ -317,7 +317,7 @@ Cluster-Summary -genome_num 10 -input_clstr .../test_data/species/E-coli/E-coli_
|
|
|
317
317
|
usage: Cluster_Summary.py [-h] -input_clstr INPUT_CLSTR -output OUTPUT -genome_num GENOME_NUM
|
|
318
318
|
[-output_dir OUTPUT_DIR] [-verbose] [-v]
|
|
319
319
|
|
|
320
|
-
PyamilySeq v1.
|
|
320
|
+
PyamilySeq v1.2.0: Cluster-Summary - A tool to summarise CD-HIT clustering files.
|
|
321
321
|
|
|
322
322
|
options:
|
|
323
323
|
-h, --help show this help message and exit
|
|
@@ -6,17 +6,17 @@ PyamilySeq/Group_Splitter.py,sha256=OcMj9GnAyybs_DaNKRyvfL_nl2dB2gUI4BD_EQrBbWo,
|
|
|
6
6
|
PyamilySeq/PyamilySeq.py,sha256=tdmIDB2ZYCRfMFQSuWrN0Psr5ggSaoUcT2wEv54jWos,17462
|
|
7
7
|
PyamilySeq/PyamilySeq_Genus.py,sha256=KUC0QkCRpKQ9HEgxyTSD7Nc63wSXtriWyIqt_YOy5ys,12470
|
|
8
8
|
PyamilySeq/PyamilySeq_Species.py,sha256=gJy8Pn82Za44l6y9tg7bWJri2k_0OwZiplANIEH2o-c,16289
|
|
9
|
-
PyamilySeq/Seq_Combiner.py,sha256=
|
|
10
|
-
PyamilySeq/Seq_Extractor.py,sha256=
|
|
9
|
+
PyamilySeq/Seq_Combiner.py,sha256=G49zthWtsTfqYX1tqc7op9a9cSia1IL0VTiAtwgdTwc,4746
|
|
10
|
+
PyamilySeq/Seq_Extractor.py,sha256=IQk4Qn6LJkPXD1O4TQesneS3_ZN8hBsTVZQGlZ1c-Dk,3072
|
|
11
11
|
PyamilySeq/Seq_Finder.py,sha256=ht-fSQ_opWKydcoWI9D3nTwLt6Rpgevnf2y0KxVjw4M,1881
|
|
12
12
|
PyamilySeq/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
13
|
PyamilySeq/clusterings.py,sha256=9t9Q7IYb9x9gXxcv_FxsWqgdMQ-MYa-5OpkBzpgbrXc,22291
|
|
14
14
|
PyamilySeq/config.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
PyamilySeq/constants.py,sha256=
|
|
16
|
-
PyamilySeq/utils.py,sha256=
|
|
17
|
-
pyamilyseq-1.
|
|
18
|
-
pyamilyseq-1.
|
|
19
|
-
pyamilyseq-1.
|
|
20
|
-
pyamilyseq-1.
|
|
21
|
-
pyamilyseq-1.
|
|
22
|
-
pyamilyseq-1.
|
|
15
|
+
PyamilySeq/constants.py,sha256=mmBQfTz9VT8Cb9wOd6Rg1k20kuWT6jdDqpKrTRZWuNI,31
|
|
16
|
+
PyamilySeq/utils.py,sha256=1U794Xd5qzmaIz2VujdnPkND729kr7rKjei0Y57f-QE,32972
|
|
17
|
+
pyamilyseq-1.2.0.dist-info/licenses/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
|
|
18
|
+
pyamilyseq-1.2.0.dist-info/METADATA,sha256=B65K76ds4UaNEE8ZKTBq2valwsaslAc0yICg37l6tM4,17979
|
|
19
|
+
pyamilyseq-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
20
|
+
pyamilyseq-1.2.0.dist-info/entry_points.txt,sha256=5RkUWIneXu-kCnClJhv0u27lWHAoyoVmospZMU5Cs2U,846
|
|
21
|
+
pyamilyseq-1.2.0.dist-info/top_level.txt,sha256=J6JhugUQTq4rq96yibAlQu3o4KCM9WuYfqr3w1r119M,11
|
|
22
|
+
pyamilyseq-1.2.0.dist-info/RECORD,,
|
|
@@ -8,6 +8,9 @@ Seq-Extractor = PyamilySeq.Seq_Extractor:main
|
|
|
8
8
|
Seq-Finder = PyamilySeq.Seq_Finder:main
|
|
9
9
|
cluster-extractor = PyamilySeq.Cluster_Extractor:main
|
|
10
10
|
cluster-summary = PyamilySeq.Cluster_Summary:main
|
|
11
|
+
compare-contree-singletrees = aux_tools.RF.compare_contree_singletrees:main
|
|
12
|
+
compare-rf = aux_tools.RF.compare_RF:main
|
|
13
|
+
compute-singletrees-rf = aux_tools.RF.Compute_SingleTree_RFs:main
|
|
11
14
|
group-splitter = PyamilySeq.Group_Splitter:main
|
|
12
15
|
pyamilyseq = PyamilySeq.PyamilySeq:main
|
|
13
16
|
seq-combiner = PyamilySeq.Seq_Combiner:main
|
|
File without changes
|
|
File without changes
|