plasmidhub 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of plasmidhub might be problematic. Click here for more details.

plasmidhub/plot.py ADDED
@@ -0,0 +1,169 @@
1
+ import json
2
+ import networkx as nx
3
+ import matplotlib.pyplot as plt
4
+ import matplotlib.patches as mpatches
5
+ import os
6
+ import logging
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
def load_network_from_json(json_file):
    """Read a Cytoscape-style JSON document and return it as a NetworkX graph.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The graph produced by ``nx.cytoscape_graph`` from the parsed JSON.
    """
    with open(json_file, 'r') as handle:
        cytoscape_data = json.load(handle)
    graph = nx.cytoscape_graph(cytoscape_data)
    return graph
15
+
16
def visualize_network_basic(G, k, output_path_no_ext):
    """Draw the network in uniform grey and save it with and without labels.

    Four files are written: ``<output_path_no_ext>.pdf`` and ``.svg`` (with
    node labels) plus ``<output_path_no_ext>_nolabels.pdf`` and ``.svg``.

    Args:
        G: Graph to draw; the ``weight`` edge attribute drives edge width.
        k: Forwarded as ``k`` to ``nx.spring_layout``.
        output_path_no_ext: Output path prefix without a file extension.
    """
    weight_lo, weight_hi = 5, 100
    width_lo, width_hi = 0.2, 2.0  # reduced thickness

    weights_by_edge = nx.get_edge_attributes(G, 'weight')

    def width_for(edge):
        # Edges without a weight are treated as having the minimum weight;
        # weights are clipped to [weight_lo, weight_hi] and mapped linearly
        # onto [width_lo, width_hi].
        raw = weights_by_edge.get(edge, weight_lo)
        clipped = max(weight_lo, min(raw, weight_hi))
        return width_lo + (clipped - weight_lo) / (weight_hi - weight_lo) * (width_hi - width_lo)

    edge_widths = [width_for(edge) for edge in G.edges()]

    # One layout is shared by both renderings so they differ only in labels.
    pos = nx.spring_layout(G, k=k, seed=69420, iterations=100)

    for suffix, show_labels in (("", True), ("_nolabels", False)):
        _fig, ax = plt.subplots(figsize=(25, 25))
        nx.draw_networkx_nodes(G, pos, node_color='grey', node_size=900, node_shape='o',
                               linewidths=1, edgecolors='black', alpha=0.65, ax=ax)
        nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color='grey', alpha=0.7, ax=ax)
        if show_labels:
            nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(output_path_no_ext + suffix + ".pdf", format="pdf")
        plt.savefig(output_path_no_ext + suffix + ".svg", format="svg")
        plt.close()
60
+
61
def visualize_network_colored_by_cluster(G, k, output_path_no_ext, cluster_mapping_file, cluster_color_file):
    """Draw the network with nodes coloured by cluster and save four figures.

    Writes ``<output_path_no_ext>_cluster_colored`` and
    ``<output_path_no_ext>_cluster_colored_nolabels``, each as PDF and SVG.
    Both figures carry a legend listing every cluster found in the colour file.

    Args:
        G: Graph to draw; the ``weight`` edge attribute drives edge width.
        k: Forwarded as ``k`` to ``nx.spring_layout``.
        output_path_no_ext: Output path prefix without a file extension.
        cluster_mapping_file: Tab-separated file; lines with exactly two
            fields are read as ``plasmid<TAB>cluster``.
        cluster_color_file: Tab-separated file; lines with exactly two
            fields are read as ``cluster<TAB>colour``.
    """

    def read_pairs(path):
        # Keep only lines that split into exactly two tab-separated fields.
        pairs = {}
        with open(path) as fh:
            for raw in fh:
                fields = raw.strip().split('\t')
                if len(fields) == 2:
                    pairs[fields[0]] = fields[1]
        return pairs

    # A cluster_list.txt next to the mapping file holding at most one line
    # (only its header) means no clusters were found; warn but keep drawing.
    list_path = os.path.join(os.path.dirname(cluster_mapping_file), "cluster_list.txt")
    if os.path.exists(list_path):
        with open(list_path) as fh:
            n_lines = len(fh.readlines())
        if n_lines <= 1:
            logger.warning("No clusters detected with the given parameters!")

    cluster_map = read_pairs(cluster_mapping_file)
    color_map = read_pairs(cluster_color_file)

    # Nodes without a cluster (or whose cluster has no colour) are light grey.
    node_colors = [color_map.get(cluster_map.get(node), "#cccccc") for node in G.nodes()]

    # Clip weights to [5, 100] and map them linearly onto widths [0.2, 2.0].
    weight_lo, weight_hi = 5, 100
    width_lo, width_hi = 0.2, 2.0
    weights_by_edge = nx.get_edge_attributes(G, 'weight')
    edge_widths = []
    for edge in G.edges():
        clipped = max(weight_lo, min(weights_by_edge.get(edge, weight_lo), weight_hi))
        edge_widths.append(
            width_lo + (clipped - weight_lo) / (weight_hi - weight_lo) * (width_hi - width_lo)
        )

    pos = nx.spring_layout(G, k=k, seed=69420, iterations=100)

    legend_handles = []
    for cluster_name, cluster_color in sorted(color_map.items()):
        legend_handles.append(mpatches.Patch(color=cluster_color, label=cluster_name))

    for suffix, show_labels in (("_cluster_colored", True), ("_cluster_colored_nolabels", False)):
        _fig, ax = plt.subplots(figsize=(25, 25))
        nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=900, node_shape='o',
                               linewidths=1, edgecolors='black', alpha=0.9, ax=ax)
        nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color='grey', alpha=0.7, ax=ax)
        if show_labels:
            nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
        plt.axis('off')

        ax.legend(handles=legend_handles, loc='upper right', fontsize=16, title="Clusters",
                  title_fontsize=18, bbox_to_anchor=(1.02, 1.0))
        plt.tight_layout()
        plt.savefig(output_path_no_ext + suffix + ".pdf", format="pdf")
        plt.savefig(output_path_no_ext + suffix + ".svg", format="svg")
        plt.close()
148
+
149
+
150
def run_visualizations(results_dir, k_min, k_max):
    """Load ``network.json`` from ``results_dir`` and plot it for a range of k.

    Plots are produced for every ``k`` in ``range(k_min, k_max)``, so
    ``k_max`` itself is excluded. Cluster-coloured plots are generated first,
    and only when both ``plasmid_cluster_mapping.txt`` and
    ``cluster_colours.txt`` exist in ``results_dir``; the basic grey plots are
    generated afterwards in every case.

    Args:
        results_dir: Directory holding ``network.json`` and receiving the plots.
        k_min: First k value (inclusive).
        k_max: End of the k range (exclusive).
    """
    G = load_network_from_json(os.path.join(results_dir, "network.json"))

    def base_for(k_value):
        # Output prefix shared by the basic and the cluster-coloured plots.
        return os.path.join(results_dir, f"network_k_{k_value}")

    mapping_path = os.path.join(results_dir, "plasmid_cluster_mapping.txt")
    colours_path = os.path.join(results_dir, "cluster_colours.txt")

    if not (os.path.exists(mapping_path) and os.path.exists(colours_path)):
        logger.info("Cluster mapping or color file not found. Skipping cluster-colored plots.")
    else:
        logger.info("Generating cluster-colored visualizations...")
        for k in range(k_min, k_max):
            visualize_network_colored_by_cluster(G, k, base_for(k), mapping_path, colours_path)

    for k in range(k_min, k_max):
        visualize_network_basic(G, k, base_for(k))
@@ -0,0 +1,153 @@
1
+ import argparse
2
+ import os
3
+ import json
4
+ import numpy as np
5
+ import networkx as nx
6
+ import matplotlib.pyplot as plt
7
+ import matplotlib.patches as mpatches
8
+
9
def parse_args(argv=None):
    """Parse the command-line options of the plot-only mode.

    Args:
        argv: Optional list of argument strings to parse. When ``None`` (the
            default) argparse reads ``sys.argv[1:]``, which is exactly what
            the former zero-argument version did; passing a list allows the
            parser to be driven programmatically.

    Returns:
        argparse.Namespace with ``results_dir``, ``plot_k`` (two floats),
        ``min_edge_width``, ``max_edge_width``, ``node_size``, ``node_color``,
        ``node_shape``, ``figsize`` (two numbers) and ``iterations``.
    """
    parser = argparse.ArgumentParser(description="Plot-only mode in plasmidnet for custom plots")
    parser.add_argument("--results_dir", required=True, help="Path to directory")
    parser.add_argument("--plot_k", nargs=2, type=float, metavar=('MIN_K', 'MAX_K'),
                        required=True, help="Specify minimum and maximum k values for plotting")
    parser.add_argument("--min_edge_width", type=float, default=0.2,
                        help="Minimum edge width")
    parser.add_argument("--max_edge_width", type=float, default=2.0,
                        help="Maximum edge width")
    parser.add_argument("--node_size", type=int, default=900,
                        help="Size of nodes")
    parser.add_argument("--node_color", type=str, default="#cccccc",
                        help="Color of nodes that have no cluster color")
    parser.add_argument("--node_shape", type=str, default="o",
                        help="Shape of nodes (matplotlib marker such as o, s, ^)")
    parser.add_argument("--figsize", nargs=2, type=float, default=[25, 25],
                        help="Figure size in inches (width height)")
    parser.add_argument("--iterations", type=int, default=100,
                        help="Spring layout iterations")
    return parser.parse_args(argv)
22
+
23
def load_network_from_json(json_file):
    """Build a NetworkX graph from a Cytoscape-style JSON file.

    Args:
        json_file: Path of the JSON file to read.

    Returns:
        The graph produced by ``nx.cytoscape_graph``.
    """
    with open(json_file, 'r') as source:
        return nx.cytoscape_graph(json.load(source))
27
+
28
def scale_edge_weights(G, min_width, max_width):
    """Convert edge weights into line widths, one per edge of ``G.edges()``.

    Weights are clipped to the range 5..100 and mapped linearly onto
    ``[min_width, max_width]``; an edge without a ``weight`` attribute is
    treated as having weight 5.

    Args:
        G: Graph whose ``weight`` edge attribute is read.
        min_width: Width assigned to weights at or below 5.
        max_width: Width assigned to weights at or above 100.

    Returns:
        List of widths in the iteration order of ``G.edges()``.
    """
    weight_floor, weight_ceiling = 5, 100
    weights_by_edge = nx.get_edge_attributes(G, 'weight')

    def to_width(edge):
        bounded = max(weight_floor, min(weights_by_edge.get(edge, weight_floor), weight_ceiling))
        return min_width + ((bounded - weight_floor) / (weight_ceiling - weight_floor)) * (max_width - min_width)

    return [to_width(edge) for edge in G.edges()]
39
+
40
def visualize_network(G, k, output_path, args):
    """Draw the network with user-chosen styling, with and without labels.

    Two PDF files are written: ``<output_path>.pdf`` (with node labels) and
    ``<output_path>_nolabels.pdf``.

    Args:
        G: Graph to draw.
        k: Forwarded as ``k`` to ``nx.spring_layout``.
        output_path: Output path prefix without a file extension.
        args: Parsed options; ``iterations``, ``min_edge_width``,
            ``max_edge_width``, ``figsize``, ``node_color``, ``node_size``
            and ``node_shape`` are read.
    """
    # Both renderings reuse the same layout and edge widths.
    pos = nx.spring_layout(G, k=k, seed=69420, iterations=args.iterations)
    edge_widths = scale_edge_weights(G, args.min_edge_width, args.max_edge_width)

    for suffix, show_labels in ((".pdf", True), ("_nolabels.pdf", False)):
        _fig, ax = plt.subplots(figsize=tuple(args.figsize))
        nx.draw_networkx_nodes(G, pos, node_color=args.node_color, node_size=args.node_size,
                               node_shape=args.node_shape, linewidths=1, edgecolors='black',
                               alpha=0.85, ax=ax)
        nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color='grey', alpha=0.7, ax=ax)
        if show_labels:
            nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(output_path + suffix)
        plt.close()
64
+
65
def visualize_network_colored_by_cluster(G, k, output_path_no_ext, cluster_mapping_file, cluster_color_file, args):
    """Draw the network coloured by cluster using user-chosen styling.

    Writes ``<output_path_no_ext>_cluster_colored`` and
    ``<output_path_no_ext>_cluster_colored_nolabels``, each as PDF and SVG,
    with a legend listing every cluster found in the colour file.

    Args:
        G: Graph to draw.
        k: Forwarded as ``k`` to ``nx.spring_layout``.
        output_path_no_ext: Output path prefix without a file extension.
        cluster_mapping_file: Tab-separated file; lines with exactly two
            fields are read as ``plasmid<TAB>cluster``.
        cluster_color_file: Tab-separated file; lines with exactly two
            fields are read as ``cluster<TAB>colour``.
        args: Parsed options; ``node_color`` is the fallback colour, and
            ``min_edge_width``, ``max_edge_width``, ``iterations``,
            ``figsize``, ``node_size`` and ``node_shape`` are also read.
    """

    def read_pairs(path):
        # Keep only lines that split into exactly two tab-separated fields.
        pairs = {}
        with open(path) as fh:
            for raw in fh:
                fields = raw.strip().split('\t')
                if len(fields) == 2:
                    pairs[fields[0]] = fields[1]
        return pairs

    cluster_map = read_pairs(cluster_mapping_file)
    color_map = read_pairs(cluster_color_file)

    # Nodes without a cluster (or whose cluster has no colour) fall back to
    # the user-selected node colour.
    node_colors = [color_map.get(cluster_map.get(node), args.node_color) for node in G.nodes()]

    edge_widths = scale_edge_weights(G, args.min_edge_width, args.max_edge_width)
    pos = nx.spring_layout(G, k=k, seed=69420, iterations=args.iterations)

    legend_handles = []
    for cluster_name, cluster_color in sorted(color_map.items()):
        legend_handles.append(mpatches.Patch(color=cluster_color, label=cluster_name))

    for suffix, show_labels in (("_cluster_colored", True), ("_cluster_colored_nolabels", False)):
        _fig, ax = plt.subplots(figsize=tuple(args.figsize))
        nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=args.node_size,
                               node_shape=args.node_shape, linewidths=1, edgecolors='black',
                               alpha=0.9, ax=ax)
        nx.draw_networkx_edges(G, pos, width=edge_widths, edge_color='grey', alpha=0.7, ax=ax)
        if show_labels:
            nx.draw_networkx_labels(G, pos, font_size=10, font_weight='bold', ax=ax)
        ax.legend(handles=legend_handles, loc='upper right', fontsize=16, title="Clusters",
                  title_fontsize=18, bbox_to_anchor=(1.02, 1.0))
        plt.axis('off')
        plt.tight_layout()
        plt.savefig(output_path_no_ext + suffix + ".pdf")
        plt.savefig(output_path_no_ext + suffix + ".svg")
        plt.close()
122
+
123
def run():
    """Entry point of plot-only mode.

    Parses the command line, loads ``network.json`` from the results
    directory and writes the plots into its ``plots`` subdirectory (created
    if missing). Cluster-coloured plots are added when both
    ``plasmid_cluster_mapping.txt`` and ``cluster_colours.txt`` exist.

    Raises:
        FileNotFoundError: If ``network.json`` is not in the results directory.
    """
    args = parse_args()

    print("Generating network visualizations...")

    network_json = os.path.join(args.results_dir, "network.json")
    if not os.path.exists(network_json):
        raise FileNotFoundError(f"network.json not found in {args.results_dir}")

    G = load_network_from_json(network_json)

    plots_dir = os.path.join(args.results_dir, "plots")
    os.makedirs(plots_dir, exist_ok=True)

    mapping_path = os.path.join(args.results_dir, "plasmid_cluster_mapping.txt")
    colours_path = os.path.join(args.results_dir, "cluster_colours.txt")
    have_cluster_files = all(os.path.exists(path) for path in (mapping_path, colours_path))

    lowest_k, highest_k = args.plot_k

    # Step through k in increments of 1; the 0.1 slack keeps highest_k in the
    # sequence when it lies a whole number of steps above lowest_k.
    for k in np.arange(lowest_k, highest_k + 0.1, 1.0):
        output_base = os.path.join(plots_dir, f"custom_plot_k_{k}")
        visualize_network(G, k, output_base, args)
        if not have_cluster_files:
            continue
        visualize_network_colored_by_cluster(G, k, output_base, mapping_path, colours_path, args)

    print(f"Plots saved to: {plots_dir}")
151
+
152
# Run the plot-only workflow when this module is executed as a script.
if __name__ == "__main__":
    run()
@@ -0,0 +1,48 @@
1
+ import os
2
+ from Bio import SeqIO
3
+ import logging
4
+ logger = logging.getLogger(__name__)
5
+
6
def validate_and_list_plasmids(input_dir):
    """Collect the readable, non-empty FASTA files found in ``input_dir``.

    Only names ending in ``.fna``, ``.fa`` or ``.fasta`` (case-insensitive)
    are considered. A candidate that cannot be opened or parsed, or that
    yields no FASTA record, is reported through a logged warning and left out.

    Args:
        input_dir: Directory to scan (not recursive).

    Returns:
        List of absolute paths of the valid FASTA files, in the order
        ``os.listdir`` returned them.
    """
    valid_extensions = ('.fna', '.fa', '.fasta')
    accepted = []
    rejected = []

    for entry in os.listdir(input_dir):
        if not entry.lower().endswith(valid_extensions):
            continue
        candidate = os.path.join(input_dir, entry)
        try:
            with open(candidate, 'r') as handle:
                has_records = bool(list(SeqIO.parse(handle, 'fasta')))
            if has_records:
                accepted.append(os.path.abspath(candidate))
            else:
                rejected.append(entry)
        except Exception:
            # Any failure while opening or parsing marks the file as invalid.
            rejected.append(entry)

    if rejected:
        logger.warning("Warning: The following files are not valid FASTA files or unreadable:")
        for name in rejected:
            logger.warning(f" - {name}")

    # The list is deliberately left unsorted: sorting by filename changes the
    # layout of the plot (only the visualization, not the network itself).
    return accepted
34
+
35
def write_plasmid_list(plasmid_files, output_file="Plasmid_list.txt"):
    """Write each plasmid path on its own line to ``output_file``.

    Args:
        plasmid_files: Iterable of path strings.
        output_file: Destination file; it is overwritten if it exists.
    """
    with open(output_file, 'w') as out:
        out.writelines(entry + '\n' for entry in plasmid_files)
39
+
40
def write_plasmid_sizes(plasmid_files, output_file="Plasmid_sizes.txt"):
    """Write a tab-separated table of plasmid file names and sequence sizes.

    The table starts with the header ``PlasmidID<TAB>Size``. Each following
    row holds the base name of a FASTA file and the summed length of all
    records in that file.

    Args:
        plasmid_files: Iterable of FASTA file paths.
        output_file: Destination file; it is overwritten if it exists.
    """
    with open(output_file, 'w') as table:
        table.write("PlasmidID\tSize\n")
        for fasta_path in plasmid_files:
            with open(fasta_path, 'r') as handle:
                n_bases = sum(len(record.seq) for record in SeqIO.parse(handle, 'fasta'))
            table.write(f"{os.path.basename(fasta_path)}\t{n_bases}\n")
@@ -0,0 +1,24 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Dr. Balint Timmer
4
+
5
+ Institute of Metagenomics, University of Debrecen, Debrecen, Hungary
6
+ Institute of Medical Microbiology, Faculty of Medicine, University of Pecs, Pecs, Hungary
7
+
8
+ Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ of this software and associated documentation files (the "Software"), to deal
10
+ in the Software without restriction, including without limitation the rights
11
+ to use, copy, modify, merge, publish, distribute, sublicense,
12
+ and to permit persons to whom the Software is
13
+ furnished to do so, subject to the following conditions:
14
+
15
+ The above copyright notice and this permission notice shall be included in all
16
+ copies or substantial portions of the Software.
17
+
18
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ SOFTWARE.
@@ -0,0 +1,193 @@
1
+ Metadata-Version: 2.1
2
+ Name: plasmidhub
3
+ Version: 1.0.0
4
+ Summary: A command-line tool for plasmid clustering, analysis, and visualization.
5
+ Home-page: https://github.com/YOUR_USERNAME/Plasmidhub
6
+ Author: Dr. Balint Timmer
7
+ Author-email: "Dr. Balint Timmer" <timmer.balint@med.unideb.hu>
8
+ License: MIT License
9
+
10
+ Copyright (c) 2025 Dr. Balint Timmer
11
+
12
+ Institute of Metagenomics, University of Debrecen, Debrecen, Hungary
13
+ Institute of Medical Microbiology, Faculty of Medicine, University of Pecs, Pecs, Hungary
14
+
15
+ Permission is hereby granted, free of charge, to any person obtaining a copy
16
+ of this software and associated documentation files (the "Software"), to deal
17
+ in the Software without restriction, including without limitation the rights
18
+ to use, copy, modify, merge, publish, distribute, sublicense,
19
+ and to permit persons to whom the Software is
20
+ furnished to do so, subject to the following conditions:
21
+
22
+ The above copyright notice and this permission notice shall be included in all
23
+ copies or substantial portions of the Software.
24
+
25
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31
+ SOFTWARE.
32
+ Keywords: plasmid,bioinformatics,network,clustering,AMR,virulence,plasmid network
33
+ Classifier: Programming Language :: Python :: 3
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Operating System :: OS Independent
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
38
+ Requires-Python: >=3.8
39
+ Description-Content-Type: text/markdown
40
+ License-File: LICENSE
41
+ Requires-Dist: biopython>=1.83
42
+ Requires-Dist: pandas>=2.0
43
+ Requires-Dist: networkx>=3.1
44
+ Requires-Dist: matplotlib>=3.7
45
+ Requires-Dist: python-louvain>=0.16
46
+ Requires-Dist: numpy>=1.24
47
+ Requires-Dist: scipy>=1.8
48
+
49
+ <img src="https://img.shields.io/github/license/BALINTESBL/plasmidhub" alt="License"> <img src="https://img.shields.io/pypi/v/plasmidhub" alt="PyPI"> ![Build Status](https://github.com/BALINTESBL/plasmidhub/actions/workflows/tests.yml/badge.svg)
50
+
51
+ # Plasmidhub
52
+ Plasmidhub is a free and open-source command-line tool for comprehensive plasmid network analysis based on nucleotide sequence similarity. It enables researchers to cluster plasmids and identify genetically related groups using a dynamic, database-independent approach. Plasmidhub's approach:
53
+ * Is applicable to any plasmid
54
+ * Provides an unambiguous classification
55
+ * Considers the whole sequence of the plasmids
56
+
57
+ Network visualizations, stats and data are provided for further analysis.
58
+
59
+ ## Download and Installation
60
+ Plasmidhub can be installed easily via PyPI, Bioconda, or directly from GitHub.
61
+
62
+ ### Pip
63
+ ```
64
+ pip install plasmidhub
65
+ ```
66
+ **Note:** It's highly recommended to use a virtual environment or conda environment.
67
+ Recommended environment setup:
68
+ ```
69
+ conda create -n plasmidhub python=3.8
70
+ conda activate plasmidhub
71
+ ```
72
+ ### Bioconda
73
+
74
+ If you use Conda for environment management:
75
+ ```
76
+ conda install -c bioconda plasmidhub
77
+ ```
78
+ Make sure the required channels (including bioconda) are configured. If not, add them with:
79
+ ```
80
+ conda config --add channels defaults
81
+ conda config --add channels bioconda
82
+ conda config --add channels conda-forge
83
+ ```
84
+ ### GitHub
85
+ To get the latest version:
86
+ ```
87
+ git clone https://github.com/BALINTESBL/plasmidhub.git
88
+ cd plasmidhub
89
+ pip install .
90
+ ```
91
+ ### Dependencies
92
+ This tool requires the following external software to be installed:
93
+ - [FastANI](https://github.com/ParBLiSS/FastANI)
94
+ - [ABRicate](https://github.com/tseemann/abricate)
95
+
96
+ ## Inputs
97
+ Plasmidhub requires plasmid FASTA files (.fna or .fa or .fasta). Your FASTA files need to be placed in one directory. Ideally, there are no other files in the directory.
98
+
99
+ ## Usage
100
+ Perform plasmid network analysis with default settings by defining only the directory path of your plasmid FASTA files! Or, you can also adjust parameters.
101
+ Example usage:
102
+ ```
103
+ % plasmidhub path/to/my/plasmid/FASTA/files --fragLen 1000 --kmer 14 --coverage_threshold 0.5 --ani_threshold 95 --min_cluster_size 4 --plot_k 2.0 3.0 -t 32
104
+
105
+ ```
106
+ This command will:
107
+ * Compute pairwise ANI using FastANI
108
+ * Build a plasmid similarity network
109
+ * Save network metrics and statistics (results/statistics)
110
+ * Cluster plasmids
111
+ * Annotate resistance and virulence genes with ABRicate (results/abricate_results)
112
+ * Generate network visualizations (results/plots)
113
+ ### Key Options
114
+
115
+ | Category | Flag | Description | Default |
116
+ | -------------- | ---------------------- | --------------------------------------- | ------------------------- |
117
+ | **Input** | *(positional)* | Path to folder with plasmid FASTA files | – |
118
+ | **FastANI** | `--fragLen` | Fragment length | `1000` |
119
+ | | `--kmer` | K-mer size | `14` |
120
+ | | `--coverage_threshold` | Minimum proportion of the plasmid lengths | `0.5` |
121
+ | | | covered by the matching fragments | |
122
+ | | `--ani_threshold` | Minimum ANI score (after applying | `95.0` |
123
+ | | | coverage threshold) | |
124
+ | **Clustering** | `--cluster_off` | Disable clustering | – |
125
+ | | `--min_cluster_size` | Minimum cluster size (plasmids) | `3` |
126
+ | **ABRicate** | `--skip_abricate` | Skip annotation step | – |
127
+ | | `--abricate_dbs` | Databases to use e.g.: | `plasmidfinder card vfdb` |
128
+ | | | --abricate_dbs ncbi ecoli_vf | |
129
+ | **Plotting** | `--plot_k` | Range of k values |`3` `3` |
130
+ | | `--plot_skip` | Skips plotting | |
131
+ | **Threads** | `-t` or `--threads` | Number of threads | `4` |
132
+ ### Plot-only mode
133
+ In plot-only mode, network visualizations can be generated from existing networks directly, by using --plot_only flag and defining the directory path. In this mode, several parameters can be adjusted.
134
+ Example usage:
135
+ ```
136
+ % plasmidhub --plot_only path/to/my/results --plot_k 3 5 --plot_node_color blue --plot_node_size 500 --plot_node_shape s --plot_figsize 20 20 -t 32
137
+
138
+ ```
139
+ | **Plotting** | Flag | Description | Default |
140
+ | -------------- | ---------------------- | --------------------------------------- | ------------------------- |
141
+ | | `--plot_node_size` | Size of nodes | `900` |
142
+ | | `--plot_node_shape` | Shape of nodes (`o`, `s`, `^`, etc.) | `o` (circle) |
143
+ | | `--plot_edge_width` | Min/max edge width | `0.2 2.0` |
144
+ | | `--plot_figsize` | Figure size in inches | `25 25` |
145
+ | | `--plot_iterations` | Spring layout iterations | `100` |
146
+
147
+ Node shapes:
148
+ | Marker | Description |
149
+ | ------ | -------------------------- |
150
+ | `'o'` | Circle |
151
+ | `'s'` | Square |
152
+ | `'^'` | Upward-pointing triangle |
153
+ | `'v'` | Downward-pointing triangle |
154
+ | `'>'` | Right-pointing triangle |
155
+ | `'<'` | Left-pointing triangle |
156
+ | `'D'` | Diamond |
157
+ | `'d'` | Thin diamond |
158
+ | `'p'` | Pentagon |
159
+ | `'h'` | Hexagon 1 |
160
+ | `'H'` | Hexagon 2 |
161
+ | `'*'` | Star |
162
+ | `'+'` | Plus |
163
+ | `'x'` | Cross |
164
+ | `'X'` | Filled X |
165
+
166
+ Plots generated with Plasmidhub:
167
+ <img width="1668" height="1668" alt="image" src="https://github.com/user-attachments/assets/afed18b8-6dbe-44b8-b539-23aa47b4bfb0" />
168
+
169
+ ## Overview
170
+
171
+ Plasmidhub performs an all-vs-all comparison of input plasmid sequences using FastANI. FastANI results ("raw results") are filtered by coverage (the proportion of the full plasmid sequences covered by the matching fragments). The remaining pairs are filtered by the minimum ANI score. ANI scores are further weighted by the proportion of matching fragments, and the data are sorted into a similarity matrix. The network is built from the similarity matrix, where:
172
+ - **Nodes** represent plasmids
173
+ - **Edges** represent genetic relatedness (weighted ANI)
174
+
175
+ Within the network, communities are detected via the Louvain method (subclusters). Plasmid clusters are complete subgraphs (cliques) detected within the whole network. Clusters comprise highly similar or identical plasmids. If relevant and scientifically appropriate, plasmids of the same cluster may be considered equivalent. This approach is alignment-free, reference-free, database-independent, and uses a relative similarity-based system to overcome the limitations of database dependency (untypeable plasmids, multireplicon/multi-MOB plasmids, mosaic and hybrid plasmids, etc.).
176
+ Network and node statistics are saved to a distinct directory for downstream analyses (connectance, modularity, nestedness, community partition, degree centrality, node degrees, betweenness, closeness, etc.).
177
+
178
+ Resistance and virulence genes can be annotated via [ABRicate](https://github.com/tseemann/abricate). The abricate files are saved to a distinct subdirectory. By default, plasmidfinder, vfdb and card databases are used, but optionally other databases can be specified from the databases available with ABRicate.
179
+
180
+ To generate custom visualizations, feel free to use and modify the *plot.py*.
181
+
182
+ ## Troubleshooting
183
+ Users are welcome to report any issue or feedback related to Plasmidhub by posting a [GitHub issue](https://github.com/BALINTESBL/plasmidhub/issues).
184
+
185
+ ---
186
+
187
+ Developed by **Dr. Bálint Timmer**
188
+ *Institute of Metagenomics, University of Debrecen, Debrecen, Hungary*
189
+ *Department of Medical Microbiology, University of Pécs Medical School, Pécs, Hungary*
190
+
191
+ <img width="33" height="33" alt="image" src="https://github.com/user-attachments/assets/bd9f17e9-e9ce-4edb-8319-ef0091c45f00" /> <img width="99" height="32.054" alt="image" src="https://github.com/user-attachments/assets/5f3d5b6b-cef6-478a-af66-614b2e2860b2" />
192
+
193
+ Contact: [timmer.balint@med.unideb.hu](mailto:timmer.balint@med.unideb.hu) , [timmer.balint@pte.hu](mailto:timmer.balint@pte.hu)
@@ -0,0 +1,18 @@
1
+ plasmidhub/__init__.py,sha256=jGpatuTP-5KcGG6mvMculsbklnol_RIr2tTuDj2IHh8,21
2
+ plasmidhub/abricate.py,sha256=T295S6vXbOAomgcEtWmq9wnj5ZxhlFcJJKJ7AmpJD00,1428
3
+ plasmidhub/ani.py,sha256=59Idb0M0MWwkBw-R-9AzZpONr4YAFza2l47IiT2phGI,904
4
+ plasmidhub/cluster_color.py,sha256=Sp7hV0sRbhGOx2dVMAeeaiEtEf5FX9oyHQNOeAKapBc,1579
5
+ plasmidhub/clustering.py,sha256=9sdMbyBgDokId1xnzFKX4XI5V4COJ5x1XATYfSKD4UE,5804
6
+ plasmidhub/filtering.py,sha256=e-bpJf9Ge6Vbmyy1K4cDp2yPMqnJpJ1wAE046mf4ga4,2539
7
+ plasmidhub/main.py,sha256=w7SHWUwj2Z_OCHw-wTmIJCEnMt46KIyipFLahyuDvTQ,15417
8
+ plasmidhub/network_builder.py,sha256=0hlJP_mYluQ20PR-A0YjOBQErGr-O7NCtIeMQRNHpVo,8480
9
+ plasmidhub/node_stats.py,sha256=85Uy-NlT8MfErZQ65jtRdFIQHD32Z1BefJDk5OXfoAU,2318
10
+ plasmidhub/plot.py,sha256=hGWyRGPM-q77qBILHLsaFaNZoeekQ39ZDcsSD3abqMI,6983
11
+ plasmidhub/plot_only.py,sha256=UDEmH3FBFTIMPfymScvbuKo8lzB8VniCRlpEVtnSubI,6475
12
+ plasmidhub/preprocessing.py,sha256=xj1BDT9gsiLVj2gLuLaiGvJmb_6tFMH_5HTJNWW3jxs,1780
13
+ plasmidhub-1.0.0.dist-info/LICENSE,sha256=TLYeEpymgVrs1pFYz30Tj_911HUZbakCHp0HNfpHiMA,1282
14
+ plasmidhub-1.0.0.dist-info/METADATA,sha256=DIo8OLbFLY-kpwzq16gLHyCweOSyy5gAiCWfCSGGbdM,11531
15
+ plasmidhub-1.0.0.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
16
+ plasmidhub-1.0.0.dist-info/entry_points.txt,sha256=RypObT2-nI493vnZSRZf-E47n8mzvzsHDCYLlQHX1FU,52
17
+ plasmidhub-1.0.0.dist-info/top_level.txt,sha256=yTI35SryspznaWY3hgD7mJmaYI-w5fBHqS03OKytKsM,11
18
+ plasmidhub-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.3.2)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ plasmidhub = plasmidhub.main:main
@@ -0,0 +1 @@
1
+ plasmidhub