PanGBank-cli 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
# Package version string; pangbank_cli/main.py imports it for the CLI help text
# and for the output of the --version flag.
__version__ = "0.1.1"
@@ -0,0 +1,107 @@
1
+ import requests
2
+ from pydantic import HttpUrl, ValidationError
3
+ from typing import Any, List, Dict, Optional
4
+ import logging
5
+ import pandas as pd
6
+
7
+ from pangbank_api.models import CollectionPublicWithReleases # type: ignore
8
+ from pangbank_api.crud.common import FilterCollection # type: ignore
9
+
10
# Module-level logger used for the debug and warning messages emitted by the
# functions of this module.
logger = logging.getLogger(__name__)
11
+
12
+
13
def get_collections(api_url: HttpUrl, filter_params: FilterCollection):
    """Fetch collections from the given API URL.

    Args:
        api_url: Base URL of the PanGBank API, with or without a trailing slash.
        filter_params: Filter model; its dumped fields are sent as query
            parameters of the request.

    Returns:
        The decoded JSON body returned by the ``collections/`` endpoint.

    Raises:
        requests.HTTPError: If the request fails with any
            ``requests.exceptions.RequestException`` (the original exception
            is chained as the cause).
    """
    # The base URL may already end with "/" (the CLI default URL does), so strip
    # it before appending the path; otherwise the request targets "//collections/".
    base_url = str(api_url).rstrip("/")
    params = filter_params.model_dump()

    try:
        response = requests.get(f"{base_url}/collections/", params=params, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        logger.warning(f"Request failed: {e}")
        raise requests.HTTPError(f"Failed to fetch collections from {api_url}") from e
25
+
26
+
27
def validate_collections(collections: List[Any]) -> List[CollectionPublicWithReleases]:
    """Turn raw collection payloads into ``CollectionPublicWithReleases`` models.

    Each item is unpacked as keyword arguments into the model. The first item
    that fails validation is logged together with its index and aborts the
    whole operation.

    Raises:
        ValueError: When an item does not validate; the pydantic
            ``ValidationError`` is chained as the cause.
    """
    models: List[CollectionPublicWithReleases] = []

    for position, raw_collection in enumerate(collections):
        try:
            model = CollectionPublicWithReleases(**raw_collection)
        except ValidationError as error:
            logger.warning(
                f"Validation failed for collection at index {position}: {error}"
            )
            raise ValueError(f"Failed to validate collections: {error}") from error
        models.append(model)

    return models
39
+
40
+
41
def query_collections(
    api_url: HttpUrl, collection_name: Optional[str] = None
) -> List[CollectionPublicWithReleases]:
    """Retrieve collections from the API and return them as validated models.

    Args:
        api_url: Base URL of the PanGBank API.
        collection_name: Optional collection name forwarded to the API filter.

    Returns:
        The validated collections; the filter always requests only the latest
        release.
    """
    if collection_name:
        name_query = f"with name: '{collection_name}'"
    else:
        name_query = ""
    logger.debug(f"Fetching collections {name_query}")

    latest_only_filter = FilterCollection(
        collection_name=collection_name, only_latest_release=True
    )
    raw_collections = get_collections(api_url, latest_only_filter)
    return validate_collections(raw_collections)
54
+
55
+
56
def format_collections_to_dataframe(
    collections: List[CollectionPublicWithReleases],
) -> pd.DataFrame:
    """Build a table with one row per release flagged as latest.

    Releases whose ``latest`` attribute is falsy are skipped. The taxonomy
    source is rendered as ``"<name>:<version>"`` and the release date as
    ``"%d %b %Y"``.
    """
    rows: List[Dict[str, Any]] = []

    for collection in collections:
        for release in collection.releases:
            if not release.latest:
                continue

            taxonomy = release.taxonomy_source
            row: Dict[str, Any] = {
                "Collection": collection.name,
                "Description": collection.description,
                "Latest release": release.version,
                "Release date": release.date.strftime("%d %b %Y"),
                "Taxonomy": f"{taxonomy.name}:{taxonomy.version}",
                "Pangenome Count": release.pangenome_count,
            }
            rows.append(row)

    return pd.DataFrame(rows)
81
+
82
+
83
def format_collections_to_yaml(
    collections: List[CollectionPublicWithReleases],
) -> List[Dict[str, Any]]:
    """Convert collections into plain records suitable for YAML output.

    Despite its name, this function does not produce YAML text: it returns a
    list of dictionaries (one per release flagged as latest); serializing them
    is left to the caller.

    Args:
        collections: Validated collections with their releases.

    Returns:
        One dictionary per latest release, with the taxonomy source given as a
        nested ``{"name": ..., "version": ...}`` mapping and the release date
        formatted as ``"%d %b %Y"``.
    """
    return [
        {
            "Collection": collection.name,
            "Description": collection.description,
            "Latest release": release.version,
            "Release date": release.date.strftime("%d %b %Y"),
            "Taxonomy": {
                "name": release.taxonomy_source.name,
                "version": release.taxonomy_source.version,
            },
            "Pangenome Count": release.pangenome_count,
        }
        for collection in collections
        for release in collection.releases
        if release.latest
    ]
pangbank_cli/main.py ADDED
@@ -0,0 +1,380 @@
1
+ from pathlib import Path
2
+ from typing import Optional, TextIO
3
+ import sys
4
+ import typer
5
+ from typing_extensions import Annotated
6
+
7
+ from pangbank_cli import __version__
8
+ from rich.logging import RichHandler
9
+ import logging
10
+ import requests
11
+
12
+ from pydantic import HttpUrl
13
+ from rich.console import Console
14
+ from pangbank_cli.collections import (
15
+ query_collections,
16
+ format_collections_to_dataframe,
17
+ format_collections_to_yaml,
18
+ )
19
+ from pangbank_cli.utils import (
20
+ print_dataframe_as_rich_table,
21
+ check_mash_availability,
22
+ print_yaml_with_rich,
23
+ )
24
+
25
+ from pangbank_cli.pangenomes import (
26
+ query_pangenomes,
27
+ format_pangenomes_to_dataframe,
28
+ download_pangenomes,
29
+ display_pangenome_summary_by_collection,
30
+ print_pangenome_info,
31
+ )
32
+
33
+ from pangbank_cli.match_pangenome import (
34
+ get_mash_sketch_file,
35
+ compute_mash_distance,
36
+ get_matching_pangenome,
37
+ )
38
+
39
# Module-level logger for the CLI commands.
logger = logging.getLogger(__name__)
# Rich console writing to stderr; used for error messages and as the console
# of the logging handler installed by verbose_callback.
err_console = Console(stderr=True)

# Root Typer application on which the callback and the commands of this module
# are registered.
app = typer.Typer(
    name="PanGBank CLI",
    help=f"PanGBank CLI {__version__}: Command-line tool for retrieving pangenomes using the PanGBank API.",
    # Accept -h in addition to --help.
    context_settings={"help_option_names": ["-h", "--help"]},
    add_completion=False,
    rich_markup_mode="rich",
)
49
+
50
+
51
def validate_api_url(api_url: str) -> HttpUrl:
    """Parse *api_url* as an HTTP(S) URL and confirm that the API answers.

    Two checks are performed in order: the URL must be accepted by pydantic's
    ``HttpUrl``, then a GET request (5 second timeout) must succeed.

    Returns:
        The parsed ``HttpUrl``.

    Raises:
        typer.Exit: With exit code 1 when the URL is malformed or when the
            request fails (connection problem, timeout or HTTP error status).
    """
    # Step 1: syntactic check -- an invalid URL surfaces as a ValueError.
    try:
        parsed_url = HttpUrl(api_url)
    except ValueError:
        err_console.print(f"[bold red]Error: Invalid URL format: {api_url}[/bold red]")
        err_console.print(
            "[yellow]Tip: Ensure the URL is correctly formatted. Example: https://api.example.com[/yellow]"
        )
        raise typer.Exit(code=1)

    # Step 2: reachability check -- 4xx/5xx answers are turned into exceptions.
    try:
        probe = requests.get(api_url, timeout=5)
        probe.raise_for_status()
    except requests.exceptions.RequestException as e:
        err_console.print(
            f"[bold red]Error: Could not connect to API at {api_url}[/bold red]"
        )
        err_console.print(f"[red]{e}[/red]")
        raise typer.Exit(code=1)
    else:
        # A non-error status other than 200 is only reported as a warning.
        if probe.status_code != 200:
            err_console.print(
                f"[yellow]Warning: API at {api_url} responded with status code {probe.status_code}[/yellow]",
            )
        else:
            logger.info(f"Successfully connected to API at {api_url}")

    return parsed_url
86
+
87
+
88
def version_callback(
    value: bool,
    ctx: typer.Context,
):
    """Callback of ``--version``: print the program version, then exit."""
    # Nothing to do while the context is in resilient-parsing mode, or when
    # the flag was not given.
    if ctx.resilient_parsing or not value:
        return

    typer.echo(f"PanGBank {__version__}")
    raise typer.Exit()
99
+
100
+
101
def verbose_callback(
    verbose: bool,
):
    """Configure logging: DEBUG level when *verbose* is true, INFO otherwise.

    Log records are rendered by a ``RichHandler`` attached to the stderr
    console of this module.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(message)s",
        datefmt="[%X]",
        handlers=[RichHandler(console=err_console)],
    )
117
+
118
+
119
# Reusable option declarations shared by the commands of this module.

# --verbose flag (off by default); its callback sets up logging.
Verbose = typer.Option(
    False,
    "--verbose",
    help="Enable verbose logging.",
    callback=verbose_callback,
    rich_help_panel="Execution settings",
)
# Output directory option; the default value is given in each command signature.
Outdir = typer.Option(
    help="Output directory for downloaded pangenomes.",
    rich_help_panel="Output and downloads",
)
# Download switch; the default value is given in each command signature.
Download = typer.Option(
    help="Download HDF5 pangenome files.",
    rich_help_panel="Output and downloads",
)
# Progress-bar switch; the default value is given in each command signature.
Progress = typer.Option(
    help="Show progress bar while fetching pangenomes (disable with --no-progress).",
    rich_help_panel="Execution settings",
)
138
+
139
+
140
@app.callback(no_args_is_help=True)
def main(
    version: Annotated[
        Optional[bool],
        typer.Option(
            "--version",
            "-v",
            callback=version_callback,
            is_eager=True,
            help="Show the version and exit.",
        ),
    ] = None,
):
    """Main entry point for PanGBank CLI."""
    # The docstring must be the first statement to be a real docstring; it
    # previously followed a ``pass`` and was therefore a no-op expression.
    # The body is intentionally empty: ``--version`` is fully handled by its
    # eager callback, and the application help text is the explicit ``help=``
    # given to ``typer.Typer``.
156
+
157
+
158
# Shared API URL option: defaults to the public PanGBank API, can be set via
# the PANGBANK_API_URL environment variable, and is parsed by validate_api_url
# (which also sends a GET request to the URL).
ApiUrlOption = typer.Option(
    HttpUrl("https://pangbank-api.genoscope.cns.fr/"),
    envvar="PANGBANK_API_URL",
    parser=validate_api_url,
    help="URL of the PanGBank API.",
    rich_help_panel="Execution settings",
)
165
+
166
+
167
@app.command(no_args_is_help=False)
def list_collections(
    api_url: HttpUrl = ApiUrlOption,
    verbose: bool = Verbose,
):
    """List available collections."""
    # ``verbose`` is not read here; the option's callback configures logging.
    collections = query_collections(api_url)
    logger.info(f"Found {len(collections)} collections in PanGBank.")

    print_dataframe_as_rich_table(
        format_collections_to_dataframe(collections),
        title="Available collections of PanGBank:",
    )

    # The YAML rendering is wired in but currently switched off.
    print_yaml = False
    if print_yaml:
        print_yaml_with_rich(format_collections_to_yaml(collections))
182
+
183
+
184
@app.command(no_args_is_help=True)
def search_pangenomes(
    # Search filters
    collection: Annotated[
        Optional[str],
        typer.Option(
            "--collection",
            "-c",
            help="Filter pangenomes by collection name (e.g. 'GTDB_refseq').",
            rich_help_panel="Search filters",
        ),
    ] = None,
    taxon: Annotated[
        Optional[str],
        typer.Option(
            "--taxon",
            "-t",
            help="Filter pangenomes by taxon name (e.g. 'Escherichia').",
            rich_help_panel="Search filters",
        ),
    ] = None,
    genome: Annotated[
        Optional[str],
        typer.Option(
            "--genome",
            "-g",
            help="Filter pangenomes by genome assembly identifier (e.g. 'GCF_000354175.2').",
            rich_help_panel="Search filters",
        ),
    ] = None,
    exact_match: Annotated[
        bool,
        typer.Option(
            help="Use exact string matching instead of partial matches.",
            rich_help_panel="Search filters",
        ),
    ] = False,
    # Output and downloads
    download: Annotated[
        bool,
        Download,
    ] = False,
    outdir: Annotated[
        Path,
        Outdir,
    ] = Path("pangbank"),
    details: Annotated[
        bool,
        typer.Option(
            help="Display summary information for each matching pangenome.",
            rich_help_panel="Output and downloads",
        ),
    ] = False,
    table_path: Annotated[
        Path,
        typer.Option(
            "--table",
            help=(
                "Save a TSV table summarizing the matching pangenomes. "
                "Use '-' to print the table to stdout."
            ),
            rich_help_panel="Output and downloads",
        ),
    ] = Path("pangenomes_information.tsv"),
    # Execution settings
    api_url: HttpUrl = ApiUrlOption,
    verbose: bool = Verbose,
    progress: Annotated[
        bool,
        Progress,
    ] = True,
):
    """Search for pangenomes."""
    hide_progress = not progress

    pangenomes = query_pangenomes(
        api_url,
        taxon_name=taxon,
        substring_taxon_match=not exact_match,
        collection_name=collection,
        genome_name=genome,
        only_latest_release=True,
        disable_progress_bar=hide_progress,
    )

    # An empty result combined with a collection filter may mean the collection
    # name is wrong: check it against the known names and abort if it is unknown.
    if not pangenomes and collection is not None:
        known_names = [c.name for c in query_collections(api_url)]
        if collection not in known_names:
            quoted_names = ", ".join([f"'{name}'" for name in known_names])
            logger.warning(
                f"Collection '{collection}' not found in PanGBank. "
                f"Available collections are: {quoted_names}."
            )
            raise typer.Exit(code=1)

    df = format_pangenomes_to_dataframe(pangenomes)

    # "-" selects stdout, any other value is treated as a file path.
    output_handle: TextIO | Path
    if str(table_path) != "-":
        logger.info(f"Saving pangenomes information as TSV table to file: {table_path}")
        output_handle = table_path
    else:
        logger.info("Printing pangenomes information as TSV table to stdout")
        output_handle = sys.stdout

    df.to_csv(output_handle, index=False, sep="\t")

    if details:
        display_pangenome_summary_by_collection(pangenomes, True)
        print_pangenome_info(pangenomes)

    if download:
        outdir.mkdir(parents=True, exist_ok=True)
        download_pangenomes(
            api_url, pangenomes, outdir, disable_progress_bar=hide_progress
        )
303
+
304
+
305
@app.command(no_args_is_help=True)
def match_pangenome(
    collection_name: Annotated[
        str,
        typer.Option(
            "--collection",
            "-c",
            help="The pangenome collection to match in.",
            rich_help_panel="Match parameters",
        ),
    ],
    input_genome_file: Annotated[
        Path,
        typer.Option(
            "--input-genome",
            "-i",
            help="Input genome to search a matching pangenome from.",
            exists=True,
            rich_help_panel="Match parameters",
        ),
    ],
    download: Annotated[
        bool,
        Download,
    ] = False,
    outdir: Annotated[
        Path,
        Outdir,
    ] = Path("pangbank"),
    api_url: HttpUrl = ApiUrlOption,
    progress: Annotated[
        bool,
        Progress,
    ] = True,
    verbose: bool = Verbose,
):
    """Match a pangenome from an input genome."""
    logger.info(
        f"Searching a matching pangenome in collection '{collection_name}' for genome '{input_genome_file}'"
    )
    collections = query_collections(api_url, collection_name=collection_name)

    check_mash_availability()

    # Exactly one collection must match the requested name.
    if not collections:
        logger.warning(f"No collections found for {collection_name}")
        raise typer.Exit(code=1)

    if len(collections) > 1:
        logger.warning(
            f"Only one collection should be returned. Got {len(collections)} "
            f"when querying collection_name={collection_name}"
        )
        raise typer.Exit(code=1)

    collection = collections[0]
    logger.debug(f"Collection found: {collection.name}")

    mash_sketch_file = get_mash_sketch_file(api_url, collection, outdir)
    query_to_best_match = compute_mash_distance(mash_sketch_file, [input_genome_file])

    # No usable result from the distance computation: stop with an error status.
    if query_to_best_match is None:
        raise typer.Exit(code=1)

    get_matching_pangenome(
        api_url=api_url,
        collection=collection,
        query_to_best_match=query_to_best_match,
        outdir=outdir,
        download=download,
        progress=progress,
    )
377
+
378
+
379
# Run the Typer application when this module is executed as a script.
if __name__ == "__main__":
    app()