pdbminebuilder 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. pdbminebuilder/__init__.py +3 -0
  2. pdbminebuilder/__main__.py +6 -0
  3. pdbminebuilder/cli.py +369 -0
  4. pdbminebuilder/commands/__init__.py +1 -0
  5. pdbminebuilder/commands/load.py +164 -0
  6. pdbminebuilder/commands/reset.py +101 -0
  7. pdbminebuilder/commands/stats.py +147 -0
  8. pdbminebuilder/commands/sync.py +183 -0
  9. pdbminebuilder/commands/test.py +185 -0
  10. pdbminebuilder/commands/update.py +212 -0
  11. pdbminebuilder/commands/utils.py +47 -0
  12. pdbminebuilder/config.py +140 -0
  13. pdbminebuilder/db/__init__.py +5 -0
  14. pdbminebuilder/db/_type_utils.py +62 -0
  15. pdbminebuilder/db/connection.py +128 -0
  16. pdbminebuilder/db/delta.py +764 -0
  17. pdbminebuilder/db/loader.py +680 -0
  18. pdbminebuilder/db/metadata.py +188 -0
  19. pdbminebuilder/models/__init__.py +53 -0
  20. pdbminebuilder/models/cc.py +794 -0
  21. pdbminebuilder/models/ccmodel.py +283 -0
  22. pdbminebuilder/models/contacts.py +87 -0
  23. pdbminebuilder/models/emdb.py +3987 -0
  24. pdbminebuilder/models/ihm.py +2079 -0
  25. pdbminebuilder/models/pdbj.py +16913 -0
  26. pdbminebuilder/models/prd.py +1044 -0
  27. pdbminebuilder/models/prd_family.py +414 -0
  28. pdbminebuilder/models/vrpt.py +4077 -0
  29. pdbminebuilder/parsers/__init__.py +20 -0
  30. pdbminebuilder/parsers/cif.py +205 -0
  31. pdbminebuilder/parsers/mmjson.py +246 -0
  32. pdbminebuilder/pipelines/__init__.py +1 -0
  33. pdbminebuilder/pipelines/base.py +861 -0
  34. pdbminebuilder/pipelines/cc.py +822 -0
  35. pdbminebuilder/pipelines/ccmodel.py +508 -0
  36. pdbminebuilder/pipelines/contacts.py +340 -0
  37. pdbminebuilder/pipelines/emdb.py +413 -0
  38. pdbminebuilder/pipelines/ihm.py +508 -0
  39. pdbminebuilder/pipelines/pdbj.py +820 -0
  40. pdbminebuilder/pipelines/prd.py +619 -0
  41. pdbminebuilder/pipelines/prd_family.py +255 -0
  42. pdbminebuilder/pipelines/vrpt.py +341 -0
  43. pdbminebuilder/py.typed +0 -0
  44. pdbminebuilder/utils/__init__.py +1 -0
  45. pdbminebuilder/utils/assembly.py +232 -0
  46. pdbminebuilder/utils/brief_summary.py +365 -0
  47. pdbminebuilder/utils/patches.py +61 -0
  48. pdbminebuilder-0.2.0.dist-info/METADATA +136 -0
  49. pdbminebuilder-0.2.0.dist-info/RECORD +52 -0
  50. pdbminebuilder-0.2.0.dist-info/WHEEL +4 -0
  51. pdbminebuilder-0.2.0.dist-info/entry_points.txt +2 -0
  52. pdbminebuilder-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,3 @@
1
+ """pdb-mine-builder - Build a Mine-schema database from PDB data."""
2
+
3
+ __version__ = "0.2.0"
@@ -0,0 +1,6 @@
1
+ """Entry point for python -m pdbminebuilder."""
2
+
3
+ from pdbminebuilder.cli import app
4
+
5
+ if __name__ == "__main__":
6
+ app()
pdbminebuilder/cli.py ADDED
@@ -0,0 +1,369 @@
1
+ """CLI interface using typer + rich."""
2
+
3
+ import logging
4
+ from datetime import datetime
5
+ from pathlib import Path
6
+ from typing import Annotated, Optional
7
+
8
+ import typer
9
+ from rich.console import Console
10
+
11
+ from pdbminebuilder import __version__
12
+ from pdbminebuilder.config import load_config
13
+
14
+ app = typer.Typer(
15
+ name="pmb",
16
+ help="pdb-mine-builder - Build a Mine-schema database from PDB data.",
17
+ rich_markup_mode="rich",
18
+ )
19
+ console = Console()
20
+
21
+
22
+ def setup_logging(log_file: Path | None, verbose: bool = False) -> logging.Logger:
23
+ """Configure logging with optional file output.
24
+
25
+ Args:
26
+ log_file: Path to log file (None for no file logging)
27
+ verbose: If True, set DEBUG level; otherwise INFO
28
+
29
+ Returns:
30
+ Configured logger
31
+ """
32
+ logger = logging.getLogger("pmb")
33
+ logger.setLevel(logging.DEBUG if verbose else logging.INFO)
34
+
35
+ # Clear existing handlers
36
+ logger.handlers.clear()
37
+
38
+ # Console handler (only warnings and errors)
39
+ console_handler = logging.StreamHandler()
40
+ console_handler.setLevel(logging.WARNING)
41
+ console_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
42
+ logger.addHandler(console_handler)
43
+
44
+ # File handler (if specified)
45
+ if log_file:
46
+ log_file.parent.mkdir(parents=True, exist_ok=True)
47
+ file_handler = logging.FileHandler(log_file, mode="w", encoding="utf-8")
48
+ file_handler.setLevel(logging.DEBUG if verbose else logging.INFO)
49
+ file_handler.setFormatter(
50
+ logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
51
+ )
52
+ logger.addHandler(file_handler)
53
+ console.print(f"[dim]Logging to: {log_file}[/dim]")
54
+
55
+ return logger
56
+
57
+
58
def version_callback(value: bool) -> None:
    """Print the package version and exit when the flag was supplied.

    Args:
        value: Truthy when ``--version`` / ``-v`` was passed on the command line.

    Raises:
        typer.Exit: After printing the version, to stop further processing.
    """
    if not value:
        return
    console.print(f"pmb version {__version__}")
    raise typer.Exit()
62
+
63
+
64
@app.callback()
def main(
    version: Annotated[
        Optional[bool],
        typer.Option("--version", "-v", callback=version_callback, is_eager=True),
    ] = None,
) -> None:
    """pdb-mine-builder - Build a Mine-schema database from PDB data."""
    # Nothing to do here: the eager --version option is handled entirely by
    # version_callback, and sub-commands carry the real work.
    return None
73
+
74
+
75
@app.command()
def sync(
    targets: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Sync targets: pdbj, pdbj-json, cc, cc-json, ccmodel, ccmodel-json, prd, prd-json, prd-family, vrpt, contacts, schemas"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run", "-n", help="Show what would be synced without actually syncing"
        ),
    ] = False,
) -> None:
    """Synchronize data from PDBj via rsync."""
    # Imported here rather than at module top, so it is only loaded when this
    # command actually runs.
    from pdbminebuilder.commands.sync import run_sync

    loaded_settings = load_config(config)
    # An omitted positional argument arrives as None; hand run_sync an empty list.
    requested_targets = targets if targets else []
    run_sync(loaded_settings, requested_targets, dry_run=dry_run)
99
+
100
+
101
@app.command()
def update(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Pipelines: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts, emdb, ihm (format via config)"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    limit: Annotated[
        Optional[int],
        typer.Option("--limit", "-l", help="Limit number of entries to process"),
    ] = None,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
    log: Annotated[
        Optional[Path],
        typer.Option(
            "--log",
            help="Log file path (default: logs/<pipeline>_YYYYMMDD_HHMMSS.log)",
        ),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Enable verbose (DEBUG) logging"),
    ] = False,
    force: Annotated[
        bool,
        typer.Option(
            "--force",
            "-f",
            help="Reprocess all entries ignoring cached mtimes (pdbj, vrpt, contacts only)",
        ),
    ] = False,
) -> None:
    """Run database update pipelines."""
    from pdbminebuilder.commands.update import run_update

    # No explicit --log: derive a timestamped file name from the selection.
    if log is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        if not pipelines:
            # Nothing named on the command line -> every pipeline.
            prefix = "all"
        elif len(pipelines) == 1:
            # Exactly one pipeline: name the file after it.
            prefix = pipelines[0].replace("-", "_")
        else:
            prefix = "multi"
        log = Path(f"logs/{prefix}_{stamp}.log")
    logger = setup_logging(log, verbose)

    settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        settings.rdb.nworkers = workers

    logger.info(f"Starting update: pipelines={pipelines or 'all'}, limit={limit}")
    selected = pipelines if pipelines else []
    run_update(settings, selected, limit=limit, force=force)
    logger.info("Update completed")
168
+
169
+
170
@app.command()
def load(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Pipelines: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    limit: Annotated[
        Optional[int],
        typer.Option("--limit", "-l", help="Limit number of entries to process"),
    ] = None,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
    log: Annotated[
        Optional[Path],
        typer.Option(
            "--log",
            help="Log file path (default: logs/load_<pipeline>_YYYYMMDD_HHMMSS.log)",
        ),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Enable verbose (DEBUG) logging"),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Skip TRUNCATE confirmation prompt"),
    ] = False,
) -> None:
    """Bulk load data using COPY protocol (TRUNCATE + COPY).

    Significantly faster than 'update' for initial/full database loads.
    WARNING: This will TRUNCATE all tables in the target schema before loading.

    Examples:
        pmb load pdbj --limit 1000 --force
        pmb load cc ccmodel prd --force
    """
    from pdbminebuilder.commands.load import run_load

    # No explicit --log: build "logs/load_<what>_<timestamp>.log".
    if log is None:
        stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        if not pipelines:
            prefix = "load_all"
        elif len(pipelines) == 1:
            prefix = f"load_{pipelines[0].replace('-', '_')}"
        else:
            prefix = "load_multi"
        log = Path(f"logs/{prefix}_{stamp}.log")
    logger = setup_logging(log, verbose)

    settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        settings.rdb.nworkers = workers

    logger.info(f"Starting load: pipelines={pipelines or []}, limit={limit}")
    selected = pipelines if pipelines else []
    run_load(settings, selected, limit=limit, force=force)
    logger.info("Load completed")
237
+
238
+
239
@app.command(name="all")
def run_all(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Run full sync and update cycle."""
    from pdbminebuilder.commands.sync import run_sync
    from pdbminebuilder.commands.update import run_update

    loaded_settings = load_config(config)
    console.print("[bold blue]Starting full sync and update cycle...[/bold blue]")

    # Phase 1: sync with an empty target list and dry-run disabled.
    console.print("\n[bold]Phase 1: Sync[/bold]")
    run_sync(loaded_settings, [], dry_run=False)

    # Phase 2: update with an empty pipeline list.
    console.print("\n[bold]Phase 2: Update[/bold]")
    run_update(loaded_settings, [])

    console.print("\n[bold green]Full cycle completed![/bold green]")
260
+
261
+
262
@app.command(name="setup-rdkit")
def setup_rdkit(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Setup RDKit extension and SQL functions.

    Creates RDKit extension, mol column on cc.brief_summary,
    and loads chemical search functions (similar_compounds, substructure_search, etc.).

    This is automatically run by the cc pipeline, but can be run
    independently to add functions to an existing database.
    """
    # Reuses the cc pipeline's private setup helper.
    from pdbminebuilder.pipelines.cc import _ensure_rdkit_setup

    connection_string = load_config(config).rdb.constring
    console.print("[bold]Setting up RDKit extension and functions...[/bold]")
    _ensure_rdkit_setup(connection_string)
    console.print("[bold green]RDKit setup completed![/bold green]")
283
+
284
+
285
@app.command()
def test(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(help="Pipelines to test"),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.test.yml"),
    drop: Annotated[
        bool,
        typer.Option("--drop", "-d", help="Drop existing test database"),
    ] = False,
    limit: Annotated[
        int,
        typer.Option("--limit", "-l", help="Limit number of files to process"),
    ] = 10,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
) -> None:
    """Create test database and validate pipelines."""
    from pdbminebuilder.commands.test import run_test

    test_settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        test_settings.rdb.nworkers = workers

    selected = pipelines if pipelines else []
    run_test(test_settings, selected, drop=drop, limit=limit)
317
+
318
+
319
@app.command()
def reset(
    schemas: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Schemas to reset: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts, emdb, ihm (or 'all')"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Skip confirmation prompt"),
    ] = False,
) -> None:
    """Drop and reset database schemas (for testing/reloading).

    Examples:
        pmb reset cc # Reset cc schema only
        pmb reset cc pdbj # Reset cc and pdbj schemas
        pmb reset all # Reset ALL schemas (dangerous!)
        pmb reset all -f # Reset all without confirmation
    """
    from pdbminebuilder.commands.reset import run_reset

    loaded_settings = load_config(config)
    # An omitted positional argument arrives as None; run_reset gets [] instead.
    requested_schemas = schemas if schemas else []
    run_reset(loaded_settings, requested_schemas, force=force)
348
+
349
+
350
@app.command()
def stats(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Show database statistics.

    Displays table counts, row counts, and last update timestamps
    for each schema in the database.
    """
    # Imported here rather than at module top, so it is only loaded when this
    # command actually runs.
    from pdbminebuilder.commands.stats import run_stats

    run_stats(load_config(config))
366
+
367
+
368
+ if __name__ == "__main__":
369
+ app()
@@ -0,0 +1 @@
1
+ """CLI commands."""
@@ -0,0 +1,164 @@
1
+ """Load command - bulk load data using COPY protocol."""
2
+
3
+ import importlib
4
+ from typing import Any, Callable
5
+
6
+ import typer
7
+ from rich.console import Console
8
+
9
+ from pdbminebuilder.commands.update import DUAL_FORMAT_PIPELINES, LEGACY_ALIASES
10
+ from pdbminebuilder.commands.utils import resolve_legacy_aliases
11
+ from pdbminebuilder.config import Settings
12
+ from pdbminebuilder.db.connection import close_pool, init_pool
13
+ from pdbminebuilder.db.loader import LoaderResult, ensure_schema, truncate_schema_tables
14
+ from pdbminebuilder.db.metadata import (
15
+ ensure_entry_metadata_table,
16
+ ensure_metadata_table,
17
+ update_pipeline_metadata,
18
+ )
19
+ from pdbminebuilder.models import get_metadata
20
+
21
+ console = Console()
22
+
23
+ # Pipelines supported by load command.
24
+ # Each pipeline module must expose run_cif_load() for CIF format.
25
+ # Dual-format pipelines (DUAL_FORMAT_PIPELINES) must also expose
26
+ # run_load() for mmJSON format.
27
+ LOAD_PIPELINES = ["pdbj", "cc", "ccmodel", "prd", "prd_family", "vrpt", "contacts"]
28
+
29
+
30
def _get_load_runner(
    pipeline_name: str, settings: Settings
) -> Callable[..., list[LoaderResult]]:
    """Look up the bulk-load entry point of a pipeline module.

    The module ``pdbminebuilder.pipelines.<pipeline_name>`` is imported and one
    of two attributes is returned:

    - ``run_load`` when the pipeline is listed in DUAL_FORMAT_PIPELINES and its
      configured format is "mmjson";
    - ``run_cif_load`` in every other case.

    Args:
        pipeline_name: Name of the pipeline (also the module name to import).
        settings: Application settings; only ``settings.pipelines`` is read.

    Returns:
        Callable with signature (settings, config, meta, limit=...) -> list[LoaderResult]

    Raises:
        RuntimeError: If the pipeline module cannot be imported or the
            required load function is missing.
    """
    try:
        module = importlib.import_module(f"pdbminebuilder.pipelines.{pipeline_name}")
    except ImportError as e:
        raise RuntimeError(
            f"Failed to import pipeline module 'pdbminebuilder.pipelines.{pipeline_name}': {e}. "
            f"Check that all required dependencies are installed."
        ) from e

    # Only dual-format pipelines consult the config for their input format.
    use_mmjson = False
    if pipeline_name in DUAL_FORMAT_PIPELINES:
        cfg = settings.pipelines.get(pipeline_name)
        use_mmjson = bool(cfg and cfg.format == "mmjson")

    if use_mmjson:
        mmjson_runner = getattr(module, "run_load", None)
        if mmjson_runner is not None:
            return mmjson_runner
        raise RuntimeError(
            f"Pipeline '{pipeline_name}' does not support mmJSON load mode "
            f"(missing run_load in pdbminebuilder.pipelines.{pipeline_name}). "
            f"Set format='cif' in config.yml or implement run_load()."
        )

    cif_runner = getattr(module, "run_cif_load", None)
    if cif_runner is not None:
        return cif_runner
    raise RuntimeError(
        f"Pipeline '{pipeline_name}' does not support load mode "
        f"(missing run_cif_load in pdbminebuilder.pipelines.{pipeline_name})."
    )
77
+
78
+
79
def run_load(
    settings: Settings,
    pipelines: list[str],
    limit: int | None = None,
    force: bool = False,
) -> None:
    """Run bulk load pipelines (TRUNCATE + COPY).

    Every requested pipeline is validated (known name, has configuration,
    runner importable) before any table is truncated. A pipeline named more
    than once is loaded only once.

    Args:
        settings: Application settings
        pipelines: List of pipeline names to run
        limit: Optional limit on number of entries to process
        force: Skip interactive TRUNCATE confirmation

    Raises:
        RuntimeError: If a requested pipeline has no configuration, or its
            load runner cannot be resolved.
    """
    if not pipelines:
        console.print("[red]No pipelines specified.[/red]")
        console.print(f"[dim]Available: {', '.join(LOAD_PIPELINES)}[/dim]")
        return

    # Resolve legacy aliases with deprecation warnings
    pipelines = resolve_legacy_aliases(pipelines, LEGACY_ALIASES, "Pipeline")

    # Drop repeats while keeping first-seen order: running the same pipeline
    # twice would only truncate and reload the data it had just loaded.
    pipelines = list(dict.fromkeys(pipelines))

    invalid = [p for p in pipelines if p not in LOAD_PIPELINES]
    if invalid:
        console.print(f"[red]Invalid pipelines: {', '.join(invalid)}[/red]")
        console.print(f"[dim]Available: {', '.join(LOAD_PIPELINES)}[/dim]")
        return

    # Confirmation prompt unless --force
    if not force:
        schema_names = sorted(pipelines)
        console.print(
            f"[bold red]WARNING: This will TRUNCATE all tables in: "
            f"{', '.join(schema_names)}[/bold red]"
        )
        typer.confirm("Continue?", abort=True)

    console.print(f"[bold]Loading {len(pipelines)} pipeline(s)...[/bold]")

    # Pre-flight: verify all pipelines are importable and configured
    # BEFORE truncating any data.
    pipeline_runners: list[tuple[str, Any, Any, Any]] = []

    for pipeline_name in pipelines:
        pipeline_config = settings.pipelines.get(pipeline_name)
        if not pipeline_config:
            msg = (
                f"Pipeline {pipeline_name!r} has no configuration in "
                f"settings.pipelines. Check config.yml."
            )
            raise RuntimeError(msg)

        runner = _get_load_runner(pipeline_name, settings)
        meta = get_metadata(pipeline_name)
        pipeline_runners.append((pipeline_name, pipeline_config, meta, runner))

    init_pool(settings.rdb.constring, max_size=settings.rdb.get_workers() + 2)

    try:
        # These run inside the try so the pool is closed even if creating the
        # bookkeeping tables fails.
        ensure_metadata_table(settings.rdb.constring)
        ensure_entry_metadata_table(settings.rdb.constring)

        for pipeline_name, pipeline_config, meta, runner in pipeline_runners:
            console.print(f"\n[bold blue]Pipeline: {pipeline_name} (load)[/bold blue]")
            console.print(f" Schema: {meta.schema}")
            console.print(f" Tables: {len(meta.tables)}")

            # Ensure schema exists
            ensure_schema(meta, settings.rdb.constring)

            # TRUNCATE all tables
            truncate_schema_tables(meta, settings.rdb.constring)

            # Run load pipeline
            results = runner(settings, pipeline_config, meta, limit=limit)

            # An empty/None result list is recorded as "unknown" (None), not 0.
            success_count = sum(1 for r in results if r.success) if results else None
            update_pipeline_metadata(
                settings.rdb.constring,
                meta.schema,
                entries_count=success_count,
            )

    finally:
        close_pool()

    console.print("\n[bold green]Load completed![/bold green]")
@@ -0,0 +1,101 @@
1
+ """Reset command - drop and reset database schemas."""
2
+
3
+ import psycopg
4
+ from rich.console import Console
5
+ from rich.prompt import Confirm
6
+
7
+ from pdbminebuilder.config import Settings
8
+
9
+ console = Console()
10
+
11
+ # Known schemas that can be reset
12
+ KNOWN_SCHEMAS = [
13
+ "pdbj",
14
+ "cc",
15
+ "ccmodel",
16
+ "prd",
17
+ "prd_family",
18
+ "vrpt",
19
+ "contacts",
20
+ "emdb",
21
+ "ihm",
22
+ ]
23
+
24
+
25
def run_reset(
    settings: Settings,
    schemas: list[str],
    force: bool = False,
) -> None:
    """Drop specified schemas from the database.

    Each schema is dropped in its own transaction, so a failure on one schema
    neither blocks the remaining drops nor undoes the ones already reported
    as dropped. Schemas that do not exist are reported and skipped.

    Args:
        settings: Application settings
        schemas: List of schema names to drop, or ["all"] for all schemas
        force: Skip confirmation prompt
    """
    # Local import: psycopg.sql composes the DROP statement with a properly
    # quoted identifier instead of interpolating the name into the SQL text.
    from psycopg import sql

    if not schemas:
        console.print("[yellow]No schemas specified. Available schemas:[/yellow]")
        console.print(f" {', '.join(KNOWN_SCHEMAS)}")
        console.print("\nUsage:")
        console.print(" pmb reset cc # Reset cc schema")
        console.print(" pmb reset cc pdbj # Reset multiple schemas")
        console.print(" pmb reset all # Reset ALL schemas")
        return

    # Handle 'all' keyword
    if "all" in schemas:
        target_schemas = KNOWN_SCHEMAS.copy()
    else:
        # Validate schema names
        invalid = [s for s in schemas if s not in KNOWN_SCHEMAS]
        if invalid:
            console.print(f"[red]Unknown schema(s): {', '.join(invalid)}[/red]")
            console.print(f"[yellow]Valid schemas: {', '.join(KNOWN_SCHEMAS)}[/yellow]")
            return
        # Drop repeated names while keeping the order given by the caller.
        target_schemas = list(dict.fromkeys(schemas))

    # Show what will be dropped
    console.print(
        "\n[bold red]WARNING: This will DROP the following schemas:[/bold red]"
    )
    for schema in target_schemas:
        console.print(f" • {schema}")
    console.print(
        "\n[yellow]All data in these schemas will be permanently deleted![/yellow]"
    )

    # Confirm unless --force
    if not force:
        confirmed = Confirm.ask("\nAre you sure you want to continue?", default=False)
        if not confirmed:
            console.print("[dim]Aborted.[/dim]")
            return

    failed: list[str] = []

    # Drop schemas
    with psycopg.connect(settings.rdb.constring) as conn:
        with conn.cursor() as cur:
            for schema in target_schemas:
                try:
                    # One transaction per schema: an error rolls back only this
                    # schema's work and leaves the connection usable for the
                    # next one.
                    with conn.transaction():
                        # Check if schema exists
                        cur.execute(
                            "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s)",
                            (schema,),
                        )
                        row = cur.fetchone()
                        exists = bool(row and row[0])

                        if exists:
                            cur.execute(
                                sql.SQL("DROP SCHEMA {} CASCADE").format(
                                    sql.Identifier(schema)
                                )
                            )
                except psycopg.Error as e:
                    failed.append(schema)
                    console.print(f" [red]✗[/red] Error dropping {schema}: {e}")
                    continue

                # Reported only after the transaction block exited cleanly.
                if exists:
                    console.print(f" [green]✓[/green] Dropped schema: {schema}")
                else:
                    console.print(f" [dim]○[/dim] Schema not found: {schema}")

    if failed:
        console.print(
            f"\n[bold yellow]Reset finished with errors; not dropped: "
            f"{', '.join(failed)}[/bold yellow]"
        )
    else:
        console.print("\n[bold green]Reset completed![/bold green]")
    console.print("[dim]Run 'pmb update <pipeline>' to reload data.[/dim]")