pdbminebuilder 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pdbminebuilder/__init__.py +3 -0
- pdbminebuilder/__main__.py +6 -0
- pdbminebuilder/cli.py +369 -0
- pdbminebuilder/commands/__init__.py +1 -0
- pdbminebuilder/commands/load.py +164 -0
- pdbminebuilder/commands/reset.py +101 -0
- pdbminebuilder/commands/stats.py +147 -0
- pdbminebuilder/commands/sync.py +183 -0
- pdbminebuilder/commands/test.py +185 -0
- pdbminebuilder/commands/update.py +212 -0
- pdbminebuilder/commands/utils.py +47 -0
- pdbminebuilder/config.py +140 -0
- pdbminebuilder/db/__init__.py +5 -0
- pdbminebuilder/db/_type_utils.py +62 -0
- pdbminebuilder/db/connection.py +128 -0
- pdbminebuilder/db/delta.py +764 -0
- pdbminebuilder/db/loader.py +680 -0
- pdbminebuilder/db/metadata.py +188 -0
- pdbminebuilder/models/__init__.py +53 -0
- pdbminebuilder/models/cc.py +794 -0
- pdbminebuilder/models/ccmodel.py +283 -0
- pdbminebuilder/models/contacts.py +87 -0
- pdbminebuilder/models/emdb.py +3987 -0
- pdbminebuilder/models/ihm.py +2079 -0
- pdbminebuilder/models/pdbj.py +16913 -0
- pdbminebuilder/models/prd.py +1044 -0
- pdbminebuilder/models/prd_family.py +414 -0
- pdbminebuilder/models/vrpt.py +4077 -0
- pdbminebuilder/parsers/__init__.py +20 -0
- pdbminebuilder/parsers/cif.py +205 -0
- pdbminebuilder/parsers/mmjson.py +246 -0
- pdbminebuilder/pipelines/__init__.py +1 -0
- pdbminebuilder/pipelines/base.py +861 -0
- pdbminebuilder/pipelines/cc.py +822 -0
- pdbminebuilder/pipelines/ccmodel.py +508 -0
- pdbminebuilder/pipelines/contacts.py +340 -0
- pdbminebuilder/pipelines/emdb.py +413 -0
- pdbminebuilder/pipelines/ihm.py +508 -0
- pdbminebuilder/pipelines/pdbj.py +820 -0
- pdbminebuilder/pipelines/prd.py +619 -0
- pdbminebuilder/pipelines/prd_family.py +255 -0
- pdbminebuilder/pipelines/vrpt.py +341 -0
- pdbminebuilder/py.typed +0 -0
- pdbminebuilder/utils/__init__.py +1 -0
- pdbminebuilder/utils/assembly.py +232 -0
- pdbminebuilder/utils/brief_summary.py +365 -0
- pdbminebuilder/utils/patches.py +61 -0
- pdbminebuilder-0.2.0.dist-info/METADATA +136 -0
- pdbminebuilder-0.2.0.dist-info/RECORD +52 -0
- pdbminebuilder-0.2.0.dist-info/WHEEL +4 -0
- pdbminebuilder-0.2.0.dist-info/entry_points.txt +2 -0
- pdbminebuilder-0.2.0.dist-info/licenses/LICENSE +21 -0
pdbminebuilder/cli.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
"""CLI interface using typer + rich."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Annotated, Optional
|
|
7
|
+
|
|
8
|
+
import typer
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
|
|
11
|
+
from pdbminebuilder import __version__
|
|
12
|
+
from pdbminebuilder.config import load_config
|
|
13
|
+
|
|
14
|
+
# Root Typer application. The commands defined below attach themselves to it
# through the @app.command() / @app.callback() decorators.
app = typer.Typer(
    name="pmb",
    help="pdb-mine-builder - Build a Mine-schema database from PDB data.",
    rich_markup_mode="rich",
)
# Module-wide Rich console used for user-facing status output.
console = Console()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def setup_logging(log_file: Path | None, verbose: bool = False) -> logging.Logger:
|
|
23
|
+
"""Configure logging with optional file output.
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
log_file: Path to log file (None for no file logging)
|
|
27
|
+
verbose: If True, set DEBUG level; otherwise INFO
|
|
28
|
+
|
|
29
|
+
Returns:
|
|
30
|
+
Configured logger
|
|
31
|
+
"""
|
|
32
|
+
logger = logging.getLogger("pmb")
|
|
33
|
+
logger.setLevel(logging.DEBUG if verbose else logging.INFO)
|
|
34
|
+
|
|
35
|
+
# Clear existing handlers
|
|
36
|
+
logger.handlers.clear()
|
|
37
|
+
|
|
38
|
+
# Console handler (only warnings and errors)
|
|
39
|
+
console_handler = logging.StreamHandler()
|
|
40
|
+
console_handler.setLevel(logging.WARNING)
|
|
41
|
+
console_handler.setFormatter(logging.Formatter("%(levelname)s: %(message)s"))
|
|
42
|
+
logger.addHandler(console_handler)
|
|
43
|
+
|
|
44
|
+
# File handler (if specified)
|
|
45
|
+
if log_file:
|
|
46
|
+
log_file.parent.mkdir(parents=True, exist_ok=True)
|
|
47
|
+
file_handler = logging.FileHandler(log_file, mode="w", encoding="utf-8")
|
|
48
|
+
file_handler.setLevel(logging.DEBUG if verbose else logging.INFO)
|
|
49
|
+
file_handler.setFormatter(
|
|
50
|
+
logging.Formatter("%(asctime)s %(levelname)s %(name)s: %(message)s")
|
|
51
|
+
)
|
|
52
|
+
logger.addHandler(file_handler)
|
|
53
|
+
console.print(f"[dim]Logging to: {log_file}[/dim]")
|
|
54
|
+
|
|
55
|
+
return logger
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def version_callback(value: bool) -> None:
    # Eager option callback: when the flag is set, print the version and end
    # the CLI run via typer.Exit; otherwise do nothing.
    if not value:
        return
    console.print(f"pmb version {__version__}")
    raise typer.Exit()
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@app.callback()
def main(
    version: Annotated[
        Optional[bool],
        typer.Option("--version", "-v", callback=version_callback, is_eager=True),
    ] = None,
) -> None:
    """pdb-mine-builder - Build a Mine-schema database from PDB data."""
    # Nothing to do here: --version is handled entirely by version_callback,
    # and the subcommands carry the real work.
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@app.command()
def sync(
    targets: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Sync targets: pdbj, pdbj-json, cc, cc-json, ccmodel, ccmodel-json, prd, prd-json, prd-family, vrpt, contacts, schemas"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    dry_run: Annotated[
        bool,
        typer.Option(
            "--dry-run", "-n", help="Show what would be synced without actually syncing"
        ),
    ] = False,
) -> None:
    """Synchronize data from PDBj via rsync."""
    # The command implementation is imported only when the command runs.
    from pdbminebuilder.commands.sync import run_sync

    settings = load_config(config)
    # An omitted argument arrives as None; run_sync is always given a list.
    requested = targets if targets else []
    run_sync(settings, requested, dry_run=dry_run)
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@app.command()
def update(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Pipelines: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts, emdb, ihm (format via config)"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    limit: Annotated[
        Optional[int],
        typer.Option("--limit", "-l", help="Limit number of entries to process"),
    ] = None,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
    log: Annotated[
        Optional[Path],
        typer.Option(
            "--log",
            help="Log file path (default: logs/<pipeline>_YYYYMMDD_HHMMSS.log)",
        ),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Enable verbose (DEBUG) logging"),
    ] = False,
    force: Annotated[
        bool,
        typer.Option(
            "--force",
            "-f",
            help="Reprocess all entries ignoring cached mtimes (pdbj, vrpt, contacts only)",
        ),
    ] = False,
) -> None:
    """Run database update pipelines."""
    from pdbminebuilder.commands.update import run_update

    # Without --log, derive a timestamped file name from the request:
    # "all" (no pipelines given), the single pipeline's name, or "multi".
    if log is None:
        if not pipelines:
            log_name = "all"
        elif len(pipelines) == 1:
            log_name = pipelines[0].replace("-", "_")
        else:
            log_name = "multi"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log = Path(f"logs/{log_name}_{timestamp}.log")
    logger = setup_logging(log, verbose)

    settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        settings.rdb.nworkers = workers

    selected = pipelines or []
    logger.info(f"Starting update: pipelines={pipelines or 'all'}, limit={limit}")
    run_update(settings, selected, limit=limit, force=force)
    logger.info("Update completed")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@app.command()
def load(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Pipelines: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    limit: Annotated[
        Optional[int],
        typer.Option("--limit", "-l", help="Limit number of entries to process"),
    ] = None,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
    log: Annotated[
        Optional[Path],
        typer.Option(
            "--log",
            help="Log file path (default: logs/load_<pipeline>_YYYYMMDD_HHMMSS.log)",
        ),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Enable verbose (DEBUG) logging"),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Skip TRUNCATE confirmation prompt"),
    ] = False,
) -> None:
    """Bulk load data using COPY protocol (TRUNCATE + COPY).

    Significantly faster than 'update' for initial/full database loads.
    WARNING: This will TRUNCATE all tables in the target schema before loading.

    Examples:
        pmb load pdbj --limit 1000 --force
        pmb load cc ccmodel prd --force
    """
    from pdbminebuilder.commands.load import run_load

    # Without --log, build a timestamped "load_*" file name: "load_all" when
    # no pipeline was named, "load_<name>" for one, "load_multi" for several.
    if log is None:
        if not pipelines:
            log_name = "load_all"
        elif len(pipelines) == 1:
            log_name = f"load_{pipelines[0].replace('-', '_')}"
        else:
            log_name = "load_multi"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        log = Path(f"logs/{log_name}_{timestamp}.log")
    logger = setup_logging(log, verbose)

    settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        settings.rdb.nworkers = workers

    selected = pipelines or []
    logger.info(f"Starting load: pipelines={pipelines or []}, limit={limit}")
    run_load(settings, selected, limit=limit, force=force)
    logger.info("Load completed")
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
@app.command(name="all")
def run_all(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Run full sync and update cycle."""
    from pdbminebuilder.commands.sync import run_sync
    from pdbminebuilder.commands.update import run_update

    settings = load_config(config)
    say = console.print

    say("[bold blue]Starting full sync and update cycle...[/bold blue]")

    # Phase 1: sync with an empty target list and dry-run disabled.
    say("\n[bold]Phase 1: Sync[/bold]")
    run_sync(settings, [], dry_run=False)

    # Phase 2: update with an empty pipeline list and default options.
    say("\n[bold]Phase 2: Update[/bold]")
    run_update(settings, [])

    say("\n[bold green]Full cycle completed![/bold green]")
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
@app.command(name="setup-rdkit")
def setup_rdkit(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Setup RDKit extension and SQL functions.

    Creates RDKit extension, mol column on cc.brief_summary,
    and loads chemical search functions (similar_compounds, substructure_search, etc.).

    This is automatically run by the cc pipeline, but can be run
    independently to add functions to an existing database.
    """
    # Reuses the cc pipeline's private setup helper rather than duplicating it.
    from pdbminebuilder.pipelines.cc import _ensure_rdkit_setup

    constring = load_config(config).rdb.constring
    console.print("[bold]Setting up RDKit extension and functions...[/bold]")
    _ensure_rdkit_setup(constring)
    console.print("[bold green]RDKit setup completed![/bold green]")
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
@app.command()
def test(
    pipelines: Annotated[
        Optional[list[str]],
        typer.Argument(help="Pipelines to test"),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.test.yml"),
    drop: Annotated[
        bool,
        typer.Option("--drop", "-d", help="Drop existing test database"),
    ] = False,
    limit: Annotated[
        int,
        typer.Option("--limit", "-l", help="Limit number of files to process"),
    ] = 10,
    workers: Annotated[
        Optional[int],
        typer.Option(
            "--workers", "-w", help="Number of worker processes (overrides config)"
        ),
    ] = None,
) -> None:
    """Create test database and validate pipelines."""
    from pdbminebuilder.commands.test import run_test

    # Note the default config here is config.test.yml, not config.yml.
    settings = load_config(config)
    # A --workers value on the command line wins over the configured one.
    if workers is not None:
        settings.rdb.nworkers = workers

    selected = pipelines if pipelines else []
    run_test(settings, selected, drop=drop, limit=limit)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@app.command()
def reset(
    schemas: Annotated[
        Optional[list[str]],
        typer.Argument(
            help="Schemas to reset: pdbj, cc, ccmodel, prd, prd_family, vrpt, contacts, emdb, ihm (or 'all')"
        ),
    ] = None,
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Skip confirmation prompt"),
    ] = False,
) -> None:
    """Drop and reset database schemas (for testing/reloading).

    Examples:
    pmb reset cc # Reset cc schema only
    pmb reset cc pdbj # Reset cc and pdbj schemas
    pmb reset all # Reset ALL schemas (dangerous!)
    pmb reset all -f # Reset all without confirmation
    """
    from pdbminebuilder.commands.reset import run_reset

    settings = load_config(config)
    # An omitted argument arrives as None; run_reset is always given a list.
    requested = schemas if schemas else []
    run_reset(settings, requested, force=force)
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
@app.command()
def stats(
    config: Annotated[
        Path,
        typer.Option("--config", "-c", help="Config file path"),
    ] = Path("config.yml"),
) -> None:
    """Show database statistics.

    Displays table counts, row counts, and last update timestamps
    for each schema in the database.
    """
    # The command implementation is imported only when the command runs.
    from pdbminebuilder.commands.stats import run_stats

    run_stats(load_config(config))
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
# Allow running this module directly as a script: invoke the Typer app.
if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI commands."""
|
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""Load command - bulk load data using COPY protocol."""
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
from typing import Any, Callable
|
|
5
|
+
|
|
6
|
+
import typer
|
|
7
|
+
from rich.console import Console
|
|
8
|
+
|
|
9
|
+
from pdbminebuilder.commands.update import DUAL_FORMAT_PIPELINES, LEGACY_ALIASES
|
|
10
|
+
from pdbminebuilder.commands.utils import resolve_legacy_aliases
|
|
11
|
+
from pdbminebuilder.config import Settings
|
|
12
|
+
from pdbminebuilder.db.connection import close_pool, init_pool
|
|
13
|
+
from pdbminebuilder.db.loader import LoaderResult, ensure_schema, truncate_schema_tables
|
|
14
|
+
from pdbminebuilder.db.metadata import (
|
|
15
|
+
ensure_entry_metadata_table,
|
|
16
|
+
ensure_metadata_table,
|
|
17
|
+
update_pipeline_metadata,
|
|
18
|
+
)
|
|
19
|
+
from pdbminebuilder.models import get_metadata
|
|
20
|
+
|
|
21
|
+
console = Console()
|
|
22
|
+
|
|
23
|
+
# Pipelines accepted by the load command.
# Contract for each pipeline module: expose run_cif_load() for CIF input;
# modules listed in DUAL_FORMAT_PIPELINES must additionally expose
# run_load() for mmJSON input.
LOAD_PIPELINES = [
    "pdbj",
    "cc",
    "ccmodel",
    "prd",
    "prd_family",
    "vrpt",
    "contacts",
]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _get_load_runner(
    pipeline_name: str, settings: Settings
) -> Callable[..., list[LoaderResult]]:
    """Resolve the bulk-load entry point exposed by a pipeline module.

    The module ``pdbminebuilder.pipelines.<pipeline_name>`` is imported and one
    of two attributes is looked up on it:

    - ``run_load`` when the pipeline is in DUAL_FORMAT_PIPELINES and its
      configured format is "mmjson";
    - ``run_cif_load`` in every other case.

    Returns:
        Callable with signature (settings, config, meta, limit=...) -> list[LoaderResult]

    Raises:
        RuntimeError: If the pipeline module cannot be imported or the
            required load function is missing.
    """
    try:
        pipeline_module = importlib.import_module(
            f"pdbminebuilder.pipelines.{pipeline_name}"
        )
    except ImportError as e:
        raise RuntimeError(
            f"Failed to import pipeline module 'pdbminebuilder.pipelines.{pipeline_name}': {e}. "
            f"Check that all required dependencies are installed."
        ) from e

    # The configured format is consulted only for dual-format pipelines.
    wants_mmjson = False
    if pipeline_name in DUAL_FORMAT_PIPELINES:
        format_config = settings.pipelines.get(pipeline_name)
        wants_mmjson = bool(format_config) and format_config.format == "mmjson"

    entry_point = "run_load" if wants_mmjson else "run_cif_load"
    runner = getattr(pipeline_module, entry_point, None)
    if runner is not None:
        return runner

    # Missing entry point: the message depends on which one was expected.
    if wants_mmjson:
        raise RuntimeError(
            f"Pipeline '{pipeline_name}' does not support mmJSON load mode "
            f"(missing run_load in pdbminebuilder.pipelines.{pipeline_name}). "
            f"Set format='cif' in config.yml or implement run_load()."
        )
    raise RuntimeError(
        f"Pipeline '{pipeline_name}' does not support load mode "
        f"(missing run_cif_load in pdbminebuilder.pipelines.{pipeline_name})."
    )
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def run_load(
    settings: Settings,
    pipelines: list[str],
    limit: int | None = None,
    force: bool = False,
) -> None:
    """Run bulk load pipelines (TRUNCATE + COPY).

    Steps, in order: resolve legacy aliases, validate names against
    LOAD_PIPELINES, ask for confirmation (unless ``force``), resolve every
    pipeline's config/runner/metadata, and only then truncate and load each
    schema. The connection pool is closed on every exit path after it has
    been opened.

    Args:
        settings: Application settings
        pipelines: List of pipeline names to run
        limit: Optional limit on number of entries to process
        force: Skip interactive TRUNCATE confirmation

    Raises:
        RuntimeError: If a requested pipeline has no entry in
            ``settings.pipelines`` or its load runner cannot be resolved.
    """
    if not pipelines:
        console.print("[red]No pipelines specified.[/red]")
        console.print(f"[dim]Available: {', '.join(LOAD_PIPELINES)}[/dim]")
        return

    # Resolve legacy aliases with deprecation warnings
    pipelines = resolve_legacy_aliases(pipelines, LEGACY_ALIASES, "Pipeline")

    # Drop repeats while keeping the requested order. A name given twice (or
    # an alias that resolves to a name also given directly) would otherwise
    # be truncated and loaded twice.
    pipelines = list(dict.fromkeys(pipelines))

    invalid = [p for p in pipelines if p not in LOAD_PIPELINES]
    if invalid:
        console.print(f"[red]Invalid pipelines: {', '.join(invalid)}[/red]")
        console.print(f"[dim]Available: {', '.join(LOAD_PIPELINES)}[/dim]")
        return

    # Confirmation prompt unless --force
    if not force:
        schema_names = sorted(set(pipelines))
        console.print(
            f"[bold red]WARNING: This will TRUNCATE all tables in: "
            f"{', '.join(schema_names)}[/bold red]"
        )
        typer.confirm("Continue?", abort=True)

    console.print(f"[bold]Loading {len(pipelines)} pipeline(s)...[/bold]")

    # Pre-flight: verify all pipelines are importable and configured
    # BEFORE truncating any data.
    pipeline_runners: list[tuple[str, Any, Any, Any]] = []

    for pipeline_name in pipelines:
        pipeline_config = settings.pipelines.get(pipeline_name)
        if not pipeline_config:
            msg = (
                f"Pipeline {pipeline_name!r} has no configuration in "
                f"settings.pipelines. Check config.yml."
            )
            raise RuntimeError(msg)

        runner = _get_load_runner(pipeline_name, settings)
        meta = get_metadata(pipeline_name)
        pipeline_runners.append((pipeline_name, pipeline_config, meta, runner))

    init_pool(settings.rdb.constring, max_size=settings.rdb.get_workers() + 2)

    try:
        # Inside the try so the pool is still closed if creating the
        # bookkeeping tables fails.
        ensure_metadata_table(settings.rdb.constring)
        ensure_entry_metadata_table(settings.rdb.constring)

        for pipeline_name, pipeline_config, meta, runner in pipeline_runners:
            console.print(f"\n[bold blue]Pipeline: {pipeline_name} (load)[/bold blue]")
            console.print(f" Schema: {meta.schema}")
            console.print(f" Tables: {len(meta.tables)}")

            # Ensure schema exists
            ensure_schema(meta, settings.rdb.constring)

            # TRUNCATE all tables
            truncate_schema_tables(meta, settings.rdb.constring)

            # Run load pipeline
            results = runner(settings, pipeline_config, meta, limit=limit)

            # An empty or missing result list is recorded as None, not 0.
            success_count = sum(1 for r in results if r.success) if results else None
            update_pipeline_metadata(
                settings.rdb.constring,
                meta.schema,
                entries_count=success_count,
            )

    finally:
        close_pool()

    console.print("\n[bold green]Load completed![/bold green]")
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Reset command - drop and reset database schemas."""
|
|
2
|
+
|
|
3
|
+
import psycopg
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.prompt import Confirm
|
|
6
|
+
|
|
7
|
+
from pdbminebuilder.config import Settings
|
|
8
|
+
|
|
9
|
+
console = Console()
|
|
10
|
+
|
|
11
|
+
# Schema names the reset command accepts; "all" expands to this whole list.
KNOWN_SCHEMAS = [
    "pdbj", "cc", "ccmodel",
    "prd", "prd_family",
    "vrpt", "contacts",
    "emdb", "ihm",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def run_reset(
    settings: Settings,
    schemas: list[str],
    force: bool = False,
) -> None:
    """Drop specified schemas from the database.

    Each schema is dropped in its own implicit transaction (autocommit), so a
    failure on one schema neither aborts the remaining drops nor rolls back
    drops that were already reported as successful.

    Args:
        settings: Application settings
        schemas: List of schema names to drop, or ["all"] for all schemas
        force: Skip confirmation prompt
    """
    # Local import: used only to compose the DROP statement safely.
    from psycopg import sql

    if not schemas:
        console.print("[yellow]No schemas specified. Available schemas:[/yellow]")
        console.print(f" {', '.join(KNOWN_SCHEMAS)}")
        console.print("\nUsage:")
        console.print(" pmb reset cc # Reset cc schema")
        console.print(" pmb reset cc pdbj # Reset multiple schemas")
        console.print(" pmb reset all # Reset ALL schemas")
        return

    # Handle 'all' keyword
    if "all" in schemas:
        target_schemas = KNOWN_SCHEMAS.copy()
    else:
        # Validate schema names
        invalid = [s for s in schemas if s not in KNOWN_SCHEMAS]
        if invalid:
            console.print(f"[red]Unknown schema(s): {', '.join(invalid)}[/red]")
            console.print(f"[yellow]Valid schemas: {', '.join(KNOWN_SCHEMAS)}[/yellow]")
            return
        target_schemas = schemas

    # Show what will be dropped
    console.print(
        "\n[bold red]WARNING: This will DROP the following schemas:[/bold red]"
    )
    for schema in target_schemas:
        console.print(f" • {schema}")
    console.print(
        "\n[yellow]All data in these schemas will be permanently deleted![/yellow]"
    )

    # Confirm unless --force
    if not force:
        confirmed = Confirm.ask("\nAre you sure you want to continue?", default=False)
        if not confirmed:
            console.print("[dim]Aborted.[/dim]")
            return

    # Drop schemas. autocommit=True matters: in a single shared transaction
    # the first failed statement aborts the transaction, every later statement
    # fails too, and the final commit rolls back drops already printed as done.
    failed: list[str] = []
    with psycopg.connect(settings.rdb.constring, autocommit=True) as conn:
        with conn.cursor() as cur:
            for schema in target_schemas:
                try:
                    # Check if schema exists
                    cur.execute(
                        "SELECT EXISTS(SELECT 1 FROM information_schema.schemata WHERE schema_name = %s)",
                        (schema,),
                    )
                    result = cur.fetchone()
                    exists = result[0] if result else False

                    if exists:
                        # Compose the identifier instead of formatting it
                        # into the SQL text.
                        cur.execute(
                            sql.SQL("DROP SCHEMA {} CASCADE").format(
                                sql.Identifier(schema)
                            )
                        )
                        console.print(f" [green]✓[/green] Dropped schema: {schema}")
                    else:
                        console.print(f" [dim]○[/dim] Schema not found: {schema}")
                except Exception as e:
                    # Best effort: report the failure and continue with the
                    # remaining schemas.
                    failed.append(schema)
                    console.print(f" [red]✗[/red] Error dropping {schema}: {e}")

    if failed:
        console.print(
            f"\n[bold yellow]Reset finished with errors; not dropped: "
            f"{', '.join(failed)}[/bold yellow]"
        )
    else:
        console.print("\n[bold green]Reset completed![/bold green]")
    console.print("[dim]Run 'pmb update <pipeline>' to reload data.[/dim]")
|