datamasque-cli 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,526 @@
1
+ """Masking run management commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from datetime import UTC, datetime
8
+ from pathlib import Path
9
+
10
+ import typer
11
+ from datamasque.client import DataMasqueClient, RunId
12
+ from datamasque.client.exceptions import DataMasqueApiError, RunNotCancellableError
13
+ from datamasque.client.models.connection import ConnectionConfig
14
+ from datamasque.client.models.runs import MaskingRunOptions, MaskingRunRequest, RunInfo
15
+
16
+ from datamasque_cli.client import get_client
17
+ from datamasque_cli.output import (
18
+ abort,
19
+ console,
20
+ print_error,
21
+ print_json,
22
+ print_success,
23
+ render_output,
24
+ stdout_console,
25
+ style_status,
26
+ )
27
+
28
# Typer sub-application that groups the masking-run commands defined in this module.
app = typer.Typer(help="Manage masking runs.")

# Pause, in seconds, between two consecutive polls of the server.
_POLL_INTERVAL_SECONDS = 5

# HTTP status code compared against API error responses (see `run report`).
_HTTP_NOT_FOUND = 404
33
+
34
+
35
def _format_run_info(run: RunInfo, *, is_styled: bool = False) -> dict[str, object]:
    """Build the display row for a single `RunInfo`.

    The row carries the keys `id`, `status`, `ruleset`, `source`,
    `destination` and `created`. `destination` and `created` are `None` when
    the run has no destination connection or no start time. When `is_styled`
    is true the status text is passed through `style_status`.
    """
    raw_status = run.status.value
    shown_status = style_status(raw_status) if is_styled else raw_status

    destination_name = None
    if run.destination_connection:
        destination_name = run.destination_connection.name

    created = None
    if run.start_time:
        created = run.start_time.isoformat()

    return {
        "id": run.id,
        "status": shown_status,
        "ruleset": run.ruleset_name,
        "source": run.source_connection.name,
        "destination": destination_name,
        "created": created,
    }
47
+
48
+
49
def _format_run_dict(run_data: dict[str, object], *, is_styled: bool = False) -> dict[str, object]:
    """Build the display row from a raw run dict returned by the run list endpoint.

    The status is read from `status`, falling back to `run_status`, and is an
    empty string when neither holds a truthy value. When `is_styled` is true
    the status text is passed through `style_status`. Every other column is
    copied from the raw dict and is `None` when the key is absent.
    """
    status_text = str(run_data.get("status") or run_data.get("run_status") or "")
    if is_styled:
        status_text = style_status(status_text)

    row: dict[str, object] = {"id": run_data.get("id"), "status": status_text}
    # Display column -> key in the raw response, in display order.
    column_sources = (
        ("ruleset", "ruleset_name"),
        ("source", "source_connection_name"),
        ("destination", "destination_connection_name"),
        ("created", "created_time"),
    )
    for column, source_key in column_sources:
        row[column] = run_data.get(source_key)
    return row
60
+
61
+
62
def _resolve_connection(client: DataMasqueClient, name_or_id: str) -> ConnectionConfig:
    """Look up a connection by name first, then by ID.

    All connections are fetched from `client` once. A name match always wins
    over an ID match. When nothing matches, `abort` is called with a message
    listing the names of the available connections.
    """
    connections = client.list_connections()

    # First pass: exact name match.
    for candidate in connections:
        if candidate.name == name_or_id:
            return candidate

    # Second pass: match on the stringified ID.
    for candidate in connections:
        if str(candidate.id) == name_or_id:
            return candidate

    available = ", ".join(candidate.name for candidate in connections)
    abort(f"Connection '{name_or_id}' not found. Available: {available}")
76
+
77
+
78
def _resolve_connection_id(client: DataMasqueClient, name_or_id: str) -> str:
    """Return the ID, as a string, of the connection matching `name_or_id`.

    The lookup is delegated to `_resolve_connection`, so the value is always
    checked against the connections known to the server, whether it is a name
    or an ID.
    """
    resolved = _resolve_connection(client, name_or_id)
    return str(resolved.id)
81
+
82
+
83
def _resolve_ruleset_id(client: DataMasqueClient, name_or_id: str, mask_type: str | None = None) -> str:
    """Translate a ruleset name (or ID) into the ruleset ID as a string.

    Resolution order:

    1. If `mask_type` is given and exactly one ruleset has both the requested
       name and that type, it is returned. If rulesets with that name exist
       but none has the requested type, the lookup aborts.
    2. Otherwise a unique name match is returned; several rulesets sharing
       the name abort with the list of `type:id` candidates.
    3. Finally the value is compared against ruleset IDs. A match of the
       wrong type (when `mask_type` is given) aborts.

    When nothing matches, the lookup aborts and lists the available names.
    """
    all_rulesets = client.list_rulesets()
    named = [rs for rs in all_rulesets if rs.name == name_or_id]

    if mask_type is not None:
        typed = [rs for rs in named if rs.ruleset_type.value == mask_type]
        if len(typed) == 1:
            return str(typed[0].id)
        if named and not typed:
            existing = ", ".join(f"{rs.ruleset_type.value}" for rs in named)
            abort(
                f"Ruleset '{name_or_id}' exists as {existing}, "
                f"but a {mask_type} ruleset is required for this connection."
            )

    if len(named) > 1:
        options = ", ".join(f"{rs.ruleset_type.value}:{rs.id}" for rs in named)
        abort(f"Multiple rulesets named '{name_or_id}' ({options}). Pass a UUID instead, or rename one of them.")
    if len(named) == 1:
        return str(named[0].id)

    # No name match: treat the value as an ID.
    for rs in all_rulesets:
        if str(rs.id) != name_or_id:
            continue
        if mask_type is not None and rs.ruleset_type.value != mask_type:
            abort(
                f"Ruleset {name_or_id} is a {rs.ruleset_type.value} ruleset "
                f"but a {mask_type} ruleset is required for this connection."
            )
        return name_or_id

    available = ", ".join(rs.name for rs in all_rulesets)
    abort(f"Ruleset '{name_or_id}' not found. Available: {available}")
121
+
122
+
123
def _coerce_option_value(value: str) -> object:
    """Parse a string into bool/int/float when it looks like one, else keep the string.

    `true` / `false` (any letter case) become booleans. Text accepted by
    `int()` becomes an int, and text accepted by `float()` becomes a float
    provided the result is finite. Everything else is returned unchanged.

    Non-finite results ("nan", "inf", "infinity", or an overflowing literal
    such as "1e999") are deliberately left as strings: `float()` accepts those
    words, but NaN/Infinity have no valid JSON representation, so a textual
    option value must not be turned into one.
    """
    import math

    lowered = value.lower()
    if lowered == "true":
        return True
    if lowered == "false":
        return False

    try:
        return int(value)
    except ValueError:
        pass

    try:
        number = float(value)
    except ValueError:
        return value
    return number if math.isfinite(number) else value
138
+
139
+
140
def _parse_options(pairs: list[str]) -> dict[str, object]:
    """Turn a list of `key=value` strings into a dict with coerced values.

    Each pair is split on the first `=`. The key and the value are stripped of
    surrounding whitespace, and the value is coerced to bool/int/float/str by
    `_coerce_option_value`. A later pair with the same key overrides an
    earlier one.

    A pair without `=`, or with an empty key (for example `=5`), aborts with a
    usage message instead of producing an entry under an empty-string key.
    """
    parsed: dict[str, object] = {}
    for pair in pairs:
        key, separator, raw = pair.partition("=")
        key = key.strip()
        if not separator or not key:
            abort(f"--options expects key=value, got '{pair}'.")
        parsed[key] = _coerce_option_value(raw.strip())
    return parsed
149
+
150
+
151
@app.command("start")
def start_run(
    connection: str = typer.Option(..., "--connection", "-c", help="Source connection name or ID"),
    ruleset: str = typer.Option(..., "--ruleset", "-r", help="Ruleset name or ID"),
    destination: str | None = typer.Option(None, "--destination", "-d", help="Destination connection (optional)"),
    options: list[str] = typer.Option(
        [],
        "--options",
        help="Run options as key=value (repeatable). E.g. --options batch_size=1000 --options dry_run=true",
    ),
    is_background: bool = typer.Option(
        False, "--background", "-b", help="Return immediately without waiting for completion"
    ),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
) -> None:
    """Start a new masking run."""
    client = get_client(profile)

    # The source connection's type selects which ruleset namespace is searched.
    source = _resolve_connection(client, connection)
    source_type = source.mask_type
    ruleset_id = _resolve_ruleset_id(client, ruleset, mask_type=source_type)

    # Without `destination_connection` in the payload the server defaults
    # `mask_type` to `database`, so a file source produces a confusing
    # "source must be a database connection" error. Reject client-side.
    if destination is None and source_type == "file":
        abort(
            f"File masking requires a destination connection. "
            f"Pass --destination <name> (source '{source.name}' is a file-type connection)."
        )

    # The run name is the source connection name plus a UTC timestamp.
    run_name = f"{source.name}_{datetime.now(tz=UTC).strftime('%Y%m%d_%H%M%S')}"

    destination_id: str | None = None
    if destination is not None:
        target = _resolve_connection(client, destination)
        if target.mask_type != source_type:
            abort(
                f"Connection type mismatch: source '{source.name}' is {source_type} "
                f"but destination '{target.name}' is {target.mask_type}."
            )
        destination_id = str(target.id)

    run_options = MaskingRunOptions.model_validate(_parse_options(options))
    request = MaskingRunRequest(
        name=run_name,
        connection=str(source.id),
        ruleset=ruleset_id,
        mask_type=source_type,
        destination_connection=destination_id,
        options=run_options,
    )
    run_id = client.start_masking_run(request)
    print_success(f"Run {run_id} started ({run_name}).")

    if not is_background:
        _wait_for_run(client, run_id, is_json=is_json, ruleset=ruleset, connection=connection)
        return

    # Background mode: report the new run's ID and return without polling.
    if is_json:
        print_json({"id": int(run_id), "status": "queued"})
212
+
213
+
214
@app.command("status")
def run_status(
    run_id: int = typer.Argument(help="Run ID"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
) -> None:
    """Get status of a masking run."""
    client = get_client(profile)
    info = client.get_run_info(RunId(run_id))
    # Styling is only applied to the human-readable form, never to JSON output.
    row = _format_run_info(info, is_styled=not is_json)
    render_output(row, is_json=is_json, title=f"Run {run_id}")
224
+
225
+
226
@app.command("list")
def list_runs(
    status_filter: str | None = typer.Option(None, "--status", "-s", help="Filter by status"),
    limit: int = typer.Option(20, "--limit", "-l", help="Maximum number of runs to show"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
) -> None:
    """List masking runs.

    Without --status, lists the most recent runs.
    With --status, filters to runs in that state (e.g. running, finished, failed).
    """
    from urllib.parse import urlencode

    client = get_client(profile)

    # `list_runs` is not yet wrapped in datamasque-python; hit the endpoint directly.
    # The query string is built with `urlencode` so that a status value
    # containing `&`, `=`, spaces or non-ASCII characters cannot corrupt the URL
    # or add extra parameters.
    params: dict[str, object] = {}
    if status_filter is not None:
        params["run_status"] = status_filter
    if limit is not None:
        params["limit"] = limit
    query = f"?{urlencode(params)}" if params else ""
    response = client.make_request("GET", f"/api/runs/{query}")
    body = response.json()

    # The API may return a paginated envelope or a flat list depending on version.
    # Fall back to an empty list when the dict shape is missing `results` (or
    # holds null there), otherwise the comprehension below would iterate over
    # dict keys or fail on `None`.
    if isinstance(body, dict):
        runs: list[dict[str, object]] = body.get("results") or []
    else:
        runs = body or []
    data = [_format_run_dict(r, is_styled=not is_json) for r in runs]

    render_output(
        data,
        is_json=is_json,
        columns=["id", "status", "ruleset", "source", "destination", "created"],
        title="Masking Runs",
    )
265
+
266
+
267
@app.command("logs")
def run_logs(
    run_id: int = typer.Argument(help="Run ID"),
    follow: bool = typer.Option(False, "--follow", "-f", help="Stream logs until the run reaches a terminal state"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output raw JSON"),
) -> None:
    """Show execution logs for a masking run.

    With `--follow`, polls the log and status until the run finishes,
    printing each new chunk as it appears.
    """
    client = get_client(profile)
    target = RunId(run_id)

    if not follow:
        log = client.get_run_log(target)
        if is_json:
            typer.echo(log)
        else:
            _print_pretty_logs(log)
        return

    # Two cursors: one counts characters (raw output, or logs that are not a
    # JSON list), the other counts whole entries (pretty output of a JSON list).
    printed_chars = 0
    printed_entries = 0
    while True:
        # Read the status BEFORE the log. If the run is already final here,
        # the log fetched next was read after the run finished, so no trailing
        # lines are missed when the loop returns.
        info = client.get_run_info(target)
        log = client.get_run_log(target)

        # For pretty output, track progress per entry. Slicing the JSON text
        # by character offset yields fragments that are not valid JSON, so
        # they could never be pretty-printed.
        entries = None
        if not is_json:
            try:
                parsed = json.loads(log)
            except (json.JSONDecodeError, TypeError):
                parsed = None
            if isinstance(parsed, list):
                entries = parsed

        if entries is not None:
            # Defend against server-side log rotation shrinking the buffer:
            # reset the cursor rather than slicing past the end.
            printed_entries = min(printed_entries, len(entries))
            fresh = entries[printed_entries:]
            if fresh:
                _print_pretty_logs(json.dumps(fresh))
            printed_entries = len(entries)
            printed_chars = len(log)
        else:
            # Raw output (or a log that is not a JSON list): stream by
            # character offset, with the same rotation guard.
            printed_chars = min(printed_chars, len(log))
            if len(log) > printed_chars:
                chunk = log[printed_chars:]
                if is_json:
                    typer.echo(chunk, nl=False)
                else:
                    _print_pretty_logs(chunk)
                printed_chars = len(log)

        if info.status.is_in_final_state:
            return
        time.sleep(_POLL_INTERVAL_SECONDS)
307
+
308
+
309
@app.command("cancel")
def cancel_run(
    run_id: int = typer.Argument(help="Run ID to cancel"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
) -> None:
    """Cancel a running masking run."""
    client = get_client(profile)
    target = RunId(run_id)
    try:
        client.cancel_run(target)
    except RunNotCancellableError as error:
        # Report the client's own explanation through the CLI's abort helper.
        abort(str(error))
    print_success(f"Run {run_id} cancellation requested.")
321
+
322
+
323
@app.command("report")
def run_report(
    run_id: int = typer.Argument(help="Run ID"),
    output: Path | None = typer.Option(None, "--output", "-o", help="Write CSV to this path"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
) -> None:
    """Download the masking run report (CSV) for a completed run."""
    client = get_client(profile)
    try:
        report = client.get_run_report(RunId(run_id))
    except DataMasqueApiError as exc:
        # Reports are POSTed by the agent-worker as it finishes the run, so
        # `GET .../run-report/` 404s for runs that didn't produce one
        # (still in flight, failed early, or a run type that doesn't emit a
        # report). The default error string is opaque, so name the cause.
        if exc.response is not None and exc.response.status_code == _HTTP_NOT_FOUND:
            abort(
                f"No report available for run {run_id}. Reports are generated by the worker "
                f"once the run reaches a final state — check status with `dm run status {run_id}`."
            )
        raise

    if output is None:
        typer.echo(report)
    else:
        # Write UTF-8 explicitly and disable newline translation. The platform
        # default encoding may be unable to represent the report, and on
        # Windows the default translation would turn CSV `\r\n` line endings
        # into `\r\r\n`.
        output.write_text(report, encoding="utf-8", newline="")
        print_success(f"Run report written to {output}")
350
+
351
+
352
# `MaskingRunOptions` declares `extra="forbid"`; the server can echo back keys
# on read (e.g. `has_run_secret`) that it won't accept on create, and may add
# more in future. Drop anything the model doesn't know about — and the
# server-managed `run_secret` so a fresh per-run key is generated on retry.
_VALID_OPTION_KEYS = set(MaskingRunOptions.model_fields)
_SERVER_MANAGED_OPTION_KEYS = frozenset({"run_secret"})


@app.command("retry")
def retry_run(
    run_id: int = typer.Argument(help="Run ID to retry"),
    is_background: bool = typer.Option(
        False, "--background", "-b", help="Return immediately without waiting for completion"
    ),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
) -> None:
    """Start a new run with the same source, ruleset, destination, and options as an existing one.

    Useful for re-running a failed or cancelled masking job. The original
    run's config is read back from the server so any manual edits to the
    connection or ruleset since then are picked up automatically.
    """
    client = get_client(profile)
    original = client.get_run_info(RunId(run_id))

    source_id = original.source_connection.id
    ruleset_id = original.ruleset
    if not (source_id and ruleset_id):
        abort(f"Run {run_id} is missing source or ruleset — cannot retry.")

    # Name the retry after the source connection (or its ID when unnamed) plus a UTC timestamp.
    stamp = datetime.now(tz=UTC).strftime("%Y%m%d_%H%M%S")
    run_name = f"{original.source_connection.name or source_id}_retry_{stamp}"

    # Carry over only options the request model accepts and the server does not manage.
    carried_options = {}
    for key, value in (original.options or {}).items():
        if key in _SERVER_MANAGED_OPTION_KEYS or key not in _VALID_OPTION_KEYS:
            continue
        carried_options[key] = value

    previous_destination = original.destination_connection
    destination_id = previous_destination.id if previous_destination else None

    request = MaskingRunRequest(
        name=run_name,
        connection=str(source_id),
        ruleset=str(ruleset_id),
        mask_type=original.mask_type,
        destination_connection=str(destination_id) if destination_id else None,
        options=MaskingRunOptions.model_validate(carried_options),
    )
    new_run_id = client.start_masking_run(request)
    print_success(f"Run {new_run_id} started (retry of {run_id}, {run_name}).")

    if not is_background:
        _wait_for_run(client, new_run_id, is_json=is_json)
        return

    # Background mode: report the new run's ID and return without polling.
    if is_json:
        print_json({"id": int(new_run_id), "status": "queued"})
410
+
411
+
412
@app.command("wait")
def wait_run(
    run_id: int = typer.Argument(help="Run ID to wait for"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output final status as JSON"),
) -> None:
    """Block until a masking run reaches a terminal state.

    Exits 0 on success (finished/finished_with_warnings), 1 on failure/cancellation.
    """
    # Polling and exit-code handling live in `_wait_for_run`.
    target = RunId(run_id)
    _wait_for_run(get_client(profile), target, is_json=is_json)
424
+
425
+
426
def _wait_for_run(
    client: DataMasqueClient,
    run_id: RunId,
    *,
    is_json: bool,
    ruleset: str | None = None,
    connection: str | None = None,
) -> None:
    """Poll a run until its status is final, then print a one-line summary.

    While waiting, a console status spinner shows the run's current status and
    the loop sleeps `_POLL_INTERVAL_SECONDS` between polls. When `is_json` is
    true the final run info is printed as JSON before the summary. `ruleset`
    and `connection`, when given, are appended to the summary in parentheses.

    Raises:
        SystemExit: with code 1 when the final status is not a finished one.
    """
    started_at = time.monotonic()

    with console.status(f"Waiting for run {run_id}...") as spinner:
        run = client.get_run_info(run_id)
        while not run.status.is_in_final_state:
            spinner.update(f"Run {run_id}: {run.status.value}")
            time.sleep(_POLL_INTERVAL_SECONDS)
            run = client.get_run_info(run_id)

    duration = _format_duration(int(time.monotonic() - started_at))

    if is_json:
        print_json(_format_run_info(run))

    status = run.status
    details = []
    if ruleset:
        details.append(f"ruleset: {ruleset}")
    if connection:
        details.append(f"source: {connection}")

    summary = f"Run {run_id} {status.value} in {duration}"
    if details:
        summary += f" ({', '.join(details)})"

    if not status.is_finished:
        print_error(summary)
        raise SystemExit(1)
    print_success(summary)
468
+
469
+
470
_SECONDS_PER_MINUTE = 60
_MINUTES_PER_HOUR = 60


def _format_duration(total_seconds: int) -> str:
    """Render a number of seconds as `Ns`, `Nm Ns` or `Nh Nm Ns`.

    The shortest form that fits is used: under a minute only seconds are
    shown, under an hour minutes and seconds, otherwise all three parts.
    """
    if total_seconds < _SECONDS_PER_MINUTE:
        return f"{total_seconds}s"

    whole_minutes, seconds = divmod(total_seconds, _SECONDS_PER_MINUTE)
    hours, minutes = divmod(whole_minutes, _MINUTES_PER_HOUR)
    if not hours:
        return f"{minutes}m {seconds}s"
    return f"{hours}h {minutes}m {seconds}s"
485
+
486
+
487
# Numeric log level -> (label shown in the output, style name used for the label).
_LOG_LEVEL_LABELS = {
    10: ("DEBUG", "dim"),
    20: ("INFO", "green"),
    30: ("WARN", "yellow"),
    40: ("ERROR", "red"),
    50: ("FATAL", "bold red"),
}


def _print_pretty_logs(raw_log: str) -> None:
    """Parse JSON log entries and print them in a human-readable format.

    `raw_log` is expected to hold a JSON list of entries (or one entry). Each
    entry is read for `timestamp`, `log_level` and `message` and printed as
    one line with the timestamp, a styled level label and the message.
    Continuation lines of a multi-line message are indented under the first.

    Text that is not valid JSON is echoed unchanged. Entries that are not
    JSON objects are printed as bare messages, and null or non-string fields
    are converted to text, so one malformed entry cannot abort the listing.
    """
    try:
        entries = json.loads(raw_log)
    except json.JSONDecodeError:
        # Not JSON — just print raw
        typer.echo(raw_log)
        return

    if not isinstance(entries, list):
        entries = [entries]

    default_label = ("INFO", "green")
    for entry in entries:
        if not isinstance(entry, dict):
            # A scalar or nested list has no fields to read; show it as the message.
            entry = {"message": entry}

        ts_value = entry.get("timestamp")
        ts = "" if ts_value is None else str(ts_value)
        level = entry.get("log_level", 20)
        message_value = entry.get("message")
        message = "" if message_value is None else str(message_value)

        # Only numeric levels can match the table; anything else (including
        # unhashable values) falls back to the default label.
        if isinstance(level, (int, float)):
            label, style = _LOG_LEVEL_LABELS.get(level, default_label)
        else:
            label, style = default_label

        # Truncate the timestamp to seconds
        if "." in ts:
            ts = ts[: ts.index(".")]
        ts = ts.replace("T", " ")

        # Escape rich markup in the message, and indent continuation lines
        escaped = message.replace("[", "\\[")
        first_line, *continuations = escaped.split("\n")
        stdout_console.print(f"[dim]{ts}[/dim] [{style}]{label:5}[/{style}] {first_line}")
        for continuation in continuations:
            stdout_console.print(f"{'':>27}{continuation}")
@@ -0,0 +1,56 @@
1
+ """Seed file management commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ import typer
8
+ from datamasque.client.models.files import SeedFile
9
+
10
+ from datamasque_cli.client import get_client
11
+ from datamasque_cli.output import abort, print_success, render_output
12
+
13
# Typer sub-application that groups the seed-file commands defined in this module.
app = typer.Typer(help="Manage seed files.")


@app.command("list")
def list_seeds(
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_json: bool = typer.Option(False, "--json", help="Output as JSON"),
) -> None:
    """List all seed files."""
    client = get_client(profile)
    # One row per seed file, carrying only the two displayed columns.
    rows = [{"id": seed.id, "name": seed.name} for seed in client.list_files_of_type(SeedFile)]
    render_output(rows, is_json=is_json, columns=["id", "name"], title="Seed Files")
27
+
28
+
29
@app.command("upload")
def upload_seed(
    # `dir_okay=False` makes Typer reject a directory path with a usage error
    # instead of letting the upload fail later while reading it.
    file: Path = typer.Argument(help="Path to seed file", exists=True, dir_okay=False, readable=True),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
) -> None:
    """Upload a seed file."""
    client = get_client(profile)
    # The file is uploaded under its own base name.
    client.upload_file(SeedFile, file.name, file)
    print_success(f"Seed file '{file.name}' uploaded.")
38
+
39
+
40
@app.command("delete")
def delete_seed(
    filename: str = typer.Argument(help="Seed filename to delete"),
    profile: str | None = typer.Option(None, "--profile", "-p", help="Profile to use"),
    is_confirmed: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation"),
) -> None:
    """Delete a seed file by filename."""
    client = get_client(profile)

    # Look the file up first so an unknown name is reported before any prompt.
    seed = client.get_file_of_type_by_name(SeedFile, filename)
    if seed is None:
        abort(f"Seed file '{filename}' not found.")

    # `abort=True` makes Typer stop the command when the user declines.
    if not is_confirmed:
        typer.confirm(f"Delete seed file '{filename}'?", abort=True)

    client.delete_file_if_exists(seed)
    print_success(f"Seed file '{filename}' deleted.")