dataforge-07 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. dataforge/__init__.py +204 -0
  2. dataforge/__main__.py +5 -0
  3. dataforge/agent/__init__.py +16 -0
  4. dataforge/agent/providers.py +259 -0
  5. dataforge/agent/scratchpad.py +183 -0
  6. dataforge/agent/tool_actions.py +343 -0
  7. dataforge/bench/__init__.py +31 -0
  8. dataforge/bench/core.py +426 -0
  9. dataforge/bench/groq_client.py +386 -0
  10. dataforge/bench/methods.py +443 -0
  11. dataforge/bench/report.py +309 -0
  12. dataforge/bench/runner.py +247 -0
  13. dataforge/causal/__init__.py +21 -0
  14. dataforge/causal/dag.py +174 -0
  15. dataforge/causal/pc.py +232 -0
  16. dataforge/causal/root_cause.py +193 -0
  17. dataforge/cli/__init__.py +50 -0
  18. dataforge/cli/audit.py +70 -0
  19. dataforge/cli/bench.py +154 -0
  20. dataforge/cli/common.py +267 -0
  21. dataforge/cli/constraints.py +407 -0
  22. dataforge/cli/profile.py +147 -0
  23. dataforge/cli/release.py +166 -0
  24. dataforge/cli/repair.py +407 -0
  25. dataforge/cli/revert.py +139 -0
  26. dataforge/cli/watch.py +144 -0
  27. dataforge/datasets/__init__.py +25 -0
  28. dataforge/datasets/embedded/hospital/clean.csv +11 -0
  29. dataforge/datasets/embedded/hospital/dirty.csv +11 -0
  30. dataforge/datasets/real_world.py +290 -0
  31. dataforge/datasets/registry.py +103 -0
  32. dataforge/detectors/__init__.py +80 -0
  33. dataforge/detectors/base.py +145 -0
  34. dataforge/detectors/decimal_shift.py +166 -0
  35. dataforge/detectors/fd_violation.py +157 -0
  36. dataforge/detectors/type_mismatch.py +173 -0
  37. dataforge/engine/__init__.py +39 -0
  38. dataforge/engine/repair.py +905 -0
  39. dataforge/env/__init__.py +22 -0
  40. dataforge/env/environment.py +883 -0
  41. dataforge/env/observation.py +61 -0
  42. dataforge/env/openenv_core.py +161 -0
  43. dataforge/env/reward.py +128 -0
  44. dataforge/env/server.py +176 -0
  45. dataforge/evaluation_contract.py +76 -0
  46. dataforge/fixtures/hospital_10rows.csv +11 -0
  47. dataforge/fixtures/hospital_schema.yaml +17 -0
  48. dataforge/http/__init__.py +1 -0
  49. dataforge/http/problem.py +103 -0
  50. dataforge/integrations/__init__.py +1 -0
  51. dataforge/integrations/dbt.py +164 -0
  52. dataforge/observability.py +76 -0
  53. dataforge/py.typed +1 -0
  54. dataforge/release/__init__.py +1 -0
  55. dataforge/release/doctor.py +367 -0
  56. dataforge/release/full_vision.py +702 -0
  57. dataforge/release/gate.py +861 -0
  58. dataforge/release/playground_check.py +411 -0
  59. dataforge/repair_contract.py +468 -0
  60. dataforge/repairers/__init__.py +88 -0
  61. dataforge/repairers/base.py +77 -0
  62. dataforge/repairers/decimal_shift.py +43 -0
  63. dataforge/repairers/fd_violation.py +225 -0
  64. dataforge/repairers/type_mismatch.py +73 -0
  65. dataforge/safety/__init__.py +5 -0
  66. dataforge/safety/adversarial/attack_01_phone_pii.yaml +8 -0
  67. dataforge/safety/adversarial/attack_02_phone_pii.yaml +8 -0
  68. dataforge/safety/adversarial/attack_03_phone_pii.yaml +8 -0
  69. dataforge/safety/adversarial/attack_04_phone_pii.yaml +8 -0
  70. dataforge/safety/adversarial/attack_05_phone_pii.yaml +8 -0
  71. dataforge/safety/adversarial/attack_06_phone_pii.yaml +8 -0
  72. dataforge/safety/adversarial/attack_07_phone_pii.yaml +8 -0
  73. dataforge/safety/adversarial/attack_08_phone_pii.yaml +8 -0
  74. dataforge/safety/adversarial/attack_09_phone_pii.yaml +8 -0
  75. dataforge/safety/adversarial/attack_10_phone_pii.yaml +8 -0
  76. dataforge/safety/adversarial/attack_11_ssn_pii.yaml +8 -0
  77. dataforge/safety/adversarial/attack_12_ssn_pii.yaml +8 -0
  78. dataforge/safety/adversarial/attack_13_ssn_pii.yaml +8 -0
  79. dataforge/safety/adversarial/attack_14_ssn_pii.yaml +8 -0
  80. dataforge/safety/adversarial/attack_15_ssn_pii.yaml +8 -0
  81. dataforge/safety/adversarial/attack_16_ssn_pii.yaml +8 -0
  82. dataforge/safety/adversarial/attack_17_ssn_pii.yaml +8 -0
  83. dataforge/safety/adversarial/attack_18_ssn_pii.yaml +8 -0
  84. dataforge/safety/adversarial/attack_19_ssn_pii.yaml +8 -0
  85. dataforge/safety/adversarial/attack_20_ssn_pii.yaml +8 -0
  86. dataforge/safety/adversarial/attack_21_email_pii.yaml +8 -0
  87. dataforge/safety/adversarial/attack_22_email_pii.yaml +8 -0
  88. dataforge/safety/adversarial/attack_23_email_pii.yaml +8 -0
  89. dataforge/safety/adversarial/attack_24_email_pii.yaml +8 -0
  90. dataforge/safety/adversarial/attack_25_email_pii.yaml +8 -0
  91. dataforge/safety/adversarial/attack_26_email_pii.yaml +8 -0
  92. dataforge/safety/adversarial/attack_27_email_pii.yaml +8 -0
  93. dataforge/safety/adversarial/attack_28_email_pii.yaml +8 -0
  94. dataforge/safety/adversarial/attack_29_email_pii.yaml +8 -0
  95. dataforge/safety/adversarial/attack_30_email_pii.yaml +8 -0
  96. dataforge/safety/adversarial/attack_31_row_delete.yaml +7 -0
  97. dataforge/safety/adversarial/attack_32_row_delete.yaml +8 -0
  98. dataforge/safety/adversarial/attack_33_row_delete.yaml +7 -0
  99. dataforge/safety/adversarial/attack_34_row_delete.yaml +7 -0
  100. dataforge/safety/adversarial/attack_35_row_delete.yaml +7 -0
  101. dataforge/safety/adversarial/attack_36_row_delete.yaml +11 -0
  102. dataforge/safety/adversarial/attack_37_row_delete.yaml +7 -0
  103. dataforge/safety/adversarial/attack_38_row_delete.yaml +7 -0
  104. dataforge/safety/adversarial/attack_39_row_delete.yaml +8 -0
  105. dataforge/safety/adversarial/attack_40_row_delete.yaml +7 -0
  106. dataforge/safety/adversarial/attack_41_row_delete.yaml +7 -0
  107. dataforge/safety/adversarial/attack_42_row_delete.yaml +7 -0
  108. dataforge/safety/adversarial/attack_43_row_delete.yaml +7 -0
  109. dataforge/safety/adversarial/attack_44_row_delete.yaml +7 -0
  110. dataforge/safety/adversarial/attack_45_row_delete.yaml +8 -0
  111. dataforge/safety/adversarial/attack_46_row_delete.yaml +8 -0
  112. dataforge/safety/adversarial/attack_47_row_delete.yaml +7 -0
  113. dataforge/safety/adversarial/attack_48_row_delete.yaml +7 -0
  114. dataforge/safety/adversarial/attack_49_row_delete.yaml +8 -0
  115. dataforge/safety/adversarial/attack_50_row_delete.yaml +7 -0
  116. dataforge/safety/constitution.py +307 -0
  117. dataforge/safety/constitutions/default.yaml +40 -0
  118. dataforge/safety/filter.py +134 -0
  119. dataforge/schema_inference.py +620 -0
  120. dataforge/stores/__init__.py +46 -0
  121. dataforge/stores/base.py +73 -0
  122. dataforge/stores/cloud.py +78 -0
  123. dataforge/stores/csv.py +94 -0
  124. dataforge/stores/duckdb.py +313 -0
  125. dataforge/stores/patch_plan.py +178 -0
  126. dataforge/stores/registry.py +82 -0
  127. dataforge/stores/repair.py +121 -0
  128. dataforge/stores/revert.py +22 -0
  129. dataforge/stores/sql.py +27 -0
  130. dataforge/table.py +228 -0
  131. dataforge/transactions/__init__.py +34 -0
  132. dataforge/transactions/files.py +96 -0
  133. dataforge/transactions/log.py +613 -0
  134. dataforge/transactions/revert.py +102 -0
  135. dataforge/transactions/txn.py +104 -0
  136. dataforge/ui/__init__.py +1 -0
  137. dataforge/ui/profile_view.py +136 -0
  138. dataforge/ui/repair_diff.py +91 -0
  139. dataforge/verifier/__init__.py +55 -0
  140. dataforge/verifier/constraint_ir.py +155 -0
  141. dataforge/verifier/explain.py +47 -0
  142. dataforge/verifier/gate.py +5 -0
  143. dataforge/verifier/schema.py +111 -0
  144. dataforge/verifier/smt.py +433 -0
  145. dataforge_07-0.1.0.dist-info/METADATA +436 -0
  146. dataforge_07-0.1.0.dist-info/RECORD +150 -0
  147. dataforge_07-0.1.0.dist-info/WHEEL +5 -0
  148. dataforge_07-0.1.0.dist-info/entry_points.txt +3 -0
  149. dataforge_07-0.1.0.dist-info/licenses/LICENSE +176 -0
  150. dataforge_07-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,407 @@
1
+ """CLI subcommands for reviewing inferred constraint artifacts."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from collections import Counter
7
+ from pathlib import Path
8
+ from typing import Annotated, Any
9
+
10
+ import typer
11
+ from rich.console import Console
12
+ from rich.panel import Panel
13
+ from rich.table import Table
14
+ from textual import on
15
+ from textual.app import App, ComposeResult
16
+ from textual.binding import Binding
17
+ from textual.containers import Horizontal, Vertical
18
+ from textual.widgets import DataTable, Footer, Header, Input, Static
19
+
20
+ from dataforge.cli.common import resolve_cli_path
21
+ from dataforge.schema_inference import (
22
+ REPAIR_SUPPORTED_CONSTRAINT_KINDS,
23
+ ConstraintDecision,
24
+ ConstraintReviewArtifact,
25
+ ConstraintReviewError,
26
+ load_constraint_review_artifact,
27
+ update_constraint_review_artifact,
28
+ write_constraint_review_artifact_atomic,
29
+ )
30
+
31
# Typer sub-application that groups the constraint-review commands.
# ``no_args_is_help=True`` makes a bare invocation print the help text.
constraints_app = typer.Typer(
    help="Review inferred profile constraints before repair can use them.",
    no_args_is_help=True,
)
# Console bound to stderr; used in this module for error panels and notices.
_console = Console(stderr=True)
36
+
37
+
38
def _candidate_target(candidate: Any) -> str:
    """Describe which column(s) a candidate constraint applies to.

    The candidate's ``columns`` are joined with ``", "``.  When the candidate
    has a truthy ``dependent`` the result is ``"<columns> -> <dependent>"``;
    otherwise only the joined column list is returned.
    """
    joined = ", ".join(candidate.columns)
    dependent = candidate.dependent
    return f"{joined} -> {dependent}" if dependent else joined
44
+
45
+
46
def _candidate_summary(reviewed: Any) -> dict[str, Any]:
    """Build the JSON-friendly summary dict for one reviewed candidate.

    Review-level fields (id, decision, note) are read from ``reviewed``;
    constraint-level fields (kind, confidence, evidence) are read from
    ``reviewed.candidate``.  ``repair_supported`` is ``True`` when the
    candidate kind is listed in ``REPAIR_SUPPORTED_CONSTRAINT_KINDS``.
    """
    inner = reviewed.candidate
    kind = inner.kind
    target = _candidate_target(inner)
    is_repairable = kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS
    summary: dict[str, Any] = {
        "candidate_id": reviewed.candidate_id,
        "decision": reviewed.decision,
        "kind": kind,
        "target": target,
        "confidence": inner.confidence,
        "repair_supported": is_repairable,
        "evidence": inner.evidence,
        "review_note": reviewed.review_note,
    }
    return summary
59
+
60
+
61
def _artifact_summary(
    artifact: ConstraintReviewArtifact,
    *,
    path: Path,
    sha256: str | None = None,
) -> dict[str, Any]:
    """Assemble the summary payload printed by ``--json``.

    Args:
        artifact: The review artifact to summarise.
        path: Artifact location reported in the payload (stringified).
        sha256: Digest reported under ``"sha256"``; passed through unchanged.

    Returns:
        A dict with artifact metadata, per-decision counts (always containing
        the ``accepted``, ``pending`` and ``rejected`` keys), the number of
        candidates whose kind is repair-supported, and one summary entry per
        candidate.
    """
    reviewed_items = artifact.candidates
    tally = Counter(item.decision for item in reviewed_items)
    repairable = [
        item
        for item in reviewed_items
        if item.candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS
    ]
    # Counter returns 0 for missing keys, so all three decisions are always present.
    decision_counts = {
        "accepted": tally["accepted"],
        "pending": tally["pending"],
        "rejected": tally["rejected"],
    }
    per_candidate = [_candidate_summary(item) for item in reviewed_items]
    return {
        "path": str(path),
        "schema_version": artifact.schema_version,
        "source_path": artifact.source_path,
        "source_sha256": artifact.source_sha256,
        "row_count": artifact.row_count,
        "candidate_count": len(reviewed_items),
        "repair_supported_count": len(repairable),
        "decision_counts": decision_counts,
        "sha256": sha256,
        "candidates": per_candidate,
    }
90
+
91
+
92
def _parse_notes(raw_notes: list[str] | None) -> dict[str, str | None]:
    """Turn repeated ``--note cnd-id=text`` values into an id -> note mapping.

    Each entry is split on its first ``=``.  An empty note text is stored as
    ``None``.  When the same id appears more than once the last entry wins.

    Raises:
        typer.BadParameter: If an entry has no ``=`` or an empty id part.
    """
    notes_by_id: dict[str, str | None] = {}
    if not raw_notes:
        return notes_by_id
    for entry in raw_notes:
        key, equals, text = entry.partition("=")
        if not (equals and key):
            raise typer.BadParameter("--note must use the form cnd-...=text")
        notes_by_id[key] = text if text else None
    return notes_by_id
101
+
102
+
103
def _print_review_table(artifact: ConstraintReviewArtifact) -> None:
    """Print every candidate of ``artifact`` as a Rich table.

    The table is printed through a freshly created default ``Console``
    (not the module's stderr console).
    """
    review_table = Table(title="Constraint Review")
    column_specs: tuple[tuple[str, dict[str, str]], ...] = (
        ("Candidate ID", {"overflow": "fold"}),
        ("Decision", {}),
        ("Kind", {}),
        ("Target", {"overflow": "fold"}),
        ("Confidence", {"justify": "right"}),
        ("Repair", {}),
        ("Evidence", {"overflow": "fold"}),
    )
    for header, options in column_specs:
        review_table.add_column(header, **options)

    for item in artifact.candidates:
        inner = item.candidate
        if inner.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS:
            repair_label = "yes"
        else:
            repair_label = "review-only"
        review_table.add_row(
            item.candidate_id,
            item.decision,
            inner.kind,
            _candidate_target(inner),
            f"{inner.confidence:.4f}",
            repair_label,
            inner.evidence,
        )

    Console().print(review_table)
125
+
126
+
127
class ConstraintReviewApp(App[ConstraintReviewArtifact]):
    """Textual review UI for a constraint artifact.

    The screen shows a table of candidates, a detail pane for the highlighted
    candidate and a note input.  Key bindings change the decision of the
    highlighted candidate.  ``s`` exits with ``saved`` set to ``True`` and
    ``q`` exits with ``saved`` set to ``False``; in both cases the current
    artifact is returned as the run result.  This class does not write the
    artifact to disk itself.
    """

    CSS = """
    DataTable {
        height: 1fr;
    }

    #detail {
        width: 45%;
        height: 1fr;
        overflow-y: auto;
        border: solid $accent;
        padding: 1;
    }

    #note {
        height: 3;
    }
    """

    BINDINGS = [
        Binding("a", "accept", "Accept"),
        Binding("r", "reject", "Reject"),
        Binding("p", "pending", "Pending"),
        Binding("n", "focus_note", "Note"),
        Binding("s", "save", "Save"),
        Binding("q", "quit_without_save", "Quit"),
    ]

    # Column headers of the candidate table, shared by every (re)population.
    _TABLE_COLUMNS = ("ID", "Decision", "Kind", "Target", "Conf", "Repair")

    def __init__(self, artifact: ConstraintReviewArtifact) -> None:
        """Create a review application for an already validated artifact.

        The first candidate (if any) starts out as the selected one.
        """
        super().__init__()
        self.artifact = artifact
        self.saved = False
        self.selected_candidate_id = (
            artifact.candidates[0].candidate_id if artifact.candidates else None
        )

    def compose(self) -> ComposeResult:
        """Compose the review screen: header, table + detail, note input, footer."""
        yield Header()
        with Vertical():
            with Horizontal():
                yield DataTable(id="candidates")
                yield Static(id="detail")
            yield Input(
                placeholder="Review note for selected candidate; press Enter to save note",
                id="note",
            )
        yield Footer()

    def on_mount(self) -> None:
        """Populate the table when the TUI starts and give it focus."""
        table = self.query_one("#candidates", DataTable)
        table.cursor_type = "row"
        self._populate_table(table)
        table.focus()
        self._refresh_detail()

    @on(DataTable.RowHighlighted)
    def _on_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Track the currently highlighted candidate."""
        self.selected_candidate_id = str(event.row_key.value)
        self._refresh_detail()

    @on(Input.Submitted, "#note")
    def _on_note_submitted(self, event: Input.Submitted) -> None:
        """Save a note for the selected candidate and clear the input."""
        if self.selected_candidate_id is None:
            return
        self.artifact = update_constraint_review_artifact(
            self.artifact,
            notes={self.selected_candidate_id: event.value},
        )
        event.input.value = ""
        self._refresh_table()
        self._refresh_detail()

    def action_accept(self) -> None:
        """Accept the selected candidate."""
        self._set_selected_decision("accepted")

    def action_reject(self) -> None:
        """Reject the selected candidate."""
        self._set_selected_decision("rejected")

    def action_pending(self) -> None:
        """Reset the selected candidate to pending."""
        self._set_selected_decision("pending")

    def action_focus_note(self) -> None:
        """Focus the note editor."""
        self.query_one("#note", Input).focus()

    def action_save(self) -> None:
        """Exit the TUI with the reviewed artifact and mark it as saved."""
        self.saved = True
        self.exit(self.artifact)

    def action_quit_without_save(self) -> None:
        """Exit the TUI with ``saved`` set to ``False``."""
        self.saved = False
        self.exit(self.artifact)

    def _set_selected_decision(self, decision: ConstraintDecision) -> None:
        """Apply a decision to the selected candidate and redraw the screen.

        Any value other than ``"accepted"`` or ``"rejected"`` is treated as a
        reset to pending.
        """
        if self.selected_candidate_id is None:
            return
        if decision == "accepted":
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                accept_ids=(self.selected_candidate_id,),
            )
        elif decision == "rejected":
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                reject_ids=(self.selected_candidate_id,),
            )
        else:
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                pending_ids=(self.selected_candidate_id,),
            )
        self._refresh_table()
        self._refresh_detail()

    def _selected_candidate(self) -> Any | None:
        """Return the selected reviewed candidate, or ``None`` if not found."""
        for reviewed in self.artifact.candidates:
            if reviewed.candidate_id == self.selected_candidate_id:
                return reviewed
        return None

    def _populate_table(self, table: DataTable) -> None:
        """Add the column headers and one row per candidate to ``table``.

        Rows are keyed by candidate id so highlight events can be mapped back
        to a candidate.
        """
        table.add_columns(*self._TABLE_COLUMNS)
        for reviewed in self.artifact.candidates:
            candidate = reviewed.candidate
            table.add_row(
                reviewed.candidate_id,
                reviewed.decision,
                candidate.kind,
                _candidate_target(candidate),
                f"{candidate.confidence:.4f}",
                "yes" if candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS else "review-only",
                key=reviewed.candidate_id,
            )

    def _refresh_table(self) -> None:
        """Rebuild the table rows after a change, keeping the cursor in place."""
        table = self.query_one("#candidates", DataTable)
        # Clearing the table puts the cursor back on the first row, which
        # would silently move the selection to the first candidate after
        # every decision.  Remember the row and restore it once rebuilt.
        previous_row = table.cursor_row
        table.clear(columns=True)
        self._populate_table(table)
        if table.row_count:
            table.move_cursor(row=min(previous_row, table.row_count - 1))

    def _refresh_detail(self) -> None:
        """Refresh the detail pane for the selected candidate."""
        reviewed = self._selected_candidate()
        detail = self.query_one("#detail", Static)
        if reviewed is None:
            detail.update("No constraint candidates.")
            return
        candidate = reviewed.candidate
        repair_note = (
            "Repair-supported in v1."
            if candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS
            else "Review-only in v1; repair ignores this kind."
        )
        payload = json.dumps(reviewed.model_dump(mode="json"), indent=2, sort_keys=True)
        detail.update(
            "\n".join(
                [
                    f"Candidate: {reviewed.candidate_id}",
                    f"Decision: {reviewed.decision}",
                    f"Source: {self.artifact.source_path}",
                    f"Source SHA-256: {self.artifact.source_sha256}",
                    f"Repair: {repair_note}",
                    "",
                    "Candidate JSON:",
                    payload,
                ]
            )
        )
317
+
318
+
319
@constraints_app.command(name="review")
def review_constraints(
    path: Annotated[
        Path,
        typer.Argument(help="Path to a constraint_review_v1 JSON artifact."),
    ],
    accept: Annotated[
        list[str] | None,
        typer.Option("--accept", help="Mark a candidate id accepted. Repeatable."),
    ] = None,
    reject: Annotated[
        list[str] | None,
        typer.Option("--reject", help="Mark a candidate id rejected. Repeatable."),
    ] = None,
    pending: Annotated[
        list[str] | None,
        typer.Option("--pending", help="Reset a candidate id to pending. Repeatable."),
    ] = None,
    note: Annotated[
        list[str] | None,
        typer.Option("--note", help="Set a review note with cnd-...=text. Repeatable."),
    ] = None,
    output: Annotated[
        Path | None,
        typer.Option("--output", help="Write the reviewed artifact to a separate path."),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Preview changes without writing an artifact."),
    ] = False,
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print review state as JSON."),
    ] = False,
    no_tui: Annotated[
        bool,
        typer.Option("--no-tui", help="Run deterministic non-interactive review mode."),
    ] = False,
) -> None:
    """Review profile-inferred constraint candidates before repair uses them.

    The artifact at ``path`` is loaded and the ``--accept`` / ``--reject`` /
    ``--pending`` / ``--note`` options are applied.  If none of those options
    nor ``--output``, ``--dry-run``, ``--json`` or ``--no-tui`` is given, the
    interactive TUI is started instead.  The result is written when it differs
    from the loaded artifact or when the target path differs from the input
    path, unless ``--dry-run`` is set.

    Exit codes: 2 when loading, updating or writing the artifact fails;
    1 when the TUI is left without saving.
    """
    # Local import keeps this fix self-contained within the command.
    from rich.markup import escape

    def _report_error(exc: BaseException) -> None:
        """Print ``exc`` in a red error panel on stderr."""
        # Exception text is data, not markup: escape it so bracketed content
        # is shown verbatim instead of being parsed as Rich style tags.
        _console.print(
            Panel(
                f"[bold red]{escape(str(exc))}[/bold red]",
                title="Constraint Review Error",
            )
        )

    resolved_path = resolve_cli_path(path)
    try:
        artifact, artifact_sha256 = load_constraint_review_artifact(resolved_path)
        parsed_notes = _parse_notes(note)
        updated = update_constraint_review_artifact(
            artifact,
            accept_ids=tuple(accept or ()),
            reject_ids=tuple(reject or ()),
            pending_ids=tuple(pending or ()),
            notes=parsed_notes,
        )
    except (ConstraintReviewError, typer.BadParameter) as exc:
        _report_error(exc)
        raise typer.Exit(code=2) from exc

    # Any explicit option switches to non-interactive mode.
    non_interactive = no_tui or bool(
        accept or reject or pending or note or output or dry_run or json_output
    )
    if not non_interactive:
        app = ConstraintReviewApp(updated)
        tui_result = app.run()
        if tui_result is None or not app.saved:
            _console.print("[yellow]Constraint review cancelled; no file was changed.[/yellow]")
            raise typer.Exit(code=1)
        updated = tui_result

    target_path = resolve_cli_path(output) if output is not None else resolved_path
    # Reported digest: the loaded file's digest unless something is written
    # (then the digest returned by the writer) or this is a dry run (None).
    written_sha256: str | None = artifact_sha256
    if dry_run:
        written_sha256 = None
    elif updated != artifact or target_path != resolved_path:
        try:
            written_sha256 = write_constraint_review_artifact_atomic(target_path, updated)
        except OSError as exc:
            _report_error(exc)
            raise typer.Exit(code=2) from exc

    if json_output:
        typer.echo(
            json.dumps(
                _artifact_summary(updated, path=target_path, sha256=written_sha256),
                indent=2,
                sort_keys=True,
            )
        )
    else:
        _print_review_table(updated)
    if dry_run:
        _console.print("[yellow]Dry run: no constraint artifact was written.[/yellow]")
@@ -0,0 +1,147 @@
1
+ """CLI subcommand: ``dataforge profile <path> [--schema <yaml>]``.
2
+
3
+ Reads a CSV file, runs all detectors, and renders detected issues as a
4
+ rich-formatted terminal table. Diagnostics exit 0 by default; use
5
+ ``--fail-on`` for CI gating.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ from collections.abc import Sequence
12
+ from hashlib import sha256
13
+ from pathlib import Path
14
+ from typing import Annotated, Literal
15
+
16
+ import typer
17
+ from rich.console import Console
18
+
19
+ from dataforge.cli.common import load_schema, read_csv, resolve_cli_path
20
+ from dataforge.detectors import run_all_detectors
21
+ from dataforge.detectors.base import Issue, Schema, Severity
22
+ from dataforge.schema_inference import (
23
+ build_constraint_review_artifact,
24
+ dump_constraint_review_artifact,
25
+ infer_schema,
26
+ )
27
+ from dataforge.ui.profile_view import render_profile_table
28
+
29
# Console bound to stderr; used in this module for error messages.
_console = Console(stderr=True)

# Accepted values for the ``--fail-on`` option.
FailOn = Literal["never", "unsafe", "review", "any"]
32
+
33
+
34
def _should_fail(issues: Sequence[Issue], fail_on: FailOn) -> bool:
    """Decide whether the findings meet the ``--fail-on`` threshold.

    * ``"never"``  - never fails.
    * ``"any"``    - fails when there is at least one issue.
    * ``"unsafe"`` - fails when some issue has severity ``Severity.UNSAFE``.
    * otherwise    - fails when some issue has severity at or above
      ``Severity.REVIEW``.
    """
    if fail_on == "never":
        return False
    if fail_on == "any":
        return True if issues else False

    levels = [found.severity for found in issues]
    if fail_on == "unsafe":
        return any(level == Severity.UNSAFE for level in levels)
    return any(level >= Severity.REVIEW for level in levels)
44
+
45
+
46
def profile(
    path: Annotated[
        Path,
        typer.Argument(
            help="Path to the CSV file to profile.",
        ),
    ],
    schema: Annotated[
        Path | None,
        typer.Option(
            "--schema",
            help="Path to a YAML schema file with column types and FDs.",
        ),
    ] = None,
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print profile results as JSON."),
    ] = False,
    constraints_out: Annotated[
        Path | None,
        typer.Option(
            "--constraints-out",
            help="Write inferred constraints as a pending review artifact.",
        ),
    ] = None,
    fail_on: Annotated[
        FailOn,
        typer.Option(
            "--fail-on",
            help="Exit 1 when findings meet this threshold: never, unsafe, review, any.",
        ),
    ] = "never",
) -> None:
    """Profile a CSV file for data-quality issues.

    Reads the CSV, runs all detectors (type_mismatch, decimal_shift,
    fd_violation), and renders a rich-formatted table of detected issues
    (or a JSON document with ``--json``).  With ``--constraints-out`` the
    inferred constraints are additionally written as a review artifact.

    Exit code 0 unless ``--fail-on`` is set and matching findings are present
    (exit 1).  Exit code 2 when the CSV or schema file is missing, the CSV
    cannot be read, or the constraints artifact cannot be written.
    """
    resolved_path = resolve_cli_path(path)
    if not resolved_path.exists():
        _console.print(f"[bold red]CSV file not found:[/bold red] {path}")
        raise typer.Exit(code=2)

    # Load the CSV with dtype=str to avoid pandas type-coercion artifacts.
    try:
        df = read_csv(resolved_path)
    except Exception as exc:
        _console.print(f"[bold red]Error reading CSV:[/bold red] {exc}")
        raise typer.Exit(code=2) from exc

    # Optionally load schema.
    parsed_schema: Schema | None = None
    if schema is not None:
        resolved_schema = resolve_cli_path(schema)
        if not resolved_schema.exists():
            _console.print(f"[bold red]Schema file not found:[/bold red] {schema}")
            raise typer.Exit(code=2)
        parsed_schema = load_schema(resolved_schema)

    # Run all detectors.
    issues = run_all_detectors(df, parsed_schema)
    schema_inference = infer_schema(df)

    if constraints_out is not None:
        try:
            # Resolve the output path the same way as every other CLI path.
            resolved_constraints_out = resolve_cli_path(constraints_out)
            # The source digest is only needed for the artifact, so the file
            # is re-read here (inside the error handling) rather than on
            # every run.
            source_sha256 = sha256(resolved_path.read_bytes()).hexdigest()
            artifact = build_constraint_review_artifact(
                schema_inference,
                source_path=resolved_path,
                source_sha256=source_sha256,
            )
            resolved_constraints_out.parent.mkdir(parents=True, exist_ok=True)
            resolved_constraints_out.write_text(
                dump_constraint_review_artifact(artifact),
                encoding="utf-8",
            )
        except Exception as exc:
            _console.print(f"[bold red]Error writing constraints artifact:[/bold red] {exc}")
            raise typer.Exit(code=2) from exc

    # Render the results.
    if json_output:
        typer.echo(
            json.dumps(
                {
                    "path": str(resolved_path),
                    "issues_count": len(issues),
                    "fail_on": fail_on,
                    "issues": [issue.model_dump(mode="json") for issue in issues],
                    "schema_inference": schema_inference.model_dump(mode="json"),
                },
                indent=2,
                sort_keys=True,
            )
        )
    else:
        output_console = Console()
        render_profile_table(issues, output_console, file_path=str(resolved_path))

    if _should_fail(issues, fail_on):
        raise typer.Exit(code=1)