dataforge-07 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge/__init__.py +204 -0
- dataforge/__main__.py +5 -0
- dataforge/agent/__init__.py +16 -0
- dataforge/agent/providers.py +259 -0
- dataforge/agent/scratchpad.py +183 -0
- dataforge/agent/tool_actions.py +343 -0
- dataforge/bench/__init__.py +31 -0
- dataforge/bench/core.py +426 -0
- dataforge/bench/groq_client.py +386 -0
- dataforge/bench/methods.py +443 -0
- dataforge/bench/report.py +309 -0
- dataforge/bench/runner.py +247 -0
- dataforge/causal/__init__.py +21 -0
- dataforge/causal/dag.py +174 -0
- dataforge/causal/pc.py +232 -0
- dataforge/causal/root_cause.py +193 -0
- dataforge/cli/__init__.py +50 -0
- dataforge/cli/audit.py +70 -0
- dataforge/cli/bench.py +154 -0
- dataforge/cli/common.py +267 -0
- dataforge/cli/constraints.py +407 -0
- dataforge/cli/profile.py +147 -0
- dataforge/cli/release.py +166 -0
- dataforge/cli/repair.py +407 -0
- dataforge/cli/revert.py +139 -0
- dataforge/cli/watch.py +144 -0
- dataforge/datasets/__init__.py +25 -0
- dataforge/datasets/embedded/hospital/clean.csv +11 -0
- dataforge/datasets/embedded/hospital/dirty.csv +11 -0
- dataforge/datasets/real_world.py +290 -0
- dataforge/datasets/registry.py +103 -0
- dataforge/detectors/__init__.py +80 -0
- dataforge/detectors/base.py +145 -0
- dataforge/detectors/decimal_shift.py +166 -0
- dataforge/detectors/fd_violation.py +157 -0
- dataforge/detectors/type_mismatch.py +173 -0
- dataforge/engine/__init__.py +39 -0
- dataforge/engine/repair.py +905 -0
- dataforge/env/__init__.py +22 -0
- dataforge/env/environment.py +883 -0
- dataforge/env/observation.py +61 -0
- dataforge/env/openenv_core.py +161 -0
- dataforge/env/reward.py +128 -0
- dataforge/env/server.py +176 -0
- dataforge/evaluation_contract.py +76 -0
- dataforge/fixtures/hospital_10rows.csv +11 -0
- dataforge/fixtures/hospital_schema.yaml +17 -0
- dataforge/http/__init__.py +1 -0
- dataforge/http/problem.py +103 -0
- dataforge/integrations/__init__.py +1 -0
- dataforge/integrations/dbt.py +164 -0
- dataforge/observability.py +76 -0
- dataforge/py.typed +1 -0
- dataforge/release/__init__.py +1 -0
- dataforge/release/doctor.py +367 -0
- dataforge/release/full_vision.py +702 -0
- dataforge/release/gate.py +861 -0
- dataforge/release/playground_check.py +411 -0
- dataforge/repair_contract.py +468 -0
- dataforge/repairers/__init__.py +88 -0
- dataforge/repairers/base.py +77 -0
- dataforge/repairers/decimal_shift.py +43 -0
- dataforge/repairers/fd_violation.py +225 -0
- dataforge/repairers/type_mismatch.py +73 -0
- dataforge/safety/__init__.py +5 -0
- dataforge/safety/adversarial/attack_01_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_02_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_03_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_04_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_05_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_06_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_07_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_08_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_09_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_10_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_11_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_12_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_13_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_14_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_15_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_16_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_17_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_18_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_19_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_20_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_21_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_22_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_23_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_24_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_25_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_26_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_27_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_28_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_29_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_30_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_31_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_32_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_33_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_34_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_35_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_36_row_delete.yaml +11 -0
- dataforge/safety/adversarial/attack_37_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_38_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_39_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_40_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_41_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_42_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_43_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_44_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_45_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_46_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_47_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_48_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_49_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_50_row_delete.yaml +7 -0
- dataforge/safety/constitution.py +307 -0
- dataforge/safety/constitutions/default.yaml +40 -0
- dataforge/safety/filter.py +134 -0
- dataforge/schema_inference.py +620 -0
- dataforge/stores/__init__.py +46 -0
- dataforge/stores/base.py +73 -0
- dataforge/stores/cloud.py +78 -0
- dataforge/stores/csv.py +94 -0
- dataforge/stores/duckdb.py +313 -0
- dataforge/stores/patch_plan.py +178 -0
- dataforge/stores/registry.py +82 -0
- dataforge/stores/repair.py +121 -0
- dataforge/stores/revert.py +22 -0
- dataforge/stores/sql.py +27 -0
- dataforge/table.py +228 -0
- dataforge/transactions/__init__.py +34 -0
- dataforge/transactions/files.py +96 -0
- dataforge/transactions/log.py +613 -0
- dataforge/transactions/revert.py +102 -0
- dataforge/transactions/txn.py +104 -0
- dataforge/ui/__init__.py +1 -0
- dataforge/ui/profile_view.py +136 -0
- dataforge/ui/repair_diff.py +91 -0
- dataforge/verifier/__init__.py +55 -0
- dataforge/verifier/constraint_ir.py +155 -0
- dataforge/verifier/explain.py +47 -0
- dataforge/verifier/gate.py +5 -0
- dataforge/verifier/schema.py +111 -0
- dataforge/verifier/smt.py +433 -0
- dataforge_07-0.1.0.dist-info/METADATA +436 -0
- dataforge_07-0.1.0.dist-info/RECORD +150 -0
- dataforge_07-0.1.0.dist-info/WHEEL +5 -0
- dataforge_07-0.1.0.dist-info/entry_points.txt +3 -0
- dataforge_07-0.1.0.dist-info/licenses/LICENSE +176 -0
- dataforge_07-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,407 @@
|
|
|
1
|
+
"""CLI subcommands for reviewing inferred constraint artifacts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from collections import Counter
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Annotated, Any
|
|
9
|
+
|
|
10
|
+
import typer
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from rich.table import Table
|
|
14
|
+
from textual import on
|
|
15
|
+
from textual.app import App, ComposeResult
|
|
16
|
+
from textual.binding import Binding
|
|
17
|
+
from textual.containers import Horizontal, Vertical
|
|
18
|
+
from textual.widgets import DataTable, Footer, Header, Input, Static
|
|
19
|
+
|
|
20
|
+
from dataforge.cli.common import resolve_cli_path
|
|
21
|
+
from dataforge.schema_inference import (
|
|
22
|
+
REPAIR_SUPPORTED_CONSTRAINT_KINDS,
|
|
23
|
+
ConstraintDecision,
|
|
24
|
+
ConstraintReviewArtifact,
|
|
25
|
+
ConstraintReviewError,
|
|
26
|
+
load_constraint_review_artifact,
|
|
27
|
+
update_constraint_review_artifact,
|
|
28
|
+
write_constraint_review_artifact_atomic,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Typer sub-application that groups the constraint-review commands defined
# below; no_args_is_help=True makes a bare invocation print the help text.
constraints_app = typer.Typer(
|
|
32
|
+
help="Review inferred profile constraints before repair can use them.",
|
|
33
|
+
no_args_is_help=True,
|
|
34
|
+
)
|
|
35
|
+
# Errors and notices are printed through this stderr console; the review table
# and the --json payload are written separately (Console() / typer.echo).
_console = Console(stderr=True)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _candidate_target(candidate: Any) -> str:
|
|
39
|
+
"""Return a compact target description for one candidate."""
|
|
40
|
+
columns = ", ".join(candidate.columns)
|
|
41
|
+
if candidate.dependent:
|
|
42
|
+
return f"{columns} -> {candidate.dependent}"
|
|
43
|
+
return columns
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _candidate_summary(reviewed: Any) -> dict[str, Any]:
    """Flatten one reviewed candidate into a plain mapping.

    Review-level fields (id, decision, note) are read from ``reviewed``; the
    constraint-level fields (kind, confidence, evidence, target) come from
    ``reviewed.candidate``.  ``repair_supported`` reports whether the kind is
    listed in ``REPAIR_SUPPORTED_CONSTRAINT_KINDS``.
    """
    inner = reviewed.candidate
    summary: dict[str, Any] = dict(
        candidate_id=reviewed.candidate_id,
        decision=reviewed.decision,
        kind=inner.kind,
        target=_candidate_target(inner),
        confidence=inner.confidence,
        repair_supported=inner.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS,
        evidence=inner.evidence,
        review_note=reviewed.review_note,
    )
    return summary
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _artifact_summary(
    artifact: ConstraintReviewArtifact,
    *,
    path: Path,
    sha256: str | None = None,
) -> dict[str, Any]:
    """Build the summary mapping that the CLI serialises for ``--json``.

    Args:
        artifact: The review artifact to summarise.
        path: Location reported under ``"path"`` (stringified).
        sha256: Digest reported under ``"sha256"``; passed through unchanged.

    Returns:
        A dict with the artifact's source metadata, the number of candidates,
        how many have a repair-supported kind, per-decision counts for
        ``accepted`` / ``pending`` / ``rejected`` (zero when absent), and one
        summary entry per candidate.
    """
    reviewed_items = artifact.candidates
    tally = Counter(item.decision for item in reviewed_items)
    supported_items = [
        item
        for item in reviewed_items
        if item.candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS
    ]
    # Counter indexing yields 0 for a decision that never occurred.
    per_decision = {name: tally[name] for name in ("accepted", "pending", "rejected")}
    per_candidate = [_candidate_summary(item) for item in reviewed_items]
    return {
        "path": str(path),
        "schema_version": artifact.schema_version,
        "source_path": artifact.source_path,
        "source_sha256": artifact.source_sha256,
        "row_count": artifact.row_count,
        "candidate_count": len(reviewed_items),
        "repair_supported_count": len(supported_items),
        "decision_counts": per_decision,
        "sha256": sha256,
        "candidates": per_candidate,
    }
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _parse_notes(raw_notes: list[str] | None) -> dict[str, str | None]:
|
|
93
|
+
"""Parse repeated ``--note cnd-id=text`` options."""
|
|
94
|
+
parsed: dict[str, str | None] = {}
|
|
95
|
+
for raw_note in raw_notes or []:
|
|
96
|
+
candidate_id, separator, note = raw_note.partition("=")
|
|
97
|
+
if not separator or not candidate_id:
|
|
98
|
+
raise typer.BadParameter("--note must use the form cnd-...=text")
|
|
99
|
+
parsed[candidate_id] = note or None
|
|
100
|
+
return parsed
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _print_review_table(artifact: ConstraintReviewArtifact) -> None:
    """Print every candidate of ``artifact`` as one row of a Rich table.

    The table is printed through a fresh default ``Console()`` rather than
    the module-level stderr console.  The "Repair" column shows ``yes`` for
    kinds in ``REPAIR_SUPPORTED_CONSTRAINT_KINDS`` and ``review-only``
    otherwise.
    """
    column_specs: tuple[tuple[str, dict[str, Any]], ...] = (
        ("Candidate ID", {"overflow": "fold"}),
        ("Decision", {}),
        ("Kind", {}),
        ("Target", {"overflow": "fold"}),
        ("Confidence", {"justify": "right"}),
        ("Repair", {}),
        ("Evidence", {"overflow": "fold"}),
    )
    review_table = Table(title="Constraint Review")
    for heading, options in column_specs:
        review_table.add_column(heading, **options)

    for item in artifact.candidates:
        inner = item.candidate
        if inner.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS:
            repair_label = "yes"
        else:
            repair_label = "review-only"
        review_table.add_row(
            item.candidate_id,
            item.decision,
            inner.kind,
            _candidate_target(inner),
            f"{inner.confidence:.4f}",
            repair_label,
            inner.evidence,
        )

    Console().print(review_table)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class ConstraintReviewApp(App[ConstraintReviewArtifact]):
    """Textual review UI for a constraint artifact.

    The screen shows a candidate table, a detail pane for the highlighted
    candidate, and a note input.  Key bindings change the decision of the
    highlighted candidate.  The app never writes to disk: it exits with the
    in-memory artifact and records in ``saved`` whether the user asked to
    keep the changes (``s``) or discard them (``q``).
    """

    CSS = """
    DataTable {
        height: 1fr;
    }

    #detail {
        width: 45%;
        height: 1fr;
        overflow-y: auto;
        border: solid $accent;
        padding: 1;
    }

    #note {
        height: 3;
    }
    """

    BINDINGS = [
        Binding("a", "accept", "Accept"),
        Binding("r", "reject", "Reject"),
        Binding("p", "pending", "Pending"),
        Binding("n", "focus_note", "Note"),
        Binding("s", "save", "Save"),
        Binding("q", "quit_without_save", "Quit"),
    ]

    def __init__(self, artifact: ConstraintReviewArtifact) -> None:
        """Create a review application for an already validated artifact."""
        super().__init__()
        self.artifact = artifact
        # Set to True only by action_save; the caller checks it after run().
        self.saved = False
        # Id of the highlighted candidate, or None when there are none.
        self.selected_candidate_id = (
            artifact.candidates[0].candidate_id if artifact.candidates else None
        )

    def compose(self) -> ComposeResult:
        """Compose the review screen: table and detail side by side, note below."""
        yield Header()
        with Vertical():
            with Horizontal():
                yield DataTable(id="candidates")
                yield Static(id="detail")
            yield Input(
                placeholder="Review note for selected candidate; press Enter to save note",
                id="note",
            )
        yield Footer()

    def on_mount(self) -> None:
        """Populate the table when the TUI starts."""
        table = self.query_one("#candidates", DataTable)
        table.cursor_type = "row"
        self._populate_table(table)
        table.focus()
        self._refresh_detail()

    @on(DataTable.RowHighlighted)
    def _on_row_highlighted(self, event: DataTable.RowHighlighted) -> None:
        """Track the currently highlighted candidate."""
        # Rows are keyed by candidate id, so the row key is the id.
        self.selected_candidate_id = str(event.row_key.value)
        self._refresh_detail()

    @on(Input.Submitted, "#note")
    def _on_note_submitted(self, event: Input.Submitted) -> None:
        """Save a note for the selected candidate and clear the input."""
        if self.selected_candidate_id is None:
            return
        self.artifact = update_constraint_review_artifact(
            self.artifact,
            # Store an empty submission as None, matching how the CLI's
            # --note parsing represents an empty note.
            notes={self.selected_candidate_id: event.value or None},
        )
        event.input.value = ""
        self._refresh_table()
        self._refresh_detail()

    def action_accept(self) -> None:
        """Accept the selected candidate."""
        self._set_selected_decision("accepted")

    def action_reject(self) -> None:
        """Reject the selected candidate."""
        self._set_selected_decision("rejected")

    def action_pending(self) -> None:
        """Reset the selected candidate to pending."""
        self._set_selected_decision("pending")

    def action_focus_note(self) -> None:
        """Focus the note editor."""
        self.query_one("#note", Input).focus()

    def action_save(self) -> None:
        """Exit the TUI with the reviewed artifact, flagged as saved."""
        self.saved = True
        self.exit(self.artifact)

    def action_quit_without_save(self) -> None:
        """Exit the TUI with ``saved`` cleared so the caller discards changes."""
        self.saved = False
        self.exit(self.artifact)

    def _set_selected_decision(self, decision: ConstraintDecision) -> None:
        """Apply a decision to the selected candidate and redraw."""
        if self.selected_candidate_id is None:
            return
        if decision == "accepted":
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                accept_ids=(self.selected_candidate_id,),
            )
        elif decision == "rejected":
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                reject_ids=(self.selected_candidate_id,),
            )
        else:
            self.artifact = update_constraint_review_artifact(
                self.artifact,
                pending_ids=(self.selected_candidate_id,),
            )
        self._refresh_table()
        self._refresh_detail()

    def _selected_candidate(self) -> Any | None:
        """Return the selected reviewed candidate, or None if nothing matches."""
        for reviewed in self.artifact.candidates:
            if reviewed.candidate_id == self.selected_candidate_id:
                return reviewed
        return None

    def _populate_table(self, table: DataTable) -> None:
        """Add the column headings and one row per candidate to ``table``.

        Shared by the initial mount and every rebuild so both always render
        the same columns.  Rows are keyed by candidate id.
        """
        table.add_columns("ID", "Decision", "Kind", "Target", "Conf", "Repair")
        for reviewed in self.artifact.candidates:
            candidate = reviewed.candidate
            table.add_row(
                reviewed.candidate_id,
                reviewed.decision,
                candidate.kind,
                _candidate_target(candidate),
                f"{candidate.confidence:.4f}",
                "yes" if candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS else "review-only",
                key=reviewed.candidate_id,
            )

    def _refresh_table(self) -> None:
        """Rebuild the table rows after a change, keeping the cursor in place."""
        table = self.query_one("#candidates", DataTable)
        # Remember the selection before rebuilding: clearing the table sends
        # the cursor back to the first row, which would otherwise make the
        # next keypress act on a different candidate than the one just edited.
        selected_id = self.selected_candidate_id
        table.clear(columns=True)
        self._populate_table(table)
        for row_index, reviewed in enumerate(self.artifact.candidates):
            if reviewed.candidate_id == selected_id:
                table.move_cursor(row=row_index)
                break

    def _refresh_detail(self) -> None:
        """Refresh the detail pane for the selected candidate."""
        reviewed = self._selected_candidate()
        detail = self.query_one("#detail", Static)
        if reviewed is None:
            detail.update("No constraint candidates.")
            return
        candidate = reviewed.candidate
        repair_note = (
            "Repair-supported in v1."
            if candidate.kind in REPAIR_SUPPORTED_CONSTRAINT_KINDS
            else "Review-only in v1; repair ignores this kind."
        )
        # Full JSON dump of the reviewed entry, shown below the summary lines.
        payload = json.dumps(reviewed.model_dump(mode="json"), indent=2, sort_keys=True)
        detail.update(
            "\n".join(
                [
                    f"Candidate: {reviewed.candidate_id}",
                    f"Decision: {reviewed.decision}",
                    f"Source: {self.artifact.source_path}",
                    f"Source SHA-256: {self.artifact.source_sha256}",
                    f"Repair: {repair_note}",
                    "",
                    "Candidate JSON:",
                    payload,
                ]
            )
        )
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
@constraints_app.command(name="review")
def review_constraints(
    path: Annotated[
        Path,
        typer.Argument(help="Path to a constraint_review_v1 JSON artifact."),
    ],
    accept: Annotated[
        list[str] | None,
        typer.Option("--accept", help="Mark a candidate id accepted. Repeatable."),
    ] = None,
    reject: Annotated[
        list[str] | None,
        typer.Option("--reject", help="Mark a candidate id rejected. Repeatable."),
    ] = None,
    pending: Annotated[
        list[str] | None,
        typer.Option("--pending", help="Reset a candidate id to pending. Repeatable."),
    ] = None,
    note: Annotated[
        list[str] | None,
        typer.Option("--note", help="Set a review note with cnd-...=text. Repeatable."),
    ] = None,
    output: Annotated[
        Path | None,
        typer.Option("--output", help="Write the reviewed artifact to a separate path."),
    ] = None,
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Preview changes without writing an artifact."),
    ] = False,
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print review state as JSON."),
    ] = False,
    no_tui: Annotated[
        bool,
        typer.Option("--no-tui", help="Run deterministic non-interactive review mode."),
    ] = False,
) -> None:
    """Review profile-inferred constraint candidates before repair uses them."""
    # NOTE: the docstring above is kept to one line on purpose; Typer shows it
    # as the command's --help text.
    #
    # Flow: load the artifact, apply any --accept/--reject/--pending/--note
    # edits, optionally run the TUI, write the result unless --dry-run, then
    # print either a JSON summary or a table.
    # Exit codes: 2 for load/validation/write errors, 1 when the TUI is
    # cancelled, otherwise normal completion.

    # Local import: only the error panels need it.
    from rich.markup import escape

    resolved_path = resolve_cli_path(path)
    try:
        artifact, artifact_sha256 = load_constraint_review_artifact(resolved_path)
        parsed_notes = _parse_notes(note)
        updated = update_constraint_review_artifact(
            artifact,
            accept_ids=tuple(accept or ()),
            reject_ids=tuple(reject or ()),
            pending_ids=tuple(pending or ()),
            notes=parsed_notes,
        )
    except (ConstraintReviewError, typer.BadParameter) as exc:
        # Escape the message: validation errors and paths can contain square
        # brackets, which Rich would otherwise parse as markup tags.
        _console.print(
            Panel(f"[bold red]{escape(str(exc))}[/bold red]", title="Constraint Review Error")
        )
        raise typer.Exit(code=2) from exc

    # Any edit, output or formatting flag implies scripted use, so the TUI is
    # only opened for a bare `review <path>` invocation.
    non_interactive = no_tui or bool(
        accept or reject or pending or note or output or dry_run or json_output
    )
    if not non_interactive:
        app = ConstraintReviewApp(updated)
        tui_result = app.run()
        if tui_result is None or not app.saved:
            _console.print("[yellow]Constraint review cancelled; no file was changed.[/yellow]")
            raise typer.Exit(code=1)
        updated = tui_result

    target_path = resolve_cli_path(output) if output is not None else resolved_path
    # Reported digest: the loaded artifact's by default, None for a dry run,
    # or the digest returned by the writer when a file is actually written.
    written_sha256: str | None = artifact_sha256
    if dry_run:
        written_sha256 = None
    elif updated != artifact or target_path != resolved_path:
        try:
            written_sha256 = write_constraint_review_artifact_atomic(target_path, updated)
        except OSError as exc:
            _console.print(
                Panel(
                    f"[bold red]{escape(str(exc))}[/bold red]",
                    title="Constraint Review Error",
                )
            )
            raise typer.Exit(code=2) from exc

    if json_output:
        typer.echo(
            json.dumps(
                _artifact_summary(updated, path=target_path, sha256=written_sha256),
                indent=2,
                sort_keys=True,
            )
        )
    else:
        _print_review_table(updated)
        if dry_run:
            _console.print("[yellow]Dry run: no constraint artifact was written.[/yellow]")
|
dataforge/cli/profile.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""CLI subcommand: ``dataforge profile <path> [--schema <yaml>]``.
|
|
2
|
+
|
|
3
|
+
Reads a CSV file, runs all detectors, and renders detected issues as a
|
|
4
|
+
rich-formatted terminal table. Diagnostics exit 0 by default; use
|
|
5
|
+
``--fail-on`` for CI gating.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
from collections.abc import Sequence
|
|
12
|
+
from hashlib import sha256
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Annotated, Literal
|
|
15
|
+
|
|
16
|
+
import typer
|
|
17
|
+
from rich.console import Console
|
|
18
|
+
|
|
19
|
+
from dataforge.cli.common import load_schema, read_csv, resolve_cli_path
|
|
20
|
+
from dataforge.detectors import run_all_detectors
|
|
21
|
+
from dataforge.detectors.base import Issue, Schema, Severity
|
|
22
|
+
from dataforge.schema_inference import (
|
|
23
|
+
build_constraint_review_artifact,
|
|
24
|
+
dump_constraint_review_artifact,
|
|
25
|
+
infer_schema,
|
|
26
|
+
)
|
|
27
|
+
from dataforge.ui.profile_view import render_profile_table
|
|
28
|
+
|
|
29
|
+
# Error messages are printed through this stderr console; the profile table
# and the --json payload are written separately (Console() / typer.echo).
_console = Console(stderr=True)
|
|
30
|
+
|
|
31
|
+
# Accepted values for the --fail-on option; "never" is the option's default.
FailOn = Literal["never", "unsafe", "review", "any"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _should_fail(issues: Sequence[Issue], fail_on: FailOn) -> bool:
|
|
35
|
+
"""Return whether profile findings should trip the requested CI gate."""
|
|
36
|
+
if fail_on == "never":
|
|
37
|
+
return False
|
|
38
|
+
if fail_on == "any":
|
|
39
|
+
return bool(issues)
|
|
40
|
+
severities = [issue.severity for issue in issues]
|
|
41
|
+
if fail_on == "unsafe":
|
|
42
|
+
return any(severity == Severity.UNSAFE for severity in severities)
|
|
43
|
+
return any(severity >= Severity.REVIEW for severity in severities)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def profile(
    path: Annotated[
        Path,
        typer.Argument(
            help="Path to the CSV file to profile.",
        ),
    ],
    schema: Annotated[
        Path | None,
        typer.Option(
            "--schema",
            help="Path to a YAML schema file with column types and FDs.",
        ),
    ] = None,
    json_output: Annotated[
        bool,
        typer.Option("--json", help="Print profile results as JSON."),
    ] = False,
    constraints_out: Annotated[
        Path | None,
        typer.Option(
            "--constraints-out",
            help="Write inferred constraints as a pending review artifact.",
        ),
    ] = None,
    fail_on: Annotated[
        FailOn,
        typer.Option(
            "--fail-on",
            help="Exit 1 when findings meet this threshold: never, unsafe, review, any.",
        ),
    ] = "never",
) -> None:
    """Profile a CSV file for data-quality issues.

    Reads the CSV, runs all detectors (type_mismatch, decimal_shift,
    fd_violation), and renders a rich-formatted table of detected issues.

    Exit code 0 unless ``--fail-on`` is set and matching findings are present.
    """
    # NOTE: the docstring above is left as-is because it doubles as the
    # command's help text.  Exit code 2 is used for missing/unreadable inputs
    # and for a failed artifact write; exit code 1 is the --fail-on gate.

    # Local import: only the error messages need it.  User-supplied paths and
    # exception text may contain square brackets, which Rich would otherwise
    # interpret as markup tags.
    from rich.markup import escape

    resolved_path = resolve_cli_path(path)
    if not resolved_path.exists():
        _console.print(f"[bold red]CSV file not found:[/bold red] {escape(str(path))}")
        raise typer.Exit(code=2)

    # Load the CSV with dtype=str to avoid pandas type-coercion artifacts.
    try:
        df = read_csv(resolved_path)
    except Exception as exc:
        _console.print(f"[bold red]Error reading CSV:[/bold red] {escape(str(exc))}")
        raise typer.Exit(code=2) from exc

    # Optionally load schema.
    parsed_schema: Schema | None = None
    if schema is not None:
        resolved_schema = resolve_cli_path(schema)
        if not resolved_schema.exists():
            _console.print(f"[bold red]Schema file not found:[/bold red] {escape(str(schema))}")
            raise typer.Exit(code=2)
        parsed_schema = load_schema(resolved_schema)

    # Run all detectors.
    issues = run_all_detectors(df, parsed_schema)
    schema_inference = infer_schema(df)
    if constraints_out is not None:
        # The source digest is only recorded in the artifact, so the file is
        # hashed only when an artifact was requested.
        source_sha256 = sha256(resolved_path.read_bytes()).hexdigest()
        # Resolve the output path the same way as every other CLI path.
        resolved_constraints_out = resolve_cli_path(constraints_out)
        try:
            artifact = build_constraint_review_artifact(
                schema_inference,
                source_path=resolved_path,
                source_sha256=source_sha256,
            )
            resolved_constraints_out.parent.mkdir(parents=True, exist_ok=True)
            resolved_constraints_out.write_text(
                dump_constraint_review_artifact(artifact),
                encoding="utf-8",
            )
        except Exception as exc:
            _console.print(
                f"[bold red]Error writing constraints artifact:[/bold red] {escape(str(exc))}"
            )
            raise typer.Exit(code=2) from exc

    # Render the results.
    if json_output:
        typer.echo(
            json.dumps(
                {
                    "path": str(resolved_path),
                    "issues_count": len(issues),
                    "fail_on": fail_on,
                    "issues": [issue.model_dump(mode="json") for issue in issues],
                    "schema_inference": schema_inference.model_dump(mode="json"),
                },
                indent=2,
                sort_keys=True,
            )
        )
    else:
        # The table goes to stdout via a fresh console, not the stderr one.
        output_console = Console()
        render_profile_table(issues, output_console, file_path=str(resolved_path))

    if _should_fail(issues, fail_on):
        raise typer.Exit(code=1)
|