dataforge-07-mcp 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,124 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataforge_07_mcp
3
+ Version: 0.1.0
4
+ Summary: Model Context Protocol server for DataForge data-quality tools.
5
+ License-Expression: Apache-2.0
6
+ Project-URL: Homepage, https://github.com/Aegis15/dataforge
7
+ Project-URL: Repository, https://github.com/Aegis15/dataforge
8
+ Project-URL: Documentation, https://dataforge.praneshrajan15.workers.dev/playground
9
+ Keywords: data-quality,dataforge,mcp,model-context-protocol
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Python: <3.13,>=3.11
14
+ Description-Content-Type: text/markdown
15
+ Requires-Dist: dataforge_07<0.2,>=0.1.0
16
+ Requires-Dist: mcp>=1.27
17
+ Provides-Extra: dev
18
+ Requires-Dist: pytest>=9.0.3; extra == "dev"
19
+
20
+ # dataforge-mcp
21
+
22
+ `dataforge-mcp` exposes DataForge's shipped CSV profiling, detection, repair,
23
+ verification, and transaction-revert paths as Model Context Protocol tools.
24
+
25
+ ```bash
26
+ cd dataforge-mcp
27
+ python -m pip install -e ".[dev]"
28
+ dataforge-mcp serve --allowed-root /path/to/csv/workspace
29
+ ```
30
+
31
+ For local development from this repository:
32
+
33
+ ```bash
34
+ cd dataforge-mcp
35
+ python -m pip install -e ".[dev]"
36
+ dataforge-mcp serve --allowed-root ..
37
+ ```
38
+
39
+ The default transport is stdio, which is what local desktop MCP clients expect.
40
+ For local Streamable HTTP experiments:
41
+
42
+ ```bash
43
+ dataforge-mcp serve --transport streamable-http --host 127.0.0.1 --port 8000
44
+ ```
45
+
46
+ `dry_run` is the safe default. To allow file mutation through MCP, start the
47
+ server with an explicit allowed root and `--enable-apply`:
48
+
49
+ ```bash
50
+ dataforge-mcp serve --allowed-root /path/to/csv/workspace --enable-apply
51
+ ```
52
+
53
+ ## Tools
54
+
55
+ - `dataforge_profile(path: str)` - summarize CSV shape plus detected issues.
56
+ - `dataforge_detect_errors(path: str)` - return detected issues only.
57
+ - `dataforge_verify_fix(fix_spec: dict)` - run one candidate fix through the
58
+ stale-value check, the safety filter, and the SMT verifier.
59
+ - `dataforge_apply_repairs(path: str, mode: "dry_run" | "apply")` - propose
60
+ verified repairs and optionally write a reversible transaction.
61
+ - `dataforge_revert(txn_id: str)` - restore a transaction's original bytes.
62
+
63
+ ## Client Configuration
64
+
65
+ Use the same server command for Claude Desktop, Cursor, Windsurf, or any local
66
+ MCP client that supports stdio servers:
67
+
68
+ ```json
69
+ {
70
+ "mcpServers": {
71
+ "dataforge": {
72
+ "command": "dataforge-mcp",
73
+ "args": ["serve", "--allowed-root", "/path/to/csv/workspace"]
74
+ }
75
+ }
76
+ }
77
+ ```
78
+
79
+ If your client cannot resolve the console script, replace `command` with the
80
+ absolute path returned by your shell:
81
+
82
+ ```bash
83
+ which dataforge-mcp
84
+ ```
85
+
86
+ On Windows PowerShell:
87
+
88
+ ```powershell
89
+ Get-Command dataforge-mcp
90
+ ```
91
+
92
+ Before describing a build as agent-ready, run an MCP Inspector smoke check
93
+ against a fixture directory and confirm the profile, detect, verify, dry-run
94
+ apply, and disabled-apply paths:
95
+
96
+ ```bash
97
+ npx @modelcontextprotocol/inspector dataforge-mcp serve --allowed-root /path/to/csv/workspace
98
+ ```
99
+
100
+ ## Safety Model
101
+
102
+ `apply` mode uses DataForge's detector -> repairer -> SafetyFilter ->
103
+ SMTVerifier -> transaction-log path. The tool writes the transaction journal and
104
+ source snapshot before mutating the CSV, and `dataforge_revert` restores the
105
+ snapshot only when the current file still matches the recorded post-state hash.
106
+
107
+ The MCP server does not enable live LLM repair fallback by default. It does not
108
+ send CSV contents to any external model provider. It also rejects CSV and schema
109
+ paths outside the configured allowed roots, and `apply` mode is disabled unless
110
+ the server is started with `--enable-apply` or `DATAFORGE_MCP_ENABLE_APPLY=1`.
111
+
112
+ ## Release
113
+
114
+ The package is intended to be released independently from the nested
115
+ `dataforge-mcp/` source directory as the `dataforge_07_mcp` distribution, but
116
+ it is not published yet. After PyPI Trusted Publishing is configured, the
117
+ workflow will build on tags matching:
118
+
119
+ ```text
120
+ dataforge-mcp-v*
121
+ ```
122
+
123
+ The package depends on `dataforge_07` and the official Python `mcp` SDK; it does
124
+ not vendor DataForge or add MCP dependencies to the core package.
@@ -0,0 +1,8 @@
1
+ dataforge_mcp/__init__.py,sha256=xrA7h1skAaR7PfGWfTJNg2m55Oo-SBodhePb0Sk7T20,122
2
+ dataforge_mcp/server.py,sha256=9UYf_gpY-vgvnnBrdnS9K8VNYI4TgOm4aeWjdaYdgN0,3281
3
+ dataforge_mcp/tools.py,sha256=dHDLtOf3dfNSKhCbdqQ3yrSKpWs8faUD2u0YZ1ilpAY,13099
4
+ dataforge_07_mcp-0.1.0.dist-info/METADATA,sha256=DEQqzsdS9h-E5jS9L9uLRtV61B2NqFmS2_NtrQqzrG0,4062
5
+ dataforge_07_mcp-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
6
+ dataforge_07_mcp-0.1.0.dist-info/entry_points.txt,sha256=xO8EZXxkmDLk_tN9W466LImLzqzwRZctLlADJjXIco4,104
7
+ dataforge_07_mcp-0.1.0.dist-info/top_level.txt,sha256=Vz7dtKc20dZhoqKzpFhVlHdcnpIUOccjEq720DroSlM,14
8
+ dataforge_07_mcp-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ dataforge-mcp = dataforge_mcp.server:main
3
+ dataforge15-mcp = dataforge_mcp.server:main
@@ -0,0 +1 @@
1
+ dataforge_mcp
@@ -0,0 +1,7 @@
1
+ """DataForge MCP server package."""
2
+
3
+ from __future__ import annotations
4
+
5
+ __all__ = ["__version__"]
6
+
7
+ __version__ = "0.1.0"
@@ -0,0 +1,104 @@
1
+ """Executable MCP server for DataForge."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import argparse
6
+ from typing import Literal
7
+
8
+ from mcp.server.fastmcp import FastMCP
9
+
10
+ from dataforge_mcp.tools import (
11
+ configure_mcp_security,
12
+ dataforge_apply_repairs,
13
+ dataforge_detect_errors,
14
+ dataforge_profile,
15
+ dataforge_revert,
16
+ dataforge_verify_fix,
17
+ )
18
+
19
+ TransportLiteral = Literal["stdio", "streamable-http"]
20
+
21
+
22
def create_server(*, host: str = "127.0.0.1", port: int = 8000) -> FastMCP:
    """Build a ``FastMCP`` instance with the five DataForge tools attached.

    ``host`` and ``port`` are forwarded unchanged to ``FastMCP``. The server
    is always constructed with ``stateless_http=True`` and
    ``json_response=True``.
    """
    guidance = (
        "DataForge profiles CSVs, detects data-quality issues, proposes "
        "verified repairs, applies reversible transactions, and reverts them."
    )
    server = FastMCP(
        "DataForge",
        instructions=guidance,
        host=host,
        port=port,
        stateless_http=True,
        json_response=True,
    )
    # Register every tool under an explicit public name, in a fixed order.
    registrations = (
        ("dataforge_profile", dataforge_profile),
        ("dataforge_detect_errors", dataforge_detect_errors),
        ("dataforge_verify_fix", dataforge_verify_fix),
        ("dataforge_apply_repairs", dataforge_apply_repairs),
        ("dataforge_revert", dataforge_revert),
    )
    for tool_name, handler in registrations:
        server.tool(name=tool_name)(handler)
    return server
41
+
42
+
43
def serve(
    *,
    transport: TransportLiteral = "stdio",
    host: str = "127.0.0.1",
    port: int = 8000,
    enable_apply: bool = False,
    allowed_roots: list[str] | None = None,
) -> None:
    """Apply the security settings, then build and run the MCP server.

    The process-wide apply flag and allowed roots are configured first so the
    tools see them before the server starts handling requests on ``transport``.
    """
    configure_mcp_security(
        enable_apply=enable_apply,
        allowed_roots=allowed_roots,
    )
    create_server(host=host, port=port).run(transport=transport)
55
+
56
+
57
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the ``dataforge-mcp`` argument parser with its ``serve`` command."""
    root = argparse.ArgumentParser(prog="dataforge-mcp")
    commands = root.add_subparsers(dest="command")
    serve_cmd = commands.add_parser("serve", help="Start the MCP server.")

    # Each entry is (flag, keyword options) for one ``serve`` argument.
    option_table = (
        (
            "--transport",
            {
                "choices": ("stdio", "streamable-http"),
                "default": "stdio",
                "help": "MCP transport to use.",
            },
        ),
        ("--host", {"default": "127.0.0.1", "help": "HTTP host."}),
        ("--port", {"default": 8000, "type": int, "help": "HTTP port."}),
        (
            "--enable-apply",
            {
                "action": "store_true",
                "help": (
                    "Allow MCP clients to mutate CSV files through reversible transactions."
                ),
            },
        ),
        (
            "--allowed-root",
            {
                "action": "append",
                "dest": "allowed_roots",
                "help": (
                    "Filesystem root that MCP tools may read or mutate. May be repeated."
                ),
            },
        ),
    )
    for flag, options in option_table:
        serve_cmd.add_argument(flag, **options)
    return root
82
+
83
+
84
def main(argv: list[str] | None = None) -> None:
    """Parse ``argv`` and dispatch the ``dataforge-mcp`` console command.

    With no sub-command the help text is printed and the process exits with
    status 0. ``serve`` starts the server; anything else is reported through
    ``parser.error``.
    """
    parser = _build_parser()
    options = parser.parse_args(argv)
    command = options.command

    if command == "serve":
        serve(
            transport=options.transport,
            host=options.host,
            port=options.port,
            enable_apply=options.enable_apply,
            allowed_roots=options.allowed_roots,
        )
        return

    if command is not None:
        # parser.error() exits the process, so control never continues past it.
        parser.error(f"Unknown command: {command}")

    parser.print_help()
    raise SystemExit(0)
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
dataforge_mcp/tools.py ADDED
@@ -0,0 +1,396 @@
1
+ """Structured MCP tool functions backed by DataForge's public API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from collections.abc import Sequence
7
+ from pathlib import Path
8
+ from typing import Any, Literal
9
+
10
+ from pydantic import BaseModel, Field
11
+
12
+ from dataforge import (
13
+ CONTRACT_VERSION,
14
+ CellFix,
15
+ Issue,
16
+ ProposedFix,
17
+ RepairPipelineRequest,
18
+ SafetyContext,
19
+ SafetyFilter,
20
+ SafetyVerdict,
21
+ Schema,
22
+ SMTVerifier,
23
+ TransactionLogError,
24
+ VerificationVerdict,
25
+ VerifiedFix,
26
+ load_schema,
27
+ read_csv,
28
+ revert_transaction,
29
+ run_all_detectors,
30
+ run_repair_pipeline,
31
+ )
32
+
33
+ _APPLY_ENABLED = False
34
+ _ALLOWED_ROOTS: tuple[Path, ...] | None = None
35
+
36
+
37
class IssueResult(BaseModel):
    """One detected issue, flattened into plain fields for MCP responses."""

    # Location of the affected cell.
    row: int
    column: str
    # Classification; severity is stored as a string rather than an enum.
    issue_type: str
    severity: str
    confidence: float
    # Expected value may be absent; the observed value is always present.
    expected: str | None
    actual: str
    reason: str
48
+
49
+
50
class FixResult(BaseModel):
    """One repair proposal, flattened into plain fields for MCP responses."""

    # Target cell and the value change being proposed.
    row: int
    column: str
    old_value: str
    new_value: str
    # Origin of the fix and the kind of edit it performs.
    detector_id: str
    operation: str
    # Justification metadata carried along with the proposal.
    reason: str
    confidence: float
    provenance: str
62
+
63
+
64
class ProfileResult(BaseModel):
    """Payload of the profile tool: CSV shape plus the detected issues."""

    path: str
    # Shape of the loaded table.
    rows: int
    columns: int
    column_names: list[str]
    # Issue summary and the full issue list.
    total_issues: int
    issues: list[IssueResult]
73
+
74
+
75
class VerifyFixResult(BaseModel):
    """Payload of the verify-fix tool: the decision and its supporting verdicts."""

    accept: bool
    reason: str
    # Verdicts default to None for results built before a gate has run.
    safety_verdict: str | None = None
    verifier_verdict: str | None = None
    # Defaults to a fresh empty list per instance.
    unsat_core: list[str] = Field(default_factory=list)
83
+
84
+
85
class TxnReceipt(BaseModel):
    """Payload of the repair tool, describing one dry-run or applied repair run."""

    path: str
    # Both version markers are pinned to the same literal value.
    schema_version: Literal["repair_receipt_v1"] = "repair_receipt_v1"
    receipt_version: Literal["repair_receipt_v1"] = "repair_receipt_v1"
    mode: Literal["dry_run", "apply"]
    # Defaults to the contract version exported by the dataforge package.
    contract_version: str = CONTRACT_VERSION
    # Transaction outcome.
    applied: bool
    txn_id: str | None
    reversible: bool
    # Content hashes and gate verdicts copied from the pipeline receipt.
    source_sha256: str
    post_sha256: str | None = None
    safety_verdict: str
    verifier_verdict: str
    patch_plan_sha256: str | None = None
    revert_command: str | None = None
    allowed_columns: list[str]
    valid_rows: list[int]
    # Structured details serialized as plain dictionaries; empty by default.
    root_causes: list[dict[str, Any]] = Field(default_factory=list)
    candidate_repairs: list[dict[str, Any]] = Field(default_factory=list)
    proof_obligations: list[dict[str, Any]] = Field(default_factory=list)
    limitations: list[str] = Field(default_factory=list)
    # Summary counts, explanation, and the individual fixes.
    issues_count: int
    fixes_count: int
    reason: str
    fixes: list[FixResult]
112
+
113
+
114
class RevertReceipt(BaseModel):
    """Payload of the revert tool for a single transaction."""

    txn_id: str
    source_path: str
    restored: bool
    # Timestamp as a string, or None when no revert time is recorded.
    reverted_at: str | None
    reason: str
122
+
123
+
124
+ def configure_mcp_security(
125
+ *,
126
+ enable_apply: bool = False,
127
+ allowed_roots: Sequence[str | Path] | None = None,
128
+ ) -> None:
129
+ """Configure process-wide MCP path and apply safety settings."""
130
+ global _APPLY_ENABLED, _ALLOWED_ROOTS
131
+ _APPLY_ENABLED = enable_apply
132
+ if allowed_roots is None:
133
+ _ALLOWED_ROOTS = None
134
+ return
135
+ _ALLOWED_ROOTS = tuple(Path(root).expanduser().resolve() for root in allowed_roots)
136
+
137
+
138
def _env_flag_enabled(name: str) -> bool:
    """Report whether environment variable ``name`` holds a truthy flag value.

    The value is stripped and lower-cased before comparison; an unset variable
    counts as false.
    """
    truthy = {"1", "true", "yes", "on"}
    normalized = os.environ.get(name, "").strip().lower()
    return normalized in truthy
141
+
142
+
143
def _apply_is_enabled() -> bool:
    """Report whether apply mode was enabled by configuration or environment."""
    if _APPLY_ENABLED:
        return _APPLY_ENABLED
    # Fall back to the DATAFORGE_MCP_ENABLE_APPLY environment flag.
    return _env_flag_enabled("DATAFORGE_MCP_ENABLE_APPLY")
146
+
147
+
148
def _allowed_roots() -> tuple[Path, ...]:
    """Return the filesystem roots that MCP tools may access.

    Precedence: a non-blank ``DATAFORGE_MCP_ALLOWED_ROOTS`` environment
    variable (entries separated by ``os.pathsep``), then the roots stored by
    ``configure_mcp_security``, then the current working directory.
    """
    env_value = os.environ.get("DATAFORGE_MCP_ALLOWED_ROOTS", "")
    if not env_value.strip():
        if _ALLOWED_ROOTS is None:
            return (Path.cwd().resolve(),)
        return _ALLOWED_ROOTS
    # Blank entries (for example from a trailing separator) are skipped.
    entries = [entry for entry in env_value.split(os.pathsep) if entry.strip()]
    return tuple(Path(entry).expanduser().resolve() for entry in entries)
160
+
161
+
162
def _ensure_under_allowed_root(path: Path) -> Path:
    """Resolve ``path`` and return it only if it lies inside an allowed root.

    Raises:
        ValueError: when no allowed roots are available, or when the resolved
            path is neither equal to nor nested under any of them.
    """
    candidate = path.expanduser().resolve()
    permitted = _allowed_roots()
    if not permitted:
        raise ValueError("At least one MCP allowed root must be configured.")
    if any(candidate == root or candidate.is_relative_to(root) for root in permitted):
        return candidate
    allowed = ", ".join(map(str, permitted))
    raise ValueError(
        f"Path is outside configured MCP allowed roots: {candidate}. Allowed: {allowed}"
    )
175
+
176
+
177
def _resolve_csv_path(path: str) -> Path:
    """Turn a client-supplied CSV path into a validated absolute path.

    Raises:
        ValueError: when the path is outside the allowed roots, does not
            exist, or is not a regular file.
    """
    csv_file = _ensure_under_allowed_root(Path(path))
    if not csv_file.exists():
        raise ValueError(f"CSV file does not exist: {csv_file}")
    if csv_file.is_file():
        return csv_file
    raise ValueError(f"CSV path is not a file: {csv_file}")
185
+
186
+
187
def _load_optional_schema(raw_path: object) -> Schema | None:
    """Load an optional schema path from an untrusted payload.

    Args:
        raw_path: Value taken from the client payload. ``None`` means no
            schema was supplied; any other value is converted with ``str``.

    Returns:
        The loaded schema, or ``None`` when ``raw_path`` is ``None``.

    Raises:
        ValueError: when the path is outside the allowed roots, does not
            exist, or is not a regular file.
    """
    if raw_path is None:
        return None
    schema_path = _ensure_under_allowed_root(Path(str(raw_path)))
    if not schema_path.exists():
        raise ValueError(f"Schema file does not exist: {schema_path}")
    # Mirror the CSV path validation: a directory (including "" which resolves
    # to the working directory) must be rejected before reaching load_schema.
    if not schema_path.is_file():
        raise ValueError(f"Schema path is not a file: {schema_path}")
    return load_schema(schema_path)
195
+
196
+
197
def _issue_to_result(issue: Issue) -> IssueResult:
    """Copy a DataForge ``Issue`` into its MCP response model."""
    payload = {
        "row": issue.row,
        "column": issue.column,
        "issue_type": issue.issue_type,
        # Severity is exposed through its ``.value`` rather than the object itself.
        "severity": issue.severity.value,
        "confidence": issue.confidence,
        "expected": issue.expected,
        "actual": issue.actual,
        "reason": issue.reason,
    }
    return IssueResult(**payload)
209
+
210
+
211
def _fix_to_result(proposed_fix: ProposedFix) -> FixResult:
    """Flatten a ``ProposedFix`` and its nested cell fix into a ``FixResult``."""
    cell = proposed_fix.fix
    # Cell-level fields come from the nested fix; the rest from the proposal.
    payload = {
        "row": cell.row,
        "column": cell.column,
        "old_value": cell.old_value,
        "new_value": cell.new_value,
        "detector_id": cell.detector_id,
        "operation": cell.operation,
        "reason": proposed_fix.reason,
        "confidence": proposed_fix.confidence,
        "provenance": proposed_fix.provenance,
    }
    return FixResult(**payload)
225
+
226
+
227
def _verified_fix_to_result(verified_fix: VerifiedFix) -> FixResult:
    """Copy a ``VerifiedFix`` into a ``FixResult`` field by field."""
    # VerifiedFix exposes the same attribute names that FixResult expects.
    field_names = (
        "row",
        "column",
        "old_value",
        "new_value",
        "detector_id",
        "operation",
        "reason",
        "confidence",
        "provenance",
    )
    return FixResult(**{name: getattr(verified_fix, name) for name in field_names})
240
+
241
+
242
def _run_detection(path: Path, schema: Schema | None = None) -> tuple[Any, list[Issue]]:
    """Load the CSV at ``path`` and return it together with all detected issues."""
    frame = read_csv(path)
    detected = run_all_detectors(frame, schema)
    return frame, detected
246
+
247
+
248
def _proposed_fix_from_spec(fix_spec: dict[str, Any]) -> tuple[Path, Schema | None, ProposedFix]:
    """Split a verifier payload into CSV path, optional schema, and proposed fix.

    The cell fix is read from the nested ``"fix"`` mapping when present;
    otherwise it is assembled from the recognised top-level keys.

    Raises:
        ValueError: when ``"path"`` is missing or empty, or when path
            validation fails.
    """
    csv_ref = fix_spec.get("path")
    if not csv_ref:
        raise ValueError("fix_spec must include a CSV 'path'.")
    csv_path = _resolve_csv_path(str(csv_ref))
    schema = _load_optional_schema(fix_spec.get("schema_path"))

    fix_payload = fix_spec.get("fix")
    if not isinstance(fix_payload, dict):
        # Flat payload: pick the cell-fix fields straight from the top level.
        flat_keys = {"row", "column", "old_value", "new_value", "detector_id", "operation"}
        fix_payload = {key: fix_spec[key] for key in fix_spec if key in flat_keys}

    reason = str(fix_spec.get("reason", "MCP-provided candidate fix."))
    confidence = float(fix_spec.get("confidence", 1.0))
    candidate = ProposedFix(
        fix=CellFix.model_validate(fix_payload),
        reason=reason,
        confidence=confidence,
        provenance=fix_spec.get("provenance", "deterministic"),
    )
    return csv_path, schema, candidate
270
+
271
+
272
def dataforge_profile(path: str) -> ProfileResult:
    """Summarize a CSV's shape and list every issue the detectors report."""
    resolved = _resolve_csv_path(path)
    frame, found = _run_detection(resolved)
    headers = [str(name) for name in frame.columns]
    converted = [_issue_to_result(item) for item in found]
    return ProfileResult(
        path=str(resolved),
        rows=len(frame.index),
        columns=len(frame.columns),
        column_names=headers,
        total_issues=len(found),
        issues=converted,
    )
284
+
285
+
286
def dataforge_detect_errors(path: str) -> list[IssueResult]:
    """Return only the issues detected in the CSV at ``path``."""
    resolved = _resolve_csv_path(path)
    # The loaded frame is not needed here; only the issue list is returned.
    found = _run_detection(resolved)[1]
    return list(map(_issue_to_result, found))
291
+
292
+
293
def dataforge_verify_fix(fix_spec: dict[str, Any]) -> VerifyFixResult:
    """Run one candidate fix through the DataForge acceptance gates.

    Gates are evaluated in order and the first failure is returned: column
    exists, row in bounds, current cell value equals the fix's ``old_value``,
    safety filter allows it, and finally the SMT verifier's verdict.
    """
    csv_path, schema, candidate = _proposed_fix_from_spec(fix_spec)
    frame = read_csv(csv_path)
    cell = candidate.fix

    if cell.column not in frame.columns:
        return VerifyFixResult(accept=False, reason=f"Column '{cell.column}' does not exist.")
    if not 0 <= cell.row < len(frame.index):
        return VerifyFixResult(accept=False, reason=f"Row {cell.row} is out of bounds.")

    # Refuse a fix written against a value the file no longer contains.
    observed = str(frame.at[cell.row, cell.column])
    if observed != cell.old_value:
        stale_reason = (
            f"Refusing stale fix for row {cell.row}, column '{cell.column}': "
            f"expected '{cell.old_value}', found '{observed}'."
        )
        return VerifyFixResult(accept=False, reason=stale_reason)

    safety = SafetyFilter().evaluate(candidate, schema, SafetyContext())
    safety_label = safety.verdict.value
    if safety.verdict != SafetyVerdict.ALLOW:
        return VerifyFixResult(
            accept=False,
            reason=safety.reason,
            safety_verdict=safety_label,
        )

    verification = SMTVerifier().verify(frame, [candidate], schema)
    accepted = verification.verdict == VerificationVerdict.ACCEPT
    return VerifyFixResult(
        accept=accepted,
        reason=verification.reason,
        safety_verdict=safety_label,
        verifier_verdict=verification.verdict.value,
        unsat_core=list(verification.unsat_core),
    )
328
+
329
+
330
def dataforge_apply_repairs(
    path: str,
    mode: Literal["dry_run", "apply"] = "dry_run",
) -> TxnReceipt:
    """Detect, verify, and optionally apply DataForge repairs to a CSV file.

    Args:
        path: CSV path supplied by the MCP client; it must resolve inside the
            allowed roots.
        mode: ``"dry_run"`` or ``"apply"``. Defaults to ``"dry_run"`` so a
            caller that omits the argument never requests a mutation.

    Returns:
        A ``TxnReceipt`` built from the repair pipeline's receipt and fixes.

    Raises:
        ValueError: when the path is invalid, ``mode`` is not recognised, or
            ``"apply"`` is requested while apply mode is disabled.
    """
    csv_path = _resolve_csv_path(path)
    if mode not in {"dry_run", "apply"}:
        raise ValueError("mode must be 'dry_run' or 'apply'.")
    if mode == "apply" and not _apply_is_enabled():
        raise ValueError(
            "MCP apply mode is disabled. Start the server with --enable-apply or set "
            "DATAFORGE_MCP_ENABLE_APPLY=1."
        )

    # The pipeline runs without a schema and with LLM fallback turned off.
    result = run_repair_pipeline(
        RepairPipelineRequest(
            source_path=csv_path,
            mode=mode,
            schema=None,
            allow_llm=False,
        )
    )
    receipt = result.receipt
    return TxnReceipt(
        path=str(csv_path),
        mode=mode,
        applied=receipt.applied,
        txn_id=receipt.txn_id,
        reversible=receipt.reversible,
        source_sha256=receipt.source_sha256,
        post_sha256=receipt.post_sha256,
        safety_verdict=receipt.safety_verdict,
        verifier_verdict=receipt.verifier_verdict,
        patch_plan_sha256=receipt.patch_plan_sha256,
        revert_command=receipt.revert_command,
        allowed_columns=receipt.allowed_columns,
        valid_rows=receipt.valid_rows,
        # Nested receipt models are serialized to plain dictionaries.
        root_causes=[item.model_dump() for item in receipt.root_causes],
        candidate_repairs=[item.model_dump() for item in receipt.candidate_repairs],
        proof_obligations=[item.model_dump() for item in receipt.proof_obligations],
        limitations=receipt.limitations,
        issues_count=receipt.issues_count,
        fixes_count=receipt.fixes_count,
        reason=receipt.reason,
        fixes=[_verified_fix_to_result(fix) for fix in result.fixes],
    )
373
+
374
+
375
def dataforge_revert(txn_id: str) -> RevertReceipt:
    """Revert transaction ``txn_id``, searching each allowed root in turn.

    The first root where the revert succeeds wins. If every root fails, the
    most recent ``TransactionLogError`` is re-raised as a ``ValueError``.

    Raises:
        ValueError: when the transaction cannot be reverted under any root.
    """
    transaction = None
    failure: Exception | None = None
    for root in _allowed_roots():
        try:
            transaction = revert_transaction(txn_id, search_root=root)
        except TransactionLogError as exc:
            # Remember the error and keep trying the remaining roots.
            failure = exc
        else:
            break

    if transaction is None:
        if failure is None:
            raise ValueError(
                f"Could not find transaction '{txn_id}' under configured allowed roots."
            )
        raise ValueError(str(failure)) from failure

    reverted_at = transaction.reverted_at
    return RevertReceipt(
        txn_id=transaction.txn_id,
        source_path=transaction.source_path,
        restored=reverted_at is not None,
        reverted_at=reverted_at.isoformat() if reverted_at else None,
        reason="Source restored successfully.",
    )