dataforge-07 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge/__init__.py +204 -0
- dataforge/__main__.py +5 -0
- dataforge/agent/__init__.py +16 -0
- dataforge/agent/providers.py +259 -0
- dataforge/agent/scratchpad.py +183 -0
- dataforge/agent/tool_actions.py +343 -0
- dataforge/bench/__init__.py +31 -0
- dataforge/bench/core.py +426 -0
- dataforge/bench/groq_client.py +386 -0
- dataforge/bench/methods.py +443 -0
- dataforge/bench/report.py +309 -0
- dataforge/bench/runner.py +247 -0
- dataforge/causal/__init__.py +21 -0
- dataforge/causal/dag.py +174 -0
- dataforge/causal/pc.py +232 -0
- dataforge/causal/root_cause.py +193 -0
- dataforge/cli/__init__.py +50 -0
- dataforge/cli/audit.py +70 -0
- dataforge/cli/bench.py +154 -0
- dataforge/cli/common.py +267 -0
- dataforge/cli/constraints.py +407 -0
- dataforge/cli/profile.py +147 -0
- dataforge/cli/release.py +166 -0
- dataforge/cli/repair.py +407 -0
- dataforge/cli/revert.py +139 -0
- dataforge/cli/watch.py +144 -0
- dataforge/datasets/__init__.py +25 -0
- dataforge/datasets/embedded/hospital/clean.csv +11 -0
- dataforge/datasets/embedded/hospital/dirty.csv +11 -0
- dataforge/datasets/real_world.py +290 -0
- dataforge/datasets/registry.py +103 -0
- dataforge/detectors/__init__.py +80 -0
- dataforge/detectors/base.py +145 -0
- dataforge/detectors/decimal_shift.py +166 -0
- dataforge/detectors/fd_violation.py +157 -0
- dataforge/detectors/type_mismatch.py +173 -0
- dataforge/engine/__init__.py +39 -0
- dataforge/engine/repair.py +905 -0
- dataforge/env/__init__.py +22 -0
- dataforge/env/environment.py +883 -0
- dataforge/env/observation.py +61 -0
- dataforge/env/openenv_core.py +161 -0
- dataforge/env/reward.py +128 -0
- dataforge/env/server.py +176 -0
- dataforge/evaluation_contract.py +76 -0
- dataforge/fixtures/hospital_10rows.csv +11 -0
- dataforge/fixtures/hospital_schema.yaml +17 -0
- dataforge/http/__init__.py +1 -0
- dataforge/http/problem.py +103 -0
- dataforge/integrations/__init__.py +1 -0
- dataforge/integrations/dbt.py +164 -0
- dataforge/observability.py +76 -0
- dataforge/py.typed +1 -0
- dataforge/release/__init__.py +1 -0
- dataforge/release/doctor.py +367 -0
- dataforge/release/full_vision.py +702 -0
- dataforge/release/gate.py +861 -0
- dataforge/release/playground_check.py +411 -0
- dataforge/repair_contract.py +468 -0
- dataforge/repairers/__init__.py +88 -0
- dataforge/repairers/base.py +77 -0
- dataforge/repairers/decimal_shift.py +43 -0
- dataforge/repairers/fd_violation.py +225 -0
- dataforge/repairers/type_mismatch.py +73 -0
- dataforge/safety/__init__.py +5 -0
- dataforge/safety/adversarial/attack_01_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_02_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_03_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_04_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_05_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_06_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_07_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_08_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_09_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_10_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_11_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_12_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_13_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_14_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_15_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_16_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_17_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_18_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_19_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_20_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_21_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_22_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_23_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_24_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_25_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_26_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_27_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_28_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_29_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_30_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_31_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_32_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_33_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_34_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_35_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_36_row_delete.yaml +11 -0
- dataforge/safety/adversarial/attack_37_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_38_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_39_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_40_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_41_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_42_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_43_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_44_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_45_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_46_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_47_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_48_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_49_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_50_row_delete.yaml +7 -0
- dataforge/safety/constitution.py +307 -0
- dataforge/safety/constitutions/default.yaml +40 -0
- dataforge/safety/filter.py +134 -0
- dataforge/schema_inference.py +620 -0
- dataforge/stores/__init__.py +46 -0
- dataforge/stores/base.py +73 -0
- dataforge/stores/cloud.py +78 -0
- dataforge/stores/csv.py +94 -0
- dataforge/stores/duckdb.py +313 -0
- dataforge/stores/patch_plan.py +178 -0
- dataforge/stores/registry.py +82 -0
- dataforge/stores/repair.py +121 -0
- dataforge/stores/revert.py +22 -0
- dataforge/stores/sql.py +27 -0
- dataforge/table.py +228 -0
- dataforge/transactions/__init__.py +34 -0
- dataforge/transactions/files.py +96 -0
- dataforge/transactions/log.py +613 -0
- dataforge/transactions/revert.py +102 -0
- dataforge/transactions/txn.py +104 -0
- dataforge/ui/__init__.py +1 -0
- dataforge/ui/profile_view.py +136 -0
- dataforge/ui/repair_diff.py +91 -0
- dataforge/verifier/__init__.py +55 -0
- dataforge/verifier/constraint_ir.py +155 -0
- dataforge/verifier/explain.py +47 -0
- dataforge/verifier/gate.py +5 -0
- dataforge/verifier/schema.py +111 -0
- dataforge/verifier/smt.py +433 -0
- dataforge_07-0.1.0.dist-info/METADATA +436 -0
- dataforge_07-0.1.0.dist-info/RECORD +150 -0
- dataforge_07-0.1.0.dist-info/WHEEL +5 -0
- dataforge_07-0.1.0.dist-info/entry_points.txt +3 -0
- dataforge_07-0.1.0.dist-info/licenses/LICENSE +176 -0
- dataforge_07-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Filesystem primitives shared by DataForge transaction apply/revert paths."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import os
|
|
7
|
+
import secrets
|
|
8
|
+
import time
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
from contextlib import contextmanager, suppress
|
|
11
|
+
from datetime import UTC, datetime
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SourceLockError(RuntimeError):
|
|
16
|
+
"""Raised when a source file lock cannot be acquired."""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def fsync_parent_directory(path: Path) -> None:
|
|
20
|
+
"""Best-effort fsync of a path's parent directory after atomic replacement."""
|
|
21
|
+
parent = path.resolve().parent
|
|
22
|
+
try:
|
|
23
|
+
fd = os.open(parent, os.O_RDONLY)
|
|
24
|
+
except OSError:
|
|
25
|
+
return
|
|
26
|
+
try:
|
|
27
|
+
os.fsync(fd)
|
|
28
|
+
finally:
|
|
29
|
+
os.close(fd)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def atomic_write_bytes(path: Path, payload: bytes) -> None:
|
|
33
|
+
"""Write bytes to ``path`` through an atomic same-directory replacement."""
|
|
34
|
+
resolved = path.resolve()
|
|
35
|
+
resolved.parent.mkdir(parents=True, exist_ok=True)
|
|
36
|
+
temp_path = resolved.with_name(f".{resolved.name}.{secrets.token_hex(8)}.tmp")
|
|
37
|
+
try:
|
|
38
|
+
with temp_path.open("xb") as handle:
|
|
39
|
+
handle.write(payload)
|
|
40
|
+
handle.flush()
|
|
41
|
+
os.fsync(handle.fileno())
|
|
42
|
+
os.replace(temp_path, resolved)
|
|
43
|
+
fsync_parent_directory(resolved)
|
|
44
|
+
finally:
|
|
45
|
+
if temp_path.exists():
|
|
46
|
+
temp_path.unlink()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def lock_path_for(source_path: Path) -> Path:
|
|
50
|
+
"""Return the filesystem lock path for a source file."""
|
|
51
|
+
digest = hashlib.sha256(str(source_path.resolve()).encode("utf-8")).hexdigest()[:24]
|
|
52
|
+
return source_path.resolve().parent / ".dataforge" / "locks" / f"{digest}.lock"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@contextmanager
|
|
56
|
+
def source_path_lock(
|
|
57
|
+
source_path: Path,
|
|
58
|
+
*,
|
|
59
|
+
timeout_seconds: float = 5.0,
|
|
60
|
+
stale_after_seconds: float = 300.0,
|
|
61
|
+
) -> Iterator[None]:
|
|
62
|
+
"""Acquire an exclusive lock for a source path using an atomic lock file."""
|
|
63
|
+
lock_path = lock_path_for(source_path)
|
|
64
|
+
lock_path.parent.mkdir(parents=True, exist_ok=True)
|
|
65
|
+
deadline = time.monotonic() + timeout_seconds
|
|
66
|
+
while True:
|
|
67
|
+
try:
|
|
68
|
+
fd = os.open(lock_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
|
|
69
|
+
try:
|
|
70
|
+
payload = f"{os.getpid()} {datetime.now(UTC).isoformat()}\n".encode()
|
|
71
|
+
os.write(fd, payload)
|
|
72
|
+
finally:
|
|
73
|
+
os.close(fd)
|
|
74
|
+
break
|
|
75
|
+
except FileExistsError as exc:
|
|
76
|
+
try:
|
|
77
|
+
age = time.time() - lock_path.stat().st_mtime
|
|
78
|
+
except OSError:
|
|
79
|
+
age = 0.0
|
|
80
|
+
if age > stale_after_seconds:
|
|
81
|
+
try:
|
|
82
|
+
lock_path.unlink()
|
|
83
|
+
continue
|
|
84
|
+
except OSError:
|
|
85
|
+
pass
|
|
86
|
+
if time.monotonic() >= deadline:
|
|
87
|
+
raise SourceLockError(
|
|
88
|
+
f"Timed out waiting for DataForge source lock: {source_path.resolve()}"
|
|
89
|
+
) from exc
|
|
90
|
+
time.sleep(0.05)
|
|
91
|
+
|
|
92
|
+
try:
|
|
93
|
+
yield
|
|
94
|
+
finally:
|
|
95
|
+
with suppress(FileNotFoundError):
|
|
96
|
+
lock_path.unlink()
|