dataforge-07 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataforge/__init__.py +204 -0
- dataforge/__main__.py +5 -0
- dataforge/agent/__init__.py +16 -0
- dataforge/agent/providers.py +259 -0
- dataforge/agent/scratchpad.py +183 -0
- dataforge/agent/tool_actions.py +343 -0
- dataforge/bench/__init__.py +31 -0
- dataforge/bench/core.py +426 -0
- dataforge/bench/groq_client.py +386 -0
- dataforge/bench/methods.py +443 -0
- dataforge/bench/report.py +309 -0
- dataforge/bench/runner.py +247 -0
- dataforge/causal/__init__.py +21 -0
- dataforge/causal/dag.py +174 -0
- dataforge/causal/pc.py +232 -0
- dataforge/causal/root_cause.py +193 -0
- dataforge/cli/__init__.py +50 -0
- dataforge/cli/audit.py +70 -0
- dataforge/cli/bench.py +154 -0
- dataforge/cli/common.py +267 -0
- dataforge/cli/constraints.py +407 -0
- dataforge/cli/profile.py +147 -0
- dataforge/cli/release.py +166 -0
- dataforge/cli/repair.py +407 -0
- dataforge/cli/revert.py +139 -0
- dataforge/cli/watch.py +144 -0
- dataforge/datasets/__init__.py +25 -0
- dataforge/datasets/embedded/hospital/clean.csv +11 -0
- dataforge/datasets/embedded/hospital/dirty.csv +11 -0
- dataforge/datasets/real_world.py +290 -0
- dataforge/datasets/registry.py +103 -0
- dataforge/detectors/__init__.py +80 -0
- dataforge/detectors/base.py +145 -0
- dataforge/detectors/decimal_shift.py +166 -0
- dataforge/detectors/fd_violation.py +157 -0
- dataforge/detectors/type_mismatch.py +173 -0
- dataforge/engine/__init__.py +39 -0
- dataforge/engine/repair.py +905 -0
- dataforge/env/__init__.py +22 -0
- dataforge/env/environment.py +883 -0
- dataforge/env/observation.py +61 -0
- dataforge/env/openenv_core.py +161 -0
- dataforge/env/reward.py +128 -0
- dataforge/env/server.py +176 -0
- dataforge/evaluation_contract.py +76 -0
- dataforge/fixtures/hospital_10rows.csv +11 -0
- dataforge/fixtures/hospital_schema.yaml +17 -0
- dataforge/http/__init__.py +1 -0
- dataforge/http/problem.py +103 -0
- dataforge/integrations/__init__.py +1 -0
- dataforge/integrations/dbt.py +164 -0
- dataforge/observability.py +76 -0
- dataforge/py.typed +1 -0
- dataforge/release/__init__.py +1 -0
- dataforge/release/doctor.py +367 -0
- dataforge/release/full_vision.py +702 -0
- dataforge/release/gate.py +861 -0
- dataforge/release/playground_check.py +411 -0
- dataforge/repair_contract.py +468 -0
- dataforge/repairers/__init__.py +88 -0
- dataforge/repairers/base.py +77 -0
- dataforge/repairers/decimal_shift.py +43 -0
- dataforge/repairers/fd_violation.py +225 -0
- dataforge/repairers/type_mismatch.py +73 -0
- dataforge/safety/__init__.py +5 -0
- dataforge/safety/adversarial/attack_01_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_02_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_03_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_04_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_05_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_06_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_07_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_08_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_09_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_10_phone_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_11_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_12_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_13_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_14_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_15_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_16_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_17_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_18_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_19_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_20_ssn_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_21_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_22_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_23_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_24_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_25_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_26_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_27_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_28_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_29_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_30_email_pii.yaml +8 -0
- dataforge/safety/adversarial/attack_31_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_32_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_33_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_34_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_35_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_36_row_delete.yaml +11 -0
- dataforge/safety/adversarial/attack_37_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_38_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_39_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_40_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_41_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_42_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_43_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_44_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_45_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_46_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_47_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_48_row_delete.yaml +7 -0
- dataforge/safety/adversarial/attack_49_row_delete.yaml +8 -0
- dataforge/safety/adversarial/attack_50_row_delete.yaml +7 -0
- dataforge/safety/constitution.py +307 -0
- dataforge/safety/constitutions/default.yaml +40 -0
- dataforge/safety/filter.py +134 -0
- dataforge/schema_inference.py +620 -0
- dataforge/stores/__init__.py +46 -0
- dataforge/stores/base.py +73 -0
- dataforge/stores/cloud.py +78 -0
- dataforge/stores/csv.py +94 -0
- dataforge/stores/duckdb.py +313 -0
- dataforge/stores/patch_plan.py +178 -0
- dataforge/stores/registry.py +82 -0
- dataforge/stores/repair.py +121 -0
- dataforge/stores/revert.py +22 -0
- dataforge/stores/sql.py +27 -0
- dataforge/table.py +228 -0
- dataforge/transactions/__init__.py +34 -0
- dataforge/transactions/files.py +96 -0
- dataforge/transactions/log.py +613 -0
- dataforge/transactions/revert.py +102 -0
- dataforge/transactions/txn.py +104 -0
- dataforge/ui/__init__.py +1 -0
- dataforge/ui/profile_view.py +136 -0
- dataforge/ui/repair_diff.py +91 -0
- dataforge/verifier/__init__.py +55 -0
- dataforge/verifier/constraint_ir.py +155 -0
- dataforge/verifier/explain.py +47 -0
- dataforge/verifier/gate.py +5 -0
- dataforge/verifier/schema.py +111 -0
- dataforge/verifier/smt.py +433 -0
- dataforge_07-0.1.0.dist-info/METADATA +436 -0
- dataforge_07-0.1.0.dist-info/RECORD +150 -0
- dataforge_07-0.1.0.dist-info/WHEEL +5 -0
- dataforge_07-0.1.0.dist-info/entry_points.txt +3 -0
- dataforge_07-0.1.0.dist-info/licenses/LICENSE +176 -0
- dataforge_07-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dataforge_07
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: DataForge: CLI-first data-quality detection and reversible repair for tabular data.
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Project-URL: Homepage, https://github.com/Aegis15/dataforge
|
|
7
|
+
Project-URL: Repository, https://github.com/Aegis15/dataforge
|
|
8
|
+
Project-URL: Documentation, https://dataforge.praneshrajan15.workers.dev/playground
|
|
9
|
+
Keywords: data-quality,ai-agent,llm,rl,smt,dbt
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Requires-Python: <3.13,>=3.11
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
License-File: LICENSE
|
|
16
|
+
Requires-Dist: pydantic>=2.7
|
|
17
|
+
Requires-Dist: typer<0.25,>=0.24
|
|
18
|
+
Requires-Dist: rich>=13.7
|
|
19
|
+
Requires-Dist: textual<9,>=8.2
|
|
20
|
+
Requires-Dist: z3-solver>=4.13
|
|
21
|
+
Requires-Dist: pyyaml>=6.0
|
|
22
|
+
Requires-Dist: pandas>=2.2
|
|
23
|
+
Requires-Dist: httpx>=0.27
|
|
24
|
+
Requires-Dist: python-dotenv>=1.0
|
|
25
|
+
Provides-Extra: bench
|
|
26
|
+
Requires-Dist: pandas>=2.2; extra == "bench"
|
|
27
|
+
Requires-Dist: httpx>=0.27; extra == "bench"
|
|
28
|
+
Requires-Dist: tenacity>=8.3; extra == "bench"
|
|
29
|
+
Requires-Dist: python-dotenv>=1.0; extra == "bench"
|
|
30
|
+
Requires-Dist: pyarrow>=16.0; extra == "bench"
|
|
31
|
+
Provides-Extra: causal
|
|
32
|
+
Requires-Dist: pandas>=2.2; extra == "causal"
|
|
33
|
+
Requires-Dist: numpy>=1.26; extra == "causal"
|
|
34
|
+
Requires-Dist: networkx>=3.3; extra == "causal"
|
|
35
|
+
Requires-Dist: causal-learn>=0.1.4; extra == "causal"
|
|
36
|
+
Requires-Dist: hyppo>=0.5.2; extra == "causal"
|
|
37
|
+
Requires-Dist: scipy>=1.13; extra == "causal"
|
|
38
|
+
Provides-Extra: dev
|
|
39
|
+
Requires-Dist: pytest>=9.0.3; extra == "dev"
|
|
40
|
+
Requires-Dist: pytest-cov>=5.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-benchmark>=4.0; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-xdist>=3.6; extra == "dev"
|
|
43
|
+
Requires-Dist: hypothesis>=6.100; extra == "dev"
|
|
44
|
+
Requires-Dist: mutmut>=3.5; extra == "dev"
|
|
45
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
46
|
+
Requires-Dist: pip-audit<3,>=2.10; extra == "dev"
|
|
47
|
+
Requires-Dist: cyclonedx-bom<8,>=7.3; extra == "dev"
|
|
48
|
+
Requires-Dist: cryptography>=46.0.7; extra == "dev"
|
|
49
|
+
Requires-Dist: idna>=3.15; extra == "dev"
|
|
50
|
+
Requires-Dist: pip>=26.1.1; extra == "dev"
|
|
51
|
+
Requires-Dist: urllib3>=2.7; extra == "dev"
|
|
52
|
+
Requires-Dist: ruff>=0.11; extra == "dev"
|
|
53
|
+
Requires-Dist: mypy>=1.10; extra == "dev"
|
|
54
|
+
Requires-Dist: pandas-stubs>=2.2; extra == "dev"
|
|
55
|
+
Requires-Dist: types-PyYAML; extra == "dev"
|
|
56
|
+
Requires-Dist: huggingface_hub==1.13.0; extra == "dev"
|
|
57
|
+
Requires-Dist: httpx>=0.27; extra == "dev"
|
|
58
|
+
Requires-Dist: tenacity>=8.3; extra == "dev"
|
|
59
|
+
Requires-Dist: python-dotenv>=1.0; extra == "dev"
|
|
60
|
+
Requires-Dist: pyarrow>=16.0; extra == "dev"
|
|
61
|
+
Requires-Dist: networkx>=3.3; extra == "dev"
|
|
62
|
+
Requires-Dist: causal-learn>=0.1.4; extra == "dev"
|
|
63
|
+
Requires-Dist: hyppo>=0.5.2; extra == "dev"
|
|
64
|
+
Requires-Dist: scipy>=1.13; extra == "dev"
|
|
65
|
+
Requires-Dist: sqlglot>=25.0; extra == "dev"
|
|
66
|
+
Requires-Dist: duckdb>=1.0; extra == "dev"
|
|
67
|
+
Provides-Extra: train
|
|
68
|
+
Requires-Dist: trl==1.4.0; extra == "train"
|
|
69
|
+
Requires-Dist: transformers==5.7.0; extra == "train"
|
|
70
|
+
Requires-Dist: accelerate==1.13.0; extra == "train"
|
|
71
|
+
Requires-Dist: peft==0.19.1; extra == "train"
|
|
72
|
+
Requires-Dist: bitsandbytes==0.49.2; extra == "train"
|
|
73
|
+
Requires-Dist: datasets==4.8.5; extra == "train"
|
|
74
|
+
Requires-Dist: huggingface_hub==1.13.0; extra == "train"
|
|
75
|
+
Requires-Dist: pyyaml==6.0.3; extra == "train"
|
|
76
|
+
Requires-Dist: pandas==2.3.3; extra == "train"
|
|
77
|
+
Requires-Dist: tensorboard==2.20.0; extra == "train"
|
|
78
|
+
Provides-Extra: eval
|
|
79
|
+
Requires-Dist: matplotlib>=3.9; extra == "eval"
|
|
80
|
+
Requires-Dist: seaborn>=0.13; extra == "eval"
|
|
81
|
+
Provides-Extra: providers
|
|
82
|
+
Requires-Dist: httpx>=0.27; extra == "providers"
|
|
83
|
+
Requires-Dist: tenacity>=8.3; extra == "providers"
|
|
84
|
+
Requires-Dist: python-dotenv>=1.0; extra == "providers"
|
|
85
|
+
Provides-Extra: pandas
|
|
86
|
+
Requires-Dist: pandas>=2.2; extra == "pandas"
|
|
87
|
+
Provides-Extra: playground
|
|
88
|
+
Requires-Dist: pandas>=2.2; extra == "playground"
|
|
89
|
+
Requires-Dist: fastapi>=0.136.1; extra == "playground"
|
|
90
|
+
Requires-Dist: starlette<2,>=1.0.1; extra == "playground"
|
|
91
|
+
Requires-Dist: uvicorn[standard]>=0.35; extra == "playground"
|
|
92
|
+
Requires-Dist: python-multipart>=0.0.27; extra == "playground"
|
|
93
|
+
Requires-Dist: slowapi>=0.1.9; extra == "playground"
|
|
94
|
+
Provides-Extra: openenv
|
|
95
|
+
Requires-Dist: pandas>=2.2; extra == "openenv"
|
|
96
|
+
Requires-Dist: openenv-core[core]>=0.2.2; extra == "openenv"
|
|
97
|
+
Requires-Dist: authlib!=1.7.0,>=1.7.1; extra == "openenv"
|
|
98
|
+
Requires-Dist: cryptography>=46.0.7; extra == "openenv"
|
|
99
|
+
Requires-Dist: duckdb>=1.0; extra == "openenv"
|
|
100
|
+
Requires-Dist: sqlglot>=25.0; extra == "openenv"
|
|
101
|
+
Requires-Dist: scipy>=1.13; extra == "openenv"
|
|
102
|
+
Requires-Dist: networkx>=3.3; extra == "openenv"
|
|
103
|
+
Requires-Dist: causal-learn>=0.1.4; extra == "openenv"
|
|
104
|
+
Requires-Dist: hyppo>=0.5.2; extra == "openenv"
|
|
105
|
+
Provides-Extra: all
|
|
106
|
+
Requires-Dist: dataforge_07[bench,causal,dev,eval,openenv,pandas,playground,providers,train]; extra == "all"
|
|
107
|
+
Dynamic: license-file
|
|
108
|
+
|
|
109
|
+
# DataForge
|
|
110
|
+
|
|
111
|
+
DataForge is a CLI-first data-quality repair toolkit for tabular data. It
|
|
112
|
+
detects common CSV issues, proposes deterministic repairs, checks proposed
|
|
113
|
+
changes through safety and verification gates, and records applied changes in a
|
|
114
|
+
reversible transaction log.
|
|
115
|
+
|
|
116
|
+
The final public product name is DataForge. The PyPI/TestPyPI distribution
|
|
117
|
+
family is `dataforge_07*` because the unqualified `dataforge` project name is
|
|
118
|
+
occupied by unrelated packages. Installing `dataforge_07` still provides the
|
|
119
|
+
`dataforge` import namespace and `dataforge` CLI. `dataforge15` is only a
|
|
120
|
+
temporary staging alias retained for local compatibility.
|
|
121
|
+
|
|
122
|
+
The current repository is an alpha implementation. It also contains the
|
|
123
|
+
OpenEnv-compatible training environment, the SFT warmup workflow, a local MCP
|
|
124
|
+
server package, and playground/demo sources. Warehouse integrations and
|
|
125
|
+
production model-quality claims remain future work.
|
|
126
|
+
|
|
127
|
+
Before any public release, review `THREAT_MODEL.md` and `docs/docs/release.md`.
|
|
128
|
+
They define the security, supply-chain, and evidence gates that separate the
|
|
129
|
+
current alpha from the full original DataForge vision.
|
|
130
|
+
|
|
131
|
+
## Current Status
|
|
132
|
+
|
|
133
|
+
Shipped in the current worktree:
|
|
134
|
+
|
|
135
|
+
- `dataforge profile`, `dataforge repair`, `dataforge revert`,
|
|
136
|
+
`dataforge watch`, `dataforge audit`, and `dataforge bench`
|
|
137
|
+
- Three detector families: `type_mismatch`, `decimal_shift`, `fd_violation`
|
|
138
|
+
- Reviewable schema inference in `profile --json`, including inferred column
|
|
139
|
+
types, domains, regex candidates, uniqueness, and FD candidates
|
|
140
|
+
- Pending constraint review artifacts via `profile --constraints-out`, which
|
|
141
|
+
can feed repair only after individual candidates are marked accepted
|
|
142
|
+
- Matching deterministic repairers wired through SafetyFilter -> SMTVerifier
|
|
143
|
+
- Backend-neutral `PatchPlan` and `TableStore` contracts for CSV, DuckDB, and
|
|
144
|
+
dry-run-only cloud warehouse boundaries
|
|
145
|
+
- Reversible hash-chained transaction journals with immutable source snapshots
|
|
146
|
+
- Public backend repair engine at `dataforge.engine.repair`
|
|
147
|
+
- Real-world benchmark harness for Hospital, Flights, and Beers
|
|
148
|
+
- OpenEnv-compatible HTTP environment with eight typed actions, including
|
|
149
|
+
read-only `ROOT_CAUSE`
|
|
150
|
+
- Causal root-cause analyzer for cascading data-quality errors
|
|
151
|
+
- Standalone `dataforge-mcp` package exposing DataForge tools over MCP
|
|
152
|
+
- Week 9 SFT oracle trajectory workflow, readiness gate, Kaggle notebook, and
|
|
153
|
+
release verifier
|
|
154
|
+
- Separate Gradio model-demo Space source for the published 0.5B SFT smoke
|
|
155
|
+
checkpoint
|
|
156
|
+
|
|
157
|
+
Not shipped yet:
|
|
158
|
+
|
|
159
|
+
- published `dataforge_07`, `dataforge_07_mcp`, `dataforge_07_evals`,
|
|
160
|
+
`dataforge_07_dbt`, and `dataforge_07_agent_patterns` packages
|
|
161
|
+
- committed production verification for the Cloudflare Workers playground
|
|
162
|
+
- warehouse-native or external adapter packages
|
|
163
|
+
- credentialed Snowflake, BigQuery, or Databricks apply/revert conformance
|
|
164
|
+
- design-partner, pilot-user, or customer validation evidence (not yet claimed)
|
|
165
|
+
- a production-quality trained model family
|
|
166
|
+
- autonomous repair in the playground or model demo
|
|
167
|
+
|
|
168
|
+
## Quickstart
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
python -m pip install -e ".[dev]"
|
|
172
|
+
dataforge profile fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml
|
|
173
|
+
dataforge profile fixtures/hospital_10rows.csv --constraints-out constraints.json
|
|
174
|
+
dataforge constraints review constraints.json
|
|
175
|
+
dataforge repair fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml --dry-run
|
|
176
|
+
dataforge repair fixtures/hospital_10rows.csv --constraints constraints.json --dry-run
|
|
177
|
+
dataforge watch fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml --once --json
|
|
178
|
+
dataforge bench --methods random,heuristic --datasets hospital,flights,beers --seeds 3 --seed-list 0,1,2
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
`dataforge15` remains a temporary staging compatibility alias, but public docs
|
|
182
|
+
and release evidence must use `dataforge_07` for PyPI distribution identity and
|
|
183
|
+
`dataforge` for the installed CLI/import identity.
|
|
184
|
+
|
|
185
|
+
To apply repairs, use `--apply`. Applied repairs write a transaction journal and
|
|
186
|
+
source snapshot before mutating the CSV, so they can be reverted:
|
|
187
|
+
|
|
188
|
+
```bash
|
|
189
|
+
dataforge repair path/to/file.csv --schema path/to/schema.yaml --apply
|
|
190
|
+
dataforge audit <txn-id>
|
|
191
|
+
dataforge revert <txn-id>
|
|
192
|
+
dataforge revert <txn-id> --search-root path/to --json
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Warehouse targets use `warehouse://` URIs and always emit a `patch_plan_v1`
|
|
196
|
+
contract before any mutation. DuckDB is the local conformance backend; cloud
|
|
197
|
+
warehouse adapters are dry-run-only boundaries until credentialed apply,
|
|
198
|
+
audit, and rollback suites are enabled:
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
dataforge repair "warehouse://duckdb?database=dev.duckdb&relation=main.model&row_id=id" --dry-run --json
|
|
202
|
+
dataforge repair "warehouse://snowflake?relation=PUBLIC.MODEL&row_id=ID" --dry-run --json
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
DuckDB `--apply` requires a stable row identity, records the patch plan in the
|
|
206
|
+
transaction journal, and can be reverted through the same `audit` and `revert`
|
|
207
|
+
commands. Snowflake, BigQuery, and Databricks apply are intentionally refused
|
|
208
|
+
until their conformance gates prove reversible transactions.
|
|
209
|
+
|
|
210
|
+
New transaction logs are local tamper-evident hash chains. `dataforge audit`
|
|
211
|
+
verifies the chain head, event order, replayability, and revert prerequisites;
|
|
212
|
+
legacy v1 logs remain replayable but are reported as unverified because they do
|
|
213
|
+
not contain event hashes.
|
|
214
|
+
|
|
215
|
+
## Week 9 SFT Warmup
|
|
216
|
+
|
|
217
|
+
The current SFT workflow builds split-safe `expert_v1` trajectory records from
|
|
218
|
+
dirty/clean CSV diffs. Exact repairs in the primary dataset are labeled
|
|
219
|
+
`oracle_from_clean_diff`, not inferred from Groq, Cerebras, or Gemini teacher
|
|
220
|
+
guesses. Clean train chunks are retained as `finish` examples so the model
|
|
221
|
+
learns when no repair is justified.
|
|
222
|
+
|
|
223
|
+
```powershell
|
|
224
|
+
$env:HF_TOKEN="..."
|
|
225
|
+
.\.venv\Scripts\python.exe scripts\data\build_oracle_sft_trajectories.py
|
|
226
|
+
.\.venv\Scripts\python.exe scripts\data\validate_sft_readiness.py
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
This writes local ignored JSONL at `data/sft_traj/expert_v1.jsonl` and an
|
|
230
|
+
auditable row split at `data/sft_traj/split_manifest.json`. Push the dataset
|
|
231
|
+
bundle only after the readiness gate passes:
|
|
232
|
+
|
|
233
|
+
```powershell
|
|
234
|
+
$env:HF_TOKEN="..."
|
|
235
|
+
.\.venv\Scripts\python.exe scripts\data\build_oracle_sft_trajectories.py --push-to-hub --hf-dataset-repo Praneshrajan15/dataforge-sft-trajectories
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
The current public smoke checkpoint is
|
|
239
|
+
`Praneshrajan15/DataForge-0.5B-SFT`, with trajectories at
|
|
240
|
+
`Praneshrajan15/dataforge-sft-trajectories`. It proves the dataset, Kaggle
|
|
241
|
+
training, merge, evaluation, and Hub upload path; it is not a production
|
|
242
|
+
model-quality claim. Verify release artifacts before citing them:
|
|
243
|
+
|
|
244
|
+
```powershell
|
|
245
|
+
.\.venv\Scripts\python.exe scripts\model\verify_sft_release.py --output eval\results\sft_release_v0_smoke.json
|
|
246
|
+
.\.venv\Scripts\python.exe scripts\model\verify_sft_release.py --min-dataset-records 272 --require-sha-metrics --output eval\results\sft_release_contract_v2_20260515.json
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
## Week 12 GRPO Path
|
|
250
|
+
|
|
251
|
+
The repository now contains a gated GRPO post-training path for free-tier
|
|
252
|
+
experiments:
|
|
253
|
+
|
|
254
|
+
- `training/configs/grpo_05b.yaml` targets `DataForge-0.5B-SFT` -> `DataForge-0.5B-GRPO`.
|
|
255
|
+
- `training/configs/grpo_15b.yaml` requires a verified `DataForge-1.5B-SFT`
|
|
256
|
+
prerequisite before attempting `DataForge-1.5B-GRPO`.
|
|
257
|
+
- `training/rewards/dataforge_reward.py` scores completions locally through the
|
|
258
|
+
`repair_contract_v1` exact-repair contract.
|
|
259
|
+
- `training/kaggle/grpo_kaggle.ipynb` blocks Hub upload unless GRPO beats SFT
|
|
260
|
+
by at least 3 absolute F1 points on `DataForge-Bench-light-verified`.
|
|
261
|
+
|
|
262
|
+
No GRPO checkpoint is described as a quality milestone in this README until
|
|
263
|
+
`scripts/model/verify_grpo_release.py` produces committed verification
|
|
264
|
+
evidence. Refresh benchmark tables only from generated JSON.
|
|
265
|
+
|
|
266
|
+
After GRPO eval evidence exists:
|
|
267
|
+
|
|
268
|
+
```powershell
|
|
269
|
+
.\.venv\Scripts\python.exe scripts\bench\refresh_benchmark_table.py --skip-agent-run --trained-model-json eval\results\grpo_model_comparison.json
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## MCP Server
|
|
273
|
+
|
|
274
|
+
The nested `dataforge-mcp/` source directory builds the standalone
|
|
275
|
+
`dataforge_07_mcp` distribution. It is not published yet, so install it from
|
|
276
|
+
source while release ownership is pending:
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
cd dataforge-mcp
|
|
280
|
+
python -m pip install -e ".[dev]"
|
|
281
|
+
dataforge-mcp serve
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
Tools: `dataforge_profile`, `dataforge_detect_errors`,
|
|
285
|
+
`dataforge_verify_fix`, `dataforge_apply_repairs`, and `dataforge_revert`.
|
|
286
|
+
The default transport is stdio. MCP reads and writes are sandboxed to configured
|
|
287
|
+
allowed roots; dry-run works by default, while apply requires `--enable-apply`.
|
|
288
|
+
Streamable HTTP is available for local experiments.
|
|
289
|
+
|
|
290
|
+
The monorepo `packages/` directory contains the side-package release sources
|
|
291
|
+
for `dataforge_07_evals`, `dataforge_07_dbt`, and
|
|
292
|
+
`dataforge_07_agent_patterns`.
|
|
293
|
+
|
|
294
|
+
## Playground And Model Demo
|
|
295
|
+
|
|
296
|
+
- `playground/api/` is the API backend for the CSV playground. Public Space
|
|
297
|
+
deployments use `dataforge-playground`.
|
|
298
|
+
- `playground/web/` is the static browser UI deployed through Cloudflare
|
|
299
|
+
Workers Static Assets. Its primary workflow is `POST /api/analyze`: upload a
|
|
300
|
+
CSV, review categorical risk and pending inferred constraints, inspect
|
|
301
|
+
verified dry-run repairs and non-repairs, then export a receipt with the
|
|
302
|
+
local CLI apply/audit/revert command shape.
|
|
303
|
+
- The current verified public playground URL is
|
|
304
|
+
`https://dataforge.praneshrajan15.workers.dev/playground`, backed by
|
|
305
|
+
`https://Praneshrajan15-dataforge-playground.hf.space`.
|
|
306
|
+
- That Workers URL is the production playground surface for the full original
|
|
307
|
+
vision; this is the release URL.
|
|
308
|
+
- `playground-model/` is a separate Gradio Space demo for the published
|
|
309
|
+
`DataForge-0.5B-SFT` smoke checkpoint. It accepts small CSV snippets and is
|
|
310
|
+
intentionally limited to demo use.
|
|
311
|
+
|
|
312
|
+
The playground does not persist uploaded files, does not use browser storage,
|
|
313
|
+
does not mutate data in the hosted flow, and does not call an LLM unless a
|
|
314
|
+
backend provider key is explicitly configured.
|
|
315
|
+
|
|
316
|
+
## Benchmark Results
|
|
317
|
+
|
|
318
|
+
<!-- BENCH:START -->
|
|
319
|
+
Generated from `eval/results/agent_comparison.json` (schema `dataforge_benchmark_run_v2`, seeds `0, 1, 2`, git `dbd1bed0a03c`, dirty `true`).
|
|
320
|
+
|
|
321
|
+
| Method | Precision | Recall | F1 | Avg Steps | Quota Units | GPU Hours |
|
|
322
|
+
| --- | --- | --- | --- | --- | --- | --- |
|
|
323
|
+
| heuristic | 0.3167 | 0.3025 | 0.2772 | 374.33 | 0.0000 | 0.0000 |
|
|
324
|
+
| random | 0.0038 | 0.0003 | 0.0005 | 150.33 | 0.0000 | 0.0000 |
|
|
325
|
+
|
|
326
|
+
See `BENCHMARK_REPORT.md` for per-dataset tables, error bars, and citation-only SOTA rows.
|
|
327
|
+
|
|
328
|
+
Dataset bytes are pinned to BigDaMa/raha revision `7be1334b8c7bbdac3f47ef514fb3e1e8c5fc181c` for hospital, flights, beers; dirty/clean SHA-256s are recorded in the JSON metadata.
|
|
329
|
+
<!-- BENCH:END -->
|
|
330
|
+
|
|
331
|
+
## Local Setup
|
|
332
|
+
|
|
333
|
+
```bash
|
|
334
|
+
make setup
|
|
335
|
+
make lint
|
|
336
|
+
make type
|
|
337
|
+
make test
|
|
338
|
+
make backend-gate
|
|
339
|
+
make release-gate
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
Verification works on Linux, macOS, and Windows with Git Bash available for GNU
|
|
343
|
+
Make recipes. Python support is `>=3.11,<3.13`.
|
|
344
|
+
|
|
345
|
+
`profile --constraints-out` writes a strict `constraint_review_v1` JSON artifact.
|
|
346
|
+
Every inferred candidate starts as `pending`; repair ignores pending and
|
|
347
|
+
rejected candidates. In v1, only accepted `column_type`, `domain_bound`, and
|
|
348
|
+
`functional_dependency` candidates affect repair. Accepted regex and uniqueness
|
|
349
|
+
candidates remain review evidence until verifier support is added. Use
|
|
350
|
+
`dataforge constraints review constraints.json` for the Textual review UI, or
|
|
351
|
+
use deterministic CI flags such as `--accept cnd-... --no-tui --json`.
|
|
352
|
+
|
|
353
|
+
`make backend-gate` is the release-quality backend check: lint, format, strict
|
|
354
|
+
mypy, root tests, MCP tests, README truth, benchmark truth, OpenAPI snapshot
|
|
355
|
+
drift, secret scan, dependency audit availability, SBOM generation
|
|
356
|
+
availability, and package build availability for both `dataforge_07` and
|
|
357
|
+
`dataforge_07_mcp`. The gate covers the core `dataforge_07` distribution and
|
|
358
|
+
release surfaces; the historical
|
|
359
|
+
`data_quality_env` namespace remains source-tree regression coverage, not part
|
|
360
|
+
of the `dataforge` wheel or source distribution.
|
|
361
|
+
|
|
362
|
+
Before release, run `scripts/ci/backend_gate.py --require-optional` so
|
|
363
|
+
dependency audit, SBOM generation, and package builds are hard failures rather
|
|
364
|
+
than availability checks.
|
|
365
|
+
|
|
366
|
+
Release doctor scopes:
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
dataforge release doctor --core --json
|
|
370
|
+
dataforge release doctor --maintainer-deploy --json
|
|
371
|
+
dataforge release gate --json
|
|
372
|
+
dataforge release full-vision --json
|
|
373
|
+
```
|
|
374
|
+
|
|
375
|
+
`--core` is the default OSS release check. `--maintainer-deploy` additionally
|
|
376
|
+
checks maintainer-specific Hugging Face, Kaggle OAuth plus clean-config Kaggle
|
|
377
|
+
CLI execution, and Cloudflare state.
|
|
378
|
+
`release gate` is the authoritative fresh-user proof: it builds the
|
|
379
|
+
distribution, audits wheel contents, creates a dependency wheelhouse, installs
|
|
380
|
+
with `pip --no-index --find-links`, then runs profile, repair dry-run, apply,
|
|
381
|
+
constraint review, audit, revert, and post-revert audit from outside the source
|
|
382
|
+
checkout.
|
|
383
|
+
|
|
384
|
+
Configure pending trusted publishers for `dataforge_07` on TestPyPI and PyPI
|
|
385
|
+
before tagging. The real PyPI workflow refuses pre-release metadata and should
|
|
386
|
+
only run after trusted publishing, attestations, and fresh-install evidence are
|
|
387
|
+
verified. `dataforge release full-vision --json` is expected to fail until PyPI
|
|
388
|
+
publication evidence, dbt-duckdb proof, design-partner evidence (not yet met),
|
|
389
|
+
and model-family evidence are real.
|
|
390
|
+
|
|
391
|
+
Windows setup:
|
|
392
|
+
|
|
393
|
+
```powershell
|
|
394
|
+
winget install -e --id Python.Python.3.12
|
|
395
|
+
winget install -e --id ezwinports.make
|
|
396
|
+
py -3.12 -m venv .venv
|
|
397
|
+
.\.venv\Scripts\Activate.ps1
|
|
398
|
+
python -m pip install -e ".[all]"
|
|
399
|
+
make lint && make type && make test
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## Environment Variables
|
|
403
|
+
|
|
404
|
+
Provider keys belong in a root `.env` file, which is gitignored and loaded with
|
|
405
|
+
`python-dotenv` where needed.
|
|
406
|
+
|
|
407
|
+
- `GROQ_API_KEY`
|
|
408
|
+
- `GEMINI_API_KEY`
|
|
409
|
+
- `CEREBRAS_API_KEY`
|
|
410
|
+
- `OPENROUTER_API_KEY`
|
|
411
|
+
- `HF_TOKEN`
|
|
412
|
+
|
|
413
|
+
## When DataForge Is The Wrong Tool
|
|
414
|
+
|
|
415
|
+
Do not use DataForge for streaming data, very large warehouse tables, regulated
|
|
416
|
+
workflows where every fix must be human-authored, strict low-latency SLAs, or
|
|
417
|
+
teams already well served by maintained Great Expectations/dbt suites. DataForge
|
|
418
|
+
is currently best suited to local CSV profiling, repair experiments, benchmark
|
|
419
|
+
runs, and training/evaluation research.
|
|
420
|
+
|
|
421
|
+
## Repository Docs
|
|
422
|
+
|
|
423
|
+
- [.cursor/rules/dataforge.md](.cursor/rules/dataforge.md) - always-applied contribution rules
|
|
424
|
+
- [ARCHITECTURE.md](ARCHITECTURE.md) - current system architecture and dependencies
|
|
425
|
+
- [DECISIONS.md](DECISIONS.md) - technical decision log
|
|
426
|
+
- [CONTRIBUTING.md](CONTRIBUTING.md) - workflow and code standards
|
|
427
|
+
- [CLAUDE.md](CLAUDE.md) - living gotcha log for agent sessions
|
|
428
|
+
- [CURSOR_MASTER.md](CURSOR_MASTER.md) - context and prompt pack
|
|
429
|
+
- [META_CONTEXT.md](META_CONTEXT.md) - project meta-context
|
|
430
|
+
- [FILE_STRUCTURE.md](FILE_STRUCTURE.md) - current and planned directory map
|
|
431
|
+
- [SECURITY.md](SECURITY.md) - vulnerability reporting policy
|
|
432
|
+
- [specs/SPEC_TEMPLATE.md](specs/SPEC_TEMPLATE.md) - template for new module specs
|
|
433
|
+
|
|
434
|
+
## License
|
|
435
|
+
|
|
436
|
+
Apache-2.0. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
dataforge/__init__.py,sha256=z39bmphToF2N3hyR9d5C1HyciPwXugTF6Z3Ygy6yGbw,8062
|
|
2
|
+
dataforge/__main__.py,sha256=SgiEikL-JtPIlZOHTZzinJIrmu2Wdz5Ydx746h1iJM0,87
|
|
3
|
+
dataforge/evaluation_contract.py,sha256=uTmVFiZ97uMnMxGv1bmvPwZef-RapOKw4OgG7lQ19Ow,2845
|
|
4
|
+
dataforge/observability.py,sha256=Ut5zDsYn6g_il9DlMNYJkmJNcuxF_lzYJPa2iz_0beE,2494
|
|
5
|
+
dataforge/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
6
|
+
dataforge/repair_contract.py,sha256=sfNs9TrAShzdxvXAt4_eWibvEOgRAzj_W9Fqnev9qjo,16014
|
|
7
|
+
dataforge/schema_inference.py,sha256=81dBO9YVwhC3BMrc9BlqajW7-BlcHl0RZq-TDI6F9XQ,23518
|
|
8
|
+
dataforge/table.py,sha256=z4cmE6CdTw3Nhdm_0aPsMWXApaR4BwbrzQfGXaF5ckU,7402
|
|
9
|
+
dataforge/agent/__init__.py,sha256=QJUtXub5du80WkeG3IoA3xQVdALowOZ4Y5RNZhcpTHI,442
|
|
10
|
+
dataforge/agent/providers.py,sha256=SznYR-5y6EDfFSzsLrQhmTH61NHqg17YROS7yoXceC0,8404
|
|
11
|
+
dataforge/agent/scratchpad.py,sha256=s5eagyceXTyht98ffOsh6N3f1HY6VYwBwUumc6Bk2CY,5592
|
|
12
|
+
dataforge/agent/tool_actions.py,sha256=8QakaaR02YqBxOvpPuRJpbhqHIfrCyDeyDP6cgtqa44,12307
|
|
13
|
+
dataforge/bench/__init__.py,sha256=sTUdBc-IuYlk0KJuXL6SJEDBllFCN0KscYoR9G_0grc,787
|
|
14
|
+
dataforge/bench/core.py,sha256=twecr_se0Ap_DPhPq7lywhTOri7XlHbe6oL1_7IvLZs,15100
|
|
15
|
+
dataforge/bench/groq_client.py,sha256=obF7cgE0LJfrn-LV8r_HTNa60Kdsklu8sf6S3qOQ4Ck,14632
|
|
16
|
+
dataforge/bench/methods.py,sha256=6FXw1_FoJmbOQ3B7L5RwBnmynSGNlnSSGVxY4PoSWOw,15484
|
|
17
|
+
dataforge/bench/report.py,sha256=MZBQYTpNV5BmFw1825_r1BI9B-z_QlbTed8JNt3AgqE,12822
|
|
18
|
+
dataforge/bench/runner.py,sha256=htmMFqqwuV3IhlsnokquPkQMiODkEHOTLuomtjSKEOk,8751
|
|
19
|
+
dataforge/causal/__init__.py,sha256=2_bSxv87jvAvk8IslvfjGPqfcdk1RrfzJR73KGdq1jk,540
|
|
20
|
+
dataforge/causal/dag.py,sha256=JXd_xx1GZA0Jgd0Um2_v4Q_sKnjNXK7vSGsORXjDdPA,5454
|
|
21
|
+
dataforge/causal/pc.py,sha256=-BWYoLbX6pYa4lyz4pa_EDv0sNsagigwVjPGA3kYO-E,8418
|
|
22
|
+
dataforge/causal/root_cause.py,sha256=eOuarfWsLWeBsjaQ83KuuiCMtbWEoArJo_vtkqsYs8U,6181
|
|
23
|
+
dataforge/cli/__init__.py,sha256=oqkgUIZQSYADkK8mNwZOHYbZGOxHrAwvtEBTpyAnVQY,1450
|
|
24
|
+
dataforge/cli/audit.py,sha256=ExYR8kkHlAb_R9T6Uy493t4G_ksRInO28sj5Cdmwm-0,2203
|
|
25
|
+
dataforge/cli/bench.py,sha256=ZKHW1UhbYuTt7lSI_-HdyK4DRWkHC_OhEzrd-vxuCT4,4954
|
|
26
|
+
dataforge/cli/common.py,sha256=EPMW8Z_tz7dxzq2wQPyAqvVxy4c0uXqTOs2WuTpgcqc,9717
|
|
27
|
+
dataforge/cli/constraints.py,sha256=Du4pWGGMME__XKcHBQR0NOhi8PnV0EiCd-XaDBd6qIg,14537
|
|
28
|
+
dataforge/cli/profile.py,sha256=wvptLpid_Ejg9otpfBJOPi1a9EHYPR-eNx7-Rh1xSEE,4918
|
|
29
|
+
dataforge/cli/release.py,sha256=7nlvadOPm21vae3IzJ8zkbE5mUcLB_v0-RLCSmQJZqg,5593
|
|
30
|
+
dataforge/cli/repair.py,sha256=efRE88Ef_Ig0mVfDnFUgoQGHpPc6AsCd_hX2Na6kKOM,14262
|
|
31
|
+
dataforge/cli/revert.py,sha256=FoVZaYmNo6ii-oFCaXsEZO4S1pGW0k4t6t_JhTahswY,4883
|
|
32
|
+
dataforge/cli/watch.py,sha256=43j8YJrF5E36iv3cOvsknDGkDnsB8NsLd8HfhNTYo2I,4625
|
|
33
|
+
dataforge/datasets/__init__.py,sha256=9dt-IhGjWlPsYyyJCJ-aDE-zKx4ltylDNZOoUZgdktc,541
|
|
34
|
+
dataforge/datasets/real_world.py,sha256=j3CxX9hwqvWTvpB5-IK1tsGN1koZK7kzTebI63UWq98,10540
|
|
35
|
+
dataforge/datasets/registry.py,sha256=a1_vIa1PXz2DeQp6wfSx5dt0F1SDSHe303YoaBh4m3o,3559
|
|
36
|
+
dataforge/datasets/embedded/hospital/clean.csv,sha256=AXeUrCeS2dDFitGsTAyDEKcKV1yCG0l8FNgZyjp6bBQ,241
|
|
37
|
+
dataforge/datasets/embedded/hospital/dirty.csv,sha256=4qhHWrTbE4-5hdomSBtpn1PgEUaIcoB_ws3eKtDGleI,244
|
|
38
|
+
dataforge/detectors/__init__.py,sha256=sXK04XbckWkWmD8hBBzZHMWrhh9q_LwYq32ynqPR1Yw,2698
|
|
39
|
+
dataforge/detectors/base.py,sha256=uhYQWAKk7qxPRoDW4dOpjziWq4-7mvbLidl04YCBEfI,5062
|
|
40
|
+
dataforge/detectors/decimal_shift.py,sha256=Zn0ks_iyc8prJo7cGtl3QZMIbrWENS0BL5eBmFocmLY,5748
|
|
41
|
+
dataforge/detectors/fd_violation.py,sha256=xJTocibxHHHp3gB1jBu5tkr-Zl0_unG4NhLQwaYfAvY,5602
|
|
42
|
+
dataforge/detectors/type_mismatch.py,sha256=eOC8DGTGHixl04hw39bArHL3oknxNiWqu_UCeLT-3mM,6244
|
|
43
|
+
dataforge/engine/__init__.py,sha256=O5EWkdD9nHg_Ab7uJZMKBSb81qex3gBtGA7fWT7i8Gw,826
|
|
44
|
+
dataforge/engine/repair.py,sha256=x9FQpdnJakMkGCd2MvIYcs4U1ULtwMdIA8nHdDtZI0k,32455
|
|
45
|
+
dataforge/env/__init__.py,sha256=9QefDEGOhAL6iOhEZwXX9G2goIQHdD1Xic8VtubJ3Ww,725
|
|
46
|
+
dataforge/env/environment.py,sha256=gwjBIAoOzTtetB0uIE9hSNOJ85iEkA5YO03zfs5LoiQ,34938
|
|
47
|
+
dataforge/env/observation.py,sha256=pdtyDWEYX46F099KoDOKpaMqADhQCiuLCKZt6Qlau48,2011
|
|
48
|
+
dataforge/env/openenv_core.py,sha256=vpeDDpfXuTrc-208Y9tAuwYAtBanR0OZdmrvNudT8CM,5620
|
|
49
|
+
dataforge/env/reward.py,sha256=MweozmBMRtCsFxUVaSzAWMCjfo-DAcYHLqHWnZwV9g0,3931
|
|
50
|
+
dataforge/env/server.py,sha256=I5PBtB-Ubn8D6ewqFUYlLqpn3fE-qSWkT9itqtM6WIo,5713
|
|
51
|
+
dataforge/fixtures/hospital_10rows.csv,sha256=VNffXWK7_AXJFQS2ArScmZ4eqwRk0a0MHEM4OqQiZJE,859
|
|
52
|
+
dataforge/fixtures/hospital_schema.yaml,sha256=NxXOOxfIMp3OZ_B3GaPKxYe0Exg_WIH_e0KVK4y6n2E,348
|
|
53
|
+
dataforge/http/__init__.py,sha256=Gf1UPnbzy83id26pnZVVUvgO_zndLqjErKRmSuTwPCM,57
|
|
54
|
+
dataforge/http/problem.py,sha256=yeUONvgwuVJLEBjK50A7gmPTgCYQ1xxLLui_G1CWLqE,2957
|
|
55
|
+
dataforge/integrations/__init__.py,sha256=uJqZwanJbQ76k56kDX0HmQEmaJATb4xaQH_zLj7a5V0,53
|
|
56
|
+
dataforge/integrations/dbt.py,sha256=tHbwbhZC667TCOEk_zflHY6fqjyV1YD36GkYq3lkvvY,6160
|
|
57
|
+
dataforge/release/__init__.py,sha256=gQQykxk1od4yvxeeDg0PURcdFKxjYWzcLWUrIu7oaZo,50
|
|
58
|
+
dataforge/release/doctor.py,sha256=YtZKOGmbwFdH1F9v4yZIWL9JY_uQSH0ijfG4C5Gdfhk,12955
|
|
59
|
+
dataforge/release/full_vision.py,sha256=3uo8a7j_oyNQKl81IQBsTL6-7QmieUgJUUuUqgY9Ss8,29889
|
|
60
|
+
dataforge/release/gate.py,sha256=8IZUX6AUpE_diMJusBxLy9msYOnckq-BZveQ33Ko28c,30913
|
|
61
|
+
dataforge/release/playground_check.py,sha256=dLI69_sAxQ9dy6Af-EvjDi8lSRHGSIL-TFTKC-UUSeU,14074
|
|
62
|
+
dataforge/repairers/__init__.py,sha256=NOqYvPIG62kTMDlrSiN7Uosdd3UL-ps2gjxJvdpToWY,2493
|
|
63
|
+
dataforge/repairers/base.py,sha256=amnsP2_7Ul2J0TfqBQeo6bEX6S4WjsahyQ1I66pGLLA,2129
|
|
64
|
+
dataforge/repairers/decimal_shift.py,sha256=X7iO9vHDaKz_cmm7_XM0kJR4iY9OY3nTyjMrznizH5o,1434
|
|
65
|
+
dataforge/repairers/fd_violation.py,sha256=qFYpiduYdCHiiBE8aH0L48xCcKu7ZqaDhAIpELtLYd4,7738
|
|
66
|
+
dataforge/repairers/type_mismatch.py,sha256=5zjj9m7aR2yxTDTD3gMVm59QGDG_QvOCcFH4kl2gGr8,2562
|
|
67
|
+
dataforge/safety/__init__.py,sha256=TRXzoiKUZoSixfNI5cTG8_EbUwsm5mVTgiXum31Y0Nw,213
|
|
68
|
+
dataforge/safety/constitution.py,sha256=jitEHy7bgMStGAsg6ZrhfBj_f5vES5j7_wwJ-BU-_20,10381
|
|
69
|
+
dataforge/safety/filter.py,sha256=tzGEbI2TsL0X_Z07cu035-qHlxZsIe1vWvWATvcSVUA,4584
|
|
70
|
+
dataforge/safety/adversarial/attack_01_phone_pii.yaml,sha256=AWy1HtzN-IoZo9cYMkfmZaKJlxhLqy4TzvkpgbvSqPE,318
|
|
71
|
+
dataforge/safety/adversarial/attack_02_phone_pii.yaml,sha256=VMAXRsbSM9fC1FsDo_L3L8YYFbnWbWxC9rBt5ugeJ6g,301
|
|
72
|
+
dataforge/safety/adversarial/attack_03_phone_pii.yaml,sha256=FP_VV-v9Ve0k_LPC12lei0PRP59S1wpwJ8fxXzLTgoM,317
|
|
73
|
+
dataforge/safety/adversarial/attack_04_phone_pii.yaml,sha256=rieGqnsNERxJ7kmXsOLtsEWgM1bfsllfVEkbgbU5pQg,317
|
|
74
|
+
dataforge/safety/adversarial/attack_05_phone_pii.yaml,sha256=210iwPHtLrPmlPciywNW_J88WOQXo3U3oKBrWLL9XBU,309
|
|
75
|
+
dataforge/safety/adversarial/attack_06_phone_pii.yaml,sha256=Hib4f558Rd___ODXXR9awP_pDqCER8GvwdNDeGtu1L4,292
|
|
76
|
+
dataforge/safety/adversarial/attack_07_phone_pii.yaml,sha256=d5NIpTKA9f1_v0U_-spkr_aobxUoLrGswXKuIvh45aw,322
|
|
77
|
+
dataforge/safety/adversarial/attack_08_phone_pii.yaml,sha256=ICuQvuEBPujUssXrjiKTmAu3LiENFOpQidEW-os9loQ,301
|
|
78
|
+
dataforge/safety/adversarial/attack_09_phone_pii.yaml,sha256=xomA7KGp-SCAE4ib10WGvUQlPbUeDBLOoUgYtf7Egk0,317
|
|
79
|
+
dataforge/safety/adversarial/attack_10_phone_pii.yaml,sha256=e6g6brv9Ma2Y-YCNaM65tTe1wjdc2_BUY-ocOToGETg,307
|
|
80
|
+
dataforge/safety/adversarial/attack_11_ssn_pii.yaml,sha256=N9X4HVfgrSophq2o9atQD6VFSbef4No1U8t8W3dwvv0,274
|
|
81
|
+
dataforge/safety/adversarial/attack_12_ssn_pii.yaml,sha256=qv6xU81Zi3KsnOyEC1Ch6B56mxiyvbmmWrxSqEv3060,264
|
|
82
|
+
dataforge/safety/adversarial/attack_13_ssn_pii.yaml,sha256=o26faIfKYp6NY3wNm4IuQPicvMBcsEck_Mny_JTEbz8,279
|
|
83
|
+
dataforge/safety/adversarial/attack_14_ssn_pii.yaml,sha256=gyczJ2qdf3qlfJiZPuCDATbE3oV24p1yGxvv1I_GR34,268
|
|
84
|
+
dataforge/safety/adversarial/attack_15_ssn_pii.yaml,sha256=ARKePkM9V5EEA5PyP4tKFAkfefPiEROOcCTLAU_wmmk,276
|
|
85
|
+
dataforge/safety/adversarial/attack_16_ssn_pii.yaml,sha256=MxwYBOzlkeNvUZhpuTLtQ6rdKKY5pt5k_MZiz5c7HEE,274
|
|
86
|
+
dataforge/safety/adversarial/attack_17_ssn_pii.yaml,sha256=Cl84OQVxATk5C8fv00DMdIAbDqwzlp3SHgIYi6wk_eY,270
|
|
87
|
+
dataforge/safety/adversarial/attack_18_ssn_pii.yaml,sha256=61aG3jUjCG6PlM0SWJO-cvPrqfc9OWAEzrq6oryB8lI,276
|
|
88
|
+
dataforge/safety/adversarial/attack_19_ssn_pii.yaml,sha256=1V-OFHIlFjc-xHDtTcr3ddpJA4lhObQVInFt3Im_r2E,271
|
|
89
|
+
dataforge/safety/adversarial/attack_20_ssn_pii.yaml,sha256=9UfjzICuFAs33S-qnlYO-JVdjTYG5Qoxt2HWEs0bJXc,275
|
|
90
|
+
dataforge/safety/adversarial/attack_21_email_pii.yaml,sha256=PBE8fnBHvafm4yCvkKyOBKp34lIfk2iQ_nBsVwXiMVo,284
|
|
91
|
+
dataforge/safety/adversarial/attack_22_email_pii.yaml,sha256=BUNIx78Q2lOGqMrXblFbXozvaLo2ftva1nd-zeCSsEU,270
|
|
92
|
+
dataforge/safety/adversarial/attack_23_email_pii.yaml,sha256=z3IICcf312PYx6sbaF0L7U2iOtPlwIkvRPGoe2-eh3Q,292
|
|
93
|
+
dataforge/safety/adversarial/attack_24_email_pii.yaml,sha256=MO5VKzNPrwpRM8gZsZ8QLl7YmCYeExMnuPYi22P3Rjg,284
|
|
94
|
+
dataforge/safety/adversarial/attack_25_email_pii.yaml,sha256=WQXdsrLDvtRLy_sl-cDUVRuNaVJJiMkNUZjmZykvxtY,279
|
|
95
|
+
dataforge/safety/adversarial/attack_26_email_pii.yaml,sha256=4ZDsISY3MHLxMOjzQ30e_mMVHeomaZNeWoz8ot3TJ-U,299
|
|
96
|
+
dataforge/safety/adversarial/attack_27_email_pii.yaml,sha256=ZKRrZSnzT4xqHUyVDb9PDJ0QS0EOHqXhP0-H-lre8bc,277
|
|
97
|
+
dataforge/safety/adversarial/attack_28_email_pii.yaml,sha256=Upxp7Nm-hUG01tBk6jeO_25IoAvgt34JVZr6RAVyzRQ,311
|
|
98
|
+
dataforge/safety/adversarial/attack_29_email_pii.yaml,sha256=-0p_oKbvrXpWI0-QpNSuyaESLmt7syzR9K_TA9qw9TY,272
|
|
99
|
+
dataforge/safety/adversarial/attack_30_email_pii.yaml,sha256=tTk6jkDvwYQdY6edto9p3Bwx8FlsCqBEVgsbgHAAgLk,288
|
|
100
|
+
dataforge/safety/adversarial/attack_31_row_delete.yaml,sha256=y1ErFI6T2uF-7NihCjWYK1vWZT4Qw7M5TzFlfbFjKAA,294
|
|
101
|
+
dataforge/safety/adversarial/attack_32_row_delete.yaml,sha256=3Zdi2SUWY8rl51-iu52DNSEiFF6EM7DmmBeOSUSxfno,324
|
|
102
|
+
dataforge/safety/adversarial/attack_33_row_delete.yaml,sha256=SWCA9Hdftx_t-u4MnkBc3zJ5QMXFTti8OU8MVHft1EA,309
|
|
103
|
+
dataforge/safety/adversarial/attack_34_row_delete.yaml,sha256=ifA95VOSpZggFpaJ4elNBQnapJijAZKWvMq4v8mPyWU,295
|
|
104
|
+
dataforge/safety/adversarial/attack_35_row_delete.yaml,sha256=TfMhH5ntn2iaD-u3vibQo36c7QqGw5LA3oUsjxaCsqU,284
|
|
105
|
+
dataforge/safety/adversarial/attack_36_row_delete.yaml,sha256=EVPazCaA2QGalBNVR7WMetAytm0BT7XJopYlYmuYzvk,408
|
|
106
|
+
dataforge/safety/adversarial/attack_37_row_delete.yaml,sha256=5GKyZj6k8VF5cWmWg4zBHcCsIxALNrLysr28s1ir5e0,302
|
|
107
|
+
dataforge/safety/adversarial/attack_38_row_delete.yaml,sha256=MsQFOjKlqhJOJuyeZAJ6aLSNRu0MBT89qDJN3LA1_L4,314
|
|
108
|
+
dataforge/safety/adversarial/attack_39_row_delete.yaml,sha256=-EMt-H3UuyqhmK_qsoyPvdFworTVseavDOKfD2RRNBc,320
|
|
109
|
+
dataforge/safety/adversarial/attack_40_row_delete.yaml,sha256=mS1NjQwzp7bOUGH-FU9Qb2CcNJlrt4l5h7nm7ujB6KY,291
|
|
110
|
+
dataforge/safety/adversarial/attack_41_row_delete.yaml,sha256=QJvVGhngpbZSzTG03i7G1PBtYm1LTLUp00tjXn8ah4E,289
|
|
111
|
+
dataforge/safety/adversarial/attack_42_row_delete.yaml,sha256=VspfGlc37r_X2_AJRBMRFmsmSfaBJwXPtgnYtIT8TC4,293
|
|
112
|
+
dataforge/safety/adversarial/attack_43_row_delete.yaml,sha256=Sp2aTI3f13WwGWd4bjBTy9bYDnw1JPkmIVRnl_7sZWE,288
|
|
113
|
+
dataforge/safety/adversarial/attack_44_row_delete.yaml,sha256=JjsJ8lrv_B0Jcy_MsaOEyXc1Gk2iiHYgETUlh7MnLcE,291
|
|
114
|
+
dataforge/safety/adversarial/attack_45_row_delete.yaml,sha256=7-WDfP7iriyGjFAWFrIlChmBOUQ9ENJ27QaEFwgXl_g,331
|
|
115
|
+
dataforge/safety/adversarial/attack_46_row_delete.yaml,sha256=NYkrTbFZH7i992ihyTE5_-3gMo2nvO8xIqoFegWHopg,326
|
|
116
|
+
dataforge/safety/adversarial/attack_47_row_delete.yaml,sha256=-27qVsm9msuEpXr36z9leXxRNf0fuhc2Mk-vRNUygrk,295
|
|
117
|
+
dataforge/safety/adversarial/attack_48_row_delete.yaml,sha256=tc292hNaNEnECbDCckWilUTA2ZemYCJCSdBCm8lE4-M,307
|
|
118
|
+
dataforge/safety/adversarial/attack_49_row_delete.yaml,sha256=YdMg1I-p_780G68sSOrfbEkc_Oi-TFK52A06D2Mbyw0,325
|
|
119
|
+
dataforge/safety/adversarial/attack_50_row_delete.yaml,sha256=zJVqaq3dwh7Dwp5Ous_BMxsyTS6UcWvEtKA431G7uFc,299
|
|
120
|
+
dataforge/safety/constitutions/default.yaml,sha256=hUGQftPLDsleEm20Y4yh9jMFGTPWE00emYoa6t4GRRQ,1705
|
|
121
|
+
dataforge/stores/__init__.py,sha256=nqa49dOPIb_rkURh7JapgOYBBY3cu9waZkG6mq7cXqY,1094
|
|
122
|
+
dataforge/stores/base.py,sha256=w2RLdvSVZWFBTqKRG21KU8EwyHyuXdsyXCFS0D5i6gI,2198
|
|
123
|
+
dataforge/stores/cloud.py,sha256=WJ3QKOX44zrOpKmYNkA4_4jKm0f4VYlRd0FGu4W45D8,2547
|
|
124
|
+
dataforge/stores/csv.py,sha256=jqonyrYXZjRgmHTbQTVfVrEwXezqbX8mrRHVQDTWaIE,3406
|
|
125
|
+
dataforge/stores/duckdb.py,sha256=1WT2RzBKvfQgXlQBCTDSryMoasUytwxV57Dfytd_gSQ,13023
|
|
126
|
+
dataforge/stores/patch_plan.py,sha256=y21c5Gb-KyTwSxMS3utGBZKIzeb-_cCfx16p6xJPkpw,6699
|
|
127
|
+
dataforge/stores/registry.py,sha256=UTFd9Qw0OMCTNjp2EUooNfMl9YrgJCfzA98sGnR-n4k,3120
|
|
128
|
+
dataforge/stores/repair.py,sha256=L1ZsD0f3CUp4wkmbhHY0ZXeNkR_UTd0oFreqGdvM_xM,3862
|
|
129
|
+
dataforge/stores/revert.py,sha256=lEYPRFQblHVc_O9IrK4OSG4qdKGUBFLMeBVE697gLUM,860
|
|
130
|
+
dataforge/stores/sql.py,sha256=MtAQDRkD_6qZMszVI6SEmSWvyvH8UZ9nv5e2aKs2UQc,972
|
|
131
|
+
dataforge/transactions/__init__.py,sha256=9YIte8qns8bcnVa2JCwU4IkVJ3A_7_VTX6h-zbF6HEQ,925
|
|
132
|
+
dataforge/transactions/files.py,sha256=AS4W5i6kFSxPYfdxe6UBhUKi6A_UDXOlOy-Xm6aEBbA,3042
|
|
133
|
+
dataforge/transactions/log.py,sha256=ZPc6xRlju6V9tMYX9ZNVAi-zrinCe49b_NsR3FkS2TI,21918
|
|
134
|
+
dataforge/transactions/revert.py,sha256=_3QS5qBbaPi9TJwtIb0dIKQ-Z2A3Q5GONZ0Xfhuwuzg,3859
|
|
135
|
+
dataforge/transactions/txn.py,sha256=HZ0eAHMZze56xULuWr__LMJNtekNpAvuPZIS6yoMEDc,3860
|
|
136
|
+
dataforge/ui/__init__.py,sha256=5ENf2aaywlzAjxlPNlzsE3aNAHv7tpIFAFglkKp3pZ4,53
|
|
137
|
+
dataforge/ui/profile_view.py,sha256=E99bqHtyzLN_eNWIXVCz8g0fTNgOTCd-7BJwJF0nAIc,4199
|
|
138
|
+
dataforge/ui/repair_diff.py,sha256=RS_ZYn8i9tvEfyaGozRNWn4AytBPDjMR0c9ShrWWIgA,2638
|
|
139
|
+
dataforge/verifier/__init__.py,sha256=Dr_lcElz5uMhB3X88W81i0XH_GRlUllFaCUaiypG9Yg,1554
|
|
140
|
+
dataforge/verifier/constraint_ir.py,sha256=FoMIQUKwiA4QGdYlWHoiIvBsr5WvNOTqS0MUrwcmpDM,5417
|
|
141
|
+
dataforge/verifier/explain.py,sha256=U9FR7nmnXR4Cwz2zfaIIVIQHFSAL8Ax00vxKyXMKBKY,2165
|
|
142
|
+
dataforge/verifier/gate.py,sha256=m9T_k0pjFy1Q2W35xHs3gArkE2pfFcl25bVR4munbsI,214
|
|
143
|
+
dataforge/verifier/schema.py,sha256=0Lauq0lgyUer7pl1x8xcfYwwaSKHaTiOrkACS7MJjNU,4253
|
|
144
|
+
dataforge/verifier/smt.py,sha256=4Aus_my4NDCE6ZURSSatW_CrEFdgKLYAc43CdwKQ8Ug,16302
|
|
145
|
+
dataforge_07-0.1.0.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
|
146
|
+
dataforge_07-0.1.0.dist-info/METADATA,sha256=3LDhPiP3spXGVLWV1Nu-bF2Yvl6WroBq8502nb4ODUM,19542
|
|
147
|
+
dataforge_07-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
148
|
+
dataforge_07-0.1.0.dist-info/entry_points.txt,sha256=0gxQ4PXbK8S2mhiSL0LqZdaEqM8LCc3dh0pd0aheQ2Q,80
|
|
149
|
+
dataforge_07-0.1.0.dist-info/top_level.txt,sha256=xv3CY-CdHCuuvu_sV6g-QJzpRJM7YFJwvQunsUpM0As,10
|
|
150
|
+
dataforge_07-0.1.0.dist-info/RECORD,,
|