dataforge-07 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150) hide show
  1. dataforge/__init__.py +204 -0
  2. dataforge/__main__.py +5 -0
  3. dataforge/agent/__init__.py +16 -0
  4. dataforge/agent/providers.py +259 -0
  5. dataforge/agent/scratchpad.py +183 -0
  6. dataforge/agent/tool_actions.py +343 -0
  7. dataforge/bench/__init__.py +31 -0
  8. dataforge/bench/core.py +426 -0
  9. dataforge/bench/groq_client.py +386 -0
  10. dataforge/bench/methods.py +443 -0
  11. dataforge/bench/report.py +309 -0
  12. dataforge/bench/runner.py +247 -0
  13. dataforge/causal/__init__.py +21 -0
  14. dataforge/causal/dag.py +174 -0
  15. dataforge/causal/pc.py +232 -0
  16. dataforge/causal/root_cause.py +193 -0
  17. dataforge/cli/__init__.py +50 -0
  18. dataforge/cli/audit.py +70 -0
  19. dataforge/cli/bench.py +154 -0
  20. dataforge/cli/common.py +267 -0
  21. dataforge/cli/constraints.py +407 -0
  22. dataforge/cli/profile.py +147 -0
  23. dataforge/cli/release.py +166 -0
  24. dataforge/cli/repair.py +407 -0
  25. dataforge/cli/revert.py +139 -0
  26. dataforge/cli/watch.py +144 -0
  27. dataforge/datasets/__init__.py +25 -0
  28. dataforge/datasets/embedded/hospital/clean.csv +11 -0
  29. dataforge/datasets/embedded/hospital/dirty.csv +11 -0
  30. dataforge/datasets/real_world.py +290 -0
  31. dataforge/datasets/registry.py +103 -0
  32. dataforge/detectors/__init__.py +80 -0
  33. dataforge/detectors/base.py +145 -0
  34. dataforge/detectors/decimal_shift.py +166 -0
  35. dataforge/detectors/fd_violation.py +157 -0
  36. dataforge/detectors/type_mismatch.py +173 -0
  37. dataforge/engine/__init__.py +39 -0
  38. dataforge/engine/repair.py +905 -0
  39. dataforge/env/__init__.py +22 -0
  40. dataforge/env/environment.py +883 -0
  41. dataforge/env/observation.py +61 -0
  42. dataforge/env/openenv_core.py +161 -0
  43. dataforge/env/reward.py +128 -0
  44. dataforge/env/server.py +176 -0
  45. dataforge/evaluation_contract.py +76 -0
  46. dataforge/fixtures/hospital_10rows.csv +11 -0
  47. dataforge/fixtures/hospital_schema.yaml +17 -0
  48. dataforge/http/__init__.py +1 -0
  49. dataforge/http/problem.py +103 -0
  50. dataforge/integrations/__init__.py +1 -0
  51. dataforge/integrations/dbt.py +164 -0
  52. dataforge/observability.py +76 -0
  53. dataforge/py.typed +1 -0
  54. dataforge/release/__init__.py +1 -0
  55. dataforge/release/doctor.py +367 -0
  56. dataforge/release/full_vision.py +702 -0
  57. dataforge/release/gate.py +861 -0
  58. dataforge/release/playground_check.py +411 -0
  59. dataforge/repair_contract.py +468 -0
  60. dataforge/repairers/__init__.py +88 -0
  61. dataforge/repairers/base.py +77 -0
  62. dataforge/repairers/decimal_shift.py +43 -0
  63. dataforge/repairers/fd_violation.py +225 -0
  64. dataforge/repairers/type_mismatch.py +73 -0
  65. dataforge/safety/__init__.py +5 -0
  66. dataforge/safety/adversarial/attack_01_phone_pii.yaml +8 -0
  67. dataforge/safety/adversarial/attack_02_phone_pii.yaml +8 -0
  68. dataforge/safety/adversarial/attack_03_phone_pii.yaml +8 -0
  69. dataforge/safety/adversarial/attack_04_phone_pii.yaml +8 -0
  70. dataforge/safety/adversarial/attack_05_phone_pii.yaml +8 -0
  71. dataforge/safety/adversarial/attack_06_phone_pii.yaml +8 -0
  72. dataforge/safety/adversarial/attack_07_phone_pii.yaml +8 -0
  73. dataforge/safety/adversarial/attack_08_phone_pii.yaml +8 -0
  74. dataforge/safety/adversarial/attack_09_phone_pii.yaml +8 -0
  75. dataforge/safety/adversarial/attack_10_phone_pii.yaml +8 -0
  76. dataforge/safety/adversarial/attack_11_ssn_pii.yaml +8 -0
  77. dataforge/safety/adversarial/attack_12_ssn_pii.yaml +8 -0
  78. dataforge/safety/adversarial/attack_13_ssn_pii.yaml +8 -0
  79. dataforge/safety/adversarial/attack_14_ssn_pii.yaml +8 -0
  80. dataforge/safety/adversarial/attack_15_ssn_pii.yaml +8 -0
  81. dataforge/safety/adversarial/attack_16_ssn_pii.yaml +8 -0
  82. dataforge/safety/adversarial/attack_17_ssn_pii.yaml +8 -0
  83. dataforge/safety/adversarial/attack_18_ssn_pii.yaml +8 -0
  84. dataforge/safety/adversarial/attack_19_ssn_pii.yaml +8 -0
  85. dataforge/safety/adversarial/attack_20_ssn_pii.yaml +8 -0
  86. dataforge/safety/adversarial/attack_21_email_pii.yaml +8 -0
  87. dataforge/safety/adversarial/attack_22_email_pii.yaml +8 -0
  88. dataforge/safety/adversarial/attack_23_email_pii.yaml +8 -0
  89. dataforge/safety/adversarial/attack_24_email_pii.yaml +8 -0
  90. dataforge/safety/adversarial/attack_25_email_pii.yaml +8 -0
  91. dataforge/safety/adversarial/attack_26_email_pii.yaml +8 -0
  92. dataforge/safety/adversarial/attack_27_email_pii.yaml +8 -0
  93. dataforge/safety/adversarial/attack_28_email_pii.yaml +8 -0
  94. dataforge/safety/adversarial/attack_29_email_pii.yaml +8 -0
  95. dataforge/safety/adversarial/attack_30_email_pii.yaml +8 -0
  96. dataforge/safety/adversarial/attack_31_row_delete.yaml +7 -0
  97. dataforge/safety/adversarial/attack_32_row_delete.yaml +8 -0
  98. dataforge/safety/adversarial/attack_33_row_delete.yaml +7 -0
  99. dataforge/safety/adversarial/attack_34_row_delete.yaml +7 -0
  100. dataforge/safety/adversarial/attack_35_row_delete.yaml +7 -0
  101. dataforge/safety/adversarial/attack_36_row_delete.yaml +11 -0
  102. dataforge/safety/adversarial/attack_37_row_delete.yaml +7 -0
  103. dataforge/safety/adversarial/attack_38_row_delete.yaml +7 -0
  104. dataforge/safety/adversarial/attack_39_row_delete.yaml +8 -0
  105. dataforge/safety/adversarial/attack_40_row_delete.yaml +7 -0
  106. dataforge/safety/adversarial/attack_41_row_delete.yaml +7 -0
  107. dataforge/safety/adversarial/attack_42_row_delete.yaml +7 -0
  108. dataforge/safety/adversarial/attack_43_row_delete.yaml +7 -0
  109. dataforge/safety/adversarial/attack_44_row_delete.yaml +7 -0
  110. dataforge/safety/adversarial/attack_45_row_delete.yaml +8 -0
  111. dataforge/safety/adversarial/attack_46_row_delete.yaml +8 -0
  112. dataforge/safety/adversarial/attack_47_row_delete.yaml +7 -0
  113. dataforge/safety/adversarial/attack_48_row_delete.yaml +7 -0
  114. dataforge/safety/adversarial/attack_49_row_delete.yaml +8 -0
  115. dataforge/safety/adversarial/attack_50_row_delete.yaml +7 -0
  116. dataforge/safety/constitution.py +307 -0
  117. dataforge/safety/constitutions/default.yaml +40 -0
  118. dataforge/safety/filter.py +134 -0
  119. dataforge/schema_inference.py +620 -0
  120. dataforge/stores/__init__.py +46 -0
  121. dataforge/stores/base.py +73 -0
  122. dataforge/stores/cloud.py +78 -0
  123. dataforge/stores/csv.py +94 -0
  124. dataforge/stores/duckdb.py +313 -0
  125. dataforge/stores/patch_plan.py +178 -0
  126. dataforge/stores/registry.py +82 -0
  127. dataforge/stores/repair.py +121 -0
  128. dataforge/stores/revert.py +22 -0
  129. dataforge/stores/sql.py +27 -0
  130. dataforge/table.py +228 -0
  131. dataforge/transactions/__init__.py +34 -0
  132. dataforge/transactions/files.py +96 -0
  133. dataforge/transactions/log.py +613 -0
  134. dataforge/transactions/revert.py +102 -0
  135. dataforge/transactions/txn.py +104 -0
  136. dataforge/ui/__init__.py +1 -0
  137. dataforge/ui/profile_view.py +136 -0
  138. dataforge/ui/repair_diff.py +91 -0
  139. dataforge/verifier/__init__.py +55 -0
  140. dataforge/verifier/constraint_ir.py +155 -0
  141. dataforge/verifier/explain.py +47 -0
  142. dataforge/verifier/gate.py +5 -0
  143. dataforge/verifier/schema.py +111 -0
  144. dataforge/verifier/smt.py +433 -0
  145. dataforge_07-0.1.0.dist-info/METADATA +436 -0
  146. dataforge_07-0.1.0.dist-info/RECORD +150 -0
  147. dataforge_07-0.1.0.dist-info/WHEEL +5 -0
  148. dataforge_07-0.1.0.dist-info/entry_points.txt +3 -0
  149. dataforge_07-0.1.0.dist-info/licenses/LICENSE +176 -0
  150. dataforge_07-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,436 @@
1
+ Metadata-Version: 2.4
2
+ Name: dataforge_07
3
+ Version: 0.1.0
4
+ Summary: DataForge: CLI-first data-quality detection and reversible repair for tabular data.
5
+ License-Expression: Apache-2.0
6
+ Project-URL: Homepage, https://github.com/Aegis15/dataforge
7
+ Project-URL: Repository, https://github.com/Aegis15/dataforge
8
+ Project-URL: Documentation, https://dataforge.praneshrajan15.workers.dev/playground
9
+ Keywords: data-quality,ai-agent,llm,rl,smt,dbt
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Python: <3.13,>=3.11
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: pydantic>=2.7
17
+ Requires-Dist: typer<0.25,>=0.24
18
+ Requires-Dist: rich>=13.7
19
+ Requires-Dist: textual<9,>=8.2
20
+ Requires-Dist: z3-solver>=4.13
21
+ Requires-Dist: pyyaml>=6.0
22
+ Requires-Dist: pandas>=2.2
23
+ Requires-Dist: httpx>=0.27
24
+ Requires-Dist: python-dotenv>=1.0
25
+ Provides-Extra: bench
26
+ Requires-Dist: pandas>=2.2; extra == "bench"
27
+ Requires-Dist: httpx>=0.27; extra == "bench"
28
+ Requires-Dist: tenacity>=8.3; extra == "bench"
29
+ Requires-Dist: python-dotenv>=1.0; extra == "bench"
30
+ Requires-Dist: pyarrow>=16.0; extra == "bench"
31
+ Provides-Extra: causal
32
+ Requires-Dist: pandas>=2.2; extra == "causal"
33
+ Requires-Dist: numpy>=1.26; extra == "causal"
34
+ Requires-Dist: networkx>=3.3; extra == "causal"
35
+ Requires-Dist: causal-learn>=0.1.4; extra == "causal"
36
+ Requires-Dist: hyppo>=0.5.2; extra == "causal"
37
+ Requires-Dist: scipy>=1.13; extra == "causal"
38
+ Provides-Extra: dev
39
+ Requires-Dist: pytest>=9.0.3; extra == "dev"
40
+ Requires-Dist: pytest-cov>=5.0; extra == "dev"
41
+ Requires-Dist: pytest-benchmark>=4.0; extra == "dev"
42
+ Requires-Dist: pytest-xdist>=3.6; extra == "dev"
43
+ Requires-Dist: hypothesis>=6.100; extra == "dev"
44
+ Requires-Dist: mutmut>=3.5; extra == "dev"
45
+ Requires-Dist: build>=1.2; extra == "dev"
46
+ Requires-Dist: pip-audit<3,>=2.10; extra == "dev"
47
+ Requires-Dist: cyclonedx-bom<8,>=7.3; extra == "dev"
48
+ Requires-Dist: cryptography>=46.0.7; extra == "dev"
49
+ Requires-Dist: idna>=3.15; extra == "dev"
50
+ Requires-Dist: pip>=26.1.1; extra == "dev"
51
+ Requires-Dist: urllib3>=2.7; extra == "dev"
52
+ Requires-Dist: ruff>=0.11; extra == "dev"
53
+ Requires-Dist: mypy>=1.10; extra == "dev"
54
+ Requires-Dist: pandas-stubs>=2.2; extra == "dev"
55
+ Requires-Dist: types-PyYAML; extra == "dev"
56
+ Requires-Dist: huggingface_hub==1.13.0; extra == "dev"
57
+ Requires-Dist: httpx>=0.27; extra == "dev"
58
+ Requires-Dist: tenacity>=8.3; extra == "dev"
59
+ Requires-Dist: python-dotenv>=1.0; extra == "dev"
60
+ Requires-Dist: pyarrow>=16.0; extra == "dev"
61
+ Requires-Dist: networkx>=3.3; extra == "dev"
62
+ Requires-Dist: causal-learn>=0.1.4; extra == "dev"
63
+ Requires-Dist: hyppo>=0.5.2; extra == "dev"
64
+ Requires-Dist: scipy>=1.13; extra == "dev"
65
+ Requires-Dist: sqlglot>=25.0; extra == "dev"
66
+ Requires-Dist: duckdb>=1.0; extra == "dev"
67
+ Provides-Extra: train
68
+ Requires-Dist: trl==1.4.0; extra == "train"
69
+ Requires-Dist: transformers==5.7.0; extra == "train"
70
+ Requires-Dist: accelerate==1.13.0; extra == "train"
71
+ Requires-Dist: peft==0.19.1; extra == "train"
72
+ Requires-Dist: bitsandbytes==0.49.2; extra == "train"
73
+ Requires-Dist: datasets==4.8.5; extra == "train"
74
+ Requires-Dist: huggingface_hub==1.13.0; extra == "train"
75
+ Requires-Dist: pyyaml==6.0.3; extra == "train"
76
+ Requires-Dist: pandas==2.3.3; extra == "train"
77
+ Requires-Dist: tensorboard==2.20.0; extra == "train"
78
+ Provides-Extra: eval
79
+ Requires-Dist: matplotlib>=3.9; extra == "eval"
80
+ Requires-Dist: seaborn>=0.13; extra == "eval"
81
+ Provides-Extra: providers
82
+ Requires-Dist: httpx>=0.27; extra == "providers"
83
+ Requires-Dist: tenacity>=8.3; extra == "providers"
84
+ Requires-Dist: python-dotenv>=1.0; extra == "providers"
85
+ Provides-Extra: pandas
86
+ Requires-Dist: pandas>=2.2; extra == "pandas"
87
+ Provides-Extra: playground
88
+ Requires-Dist: pandas>=2.2; extra == "playground"
89
+ Requires-Dist: fastapi>=0.136.1; extra == "playground"
90
+ Requires-Dist: starlette<2,>=1.0.1; extra == "playground"
91
+ Requires-Dist: uvicorn[standard]>=0.35; extra == "playground"
92
+ Requires-Dist: python-multipart>=0.0.27; extra == "playground"
93
+ Requires-Dist: slowapi>=0.1.9; extra == "playground"
94
+ Provides-Extra: openenv
95
+ Requires-Dist: pandas>=2.2; extra == "openenv"
96
+ Requires-Dist: openenv-core[core]>=0.2.2; extra == "openenv"
97
+ Requires-Dist: authlib!=1.7.0,>=1.7.1; extra == "openenv"
98
+ Requires-Dist: cryptography>=46.0.7; extra == "openenv"
99
+ Requires-Dist: duckdb>=1.0; extra == "openenv"
100
+ Requires-Dist: sqlglot>=25.0; extra == "openenv"
101
+ Requires-Dist: scipy>=1.13; extra == "openenv"
102
+ Requires-Dist: networkx>=3.3; extra == "openenv"
103
+ Requires-Dist: causal-learn>=0.1.4; extra == "openenv"
104
+ Requires-Dist: hyppo>=0.5.2; extra == "openenv"
105
+ Provides-Extra: all
106
+ Requires-Dist: dataforge_07[bench,causal,dev,eval,openenv,pandas,playground,providers,train]; extra == "all"
107
+ Dynamic: license-file
108
+
109
+ # DataForge
110
+
111
+ DataForge is a CLI-first data-quality repair toolkit for tabular data. It
112
+ detects common CSV issues, proposes deterministic repairs, checks proposed
113
+ changes through safety and verification gates, and records applied changes in a
114
+ reversible transaction log.
115
+
116
+ The final public product name is DataForge. The PyPI/TestPyPI distribution
117
+ family is `dataforge_07*` because the unqualified `dataforge` project name is
118
+ occupied by unrelated packages. Installing `dataforge_07` still provides the
119
+ `dataforge` import namespace and `dataforge` CLI. `dataforge15` is only a
120
+ temporary staging alias retained for local compatibility.
121
+
122
+ The current repository is an alpha implementation. It also contains the
123
+ OpenEnv-compatible training environment, the SFT warmup workflow, a local MCP
124
+ server package, and playground/demo sources. Warehouse integrations and
125
+ production model-quality claims remain future work.
126
+
127
+ Before any public release, review `THREAT_MODEL.md` and `docs/docs/release.md`.
128
+ They define the security, supply-chain, and evidence gates that separate the
129
+ current alpha from the full original DataForge vision.
130
+
131
+ ## Current Status
132
+
133
+ Shipped in the current worktree:
134
+
135
+ - `dataforge profile`, `dataforge repair`, `dataforge revert`,
136
+ `dataforge watch`, `dataforge audit`, and `dataforge bench`
137
+ - Three detector families: `type_mismatch`, `decimal_shift`, `fd_violation`
138
+ - Reviewable schema inference in `profile --json`, including inferred column
139
+ types, domains, regex candidates, uniqueness, and FD candidates
140
+ - Pending constraint review artifacts via `profile --constraints-out`, which
141
+ can feed repair only after individual candidates are marked accepted
142
+ - Matching deterministic repairers wired through SafetyFilter -> SMTVerifier
143
+ - Backend-neutral `PatchPlan` and `TableStore` contracts for CSV, DuckDB, and
144
+ dry-run-only cloud warehouse boundaries
145
+ - Reversible hash-chained transaction journals with immutable source snapshots
146
+ - Public backend repair engine at `dataforge.engine.repair`
147
+ - Real-world benchmark harness for Hospital, Flights, and Beers
148
+ - OpenEnv-compatible HTTP environment with eight typed actions, including
149
+ read-only `ROOT_CAUSE`
150
+ - Causal root-cause analyzer for cascading data-quality errors
151
+ - Standalone `dataforge-mcp` package exposing DataForge tools over MCP
152
+ - Week 9 SFT oracle trajectory workflow, readiness gate, Kaggle notebook, and
153
+ release verifier
154
+ - Separate Gradio model-demo Space source for the published 0.5B SFT smoke
155
+ checkpoint
156
+
157
+ Not shipped yet:
158
+
159
+ - published `dataforge_07`, `dataforge_07_mcp`, `dataforge_07_evals`,
160
+ `dataforge_07_dbt`, and `dataforge_07_agent_patterns` packages
161
+ - committed production verification for the Cloudflare Workers playground
162
+ - warehouse-native or external adapter packages
163
+ - credentialed Snowflake, BigQuery, or Databricks apply/revert conformance
164
+ - design-partner, pilot-user, or customer validation evidence (none is claimed yet)
165
+ - a production-quality trained model family
166
+ - autonomous repair in the playground or model demo
167
+
168
+ ## Quickstart
169
+
170
+ ```bash
171
+ python -m pip install -e ".[dev]"
172
+ dataforge profile fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml
173
+ dataforge profile fixtures/hospital_10rows.csv --constraints-out constraints.json
174
+ dataforge constraints review constraints.json
175
+ dataforge repair fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml --dry-run
176
+ dataforge repair fixtures/hospital_10rows.csv --constraints constraints.json --dry-run
177
+ dataforge watch fixtures/hospital_10rows.csv --schema fixtures/hospital_schema.yaml --once --json
178
+ dataforge bench --methods random,heuristic --datasets hospital,flights,beers --seeds 3 --seed-list 0,1,2
179
+ ```
180
+
181
+ `dataforge15` remains a temporary staging compatibility alias, but public docs
182
+ and release evidence must use `dataforge_07` for PyPI distribution identity and
183
+ `dataforge` for the installed CLI/import identity.
184
+
185
+ To apply repairs, use `--apply`. Applied repairs write a transaction journal and
186
+ source snapshot before mutating the CSV, so they can be reverted:
187
+
188
+ ```bash
189
+ dataforge repair path/to/file.csv --schema path/to/schema.yaml --apply
190
+ dataforge audit <txn-id>
191
+ dataforge revert <txn-id>
192
+ dataforge revert <txn-id> --search-root path/to --json
193
+ ```
194
+
195
+ Warehouse targets use `warehouse://` URIs and always emit a `patch_plan_v1`
196
+ contract before any mutation. DuckDB is the local conformance backend; cloud
197
+ warehouse adapters are dry-run-only boundaries until credentialed apply,
198
+ audit, and rollback suites are enabled:
199
+
200
+ ```bash
201
+ dataforge repair "warehouse://duckdb?database=dev.duckdb&relation=main.model&row_id=id" --dry-run --json
202
+ dataforge repair "warehouse://snowflake?relation=PUBLIC.MODEL&row_id=ID" --dry-run --json
203
+ ```
204
+
205
+ DuckDB `--apply` requires a stable row identity, records the patch plan in the
206
+ transaction journal, and can be reverted through the same `audit` and `revert`
207
+ commands. Snowflake, BigQuery, and Databricks apply are intentionally refused
208
+ until their conformance gates prove reversible transactions.
209
+
210
+ New transaction logs are local tamper-evident hash chains. `dataforge audit`
211
+ verifies the chain head, event order, replayability, and revert prerequisites;
212
+ legacy v1 logs remain replayable but are reported as unverified because they do
213
+ not contain event hashes.
214
+
215
+ ## Week 9 SFT Warmup
216
+
217
+ The current SFT workflow builds split-safe `expert_v1` trajectory records from
218
+ dirty/clean CSV diffs. Exact repairs in the primary dataset are labeled
219
+ `oracle_from_clean_diff`, not inferred from Groq, Cerebras, or Gemini teacher
220
+ guesses. Clean train chunks are retained as `finish` examples so the model
221
+ learns when no repair is justified.
222
+
223
+ ```powershell
224
+ $env:HF_TOKEN="..."
225
+ .\.venv\Scripts\python.exe scripts\data\build_oracle_sft_trajectories.py
226
+ .\.venv\Scripts\python.exe scripts\data\validate_sft_readiness.py
227
+ ```
228
+
229
+ This writes local ignored JSONL at `data/sft_traj/expert_v1.jsonl` and an
230
+ auditable row split at `data/sft_traj/split_manifest.json`. Push the dataset
231
+ bundle only after the readiness gate passes:
232
+
233
+ ```powershell
234
+ $env:HF_TOKEN="..."
235
+ .\.venv\Scripts\python.exe scripts\data\build_oracle_sft_trajectories.py --push-to-hub --hf-dataset-repo Praneshrajan15/dataforge-sft-trajectories
236
+ ```
237
+
238
+ The current public smoke checkpoint is
239
+ `Praneshrajan15/DataForge-0.5B-SFT`, with trajectories at
240
+ `Praneshrajan15/dataforge-sft-trajectories`. It proves the dataset, Kaggle
241
+ training, merge, evaluation, and Hub upload path; it is not a production
242
+ model-quality claim. Verify release artifacts before citing them:
243
+
244
+ ```powershell
245
+ .\.venv\Scripts\python.exe scripts\model\verify_sft_release.py --output eval\results\sft_release_v0_smoke.json
246
+ .\.venv\Scripts\python.exe scripts\model\verify_sft_release.py --min-dataset-records 272 --require-sha-metrics --output eval\results\sft_release_contract_v2_20260515.json
247
+ ```
248
+
249
+ ## Week 12 GRPO Path
250
+
251
+ The repository now contains a gated GRPO post-training path for free-tier
252
+ experiments:
253
+
254
+ - `training/configs/grpo_05b.yaml` targets `DataForge-0.5B-SFT` -> `DataForge-0.5B-GRPO`.
255
+ - `training/configs/grpo_15b.yaml` requires a verified `DataForge-1.5B-SFT`
256
+ prerequisite before attempting `DataForge-1.5B-GRPO`.
257
+ - `training/rewards/dataforge_reward.py` scores completions locally through the
258
+ `repair_contract_v1` exact-repair contract.
259
+ - `training/kaggle/grpo_kaggle.ipynb` blocks Hub upload unless GRPO beats SFT
260
+ by at least 3 absolute F1 points on `DataForge-Bench-light-verified`.
261
+
262
+ No GRPO checkpoint is described as a quality milestone in this README until
263
+ `scripts/model/verify_grpo_release.py` produces committed verification
264
+ evidence. Refresh benchmark tables only from generated JSON.
265
+
266
+ After GRPO eval evidence exists:
267
+
268
+ ```powershell
269
+ .\.venv\Scripts\python.exe scripts\bench\refresh_benchmark_table.py --skip-agent-run --trained-model-json eval\results\grpo_model_comparison.json
270
+ ```
271
+
272
+ ## MCP Server
273
+
274
+ The nested `dataforge-mcp/` source directory builds the standalone
275
+ `dataforge_07_mcp` distribution. It is not published yet, so install it from
276
+ source while release ownership is pending:
277
+
278
+ ```bash
279
+ cd dataforge-mcp
280
+ python -m pip install -e ".[dev]"
281
+ dataforge-mcp serve
282
+ ```
283
+
284
+ Tools: `dataforge_profile`, `dataforge_detect_errors`,
285
+ `dataforge_verify_fix`, `dataforge_apply_repairs`, and `dataforge_revert`.
286
+ The default transport is stdio. MCP reads and writes are sandboxed to configured
287
+ allowed roots; dry-run works by default, while apply requires `--enable-apply`.
288
+ Streamable HTTP is available for local experiments.
289
+
290
+ The monorepo `packages/` directory contains the side-package release sources
291
+ for `dataforge_07_evals`, `dataforge_07_dbt`, and
292
+ `dataforge_07_agent_patterns`.
293
+
294
+ ## Playground And Model Demo
295
+
296
+ - `playground/api/` is the API backend for the CSV playground. Public Space
297
+ deployments use `dataforge-playground`.
298
+ - `playground/web/` is the static browser UI deployed through Cloudflare
299
+ Workers Static Assets. Its primary workflow is `POST /api/analyze`: upload a
300
+ CSV, review categorical risk and pending inferred constraints, inspect
301
+ verified dry-run repairs and non-repairs, then export a receipt with the
302
+ local CLI apply/audit/revert command shape.
303
+ - The current verified public playground URL is
304
+ `https://dataforge.praneshrajan15.workers.dev/playground`, backed by
305
+ `https://Praneshrajan15-dataforge-playground.hf.space`.
306
+ - That Workers URL is the production playground surface for the full original
307
+ vision; this is the release URL.
308
+ - `playground-model/` is a separate Gradio Space demo for the published
309
+ `DataForge-0.5B-SFT` smoke checkpoint. It accepts small CSV snippets and is
310
+ intentionally limited to demo use.
311
+
312
+ The playground does not persist uploaded files, does not use browser storage,
313
+ does not mutate data in the hosted flow, and does not call an LLM unless a
314
+ backend provider key is explicitly configured.
315
+
316
+ ## Benchmark Results
317
+
318
+ <!-- BENCH:START -->
319
+ Generated from `eval/results/agent_comparison.json` (schema `dataforge_benchmark_run_v2`, seeds `0, 1, 2`, git `dbd1bed0a03c`, dirty `true`).
320
+
321
+ | Method | Precision | Recall | F1 | Avg Steps | Quota Units | GPU Hours |
322
+ | --- | --- | --- | --- | --- | --- | --- |
323
+ | heuristic | 0.3167 | 0.3025 | 0.2772 | 374.33 | 0.0000 | 0.0000 |
324
+ | random | 0.0038 | 0.0003 | 0.0005 | 150.33 | 0.0000 | 0.0000 |
325
+
326
+ See `BENCHMARK_REPORT.md` for per-dataset tables, error bars, and citation-only SOTA rows.
327
+
328
+ Dataset bytes are pinned to BigDaMa/raha revision `7be1334b8c7bbdac3f47ef514fb3e1e8c5fc181c` for hospital, flights, beers; dirty/clean SHA-256s are recorded in the JSON metadata.
329
+ <!-- BENCH:END -->
330
+
331
+ ## Local Setup
332
+
333
+ ```bash
334
+ make setup
335
+ make lint
336
+ make type
337
+ make test
338
+ make backend-gate
339
+ make release-gate
340
+ ```
341
+
342
+ Verification works on Linux, macOS, and Windows; on Windows, Git Bash must be
343
+ available for the GNU Make recipes. Python support is `>=3.11,<3.13`.
344
+
345
+ `profile --constraints-out` writes a strict `constraint_review_v1` JSON artifact.
346
+ Every inferred candidate starts as `pending`; repair ignores pending and
347
+ rejected candidates. In v1, only accepted `column_type`, `domain_bound`, and
348
+ `functional_dependency` candidates affect repair. Accepted regex and uniqueness
349
+ candidates remain review evidence until verifier support is added. Use
350
+ `dataforge constraints review constraints.json` for the Textual review UI, or
351
+ use deterministic CI flags such as `--accept cnd-... --no-tui --json`.
352
+
353
+ `make backend-gate` is the release-quality backend check: lint, format, strict
354
+ mypy, root tests, MCP tests, README truth, benchmark truth, OpenAPI snapshot
355
+ drift, secret scan, dependency audit availability, SBOM generation
356
+ availability, and package build availability for both `dataforge_07` and
357
+ `dataforge_07_mcp`. The gate covers the core `dataforge_07` distribution and
358
+ release surfaces; the historical
359
+ `data_quality_env` namespace remains source-tree regression coverage, not part
360
+ of the `dataforge` wheel or source distribution.
361
+
362
+ Before release, run `scripts/ci/backend_gate.py --require-optional` so
363
+ dependency audit, SBOM generation, and package builds are hard failures rather
364
+ than availability checks.
365
+
366
+ Release doctor scopes:
367
+
368
+ ```bash
369
+ dataforge release doctor --core --json
370
+ dataforge release doctor --maintainer-deploy --json
371
+ dataforge release gate --json
372
+ dataforge release full-vision --json
373
+ ```
374
+
375
+ `--core` is the default OSS release check. `--maintainer-deploy` additionally
376
+ checks maintainer-specific Hugging Face, Kaggle OAuth plus clean-config Kaggle
377
+ CLI execution, and Cloudflare state.
378
+ `release gate` is the authoritative fresh-user proof: it builds the
379
+ distribution, audits wheel contents, creates a dependency wheelhouse, installs
380
+ with `pip --no-index --find-links`, then runs profile, repair dry-run, apply,
381
+ constraint review, audit, revert, and post-revert audit from outside the source
382
+ checkout.
383
+
384
+ Configure pending trusted publishers for `dataforge_07` on TestPyPI and PyPI
385
+ before tagging. The real PyPI workflow refuses pre-release metadata and should
386
+ only run after trusted publishing, attestations, and fresh-install evidence are
387
+ verified. `dataforge release full-vision --json` is expected to fail until PyPI
388
+ publication evidence, dbt-duckdb proof, design-partner evidence (not yet met),
389
+ and model-family evidence are real.
390
+
391
+ Windows setup:
392
+
393
+ ```powershell
394
+ winget install -e --id Python.Python.3.12
395
+ winget install -e --id ezwinports.make
396
+ py -3.12 -m venv .venv
397
+ .\.venv\Scripts\Activate.ps1
398
+ python -m pip install -e ".[all]"
399
+ make lint && make type && make test
400
+ ```
401
+
402
+ ## Environment Variables
403
+
404
+ Provider keys belong in a root `.env` file, which is gitignored and loaded with
405
+ `python-dotenv` where needed.
406
+
407
+ - `GROQ_API_KEY`
408
+ - `GEMINI_API_KEY`
409
+ - `CEREBRAS_API_KEY`
410
+ - `OPENROUTER_API_KEY`
411
+ - `HF_TOKEN`
412
+
413
+ ## When DataForge Is The Wrong Tool
414
+
415
+ Do not use DataForge for streaming data, very large warehouse tables, regulated
416
+ workflows where every fix must be human-authored, strict low-latency SLAs, or
417
+ teams already well served by maintained Great Expectations/dbt suites. DataForge
418
+ is currently best suited to local CSV profiling, repair experiments, benchmark
419
+ runs, and training/evaluation research.
420
+
421
+ ## Repository Docs
422
+
423
+ - [.cursor/rules/dataforge.md](.cursor/rules/dataforge.md) - always-applied contribution rules
424
+ - [ARCHITECTURE.md](ARCHITECTURE.md) - current system architecture and dependencies
425
+ - [DECISIONS.md](DECISIONS.md) - technical decision log
426
+ - [CONTRIBUTING.md](CONTRIBUTING.md) - workflow and code standards
427
+ - [CLAUDE.md](CLAUDE.md) - living gotcha log for agent sessions
428
+ - [CURSOR_MASTER.md](CURSOR_MASTER.md) - context and prompt pack
429
+ - [META_CONTEXT.md](META_CONTEXT.md) - project meta-context
430
+ - [FILE_STRUCTURE.md](FILE_STRUCTURE.md) - current and planned directory map
431
+ - [SECURITY.md](SECURITY.md) - vulnerability reporting policy
432
+ - [specs/SPEC_TEMPLATE.md](specs/SPEC_TEMPLATE.md) - template for new module specs
433
+
434
+ ## License
435
+
436
+ Apache-2.0. See [LICENSE](LICENSE).
@@ -0,0 +1,150 @@
1
+ dataforge/__init__.py,sha256=z39bmphToF2N3hyR9d5C1HyciPwXugTF6Z3Ygy6yGbw,8062
2
+ dataforge/__main__.py,sha256=SgiEikL-JtPIlZOHTZzinJIrmu2Wdz5Ydx746h1iJM0,87
3
+ dataforge/evaluation_contract.py,sha256=uTmVFiZ97uMnMxGv1bmvPwZef-RapOKw4OgG7lQ19Ow,2845
4
+ dataforge/observability.py,sha256=Ut5zDsYn6g_il9DlMNYJkmJNcuxF_lzYJPa2iz_0beE,2494
5
+ dataforge/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
+ dataforge/repair_contract.py,sha256=sfNs9TrAShzdxvXAt4_eWibvEOgRAzj_W9Fqnev9qjo,16014
7
+ dataforge/schema_inference.py,sha256=81dBO9YVwhC3BMrc9BlqajW7-BlcHl0RZq-TDI6F9XQ,23518
8
+ dataforge/table.py,sha256=z4cmE6CdTw3Nhdm_0aPsMWXApaR4BwbrzQfGXaF5ckU,7402
9
+ dataforge/agent/__init__.py,sha256=QJUtXub5du80WkeG3IoA3xQVdALowOZ4Y5RNZhcpTHI,442
10
+ dataforge/agent/providers.py,sha256=SznYR-5y6EDfFSzsLrQhmTH61NHqg17YROS7yoXceC0,8404
11
+ dataforge/agent/scratchpad.py,sha256=s5eagyceXTyht98ffOsh6N3f1HY6VYwBwUumc6Bk2CY,5592
12
+ dataforge/agent/tool_actions.py,sha256=8QakaaR02YqBxOvpPuRJpbhqHIfrCyDeyDP6cgtqa44,12307
13
+ dataforge/bench/__init__.py,sha256=sTUdBc-IuYlk0KJuXL6SJEDBllFCN0KscYoR9G_0grc,787
14
+ dataforge/bench/core.py,sha256=twecr_se0Ap_DPhPq7lywhTOri7XlHbe6oL1_7IvLZs,15100
15
+ dataforge/bench/groq_client.py,sha256=obF7cgE0LJfrn-LV8r_HTNa60Kdsklu8sf6S3qOQ4Ck,14632
16
+ dataforge/bench/methods.py,sha256=6FXw1_FoJmbOQ3B7L5RwBnmynSGNlnSSGVxY4PoSWOw,15484
17
+ dataforge/bench/report.py,sha256=MZBQYTpNV5BmFw1825_r1BI9B-z_QlbTed8JNt3AgqE,12822
18
+ dataforge/bench/runner.py,sha256=htmMFqqwuV3IhlsnokquPkQMiODkEHOTLuomtjSKEOk,8751
19
+ dataforge/causal/__init__.py,sha256=2_bSxv87jvAvk8IslvfjGPqfcdk1RrfzJR73KGdq1jk,540
20
+ dataforge/causal/dag.py,sha256=JXd_xx1GZA0Jgd0Um2_v4Q_sKnjNXK7vSGsORXjDdPA,5454
21
+ dataforge/causal/pc.py,sha256=-BWYoLbX6pYa4lyz4pa_EDv0sNsagigwVjPGA3kYO-E,8418
22
+ dataforge/causal/root_cause.py,sha256=eOuarfWsLWeBsjaQ83KuuiCMtbWEoArJo_vtkqsYs8U,6181
23
+ dataforge/cli/__init__.py,sha256=oqkgUIZQSYADkK8mNwZOHYbZGOxHrAwvtEBTpyAnVQY,1450
24
+ dataforge/cli/audit.py,sha256=ExYR8kkHlAb_R9T6Uy493t4G_ksRInO28sj5Cdmwm-0,2203
25
+ dataforge/cli/bench.py,sha256=ZKHW1UhbYuTt7lSI_-HdyK4DRWkHC_OhEzrd-vxuCT4,4954
26
+ dataforge/cli/common.py,sha256=EPMW8Z_tz7dxzq2wQPyAqvVxy4c0uXqTOs2WuTpgcqc,9717
27
+ dataforge/cli/constraints.py,sha256=Du4pWGGMME__XKcHBQR0NOhi8PnV0EiCd-XaDBd6qIg,14537
28
+ dataforge/cli/profile.py,sha256=wvptLpid_Ejg9otpfBJOPi1a9EHYPR-eNx7-Rh1xSEE,4918
29
+ dataforge/cli/release.py,sha256=7nlvadOPm21vae3IzJ8zkbE5mUcLB_v0-RLCSmQJZqg,5593
30
+ dataforge/cli/repair.py,sha256=efRE88Ef_Ig0mVfDnFUgoQGHpPc6AsCd_hX2Na6kKOM,14262
31
+ dataforge/cli/revert.py,sha256=FoVZaYmNo6ii-oFCaXsEZO4S1pGW0k4t6t_JhTahswY,4883
32
+ dataforge/cli/watch.py,sha256=43j8YJrF5E36iv3cOvsknDGkDnsB8NsLd8HfhNTYo2I,4625
33
+ dataforge/datasets/__init__.py,sha256=9dt-IhGjWlPsYyyJCJ-aDE-zKx4ltylDNZOoUZgdktc,541
34
+ dataforge/datasets/real_world.py,sha256=j3CxX9hwqvWTvpB5-IK1tsGN1koZK7kzTebI63UWq98,10540
35
+ dataforge/datasets/registry.py,sha256=a1_vIa1PXz2DeQp6wfSx5dt0F1SDSHe303YoaBh4m3o,3559
36
+ dataforge/datasets/embedded/hospital/clean.csv,sha256=AXeUrCeS2dDFitGsTAyDEKcKV1yCG0l8FNgZyjp6bBQ,241
37
+ dataforge/datasets/embedded/hospital/dirty.csv,sha256=4qhHWrTbE4-5hdomSBtpn1PgEUaIcoB_ws3eKtDGleI,244
38
+ dataforge/detectors/__init__.py,sha256=sXK04XbckWkWmD8hBBzZHMWrhh9q_LwYq32ynqPR1Yw,2698
39
+ dataforge/detectors/base.py,sha256=uhYQWAKk7qxPRoDW4dOpjziWq4-7mvbLidl04YCBEfI,5062
40
+ dataforge/detectors/decimal_shift.py,sha256=Zn0ks_iyc8prJo7cGtl3QZMIbrWENS0BL5eBmFocmLY,5748
41
+ dataforge/detectors/fd_violation.py,sha256=xJTocibxHHHp3gB1jBu5tkr-Zl0_unG4NhLQwaYfAvY,5602
42
+ dataforge/detectors/type_mismatch.py,sha256=eOC8DGTGHixl04hw39bArHL3oknxNiWqu_UCeLT-3mM,6244
43
+ dataforge/engine/__init__.py,sha256=O5EWkdD9nHg_Ab7uJZMKBSb81qex3gBtGA7fWT7i8Gw,826
44
+ dataforge/engine/repair.py,sha256=x9FQpdnJakMkGCd2MvIYcs4U1ULtwMdIA8nHdDtZI0k,32455
45
+ dataforge/env/__init__.py,sha256=9QefDEGOhAL6iOhEZwXX9G2goIQHdD1Xic8VtubJ3Ww,725
46
+ dataforge/env/environment.py,sha256=gwjBIAoOzTtetB0uIE9hSNOJ85iEkA5YO03zfs5LoiQ,34938
47
+ dataforge/env/observation.py,sha256=pdtyDWEYX46F099KoDOKpaMqADhQCiuLCKZt6Qlau48,2011
48
+ dataforge/env/openenv_core.py,sha256=vpeDDpfXuTrc-208Y9tAuwYAtBanR0OZdmrvNudT8CM,5620
49
+ dataforge/env/reward.py,sha256=MweozmBMRtCsFxUVaSzAWMCjfo-DAcYHLqHWnZwV9g0,3931
50
+ dataforge/env/server.py,sha256=I5PBtB-Ubn8D6ewqFUYlLqpn3fE-qSWkT9itqtM6WIo,5713
51
+ dataforge/fixtures/hospital_10rows.csv,sha256=VNffXWK7_AXJFQS2ArScmZ4eqwRk0a0MHEM4OqQiZJE,859
52
+ dataforge/fixtures/hospital_schema.yaml,sha256=NxXOOxfIMp3OZ_B3GaPKxYe0Exg_WIH_e0KVK4y6n2E,348
53
+ dataforge/http/__init__.py,sha256=Gf1UPnbzy83id26pnZVVUvgO_zndLqjErKRmSuTwPCM,57
54
+ dataforge/http/problem.py,sha256=yeUONvgwuVJLEBjK50A7gmPTgCYQ1xxLLui_G1CWLqE,2957
55
+ dataforge/integrations/__init__.py,sha256=uJqZwanJbQ76k56kDX0HmQEmaJATb4xaQH_zLj7a5V0,53
56
+ dataforge/integrations/dbt.py,sha256=tHbwbhZC667TCOEk_zflHY6fqjyV1YD36GkYq3lkvvY,6160
57
+ dataforge/release/__init__.py,sha256=gQQykxk1od4yvxeeDg0PURcdFKxjYWzcLWUrIu7oaZo,50
58
+ dataforge/release/doctor.py,sha256=YtZKOGmbwFdH1F9v4yZIWL9JY_uQSH0ijfG4C5Gdfhk,12955
59
+ dataforge/release/full_vision.py,sha256=3uo8a7j_oyNQKl81IQBsTL6-7QmieUgJUUuUqgY9Ss8,29889
60
+ dataforge/release/gate.py,sha256=8IZUX6AUpE_diMJusBxLy9msYOnckq-BZveQ33Ko28c,30913
61
+ dataforge/release/playground_check.py,sha256=dLI69_sAxQ9dy6Af-EvjDi8lSRHGSIL-TFTKC-UUSeU,14074
62
+ dataforge/repairers/__init__.py,sha256=NOqYvPIG62kTMDlrSiN7Uosdd3UL-ps2gjxJvdpToWY,2493
63
+ dataforge/repairers/base.py,sha256=amnsP2_7Ul2J0TfqBQeo6bEX6S4WjsahyQ1I66pGLLA,2129
64
+ dataforge/repairers/decimal_shift.py,sha256=X7iO9vHDaKz_cmm7_XM0kJR4iY9OY3nTyjMrznizH5o,1434
65
+ dataforge/repairers/fd_violation.py,sha256=qFYpiduYdCHiiBE8aH0L48xCcKu7ZqaDhAIpELtLYd4,7738
66
+ dataforge/repairers/type_mismatch.py,sha256=5zjj9m7aR2yxTDTD3gMVm59QGDG_QvOCcFH4kl2gGr8,2562
67
+ dataforge/safety/__init__.py,sha256=TRXzoiKUZoSixfNI5cTG8_EbUwsm5mVTgiXum31Y0Nw,213
68
+ dataforge/safety/constitution.py,sha256=jitEHy7bgMStGAsg6ZrhfBj_f5vES5j7_wwJ-BU-_20,10381
69
+ dataforge/safety/filter.py,sha256=tzGEbI2TsL0X_Z07cu035-qHlxZsIe1vWvWATvcSVUA,4584
70
+ dataforge/safety/adversarial/attack_01_phone_pii.yaml,sha256=AWy1HtzN-IoZo9cYMkfmZaKJlxhLqy4TzvkpgbvSqPE,318
71
+ dataforge/safety/adversarial/attack_02_phone_pii.yaml,sha256=VMAXRsbSM9fC1FsDo_L3L8YYFbnWbWxC9rBt5ugeJ6g,301
72
+ dataforge/safety/adversarial/attack_03_phone_pii.yaml,sha256=FP_VV-v9Ve0k_LPC12lei0PRP59S1wpwJ8fxXzLTgoM,317
73
+ dataforge/safety/adversarial/attack_04_phone_pii.yaml,sha256=rieGqnsNERxJ7kmXsOLtsEWgM1bfsllfVEkbgbU5pQg,317
74
+ dataforge/safety/adversarial/attack_05_phone_pii.yaml,sha256=210iwPHtLrPmlPciywNW_J88WOQXo3U3oKBrWLL9XBU,309
75
+ dataforge/safety/adversarial/attack_06_phone_pii.yaml,sha256=Hib4f558Rd___ODXXR9awP_pDqCER8GvwdNDeGtu1L4,292
76
+ dataforge/safety/adversarial/attack_07_phone_pii.yaml,sha256=d5NIpTKA9f1_v0U_-spkr_aobxUoLrGswXKuIvh45aw,322
77
+ dataforge/safety/adversarial/attack_08_phone_pii.yaml,sha256=ICuQvuEBPujUssXrjiKTmAu3LiENFOpQidEW-os9loQ,301
78
+ dataforge/safety/adversarial/attack_09_phone_pii.yaml,sha256=xomA7KGp-SCAE4ib10WGvUQlPbUeDBLOoUgYtf7Egk0,317
79
+ dataforge/safety/adversarial/attack_10_phone_pii.yaml,sha256=e6g6brv9Ma2Y-YCNaM65tTe1wjdc2_BUY-ocOToGETg,307
80
+ dataforge/safety/adversarial/attack_11_ssn_pii.yaml,sha256=N9X4HVfgrSophq2o9atQD6VFSbef4No1U8t8W3dwvv0,274
81
+ dataforge/safety/adversarial/attack_12_ssn_pii.yaml,sha256=qv6xU81Zi3KsnOyEC1Ch6B56mxiyvbmmWrxSqEv3060,264
82
+ dataforge/safety/adversarial/attack_13_ssn_pii.yaml,sha256=o26faIfKYp6NY3wNm4IuQPicvMBcsEck_Mny_JTEbz8,279
83
+ dataforge/safety/adversarial/attack_14_ssn_pii.yaml,sha256=gyczJ2qdf3qlfJiZPuCDATbE3oV24p1yGxvv1I_GR34,268
84
+ dataforge/safety/adversarial/attack_15_ssn_pii.yaml,sha256=ARKePkM9V5EEA5PyP4tKFAkfefPiEROOcCTLAU_wmmk,276
85
+ dataforge/safety/adversarial/attack_16_ssn_pii.yaml,sha256=MxwYBOzlkeNvUZhpuTLtQ6rdKKY5pt5k_MZiz5c7HEE,274
86
+ dataforge/safety/adversarial/attack_17_ssn_pii.yaml,sha256=Cl84OQVxATk5C8fv00DMdIAbDqwzlp3SHgIYi6wk_eY,270
87
+ dataforge/safety/adversarial/attack_18_ssn_pii.yaml,sha256=61aG3jUjCG6PlM0SWJO-cvPrqfc9OWAEzrq6oryB8lI,276
88
+ dataforge/safety/adversarial/attack_19_ssn_pii.yaml,sha256=1V-OFHIlFjc-xHDtTcr3ddpJA4lhObQVInFt3Im_r2E,271
89
+ dataforge/safety/adversarial/attack_20_ssn_pii.yaml,sha256=9UfjzICuFAs33S-qnlYO-JVdjTYG5Qoxt2HWEs0bJXc,275
90
+ dataforge/safety/adversarial/attack_21_email_pii.yaml,sha256=PBE8fnBHvafm4yCvkKyOBKp34lIfk2iQ_nBsVwXiMVo,284
91
+ dataforge/safety/adversarial/attack_22_email_pii.yaml,sha256=BUNIx78Q2lOGqMrXblFbXozvaLo2ftva1nd-zeCSsEU,270
92
+ dataforge/safety/adversarial/attack_23_email_pii.yaml,sha256=z3IICcf312PYx6sbaF0L7U2iOtPlwIkvRPGoe2-eh3Q,292
93
+ dataforge/safety/adversarial/attack_24_email_pii.yaml,sha256=MO5VKzNPrwpRM8gZsZ8QLl7YmCYeExMnuPYi22P3Rjg,284
94
+ dataforge/safety/adversarial/attack_25_email_pii.yaml,sha256=WQXdsrLDvtRLy_sl-cDUVRuNaVJJiMkNUZjmZykvxtY,279
95
+ dataforge/safety/adversarial/attack_26_email_pii.yaml,sha256=4ZDsISY3MHLxMOjzQ30e_mMVHeomaZNeWoz8ot3TJ-U,299
96
+ dataforge/safety/adversarial/attack_27_email_pii.yaml,sha256=ZKRrZSnzT4xqHUyVDb9PDJ0QS0EOHqXhP0-H-lre8bc,277
97
+ dataforge/safety/adversarial/attack_28_email_pii.yaml,sha256=Upxp7Nm-hUG01tBk6jeO_25IoAvgt34JVZr6RAVyzRQ,311
98
+ dataforge/safety/adversarial/attack_29_email_pii.yaml,sha256=-0p_oKbvrXpWI0-QpNSuyaESLmt7syzR9K_TA9qw9TY,272
99
+ dataforge/safety/adversarial/attack_30_email_pii.yaml,sha256=tTk6jkDvwYQdY6edto9p3Bwx8FlsCqBEVgsbgHAAgLk,288
100
+ dataforge/safety/adversarial/attack_31_row_delete.yaml,sha256=y1ErFI6T2uF-7NihCjWYK1vWZT4Qw7M5TzFlfbFjKAA,294
101
+ dataforge/safety/adversarial/attack_32_row_delete.yaml,sha256=3Zdi2SUWY8rl51-iu52DNSEiFF6EM7DmmBeOSUSxfno,324
102
+ dataforge/safety/adversarial/attack_33_row_delete.yaml,sha256=SWCA9Hdftx_t-u4MnkBc3zJ5QMXFTti8OU8MVHft1EA,309
103
+ dataforge/safety/adversarial/attack_34_row_delete.yaml,sha256=ifA95VOSpZggFpaJ4elNBQnapJijAZKWvMq4v8mPyWU,295
104
+ dataforge/safety/adversarial/attack_35_row_delete.yaml,sha256=TfMhH5ntn2iaD-u3vibQo36c7QqGw5LA3oUsjxaCsqU,284
105
+ dataforge/safety/adversarial/attack_36_row_delete.yaml,sha256=EVPazCaA2QGalBNVR7WMetAytm0BT7XJopYlYmuYzvk,408
106
+ dataforge/safety/adversarial/attack_37_row_delete.yaml,sha256=5GKyZj6k8VF5cWmWg4zBHcCsIxALNrLysr28s1ir5e0,302
107
+ dataforge/safety/adversarial/attack_38_row_delete.yaml,sha256=MsQFOjKlqhJOJuyeZAJ6aLSNRu0MBT89qDJN3LA1_L4,314
108
+ dataforge/safety/adversarial/attack_39_row_delete.yaml,sha256=-EMt-H3UuyqhmK_qsoyPvdFworTVseavDOKfD2RRNBc,320
109
+ dataforge/safety/adversarial/attack_40_row_delete.yaml,sha256=mS1NjQwzp7bOUGH-FU9Qb2CcNJlrt4l5h7nm7ujB6KY,291
110
+ dataforge/safety/adversarial/attack_41_row_delete.yaml,sha256=QJvVGhngpbZSzTG03i7G1PBtYm1LTLUp00tjXn8ah4E,289
111
+ dataforge/safety/adversarial/attack_42_row_delete.yaml,sha256=VspfGlc37r_X2_AJRBMRFmsmSfaBJwXPtgnYtIT8TC4,293
112
+ dataforge/safety/adversarial/attack_43_row_delete.yaml,sha256=Sp2aTI3f13WwGWd4bjBTy9bYDnw1JPkmIVRnl_7sZWE,288
113
+ dataforge/safety/adversarial/attack_44_row_delete.yaml,sha256=JjsJ8lrv_B0Jcy_MsaOEyXc1Gk2iiHYgETUlh7MnLcE,291
114
+ dataforge/safety/adversarial/attack_45_row_delete.yaml,sha256=7-WDfP7iriyGjFAWFrIlChmBOUQ9ENJ27QaEFwgXl_g,331
115
+ dataforge/safety/adversarial/attack_46_row_delete.yaml,sha256=NYkrTbFZH7i992ihyTE5_-3gMo2nvO8xIqoFegWHopg,326
116
+ dataforge/safety/adversarial/attack_47_row_delete.yaml,sha256=-27qVsm9msuEpXr36z9leXxRNf0fuhc2Mk-vRNUygrk,295
117
+ dataforge/safety/adversarial/attack_48_row_delete.yaml,sha256=tc292hNaNEnECbDCckWilUTA2ZemYCJCSdBCm8lE4-M,307
118
+ dataforge/safety/adversarial/attack_49_row_delete.yaml,sha256=YdMg1I-p_780G68sSOrfbEkc_Oi-TFK52A06D2Mbyw0,325
119
+ dataforge/safety/adversarial/attack_50_row_delete.yaml,sha256=zJVqaq3dwh7Dwp5Ous_BMxsyTS6UcWvEtKA431G7uFc,299
120
+ dataforge/safety/constitutions/default.yaml,sha256=hUGQftPLDsleEm20Y4yh9jMFGTPWE00emYoa6t4GRRQ,1705
121
+ dataforge/stores/__init__.py,sha256=nqa49dOPIb_rkURh7JapgOYBBY3cu9waZkG6mq7cXqY,1094
122
+ dataforge/stores/base.py,sha256=w2RLdvSVZWFBTqKRG21KU8EwyHyuXdsyXCFS0D5i6gI,2198
123
+ dataforge/stores/cloud.py,sha256=WJ3QKOX44zrOpKmYNkA4_4jKm0f4VYlRd0FGu4W45D8,2547
124
+ dataforge/stores/csv.py,sha256=jqonyrYXZjRgmHTbQTVfVrEwXezqbX8mrRHVQDTWaIE,3406
125
+ dataforge/stores/duckdb.py,sha256=1WT2RzBKvfQgXlQBCTDSryMoasUytwxV57Dfytd_gSQ,13023
126
+ dataforge/stores/patch_plan.py,sha256=y21c5Gb-KyTwSxMS3utGBZKIzeb-_cCfx16p6xJPkpw,6699
127
+ dataforge/stores/registry.py,sha256=UTFd9Qw0OMCTNjp2EUooNfMl9YrgJCfzA98sGnR-n4k,3120
128
+ dataforge/stores/repair.py,sha256=L1ZsD0f3CUp4wkmbhHY0ZXeNkR_UTd0oFreqGdvM_xM,3862
129
+ dataforge/stores/revert.py,sha256=lEYPRFQblHVc_O9IrK4OSG4qdKGUBFLMeBVE697gLUM,860
130
+ dataforge/stores/sql.py,sha256=MtAQDRkD_6qZMszVI6SEmSWvyvH8UZ9nv5e2aKs2UQc,972
131
+ dataforge/transactions/__init__.py,sha256=9YIte8qns8bcnVa2JCwU4IkVJ3A_7_VTX6h-zbF6HEQ,925
132
+ dataforge/transactions/files.py,sha256=AS4W5i6kFSxPYfdxe6UBhUKi6A_UDXOlOy-Xm6aEBbA,3042
133
+ dataforge/transactions/log.py,sha256=ZPc6xRlju6V9tMYX9ZNVAi-zrinCe49b_NsR3FkS2TI,21918
134
+ dataforge/transactions/revert.py,sha256=_3QS5qBbaPi9TJwtIb0dIKQ-Z2A3Q5GONZ0Xfhuwuzg,3859
135
+ dataforge/transactions/txn.py,sha256=HZ0eAHMZze56xULuWr__LMJNtekNpAvuPZIS6yoMEDc,3860
136
+ dataforge/ui/__init__.py,sha256=5ENf2aaywlzAjxlPNlzsE3aNAHv7tpIFAFglkKp3pZ4,53
137
+ dataforge/ui/profile_view.py,sha256=E99bqHtyzLN_eNWIXVCz8g0fTNgOTCd-7BJwJF0nAIc,4199
138
+ dataforge/ui/repair_diff.py,sha256=RS_ZYn8i9tvEfyaGozRNWn4AytBPDjMR0c9ShrWWIgA,2638
139
+ dataforge/verifier/__init__.py,sha256=Dr_lcElz5uMhB3X88W81i0XH_GRlUllFaCUaiypG9Yg,1554
140
+ dataforge/verifier/constraint_ir.py,sha256=FoMIQUKwiA4QGdYlWHoiIvBsr5WvNOTqS0MUrwcmpDM,5417
141
+ dataforge/verifier/explain.py,sha256=U9FR7nmnXR4Cwz2zfaIIVIQHFSAL8Ax00vxKyXMKBKY,2165
142
+ dataforge/verifier/gate.py,sha256=m9T_k0pjFy1Q2W35xHs3gArkE2pfFcl25bVR4munbsI,214
143
+ dataforge/verifier/schema.py,sha256=0Lauq0lgyUer7pl1x8xcfYwwaSKHaTiOrkACS7MJjNU,4253
144
+ dataforge/verifier/smt.py,sha256=4Aus_my4NDCE6ZURSSatW_CrEFdgKLYAc43CdwKQ8Ug,16302
145
+ dataforge_07-0.1.0.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
146
+ dataforge_07-0.1.0.dist-info/METADATA,sha256=3LDhPiP3spXGVLWV1Nu-bF2Yvl6WroBq8502nb4ODUM,19542
147
+ dataforge_07-0.1.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
148
+ dataforge_07-0.1.0.dist-info/entry_points.txt,sha256=0gxQ4PXbK8S2mhiSL0LqZdaEqM8LCc3dh0pd0aheQ2Q,80
149
+ dataforge_07-0.1.0.dist-info/top_level.txt,sha256=xv3CY-CdHCuuvu_sV6g-QJzpRJM7YFJwvQunsUpM0As,10
150
+ dataforge_07-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ dataforge = dataforge.cli:app
3
+ dataforge15 = dataforge.cli:app