datadoom 0.1.0.dev0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.github/workflows/release.yml +6 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.tmp_pdir.txt +1 -1
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/CHANGELOG.md +1 -1
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/PKG-INFO +107 -8
- datadoom-0.1.1/README.md +179 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/00_README_Index.md +1 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/22_Release_and_Publishing_Runbook.md +16 -2
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/GenerationsPanel.tsx +1 -1
- datadoom-0.1.1/frontend/tsconfig.tsbuildinfo +1 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/cli/main.py +9 -1
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/version.py +1 -1
- datadoom-0.1.1/src/datadoom/webdist/assets/index-BX7Czb8j.js +445 -0
- datadoom-0.1.1/src/datadoom/webdist/assets/index-CpYdcCJT.css +1 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/index.html +2 -2
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/status.md +9 -8
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/testing_guide.md +48 -2
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_failure_audit.py +245 -245
- datadoom-0.1.0.dev0/.claude/settings.local.json +0 -212
- datadoom-0.1.0.dev0/README.md +0 -80
- datadoom-0.1.0.dev0/frontend/tsconfig.tsbuildinfo +0 -1
- datadoom-0.1.0.dev0/src/datadoom/webdist/assets/index-V8VAuTJG.js +0 -445
- datadoom-0.1.0.dev0/src/datadoom/webdist/assets/index-doRjyG5s.css +0 -1
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.dockerignore +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.github/workflows/ci.yml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.github/workflows/docs.yml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.github/workflows/repro-matrix.yml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.gitignore +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/.pre-commit-config.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/CLAUDE.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/CODE_OF_CONDUCT.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/CONTRIBUTING.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Dockerfile +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Advanced_System_Documents.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Cost_Estimation_Model.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/DB_Schema&Indexing.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/DataDoom_Critical_Analysis_and_Recommended_Architecture.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/DataDoom_PRD_v2_OpenSource.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Engineering_Roadmap.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/File_Structure.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/HackForge AI User Flow Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/HackForge AI_ AI-Accelerated Engineering Playbook.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Implementation_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Infrastructure_Summary.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Internal_Data_Models.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Kafka_Event_Contract.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Language_Specification.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Learning_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Mathematical_Algorithm_Definitions.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Microservices_API_Contract.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Mulit_Tenant_Isolation.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Open_API_Scheme.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/PRD.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Portobuff_Definitions.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/SLA_SLO_RateLimiting.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Security_Compliance.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/Docs/Techincal_Architecture.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/GOVERNANCE.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/LICENSE +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/SECURITY.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/design/UI_Design_Prompts.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/architecture.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/authoring.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/examples.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/index.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/llm-reference.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/plugins.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_site/spec-reference.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/01_PRD.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/02_User_Flow_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/03_Technical_Architecture.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/04_DataDoom_Spec_Reference.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/05_Mathematical_Algorithm_Definitions.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/06_Internal_Data_Models.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/07_Database_Schema.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/08_API_Contract.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/09_Plugin_System.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/10_File_Structure.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/11_Language_Technology_Specification.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/12_Resource_Estimation_Model.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/13_Testing_and_Reproducibility_Strategy.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/14_Security_and_Privacy.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/15_Open_Source_Governance.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/16_Engineering_Roadmap.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/17_Implementation_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/18_AI_Accelerated_Engineering_Playbook.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/19_Learning_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/20_YAML_Authoring_Guide.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/docs_v2/21_LLM_Spec_Authoring_Reference.md +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/causal-fraud.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/difficulty-credit.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/failure-fraud.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/people-realistic.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/tabular-basic.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/examples/timeseries-sensor.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/index.html +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/package-lock.json +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/package.json +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/postcss.config.js +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/App.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/CausalGraphEditor.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/CausalGraphView.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/CausalInspector.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ColumnGuideView.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ComparisonView.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ConfirmHost.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/DifficultyConfigurator.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/DifficultyView.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ErrorBoundary.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ExportModal.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/FailureBadges.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/FailureConfigurator.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/FailureInspector.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/Histogram.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/Inspector.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/Layout.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/Modal.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/OverviewView.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/SpecDrawer.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/StageStepper.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/TableCanvas.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/Toaster.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/components/ui.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/api.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/audit.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/causal.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/clsx.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/difficulty.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/failures.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/runSocket.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/sampling.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/schemaForm.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/specDefaults.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/summary.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/types.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/useHistory.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/lib/viewLayout.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/main.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Canvas.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Dashboard.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Placeholder.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Plugins.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Results.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Templates.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/pages/Tracker.tsx +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/store/chrome.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/store/confirm.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/store/toast.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/store/ui.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/src/styles.css +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/tailwind.config.js +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/tsconfig.json +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/tsconfig.node.json +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/tsconfig.node.tsbuildinfo +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/vite.config.d.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/vite.config.js +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/frontend/vite.config.ts +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/mkdocs.yml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/pyproject.toml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/adapters/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/adapters/frameworks.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/adapters/loaders.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/app.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/deps.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/errors.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/estimate.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/artifacts.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/datasets.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/meta.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/plugins.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/runs.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/specs.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/routes/templates.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/schemas.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/serializers.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/state.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/store_helpers.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/api/ws.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/cli/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/config.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/advice.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/audit.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/causal/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/causal/execute.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/causal/functions.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/causal/graph.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/difficulty/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/difficulty/calibrate.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/difficulty/knobs.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/difficulty/probes.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/dist/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/dist/base.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/dist/builtins.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/dist/compliance.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/dist/providers.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/errors.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/base.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/checksums.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/csv_exporter.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/json_exporter.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/metadata.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/export/parquet_exporter.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/failure/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/failure/apply.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/failure/base.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/failure/modes.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/pipeline.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/profile.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/progress.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/reference.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/reports.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/rng.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/spec/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/spec/hashing.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/spec/models.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/spec/validate.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/engine/timeseries.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/jobs/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/jobs/progress.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/jobs/worker.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugin.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugins/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugins/contracts.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugins/loader.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugins/registry.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/plugins/scaffold.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/artifacts.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/db.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/env.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/script.py.mako +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/versions/0001_init.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/versions/0002_report_mutual_information.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/versions/0003_run_name.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/migrations/versions/0004_report_profile.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/models.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/store/repositories.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/__init__.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/ab_test.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/clinical_deterioration.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/credit_default_challenge.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/customer_churn.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/ecommerce_orders.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/fraud_detection.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/hospital_readmission.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/insurance_claims.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/iot_sensors.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/people_directory.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/predictive_maintenance.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/templates/telecom_churn_challenge.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-cyrillic-ext-wght-normal-BOeWTOD4.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-cyrillic-wght-normal-DqGufNeO.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-greek-ext-wght-normal-DlzME5K_.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-greek-wght-normal-CkhJZR-_.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-latin-ext-wght-normal-DO1Apj_S.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-latin-wght-normal-Dx4kXJAl.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/inter-vietnamese-wght-normal-CBcvBZtf.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/jetbrains-mono-cyrillic-wght-normal-D73BlboJ.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/jetbrains-mono-greek-wght-normal-Bw9x6K1M.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/jetbrains-mono-latin-ext-wght-normal-DBQx-q_a.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/jetbrains-mono-latin-wght-normal-B9CIFXIH.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/jetbrains-mono-vietnamese-wght-normal-Bt-aOZkq.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/space-grotesk-latin-ext-wght-normal-D9tNdqV9.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/space-grotesk-latin-wght-normal-BhU9QXUp.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/src/datadoom/webdist/assets/space-grotesk-vietnamese-wght-normal-D0rl6rjA.woff2 +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/api/conftest.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/api/test_api.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/determinism/test_determinism.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/golden/checksums.json +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/golden/fraud_numeric.datadoom.yaml +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/perf/test_perf_budget.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/plugin_contract/test_plugins.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_adapters.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_audit.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_causal.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_dataset_audit.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_difficulty.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_difficulty_audit.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_dist.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_dist_correctness.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_export.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_export_formats.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_failure.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_hashing.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_latent.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_metadata.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_pipeline.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_profile.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_providers.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_reference.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_rng.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_spec_validate.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_store.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_templates.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_timeseries.py +0 -0
- {datadoom-0.1.0.dev0 → datadoom-0.1.1}/tests/unit/test_version.py +0 -0
|
@@ -111,6 +111,12 @@ jobs:
|
|
|
111
111
|
name: dist
|
|
112
112
|
path: dist
|
|
113
113
|
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
114
|
+
with:
|
|
115
|
+
# PyPI versions are immutable: a re-run (or a tag pointing at an already
|
|
116
|
+
# published version) would otherwise fail with "File already exists".
|
|
117
|
+
# Skip files already on PyPI so re-runs are idempotent; publishing NEW
|
|
118
|
+
# content still requires bumping src/datadoom/version.py.
|
|
119
|
+
skip-existing: true
|
|
114
120
|
|
|
115
121
|
docker:
|
|
116
122
|
name: build + push image (GHCR)
|
|
@@ -1 +1 @@
|
|
|
1
|
-
C:\Users\santh\AppData\Local\Temp\claude\tmp5pkupaok
|
|
1
|
+
C:\Users\santh\AppData\Local\Temp\claude\tmp5pkupaok
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: datadoom
|
|
3
|
-
Version: 0.1.0.dev0
|
|
3
|
+
Version: 0.1.1
|
|
4
4
|
Summary: Local-first, open-source engine for controllable, reproducible synthetic data.
|
|
5
5
|
Project-URL: Homepage, https://github.com/SanthoshReddy352/datadoom
|
|
6
6
|
Project-URL: Repository, https://github.com/SanthoshReddy352/datadoom
|
|
@@ -80,12 +80,53 @@ difficulty, and failure modes — and regenerate it identically, forever, from a
|
|
|
80
80
|
📖 **Docs:** <https://santhoshreddy352.github.io/datadoom/> · authoritative design in
|
|
81
81
|
[`docs_v2/`](docs_v2/) (start at [`docs_v2/00_README_Index.md`](docs_v2/00_README_Index.md)).
|
|
82
82
|
|
|
83
|
+
## Why DataDoom
|
|
84
|
+
|
|
85
|
+
Synthetic data usually forces a trade-off: it's either **realistic but a black box**
|
|
86
|
+
(you can't say what relationships or flaws it contains) or **controllable but
|
|
87
|
+
throwaway** (you can't regenerate the exact same dataset tomorrow). That makes it hard
|
|
88
|
+
to teach with, benchmark against, file a bug against, or share.
|
|
89
|
+
|
|
90
|
+
**The goal:** make a dataset something you *design* and *version-control like source
|
|
91
|
+
code*. You declare its structure — distributions, causal relationships, difficulty,
|
|
92
|
+
and data-quality failures — in one spec file, and DataDoom regenerates it
|
|
93
|
+
**byte-for-byte identically** from `(spec_hash, seed)`, while honestly reporting how
|
|
94
|
+
well the realized data matches what you asked for. No network, no telemetry, no
|
|
95
|
+
account: everything runs locally.
|
|
96
|
+
|
|
97
|
+
**Good for:** ML teaching & reproducible benchmarks · testing data pipelines on known
|
|
98
|
+
edge cases · sharing a dataset's *recipe* instead of PII · hackathon / challenge
|
|
99
|
+
datasets with a known ground truth.
|
|
100
|
+
|
|
101
|
+
## What it does
|
|
102
|
+
|
|
103
|
+
- **Deterministic by construction** — one seeded RNG underpins everything; the same
|
|
104
|
+
spec + seed yields a bitwise-identical dataset on the pinned path.
|
|
105
|
+
- **Honest statistics** — distributions are sampled correctly and their fit is
|
|
106
|
+
*reported* (KS / chi-square goodness-of-fit, compliance score); parameters are never
|
|
107
|
+
refit to flatter the sample.
|
|
108
|
+
- **Causal structure** — a DAG of structural equations (linear/logistic/polynomial/…)
|
|
109
|
+
with per-node noise and `do()` interventions, plus a true-graph + mutual-information
|
|
110
|
+
report.
|
|
111
|
+
- **Failure injection** — eight mechanisms (MCAR/MAR/MNAR, label & feature noise,
|
|
112
|
+
drift, covariate shift, leakage) corrupt a *copy* while the clean baseline is kept,
|
|
113
|
+
with realized-effect diffs.
|
|
114
|
+
- **Difficulty targeting** — calibrate a binary label to a chosen baseline-model AUROC
|
|
115
|
+
band, reported with the achieved metric, knobs, and bisection trace.
|
|
116
|
+
- **Rich feature types** — numeric/categorical/boolean/datetime, realistic seeded text
|
|
117
|
+
(names, emails, addresses), additive time-series, and latent (hidden) features.
|
|
118
|
+
- **Extensible** — distributions, structural functions, failure modes, exporters, and
|
|
119
|
+
probes all ship as plugins against the engine ABCs, with zero core changes.
|
|
120
|
+
- **Built to consume** — export CSV / JSON / Parquet, load a run straight into
|
|
121
|
+
pandas / PyTorch / TensorFlow / HuggingFace, and start from built-in domain templates
|
|
122
|
+
(including ready-made hackathon challenges).
|
|
123
|
+
- **Two surfaces, one engine** — a CLI for automation and a web Canvas for design both
|
|
124
|
+
call the exact same pipeline, so results never diverge.
|
|
125
|
+
|
|
83
126
|
## Status
|
|
84
127
|
|
|
85
|
-
**Phases 0–5 complete; 1.0 hardening underway.**
|
|
86
|
-
|
|
87
|
-
exporters, templates, time-series, framework adapters, and the AI spec-authoring
|
|
88
|
-
manifest all ship. Remaining for 1.0 is hardening (docs, release automation, the repro
|
|
128
|
+
**Phases 0–5 complete; 1.0 hardening underway.** Everything in *What it does* above
|
|
129
|
+
ships today. Remaining for 1.0 is hardening (docs site, release automation, the repro
|
|
89
130
|
matrix); see [`status.md`](status.md). Optional team mode is a deferred future addon.
|
|
90
131
|
|
|
91
132
|
## Install
|
|
@@ -110,18 +151,73 @@ datadoom verify examples/causal-fraud.datadoom.yaml --seed 42 --against out/
|
|
|
110
151
|
|
|
111
152
|
# start from a built-in domain template
|
|
112
153
|
datadoom template use fraud-detection --out my.datadoom.yaml
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Web UI (Canvas)
|
|
157
|
+
|
|
158
|
+
The web Canvas — design schemas, wire causal graphs, configure difficulty/failures,
|
|
159
|
+
generate with a live tracker, preview/compare/export — ships **prebuilt inside the
|
|
160
|
+
package** (no Node toolchain needed). There are two ways to run it.
|
|
161
|
+
|
|
162
|
+
### Option A — pip + `datadoom serve`
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
pip install "datadoom[server]" # the [server] extra adds FastAPI/uvicorn
|
|
166
|
+
datadoom serve # serves the API + Canvas on http://127.0.0.1:8000
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Then open <http://127.0.0.1:8000> in your browser. `datadoom serve` is what starts
|
|
170
|
+
the UI — installing the package alone does not run a server.
|
|
171
|
+
|
|
172
|
+
> **Hitting `The web server needs extra deps … pip install 'datadoom[server]'`
|
|
173
|
+
> even after installing it?** You almost certainly have an older `datadoom` already
|
|
174
|
+
> installed, so pip reports "already satisfied" and never pulls the `[server]`
|
|
175
|
+
> dependencies. Force a clean reinstall:
|
|
176
|
+
> ```bash
|
|
177
|
+
> pip install --upgrade --force-reinstall --no-cache-dir "datadoom[server]"
|
|
178
|
+
> ```
|
|
179
|
+
|
|
180
|
+
### Option B — Docker (UI starts automatically)
|
|
181
|
+
|
|
182
|
+
The image's entrypoint **is** `datadoom serve`, so the Canvas comes up as soon as
|
|
183
|
+
the container runs — you do **not** run any extra command.
|
|
113
184
|
|
|
114
|
-
|
|
115
|
-
|
|
185
|
+
**Build and run from a clone (works today):**
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
docker build -t datadoom:local .
|
|
189
|
+
docker run --rm -p 8000:8000 -v datadoom-data:/data datadoom:local
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
**Or pull the published image** (available after a tagged release pushes it to
|
|
193
|
+
GHCR — see [`docs_v2/22`](docs_v2/22_Release_and_Publishing_Runbook.md) §3):
|
|
194
|
+
|
|
195
|
+
```bash
|
|
196
|
+
docker run --rm -p 8000:8000 -v datadoom-data:/data ghcr.io/santhoshreddy352/datadoom:latest
|
|
116
197
|
```
|
|
117
198
|
|
|
199
|
+
> Each `docker run` is a **single line** on purpose — it works in PowerShell, CMD,
|
|
200
|
+
> and bash alike. A `\` line-continuation is bash-only and breaks in PowerShell.
|
|
201
|
+
|
|
202
|
+
Open <http://localhost:8000>. The `-v datadoom-data:/data` volume persists your
|
|
203
|
+
datasets/runs across restarts; the server binds `0.0.0.0:8000` inside the container.
|
|
204
|
+
|
|
118
205
|
## Development
|
|
119
206
|
|
|
207
|
+
Clone the repo (or **fork** it first on GitHub and clone your fork if you intend to
|
|
208
|
+
open a pull request), then set up a project-local virtual environment:
|
|
209
|
+
|
|
120
210
|
```bash
|
|
211
|
+
# clone (use your fork's URL if you forked)
|
|
212
|
+
git clone https://github.com/SanthoshReddy352/datadoom.git
|
|
213
|
+
cd datadoom
|
|
214
|
+
|
|
215
|
+
# project-local venv (Python 3.11 matches CI's lowest supported version)
|
|
121
216
|
python -m venv .venv
|
|
122
217
|
.venv\Scripts\activate # Windows
|
|
123
218
|
# source .venv/bin/activate # macOS/Linux
|
|
124
|
-
|
|
219
|
+
|
|
220
|
+
pip install -e ".[dev]" # editable install + dev tools
|
|
125
221
|
|
|
126
222
|
ruff check src tests # lint
|
|
127
223
|
lint-imports # architecture boundaries
|
|
@@ -129,6 +225,9 @@ mypy # type-check
|
|
|
129
225
|
pytest # test suite
|
|
130
226
|
```
|
|
131
227
|
|
|
228
|
+
Contributions are welcome — please commit with DCO sign-off (`git commit -s`) and run
|
|
229
|
+
the gates above before opening a PR. See [`CONTRIBUTING.md`](CONTRIBUTING.md).
|
|
230
|
+
|
|
132
231
|
## The reproducibility guarantee (scoped)
|
|
133
232
|
|
|
134
233
|
Given the same spec and seed, on the **pinned path** (single-threaded BLAS, pinned
|
datadoom-0.1.1/README.md
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# DataDoom
|
|
2
|
+
|
|
3
|
+
**Local-first, open-source engine for controllable, reproducible synthetic data.**
|
|
4
|
+
|
|
5
|
+
[![CI](https://github.com/SanthoshReddy352/datadoom/actions/workflows/ci.yml/badge.svg)](https://github.com/SanthoshReddy352/datadoom/actions/workflows/ci.yml)
|
|
6
|
+
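[![Reproducibility Matrix](https://github.com/SanthoshReddy352/datadoom/actions/workflows/repro-matrix.yml/badge.svg)](https://github.com/SanthoshReddy352/datadoom/actions/workflows/repro-matrix.yml)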
|
|
7
|
+
[Docs](https://santhoshreddy352.github.io/datadoom/)
|
|
8
|
+
[Python 3.11+](https://www.python.org/)
|
|
9
|
+
[License: Apache-2.0](LICENSE)
|
|
10
|
+
|
|
11
|
+
Design the dataset the way you reason about it — distributions, causal relationships,
|
|
12
|
+
difficulty, and failure modes — and regenerate it identically, forever, from a single spec file.
|
|
13
|
+
|
|
14
|
+
> **North star:** a synthetic dataset should be as version-controllable, shareable, and
|
|
15
|
+
> reproducible as source code.
|
|
16
|
+
|
|
17
|
+
📖 **Docs:** <https://santhoshreddy352.github.io/datadoom/> · authoritative design in
|
|
18
|
+
[`docs_v2/`](docs_v2/) (start at [`docs_v2/00_README_Index.md`](docs_v2/00_README_Index.md)).
|
|
19
|
+
|
|
20
|
+
## Why DataDoom
|
|
21
|
+
|
|
22
|
+
Synthetic data usually forces a trade-off: it's either **realistic but a black box**
|
|
23
|
+
(you can't say what relationships or flaws it contains) or **controllable but
|
|
24
|
+
throwaway** (you can't regenerate the exact same dataset tomorrow). That makes it hard
|
|
25
|
+
to teach with, benchmark against, file a bug against, or share.
|
|
26
|
+
|
|
27
|
+
**The goal:** make a dataset something you *design* and *version-control like source
|
|
28
|
+
code*. You declare its structure — distributions, causal relationships, difficulty,
|
|
29
|
+
and data-quality failures — in one spec file, and DataDoom regenerates it
|
|
30
|
+
**byte-for-byte identically** from `(spec_hash, seed)`, while honestly reporting how
|
|
31
|
+
well the realized data matches what you asked for. No network, no telemetry, no
|
|
32
|
+
account: everything runs locally.
|
|
33
|
+
|
|
34
|
+
**Good for:** ML teaching & reproducible benchmarks · testing data pipelines on known
|
|
35
|
+
edge cases · sharing a dataset's *recipe* instead of PII · hackathon / challenge
|
|
36
|
+
datasets with a known ground truth.
|
|
37
|
+
|
|
38
|
+
## What it does
|
|
39
|
+
|
|
40
|
+
- **Deterministic by construction** — one seeded RNG underpins everything; the same
|
|
41
|
+
spec + seed yields a bitwise-identical dataset on the pinned path.
|
|
42
|
+
- **Honest statistics** — distributions are sampled correctly and their fit is
|
|
43
|
+
*reported* (KS / chi-square goodness-of-fit, compliance score); parameters are never
|
|
44
|
+
refit to flatter the sample.
|
|
45
|
+
- **Causal structure** — a DAG of structural equations (linear/logistic/polynomial/…)
|
|
46
|
+
with per-node noise and `do()` interventions, plus a true-graph + mutual-information
|
|
47
|
+
report.
|
|
48
|
+
- **Failure injection** — eight mechanisms (MCAR/MAR/MNAR, label & feature noise,
|
|
49
|
+
drift, covariate shift, leakage) corrupt a *copy* while the clean baseline is kept,
|
|
50
|
+
with realized-effect diffs.
|
|
51
|
+
- **Difficulty targeting** — calibrate a binary label to a chosen baseline-model AUROC
|
|
52
|
+
band, reported with the achieved metric, knobs, and bisection trace.
|
|
53
|
+
- **Rich feature types** — numeric/categorical/boolean/datetime, realistic seeded text
|
|
54
|
+
(names, emails, addresses), additive time-series, and latent (hidden) features.
|
|
55
|
+
- **Extensible** — distributions, structural functions, failure modes, exporters, and
|
|
56
|
+
probes all ship as plugins against the engine ABCs, with zero core changes.
|
|
57
|
+
- **Built to consume** — export CSV / JSON / Parquet, load a run straight into
|
|
58
|
+
pandas / PyTorch / TensorFlow / HuggingFace, and start from built-in domain templates
|
|
59
|
+
(including ready-made hackathon challenges).
|
|
60
|
+
- **Two surfaces, one engine** — a CLI for automation and a web Canvas for design both
|
|
61
|
+
call the exact same pipeline, so results never diverge.
|
|
62
|
+
|
|
63
|
+
## Status
|
|
64
|
+
|
|
65
|
+
**Phases 0–5 complete; 1.0 hardening underway.** Everything in *What it does* above
|
|
66
|
+
ships today. Remaining for 1.0 is hardening (docs site, release automation, the repro
|
|
67
|
+
matrix); see [`status.md`](status.md). Optional team mode is a deferred future addon.
|
|
68
|
+
|
|
69
|
+
## Install
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install datadoom # engine + CLI
|
|
73
|
+
pip install "datadoom[server]" # + web Canvas (datadoom serve)
|
|
74
|
+
pip install "datadoom[parquet]" # + Parquet export
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
## Quickstart
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# generate a dataset from a spec
|
|
81
|
+
datadoom run examples/causal-fraud.datadoom.yaml --seed 42 --out out/
|
|
82
|
+
|
|
83
|
+
# validate a spec
|
|
84
|
+
datadoom validate examples/causal-fraud.datadoom.yaml
|
|
85
|
+
|
|
86
|
+
# verify a run reproduces bitwise from spec + seed
|
|
87
|
+
datadoom verify examples/causal-fraud.datadoom.yaml --seed 42 --against out/
|
|
88
|
+
|
|
89
|
+
# start from a built-in domain template
|
|
90
|
+
datadoom template use fraud-detection --out my.datadoom.yaml
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Web UI (Canvas)
|
|
94
|
+
|
|
95
|
+
The web Canvas — design schemas, wire causal graphs, configure difficulty/failures,
|
|
96
|
+
generate with a live tracker, preview/compare/export — ships **prebuilt inside the
|
|
97
|
+
package** (no Node toolchain needed). There are two ways to run it.
|
|
98
|
+
|
|
99
|
+
### Option A — pip + `datadoom serve`
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
pip install "datadoom[server]" # the [server] extra adds FastAPI/uvicorn
|
|
103
|
+
datadoom serve # serves the API + Canvas on http://127.0.0.1:8000
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Then open <http://127.0.0.1:8000> in your browser. `datadoom serve` is what starts
|
|
107
|
+
the UI — installing the package alone does not run a server.
|
|
108
|
+
|
|
109
|
+
> **Hitting `The web server needs extra deps … pip install 'datadoom[server]'`
|
|
110
|
+
> even after installing it?** You almost certainly have an older `datadoom` already
|
|
111
|
+
> installed, so pip reports "already satisfied" and never pulls the `[server]`
|
|
112
|
+
> dependencies. Force a clean reinstall:
|
|
113
|
+
> ```bash
|
|
114
|
+
> pip install --upgrade --force-reinstall --no-cache-dir "datadoom[server]"
|
|
115
|
+
> ```
|
|
116
|
+
|
|
117
|
+
### Option B — Docker (UI starts automatically)
|
|
118
|
+
|
|
119
|
+
The image's entrypoint **is** `datadoom serve`, so the Canvas comes up as soon as
|
|
120
|
+
the container runs — you do **not** run any extra command.
|
|
121
|
+
|
|
122
|
+
**Build and run from a clone (works today):**
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
docker build -t datadoom:local .
|
|
126
|
+
docker run --rm -p 8000:8000 -v datadoom-data:/data datadoom:local
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
**Or pull the published image** (available after a tagged release pushes it to
|
|
130
|
+
GHCR — see [`docs_v2/22`](docs_v2/22_Release_and_Publishing_Runbook.md) §3):
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
docker run --rm -p 8000:8000 -v datadoom-data:/data ghcr.io/santhoshreddy352/datadoom:latest
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
> Each `docker run` is a **single line** on purpose — it works in PowerShell, CMD,
|
|
137
|
+
> and bash alike. A `\` line-continuation is bash-only and breaks in PowerShell.
|
|
138
|
+
|
|
139
|
+
Open <http://localhost:8000>. The `-v datadoom-data:/data` volume persists your
|
|
140
|
+
datasets/runs across restarts; the server binds `0.0.0.0:8000` inside the container.
|
|
141
|
+
|
|
142
|
+
## Development
|
|
143
|
+
|
|
144
|
+
Clone the repo (or **fork** it first on GitHub and clone your fork if you intend to
|
|
145
|
+
open a pull request), then set up a project-local virtual environment:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
# clone (use your fork's URL if you forked)
|
|
149
|
+
git clone https://github.com/SanthoshReddy352/datadoom.git
|
|
150
|
+
cd datadoom
|
|
151
|
+
|
|
152
|
+
# project-local venv (Python 3.11 matches CI's lowest supported version)
|
|
153
|
+
python -m venv .venv
|
|
154
|
+
.venv\Scripts\activate # Windows
|
|
155
|
+
# source .venv/bin/activate # macOS/Linux
|
|
156
|
+
|
|
157
|
+
pip install -e ".[dev]" # editable install + dev tools
|
|
158
|
+
|
|
159
|
+
ruff check src tests # lint
|
|
160
|
+
lint-imports # architecture boundaries
|
|
161
|
+
mypy # type-check
|
|
162
|
+
pytest # test suite
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
Contributions are welcome — please commit with DCO sign-off (`git commit -s`) and run
|
|
166
|
+
the gates above before opening a PR. See [`CONTRIBUTING.md`](CONTRIBUTING.md).
|
|
167
|
+
|
|
168
|
+
## The reproducibility guarantee (scoped)
|
|
169
|
+
|
|
170
|
+
Given the same spec and seed, on the **pinned path** (single-threaded BLAS, pinned
|
|
171
|
+
library versions, CPU, same OS/arch), DataDoom produces a **bitwise-identical** dataset.
|
|
172
|
+
Across different OS/architectures we guarantee **statistical** — not bitwise —
|
|
173
|
+
equivalence (floating-point reductions can differ between platforms). The cross-OS × cross-Python reproducibility matrix
|
|
174
|
+
enforces this in CI. See
|
|
175
|
+
[`docs_v2/13_Testing_and_Reproducibility_Strategy.md`](docs_v2/13_Testing_and_Reproducibility_Strategy.md).
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
[Apache-2.0](LICENSE).
|
|
@@ -46,6 +46,7 @@ This `docs_v2/` set is the **authoritative design** for DataDoom. It supersedes
|
|
|
46
46
|
| 20 | **YAML_Authoring_Guide** | Beginner-friendly, end-to-end guide to writing a spec by hand | `20_YAML_Authoring_Guide.md` |
|
|
47
47
|
| 21 | **LLM_Spec_Authoring_Reference** | Terse authoring contract optimized for AI/agent spec generation | `21_LLM_Spec_Authoring_Reference.md` |
|
|
48
48
|
| 22 | **Release_and_Publishing_Runbook** | Operator steps to publish docs/PyPI/Docker/releases (Pages, OIDC, provenance) | `22_Release_and_Publishing_Runbook.md` |
|
|
49
|
+
| 23 | **Pushing_Changes_DCO_and_Versioning** | How to push to GitHub, sign off commits (DCO), bump versions, cut a release | `23_Pushing_Changes_DCO_and_Versioning.md` |
|
|
49
50
|
|
|
50
51
|
### Legacy docs intentionally **dropped** (SaaS/microservices-only, not part of DataDoom v2 core)
|
|
51
52
|
`Advanced_System_Documents.md`, `Kafka_Event_Contract.md`, `Portobuff_Definitions.md`, `Mulit_Tenant_Isolation.md`, `SLA_SLO_RateLimiting.md`, `Infrastructure_Summary.md` — their still-relevant fragments are folded into 03, 06, 07, 14, 15. Kafka, gRPC/Protobuf, multi-tenant RLS, SLA tiers, and dedicated infra are **out of the core** (may return later as optional editions/plugins).
|
|
@@ -85,6 +85,20 @@ is tag-triggered: publishing is **tokenless** via PyPI Trusted Publishing (OIDC)
|
|
|
85
85
|
build **provenance** (see §4), creates the GitHub Release, and publishes to
|
|
86
86
|
PyPI via OIDC. No token needed.
|
|
87
87
|
|
|
88
|
+
> **PyPI versions are immutable.** Every release must use a **new** version in
|
|
89
|
+
> `src/datadoom/version.py` — you cannot re-upload an existing one. The publish
|
|
90
|
+
> step sets `skip-existing: true`, so a *re-run* of an already-published version
|
|
91
|
+
> succeeds as a no-op (instead of the `400 File already exists` error) — but to
|
|
92
|
+
> ship new content you must bump the version. `pip install datadoom` ignores
|
|
93
|
+
> `.devN` pre-releases unless they are the only versions available, so cut a
|
|
94
|
+
> non-dev version (e.g. `0.1.0`) for the first public release.
|
|
95
|
+
|
|
96
|
+
> **A user reports that `datadoom serve` says it needs `[server]` even after installing
|
|
97
|
+
> it?** That's a *stale local install*, not a packaging bug — pip sees the version as
|
|
98
|
+
> "already satisfied" and skips the extra's deps. Tell them:
|
|
99
|
+
> `pip install --upgrade --force-reinstall --no-cache-dir "datadoom[server]"`.
|
|
100
|
+
> (The published wheel does carry the `server` extra and the bundled web Canvas.)
|
|
101
|
+
|
|
88
102
|
**Manual fallback (if not yet automated):**
|
|
89
103
|
|
|
90
104
|
```bash
|
|
@@ -144,9 +158,9 @@ There is intentionally **no private key checked into or required by the repo**.
|
|
|
144
158
|
|
|
145
159
|
---
|
|
146
160
|
|
|
147
|
-
## 5. Reproducibility & CI badges
|
|
161
|
+
## 5. Reproducibility & CI badges 🟢 ready
|
|
148
162
|
|
|
149
|
-
The README badges (CI
|
|
163
|
+
The README badges (CI, Reproducibility Matrix, Docs) point at the existing
|
|
150
164
|
workflows and need no setup beyond the workflows running at least once on `main`.
|
|
151
165
|
The repro matrix pins numpy in its CI cells so the golden-checksum gate actually
|
|
152
166
|
asserts (rather than skips); see
|
|
@@ -151,7 +151,7 @@ export function GenerationsPanel({
|
|
|
151
151
|
</Button>
|
|
152
152
|
</a>
|
|
153
153
|
)}
|
|
154
|
-
<Menu trigger={({ toggle }) => <IconButton onClick={toggle}><MoreHorizontal size={16} /></IconButton>}>
|
|
154
|
+
<Menu trigger={({ toggle }) => <IconButton onClick={toggle} aria-label="More actions"><MoreHorizontal size={16} /></IconButton>}>
|
|
155
155
|
{(close) => (
|
|
156
156
|
<>
|
|
157
157
|
<MenuItem icon={<Pencil size={14} />} onClick={() => { close(); setRenaming(r); }}>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"root":["./src/App.tsx","./src/main.tsx","./src/components/CausalGraphEditor.tsx","./src/components/CausalGraphView.tsx","./src/components/CausalInspector.tsx","./src/components/ColumnGuideView.tsx","./src/components/ComparisonView.tsx","./src/components/ConfirmHost.tsx","./src/components/DifficultyConfigurator.tsx","./src/components/DifficultyView.tsx","./src/components/ErrorBoundary.tsx","./src/components/ExportModal.tsx","./src/components/FailureBadges.tsx","./src/components/FailureConfigurator.tsx","./src/components/FailureInspector.tsx","./src/components/GenerationsPanel.tsx","./src/components/Histogram.tsx","./src/components/Inspector.tsx","./src/components/Layout.tsx","./src/components/Modal.tsx","./src/components/OverviewView.tsx","./src/components/SpecDrawer.tsx","./src/components/StageStepper.tsx","./src/components/TableCanvas.tsx","./src/components/Toaster.tsx","./src/components/ui.tsx","./src/lib/api.ts","./src/lib/audit.ts","./src/lib/causal.ts","./src/lib/clsx.ts","./src/lib/difficulty.ts","./src/lib/failures.ts","./src/lib/runSocket.ts","./src/lib/sampling.ts","./src/lib/schemaForm.tsx","./src/lib/specDefaults.ts","./src/lib/summary.ts","./src/lib/types.ts","./src/lib/useHistory.ts","./src/lib/viewLayout.ts","./src/pages/Canvas.tsx","./src/pages/Dashboard.tsx","./src/pages/Placeholder.tsx","./src/pages/Plugins.tsx","./src/pages/Results.tsx","./src/pages/Templates.tsx","./src/pages/Tracker.tsx","./src/store/chrome.ts","./src/store/confirm.ts","./src/store/toast.ts","./src/store/ui.ts"],"version":"5.9.3"}
|
|
@@ -102,8 +102,16 @@ def serve(
|
|
|
102
102
|
bind_port = port or cfg.port
|
|
103
103
|
cfg.ensure_dirs()
|
|
104
104
|
|
|
105
|
+
# 0.0.0.0 / :: are bind-all addresses, not browsable URLs — show a link the
|
|
106
|
+
# user can actually click (e.g. inside Docker, where we bind 0.0.0.0).
|
|
107
|
+
browse_host = "localhost" if bind_host in ("0.0.0.0", "::", "[::]") else bind_host
|
|
105
108
|
typer.secho(
|
|
106
|
-
f"DataDoom
|
|
109
|
+
f"DataDoom — open the web Canvas at http://{browse_host}:{bind_port}",
|
|
110
|
+
fg=typer.colors.GREEN,
|
|
111
|
+
bold=True,
|
|
112
|
+
)
|
|
113
|
+
typer.secho(
|
|
114
|
+
f" (bound to {bind_host}:{bind_port} · data: {cfg.home} · Ctrl+C to stop)",
|
|
107
115
|
fg=typer.colors.GREEN,
|
|
108
116
|
)
|
|
109
117
|
if reload:
|