dataenginex 0.10.0__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/ci.yml +23 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/docs-notify.yml +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.gitignore +1 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/CHANGELOG.md +39 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/CLAUDE.md +1 -1
- dataenginex-1.0.2/CODE_OF_CONDUCT.md +57 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/PKG-INFO +20 -16
- {dataenginex-0.10.0 → dataenginex-1.0.2}/README.md +1 -1
- dataenginex-1.0.2/SECURITY.md +71 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/ci-cd.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/index.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/observability.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/sdlc.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/pyproject.toml +44 -24
- dataenginex-1.0.2/src/dataenginex/ai/__init__.py +79 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/agents/builtin.py +10 -0
- dataenginex-1.0.2/src/dataenginex/ai/memory/__init__.py +16 -0
- dataenginex-1.0.2/src/dataenginex/ai/memory/base.py +59 -0
- dataenginex-1.0.2/src/dataenginex/ai/memory/episodic.py +40 -0
- dataenginex-1.0.2/src/dataenginex/ai/memory/long_term.py +52 -0
- dataenginex-1.0.2/src/dataenginex/ai/observability/__init__.py +19 -0
- dataenginex-1.0.2/src/dataenginex/ai/observability/audit.py +34 -0
- dataenginex-1.0.2/src/dataenginex/ai/observability/cost.py +57 -0
- dataenginex-1.0.2/src/dataenginex/ai/observability/langfuse.py +191 -0
- dataenginex-1.0.2/src/dataenginex/ai/observability/metrics.py +26 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/retrieval/__init__.py +3 -0
- dataenginex-1.0.2/src/dataenginex/ai/retrieval/graph.py +159 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/__init__.py +7 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/anthropic.py +58 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/huggingface.py +36 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/ollama.py +48 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/openai.py +58 -0
- dataenginex-1.0.2/src/dataenginex/ai/routing/router.py +56 -0
- dataenginex-1.0.2/src/dataenginex/ai/runtime/__init__.py +27 -0
- dataenginex-1.0.2/src/dataenginex/ai/runtime/checkpoint.py +31 -0
- dataenginex-1.0.2/src/dataenginex/ai/runtime/executor.py +173 -0
- dataenginex-1.0.2/src/dataenginex/ai/runtime/sandbox.py +220 -0
- dataenginex-1.0.2/src/dataenginex/ai/workflows/__init__.py +9 -0
- dataenginex-1.0.2/src/dataenginex/ai/workflows/conditions.py +48 -0
- dataenginex-1.0.2/src/dataenginex/ai/workflows/dag.py +124 -0
- dataenginex-1.0.2/src/dataenginex/ai/workflows/human_loop.py +47 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/__init__.py +13 -1
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/auth.py +64 -4
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/factory.py +8 -0
- dataenginex-1.0.2/src/dataenginex/api/jwks.py +155 -0
- dataenginex-1.0.2/src/dataenginex/api/ldap_sync.py +170 -0
- dataenginex-1.0.2/src/dataenginex/api/rbac.py +91 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/ai.py +10 -2
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/ml.py +25 -4
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/pipelines.py +20 -4
- dataenginex-1.0.2/src/dataenginex/api/scim.py +391 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/run_history.py +1 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/runner.py +11 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/gates.py +1 -2
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/spark.py +1 -2
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/__init__.py +21 -0
- dataenginex-1.0.2/src/dataenginex/middleware/domain_metrics.py +77 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/llm.py +158 -2
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/scheduler.py +7 -0
- dataenginex-1.0.2/tests/integration/test_ai_integration.py +427 -0
- dataenginex-1.0.2/tests/integration/test_api_middleware_integration.py +375 -0
- dataenginex-1.0.2/tests/integration/test_lineage_integration.py +391 -0
- dataenginex-1.0.2/tests/integration/test_ml_integration.py +277 -0
- dataenginex-1.0.2/tests/integration/test_secops_integration.py +316 -0
- dataenginex-1.0.2/tests/unit/test_ai_modules.py +755 -0
- dataenginex-1.0.2/tests/unit/test_ai_router_extended.py +273 -0
- dataenginex-1.0.2/tests/unit/test_api_jwks.py +179 -0
- dataenginex-1.0.2/tests/unit/test_api_rbac.py +109 -0
- dataenginex-1.0.2/tests/unit/test_api_scim.py +162 -0
- dataenginex-1.0.2/tests/unit/test_domain_metrics.py +98 -0
- dataenginex-1.0.2/tests/unit/test_domain_metrics_wiring.py +138 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_llm.py +24 -4
- dataenginex-1.0.2/tests/unit/test_llm_litellm_vllm.py +102 -0
- dataenginex-1.0.2/tests/unit/test_observability_langfuse.py +116 -0
- dataenginex-1.0.2/tests/unit/test_pipeline_router_extended.py +247 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_quality_gates.py +4 -12
- dataenginex-1.0.2/tests/unit/test_retriever_graph.py +89 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_run_history.py +1 -0
- dataenginex-1.0.2/tests/unit/test_security_extended.py +379 -0
- dataenginex-1.0.2/uv.lock +4929 -0
- dataenginex-0.10.0/src/dataenginex/ai/__init__.py +0 -13
- dataenginex-0.10.0/uv.lock +0 -4387
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.claude/commands/new-feature.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.claude/commands/validate.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.claude/settings.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.env.template +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/dependabot.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/labels.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/release-pr-template.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/auto-pr-dev-to-main.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/auto-pr-to-dev.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/claude.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/docker-build-push.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/enforce-dev-to-main.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/label-sync.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/pypi-publish.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/release-dex.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/release-please.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.github/workflows/security.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.gitleaks.toml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.pre-commit-config.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.python-version +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/.release-please-manifest.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/CODEOWNERS +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/CONTRIBUTING.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/Dockerfile +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/LICENSE +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docker-compose.test.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/adr/0000-template.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/adr/0001-medallion-architecture.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/api.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/core.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/dashboard.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/data.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/index.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/lakehouse.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/middleware.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/ml.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/plugins.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/api-reference/warehouse.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/architecture.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/contributing.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/development.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/quickstart.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/release-notes.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/roadmap/project-roadmap.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/roadmap/project-roadmap.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/security-scanning.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-21-phase-0-foundation.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-dataenginex-1.0-master-plan.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-1-data-layer.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-6a-dex-engine-integration.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-6b-dex-studio-redesign.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-23-careerdex-example.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-23-dex-studio-direct-import.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-24-dex-naming-architecture-dry.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-28-unified-docs-and-tooling.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-21-dataenginex-v2-system-redesign.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-22-phase-6-integration-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-23-dex-studio-direct-import-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-24-dex-naming-architecture-dry-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-28-unified-docs-and-tooling-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/01_hello_pipeline.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/02_api_quickstart.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/03_quality_gate.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/04_ml_training.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/05_rag_demo.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/06_llm_quickstart.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/07_api_ingestion.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/08_spark_ml.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/09_feature_engineering.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/10_model_analysis.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/GUIDE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/dashboard/dashboard_config.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/dashboard/run_dashboard.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/dex.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/examples/movies.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/poe_tasks.toml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/release-please-config.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/scripts/GUIDE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/scripts/localstack/create-buckets.sh +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/scripts/promote.sh +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/sonar-project.properties +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/README.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/RELEASE_NOTES.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/agents/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/retrieval/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/tools/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ai/tools/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/errors.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/pagination.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/rate_limit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/root.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/system.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/api/schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/cli/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/cli/main.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/cli/run.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/cli/serve.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/cli/train.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/config/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/config/defaults.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/config/loader.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/config/schema.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/exceptions.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/interfaces.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/medallion_architecture.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/quality.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/core/validators.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/app.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/panels.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/csv.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/duckdb.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/legacy.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/dag.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/profiler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/transforms/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/data/transforms/sql.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/catalog.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/partitioning.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/storage.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/logging_config.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/metrics_middleware.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/request_logging.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/middleware/tracing.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/drift.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/features/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/features/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/mlflow_registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/tracking/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/tracking/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/training.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/ml/vectorstore.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/orchestration/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/orchestration/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/plugins/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/plugins/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/py.typed +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/secops/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/secops/audit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/secops/gate.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/secops/masking.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/secops/pii.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/lineage.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/transforms.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tasks/findings.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tasks/lessons.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conformance/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conformance/test_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conformance/test_feature_store.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conformance/test_tracker.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conformance/test_transform.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/conftest.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/fixtures/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/fixtures/sample_data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/fixtures/sample_jobs.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/fixtures/sample_jobs.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/test_cli_run.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/test_config_cli.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/test_full_app.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/test_pipeline_e2e.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/integration/test_storage_real.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/load/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_agent_runtime.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_ai_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_auth.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_factory.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_pagination.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_rate_limit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_api_validators.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_builtin_agent.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_builtin_feature_store.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_builtin_serving.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_builtin_tracker.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_cli_train.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_config_loader.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_config_schema.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_core_exceptions.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_core_interfaces.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_core_registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_csv_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_data_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_drift_scheduler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_duckdb_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_errors.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_lakehouse.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_logging.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_medallion.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_middleware.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_ml.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_ml_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_pipeline_dag.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_pipeline_runner.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_plugins.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_quality_spark.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_retriever.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_scheduler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_secops.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_spark_fixtures.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_sql_transforms.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_storage_abstraction.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_system_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_tracing.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_vectorstore.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.2}/tests/unit/test_warehouse.py +0 -0
|
@@ -7,6 +7,10 @@ on:
|
|
|
7
7
|
branches: [main, dev]
|
|
8
8
|
workflow_dispatch:
|
|
9
9
|
|
|
10
|
+
schedule:
|
|
11
|
+
# Weekly Python version compatibility check
|
|
12
|
+
- cron: '0 0 * * 0'
|
|
13
|
+
|
|
10
14
|
permissions:
|
|
11
15
|
contents: read
|
|
12
16
|
|
|
@@ -26,7 +30,7 @@ jobs:
|
|
|
26
30
|
- run: uv run poe quality
|
|
27
31
|
|
|
28
32
|
test:
|
|
29
|
-
name: Tests
|
|
33
|
+
name: Tests (Python 3.13)
|
|
30
34
|
runs-on: ubuntu-latest
|
|
31
35
|
needs: quality
|
|
32
36
|
steps:
|
|
@@ -45,3 +49,21 @@ jobs:
|
|
|
45
49
|
fail_ci_if_error: false
|
|
46
50
|
env:
|
|
47
51
|
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
|
52
|
+
|
|
53
|
+
test-compat:
|
|
54
|
+
name: Python Compatibility
|
|
55
|
+
runs-on: ubuntu-latest
|
|
56
|
+
if: github.event_name == 'schedule'
|
|
57
|
+
strategy:
|
|
58
|
+
matrix:
|
|
59
|
+
python-version: ["3.11", "3.12"]
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v6
|
|
62
|
+
- uses: astral-sh/setup-uv@v7
|
|
63
|
+
with:
|
|
64
|
+
version: "latest"
|
|
65
|
+
python-version: ${{ matrix.python-version }}
|
|
66
|
+
- run: uv sync --group ml
|
|
67
|
+
env:
|
|
68
|
+
UV_PROJECT_ENVIRONMENT: .venv
|
|
69
|
+
- run: uv run poe check-all
|
|
@@ -28,6 +28,45 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
28
28
|
|
|
29
29
|
## [Unreleased]
|
|
30
30
|
|
|
31
|
+
## [1.0.0] - 2026-04-07
|
|
32
|
+
|
|
33
|
+
### Highlights
|
|
34
|
+
|
|
35
|
+
- **Complete Data + ML + AI Framework**: All phases from the v1.0 master plan implemented — config-driven pipeline via `dex.yaml`, BackendRegistry pattern for swappable backends, unified CLI.
|
|
36
|
+
- **Data Layer**: DuckDB connector (default), CSV connector, PipelineRunner with DAG resolution, transforms (filter, derive, cast, deduplicate), quality gates (completeness, uniqueness), column-level lineage tracking, built-in cron scheduler.
|
|
37
|
+
- **ML Layer**: SQLite-backed experiment tracker, DuckDB feature store, sklearn/xgboost training integration, model registry with versioning (dev → staging → production), built-in model serving via FastAPI, PSI drift detection.
|
|
38
|
+
- **AI Layer**: Built-in ReAct agent runtime, Ollama LLM provider (default), tool registry (sql_query, predict, search), BM25 sparse retrieval (DuckDB FTS), dense vector retrieval (DuckDB VSS HNSW), hybrid retrieval with RRF fusion, agent memory (short-term + episodic).
|
|
39
|
+
- **CLI Commands**: `dex init`, `dex validate`, `dex version`, `dex serve`, `dex run`, `dex train`, `dex agent`, `dex query`.
|
|
40
|
+
- **API**: FastAPI app factory, JWT auth, rate limiting, health endpoints, project CRUD, pipeline run/status, data explorer, ML experiments/models, agent chat/manage, WebSocket for live logs and streaming.
|
|
41
|
+
- **Backend Registry Pattern**: Every subsystem follows ABC + BackendRegistry[T] pattern with built-in implementations and optional extras (Dagster, MLflow, Qdrant, LanceDB, sentence-transformers, PySpark).
|
|
42
|
+
|
|
43
|
+
### Breaking Changes
|
|
44
|
+
|
|
45
|
+
- **FastAPI now optional**: Core install (`pip install dataenginex`) includes only lightweight deps. Install `[api]` extra for FastAPI/uvicorn: `pip install dataenginex[api]`
|
|
46
|
+
- **Cloud SDKs now optional**: Core install no longer requires boto3/google-cloud-storage/google-cloud-bigquery. Install `[cloud]` extra: `pip install dataenginex[cloud]`
|
|
47
|
+
- **Routers moved**: API routers moved to application packages. Use `from dataenginex.api import ...` directly (requires `[api]` extra)
|
|
48
|
+
- **Root `__init__.py` slimmed**: Re-exports removed. Import from submodules directly: `from dataenginex.api import HealthChecker` etc.
|
|
49
|
+
|
|
50
|
+
### Added
|
|
51
|
+
|
|
52
|
+
- **Full project templates**: `dex init --template [minimal|data-pipeline|ml-project|ai-agent|full-stack|career-intelligence]`
|
|
53
|
+
- **Docker support**: Multi-stage Dockerfile (`ghcr.io/thedataenginex/dex`), docker-compose.yml for full stack
|
|
54
|
+
- **SecOps**: PII scanning in pipelines, masking, audit trail
|
|
55
|
+
- **Quality schema**: Spark audit integration for data quality validation
|
|
56
|
+
- **Examples**: 10 runnable examples in `examples/` directory
|
|
57
|
+
|
|
58
|
+
### Verification checklist
|
|
59
|
+
|
|
60
|
+
1. `uv run poe lint` — Ruff checks clean
|
|
61
|
+
2. `uv run poe typecheck` — mypy strict (all modules)
|
|
62
|
+
3. `uv run poe test` — 663 passed, 36 skipped
|
|
63
|
+
4. `pip install dataenginex` — installs successfully
|
|
64
|
+
5. `dex validate dex.yaml` — validates config
|
|
65
|
+
6. `dex version` — shows version
|
|
66
|
+
|
|
67
|
+
[Unreleased]: https://github.com/TheDataEngineX/DEX/compare/v1.0.0...HEAD
|
|
68
|
+
[1.0.0]: https://github.com/TheDataEngineX/DEX/releases/tag/v1.0.0
|
|
69
|
+
|
|
31
70
|
## [0.7.1] - 2026-03-17
|
|
32
71
|
|
|
33
72
|
### Fixed
|
|
@@ -11,7 +11,7 @@ Goal is to save Claude code tokens for lower cost without losing quality.
|
|
|
11
11
|
|
|
12
12
|
| Package | Location | Purpose |
|
|
13
13
|
|---------|----------|---------|
|
|
14
|
-
| `dataenginex` | `src/dataenginex/` | Core framework — config
|
|
14
|
+
| `dataenginex` | `src/dataenginex/` | Core framework — config, registry, CLI, API, ML, AI (routing, runtime, memory, observability, workflows) |
|
|
15
15
|
|
|
16
16
|
**Stack:** Python 3.13+ · FastAPI · DuckDB · structlog · Pydantic · Click · Rich · uv · Ruff · mypy strict · pytest
|
|
17
17
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
|
10
|
+
orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment:
|
|
18
|
+
|
|
19
|
+
- Using welcoming and inclusive language
|
|
20
|
+
- Being respectful of differing viewpoints and experiences
|
|
21
|
+
- Gracefully accepting constructive criticism
|
|
22
|
+
- Focusing on what is best for the community
|
|
23
|
+
- Showing empathy towards other community members
|
|
24
|
+
|
|
25
|
+
Examples of unacceptable behavior:
|
|
26
|
+
|
|
27
|
+
- The use of sexualized language or imagery, and sexual attention or advances
|
|
28
|
+
- Trolling, insulting or derogatory comments, and personal or political attacks
|
|
29
|
+
- Public or private harassment
|
|
30
|
+
- Publishing others' private information without explicit permission
|
|
31
|
+
- Other conduct which could reasonably be considered inappropriate
|
|
32
|
+
|
|
33
|
+
## Enforcement Responsibilities
|
|
34
|
+
|
|
35
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
36
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
37
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
38
|
+
or harmful.
|
|
39
|
+
|
|
40
|
+
## Scope
|
|
41
|
+
|
|
42
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
43
|
+
an individual is officially representing the community in public spaces.
|
|
44
|
+
|
|
45
|
+
## Enforcement
|
|
46
|
+
|
|
47
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
48
|
+
reported to the community leaders responsible for enforcement at
|
|
49
|
+
**conduct@thedataenginex.dev**.
|
|
50
|
+
|
|
51
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
|
52
|
+
|
|
53
|
+
## Attribution
|
|
54
|
+
|
|
55
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
|
|
56
|
+
version 2.1, available at
|
|
57
|
+
<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataenginex
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 1.0.2
|
|
4
4
|
Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
|
|
5
5
|
Author-email: Jay <jayapal.myaka99@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -26,29 +26,33 @@ License: MIT License
|
|
|
26
26
|
SOFTWARE.
|
|
27
27
|
License-File: LICENSE
|
|
28
28
|
Requires-Python: >=3.13
|
|
29
|
-
Requires-Dist: click>=8.3.
|
|
30
|
-
Requires-Dist: croniter>=6.
|
|
31
|
-
Requires-Dist: duckdb>=1.5.
|
|
29
|
+
Requires-Dist: click>=8.3.2
|
|
30
|
+
Requires-Dist: croniter>=6.2.2
|
|
31
|
+
Requires-Dist: duckdb>=1.5.2
|
|
32
32
|
Requires-Dist: email-validator>=2.3.0
|
|
33
|
-
Requires-Dist: fastapi>=0.
|
|
34
|
-
Requires-Dist: httpx>=0.28.
|
|
35
|
-
Requires-Dist: opentelemetry-api>=1.
|
|
36
|
-
Requires-Dist: opentelemetry-exporter-otlp>=1.
|
|
37
|
-
Requires-Dist: opentelemetry-instrumentation-fastapi>=0.
|
|
38
|
-
Requires-Dist: opentelemetry-sdk>=1.
|
|
39
|
-
Requires-Dist: prometheus-client>=0.
|
|
33
|
+
Requires-Dist: fastapi>=0.136.0
|
|
34
|
+
Requires-Dist: httpx>=0.28.1
|
|
35
|
+
Requires-Dist: opentelemetry-api>=1.41.0
|
|
36
|
+
Requires-Dist: opentelemetry-exporter-otlp>=1.41.0
|
|
37
|
+
Requires-Dist: opentelemetry-instrumentation-fastapi>=0.62b0
|
|
38
|
+
Requires-Dist: opentelemetry-sdk>=1.41.0
|
|
39
|
+
Requires-Dist: prometheus-client>=0.25.0
|
|
40
40
|
Requires-Dist: pyarrow>=23.0.1
|
|
41
|
-
Requires-Dist: pydantic>=2.
|
|
42
|
-
Requires-Dist: python-dotenv>=1.2.
|
|
43
|
-
Requires-Dist: python-json-logger>=4.0.0
|
|
41
|
+
Requires-Dist: pydantic>=2.13.2
|
|
42
|
+
Requires-Dist: python-dotenv>=1.2.2
|
|
44
43
|
Requires-Dist: pyyaml>=6.0.3
|
|
45
|
-
Requires-Dist: rich>=
|
|
44
|
+
Requires-Dist: rich>=15.0.0
|
|
46
45
|
Requires-Dist: structlog>=25.5.0
|
|
47
|
-
Requires-Dist: uvicorn>=0.
|
|
46
|
+
Requires-Dist: uvicorn>=0.44.0
|
|
47
|
+
Provides-Extra: auth
|
|
48
|
+
Requires-Dist: ldap3>=2.9.1; extra == 'auth'
|
|
49
|
+
Requires-Dist: pyjwt[crypto]>=2.9.0; extra == 'auth'
|
|
48
50
|
Provides-Extra: cloud
|
|
49
51
|
Requires-Dist: boto3>=1.42.0; extra == 'cloud'
|
|
50
52
|
Requires-Dist: google-cloud-bigquery>=3.40.0; extra == 'cloud'
|
|
51
53
|
Requires-Dist: google-cloud-storage>=3.0.0; extra == 'cloud'
|
|
54
|
+
Provides-Extra: observability
|
|
55
|
+
Requires-Dist: langfuse>=4.3.1; extra == 'observability'
|
|
52
56
|
Description-Content-Type: text/markdown
|
|
53
57
|
|
|
54
58
|
# dataenginex
|
|
@@ -112,7 +112,7 @@ ______________________________________________________________________
|
|
|
112
112
|
|
|
113
113
|
## Development
|
|
114
114
|
|
|
115
|
-
See [docs/
|
|
115
|
+
See [docs/development.md](docs/development.md) for full setup.
|
|
116
116
|
|
|
117
117
|
```bash
|
|
118
118
|
uv run poe check-all # lint + typecheck + tests
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
| Version | Supported |
|
|
6
|
+
|---------|-----------|
|
|
7
|
+
| Latest minor release (1.0.x) | ✅ |
|
|
8
|
+
| Previous minor release | ✅ (security fixes only) |
|
|
9
|
+
| Older versions | ❌ |
|
|
10
|
+
|
|
11
|
+
## Reporting a Vulnerability
|
|
12
|
+
|
|
13
|
+
**Do NOT open a public issue for security vulnerabilities.**
|
|
14
|
+
|
|
15
|
+
Instead, please report them via one of these channels:
|
|
16
|
+
|
|
17
|
+
1. **Email**: security@thedataenginex.dev
|
|
18
|
+
2. **GitHub Security Advisories**: Use the "Report a vulnerability" button on the Security tab
|
|
19
|
+
|
|
20
|
+
### What to Include
|
|
21
|
+
|
|
22
|
+
- Description of the vulnerability
|
|
23
|
+
- Steps to reproduce
|
|
24
|
+
- Potential impact assessment
|
|
25
|
+
- Suggested fix (if any)
|
|
26
|
+
|
|
27
|
+
### Response Timeline
|
|
28
|
+
|
|
29
|
+
| Stage | Timeline |
|
|
30
|
+
|-------|----------|
|
|
31
|
+
| Acknowledgment | Within 48 hours |
|
|
32
|
+
| Initial assessment | Within 5 business days |
|
|
33
|
+
| Fix development | Within 30 days (critical), 90 days (non-critical) |
|
|
34
|
+
| Public disclosure | After fix is released |
|
|
35
|
+
|
|
36
|
+
## Disclosure Policy
|
|
37
|
+
|
|
38
|
+
We follow [coordinated disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure).
|
|
39
|
+
We will credit reporters in the security advisory unless they prefer to remain anonymous.
|
|
40
|
+
|
|
41
|
+
## Security Practices
|
|
42
|
+
|
|
43
|
+
DataEngineX follows these security practices:
|
|
44
|
+
|
|
45
|
+
- **No hardcoded secrets** — all credentials via environment variables
|
|
46
|
+
- **Parameterized queries** — never SQL concatenation
|
|
47
|
+
- **Input validation** — Pydantic models at API boundaries
|
|
48
|
+
- **Dependency auditing** — automated via `uv run poe security`
|
|
49
|
+
- **Pickle safety** — SafeUnpickler with HMAC verification for model loading
|
|
50
|
+
- **Container security** — non-root users, minimal base images
|
|
51
|
+
- **HTTPS only** — all production traffic encrypted
|
|
52
|
+
- **Least privilege** — minimal permissions for service accounts
|
|
53
|
+
|
|
54
|
+
## Security-Related Dependencies
|
|
55
|
+
|
|
56
|
+
| Dependency | Purpose | Security Note |
|
|
57
|
+
|------------|---------|---------------|
|
|
58
|
+
| pydantic | Config validation | Validates all inputs |
|
|
59
|
+
| python-dotenv | Env var loading | Never commit .env files |
|
|
60
|
+
| httpx | HTTP client | Timeout configured |
|
|
61
|
+
| structlog | Logging | No PII in logs by default |
|
|
62
|
+
|
|
63
|
+
## Auditing
|
|
64
|
+
|
|
65
|
+
Run security audits locally:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv run poe security # pip-audit for vulnerabilities
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
CI runs `pip-audit` and dependency scanning on every PR.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dataenginex"
|
|
3
|
-
version = "0.10.0"
|
|
3
|
+
version = "1.0.2"
|
|
4
4
|
description = "DataEngineX - Core framework for AI/ML/Data engineering projects"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Jay", email = "jayapal.myaka99@gmail.com"}
|
|
@@ -9,24 +9,24 @@ readme = "src/dataenginex/README.md"
|
|
|
9
9
|
requires-python = ">=3.13"
|
|
10
10
|
license = {file = "LICENSE"}
|
|
11
11
|
dependencies = [
|
|
12
|
-
"pydantic>=2.
|
|
13
|
-
"python-dotenv>=1.2.
|
|
12
|
+
"pydantic>=2.13.2",
|
|
13
|
+
"python-dotenv>=1.2.2",
|
|
14
14
|
"pyyaml>=6.0.3",
|
|
15
|
-
"click>=8.3.
|
|
16
|
-
"rich>=
|
|
17
|
-
"duckdb>=1.5.
|
|
18
|
-
"croniter>=6.
|
|
19
|
-
"httpx>=0.28.
|
|
20
|
-
"prometheus-client>=0.
|
|
15
|
+
"click>=8.3.2",
|
|
16
|
+
"rich>=15.0.0",
|
|
17
|
+
"duckdb>=1.5.2",
|
|
18
|
+
"croniter>=6.2.2",
|
|
19
|
+
"httpx>=0.28.1",
|
|
20
|
+
"prometheus-client>=0.25.0",
|
|
21
21
|
# API framework
|
|
22
|
-
"fastapi>=0.
|
|
23
|
-
"uvicorn>=0.
|
|
22
|
+
"fastapi>=0.136.0",
|
|
23
|
+
"uvicorn>=0.44.0",
|
|
24
24
|
"structlog>=25.5.0",
|
|
25
|
-
|
|
26
|
-
"opentelemetry-api>=1.
|
|
27
|
-
"opentelemetry-sdk>=1.
|
|
28
|
-
"opentelemetry-instrumentation-fastapi>=0.
|
|
29
|
-
"opentelemetry-exporter-otlp>=1.
|
|
25
|
+
# OpenTelemetry
|
|
26
|
+
"opentelemetry-api>=1.41.0",
|
|
27
|
+
"opentelemetry-sdk>=1.41.0",
|
|
28
|
+
"opentelemetry-instrumentation-fastapi>=0.62b0",
|
|
29
|
+
"opentelemetry-exporter-otlp>=1.41.0",
|
|
30
30
|
"email-validator>=2.3.0",
|
|
31
31
|
# Data formats
|
|
32
32
|
"pyarrow>=23.0.1",
|
|
@@ -41,22 +41,36 @@ cloud = [
|
|
|
41
41
|
"google-cloud-storage>=3.0.0",
|
|
42
42
|
"google-cloud-bigquery>=3.40.0",
|
|
43
43
|
]
|
|
44
|
+
auth = [
|
|
45
|
+
"pyjwt[crypto]>=2.9.0",
|
|
46
|
+
"ldap3>=2.9.1",
|
|
47
|
+
]
|
|
48
|
+
# NOTE: litellm is NOT declared here because it pins python-dotenv==1.0.1, which
|
|
49
|
+
# conflicts with our python-dotenv>=1.2.2. Install separately for LiteLLMProvider:
|
|
50
|
+
# pip install 'litellm>=1.83.3' --no-deps
|
|
51
|
+
# or manage via a dedicated venv. The provider is lazy-imported; DEX runs fine
|
|
52
|
+
# without it.
|
|
53
|
+
observability = [
|
|
54
|
+
"langfuse>=4.3.1",
|
|
55
|
+
]
|
|
44
56
|
|
|
45
57
|
[dependency-groups]
|
|
46
58
|
dev = [
|
|
47
|
-
"pytest>=9.0.
|
|
48
|
-
"pytest-cov>=7.
|
|
59
|
+
"pytest>=9.0.3",
|
|
60
|
+
"pytest-cov>=7.1.0",
|
|
49
61
|
"pytest-asyncio>=1.3.0",
|
|
50
|
-
"ruff>=0.15.
|
|
51
|
-
"mypy>=1.
|
|
62
|
+
"ruff>=0.15.11",
|
|
63
|
+
"mypy>=1.20.1",
|
|
52
64
|
"types-pyyaml>=6.0.12.20250915",
|
|
53
65
|
"types-requests>=2.32.4.20260107",
|
|
54
|
-
"poethepoet>=0.
|
|
55
|
-
"uv>=0.
|
|
56
|
-
"mkdocs-material>=9.7.
|
|
57
|
-
"mkdocstrings[python]>=0.
|
|
66
|
+
"poethepoet>=0.44.0",
|
|
67
|
+
"uv>=0.11.6",
|
|
68
|
+
"mkdocs-material>=9.7.6",
|
|
69
|
+
"mkdocstrings[python]>=1.0.4",
|
|
58
70
|
"pre-commit>=4.5.1",
|
|
59
71
|
"pip-audit>=2.10.0",
|
|
72
|
+
"pyjwt[crypto]>=2.12.1",
|
|
73
|
+
"cryptography>=46.0.7",
|
|
60
74
|
]
|
|
61
75
|
data = [
|
|
62
76
|
"pyspark>=4.1.1",
|
|
@@ -141,6 +155,10 @@ ignore_missing_imports = true
|
|
|
141
155
|
module = ["sentence_transformers.*"]
|
|
142
156
|
ignore_missing_imports = true
|
|
143
157
|
|
|
158
|
+
[[tool.mypy.overrides]]
|
|
159
|
+
module = ["litellm.*", "langfuse.*", "ldap3.*"]
|
|
160
|
+
ignore_missing_imports = true
|
|
161
|
+
|
|
144
162
|
[[tool.mypy.overrides]]
|
|
145
163
|
module = ["pyspark.*"]
|
|
146
164
|
ignore_missing_imports = true
|
|
@@ -169,6 +187,8 @@ filterwarnings = [
|
|
|
169
187
|
"error", # Treat all warnings as errors by default
|
|
170
188
|
"ignore::DeprecationWarning:websockets\\.legacy", # uvicorn uses deprecated websockets.legacy API
|
|
171
189
|
"ignore::DeprecationWarning:uvicorn\\.protocols", # uvicorn imports deprecated WebSocketServerProtocol
|
|
190
|
+
"ignore:unclosed.*socket:ResourceWarning", # Starlette TestClient GC timing
|
|
191
|
+
"ignore:unclosed event loop:ResourceWarning", # Starlette TestClient GC timing
|
|
172
192
|
]
|
|
173
193
|
|
|
174
194
|
[tool.coverage.run]
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""AI layer — agents, retrieval, tools, routing, runtime, memory, observability, workflows.
|
|
2
|
+
|
|
3
|
+
Public API::
|
|
4
|
+
|
|
5
|
+
from dataenginex.ai import (
|
|
6
|
+
retriever_registry, agent_registry, tool_registry,
|
|
7
|
+
BuiltinRetriever, BuiltinAgentRuntime,
|
|
8
|
+
ModelRouter, BaseProvider,
|
|
9
|
+
Sandbox, SandboxConfig,
|
|
10
|
+
AuditLog, CostTracker,
|
|
11
|
+
AgentDAG, Condition,
|
|
12
|
+
ShortTermMemory, LongTermMemory, EpisodicMemory,
|
|
13
|
+
CheckpointManager, AgentExecutor, AgentConfig,
|
|
14
|
+
AgentMetrics,
|
|
15
|
+
)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from dataenginex.ai.agents import agent_registry
|
|
21
|
+
from dataenginex.ai.agents.builtin import BuiltinAgentRuntime
|
|
22
|
+
from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
|
|
23
|
+
from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
|
|
24
|
+
from dataenginex.ai.memory.long_term import LongTermMemory
|
|
25
|
+
from dataenginex.ai.observability.audit import AuditEntry, AuditLog
|
|
26
|
+
from dataenginex.ai.observability.cost import CostTracker, TokenUsage
|
|
27
|
+
from dataenginex.ai.observability.metrics import AgentMetrics
|
|
28
|
+
from dataenginex.ai.retrieval import retriever_registry
|
|
29
|
+
from dataenginex.ai.retrieval.builtin import BuiltinRetriever
|
|
30
|
+
from dataenginex.ai.routing.router import BaseProvider, ModelRouter
|
|
31
|
+
from dataenginex.ai.runtime.checkpoint import Checkpoint, CheckpointManager
|
|
32
|
+
from dataenginex.ai.runtime.executor import AgentConfig, AgentExecutor, AgentResponse
|
|
33
|
+
from dataenginex.ai.runtime.sandbox import Sandbox, SandboxConfig, SandboxResult
|
|
34
|
+
from dataenginex.ai.tools import ToolRegistry, ToolSpec, tool_registry
|
|
35
|
+
from dataenginex.ai.workflows.conditions import Condition
|
|
36
|
+
from dataenginex.ai.workflows.dag import AgentDAG
|
|
37
|
+
from dataenginex.ai.workflows.human_loop import ApprovalGate
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
# Registries
|
|
41
|
+
"agent_registry",
|
|
42
|
+
"retriever_registry",
|
|
43
|
+
"tool_registry",
|
|
44
|
+
# Agents
|
|
45
|
+
"BuiltinAgentRuntime",
|
|
46
|
+
"BuiltinRetriever",
|
|
47
|
+
# Tools
|
|
48
|
+
"ToolRegistry",
|
|
49
|
+
"ToolSpec",
|
|
50
|
+
# Memory
|
|
51
|
+
"BaseMemory",
|
|
52
|
+
"MemoryEntry",
|
|
53
|
+
"ShortTermMemory",
|
|
54
|
+
"LongTermMemory",
|
|
55
|
+
"EpisodicMemory",
|
|
56
|
+
"Episode",
|
|
57
|
+
# Observability
|
|
58
|
+
"AuditEntry",
|
|
59
|
+
"AuditLog",
|
|
60
|
+
"CostTracker",
|
|
61
|
+
"TokenUsage",
|
|
62
|
+
"AgentMetrics",
|
|
63
|
+
# Routing
|
|
64
|
+
"BaseProvider",
|
|
65
|
+
"ModelRouter",
|
|
66
|
+
# Runtime
|
|
67
|
+
"AgentConfig",
|
|
68
|
+
"AgentExecutor",
|
|
69
|
+
"AgentResponse",
|
|
70
|
+
"Checkpoint",
|
|
71
|
+
"CheckpointManager",
|
|
72
|
+
"Sandbox",
|
|
73
|
+
"SandboxConfig",
|
|
74
|
+
"SandboxResult",
|
|
75
|
+
# Workflows
|
|
76
|
+
"AgentDAG",
|
|
77
|
+
"ApprovalGate",
|
|
78
|
+
"Condition",
|
|
79
|
+
]
|
|
@@ -17,6 +17,10 @@ from dataenginex.ai.agents import agent_registry
|
|
|
17
17
|
from dataenginex.ai.tools import ToolRegistry, tool_registry
|
|
18
18
|
from dataenginex.ai.tools.builtin import register_builtin_tools
|
|
19
19
|
from dataenginex.core.interfaces import BaseAgentRuntime
|
|
20
|
+
from dataenginex.middleware.domain_metrics import (
|
|
21
|
+
ai_agent_iterations,
|
|
22
|
+
ai_tool_calls_total,
|
|
23
|
+
)
|
|
20
24
|
|
|
21
25
|
logger = structlog.get_logger()
|
|
22
26
|
|
|
@@ -38,12 +42,14 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
|
|
|
38
42
|
system_prompt: str = "You are a helpful data engineering assistant.",
|
|
39
43
|
tools: ToolRegistry | None = None,
|
|
40
44
|
max_iterations: int = 10,
|
|
45
|
+
name: str = "builtin",
|
|
41
46
|
**kwargs: Any,
|
|
42
47
|
) -> None:
|
|
43
48
|
self._llm = llm
|
|
44
49
|
self._system_prompt = system_prompt
|
|
45
50
|
self._tools = tools or tool_registry
|
|
46
51
|
self._max_iterations = max_iterations
|
|
52
|
+
self._name = name
|
|
47
53
|
self._history: list[dict[str, str]] = []
|
|
48
54
|
register_builtin_tools()
|
|
49
55
|
|
|
@@ -64,6 +70,7 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
|
|
|
64
70
|
if step_result.get("done", False):
|
|
65
71
|
response = str(step_result.get("response", ""))
|
|
66
72
|
self._history.append({"role": "assistant", "content": response})
|
|
73
|
+
ai_agent_iterations.labels(agent=self._name).observe(iterations)
|
|
67
74
|
return {"response": response, "iterations": iterations, "tool_calls": tool_calls}
|
|
68
75
|
|
|
69
76
|
# If tool was called, continue the loop
|
|
@@ -73,6 +80,7 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
|
|
|
73
80
|
# Hit max iterations
|
|
74
81
|
final = "I've reached my reasoning limit. Here's what I have so far."
|
|
75
82
|
self._history.append({"role": "assistant", "content": final})
|
|
83
|
+
ai_agent_iterations.labels(agent=self._name).observe(self._max_iterations)
|
|
76
84
|
return {"response": final, "iterations": self._max_iterations, "tool_calls": tool_calls}
|
|
77
85
|
|
|
78
86
|
async def step(self, message: str, **kwargs: Any) -> dict[str, Any]:
|
|
@@ -134,8 +142,10 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
|
|
|
134
142
|
try:
|
|
135
143
|
result = self._tools.call(tool_name, **args)
|
|
136
144
|
observation = f"Tool '{tool_name}' returned: {result}"
|
|
145
|
+
ai_tool_calls_total.labels(tool=tool_name, status="ok").inc()
|
|
137
146
|
except Exception as e:
|
|
138
147
|
observation = f"Tool '{tool_name}' failed: {e}"
|
|
148
|
+
ai_tool_calls_total.labels(tool=tool_name, status="error").inc()
|
|
139
149
|
|
|
140
150
|
self._history.append(
|
|
141
151
|
{"role": "assistant", "content": f"[tool: {tool_name}] {observation}"},
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Agent memory — short-term, long-term, and episodic memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
|
|
6
|
+
from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
|
|
7
|
+
from dataenginex.ai.memory.long_term import LongTermMemory
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"BaseMemory",
|
|
11
|
+
"Episode",
|
|
12
|
+
"EpisodicMemory",
|
|
13
|
+
"LongTermMemory",
|
|
14
|
+
"MemoryEntry",
|
|
15
|
+
"ShortTermMemory",
|
|
16
|
+
]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Agent memory system — short-term, long-term, and episodic memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
|
|
11
|
+
class MemoryEntry:
|
|
12
|
+
"""A single memory entry."""
|
|
13
|
+
|
|
14
|
+
content: str
|
|
15
|
+
role: str # "user", "assistant", "system", "tool"
|
|
16
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
17
|
+
timestamp: float = 0.0
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseMemory(ABC):
|
|
21
|
+
"""Abstract base class for agent memory."""
|
|
22
|
+
|
|
23
|
+
@abstractmethod
|
|
24
|
+
def add(self, entry: MemoryEntry) -> None:
|
|
25
|
+
"""Add a memory entry."""
|
|
26
|
+
|
|
27
|
+
@abstractmethod
|
|
28
|
+
def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
|
|
29
|
+
"""Search memory by semantic similarity."""
|
|
30
|
+
|
|
31
|
+
@abstractmethod
|
|
32
|
+
def recent(self, n: int = 10) -> list[MemoryEntry]:
|
|
33
|
+
"""Get the most recent entries."""
|
|
34
|
+
|
|
35
|
+
@abstractmethod
|
|
36
|
+
def clear(self) -> None:
|
|
37
|
+
"""Clear all memory."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ShortTermMemory(BaseMemory):
|
|
41
|
+
"""Session/conversation memory — lives in-process, lost on restart."""
|
|
42
|
+
|
|
43
|
+
def __init__(self, max_entries: int = 100) -> None:
|
|
44
|
+
self._entries: list[MemoryEntry] = []
|
|
45
|
+
self._max = max_entries
|
|
46
|
+
|
|
47
|
+
def add(self, entry: MemoryEntry) -> None:
|
|
48
|
+
self._entries.append(entry)
|
|
49
|
+
if len(self._entries) > self._max:
|
|
50
|
+
self._entries.pop(0)
|
|
51
|
+
|
|
52
|
+
def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
|
|
53
|
+
return [e for e in self._entries if query.lower() in e.content.lower()][:top_k]
|
|
54
|
+
|
|
55
|
+
def recent(self, n: int = 10) -> list[MemoryEntry]:
|
|
56
|
+
return self._entries[-n:]
|
|
57
|
+
|
|
58
|
+
def clear(self) -> None:
|
|
59
|
+
self._entries.clear()
|