dataenginex 0.10.0__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/ci.yml +23 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/docs-notify.yml +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/CHANGELOG.md +39 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/CLAUDE.md +1 -1
- dataenginex-1.0.0/CODE_OF_CONDUCT.md +57 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/PKG-INFO +1 -2
- {dataenginex-0.10.0 → dataenginex-1.0.0}/README.md +1 -1
- dataenginex-1.0.0/SECURITY.md +71 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/ci-cd.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/index.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/observability.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/sdlc.md +1 -1
- {dataenginex-0.10.0 → dataenginex-1.0.0}/pyproject.toml +2 -2
- dataenginex-1.0.0/src/dataenginex/ai/__init__.py +79 -0
- dataenginex-1.0.0/src/dataenginex/ai/memory/__init__.py +16 -0
- dataenginex-1.0.0/src/dataenginex/ai/memory/base.py +59 -0
- dataenginex-1.0.0/src/dataenginex/ai/memory/episodic.py +40 -0
- dataenginex-1.0.0/src/dataenginex/ai/memory/long_term.py +52 -0
- dataenginex-1.0.0/src/dataenginex/ai/observability/__init__.py +9 -0
- dataenginex-1.0.0/src/dataenginex/ai/observability/audit.py +34 -0
- dataenginex-1.0.0/src/dataenginex/ai/observability/cost.py +57 -0
- dataenginex-1.0.0/src/dataenginex/ai/observability/metrics.py +26 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/__init__.py +7 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/anthropic.py +58 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/huggingface.py +36 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/ollama.py +48 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/openai.py +58 -0
- dataenginex-1.0.0/src/dataenginex/ai/routing/router.py +56 -0
- dataenginex-1.0.0/src/dataenginex/ai/runtime/__init__.py +27 -0
- dataenginex-1.0.0/src/dataenginex/ai/runtime/checkpoint.py +31 -0
- dataenginex-1.0.0/src/dataenginex/ai/runtime/executor.py +173 -0
- dataenginex-1.0.0/src/dataenginex/ai/runtime/sandbox.py +220 -0
- dataenginex-1.0.0/src/dataenginex/ai/workflows/__init__.py +9 -0
- dataenginex-1.0.0/src/dataenginex/ai/workflows/conditions.py +48 -0
- dataenginex-1.0.0/src/dataenginex/ai/workflows/dag.py +124 -0
- dataenginex-1.0.0/src/dataenginex/ai/workflows/human_loop.py +47 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/run_history.py +1 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/gates.py +1 -2
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/spark.py +1 -2
- dataenginex-1.0.0/tests/unit/test_ai_modules.py +755 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_llm.py +24 -4
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_quality_gates.py +4 -12
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_run_history.py +1 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/uv.lock +1 -12
- dataenginex-0.10.0/src/dataenginex/ai/__init__.py +0 -13
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/commands/new-feature.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/commands/validate.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/settings.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.env.template +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/dependabot.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/labels.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/release-pr-template.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/auto-pr-dev-to-main.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/auto-pr-to-dev.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/claude.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/docker-build-push.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/enforce-dev-to-main.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/label-sync.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/pypi-publish.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/release-dex.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/release-please.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/security.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.gitignore +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.gitleaks.toml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.pre-commit-config.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.python-version +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/.release-please-manifest.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/CODEOWNERS +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/CONTRIBUTING.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/Dockerfile +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/LICENSE +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docker-compose.test.yml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/adr/0000-template.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/adr/0001-medallion-architecture.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/api.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/core.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/dashboard.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/data.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/index.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/lakehouse.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/middleware.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/ml.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/plugins.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/warehouse.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/architecture.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/contributing.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/development.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/quickstart.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/release-notes.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/roadmap/project-roadmap.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/roadmap/project-roadmap.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/security-scanning.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-21-phase-0-foundation.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-dataenginex-1.0-master-plan.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-1-data-layer.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-6a-dex-engine-integration.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-6b-dex-studio-redesign.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-23-careerdex-example.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-23-dex-studio-direct-import.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-24-dex-naming-architecture-dry.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-28-unified-docs-and-tooling.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-21-dataenginex-v2-system-redesign.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-22-phase-6-integration-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-23-dex-studio-direct-import-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-24-dex-naming-architecture-dry-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-28-unified-docs-and-tooling-design.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/01_hello_pipeline.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/02_api_quickstart.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/03_quality_gate.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/04_ml_training.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/05_rag_demo.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/06_llm_quickstart.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/07_api_ingestion.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/08_spark_ml.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/09_feature_engineering.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/10_model_analysis.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/GUIDE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dashboard/dashboard_config.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dashboard/run_dashboard.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dex.yaml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/movies.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/poe_tasks.toml +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/release-please-config.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/GUIDE.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/localstack/create-buckets.sh +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/promote.sh +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/sonar-project.properties +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/README.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/RELEASE_NOTES.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/agents/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/agents/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/retrieval/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/retrieval/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/tools/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/tools/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/auth.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/errors.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/factory.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/pagination.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/rate_limit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/ai.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/ml.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/pipelines.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/root.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/system.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/main.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/run.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/serve.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/train.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/defaults.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/loader.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/schema.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/exceptions.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/interfaces.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/medallion_architecture.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/quality.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/validators.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/app.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/panels.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/csv.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/duckdb.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/legacy.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/dag.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/runner.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/profiler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/transforms/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/transforms/sql.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/catalog.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/partitioning.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/storage.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/logging_config.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/metrics_middleware.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/request_logging.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/tracing.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/drift.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/features/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/features/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/llm.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/mlflow_registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/scheduler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/tracking/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/tracking/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/training.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/vectorstore.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/orchestration/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/orchestration/builtin.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/plugins/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/plugins/registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/py.typed +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/audit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/gate.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/masking.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/pii.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/lineage.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/transforms.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tasks/findings.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tasks/lessons.md +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_feature_store.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_tracker.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_transform.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conftest.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_jobs.csv +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_jobs.json +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_cli_run.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_config_cli.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_full_app.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_pipeline_e2e.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_storage_real.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/load/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/__init__.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_agent_runtime.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ai_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_auth.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_factory.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_pagination.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_rate_limit.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_schemas.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_validators.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_agent.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_feature_store.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_serving.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_tracker.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_cli_train.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_config_loader.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_config_schema.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_exceptions.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_interfaces.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_registry.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_csv_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_data.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_data_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_drift_scheduler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_duckdb_connector.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_errors.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_health.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_lakehouse.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_logging.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_medallion.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_metrics.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_middleware.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ml.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ml_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_pipeline_dag.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_pipeline_runner.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_plugins.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_quality_spark.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_retriever.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_scheduler.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_secops.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_spark_fixtures.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_sql_transforms.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_storage_abstraction.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_system_router.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_tracing.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_vectorstore.py +0 -0
- {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_warehouse.py +0 -0
|
@@ -7,6 +7,10 @@ on:
|
|
|
7
7
|
branches: [main, dev]
|
|
8
8
|
workflow_dispatch:
|
|
9
9
|
|
|
10
|
+
schedule:
|
|
11
|
+
# Weekly Python version compatibility check
|
|
12
|
+
- cron: '0 0 * * 0'
|
|
13
|
+
|
|
10
14
|
permissions:
|
|
11
15
|
contents: read
|
|
12
16
|
|
|
@@ -26,7 +30,7 @@ jobs:
|
|
|
26
30
|
- run: uv run poe quality
|
|
27
31
|
|
|
28
32
|
test:
|
|
29
|
-
name: Tests
|
|
33
|
+
name: Tests (Python 3.13)
|
|
30
34
|
runs-on: ubuntu-latest
|
|
31
35
|
needs: quality
|
|
32
36
|
steps:
|
|
@@ -45,3 +49,21 @@ jobs:
|
|
|
45
49
|
fail_ci_if_error: false
|
|
46
50
|
env:
|
|
47
51
|
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
|
|
52
|
+
|
|
53
|
+
test-compat:
|
|
54
|
+
name: Python Compatibility
|
|
55
|
+
runs-on: ubuntu-latest
|
|
56
|
+
if: github.event_name == 'schedule'
|
|
57
|
+
strategy:
|
|
58
|
+
matrix:
|
|
59
|
+
python-version: ["3.11", "3.12"]
|
|
60
|
+
steps:
|
|
61
|
+
- uses: actions/checkout@v6
|
|
62
|
+
- uses: astral-sh/setup-uv@v7
|
|
63
|
+
with:
|
|
64
|
+
version: "latest"
|
|
65
|
+
python-version: ${{ matrix.python-version }}
|
|
66
|
+
- run: uv sync --group ml
|
|
67
|
+
env:
|
|
68
|
+
UV_PROJECT_ENVIRONMENT: .venv
|
|
69
|
+
- run: uv run poe check-all
|
|
@@ -28,6 +28,45 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
28
28
|
|
|
29
29
|
## [Unreleased]
|
|
30
30
|
|
|
31
|
+
## [1.0.0] - 2026-04-07
|
|
32
|
+
|
|
33
|
+
### Highlights
|
|
34
|
+
|
|
35
|
+
- **Complete Data + ML + AI Framework**: All phases from the v1.0 master plan implemented — config-driven pipeline via `dex.yaml`, BackendRegistry pattern for swappable backends, unified CLI.
|
|
36
|
+
- **Data Layer**: DuckDB connector (default), CSV connector, PipelineRunner with DAG resolution, transforms (filter, derive, cast, deduplicate), quality gates (completeness, uniqueness), column-level lineage tracking, built-in cron scheduler.
|
|
37
|
+
- **ML Layer**: SQLite-backed experiment tracker, DuckDB feature store, sklearn/xgboost training integration, model registry with versioning (dev → staging → production), built-in model serving via FastAPI, PSI drift detection.
|
|
38
|
+
- **AI Layer**: Built-in ReAct agent runtime, Ollama LLM provider (default), tool registry (sql_query, predict, search), BM25 sparse retrieval (DuckDB FTS), dense vector retrieval (DuckDB VSS HNSW), hybrid retrieval with RRF fusion, agent memory (short-term + episodic).
|
|
39
|
+
- **CLI Commands**: `dex init`, `dex validate`, `dex version`, `dex serve`, `dex run`, `dex train`, `dex agent`, `dex query`.
|
|
40
|
+
- **API**: FastAPI app factory, JWT auth, rate limiting, health endpoints, project CRUD, pipeline run/status, data explorer, ML experiments/models, agent chat/manage, WebSocket for live logs and streaming.
|
|
41
|
+
- **Backend Registry Pattern**: Every subsystem follows ABC + BackendRegistry[T] pattern with built-in implementations and optional extras (Dagster, MLflow, Qdrant, LanceDB, sentence-transformers, PySpark).
|
|
42
|
+
|
|
43
|
+
### Breaking Changes
|
|
44
|
+
|
|
45
|
+
- **FastAPI now optional**: Core install (`pip install dataenginex`) includes only lightweight deps. Install `[api]` extra for FastAPI/uvicorn: `pip install dataenginex[api]`
|
|
46
|
+
- **Cloud SDKs now optional**: Core install no longer requires boto3/google-cloud-storage/google-cloud-bigquery. Install `[cloud]` extra: `pip install dataenginex[cloud]`
|
|
47
|
+
- **Routers moved**: API routers moved to application packages. Use `from dataenginex.api import ...` directly (requires `[api]` extra)
|
|
48
|
+
- **Root `__init__.py` slimmed**: Re-exports removed. Import from submodules directly: `from dataenginex.api import HealthChecker` etc.
|
|
49
|
+
|
|
50
|
+
### Added
|
|
51
|
+
|
|
52
|
+
- **Full project templates**: `dex init --template [minimal|data-pipeline|ml-project|ai-agent|full-stack|career-intelligence]`
|
|
53
|
+
- **Docker support**: Multi-stage Dockerfile (`ghcr.io/thedataenginex/dex`), docker-compose.yml for full stack
|
|
54
|
+
- **SecOps**: PII scanning in pipelines, masking, audit trail
|
|
55
|
+
- **Quality schema**: Spark audit integration for data quality validation
|
|
56
|
+
- **Examples**: 10 runnable examples in `examples/` directory
|
|
57
|
+
|
|
58
|
+
### Verification checklist
|
|
59
|
+
|
|
60
|
+
1. `uv run poe lint` — Ruff checks clean
|
|
61
|
+
2. `uv run poe typecheck` — mypy strict (all modules)
|
|
62
|
+
3. `uv run poe test` — 663 passed, 36 skipped
|
|
63
|
+
4. `pip install dataenginex` — installs successfully
|
|
64
|
+
5. `dex validate dex.yaml` — validates config
|
|
65
|
+
6. `dex version` — shows version
|
|
66
|
+
|
|
67
|
+
[Unreleased]: https://github.com/TheDataEngineX/DEX/compare/v1.0.0...HEAD
|
|
68
|
+
[1.0.0]: https://github.com/TheDataEngineX/DEX/releases/tag/v1.0.0
|
|
69
|
+
|
|
31
70
|
## [0.7.1] - 2026-03-17
|
|
32
71
|
|
|
33
72
|
### Fixed
|
|
@@ -11,7 +11,7 @@ Goal is to save Claude code tokens for lower cost without losing quality.
|
|
|
11
11
|
|
|
12
12
|
| Package | Location | Purpose |
|
|
13
13
|
|---------|----------|---------|
|
|
14
|
-
| `dataenginex` | `src/dataenginex/` | Core framework — config
|
|
14
|
+
| `dataenginex` | `src/dataenginex/` | Core framework — config, registry, CLI, API, ML, AI (routing, runtime, memory, observability, workflows) |
|
|
15
15
|
|
|
16
16
|
**Stack:** Python 3.13+ · FastAPI · DuckDB · structlog · Pydantic · Click · Rich · uv · Ruff · mypy strict · pytest
|
|
17
17
|
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
|
2
|
+
|
|
3
|
+
## Our Pledge
|
|
4
|
+
|
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
|
9
|
+
nationality, personal appearance, race, religion, or sexual identity and
|
|
10
|
+
orientation.
|
|
11
|
+
|
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
|
13
|
+
diverse, inclusive, and healthy community.
|
|
14
|
+
|
|
15
|
+
## Our Standards
|
|
16
|
+
|
|
17
|
+
Examples of behavior that contributes to a positive environment:
|
|
18
|
+
|
|
19
|
+
- Using welcoming and inclusive language
|
|
20
|
+
- Being respectful of differing viewpoints and experiences
|
|
21
|
+
- Gracefully accepting constructive criticism
|
|
22
|
+
- Focusing on what is best for the community
|
|
23
|
+
- Showing empathy towards other community members
|
|
24
|
+
|
|
25
|
+
Examples of unacceptable behavior:
|
|
26
|
+
|
|
27
|
+
- The use of sexualized language or imagery, and sexual attention or advances
|
|
28
|
+
- Trolling, insulting or derogatory comments, and personal or political attacks
|
|
29
|
+
- Public or private harassment
|
|
30
|
+
- Publishing others' private information without explicit permission
|
|
31
|
+
- Other conduct which could reasonably be considered inappropriate
|
|
32
|
+
|
|
33
|
+
## Enforcement Responsibilities
|
|
34
|
+
|
|
35
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
|
36
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
|
37
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
|
38
|
+
or harmful.
|
|
39
|
+
|
|
40
|
+
## Scope
|
|
41
|
+
|
|
42
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
|
43
|
+
an individual is officially representing the community in public spaces.
|
|
44
|
+
|
|
45
|
+
## Enforcement
|
|
46
|
+
|
|
47
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
|
48
|
+
reported to the community leaders responsible for enforcement at
|
|
49
|
+
**conduct@thedataenginex.dev**.
|
|
50
|
+
|
|
51
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
|
52
|
+
|
|
53
|
+
## Attribution
|
|
54
|
+
|
|
55
|
+
This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
|
|
56
|
+
version 2.1, available at
|
|
57
|
+
<https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataenginex
|
|
3
|
-
Version: 0.10.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
|
|
5
5
|
Author-email: Jay <jayapal.myaka99@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -40,7 +40,6 @@ Requires-Dist: prometheus-client>=0.24.1
|
|
|
40
40
|
Requires-Dist: pyarrow>=23.0.1
|
|
41
41
|
Requires-Dist: pydantic>=2.10.0
|
|
42
42
|
Requires-Dist: python-dotenv>=1.2.1
|
|
43
|
-
Requires-Dist: python-json-logger>=4.0.0
|
|
44
43
|
Requires-Dist: pyyaml>=6.0.3
|
|
45
44
|
Requires-Dist: rich>=14.3.3
|
|
46
45
|
Requires-Dist: structlog>=25.5.0
|
|
@@ -112,7 +112,7 @@ ______________________________________________________________________
|
|
|
112
112
|
|
|
113
113
|
## Development
|
|
114
114
|
|
|
115
|
-
See [docs/
|
|
115
|
+
See [docs/development.md](docs/development.md) for full setup.
|
|
116
116
|
|
|
117
117
|
```bash
|
|
118
118
|
uv run poe check-all # lint + typecheck + tests
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# Security Policy
|
|
2
|
+
|
|
3
|
+
## Supported Versions
|
|
4
|
+
|
|
5
|
+
| Version | Supported |
|
|
6
|
+
|---------|-----------|
|
|
7
|
+
| Latest minor release (1.0.x) | ✅ |
|
|
8
|
+
| Previous minor release | ✅ (security fixes only) |
|
|
9
|
+
| Older versions | ❌ |
|
|
10
|
+
|
|
11
|
+
## Reporting a Vulnerability
|
|
12
|
+
|
|
13
|
+
**Do NOT open a public issue for security vulnerabilities.**
|
|
14
|
+
|
|
15
|
+
Instead, please report them via one of these channels:
|
|
16
|
+
|
|
17
|
+
1. **Email**: security@thedataenginex.dev
|
|
18
|
+
2. **GitHub Security Advisories**: Use the "Report a vulnerability" button on the Security tab
|
|
19
|
+
|
|
20
|
+
### What to Include
|
|
21
|
+
|
|
22
|
+
- Description of the vulnerability
|
|
23
|
+
- Steps to reproduce
|
|
24
|
+
- Potential impact assessment
|
|
25
|
+
- Suggested fix (if any)
|
|
26
|
+
|
|
27
|
+
### Response Timeline
|
|
28
|
+
|
|
29
|
+
| Stage | Timeline |
|
|
30
|
+
|-------|----------|
|
|
31
|
+
| Acknowledgment | Within 48 hours |
|
|
32
|
+
| Initial assessment | Within 5 business days |
|
|
33
|
+
| Fix development | Within 30 days (critical), 90 days (non-critical) |
|
|
34
|
+
| Public disclosure | After fix is released |
|
|
35
|
+
|
|
36
|
+
## Disclosure Policy
|
|
37
|
+
|
|
38
|
+
We follow [coordinated disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure).
|
|
39
|
+
We will credit reporters in the security advisory unless they prefer to remain anonymous.
|
|
40
|
+
|
|
41
|
+
## Security Practices
|
|
42
|
+
|
|
43
|
+
DataEngineX follows these security practices:
|
|
44
|
+
|
|
45
|
+
- **No hardcoded secrets** — all credentials via environment variables
|
|
46
|
+
- **Parameterized queries** — never SQL concatenation
|
|
47
|
+
- **Input validation** — Pydantic models at API boundaries
|
|
48
|
+
- **Dependency auditing** — automated via `uv run poe security`
|
|
49
|
+
- **Pickle safety** — SafeUnpickler with HMAC verification for model loading
|
|
50
|
+
- **Container security** — non-root users, minimal base images
|
|
51
|
+
- **HTTPS only** — all production traffic encrypted
|
|
52
|
+
- **Least privilege** — minimal permissions for service accounts
|
|
53
|
+
|
|
54
|
+
## Security-Related Dependencies
|
|
55
|
+
|
|
56
|
+
| Dependency | Purpose | Security Note |
|
|
57
|
+
|------------|---------|---------------|
|
|
58
|
+
| pydantic | Config validation | Validates all inputs |
|
|
59
|
+
| python-dotenv | Env var loading | Never commit .env files |
|
|
60
|
+
| httpx | HTTP client | Timeout configured |
|
|
61
|
+
| structlog | Logging | No PII in logs by default |
|
|
62
|
+
|
|
63
|
+
## Auditing
|
|
64
|
+
|
|
65
|
+
Run security audits locally:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
uv run poe security # pip-audit for vulnerabilities
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
CI runs `pip-audit` and dependency scanning on every PR.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "dataenginex"
|
|
3
|
-
version = "0.10.0"
|
|
3
|
+
version = "1.0.0"
|
|
4
4
|
description = "DataEngineX - Core framework for AI/ML/Data engineering projects"
|
|
5
5
|
authors = [
|
|
6
6
|
{name = "Jay", email = "jayapal.myaka99@gmail.com"}
|
|
@@ -22,7 +22,7 @@ dependencies = [
|
|
|
22
22
|
"fastapi>=0.135.1",
|
|
23
23
|
"uvicorn>=0.42.0",
|
|
24
24
|
"structlog>=25.5.0",
|
|
25
|
-
|
|
25
|
+
# OpenTelemetry
|
|
26
26
|
"opentelemetry-api>=1.40.0",
|
|
27
27
|
"opentelemetry-sdk>=1.40.0",
|
|
28
28
|
"opentelemetry-instrumentation-fastapi>=0.61b0",
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""AI layer — agents, retrieval, tools, routing, runtime, memory, observability, workflows.
|
|
2
|
+
|
|
3
|
+
Public API::
|
|
4
|
+
|
|
5
|
+
from dataenginex.ai import (
|
|
6
|
+
retriever_registry, agent_registry, tool_registry,
|
|
7
|
+
BuiltinRetriever, BuiltinAgentRuntime,
|
|
8
|
+
ModelRouter, BaseProvider,
|
|
9
|
+
Sandbox, SandboxConfig,
|
|
10
|
+
AuditLog, CostTracker,
|
|
11
|
+
AgentDAG, Condition,
|
|
12
|
+
ShortTermMemory, LongTermMemory, EpisodicMemory,
|
|
13
|
+
CheckpointManager, AgentExecutor, AgentConfig,
|
|
14
|
+
AgentMetrics,
|
|
15
|
+
)
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from dataenginex.ai.agents import agent_registry
|
|
21
|
+
from dataenginex.ai.agents.builtin import BuiltinAgentRuntime
|
|
22
|
+
from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
|
|
23
|
+
from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
|
|
24
|
+
from dataenginex.ai.memory.long_term import LongTermMemory
|
|
25
|
+
from dataenginex.ai.observability.audit import AuditEntry, AuditLog
|
|
26
|
+
from dataenginex.ai.observability.cost import CostTracker, TokenUsage
|
|
27
|
+
from dataenginex.ai.observability.metrics import AgentMetrics
|
|
28
|
+
from dataenginex.ai.retrieval import retriever_registry
|
|
29
|
+
from dataenginex.ai.retrieval.builtin import BuiltinRetriever
|
|
30
|
+
from dataenginex.ai.routing.router import BaseProvider, ModelRouter
|
|
31
|
+
from dataenginex.ai.runtime.checkpoint import Checkpoint, CheckpointManager
|
|
32
|
+
from dataenginex.ai.runtime.executor import AgentConfig, AgentExecutor, AgentResponse
|
|
33
|
+
from dataenginex.ai.runtime.sandbox import Sandbox, SandboxConfig, SandboxResult
|
|
34
|
+
from dataenginex.ai.tools import ToolRegistry, ToolSpec, tool_registry
|
|
35
|
+
from dataenginex.ai.workflows.conditions import Condition
|
|
36
|
+
from dataenginex.ai.workflows.dag import AgentDAG
|
|
37
|
+
from dataenginex.ai.workflows.human_loop import ApprovalGate
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
# Registries
|
|
41
|
+
"agent_registry",
|
|
42
|
+
"retriever_registry",
|
|
43
|
+
"tool_registry",
|
|
44
|
+
# Agents
|
|
45
|
+
"BuiltinAgentRuntime",
|
|
46
|
+
"BuiltinRetriever",
|
|
47
|
+
# Tools
|
|
48
|
+
"ToolRegistry",
|
|
49
|
+
"ToolSpec",
|
|
50
|
+
# Memory
|
|
51
|
+
"BaseMemory",
|
|
52
|
+
"MemoryEntry",
|
|
53
|
+
"ShortTermMemory",
|
|
54
|
+
"LongTermMemory",
|
|
55
|
+
"EpisodicMemory",
|
|
56
|
+
"Episode",
|
|
57
|
+
# Observability
|
|
58
|
+
"AuditEntry",
|
|
59
|
+
"AuditLog",
|
|
60
|
+
"CostTracker",
|
|
61
|
+
"TokenUsage",
|
|
62
|
+
"AgentMetrics",
|
|
63
|
+
# Routing
|
|
64
|
+
"BaseProvider",
|
|
65
|
+
"ModelRouter",
|
|
66
|
+
# Runtime
|
|
67
|
+
"AgentConfig",
|
|
68
|
+
"AgentExecutor",
|
|
69
|
+
"AgentResponse",
|
|
70
|
+
"Checkpoint",
|
|
71
|
+
"CheckpointManager",
|
|
72
|
+
"Sandbox",
|
|
73
|
+
"SandboxConfig",
|
|
74
|
+
"SandboxResult",
|
|
75
|
+
# Workflows
|
|
76
|
+
"AgentDAG",
|
|
77
|
+
"ApprovalGate",
|
|
78
|
+
"Condition",
|
|
79
|
+
]
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Agent memory — short-term, long-term, and episodic memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
|
|
6
|
+
from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
|
|
7
|
+
from dataenginex.ai.memory.long_term import LongTermMemory
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"BaseMemory",
|
|
11
|
+
"Episode",
|
|
12
|
+
"EpisodicMemory",
|
|
13
|
+
"LongTermMemory",
|
|
14
|
+
"MemoryEntry",
|
|
15
|
+
"ShortTermMemory",
|
|
16
|
+
]
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""Agent memory system — short-term, long-term, and episodic memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class MemoryEntry:
    """One record held by an agent memory store."""

    # Text payload of the entry.
    content: str
    # Origin label: "user", "assistant", "system", or "tool".
    role: str
    # Free-form extra data; every instance gets its own fresh dict.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Stays at 0.0 unless the caller (or a store) supplies a value.
    timestamp: float = 0.0
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BaseMemory(ABC):
    """Abstract interface for agent memory backends.

    Subclasses must implement all four methods below; the class cannot be
    instantiated until they do.
    """

    @abstractmethod
    def add(self, entry: MemoryEntry) -> None:
        """Store *entry* in this memory."""

    @abstractmethod
    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return at most *top_k* entries that match *query*.

        How a match is decided (substring, keyword overlap, embeddings, ...)
        is chosen by the concrete subclass; this interface does not promise
        semantic similarity.
        """

    @abstractmethod
    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return the *n* most recently added entries."""

    @abstractmethod
    def clear(self) -> None:
        """Remove every entry from this memory."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ShortTermMemory(BaseMemory):
    """Session/conversation memory — lives in-process, lost on restart.

    Entries are kept in insertion order in a plain list. Once more than
    ``max_entries`` are stored, each ``add`` drops the oldest entry.
    """

    def __init__(self, max_entries: int = 100) -> None:
        """Create an empty memory capped at *max_entries* entries."""
        self._entries: list[MemoryEntry] = []
        self._max = max_entries

    def add(self, entry: MemoryEntry) -> None:
        """Append *entry*, evicting the oldest one if the cap is exceeded."""
        self._entries.append(entry)
        if len(self._entries) > self._max:
            self._entries.pop(0)

    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return up to *top_k* entries whose content contains *query*.

        Matching is a case-insensitive substring test; results keep
        insertion order (oldest first). A non-positive *top_k* yields ``[]``.
        """
        if top_k <= 0:
            return []
        needle = query.lower()  # lower-case once, not once per entry
        return [e for e in self._entries if needle in e.content.lower()][:top_k]

    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return the last *n* entries, oldest first; ``[]`` when *n* <= 0."""
        if n <= 0:
            # ``entries[-0:]`` is the whole list, so zero needs its own branch.
            return []
        return self._entries[-n:]

    def clear(self) -> None:
        """Remove every stored entry."""
        self._entries.clear()
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Episodic memory — experience replay for task-based learning."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Episode(BaseModel):
    """Record of one full task execution by an agent, with its outcome."""

    # The task that was attempted.
    task: str
    # Step records; each one is a free-form string-keyed mapping.
    steps: list[dict[str, Any]]
    # How the task ended.
    outcome: str
    # Numeric score for the episode; higher sorts first in best_episodes().
    reward: float
    timestamp: float
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class EpisodicMemory:
    """Experience replay memory — stores and retrieves past episodes."""

    def __init__(self) -> None:
        """Create an empty episode store."""
        self._episodes: list[Episode] = []

    def add_episode(self, episode: Episode) -> None:
        """Append *episode* to the store."""
        self._episodes.append(episode)

    def recall_similar(self, task: str, top_k: int = 5) -> list[Episode]:
        """Return up to *top_k* stored episodes whose task resembles *task*.

        The score of an episode is the number of whitespace-separated words
        of *task* that occur (case-insensitively, as substrings) in the
        episode's task text. Episodes scoring zero are omitted. Results are
        ordered by descending score; ties keep insertion order. A
        non-positive *top_k* yields ``[]``.
        """
        if top_k <= 0:
            return []
        words = task.lower().split()  # computed once, not once per episode
        scored: list[tuple[int, Episode]] = []
        for ep in self._episodes:
            ep_task = ep.task.lower()  # computed once, not once per word
            score = sum(1 for word in words if word in ep_task)
            if score > 0:
                scored.append((score, ep))
        # list.sort is stable, so equal scores stay in insertion order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [ep for _, ep in scored[:top_k]]

    def best_episodes(self, top_k: int = 5) -> list[Episode]:
        """Return up to *top_k* episodes with the highest reward, best first.

        A non-positive *top_k* yields ``[]``.
        """
        if top_k <= 0:
            return []
        return sorted(self._episodes, key=lambda e: e.reward, reverse=True)[:top_k]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Long-term memory — keyword-searchable persistent memory store."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import asdict
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from dataenginex.ai.memory.base import BaseMemory, MemoryEntry
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class LongTermMemory(BaseMemory):
    """Persistent memory with keyword search — no external vector DB required.

    Data is stored as a flat list and scored by keyword overlap.
    Call :meth:`persist` to write to disk and :meth:`load_from_file` to restore.
    """

    def __init__(self) -> None:
        """Create an empty store."""
        self._entries: list[MemoryEntry] = []

    def add(self, entry: MemoryEntry) -> None:
        """Append *entry*, stamping it with the current time if it has none.

        Note that the timestamp is written onto the passed-in object itself
        when its ``timestamp`` is falsy (e.g. the default ``0.0``).
        """
        if not entry.timestamp:
            entry.timestamp = time.time()
        self._entries.append(entry)

    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return up to *top_k* entries ranked by keyword overlap with *query*.

        The score of an entry is the number of whitespace-separated words of
        *query* that occur (case-insensitively, as substrings) in its
        content. Entries scoring zero are omitted; ties keep insertion
        order. A non-positive *top_k* yields ``[]``.
        """
        if top_k <= 0:
            return []
        words = query.lower().split()  # computed once, not once per entry
        scored: list[tuple[int, MemoryEntry]] = []
        for entry in self._entries:
            content = entry.content.lower()  # computed once, not once per word
            score = sum(1 for word in words if word in content)
            if score > 0:
                scored.append((score, entry))
        # list.sort is stable, so equal scores stay in insertion order.
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [e for _, e in scored[:top_k]]

    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return the last *n* entries, oldest first; ``[]`` when *n* <= 0."""
        if n <= 0:
            # ``entries[-0:]`` is the whole list, so zero needs its own branch.
            return []
        return self._entries[-n:]

    def clear(self) -> None:
        """Remove every in-memory entry (files on disk are not touched)."""
        self._entries.clear()

    def persist(self, path: str) -> None:
        """Persist all memory entries to a JSON file at *path*."""
        data = [asdict(e) for e in self._entries]
        Path(path).write_text(json.dumps(data, indent=2), encoding="utf-8")

    def load_from_file(self, path: str) -> None:
        """Replace in-memory entries with those from a JSON file at *path*.

        Each item in the file is passed to ``MemoryEntry`` as keyword
        arguments.
        """
        raw: list[dict[str, object]] = json.loads(Path(path).read_text(encoding="utf-8"))
        self._entries = [MemoryEntry(**item) for item in raw]  # type: ignore[arg-type]
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""Observability — audit logging, cost tracking, and metrics."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataenginex.ai.observability.audit import AuditEntry, AuditLog
|
|
6
|
+
from dataenginex.ai.observability.cost import CostTracker, TokenUsage
|
|
7
|
+
from dataenginex.ai.observability.metrics import AgentMetrics
|
|
8
|
+
|
|
9
|
+
__all__ = ["AgentMetrics", "AuditEntry", "AuditLog", "CostTracker", "TokenUsage"]
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Audit logging — track every agent action for compliance and debugging."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class AuditEntry(BaseModel):
    """One recorded agent action in the audit log."""

    # Name of the agent that performed the action; AuditLog filters on this.
    agent_name: str
    action: str
    input: str
    output: str
    timestamp: float
    # Optional; empty string when not supplied.
    reasoning: str = ""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AuditLog:
    """In-memory audit log for agent actions."""

    def __init__(self) -> None:
        """Create an empty log."""
        self._entries: list[AuditEntry] = []

    def log(self, entry: AuditEntry) -> None:
        """Record an audit entry."""
        self._entries.append(entry)

    def get_entries(self, agent_name: str | None = None, limit: int = 100) -> list[AuditEntry]:
        """Get the latest audit entries, optionally filtered by agent name.

        Args:
            agent_name: When given, only entries whose ``agent_name`` equals
                this value are considered.
            limit: Maximum number of entries to return. A non-positive value
                yields ``[]``.

        Returns:
            A new list holding the last *limit* matching entries, in the
            order they were logged.
        """
        if limit <= 0:
            # ``entries[-0:]`` is the whole list, so zero needs its own branch.
            return []
        if agent_name is None:
            return self._entries[-limit:]
        matching = [e for e in self._entries if e.agent_name == agent_name]
        return matching[-limit:]
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Token usage and cost tracking for LLM calls."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TokenUsage(BaseModel):
    """Token counts and cost recorded for one LLM call."""

    # Model identifier; CostTracker groups its per-model breakdown by this.
    model: str
    tokens_in: int
    tokens_out: int
    # Cost of the call in US dollars.
    cost_usd: float
    # Optional; empty string when the call is not attributed to an agent.
    agent_name: str = ""
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class CostTracker:
    """Collects token-usage records and reports cumulative totals."""

    def __init__(self) -> None:
        """Start with no usage records."""
        self._records: list[TokenUsage] = []

    def record(self, usage: TokenUsage) -> None:
        """Store one usage record."""
        self._records.append(usage)

    def total_cost(self, agent_name: str | None = None) -> float:
        """Sum ``cost_usd`` over all records, or only those of *agent_name*."""
        if agent_name is not None:
            return sum(rec.cost_usd for rec in self._records if rec.agent_name == agent_name)
        return sum(rec.cost_usd for rec in self._records)

    def summary(self) -> dict[str, Any]:
        """Build a dict of overall totals plus a per-model breakdown."""
        tokens_in = 0
        tokens_out = 0
        for rec in self._records:
            tokens_in += rec.tokens_in
            tokens_out += rec.tokens_out
        return {
            "total_records": len(self._records),
            "total_tokens_in": tokens_in,
            "total_tokens_out": tokens_out,
            "total_cost_usd": self.total_cost(),
            "by_model": self._by_model(),
        }

    def _by_model(self) -> dict[str, dict[str, Any]]:
        """Aggregate token counts and cost for each distinct model name."""
        breakdown: dict[str, dict[str, Any]] = {}
        for rec in self._records:
            # First sighting of a model creates its zeroed bucket.
            bucket = breakdown.setdefault(
                rec.model, {"tokens_in": 0, "tokens_out": 0, "cost_usd": 0.0}
            )
            bucket["tokens_in"] += rec.tokens_in
            bucket["tokens_out"] += rec.tokens_out
            bucket["cost_usd"] += rec.cost_usd
        return breakdown
|