dataenginex 1.1.0__tar.gz → 1.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dataenginex-1.1.2/.github/workflows/auto-pr.yml +23 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/ci.yml +4 -4
- dataenginex-1.1.2/.github/workflows/release.yml +69 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.gitignore +1 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/CHANGELOG.md +7 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/PKG-INFO +59 -45
- dataenginex-1.1.2/README.md +161 -0
- dataenginex-1.1.2/docs/api-reference/api.md +63 -0
- dataenginex-1.1.2/docs/api-reference/dashboard.md +8 -0
- dataenginex-1.1.2/docs/api-reference/ml.md +45 -0
- dataenginex-1.1.2/docs/architecture.md +178 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/ci-cd.md +65 -114
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/development.md +7 -8
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/observability.md +20 -22
- dataenginex-1.1.2/docs/quickstart.md +110 -0
- dataenginex-1.1.2/docs/release-notes.md +141 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/02_api_quickstart.py +8 -19
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/GUIDE.md +26 -19
- {dataenginex-1.1.0 → dataenginex-1.1.2}/poe_tasks.toml +0 -13
- {dataenginex-1.1.0 → dataenginex-1.1.2}/pyproject.toml +12 -28
- dataenginex-1.1.2/scripts/localstack/init.sh +7 -0
- dataenginex-1.1.2/src/dataenginex/README.md +88 -0
- dataenginex-1.1.2/src/dataenginex/__init__.py +96 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/__init__.py +38 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/agents/builtin.py +2 -2
- {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/llm.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/langfuse.py +1 -1
- {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/vectorstore.py +3 -3
- dataenginex-1.1.2/src/dataenginex/api/__init__.py +36 -0
- dataenginex-1.1.2/src/dataenginex/api/errors.py +43 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/main.py +28 -27
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/run.py +14 -17
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/train.py +21 -31
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/defaults.py +0 -4
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/schema.py +0 -23
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/runner.py +5 -3
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/gates.py +4 -3
- dataenginex-1.1.2/src/dataenginex/engine.py +803 -0
- dataenginex-1.1.2/src/dataenginex/middleware/__init__.py +43 -0
- dataenginex-1.1.2/src/dataenginex/ml/__init__.py +56 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/builtin.py +2 -3
- dataenginex-1.1.2/src/dataenginex/orchestration/__init__.py +23 -0
- {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/orchestration}/scheduler.py +2 -3
- dataenginex-1.1.2/src/dataenginex/store.py +814 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/lineage.py +13 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/worker.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_serving.py +2 -2
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_config_loader.py +4 -5
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_config_schema.py +2 -3
- dataenginex-1.1.2/tests/unit/test_dex_engine.py +144 -0
- dataenginex-1.1.2/tests/unit/test_dex_store.py +338 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_domain_metrics_wiring.py +2 -45
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_drift_scheduler.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_llm.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_llm_litellm_vllm.py +3 -3
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_observability_langfuse.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_vectorstore.py +1 -1
- {dataenginex-1.1.0 → dataenginex-1.1.2}/uv.lock +751 -868
- dataenginex-1.1.0/.github/workflows/auto-pr-dev-to-main.yml +0 -15
- dataenginex-1.1.0/.github/workflows/auto-pr-to-dev.yml +0 -16
- dataenginex-1.1.0/.github/workflows/claude.yml +0 -23
- dataenginex-1.1.0/.github/workflows/docker-build-push.yml +0 -63
- dataenginex-1.1.0/.github/workflows/docs-notify.yml +0 -18
- dataenginex-1.1.0/.github/workflows/label-sync.yml +0 -31
- dataenginex-1.1.0/.github/workflows/pypi-publish.yml +0 -290
- dataenginex-1.1.0/.github/workflows/release-dex.yml +0 -150
- dataenginex-1.1.0/.github/workflows/release-please.yml +0 -16
- dataenginex-1.1.0/.release-please-manifest.json +0 -3
- dataenginex-1.1.0/README.md +0 -157
- dataenginex-1.1.0/docs/api-reference/api.md +0 -5
- dataenginex-1.1.0/docs/api-reference/dashboard.md +0 -7
- dataenginex-1.1.0/docs/api-reference/ml.md +0 -5
- dataenginex-1.1.0/docs/architecture.md +0 -150
- dataenginex-1.1.0/docs/quickstart.md +0 -85
- dataenginex-1.1.0/docs/release-notes.md +0 -51
- dataenginex-1.1.0/release-please-config.json +0 -23
- dataenginex-1.1.0/sonar-project.properties +0 -22
- dataenginex-1.1.0/src/dataenginex/README.md +0 -63
- dataenginex-1.1.0/src/dataenginex/__init__.py +0 -85
- dataenginex-1.1.0/src/dataenginex/api/__init__.py +0 -73
- dataenginex-1.1.0/src/dataenginex/api/auth.py +0 -242
- dataenginex-1.1.0/src/dataenginex/api/errors.py +0 -77
- dataenginex-1.1.0/src/dataenginex/api/factory.py +0 -245
- dataenginex-1.1.0/src/dataenginex/api/health.py +0 -147
- dataenginex-1.1.0/src/dataenginex/api/jwks.py +0 -155
- dataenginex-1.1.0/src/dataenginex/api/ldap_sync.py +0 -170
- dataenginex-1.1.0/src/dataenginex/api/rate_limit.py +0 -131
- dataenginex-1.1.0/src/dataenginex/api/rbac.py +0 -91
- dataenginex-1.1.0/src/dataenginex/api/routers/__init__.py +0 -9
- dataenginex-1.1.0/src/dataenginex/api/routers/ai.py +0 -192
- dataenginex-1.1.0/src/dataenginex/api/routers/data.py +0 -154
- dataenginex-1.1.0/src/dataenginex/api/routers/health.py +0 -26
- dataenginex-1.1.0/src/dataenginex/api/routers/ml.py +0 -182
- dataenginex-1.1.0/src/dataenginex/api/routers/pipelines.py +0 -194
- dataenginex-1.1.0/src/dataenginex/api/routers/root.py +0 -25
- dataenginex-1.1.0/src/dataenginex/api/routers/system.py +0 -48
- dataenginex-1.1.0/src/dataenginex/api/scim.py +0 -629
- dataenginex-1.1.0/src/dataenginex/cli/serve.py +0 -48
- dataenginex-1.1.0/src/dataenginex/middleware/__init__.py +0 -67
- dataenginex-1.1.0/src/dataenginex/middleware/metrics_middleware.py +0 -87
- dataenginex-1.1.0/src/dataenginex/middleware/request_logging.py +0 -88
- dataenginex-1.1.0/src/dataenginex/middleware/tracing.py +0 -104
- dataenginex-1.1.0/src/dataenginex/ml/__init__.py +0 -105
- dataenginex-1.1.0/src/dataenginex/orchestration/__init__.py +0 -10
- dataenginex-1.1.0/tests/integration/test_api_middleware_integration.py +0 -375
- dataenginex-1.1.0/tests/integration/test_full_app.py +0 -89
- dataenginex-1.1.0/tests/unit/test_ai_router.py +0 -96
- dataenginex-1.1.0/tests/unit/test_ai_router_extended.py +0 -273
- dataenginex-1.1.0/tests/unit/test_api_auth.py +0 -100
- dataenginex-1.1.0/tests/unit/test_api_factory.py +0 -168
- dataenginex-1.1.0/tests/unit/test_api_jwks.py +0 -179
- dataenginex-1.1.0/tests/unit/test_api_rate_limit.py +0 -86
- dataenginex-1.1.0/tests/unit/test_api_rbac.py +0 -109
- dataenginex-1.1.0/tests/unit/test_api_scim.py +0 -162
- dataenginex-1.1.0/tests/unit/test_data_router.py +0 -117
- dataenginex-1.1.0/tests/unit/test_errors.py +0 -64
- dataenginex-1.1.0/tests/unit/test_health.py +0 -126
- dataenginex-1.1.0/tests/unit/test_metrics.py +0 -139
- dataenginex-1.1.0/tests/unit/test_middleware.py +0 -62
- dataenginex-1.1.0/tests/unit/test_ml_router.py +0 -122
- dataenginex-1.1.0/tests/unit/test_pipeline_router_extended.py +0 -252
- dataenginex-1.1.0/tests/unit/test_security_extended.py +0 -379
- dataenginex-1.1.0/tests/unit/test_system_router.py +0 -65
- dataenginex-1.1.0/tests/unit/test_tracing.py +0 -90
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/commands/new-feature.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/commands/validate.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/settings.json +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.env.template +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/dependabot.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/labels.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/release-pr-template.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/enforce-dev-to-main.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/security.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.gitleaks.toml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.pre-commit-config.yaml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/.python-version +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/CLAUDE.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/CODEOWNERS +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/CODE_OF_CONDUCT.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/CONTRIBUTING.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/Dockerfile +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/LICENSE +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/SECURITY.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docker-compose.test.yml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/adr/0000-template.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/adr/0001-medallion-architecture.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/core.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/data.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/index.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/lakehouse.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/middleware.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/plugins.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/warehouse.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/contributing.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/index.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/sdlc.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/security-scanning.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/01_hello_pipeline.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/03_quality_gate.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/04_ml_training.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/05_rag_demo.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/06_llm_quickstart.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/07_api_ingestion.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/08_spark_ml.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/09_feature_engineering.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/10_model_analysis.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dashboard/dashboard_config.yaml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dashboard/run_dashboard.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/data/events.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/data/users.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dex.yaml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/customers.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/orders.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/products.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/dex.yaml +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/run_all.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/movies.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/RELEASE_NOTES.md +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/agents/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/base.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/episodic.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/long_term.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/audit.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/cost.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/metrics.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/builtin.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/graph.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/anthropic.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/huggingface.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/ollama.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/openai.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/router.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/executor.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/sandbox.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/tools/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/tools/builtin.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/conditions.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/dag.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/human_loop.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/api/pagination.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/api/schemas.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/loader.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/exceptions.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/interfaces.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/medallion_architecture.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/quality.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/schemas.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/validators.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/csv.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/duckdb.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/legacy.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/dag.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/run_history.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/profiler.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/spark.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/transforms/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/transforms/sql.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/catalog.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/partitioning.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/storage.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/domain_metrics.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/logging_config.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/metrics.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/drift.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/features/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/features/builtin.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/metrics.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/mlflow_registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/builtin.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/training.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/orchestration/builtin.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/plugins/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/plugins/registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/py.typed +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/audit.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/gate.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/masking.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/pii.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/transforms.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_connector.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_feature_store.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_tracker.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_transform.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conftest.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_data.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.csv +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.json +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_ai_integration.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_cli_run.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_config_cli.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_lineage_integration.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_ml_integration.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_pipeline_e2e.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_secops_integration.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_storage_real.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/load/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/__init__.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_agent_runtime.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_ai_modules.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_pagination.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_schemas.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_validators.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_agent.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_feature_store.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_tracker.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_cli_train.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_exceptions.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_interfaces.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_registry.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_csv_connector.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_data.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_domain_metrics.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_duckdb_connector.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_lakehouse.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_logging.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_medallion.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_ml.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_pipeline_dag.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_pipeline_runner.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_plugins.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_quality_gates.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_quality_spark.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_retriever.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_retriever_graph.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_run_history.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_scheduler.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_secops.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_spark_fixtures.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_sql_transforms.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_storage_abstraction.py +0 -0
- {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_warehouse.py +0 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
name: Auto PR
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- dev
|
|
7
|
+
- 'feature/**'
|
|
8
|
+
- 'fix/**'
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: write
|
|
12
|
+
pull-requests: write
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
feature-to-dev:
|
|
16
|
+
if: startsWith(github.ref, 'refs/heads/feature/') || startsWith(github.ref, 'refs/heads/fix/')
|
|
17
|
+
uses: TheDataEngineX/.github/.github/workflows/auto-pr-to-dev.yml@main
|
|
18
|
+
secrets: inherit
|
|
19
|
+
|
|
20
|
+
dev-to-main:
|
|
21
|
+
if: github.ref == 'refs/heads/dev'
|
|
22
|
+
uses: TheDataEngineX/.github/.github/workflows/auto-pr-dev-to-main.yml@main
|
|
23
|
+
secrets: inherit
|
|
@@ -20,7 +20,7 @@ jobs:
|
|
|
20
20
|
runs-on: ubuntu-latest
|
|
21
21
|
steps:
|
|
22
22
|
- uses: actions/checkout@v6
|
|
23
|
-
- uses: astral-sh/setup-uv@
|
|
23
|
+
- uses: astral-sh/setup-uv@v8.1.0
|
|
24
24
|
with:
|
|
25
25
|
version: "latest"
|
|
26
26
|
python-version: "3.13"
|
|
@@ -35,7 +35,7 @@ jobs:
|
|
|
35
35
|
needs: quality
|
|
36
36
|
steps:
|
|
37
37
|
- uses: actions/checkout@v6
|
|
38
|
-
- uses: astral-sh/setup-uv@
|
|
38
|
+
- uses: astral-sh/setup-uv@v8.1.0
|
|
39
39
|
with:
|
|
40
40
|
version: "latest"
|
|
41
41
|
python-version: "3.13"
|
|
@@ -43,7 +43,7 @@ jobs:
|
|
|
43
43
|
env:
|
|
44
44
|
UV_PROJECT_ENVIRONMENT: .venv
|
|
45
45
|
- run: uv run poe test-cov-core
|
|
46
|
-
- uses: codecov/codecov-action@
|
|
46
|
+
- uses: codecov/codecov-action@v5
|
|
47
47
|
with:
|
|
48
48
|
flags: dataenginex
|
|
49
49
|
fail_ci_if_error: false
|
|
@@ -59,7 +59,7 @@ jobs:
|
|
|
59
59
|
python-version: ["3.11", "3.12"]
|
|
60
60
|
steps:
|
|
61
61
|
- uses: actions/checkout@v6
|
|
62
|
-
- uses: astral-sh/setup-uv@
|
|
62
|
+
- uses: astral-sh/setup-uv@v8.1.0
|
|
63
63
|
with:
|
|
64
64
|
version: "latest"
|
|
65
65
|
python-version: ${{ matrix.python-version }}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
name: Release
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- 'v[0-9]+.[0-9]+.[0-9]+'
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
name: Build
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
permissions:
|
|
13
|
+
contents: read
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v6
|
|
16
|
+
- uses: astral-sh/setup-uv@v8.1.0
|
|
17
|
+
with:
|
|
18
|
+
version: "latest"
|
|
19
|
+
python-version: "3.13"
|
|
20
|
+
- run: uv build
|
|
21
|
+
- uses: actions/upload-artifact@v7
|
|
22
|
+
with:
|
|
23
|
+
name: dist
|
|
24
|
+
path: dist/
|
|
25
|
+
|
|
26
|
+
publish-pypi:
|
|
27
|
+
name: Publish to PyPI
|
|
28
|
+
needs: build
|
|
29
|
+
runs-on: ubuntu-latest
|
|
30
|
+
environment: ${{ vars.PYPI_ENVIRONMENT }}
|
|
31
|
+
permissions:
|
|
32
|
+
contents: read
|
|
33
|
+
id-token: write
|
|
34
|
+
steps:
|
|
35
|
+
- uses: actions/download-artifact@v8
|
|
36
|
+
with:
|
|
37
|
+
name: dist
|
|
38
|
+
path: dist/
|
|
39
|
+
- uses: pypa/gh-action-pypi-publish@release/v1
|
|
40
|
+
|
|
41
|
+
github-release:
|
|
42
|
+
name: GitHub Release + SBOM
|
|
43
|
+
needs: build
|
|
44
|
+
runs-on: ubuntu-latest
|
|
45
|
+
permissions:
|
|
46
|
+
contents: write
|
|
47
|
+
steps:
|
|
48
|
+
- uses: actions/checkout@v6
|
|
49
|
+
- uses: astral-sh/setup-uv@v8.1.0
|
|
50
|
+
with:
|
|
51
|
+
version: "latest"
|
|
52
|
+
- name: Extract version
|
|
53
|
+
id: version
|
|
54
|
+
run: |
|
|
55
|
+
VERSION=$(grep -m 1 "^version = " pyproject.toml | sed 's/version = "//;s/".*//')
|
|
56
|
+
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
|
57
|
+
- name: Generate CycloneDX SBOM
|
|
58
|
+
run: |
|
|
59
|
+
uvx --from cyclonedx-bom cyclonedx-py environment \
|
|
60
|
+
-o sbom-dex-${{ steps.version.outputs.version }}.json \
|
|
61
|
+
--of json
|
|
62
|
+
- name: Create GitHub release + attach SBOM
|
|
63
|
+
env:
|
|
64
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
65
|
+
run: |
|
|
66
|
+
gh release create ${{ github.ref_name }} \
|
|
67
|
+
--title "DEX ${{ github.ref_name }}" \
|
|
68
|
+
--generate-notes \
|
|
69
|
+
"sbom-dex-${{ steps.version.outputs.version }}.json"
|
|
@@ -5,6 +5,13 @@ All notable changes to `dataenginex` will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [1.1.1](https://github.com/TheDataEngineX/dex/compare/v1.1.0...v1.1.1) (2026-05-07)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Bug Fixes
|
|
12
|
+
|
|
13
|
+
* rich reflex compat ([#232](https://github.com/TheDataEngineX/dex/issues/232)) ([#233](https://github.com/TheDataEngineX/dex/issues/233)) ([7d33c05](https://github.com/TheDataEngineX/dex/commit/7d33c05d9d3b041567a5539930dabc30294a0d8d))
|
|
14
|
+
|
|
8
15
|
## [1.1.0](https://github.com/TheDataEngineX/dex/compare/v1.0.3...v1.1.0) (2026-05-06)
|
|
9
16
|
|
|
10
17
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataenginex
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.1.2
|
|
4
4
|
Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
|
|
5
5
|
Author-email: Jay <jayapal.myaka99@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -31,28 +31,17 @@ Requires-Dist: asyncpg>=0.31.0
|
|
|
31
31
|
Requires-Dist: click>=8.3.3
|
|
32
32
|
Requires-Dist: croniter>=6.2.2
|
|
33
33
|
Requires-Dist: duckdb>=1.5.2
|
|
34
|
-
Requires-Dist: email-validator>=2.3.0
|
|
35
|
-
Requires-Dist: fastapi>=0.136.1
|
|
36
34
|
Requires-Dist: httpx>=0.28.1
|
|
37
|
-
Requires-Dist: opentelemetry-api>=1.41.1
|
|
38
|
-
Requires-Dist: opentelemetry-exporter-otlp>=1.41.1
|
|
39
|
-
Requires-Dist: opentelemetry-instrumentation-fastapi>=0.62b1
|
|
40
|
-
Requires-Dist: opentelemetry-sdk>=1.41.1
|
|
41
35
|
Requires-Dist: prometheus-client>=0.25.0
|
|
42
36
|
Requires-Dist: pyarrow>=23.0.1
|
|
43
37
|
Requires-Dist: pydantic>=2.13.4
|
|
44
38
|
Requires-Dist: python-dotenv>=1.2.2
|
|
45
39
|
Requires-Dist: pyyaml>=6.0.3
|
|
46
|
-
Requires-Dist: qdrant-client>=1.
|
|
40
|
+
Requires-Dist: qdrant-client>=1.18.0
|
|
47
41
|
Requires-Dist: redis>=5.3.1
|
|
48
|
-
Requires-Dist: rich>=15.0.0
|
|
49
42
|
Requires-Dist: structlog>=25.5.0
|
|
50
|
-
Requires-Dist: uvicorn>=0.46.0
|
|
51
|
-
Provides-Extra: auth
|
|
52
|
-
Requires-Dist: ldap3>=2.9.1; extra == 'auth'
|
|
53
|
-
Requires-Dist: pyjwt[crypto]>=2.12.1; extra == 'auth'
|
|
54
43
|
Provides-Extra: cloud
|
|
55
|
-
Requires-Dist: boto3>=1.43.
|
|
44
|
+
Requires-Dist: boto3>=1.43.7; extra == 'cloud'
|
|
56
45
|
Requires-Dist: google-cloud-bigquery>=3.41.0; extra == 'cloud'
|
|
57
46
|
Requires-Dist: google-cloud-storage>=3.10.1; extra == 'cloud'
|
|
58
47
|
Provides-Extra: observability
|
|
@@ -61,44 +50,72 @@ Description-Content-Type: text/markdown
|
|
|
61
50
|
|
|
62
51
|
# dataenginex
|
|
63
52
|
|
|
64
|
-
Unified Data + ML + AI
|
|
53
|
+
Unified Data + ML + AI **library**. Config-driven, self-hosted, production-ready.
|
|
54
|
+
|
|
55
|
+
`dataenginex` is a pure Python library — no HTTP server. Your application owns the server layer.
|
|
65
56
|
|
|
66
57
|
## Install
|
|
67
58
|
|
|
68
59
|
```bash
|
|
69
|
-
# Core (DuckDB,
|
|
60
|
+
# Core (DuckDB, structlog, Pydantic, Click, arq, asyncpg, qdrant-client)
|
|
70
61
|
pip install dataenginex
|
|
71
62
|
|
|
72
|
-
#
|
|
73
|
-
pip install dataenginex[
|
|
74
|
-
pip install dataenginex[
|
|
75
|
-
pip install dataenginex[agents] # LangGraph agent runtime
|
|
76
|
-
pip install dataenginex[vectors] # Qdrant + LanceDB vector stores
|
|
77
|
-
pip install dataenginex[embeddings] # sentence-transformers + ONNX
|
|
78
|
-
pip install dataenginex[spark] # PySpark transforms
|
|
79
|
-
pip install dataenginex[cloud] # S3 + GCS storage backends
|
|
80
|
-
pip install dataenginex[all] # Everything
|
|
63
|
+
# Optional extras
|
|
64
|
+
pip install "dataenginex[cloud]" # S3 + GCS + BigQuery storage backends
|
|
65
|
+
pip install "dataenginex[observability]" # Langfuse LLM call tracing
|
|
81
66
|
```
|
|
82
67
|
|
|
68
|
+
> **LiteLLM:** Install separately — it pins `python-dotenv==1.0.1` which conflicts
|
|
69
|
+
> with dataenginex's `python-dotenv>=1.2.2`.
|
|
70
|
+
> ```bash
|
|
71
|
+
> pip install 'litellm>=1.83.3' --no-deps
|
|
72
|
+
> ```
|
|
73
|
+
|
|
83
74
|
## Submodules
|
|
84
75
|
|
|
85
|
-
| Module |
|
|
86
|
-
|
|
87
|
-
| `dataenginex.
|
|
88
|
-
| `dataenginex.
|
|
89
|
-
| `dataenginex.
|
|
90
|
-
| `dataenginex.
|
|
91
|
-
| `dataenginex.
|
|
92
|
-
| `dataenginex.
|
|
93
|
-
| `dataenginex.
|
|
94
|
-
| `dataenginex.
|
|
95
|
-
| `dataenginex.
|
|
96
|
-
| `dataenginex.
|
|
76
|
+
| Module | Description |
|
|
77
|
+
|--------|-------------|
|
|
78
|
+
| `dataenginex.engine` | `DexEngine` — single entry point; loads config, inits store, wires all backends |
|
|
79
|
+
| `dataenginex.store` | `DexStore` — DuckDB-backed persistence (`.dex/store.duckdb`) |
|
|
80
|
+
| `dataenginex.config` | `dex.yaml` schema, loader, env var resolution, layering |
|
|
81
|
+
| `dataenginex.core` | Exceptions, `Base*` ABCs, `BackendRegistry` |
|
|
82
|
+
| `dataenginex.cli` | `dex` CLI (`validate`, `version`, `init`) |
|
|
83
|
+
| `dataenginex.api` | HTTP helpers: error types, response models (no server bundled) |
|
|
84
|
+
| `dataenginex.data` | Connectors, pipeline runner, schema registry, profiler |
|
|
85
|
+
| `dataenginex.ml` | Classical ML: training, model registry, serving, drift detection |
|
|
86
|
+
| `dataenginex.ai` | LLM providers, agents, RAG, vectorstore, memory, observability |
|
|
87
|
+
| `dataenginex.orchestration` | `DriftScheduler`, background scheduling |
|
|
88
|
+
| `dataenginex.middleware` | structlog config, Prometheus metrics |
|
|
89
|
+
| `dataenginex.lakehouse` | Storage backends (local, S3, GCS), catalog, partitioning |
|
|
90
|
+
| `dataenginex.warehouse` | SQL transforms, lineage tracking |
|
|
91
|
+
| `dataenginex.plugins` | Entry-point plugin discovery |
|
|
97
92
|
|
|
98
93
|
## Quick Usage
|
|
99
94
|
|
|
100
95
|
```python
|
|
101
|
-
|
|
96
|
+
from pathlib import Path
|
|
97
|
+
from dataenginex.engine import DexEngine
|
|
98
|
+
|
|
99
|
+
# Load config and initialize all backends
|
|
100
|
+
engine = DexEngine(Path("dex.yaml"))
|
|
101
|
+
|
|
102
|
+
# Data
|
|
103
|
+
engine.run_pipeline("clean_users")
|
|
104
|
+
sources = list(engine.config.data.sources.keys())
|
|
105
|
+
|
|
106
|
+
# ML
|
|
107
|
+
models = engine.model_registry.list_models()
|
|
108
|
+
result = engine.model_registry.predict("churn_model", features)
|
|
109
|
+
|
|
110
|
+
# AI
|
|
111
|
+
response = engine.agents["assistant"].chat("summarize the latest run")
|
|
112
|
+
|
|
113
|
+
# Persistence (DuckDB)
|
|
114
|
+
runs = engine.store.list_pipeline_runs(limit=10)
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# Config system only
|
|
102
119
|
from dataenginex.config import load_config
|
|
103
120
|
cfg = load_config(Path("dex.yaml"))
|
|
104
121
|
|
|
@@ -106,15 +123,12 @@ cfg = load_config(Path("dex.yaml"))
|
|
|
106
123
|
from dataenginex.core.interfaces import BaseConnector
|
|
107
124
|
from dataenginex.core.registry import BackendRegistry
|
|
108
125
|
|
|
109
|
-
# Exceptions
|
|
110
|
-
from dataenginex.core.exceptions import DataEngineXError, BackendNotInstalledError
|
|
111
|
-
|
|
112
126
|
# ML
|
|
113
|
-
from dataenginex.ml import ModelRegistry
|
|
127
|
+
from dataenginex.ml import ModelRegistry, SklearnTrainer
|
|
114
128
|
|
|
115
|
-
#
|
|
116
|
-
|
|
117
|
-
|
|
129
|
+
# AI
|
|
130
|
+
from dataenginex.ai.llm import get_llm_provider
|
|
131
|
+
from dataenginex.ai.vectorstore import VectorStoreBackend
|
|
118
132
|
```
|
|
119
133
|
|
|
120
134
|
## Source and Docs
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
# DEX — Data + ML + AI Framework
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/TheDataEngineX/DEX/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/dataenginex/)
|
|
5
|
+
[Python 3.13+](https://www.python.org/downloads/)
|
|
6
|
+
[License: MIT](LICENSE)
|
|
7
|
+
|
|
8
|
+
Unified Data + ML + AI **library**. One `dex.yaml` defines your entire project — from data ingestion through ML training to AI agents. Self-hosted, config-driven, production-ready.
|
|
9
|
+
|
|
10
|
+
`dataenginex` is a **pure Python library**. It has no HTTP server. Your application owns the server layer.
|
|
11
|
+
|
|
12
|
+
______________________________________________________________________
|
|
13
|
+
|
|
14
|
+
## Quick Start
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install dataenginex
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
```python
|
|
21
|
+
from dataenginex.engine import DexEngine
|
|
22
|
+
|
|
23
|
+
engine = DexEngine("dex.yaml") # loads config, inits DuckDB store
|
|
24
|
+
engine.run_pipeline("clean_users") # execute a pipeline
|
|
25
|
+
models = engine.model_registry.list_models()
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
# Development
|
|
30
|
+
git clone https://github.com/TheDataEngineX/DEX && cd DEX
|
|
31
|
+
uv run poe check-all # lint + typecheck + tests
|
|
32
|
+
dex validate dex.yaml # validate a config file
|
|
33
|
+
dex version # show version + environment
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
______________________________________________________________________
|
|
37
|
+
|
|
38
|
+
## What It Does
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
dex.yaml
|
|
42
|
+
├── data: CSV/Parquet/DuckDB → transforms → quality checks
|
|
43
|
+
├── ml: Experiment tracking → training → serving → drift detection
|
|
44
|
+
├── ai: LLM providers → retrieval (BM25/dense/hybrid) → agents
|
|
45
|
+
└── observability: structlog + Prometheus metrics
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
**Opinionated defaults, swappable backends.** Everything works out of the box with
|
|
49
|
+
built-in implementations. Swap any layer for industry tools via optional extras:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install "dataenginex[cloud]" # S3 + GCS + BigQuery storage backends
|
|
53
|
+
pip install "dataenginex[observability]" # Langfuse LLM tracing
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
______________________________________________________________________
|
|
57
|
+
|
|
58
|
+
## Project Structure
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
dataenginex/
|
|
62
|
+
├── src/dataenginex/
|
|
63
|
+
│ ├── cli/ # dex CLI (validate, version, init)
|
|
64
|
+
│ ├── config/ # dex.yaml schema, loader, env var resolution
|
|
65
|
+
│ ├── core/ # Exceptions, interfaces (Base* ABCs), registry
|
|
66
|
+
│ ├── engine.py # DexEngine — single entry point for applications
|
|
67
|
+
│ ├── store.py # DexStore — DuckDB-backed persistence (.dex/store.duckdb)
|
|
68
|
+
│ ├── api/ # HTTP helpers: error types, response models (no server)
|
|
69
|
+
│ ├── data/ # Connectors, schema registry, profiler, pipeline runner
|
|
70
|
+
│ ├── ml/ # Classical ML: training, registry, serving, drift
|
|
71
|
+
│ ├── ai/ # LLM providers, agents, RAG, vectorstore, observability
|
|
72
|
+
│ ├── orchestration/ # DriftScheduler, background workers
|
|
73
|
+
│ ├── middleware/ # structlog config, Prometheus metrics (library use)
|
|
74
|
+
│ ├── lakehouse/ # Catalog, partitioning, storage backends
|
|
75
|
+
│ ├── warehouse/ # SQL transforms, lineage
|
|
76
|
+
│ └── plugins/ # Plugin system (entry-point discovery)
|
|
77
|
+
│
|
|
78
|
+
├── examples/ # Runnable examples + dex.yaml templates
|
|
79
|
+
├── tests/ # Unit + integration tests
|
|
80
|
+
├── docs/ # MkDocs documentation
|
|
81
|
+
└── pyproject.toml # Package config (version source of truth)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
______________________________________________________________________
|
|
85
|
+
|
|
86
|
+
## Architecture
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
dex.yaml → DexEngine.__init__
|
|
90
|
+
│
|
|
91
|
+
├── config/ load + validate → DexConfig
|
|
92
|
+
├── store/ DexStore (.dex/store.duckdb)
|
|
93
|
+
├── data/ register sources + pipelines
|
|
94
|
+
├── ml/ model registry + serving
|
|
95
|
+
└── ai/ LLM providers + agents
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
**Backend Registry Pattern:** Every subsystem has a `Base*` ABC + `BackendRegistry`.
|
|
99
|
+
Built-in backends work out of the box. Extras implement the same interface.
|
|
100
|
+
|
|
101
|
+
**Tech Stack:**
|
|
102
|
+
|
|
103
|
+
| Layer | Built-in | Optional Extra |
|
|
104
|
+
|-------|----------|----------------|
|
|
105
|
+
| Data Engine | DuckDB | PySpark |
|
|
106
|
+
| Orchestration | croniter scheduler | Dagster |
|
|
107
|
+
| ML Tracking | JSON-based tracker | MLflow |
|
|
108
|
+
| Model Serving | Built-in predictor | — |
|
|
109
|
+
| LLM | Ollama / LiteLLM / vLLM | Any OpenAI-compatible |
|
|
110
|
+
| Vector Store | DuckDB VSS | Qdrant |
|
|
111
|
+
| Retrieval | BM25 + Dense + Hybrid | — |
|
|
112
|
+
| Persistence | DuckDB (`.dex/store.duckdb`) | — |
|
|
113
|
+
| Logging | structlog | — |
|
|
114
|
+
|
|
115
|
+
______________________________________________________________________
|
|
116
|
+
|
|
117
|
+
## Development
|
|
118
|
+
|
|
119
|
+
See [docs/development.md](docs/development.md) for full setup.
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
uv run poe check-all # lint + typecheck + tests
|
|
123
|
+
uv run poe lint-fix # auto-fix lint issues
|
|
124
|
+
uv run poe test-cov # tests + coverage report
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
______________________________________________________________________
|
|
128
|
+
|
|
129
|
+
## Documentation
|
|
130
|
+
|
|
131
|
+
| Guide | Description |
|
|
132
|
+
|-------|-------------|
|
|
133
|
+
| [Quickstart](docs/quickstart.md) | Get running in 5 minutes |
|
|
134
|
+
| [Architecture](docs/architecture.md) | System design and patterns |
|
|
135
|
+
| [Development](docs/development.md) | Local setup and workflow |
|
|
136
|
+
| [API Reference](docs/api-reference/index.md) | Auto-generated module docs |
|
|
137
|
+
|
|
138
|
+
> Docs: [docs.thedataenginex.org](https://docs.thedataenginex.org)
|
|
139
|
+
|
|
140
|
+
______________________________________________________________________
|
|
141
|
+
|
|
142
|
+
## The DEX Ecosystem
|
|
143
|
+
|
|
144
|
+
```
|
|
145
|
+
TheDataEngineX/
|
|
146
|
+
├── dataenginex — Core library (this repo, PyPI)
|
|
147
|
+
├── dex-studio — Web UI (FastAPI + Jinja2) — single pane of glass
|
|
148
|
+
└── infradex — Terraform + Helm + K3s deployment
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
dex-studio imports `dataenginex` directly — no HTTP server required.
|
|
152
|
+
|
|
153
|
+
______________________________________________________________________
|
|
154
|
+
|
|
155
|
+
## License
|
|
156
|
+
|
|
157
|
+
MIT License. See [LICENSE](LICENSE).
|
|
158
|
+
|
|
159
|
+
______________________________________________________________________
|
|
160
|
+
|
|
161
|
+
**Version**: [latest on PyPI](https://pypi.org/project/dataenginex/) | **License**: MIT | **Python**: 3.13+
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# dataenginex.api
|
|
2
|
+
|
|
3
|
+
HTTP helpers — error types, response models, and shared utilities.
|
|
4
|
+
|
|
5
|
+
`dataenginex` does **not** bundle a FastAPI server. This module provides the building blocks
|
|
6
|
+
for applications (like DEX Studio or your own FastAPI app) that want to expose DEX functionality
|
|
7
|
+
over HTTP.
|
|
8
|
+
|
|
9
|
+
## Error Types
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from dataenginex.api.errors import (
|
|
13
|
+
NotFoundError,
|
|
14
|
+
ValidationError,
|
|
15
|
+
ConflictError,
|
|
16
|
+
ServiceUnavailableError,
|
|
17
|
+
)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
Standard Pydantic response models for HTTP error responses, usable in any FastAPI app (here `engine` is a `DexEngine` instance, created as shown under "Building an HTTP Layer" below):
|
|
21
|
+
|
|
22
|
+
```python
|
|
23
|
+
from fastapi import FastAPI
|
|
24
|
+
from dataenginex.api.errors import NotFoundError
|
|
25
|
+
|
|
26
|
+
app = FastAPI()
|
|
27
|
+
|
|
28
|
+
@app.get("/pipelines/{name}")
|
|
29
|
+
def get_pipeline(name: str):
|
|
30
|
+
pipeline = engine.config.data.pipelines.get(name)
|
|
31
|
+
if pipeline is None:
|
|
32
|
+
raise NotFoundError(detail=f"Pipeline '{name}' not found")
|
|
33
|
+
return pipeline
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
## Building an HTTP Layer
|
|
37
|
+
|
|
38
|
+
To expose DEX functionality over HTTP, create a FastAPI app in your application
|
|
39
|
+
and call `DexEngine` directly:
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from fastapi import FastAPI
|
|
43
|
+
from dataenginex.engine import DexEngine
|
|
44
|
+
|
|
45
|
+
engine = DexEngine("dex.yaml")
|
|
46
|
+
app = FastAPI()
|
|
47
|
+
|
|
48
|
+
@app.get("/health")
|
|
49
|
+
def health():
|
|
50
|
+
return engine.health()
|
|
51
|
+
|
|
52
|
+
@app.get("/pipelines")
|
|
53
|
+
def list_pipelines():
|
|
54
|
+
return list(engine.config.data.pipelines.keys())
|
|
55
|
+
|
|
56
|
+
@app.post("/pipelines/{name}/run")
|
|
57
|
+
def run_pipeline(name: str):
|
|
58
|
+
return engine.run_pipeline(name)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
See `examples/02_api_quickstart.py` for a minimal working example.
|
|
62
|
+
|
|
63
|
+
::: dataenginex.api
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: dashboard (removed)
|
|
3
|
+
description: The Streamlit dashboard module was removed in v1.0
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
The `dataenginex.dashboard` Streamlit module was removed in **v1.0**.
|
|
7
|
+
|
|
8
|
+
Use [DEX Studio](https://github.com/TheDataEngineX/dex-studio) instead — a full-featured Reflex web UI for data pipelines, ML experiments, and AI agents.
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# dataenginex.ml
|
|
2
|
+
|
|
3
|
+
Classical ML — training, model registry, drift detection, and model serving.
|
|
4
|
+
|
|
5
|
+
LLM providers, vector stores, agents, and RAG live in `dataenginex.ai`.
|
|
6
|
+
The drift scheduler lives in `dataenginex.orchestration`.
|
|
7
|
+
|
|
8
|
+
## Module Split
|
|
9
|
+
|
|
10
|
+
| Concern | Module |
|
|
11
|
+
|---------|--------|
|
|
12
|
+
| Training, registry, serving, drift | `dataenginex.ml` |
|
|
13
|
+
| LLM providers, chat, embeddings | `dataenginex.ai.llm` |
|
|
14
|
+
| Vector stores | `dataenginex.ai.vectorstore` |
|
|
15
|
+
| Background drift scheduling | `dataenginex.orchestration.scheduler` |
|
|
16
|
+
|
|
17
|
+
## Quick Usage
|
|
18
|
+
|
|
19
|
+
```python
|
|
20
|
+
from dataenginex.ml import (
|
|
21
|
+
SklearnTrainer, TrainingResult,
|
|
22
|
+
ModelRegistry, ModelArtifact, ModelStage,
|
|
23
|
+
DriftDetector, DriftReport,
|
|
24
|
+
ModelServer, PredictionRequest, PredictionResponse,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
# Train
|
|
28
|
+
trainer = SklearnTrainer(experiment_name="churn")
|
|
29
|
+
result: TrainingResult = trainer.train(X_train, y_train)
|
|
30
|
+
|
|
31
|
+
# Register
|
|
32
|
+
registry = ModelRegistry()
|
|
33
|
+
registry.register(result.model, name="churn_v1", stage=ModelStage.STAGING)
|
|
34
|
+
|
|
35
|
+
# Drift
|
|
36
|
+
detector = DriftDetector(reference=X_train)
|
|
37
|
+
report: DriftReport = detector.detect(X_new)
|
|
38
|
+
|
|
39
|
+
# Serve
|
|
40
|
+
server = ModelServer()
|
|
41
|
+
server.load("churn_v1", stage=ModelStage.PRODUCTION)
|
|
42
|
+
resp = server.predict(PredictionRequest(features={"age": 35}))
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
::: dataenginex.ml
|