dataenginex 0.4.1__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/ci.yml +3 -3
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/release.yml +2 -2
- {dataenginex-0.4.1 → dataenginex-0.4.3}/CHANGELOG.md +12 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/PKG-INFO +3 -1
- {dataenginex-0.4.1 → dataenginex-0.4.3}/README.md +0 -2
- dataenginex-0.4.3/docs/api-reference/ai.md +191 -0
- dataenginex-0.4.3/docs/api-reference/config.md +84 -0
- dataenginex-0.4.3/docs/api-reference/core.md +143 -0
- dataenginex-0.4.3/docs/api-reference/data.md +279 -0
- dataenginex-0.4.3/docs/api-reference/engine.md +96 -0
- dataenginex-0.4.3/docs/api-reference/index.md +19 -0
- dataenginex-0.4.3/docs/api-reference/lakehouse.md +78 -0
- dataenginex-0.4.3/docs/api-reference/middleware.md +73 -0
- dataenginex-0.4.3/docs/api-reference/ml.md +172 -0
- dataenginex-0.4.3/docs/api-reference/orchestration.md +59 -0
- dataenginex-0.4.3/docs/api-reference/plugins.md +53 -0
- dataenginex-0.4.3/docs/api-reference/secops.md +121 -0
- dataenginex-0.4.3/docs/api-reference/warehouse.md +67 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/development.md +6 -24
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/index.md +2 -0
- dataenginex-0.4.3/docs/observability.md +61 -0
- dataenginex-0.4.3/docs/release-notes.md +98 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/07_api_ingestion.py +1 -11
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/ecommerce/run_all.py +144 -101
- {dataenginex-0.4.1 → dataenginex-0.4.3}/poe_tasks.toml +1 -1
- {dataenginex-0.4.1 → dataenginex-0.4.3}/pyproject.toml +3 -1
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/__init__.py +2 -3
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/runtime/sandbox.py +8 -2
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/config/loader.py +17 -6
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/config/schema.py +70 -7
- dataenginex-0.4.3/src/dataenginex/config/settings.py +127 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/__init__.py +1 -6
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/medallion_architecture.py +0 -122
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/__init__.py +0 -21
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/__init__.py +10 -18
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/delta.py +1 -1
- dataenginex-0.4.3/src/dataenginex/data/connectors/http.py +191 -0
- dataenginex-0.4.3/src/dataenginex/data/connectors/rest.py +129 -0
- dataenginex-0.4.3/src/dataenginex/data/connectors/sse.py +168 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/pipeline/dag.py +46 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/pipeline/runner.py +117 -11
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/quality/gates.py +37 -6
- dataenginex-0.4.3/src/dataenginex/data/transforms/sql.py +347 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conftest.py +24 -6
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_data.py +126 -230
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_medallion.py +166 -251
- {dataenginex-0.4.1 → dataenginex-0.4.3}/uv.lock +440 -464
- dataenginex-0.4.1/docs/api-reference/core.md +0 -5
- dataenginex-0.4.1/docs/api-reference/data.md +0 -5
- dataenginex-0.4.1/docs/api-reference/index.md +0 -14
- dataenginex-0.4.1/docs/api-reference/lakehouse.md +0 -5
- dataenginex-0.4.1/docs/api-reference/middleware.md +0 -5
- dataenginex-0.4.1/docs/api-reference/ml.md +0 -45
- dataenginex-0.4.1/docs/api-reference/plugins.md +0 -5
- dataenginex-0.4.1/docs/api-reference/warehouse.md +0 -5
- dataenginex-0.4.1/docs/observability.md +0 -798
- dataenginex-0.4.1/docs/release-notes.md +0 -141
- dataenginex-0.4.1/src/dataenginex/data/connectors/legacy.py +0 -365
- dataenginex-0.4.1/src/dataenginex/data/transforms/sql.py +0 -171
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.claude/commands/new-feature.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.claude/commands/validate.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.claude/settings.json +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.env.template +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/dependabot.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/labels.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/release-pr-template.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/auto-pr.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/docs-sync.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/enforce-dev-to-main.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.github/workflows/security.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.gitignore +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.gitleaks.toml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.pre-commit-config.yaml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/.python-version +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/CLAUDE.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/CODEOWNERS +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/CODE_OF_CONDUCT.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/CONTRIBUTING.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/Dockerfile +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/LICENSE +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/SECURITY.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docker-compose.test.yml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/adr/0000-template.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/adr/0001-medallion-architecture.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/api-reference/api.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/architecture.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/ci-cd.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/contributing.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/quickstart.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/sdlc.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/docs/security-scanning.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/01_hello_pipeline.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/02_api_quickstart.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/03_quality_gate.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/04_ml_training.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/05_rag_demo.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/06_llm_quickstart.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/08_spark_ml.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/09_feature_engineering.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/10_model_analysis.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/GUIDE.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/dashboard/dashboard_config.yaml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/dashboard/run_dashboard.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/data/events.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/data/users.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/dex.yaml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/ecommerce/data/customers.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/ecommerce/data/orders.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/ecommerce/data/products.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/ecommerce/dex.yaml +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/examples/movies.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/scripts/localstack/init.sh +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/README.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/RELEASE_NOTES.md +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/_json.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/agents/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/agents/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/llm.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/memory/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/memory/base.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/memory/episodic.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/memory/long_term.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/observability/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/observability/audit.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/observability/cost.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/observability/metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/retrieval/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/retrieval/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/retrieval/graph.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/anthropic.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/guarded.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/ollama.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/openai.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/routing/router.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/runtime/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/runtime/executor.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/tools/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/tools/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/vectorstore.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/workflows/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/workflows/conditions.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/workflows/dag.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ai/workflows/human_loop.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/api/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/api/errors.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/api/pagination.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/api/schemas.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/cli/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/cli/main.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/cli/run.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/cli/secops.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/cli/train.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/config/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/config/defaults.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/exceptions.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/interfaces.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/quality.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/schemas.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/core/validators.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/_utils.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/csv.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/dbt.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/duckdb.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/parquet.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/connectors/spark.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/pipeline/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/pipeline/run_history.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/profiler.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/quality/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/quality/spark.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/data/transforms/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/engine.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/lakehouse/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/lakehouse/catalog.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/lakehouse/partitioning.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/lakehouse/storage.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/middleware/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/middleware/domain_metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/middleware/logging_config.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/middleware/metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/drift.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/features/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/features/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/mlflow_registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/serving.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/tracking/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/tracking/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/ml/training.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/orchestration/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/orchestration/builtin.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/orchestration/scheduler.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/plugins/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/plugins/registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/py.typed +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/audit.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/gate.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/guard.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/masking.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/secops/pii.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/store.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/warehouse/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/warehouse/lineage.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/warehouse/transforms.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/src/dataenginex/worker.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conformance/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conformance/test_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conformance/test_feature_store.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conformance/test_tracker.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/conformance/test_transform.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/fixtures/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/fixtures/sample_data.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/fixtures/sample_jobs.csv +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/fixtures/sample_jobs.json +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_ai_integration.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_cli_run.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_config_cli.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_lineage_integration.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_ml_integration.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_pipeline_e2e.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_secops_integration.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/integration/test_storage_real.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/load/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/__init__.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_agent_runtime.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_ai_modules.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_api_pagination.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_api_schemas.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_api_validators.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_builtin_agent.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_builtin_feature_store.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_builtin_serving.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_builtin_tracker.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_cli_train.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_config_loader.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_config_schema.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_config_schema_extended.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_core_exceptions.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_core_interfaces.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_core_quality.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_core_registry.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_core_schemas_extended.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_csv_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_dbt_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_dex_engine.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_dex_store.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_domain_metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_domain_metrics_wiring.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_drift_scheduler.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_duckdb_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_guarded_provider.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_lakehouse.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_llm.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_llm_extended.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_llm_litellm_vllm.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_logging.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_medallion_extended.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_middleware_metrics.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_ml.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_parquet_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_pipeline_dag.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_pipeline_runner.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_plugins.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_privacy_guard_wiring.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_quality_gates.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_quality_spark.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_retriever.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_retriever_graph.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_run_history.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_scheduler.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_secops.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_secops_engine_and_cli.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_secops_guard.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_spark_connector.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_spark_fixtures.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_sql_transforms.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_storage_abstraction.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_vectorstore.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_vectorstore_extended.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_warehouse.py +0 -0
- {dataenginex-0.4.1 → dataenginex-0.4.3}/tests/unit/test_warehouse_transforms.py +0 -0
|
@@ -19,7 +19,7 @@ jobs:
|
|
|
19
19
|
name: Code Quality
|
|
20
20
|
runs-on: ubuntu-latest
|
|
21
21
|
steps:
|
|
22
|
-
- uses: actions/checkout@
|
|
22
|
+
- uses: actions/checkout@v7
|
|
23
23
|
- uses: astral-sh/setup-uv@v8.2.0
|
|
24
24
|
with:
|
|
25
25
|
version: "latest"
|
|
@@ -34,7 +34,7 @@ jobs:
|
|
|
34
34
|
runs-on: ubuntu-latest
|
|
35
35
|
needs: quality
|
|
36
36
|
steps:
|
|
37
|
-
- uses: actions/checkout@
|
|
37
|
+
- uses: actions/checkout@v7
|
|
38
38
|
- uses: astral-sh/setup-uv@v8.2.0
|
|
39
39
|
with:
|
|
40
40
|
version: "latest"
|
|
@@ -58,7 +58,7 @@ jobs:
|
|
|
58
58
|
matrix:
|
|
59
59
|
python-version: ["3.11", "3.12"]
|
|
60
60
|
steps:
|
|
61
|
-
- uses: actions/checkout@
|
|
61
|
+
- uses: actions/checkout@v7
|
|
62
62
|
- uses: astral-sh/setup-uv@v8.2.0
|
|
63
63
|
with:
|
|
64
64
|
version: "latest"
|
|
@@ -23,7 +23,7 @@ jobs:
|
|
|
23
23
|
permissions:
|
|
24
24
|
contents: read
|
|
25
25
|
steps:
|
|
26
|
-
- uses: actions/checkout@
|
|
26
|
+
- uses: actions/checkout@v7
|
|
27
27
|
with:
|
|
28
28
|
ref: ${{ inputs.ref || github.ref }}
|
|
29
29
|
- uses: astral-sh/setup-uv@v8.2.0
|
|
@@ -58,7 +58,7 @@ jobs:
|
|
|
58
58
|
permissions:
|
|
59
59
|
contents: write
|
|
60
60
|
steps:
|
|
61
|
-
- uses: actions/checkout@
|
|
61
|
+
- uses: actions/checkout@v7
|
|
62
62
|
with:
|
|
63
63
|
ref: ${{ inputs.ref || github.ref }}
|
|
64
64
|
- uses: astral-sh/setup-uv@v8.2.0
|
|
@@ -7,6 +7,18 @@ All notable changes to `dataenginex` will be documented in this file.
|
|
|
7
7
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
8
8
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
9
9
|
|
|
10
|
+
## [0.4.2] - 2026-06-23
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Example scripts refreshed: `08_spark_ml.py`, `09_feature_engineering.py`, `10_model_analysis.py` — demonstrate PySpark ML, feature transforms, and drift detection
|
|
15
|
+
- `docs/release-notes.md` rewritten — cleaned up pre-reset 1.x entries, corrected version history to start from 0.3.5
|
|
16
|
+
- `docs/observability.md` trimmed to library-level content only; HTTP/K8s content moved to `dex-studio/docs/observability.md`
|
|
17
|
+
|
|
18
|
+
### Changed
|
|
19
|
+
|
|
20
|
+
- Version bumped to 0.4.2
|
|
21
|
+
|
|
10
22
|
## [0.4.1] - 2026-06-12
|
|
11
23
|
|
|
12
24
|
### Added
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataenginex
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: DataEngineX — open-source, self-hosted, local-first Data + ML + AI workbench library
|
|
5
5
|
Author-email: Jay <jayapal.myaka99@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -30,9 +30,11 @@ Requires-Dist: click>=8.3.3
|
|
|
30
30
|
Requires-Dist: croniter>=6.2.2
|
|
31
31
|
Requires-Dist: duckdb>=1.5.2
|
|
32
32
|
Requires-Dist: httpx>=0.28.1
|
|
33
|
+
Requires-Dist: msgpack>=1.2.1
|
|
33
34
|
Requires-Dist: orjson>=3.11.0
|
|
34
35
|
Requires-Dist: prometheus-client>=0.25.0
|
|
35
36
|
Requires-Dist: pyarrow>=23.0.1
|
|
37
|
+
Requires-Dist: pydantic-settings>=2.14.2
|
|
36
38
|
Requires-Dist: pydantic>=2.13.4
|
|
37
39
|
Requires-Dist: pyyaml>=6.0.3
|
|
38
40
|
Requires-Dist: structlog>=25.5.0
|
|
@@ -7,8 +7,6 @@
|
|
|
7
7
|
|
|
8
8
|
The Python library that powers [DEX Studio](https://github.com/TheDataEngineX/dex-studio) — an open-source, self-hosted, local-first Data + ML + AI workbench for individuals and small teams. **Use the library directly when you want code; install DEX Studio when you want a UI.**
|
|
9
9
|
|
|
10
|
-
> **Pre-1.0 status.** `0.4.0` is honest about it. See the [CHANGELOG scope-reset entry](CHANGELOG.md) for the rationale.
|
|
11
|
-
|
|
12
10
|
______________________________________________________________________
|
|
13
11
|
|
|
14
12
|
## Install
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# dataenginex.ai
|
|
2
|
+
|
|
3
|
+
LLM routing, agent runtimes, vector stores, memory, retrieval, observability, and workflow orchestration.
|
|
4
|
+
|
|
5
|
+
## Quick import
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from dataenginex.ai import (
|
|
9
|
+
ModelRouter, BaseProvider,
|
|
10
|
+
LLMProvider, LLMResponse,
|
|
11
|
+
BuiltinAgentRuntime,
|
|
12
|
+
BuiltinRetriever,
|
|
13
|
+
VectorStore,
|
|
14
|
+
SandboxConfig,
|
|
15
|
+
)
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
______________________________________________________________________
|
|
19
|
+
|
|
20
|
+
## LLM Interface
|
|
21
|
+
|
|
22
|
+
`dataenginex.ai.llm`
|
|
23
|
+
|
|
24
|
+
Unified LLM request/response interface across providers. Handles streaming, tool calls, retries, and token counting.
|
|
25
|
+
|
|
26
|
+
::: dataenginex.ai.llm
|
|
27
|
+
|
|
28
|
+
**Key classes:** `LLMProvider`, `LLMResponse`, `LLMMessage`, `ToolCall`
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from dataenginex.ai.llm import LLMMessage
|
|
32
|
+
|
|
33
|
+
response = provider.complete([
|
|
34
|
+
LLMMessage(role="user", content="Summarize this dataset."),
|
|
35
|
+
])
|
|
36
|
+
print(response.content)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
______________________________________________________________________
|
|
40
|
+
|
|
41
|
+
## Model Router
|
|
42
|
+
|
|
43
|
+
`dataenginex.ai.routing.router`
|
|
44
|
+
|
|
45
|
+
Routes LLM requests to the appropriate provider based on cost, latency, capability, and fallback rules.
|
|
46
|
+
|
|
47
|
+
::: dataenginex.ai.routing.router
|
|
48
|
+
|
|
49
|
+
**Key class:** `ModelRouter`
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from dataenginex.ai.routing.router import ModelRouter
|
|
53
|
+
|
|
54
|
+
router = ModelRouter.from_config(engine.config)
|
|
55
|
+
response = router.complete("Explain this error.", model_hint="fast")
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### Providers
|
|
59
|
+
|
|
60
|
+
`dataenginex.ai.routing.anthropic` · `dataenginex.ai.routing.openai` · `dataenginex.ai.routing.ollama` · `dataenginex.ai.routing.guarded`
|
|
61
|
+
|
|
62
|
+
::: dataenginex.ai.routing.anthropic
|
|
63
|
+
::: dataenginex.ai.routing.openai
|
|
64
|
+
::: dataenginex.ai.routing.ollama
|
|
65
|
+
::: dataenginex.ai.routing.guarded
|
|
66
|
+
|
|
67
|
+
______________________________________________________________________
|
|
68
|
+
|
|
69
|
+
## Agents
|
|
70
|
+
|
|
71
|
+
`dataenginex.ai.agents.builtin`
|
|
72
|
+
|
|
73
|
+
Built-in agent runtime — tool-use loop, memory injection, step tracing, and structured output parsing.
|
|
74
|
+
|
|
75
|
+
::: dataenginex.ai.agents.builtin
|
|
76
|
+
|
|
77
|
+
**Key class:** `BuiltinAgentRuntime`
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from dataenginex.ai.agents.builtin import BuiltinAgentRuntime
|
|
81
|
+
|
|
82
|
+
agent = BuiltinAgentRuntime(router=router, tools=[search_tool, sql_tool])
|
|
83
|
+
result = agent.run("Find the top 10 customers by revenue last quarter.")
|
|
84
|
+
print(result.output)
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
______________________________________________________________________
|
|
88
|
+
|
|
89
|
+
## Vector Store
|
|
90
|
+
|
|
91
|
+
`dataenginex.ai.vectorstore`
|
|
92
|
+
|
|
93
|
+
Embedding storage and similarity search. Defaults to in-process DuckDB VSS; swap for Qdrant via `dataenginex[qdrant]`.
|
|
94
|
+
|
|
95
|
+
::: dataenginex.ai.vectorstore
|
|
96
|
+
|
|
97
|
+
**Key class:** `VectorStore`
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from dataenginex.ai.vectorstore import VectorStore
|
|
101
|
+
|
|
102
|
+
store = VectorStore(db_path=".dex/store.duckdb")
|
|
103
|
+
store.upsert("doc-1", embedding=[0.1, 0.2, ...], metadata={"source": "wiki"})
|
|
104
|
+
results = store.search(query_embedding, top_k=5)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
______________________________________________________________________
|
|
108
|
+
|
|
109
|
+
## Memory
|
|
110
|
+
|
|
111
|
+
`dataenginex.ai.memory.base` — abstract memory interface
|
|
112
|
+
|
|
113
|
+
::: dataenginex.ai.memory.base
|
|
114
|
+
|
|
115
|
+
`dataenginex.ai.memory.episodic` — short-term conversation memory scoped to a single agent session
|
|
116
|
+
|
|
117
|
+
::: dataenginex.ai.memory.episodic
|
|
118
|
+
|
|
119
|
+
`dataenginex.ai.memory.long_term` — persistent memory backed by the vector store, survives across sessions
|
|
120
|
+
|
|
121
|
+
::: dataenginex.ai.memory.long_term
|
|
122
|
+
|
|
123
|
+
______________________________________________________________________
|
|
124
|
+
|
|
125
|
+
## Retrieval
|
|
126
|
+
|
|
127
|
+
`dataenginex.ai.retrieval.builtin` — RAG retriever: embeds query, searches vector store, returns ranked chunks
|
|
128
|
+
|
|
129
|
+
::: dataenginex.ai.retrieval.builtin
|
|
130
|
+
|
|
131
|
+
`dataenginex.ai.retrieval.graph` — graph-based retrieval for structured knowledge graphs
|
|
132
|
+
|
|
133
|
+
::: dataenginex.ai.retrieval.graph
|
|
134
|
+
|
|
135
|
+
______________________________________________________________________
|
|
136
|
+
|
|
137
|
+
## Runtime
|
|
138
|
+
|
|
139
|
+
`dataenginex.ai.runtime.executor` — async execution engine with concurrency, timeout, and step-level error handling
|
|
140
|
+
|
|
141
|
+
::: dataenginex.ai.runtime.executor
|
|
142
|
+
|
|
143
|
+
`dataenginex.ai.runtime.checkpoint` — saves and restores agent run state for long-running or resumable workflows
|
|
144
|
+
|
|
145
|
+
::: dataenginex.ai.runtime.checkpoint
|
|
146
|
+
|
|
147
|
+
`dataenginex.ai.runtime.sandbox` — isolated code execution sandbox for agent-generated Python with configurable resource limits
|
|
148
|
+
|
|
149
|
+
::: dataenginex.ai.runtime.sandbox
|
|
150
|
+
|
|
151
|
+
______________________________________________________________________
|
|
152
|
+
|
|
153
|
+
## Tools
|
|
154
|
+
|
|
155
|
+
`dataenginex.ai.tools.builtin`
|
|
156
|
+
|
|
157
|
+
Built-in agent tools: `sql_query`, `web_search`, `file_read`, `python_exec`, `vector_search`.
|
|
158
|
+
|
|
159
|
+
::: dataenginex.ai.tools.builtin
|
|
160
|
+
|
|
161
|
+
______________________________________________________________________
|
|
162
|
+
|
|
163
|
+
## Workflows
|
|
164
|
+
|
|
165
|
+
`dataenginex.ai.workflows.dag` — multi-step agent workflows as DAGs; steps branch, merge, and pass structured outputs
|
|
166
|
+
|
|
167
|
+
::: dataenginex.ai.workflows.dag
|
|
168
|
+
|
|
169
|
+
`dataenginex.ai.workflows.conditions` — conditional branching logic for DAG workflows
|
|
170
|
+
|
|
171
|
+
::: dataenginex.ai.workflows.conditions
|
|
172
|
+
|
|
173
|
+
`dataenginex.ai.workflows.human_loop` — pause a workflow at a step requiring human review or approval
|
|
174
|
+
|
|
175
|
+
::: dataenginex.ai.workflows.human_loop
|
|
176
|
+
|
|
177
|
+
______________________________________________________________________
|
|
178
|
+
|
|
179
|
+
## Observability
|
|
180
|
+
|
|
181
|
+
`dataenginex.ai.observability.audit` — logs every LLM request/response, tool call, and agent step for compliance
|
|
182
|
+
|
|
183
|
+
::: dataenginex.ai.observability.audit
|
|
184
|
+
|
|
185
|
+
`dataenginex.ai.observability.cost` — tracks token usage and estimated cost per provider, model, and agent run
|
|
186
|
+
|
|
187
|
+
::: dataenginex.ai.observability.cost
|
|
188
|
+
|
|
189
|
+
`dataenginex.ai.observability.metrics` — Prometheus metrics for LLM latency, token throughput, and error rate
|
|
190
|
+
|
|
191
|
+
::: dataenginex.ai.observability.metrics
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# dataenginex.config
|
|
2
|
+
|
|
3
|
+
Config schema, loader, settings, and defaults. The `dex.yaml` file is parsed into a typed `DexConfig` object that drives the engine.
|
|
4
|
+
|
|
5
|
+
## Quick import
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from dataenginex.config import DexConfig, load_config, DexSettings
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
______________________________________________________________________
|
|
12
|
+
|
|
13
|
+
## Schema
|
|
14
|
+
|
|
15
|
+
`dataenginex.config.schema`
|
|
16
|
+
|
|
17
|
+
Pydantic models for `dex.yaml`. Covers pipelines, connectors, transforms, ML, AI, secops, and lakehouse config blocks.
|
|
18
|
+
|
|
19
|
+
::: dataenginex.config.schema
|
|
20
|
+
|
|
21
|
+
**Key model:** `DexConfig`
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from dataenginex.config.schema import DexConfig
|
|
25
|
+
|
|
26
|
+
config = DexConfig.model_validate({
|
|
27
|
+
"pipelines": {
|
|
28
|
+
"ingest_events": {
|
|
29
|
+
"source": {"type": "csv", "path": "data/raw/events.csv"},
|
|
30
|
+
"destination": {"type": "parquet", "path": "data/bronze/events"},
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
})
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
______________________________________________________________________
|
|
37
|
+
|
|
38
|
+
## Loader
|
|
39
|
+
|
|
40
|
+
`dataenginex.config.loader`
|
|
41
|
+
|
|
42
|
+
Loads and validates `dex.yaml` (or a custom path) with environment variable interpolation and secret resolution.
|
|
43
|
+
|
|
44
|
+
::: dataenginex.config.loader
|
|
45
|
+
|
|
46
|
+
**Key function:** `load_config`
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from dataenginex.config.loader import load_config
|
|
50
|
+
|
|
51
|
+
config = load_config("dex.yaml")
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
______________________________________________________________________
|
|
55
|
+
|
|
56
|
+
## Settings
|
|
57
|
+
|
|
58
|
+
`dataenginex.config.settings`
|
|
59
|
+
|
|
60
|
+
Runtime settings sourced from environment variables (a `.env` file or the system environment). Covers API keys, storage paths, log level, and feature flags.
|
|
61
|
+
|
|
62
|
+
::: dataenginex.config.settings
|
|
63
|
+
|
|
64
|
+
**Key class:** `DexSettings`
|
|
65
|
+
|
|
66
|
+
```python
|
|
67
|
+
from dataenginex.config.settings import DexSettings
|
|
68
|
+
|
|
69
|
+
settings = DexSettings()
|
|
70
|
+
print(settings.log_level) # "INFO"
|
|
71
|
+
print(settings.dex_store_path) # ".dex/store.duckdb"
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
See `.env.template` in the project root for all available settings.
|
|
75
|
+
|
|
76
|
+
______________________________________________________________________
|
|
77
|
+
|
|
78
|
+
## Defaults
|
|
79
|
+
|
|
80
|
+
`dataenginex.config.defaults`
|
|
81
|
+
|
|
82
|
+
Default values applied when config keys are omitted.
|
|
83
|
+
|
|
84
|
+
::: dataenginex.config.defaults
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
# dataenginex.core
|
|
2
|
+
|
|
3
|
+
Foundation layer — interfaces, medallion architecture, quality gates, registries, exceptions, and shared schemas.
|
|
4
|
+
|
|
5
|
+
## Quick import
|
|
6
|
+
|
|
7
|
+
```python
|
|
8
|
+
from dataenginex.core import (
|
|
9
|
+
MedallionConfig, Layer,
|
|
10
|
+
QualityGate, QualityCheck, QualityResult, Severity,
|
|
11
|
+
BaseConnector, BaseTransform, BaseRunner,
|
|
12
|
+
NotFoundError, ValidationError, PipelineError,
|
|
13
|
+
ComponentRegistry,
|
|
14
|
+
)
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
______________________________________________________________________
|
|
18
|
+
|
|
19
|
+
## Medallion Architecture
|
|
20
|
+
|
|
21
|
+
`dataenginex.core.medallion_architecture`
|
|
22
|
+
|
|
23
|
+
Defines the Bronze → Silver → Gold layer model and per-layer configuration.
|
|
24
|
+
|
|
25
|
+
::: dataenginex.core.medallion_architecture
|
|
26
|
+
|
|
27
|
+
**Key classes:** `MedallionConfig`, `Layer`, `LayerConfig`, `MedallionPipeline`
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from dataenginex.core.medallion_architecture import Layer, MedallionConfig, LayerConfig
|
|
31
|
+
|
|
32
|
+
cfg = MedallionConfig(
|
|
33
|
+
bronze=LayerConfig(path="data/bronze"),
|
|
34
|
+
silver=LayerConfig(path="data/silver", quality_threshold=0.95),
|
|
35
|
+
gold=LayerConfig(path="data/gold", quality_threshold=0.99),
|
|
36
|
+
)
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
______________________________________________________________________
|
|
40
|
+
|
|
41
|
+
## Quality Gates
|
|
42
|
+
|
|
43
|
+
`dataenginex.core.quality`
|
|
44
|
+
|
|
45
|
+
Declarative data quality checks that run at layer promotion boundaries.
|
|
46
|
+
|
|
47
|
+
::: dataenginex.core.quality
|
|
48
|
+
|
|
49
|
+
**Key classes:** `QualityCheck`, `QualityGate`, `QualityResult`, `Severity`
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from dataenginex.core.quality import QualityCheck, QualityGate, Severity
|
|
53
|
+
|
|
54
|
+
gate = QualityGate(
|
|
55
|
+
checks=[
|
|
56
|
+
QualityCheck(name="no_nulls", column="user_id", check_type="not_null"),
|
|
57
|
+
QualityCheck(
|
|
58
|
+
name="email_format",
|
|
59
|
+
column="email",
|
|
60
|
+
check_type="regex",
|
|
61
|
+
pattern=r"^[^@]+@[^@]+\.[^@]+$",
|
|
62
|
+
severity=Severity.ERROR,
|
|
63
|
+
),
|
|
64
|
+
]
|
|
65
|
+
)
|
|
66
|
+
result = gate.run(df)
|
|
67
|
+
assert result.passed
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
______________________________________________________________________
|
|
71
|
+
|
|
72
|
+
## Interfaces
|
|
73
|
+
|
|
74
|
+
`dataenginex.core.interfaces`
|
|
75
|
+
|
|
76
|
+
Abstract base classes for connectors, transforms, runners, and storage backends. Implement these to extend DEX with custom components.
|
|
77
|
+
|
|
78
|
+
::: dataenginex.core.interfaces
|
|
79
|
+
|
|
80
|
+
**Key classes:** `BaseConnector`, `BaseTransform`, `BaseRunner`, `BaseStorage`, `BaseProfiler`
|
|
81
|
+
|
|
82
|
+
______________________________________________________________________
|
|
83
|
+
|
|
84
|
+
## Registry
|
|
85
|
+
|
|
86
|
+
`dataenginex.core.registry`
|
|
87
|
+
|
|
88
|
+
Generic component registry used by connectors, transforms, models, and agents.
|
|
89
|
+
|
|
90
|
+
::: dataenginex.core.registry
|
|
91
|
+
|
|
92
|
+
**Key class:** `ComponentRegistry`
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
from dataenginex.core.registry import ComponentRegistry
|
|
96
|
+
|
|
97
|
+
registry: ComponentRegistry[MyPlugin] = ComponentRegistry()
|
|
98
|
+
registry.register("my_plugin", MyPlugin)
|
|
99
|
+
plugin = registry.get("my_plugin")
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
______________________________________________________________________
|
|
103
|
+
|
|
104
|
+
## Schemas
|
|
105
|
+
|
|
106
|
+
`dataenginex.core.schemas`
|
|
107
|
+
|
|
108
|
+
Shared Pydantic response models used across the engine, CLI, and API layers.
|
|
109
|
+
|
|
110
|
+
::: dataenginex.core.schemas
|
|
111
|
+
|
|
112
|
+
**Key models:** `HealthResponse`, `RootResponse`, `EchoRequest`, `EchoResponse`, `ErrorResponse`, `ComponentStatus`
|
|
113
|
+
|
|
114
|
+
______________________________________________________________________
|
|
115
|
+
|
|
116
|
+
## Validators
|
|
117
|
+
|
|
118
|
+
`dataenginex.core.validators`
|
|
119
|
+
|
|
120
|
+
Reusable Pydantic validators and field helpers for config schema enforcement.
|
|
121
|
+
|
|
122
|
+
::: dataenginex.core.validators
|
|
123
|
+
|
|
124
|
+
______________________________________________________________________
|
|
125
|
+
|
|
126
|
+
## Exceptions
|
|
127
|
+
|
|
128
|
+
`dataenginex.core.exceptions`
|
|
129
|
+
|
|
130
|
+
Typed exception hierarchy for pipeline, validation, and resource errors.
|
|
131
|
+
|
|
132
|
+
::: dataenginex.core.exceptions
|
|
133
|
+
|
|
134
|
+
**Key exceptions:** `DexError`, `NotFoundError`, `ValidationError`, `PipelineError`, `RegistryError`, `ConfigError`
|
|
135
|
+
|
|
136
|
+
```python
|
|
137
|
+
from dataenginex.core.exceptions import NotFoundError, PipelineError
|
|
138
|
+
|
|
139
|
+
try:
|
|
140
|
+
engine.run_pipeline("missing")
|
|
141
|
+
except NotFoundError as e:
|
|
142
|
+
print(e.detail)
|
|
143
|
+
```
|