dataenginex 0.4.0__tar.gz → 0.4.1__tar.gz
This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/PULL_REQUEST_TEMPLATE.md +1 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/workflows/ci.yml +5 -5
- dataenginex-0.4.1/.github/workflows/docs-sync.yml +26 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/workflows/release.yml +21 -5
- dataenginex-0.4.1/CHANGELOG.md +75 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/CONTRIBUTING.md +1 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/PKG-INFO +10 -5
- {dataenginex-0.4.0 → dataenginex-0.4.1}/README.md +3 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/SECURITY.md +5 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/adr/0001-medallion-architecture.md +2 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/index.md +0 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/ci-cd.md +1 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/contributing.md +1 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/development.md +2 -2
- dataenginex-0.4.1/docs/index.md +26 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/observability.md +4 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/quickstart.md +2 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/sdlc.md +1 -1
- dataenginex-0.4.1/poe_tasks.toml +48 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/pyproject.toml +35 -53
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/README.md +3 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/RELEASE_NOTES.md +2 -0
- dataenginex-0.4.1/src/dataenginex/_json.py +27 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/agents/builtin.py +5 -6
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/memory/long_term.py +3 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/runtime/executor.py +3 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/tools/builtin.py +7 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/api/pagination.py +115 -114
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/config/loader.py +10 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/interfaces.py +9 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/__init__.py +1 -0
- dataenginex-0.4.1/src/dataenginex/data/connectors/delta.py +161 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/legacy.py +4 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/parquet.py +4 -1
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/pipeline/run_history.py +4 -4
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/pipeline/runner.py +82 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/registry.py +4 -3
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/engine.py +43 -14
- dataenginex-0.4.1/src/dataenginex/lakehouse/catalog.py +212 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/lakehouse/storage.py +879 -722
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/__init__.py +6 -4
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/features/builtin.py +16 -0
- dataenginex-0.4.1/src/dataenginex/ml/registry.py +293 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/tracking/builtin.py +5 -5
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/training.py +236 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/audit.py +95 -52
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/store.py +265 -218
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/warehouse/lineage.py +7 -6
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_dex_engine.py +5 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_dex_store.py +8 -5
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_ml.py +189 -2
- {dataenginex-0.4.0 → dataenginex-0.4.1}/uv.lock +666 -840
- dataenginex-0.4.0/CHANGELOG.md +0 -572
- dataenginex-0.4.0/docs/api-reference/dashboard.md +0 -8
- dataenginex-0.4.0/docs/index.md +0 -10
- dataenginex-0.4.0/poe_tasks.toml +0 -162
- dataenginex-0.4.0/src/dataenginex/lakehouse/catalog.py +0 -172
- dataenginex-0.4.0/src/dataenginex/ml/registry.py +0 -187
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.claude/commands/new-feature.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.claude/commands/validate.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.claude/settings.json +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.env.template +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/dependabot.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/labels.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/release-pr-template.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/workflows/auto-pr.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/workflows/enforce-dev-to-main.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.github/workflows/security.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.gitignore +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.gitleaks.toml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.pre-commit-config.yaml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/.python-version +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/CLAUDE.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/CODEOWNERS +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/CODE_OF_CONDUCT.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/Dockerfile +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/LICENSE +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docker-compose.test.yml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/adr/0000-template.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/api.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/core.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/data.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/lakehouse.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/middleware.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/ml.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/plugins.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/api-reference/warehouse.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/architecture.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/release-notes.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/docs/security-scanning.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/01_hello_pipeline.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/02_api_quickstart.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/03_quality_gate.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/04_ml_training.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/05_rag_demo.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/06_llm_quickstart.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/07_api_ingestion.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/08_spark_ml.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/09_feature_engineering.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/10_model_analysis.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/GUIDE.md +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/dashboard/dashboard_config.yaml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/dashboard/run_dashboard.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/data/events.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/data/users.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/dex.yaml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/ecommerce/data/customers.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/ecommerce/data/orders.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/ecommerce/data/products.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/ecommerce/dex.yaml +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/ecommerce/run_all.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/examples/movies.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/scripts/localstack/init.sh +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/agents/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/llm.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/memory/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/memory/base.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/memory/episodic.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/observability/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/observability/audit.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/observability/cost.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/observability/metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/retrieval/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/retrieval/builtin.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/retrieval/graph.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/anthropic.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/guarded.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/ollama.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/openai.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/routing/router.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/runtime/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/runtime/sandbox.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/tools/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/vectorstore.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/workflows/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/workflows/conditions.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/workflows/dag.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ai/workflows/human_loop.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/api/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/api/errors.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/api/schemas.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/cli/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/cli/main.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/cli/run.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/cli/secops.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/cli/train.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/config/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/config/defaults.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/config/schema.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/exceptions.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/medallion_architecture.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/quality.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/registry.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/schemas.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/core/validators.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/_utils.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/csv.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/dbt.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/duckdb.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/connectors/spark.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/pipeline/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/pipeline/dag.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/profiler.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/quality/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/quality/gates.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/quality/spark.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/transforms/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/data/transforms/sql.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/lakehouse/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/lakehouse/partitioning.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/middleware/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/middleware/domain_metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/middleware/logging_config.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/middleware/metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/drift.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/features/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/mlflow_registry.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/serving.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/ml/tracking/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/orchestration/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/orchestration/builtin.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/orchestration/scheduler.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/plugins/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/plugins/registry.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/py.typed +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/gate.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/guard.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/masking.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/secops/pii.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/warehouse/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/warehouse/transforms.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/src/dataenginex/worker.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conformance/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conformance/test_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conformance/test_feature_store.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conformance/test_tracker.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conformance/test_transform.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/conftest.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/fixtures/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/fixtures/sample_data.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/fixtures/sample_jobs.csv +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/fixtures/sample_jobs.json +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_ai_integration.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_cli_run.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_config_cli.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_lineage_integration.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_ml_integration.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_pipeline_e2e.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_secops_integration.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/integration/test_storage_real.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/load/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/__init__.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_agent_runtime.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_ai_modules.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_api_pagination.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_api_schemas.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_api_validators.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_builtin_agent.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_builtin_feature_store.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_builtin_serving.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_builtin_tracker.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_cli_train.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_config_loader.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_config_schema.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_config_schema_extended.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_core_exceptions.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_core_interfaces.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_core_quality.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_core_registry.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_core_schemas_extended.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_csv_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_data.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_dbt_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_domain_metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_domain_metrics_wiring.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_drift_scheduler.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_duckdb_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_guarded_provider.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_lakehouse.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_llm.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_llm_extended.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_llm_litellm_vllm.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_logging.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_medallion.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_medallion_extended.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_middleware_metrics.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_parquet_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_pipeline_dag.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_pipeline_runner.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_plugins.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_privacy_guard_wiring.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_quality_gates.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_quality_spark.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_retriever.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_retriever_graph.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_run_history.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_scheduler.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_secops.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_secops_engine_and_cli.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_secops_guard.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_spark_connector.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_spark_fixtures.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_sql_transforms.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_storage_abstraction.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_vectorstore.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_vectorstore_extended.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_warehouse.py +0 -0
- {dataenginex-0.4.0 → dataenginex-0.4.1}/tests/unit/test_warehouse_transforms.py +0 -0
|
@@ -70,6 +70,6 @@ chore: establish org/domain foundation (pages, labels, project automation)
|
|
|
70
70
|
- [ ] Org/Repo variable set: `ORG_PROJECT_URL`
|
|
71
71
|
- [ ] Org/Repo secret set: `ORG_PROJECT_TOKEN`
|
|
72
72
|
- [ ] Cloudflare DNS updated for docs/api/apex domains
|
|
73
|
-
- [ ] Post-cutover checks completed
|
|
73
|
+
- [ ] Post-cutover checks completed
|
|
74
74
|
|
|
75
75
|
## Notes for Reviewers
|
|
@@ -9,7 +9,7 @@ on:
|
|
|
9
9
|
|
|
10
10
|
schedule:
|
|
11
11
|
# Weekly Python version compatibility check
|
|
12
|
-
- cron:
|
|
12
|
+
- cron: "0 0 * * 0"
|
|
13
13
|
|
|
14
14
|
permissions:
|
|
15
15
|
contents: read
|
|
@@ -20,7 +20,7 @@ jobs:
|
|
|
20
20
|
runs-on: ubuntu-latest
|
|
21
21
|
steps:
|
|
22
22
|
- uses: actions/checkout@v6
|
|
23
|
-
- uses: astral-sh/setup-uv@v8.
|
|
23
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
24
24
|
with:
|
|
25
25
|
version: "latest"
|
|
26
26
|
python-version: "3.13"
|
|
@@ -35,7 +35,7 @@ jobs:
|
|
|
35
35
|
needs: quality
|
|
36
36
|
steps:
|
|
37
37
|
- uses: actions/checkout@v6
|
|
38
|
-
- uses: astral-sh/setup-uv@v8.
|
|
38
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
39
39
|
with:
|
|
40
40
|
version: "latest"
|
|
41
41
|
python-version: "3.13"
|
|
@@ -43,7 +43,7 @@ jobs:
|
|
|
43
43
|
env:
|
|
44
44
|
UV_PROJECT_ENVIRONMENT: .venv
|
|
45
45
|
- run: uv run poe test-cov-core
|
|
46
|
-
- uses: codecov/codecov-action@
|
|
46
|
+
- uses: codecov/codecov-action@v7
|
|
47
47
|
with:
|
|
48
48
|
flags: dataenginex
|
|
49
49
|
fail_ci_if_error: false
|
|
@@ -59,7 +59,7 @@ jobs:
|
|
|
59
59
|
python-version: ["3.11", "3.12"]
|
|
60
60
|
steps:
|
|
61
61
|
- uses: actions/checkout@v6
|
|
62
|
-
- uses: astral-sh/setup-uv@v8.
|
|
62
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
63
63
|
with:
|
|
64
64
|
version: "latest"
|
|
65
65
|
python-version: ${{ matrix.python-version }}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
name: Trigger Docs Sync
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
version:
|
|
10
|
+
description: "Version tag to sync to website"
|
|
11
|
+
required: false
|
|
12
|
+
default: ""
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
dispatch:
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
steps:
|
|
18
|
+
- name: Send repository_dispatch to website
|
|
19
|
+
uses: peter-evans/repository-dispatch@v4
|
|
20
|
+
with:
|
|
21
|
+
token: ${{ secrets.DOCS_SYNC_PAT }}
|
|
22
|
+
repository: TheDataEngineX/website
|
|
23
|
+
event-type: sync-docs
|
|
24
|
+
client-payload: >-
|
|
25
|
+
{"repo": "dataenginex",
|
|
26
|
+
"version": "${{ github.ref_name || inputs.version }}"}
|
|
@@ -3,7 +3,18 @@ name: Release
|
|
|
3
3
|
on:
|
|
4
4
|
push:
|
|
5
5
|
tags:
|
|
6
|
-
-
|
|
6
|
+
- "v[0-9]+.[0-9]+.[0-9]+"
|
|
7
|
+
workflow_dispatch:
|
|
8
|
+
inputs:
|
|
9
|
+
tag:
|
|
10
|
+
description: "Release tag (e.g. v0.4.1)"
|
|
11
|
+
required: true
|
|
12
|
+
type: string
|
|
13
|
+
ref:
|
|
14
|
+
description: "Git ref to build from (branch or SHA)"
|
|
15
|
+
required: false
|
|
16
|
+
default: "main"
|
|
17
|
+
type: string
|
|
7
18
|
|
|
8
19
|
jobs:
|
|
9
20
|
build:
|
|
@@ -13,7 +24,9 @@ jobs:
|
|
|
13
24
|
contents: read
|
|
14
25
|
steps:
|
|
15
26
|
- uses: actions/checkout@v6
|
|
16
|
-
|
|
27
|
+
with:
|
|
28
|
+
ref: ${{ inputs.ref || github.ref }}
|
|
29
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
17
30
|
with:
|
|
18
31
|
version: "latest"
|
|
19
32
|
python-version: "3.13"
|
|
@@ -46,7 +59,9 @@ jobs:
|
|
|
46
59
|
contents: write
|
|
47
60
|
steps:
|
|
48
61
|
- uses: actions/checkout@v6
|
|
49
|
-
|
|
62
|
+
with:
|
|
63
|
+
ref: ${{ inputs.ref || github.ref }}
|
|
64
|
+
- uses: astral-sh/setup-uv@v8.2.0
|
|
50
65
|
with:
|
|
51
66
|
version: "latest"
|
|
52
67
|
- name: Extract version
|
|
@@ -62,8 +77,9 @@ jobs:
|
|
|
62
77
|
- name: Create GitHub release + attach SBOM
|
|
63
78
|
env:
|
|
64
79
|
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
80
|
+
RELEASE_TAG: ${{ inputs.tag || github.ref_name }}
|
|
65
81
|
run: |
|
|
66
|
-
gh release create $
|
|
67
|
-
--title "DEX $
|
|
82
|
+
gh release create "$RELEASE_TAG" \
|
|
83
|
+
--title "DEX $RELEASE_TAG" \
|
|
68
84
|
--generate-notes \
|
|
69
85
|
"sbom-dex-${{ steps.version.outputs.version }}.json"
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
See [docs/release-notes.md](docs/release-notes.md) for the complete release history.
|
|
4
|
+
|
|
5
|
+
All notable changes to `dataenginex` will be documented in this file.
|
|
6
|
+
|
|
7
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
8
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
9
|
+
|
|
10
|
+
## [0.4.1] - 2026-06-12
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- `dataenginex._json` — drop-in `orjson`-backed JSON shim (`dumps`, `loads`, `JSONResponse`) replacing stdlib `json` across the library for ~3–5× serialization throughput
|
|
15
|
+
- `DeltaConnector` — native Delta Lake read/write via `deltalake` (new `delta` optional extra: `pip install "dataenginex[delta]"`)
|
|
16
|
+
- `ml.features.builtin` — built-in feature transformers: `StandardScalerTransform`, `MinMaxScalerTransform`, `OneHotEncoderTransform`, `PolynomialFeaturesTransform`
|
|
17
|
+
- `core.interfaces` — new `Closeable` and `AsyncCloseable` protocols for uniform resource lifecycle
|
|
18
|
+
- `orjson>=3.11.0` and `zstandard>=0.25.0` promoted to core runtime dependencies
|
|
19
|
+
|
|
20
|
+
### Changed
|
|
21
|
+
|
|
22
|
+
- **Lakehouse storage** (`lakehouse/storage.py`) — full rewrite: unified `LakehouseStorage` with pluggable backends (local, S3, GCS), Zstandard compression throughout, columnar partition pruning
|
|
23
|
+
- **Lakehouse catalog** (`lakehouse/catalog.py`) — catalog entries now carry partition stats and schema fingerprints; `register` / `resolve` API stabilised
|
|
24
|
+
- **ML registry** (`ml/registry.py`) — artifact versioning with aliasing (`promote_alias`), stage transitions (`development` → `staging` → `production`), and metadata search
|
|
25
|
+
- **ML training** (`ml/training.py`) — `TrainingJob` lifecycle management, early-stopping callbacks, cross-validation harness, experiment comparison utilities
|
|
26
|
+
- **Pagination** (`api/pagination.py`) — cursor-based and page-number strategies unified under `PaginationResult`; `paginate_query` helper works with any iterable
|
|
27
|
+
- **Store** (`store.py`) — async-safe DuckDB connection pool, `get_pipeline_runs` and `list_model_artifacts` now return typed dataclasses
|
|
28
|
+
- **SecOps audit** (`secops/audit.py`) — structured audit events with severity levels, retention policy enforcement, export to JSONL
|
|
29
|
+
- **AI runtime** (`ai/runtime/executor.py`) — tool call concurrency limit, timeout per tool, structured error envelopes
|
|
30
|
+
- **Config loader** (`config/loader.py`) — environment variable interpolation (`${VAR}`) and `include:` directive for config composition
|
|
31
|
+
- `zstandard` is now used for pipeline run history compression, reducing on-disk footprint by ~60%
|
|
32
|
+
|
|
33
|
+
### Fixed
|
|
34
|
+
|
|
35
|
+
- `mypy --strict` passes cleanly across all modules after a strict type-annotation pass
|
|
36
|
+
- `DeltaConnector` and `LakehouseStorage` excluded from coverage thresholds (require live filesystems); coverage gate unchanged for all other modules
|
|
37
|
+
|
|
38
|
+
## [0.4.0] - 2026-02-21
|
|
39
|
+
|
|
40
|
+
> **Scope reset from 1.x.** Versions 1.0.0–1.1.2 were prematurely tagged stable. Resetting to `0.4.0` to honestly reflect pre-1.0 maturity. See [ADR-0007](https://github.com/TheDataEngineX/docs/blob/main/adr/0007-local-first-scope-reset.md) for rationale. The 1.x versions on PyPI are yanked but remain installable by exact pin (`pip install 'dataenginex==1.1.2'`); plain `pip install dataenginex` now resolves to `0.4.0`.
|
|
41
|
+
|
|
42
|
+
### Added
|
|
43
|
+
|
|
44
|
+
- Stable `__all__` exports in every subpackage `__init__.py`
|
|
45
|
+
- `from __future__ import annotations` in all public modules
|
|
46
|
+
- Comprehensive module-level docstrings with usage examples
|
|
47
|
+
- New public API exports: `ComponentHealth`, `AuthMiddleware`, `AuthUser`,
|
|
48
|
+
`create_token`, `decode_token`, `BadRequestError`, `NotFoundError`,
|
|
49
|
+
`PaginationMeta`, `RateLimiter`, `RateLimitMiddleware`,
|
|
50
|
+
`ConnectorStatus`, `FetchResult`, `ColumnProfile`, `get_logger`, `get_tracer`
|
|
51
|
+
|
|
52
|
+
### Changed
|
|
53
|
+
|
|
54
|
+
- Reorganized `__all__` in all subpackages for logical grouping
|
|
55
|
+
- Updated package version to 0.4.0
|
|
56
|
+
|
|
57
|
+
## [0.3.5] - 2026-02-13
|
|
58
|
+
|
|
59
|
+
### Added
|
|
60
|
+
|
|
61
|
+
- Production hardening: structured logging, Prometheus/OTel, health probes
|
|
62
|
+
- Data connectors: `RestConnector`, `FileConnector` with async interface
|
|
63
|
+
- Schema registry with versioned schema management
|
|
64
|
+
- Data profiler with automated dataset statistics
|
|
65
|
+
- Lakehouse catalog, partitioning, and storage backends
|
|
66
|
+
- ML framework: trainer, model registry, drift detection, serving
|
|
67
|
+
- Warehouse transforms and persistent lineage tracking
|
|
68
|
+
- JWT authentication middleware
|
|
69
|
+
- Rate limiting middleware
|
|
70
|
+
- Cursor-based pagination utilities
|
|
71
|
+
- Versioned API router (`/api/v1/`)
|
|
72
|
+
|
|
73
|
+
[0.3.5]: https://github.com/TheDataEngineX/dataenginex/releases/tag/v0.3.5
|
|
74
|
+
[0.4.0]: https://github.com/TheDataEngineX/dataenginex/compare/v0.3.5...v0.4.0
|
|
75
|
+
[0.4.1]: https://github.com/TheDataEngineX/dataenginex/compare/v0.4.0...v0.4.1
|
|
@@ -7,7 +7,7 @@ Thank you for your interest in contributing to DataEngineX!
|
|
|
7
7
|
Quick essentials:
|
|
8
8
|
|
|
9
9
|
- Development setup: [docs/development.md](docs/development.md)
|
|
10
|
-
- Governance & legal: [
|
|
10
|
+
- Governance & legal: See [docs/contributing.md](docs/contributing.md)
|
|
11
11
|
- Code of Conduct: [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md)
|
|
12
12
|
|
|
13
13
|
## Quick Start
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dataenginex
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: DataEngineX — open-source, self-hosted, local-first Data + ML + AI workbench library
|
|
5
5
|
Author-email: Jay <jayapal.myaka99@gmail.com>
|
|
6
6
|
License: MIT License
|
|
@@ -30,18 +30,23 @@ Requires-Dist: click>=8.3.3
|
|
|
30
30
|
Requires-Dist: croniter>=6.2.2
|
|
31
31
|
Requires-Dist: duckdb>=1.5.2
|
|
32
32
|
Requires-Dist: httpx>=0.28.1
|
|
33
|
+
Requires-Dist: orjson>=3.11.0
|
|
33
34
|
Requires-Dist: prometheus-client>=0.25.0
|
|
34
35
|
Requires-Dist: pyarrow>=23.0.1
|
|
35
36
|
Requires-Dist: pydantic>=2.13.4
|
|
36
|
-
Requires-Dist: python-dotenv>=1.2.2
|
|
37
37
|
Requires-Dist: pyyaml>=6.0.3
|
|
38
38
|
Requires-Dist: structlog>=25.5.0
|
|
39
|
+
Requires-Dist: zstandard>=0.25.0
|
|
39
40
|
Provides-Extra: cloud
|
|
40
41
|
Requires-Dist: boto3>=1.43.7; extra == 'cloud'
|
|
41
42
|
Requires-Dist: google-cloud-bigquery>=3.41.0; extra == 'cloud'
|
|
42
43
|
Requires-Dist: google-cloud-storage>=3.10.1; extra == 'cloud'
|
|
44
|
+
Provides-Extra: delta
|
|
45
|
+
Requires-Dist: deltalake>=0.24.0; extra == 'delta'
|
|
43
46
|
Provides-Extra: postgres
|
|
44
47
|
Requires-Dist: asyncpg>=0.31.0; extra == 'postgres'
|
|
48
|
+
Provides-Extra: pytorch
|
|
49
|
+
Requires-Dist: torch>=2.0.0; extra == 'pytorch'
|
|
45
50
|
Provides-Extra: qdrant
|
|
46
51
|
Requires-Dist: qdrant-client>=1.18.0; extra == 'qdrant'
|
|
47
52
|
Provides-Extra: queue
|
|
@@ -52,7 +57,7 @@ Description-Content-Type: text/markdown
|
|
|
52
57
|
|
|
53
58
|
The Python library that powers [DEX Studio](https://github.com/TheDataEngineX/dex-studio) — an open-source, self-hosted, local-first Data + ML + AI workbench. Use the library directly when you want code, not a UI.
|
|
54
59
|
|
|
55
|
-
> **Pre-1.0 status.** `0.4.0` is honest about that. See the [scope reset CHANGELOG](https://github.com/TheDataEngineX/
|
|
60
|
+
> **Pre-1.0 status.** `0.4.0` is honest about that. See the [scope reset CHANGELOG](https://github.com/TheDataEngineX/dataenginex/blob/main/CHANGELOG.md) for the rationale.
|
|
56
61
|
|
|
57
62
|
## Install
|
|
58
63
|
|
|
@@ -148,9 +153,9 @@ DEX Studio imports `dataenginex` directly — no separate API server.
|
|
|
148
153
|
|
|
149
154
|
## Links
|
|
150
155
|
|
|
151
|
-
- Source: [github.com/TheDataEngineX/
|
|
156
|
+
- Source: [github.com/TheDataEngineX/dataenginex](https://github.com/TheDataEngineX/dataenginex)
|
|
152
157
|
- Docs: [docs.thedataenginex.org](https://docs.thedataenginex.org)
|
|
153
158
|
- Roadmap: [docs/docs/roadmap/DESIGN-2026.md](https://github.com/TheDataEngineX/docs/blob/main/docs/roadmap/DESIGN-2026.md)
|
|
154
159
|
- ADRs: [docs/adr/](https://github.com/TheDataEngineX/docs/tree/main/adr)
|
|
155
|
-
- Issues: [github.com/TheDataEngineX/
|
|
160
|
+
- Issues: [github.com/TheDataEngineX/dataenginex/issues](https://github.com/TheDataEngineX/dataenginex/issues)
|
|
156
161
|
- License: MIT
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# dataenginex
|
|
2
2
|
|
|
3
|
-
[](https://github.com/TheDataEngineX/dataenginex/actions/workflows/ci.yml)
|
|
4
4
|
[](https://pypi.org/project/dataenginex/)
|
|
5
5
|
[](https://www.python.org/downloads/)
|
|
6
6
|
[](LICENSE)
|
|
@@ -96,7 +96,7 @@ ______________________________________________________________________
|
|
|
96
96
|
## Development
|
|
97
97
|
|
|
98
98
|
```bash
|
|
99
|
-
git clone https://github.com/TheDataEngineX/
|
|
99
|
+
git clone https://github.com/TheDataEngineX/dataenginex && cd dataenginex
|
|
100
100
|
uv sync
|
|
101
101
|
uv run poe check-all # lint + typecheck + tests
|
|
102
102
|
uv run poe test-cov # tests + coverage
|
|
@@ -126,7 +126,7 @@ ______________________________________________________________________
|
|
|
126
126
|
|
|
127
127
|
| Repo | Purpose |
|
|
128
128
|
| --- | --- |
|
|
129
|
-
| [dataenginex](https://github.com/TheDataEngineX/
|
|
129
|
+
| [dataenginex](https://github.com/TheDataEngineX/dataenginex) | This library (PyPI) |
|
|
130
130
|
| [dex-studio](https://github.com/TheDataEngineX/dex-studio) | Web UI — FastAPI + Jinja2 + HTMX |
|
|
131
131
|
| [docs](https://github.com/TheDataEngineX/docs) | Docs site ([docs.thedataenginex.org](https://docs.thedataenginex.org)) — ADRs + roadmap live here |
|
|
132
132
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
| Version | Supported |
|
|
6
6
|
|---------|-----------|
|
|
7
|
-
| Latest
|
|
7
|
+
| Latest (0.4.x) | ✅ |
|
|
8
8
|
| Previous minor release | ✅ (security fixes only) |
|
|
9
9
|
| Older versions | ❌ |
|
|
10
10
|
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
Instead, please report them via one of these channels:
|
|
16
16
|
|
|
17
17
|
1. **Email**: security@thedataenginex.dev
|
|
18
|
-
|
|
18
|
+
1. **GitHub Security Advisories**: Use the "Report a vulnerability" button on the Security tab
|
|
19
19
|
|
|
20
20
|
### What to Include
|
|
21
21
|
|
|
@@ -53,6 +53,8 @@ DataEngineX follows these security practices:
|
|
|
53
53
|
|
|
54
54
|
## Security-Related Dependencies
|
|
55
55
|
|
|
56
|
+
*This table is illustrative, not exhaustive.*
|
|
57
|
+
|
|
56
58
|
| Dependency | Purpose | Security Note |
|
|
57
59
|
|------------|---------|---------------|
|
|
58
60
|
| pydantic | Config validation | Validates all inputs |
|
|
@@ -68,4 +70,4 @@ Run security audits locally:
|
|
|
68
70
|
uv run poe security # pip-audit for vulnerabilities
|
|
69
71
|
```
|
|
70
72
|
|
|
71
|
-
CI runs `pip-audit` and dependency scanning on every PR.
|
|
73
|
+
CI runs `pip-audit` and dependency scanning on every PR.
|
|
@@ -4,6 +4,8 @@
|
|
|
4
4
|
**Date**: 2026-02-15
|
|
5
5
|
**Authors**: Data Engineering Team
|
|
6
6
|
|
|
7
|
+
> **Note:** Implementation details (tooling, storage) have evolved since this ADR was written. See [docs/architecture.md](../architecture.md) for the current design. References to ADR-0002, ADR-0003, and ADR-0004 are placeholders for decisions that were planned but not written.
|
|
8
|
+
|
|
7
9
|
## Context
|
|
8
10
|
|
|
9
11
|
DEX needs a scalable, standardized data architecture that works across all projects (CareerDEX, Weather, etc.). The data pipeline needs to handle:
|
|
@@ -11,5 +11,4 @@ Auto-generated API documentation for the `dataenginex` package.
|
|
|
11
11
|
| [middleware](middleware.md) | Logging, metrics, tracing, request middleware |
|
|
12
12
|
| [ml](ml.md) | ML training, model registry, drift detection, serving, vectorstore, LLM adapters |
|
|
13
13
|
| [plugins](plugins.md) | Plugin system — ABC, registry, auto-discovery |
|
|
14
|
-
| [dashboard](dashboard.md) | Streamlit dashboard framework — panels, config |
|
|
15
14
|
| [warehouse](warehouse.md) | Transforms, persistent lineage tracking |
|
|
@@ -124,7 +124,7 @@ ______________________________________________________________________
|
|
|
124
124
|
|
|
125
125
|
## Release Automation
|
|
126
126
|
|
|
127
|
-
**Workflow**: [`.github/workflows/release.yml`](https://github.com/TheDataEngineX/
|
|
127
|
+
**Workflow**: [`.github/workflows/release.yml`](https://github.com/TheDataEngineX/dataenginex/blob/main/.github/workflows/release.yml)
|
|
128
128
|
|
|
129
129
|
**Trigger**: Push a tag matching `v[0-9]+.[0-9]+.[0-9]+` to `main`
|
|
130
130
|
|
|
@@ -69,7 +69,7 @@ Use semantic commit format:
|
|
|
69
69
|
- This project is open source under MIT; keep license and attribution notices in redistributions.
|
|
70
70
|
- Forks are welcome, but should use a distinct public name when redistributed as a separate project.
|
|
71
71
|
- Do not present a fork as the official DataEngineX/DEX project.
|
|
72
|
-
- See
|
|
72
|
+
- See the project's license and brand guidelines for brand-usage details.
|
|
73
73
|
|
|
74
74
|
## Code Reviews
|
|
75
75
|
|
|
@@ -37,7 +37,7 @@ This installs all Python dependencies and configures pre-commit hooks.
|
|
|
37
37
|
|
|
38
38
|
```bash
|
|
39
39
|
# 1. Clone repo and create feature branch
|
|
40
|
-
git clone https://github.com/TheDataEngineX/
|
|
40
|
+
git clone https://github.com/TheDataEngineX/dataenginex.git
|
|
41
41
|
cd dataenginex
|
|
42
42
|
git checkout -b feat/issue-XXX-description dev
|
|
43
43
|
|
|
@@ -208,5 +208,5 @@ uv run poe clean # Remove caches and build artifacts
|
|
|
208
208
|
- **Architecture**: See [architecture.md](./architecture.md)
|
|
209
209
|
- **ADRs**: See [ADR-0001](./adr/0001-medallion-architecture.md) for architectural decisions
|
|
210
210
|
- **Deployment**: See Deployment Runbook in the `infradex` repo
|
|
211
|
-
- **Issues**: [GitHub Issues](https://github.com/TheDataEngineX/
|
|
211
|
+
- **Issues**: [GitHub Issues](https://github.com/TheDataEngineX/dataenginex/issues)
|
|
212
212
|
- **Discussions**: [GitHub Discussions](https://github.com/orgs/TheDataEngineX/discussions)
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# DataEngineX Documentation
|
|
2
|
+
|
|
3
|
+
**The Python library (PyPI) — engine, config, CLI, pipelines, ML, AI, PrivacyGuard.**
|
|
4
|
+
|
|
5
|
+
Documentation for the core `dataenginex` library. For the web UI, see [dex-studio](https://github.com/TheDataEngineX/dex-studio).
|
|
6
|
+
|
|
7
|
+
## Quick start
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install dataenginex
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
or
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
uv add dataenginex
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
See [Quickstart](quickstart.md) for a full walkthrough with a sample project.
|
|
20
|
+
|
|
21
|
+
## Guides
|
|
22
|
+
|
|
23
|
+
- [Quickstart](quickstart.md) — Get up and running in 5 minutes
|
|
24
|
+
- [Architecture](architecture.md) — Core patterns, module map, design decisions
|
|
25
|
+
- [Development Setup](development.md) — Prerequisites, workflow, troubleshooting
|
|
26
|
+
- [Contributing](contributing.md) — How to contribute to the project
|
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
|
|
3
3
|
**Complete guide to monitoring, debugging, and understanding DataEngineX in production.**
|
|
4
4
|
|
|
5
|
+
> **Note on scope:** This document covers both library-level and application-level observability. dataenginex itself is a pure library — HTTP-level middleware, health endpoints, and request tracing are configured by the application (e.g., dex-studio).
|
|
6
|
+
|
|
5
7
|
> **Quick Links:** [Metrics](#prometheus-metrics) · [Tracing](#opentelemetry-tracing) · [Grafana](#grafana-dashboards) · [Local Testing](#local-testing) · [Troubleshooting](#troubleshooting)
|
|
6
8
|
|
|
7
9
|
______________________________________________________________________
|
|
@@ -579,7 +581,7 @@ ______________________________________________________________________
|
|
|
579
581
|
|
|
580
582
|
### Prometheus alert rules (SLO-aligned)
|
|
581
583
|
|
|
582
|
-
The actual rule definitions live in [`infradex/monitoring/alerts/dataenginex-alerts.yml`](https://github.com/TheDataEngineX/infradex/blob/main/monitoring/alerts/dataenginex-alerts.yml). They expose three alerts—latency, error rate, and saturation—scoped to prod.
|
|
584
|
+
The actual rule definitions live in [`infradex/monitoring/alerts/dataenginex-alerts.yml`](https://github.com/TheDataEngineX/infradex/blob/main/monitoring/alerts/dataenginex-alerts.yml). They expose three alerts—latency, error rate, and saturation—scoped to prod.
|
|
583
585
|
|
|
584
586
|
| Alert | Environment | Threshold | Severity | Receiver |
|
|
585
587
|
|-------|-------------|-----------|----------|----------|
|
|
@@ -613,7 +615,7 @@ kubectl apply -f https://raw.githubusercontent.com/TheDataEngineX/infradex/main/
|
|
|
613
615
|
kubectl rollout restart deployment/alertmanager
|
|
614
616
|
```
|
|
615
617
|
|
|
616
|
-
3. Verify the alerts appear in Alertmanager UI and reference the release runbook described in
|
|
618
|
+
3. Verify the alerts appear in Alertmanager UI and reference the release runbook described in the `infradex` repository.
|
|
617
619
|
|
|
618
620
|
If you manage the stack via ArgoCD, push the changes to the kustomize overlay and let ArgoCD sync the deployments automatically rather than running the commands above manually.
|
|
619
621
|
|
|
@@ -7,7 +7,7 @@ Get a DataEngineX pipeline running in under five minutes.
|
|
|
7
7
|
```bash
|
|
8
8
|
pip install dataenginex
|
|
9
9
|
# or from source:
|
|
10
|
-
git clone https://github.com/TheDataEngineX/
|
|
10
|
+
git clone https://github.com/TheDataEngineX/dataenginex && cd dataenginex
|
|
11
11
|
uv sync
|
|
12
12
|
```
|
|
13
13
|
|
|
@@ -138,7 +138,7 @@ events = engine.secops_audit.events # list of AuditEvent
|
|
|
138
138
|
- [API Reference](api-reference/index.md) — auto-generated module docs
|
|
139
139
|
- `examples/` directory — full list of runnable examples
|
|
140
140
|
|
|
141
|
-
|
|
141
|
+
______________________________________________________________________
|
|
142
142
|
|
|
143
143
|
## DEX Studio
|
|
144
144
|
|
|
@@ -199,7 +199,7 @@ ______________________________________________________________________
|
|
|
199
199
|
**Operations:**
|
|
200
200
|
|
|
201
201
|
- **Deployment Runbook** (in `infradex` repo) - Release procedures
|
|
202
|
-
- **
|
|
202
|
+
- **Project Roadmap** (in the external `docs` repository) - Issue tracking and milestone source
|
|
203
203
|
|
|
204
204
|
______________________________________________________________________
|
|
205
205
|
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[tool.poe]
|
|
2
|
+
executor.type = "uv"
|
|
3
|
+
|
|
4
|
+
[tool.poe.tasks.setup]
|
|
5
|
+
shell = "uv sync --all-extras --all-groups && uvx pre-commit install"
|
|
6
|
+
|
|
7
|
+
[tool.poe.tasks.lint]
|
|
8
|
+
cmd = "ruff check src/ tests/"
|
|
9
|
+
|
|
10
|
+
[tool.poe.tasks.typecheck]
|
|
11
|
+
cmd = "mypy src/dataenginex/ --config-file=pyproject.toml --no-incremental"
|
|
12
|
+
env = {UV_PYTHON = "3.13", UV_PROJECT_ENVIRONMENT = ".venv"}
|
|
13
|
+
|
|
14
|
+
[tool.poe.tasks.security]
|
|
15
|
+
cmd = "uv run pip-audit --progress-spinner off --ignore-vuln GHSA-5239-wwwm-4pmq --ignore-vuln CVE-2025-3000"
|
|
16
|
+
|
|
17
|
+
[tool.poe.tasks.actionlint]
|
|
18
|
+
shell = """
|
|
19
|
+
if command -v actionlint >/dev/null 2>&1; then
|
|
20
|
+
actionlint .github/workflows/*.yml
|
|
21
|
+
elif command -v docker >/dev/null 2>&1; then
|
|
22
|
+
docker run --rm -v "$PWD":/repo -w /repo rhysd/actionlint:latest .github/workflows/*.yml
|
|
23
|
+
else
|
|
24
|
+
echo 'actionlint not found — skipping'
|
|
25
|
+
fi
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
[tool.poe.tasks.quality]
|
|
29
|
+
sequence = ["lint", "typecheck", "security"]
|
|
30
|
+
|
|
31
|
+
[tool.poe.tasks.test-cov-core]
|
|
32
|
+
env = {COVERAGE_PROCESS_START = "pyproject.toml"}
|
|
33
|
+
shell = "coverage run -m pytest tests/ -v && coverage combine && coverage html && coverage xml -o coverage.xml && coverage report --fail-under=80"
|
|
34
|
+
|
|
35
|
+
[tool.poe.tasks.check-all]
|
|
36
|
+
sequence = ["quality", "test-cov-core"]
|
|
37
|
+
|
|
38
|
+
[tool.poe.tasks.format]
|
|
39
|
+
cmd = "ruff format src/ tests/"
|
|
40
|
+
|
|
41
|
+
[tool.poe.tasks.autofix]
|
|
42
|
+
shell = "ruff check --fix src/ tests/ && ruff format src/ tests/"
|
|
43
|
+
|
|
44
|
+
[tool.poe.tasks.lint-fix]
|
|
45
|
+
sequence = ["autofix"]
|
|
46
|
+
|
|
47
|
+
[tool.poe.tasks.clean]
|
|
48
|
+
shell = "rm -rf .pytest_cache .mypy_cache .ruff_cache dist build htmlcov .coverage* && find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null; echo 'cleaned'"
|