dataenginex 1.1.0__tar.gz → 1.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (322)
  1. dataenginex-1.1.2/.github/workflows/auto-pr.yml +23 -0
  2. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/ci.yml +4 -4
  3. dataenginex-1.1.2/.github/workflows/release.yml +69 -0
  4. {dataenginex-1.1.0 → dataenginex-1.1.2}/.gitignore +1 -0
  5. {dataenginex-1.1.0 → dataenginex-1.1.2}/CHANGELOG.md +7 -0
  6. {dataenginex-1.1.0 → dataenginex-1.1.2}/PKG-INFO +59 -45
  7. dataenginex-1.1.2/README.md +161 -0
  8. dataenginex-1.1.2/docs/api-reference/api.md +63 -0
  9. dataenginex-1.1.2/docs/api-reference/dashboard.md +8 -0
  10. dataenginex-1.1.2/docs/api-reference/ml.md +45 -0
  11. dataenginex-1.1.2/docs/architecture.md +178 -0
  12. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/ci-cd.md +65 -114
  13. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/development.md +7 -8
  14. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/observability.md +20 -22
  15. dataenginex-1.1.2/docs/quickstart.md +110 -0
  16. dataenginex-1.1.2/docs/release-notes.md +141 -0
  17. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/02_api_quickstart.py +8 -19
  18. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/GUIDE.md +26 -19
  19. {dataenginex-1.1.0 → dataenginex-1.1.2}/poe_tasks.toml +0 -13
  20. {dataenginex-1.1.0 → dataenginex-1.1.2}/pyproject.toml +12 -28
  21. dataenginex-1.1.2/scripts/localstack/init.sh +7 -0
  22. dataenginex-1.1.2/src/dataenginex/README.md +88 -0
  23. dataenginex-1.1.2/src/dataenginex/__init__.py +96 -0
  24. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/__init__.py +38 -0
  25. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/agents/builtin.py +2 -2
  26. {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/llm.py +1 -1
  27. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/langfuse.py +1 -1
  28. {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/vectorstore.py +3 -3
  29. dataenginex-1.1.2/src/dataenginex/api/__init__.py +36 -0
  30. dataenginex-1.1.2/src/dataenginex/api/errors.py +43 -0
  31. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/main.py +28 -27
  32. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/run.py +14 -17
  33. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/train.py +21 -31
  34. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/defaults.py +0 -4
  35. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/schema.py +0 -23
  36. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/runner.py +5 -3
  37. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/gates.py +4 -3
  38. dataenginex-1.1.2/src/dataenginex/engine.py +803 -0
  39. dataenginex-1.1.2/src/dataenginex/middleware/__init__.py +43 -0
  40. dataenginex-1.1.2/src/dataenginex/ml/__init__.py +56 -0
  41. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/builtin.py +2 -3
  42. dataenginex-1.1.2/src/dataenginex/orchestration/__init__.py +23 -0
  43. {dataenginex-1.1.0/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/orchestration}/scheduler.py +2 -3
  44. dataenginex-1.1.2/src/dataenginex/store.py +814 -0
  45. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/lineage.py +13 -1
  46. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/worker.py +1 -1
  47. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_serving.py +2 -2
  48. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_config_loader.py +4 -5
  49. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_config_schema.py +2 -3
  50. dataenginex-1.1.2/tests/unit/test_dex_engine.py +144 -0
  51. dataenginex-1.1.2/tests/unit/test_dex_store.py +338 -0
  52. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_domain_metrics_wiring.py +2 -45
  53. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_drift_scheduler.py +1 -1
  54. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_llm.py +1 -1
  55. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_llm_litellm_vllm.py +3 -3
  56. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_observability_langfuse.py +1 -1
  57. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_vectorstore.py +1 -1
  58. {dataenginex-1.1.0 → dataenginex-1.1.2}/uv.lock +751 -868
  59. dataenginex-1.1.0/.github/workflows/auto-pr-dev-to-main.yml +0 -15
  60. dataenginex-1.1.0/.github/workflows/auto-pr-to-dev.yml +0 -16
  61. dataenginex-1.1.0/.github/workflows/claude.yml +0 -23
  62. dataenginex-1.1.0/.github/workflows/docker-build-push.yml +0 -63
  63. dataenginex-1.1.0/.github/workflows/docs-notify.yml +0 -18
  64. dataenginex-1.1.0/.github/workflows/label-sync.yml +0 -31
  65. dataenginex-1.1.0/.github/workflows/pypi-publish.yml +0 -290
  66. dataenginex-1.1.0/.github/workflows/release-dex.yml +0 -150
  67. dataenginex-1.1.0/.github/workflows/release-please.yml +0 -16
  68. dataenginex-1.1.0/.release-please-manifest.json +0 -3
  69. dataenginex-1.1.0/README.md +0 -157
  70. dataenginex-1.1.0/docs/api-reference/api.md +0 -5
  71. dataenginex-1.1.0/docs/api-reference/dashboard.md +0 -7
  72. dataenginex-1.1.0/docs/api-reference/ml.md +0 -5
  73. dataenginex-1.1.0/docs/architecture.md +0 -150
  74. dataenginex-1.1.0/docs/quickstart.md +0 -85
  75. dataenginex-1.1.0/docs/release-notes.md +0 -51
  76. dataenginex-1.1.0/release-please-config.json +0 -23
  77. dataenginex-1.1.0/sonar-project.properties +0 -22
  78. dataenginex-1.1.0/src/dataenginex/README.md +0 -63
  79. dataenginex-1.1.0/src/dataenginex/__init__.py +0 -85
  80. dataenginex-1.1.0/src/dataenginex/api/__init__.py +0 -73
  81. dataenginex-1.1.0/src/dataenginex/api/auth.py +0 -242
  82. dataenginex-1.1.0/src/dataenginex/api/errors.py +0 -77
  83. dataenginex-1.1.0/src/dataenginex/api/factory.py +0 -245
  84. dataenginex-1.1.0/src/dataenginex/api/health.py +0 -147
  85. dataenginex-1.1.0/src/dataenginex/api/jwks.py +0 -155
  86. dataenginex-1.1.0/src/dataenginex/api/ldap_sync.py +0 -170
  87. dataenginex-1.1.0/src/dataenginex/api/rate_limit.py +0 -131
  88. dataenginex-1.1.0/src/dataenginex/api/rbac.py +0 -91
  89. dataenginex-1.1.0/src/dataenginex/api/routers/__init__.py +0 -9
  90. dataenginex-1.1.0/src/dataenginex/api/routers/ai.py +0 -192
  91. dataenginex-1.1.0/src/dataenginex/api/routers/data.py +0 -154
  92. dataenginex-1.1.0/src/dataenginex/api/routers/health.py +0 -26
  93. dataenginex-1.1.0/src/dataenginex/api/routers/ml.py +0 -182
  94. dataenginex-1.1.0/src/dataenginex/api/routers/pipelines.py +0 -194
  95. dataenginex-1.1.0/src/dataenginex/api/routers/root.py +0 -25
  96. dataenginex-1.1.0/src/dataenginex/api/routers/system.py +0 -48
  97. dataenginex-1.1.0/src/dataenginex/api/scim.py +0 -629
  98. dataenginex-1.1.0/src/dataenginex/cli/serve.py +0 -48
  99. dataenginex-1.1.0/src/dataenginex/middleware/__init__.py +0 -67
  100. dataenginex-1.1.0/src/dataenginex/middleware/metrics_middleware.py +0 -87
  101. dataenginex-1.1.0/src/dataenginex/middleware/request_logging.py +0 -88
  102. dataenginex-1.1.0/src/dataenginex/middleware/tracing.py +0 -104
  103. dataenginex-1.1.0/src/dataenginex/ml/__init__.py +0 -105
  104. dataenginex-1.1.0/src/dataenginex/orchestration/__init__.py +0 -10
  105. dataenginex-1.1.0/tests/integration/test_api_middleware_integration.py +0 -375
  106. dataenginex-1.1.0/tests/integration/test_full_app.py +0 -89
  107. dataenginex-1.1.0/tests/unit/test_ai_router.py +0 -96
  108. dataenginex-1.1.0/tests/unit/test_ai_router_extended.py +0 -273
  109. dataenginex-1.1.0/tests/unit/test_api_auth.py +0 -100
  110. dataenginex-1.1.0/tests/unit/test_api_factory.py +0 -168
  111. dataenginex-1.1.0/tests/unit/test_api_jwks.py +0 -179
  112. dataenginex-1.1.0/tests/unit/test_api_rate_limit.py +0 -86
  113. dataenginex-1.1.0/tests/unit/test_api_rbac.py +0 -109
  114. dataenginex-1.1.0/tests/unit/test_api_scim.py +0 -162
  115. dataenginex-1.1.0/tests/unit/test_data_router.py +0 -117
  116. dataenginex-1.1.0/tests/unit/test_errors.py +0 -64
  117. dataenginex-1.1.0/tests/unit/test_health.py +0 -126
  118. dataenginex-1.1.0/tests/unit/test_metrics.py +0 -139
  119. dataenginex-1.1.0/tests/unit/test_middleware.py +0 -62
  120. dataenginex-1.1.0/tests/unit/test_ml_router.py +0 -122
  121. dataenginex-1.1.0/tests/unit/test_pipeline_router_extended.py +0 -252
  122. dataenginex-1.1.0/tests/unit/test_security_extended.py +0 -379
  123. dataenginex-1.1.0/tests/unit/test_system_router.py +0 -65
  124. dataenginex-1.1.0/tests/unit/test_tracing.py +0 -90
  125. {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/commands/new-feature.md +0 -0
  126. {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/commands/validate.md +0 -0
  127. {dataenginex-1.1.0 → dataenginex-1.1.2}/.claude/settings.json +0 -0
  128. {dataenginex-1.1.0 → dataenginex-1.1.2}/.env.template +0 -0
  129. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  130. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  131. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  132. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  133. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/dependabot.yml +0 -0
  134. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/labels.yml +0 -0
  135. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/release-pr-template.md +0 -0
  136. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/enforce-dev-to-main.yml +0 -0
  137. {dataenginex-1.1.0 → dataenginex-1.1.2}/.github/workflows/security.yml +0 -0
  138. {dataenginex-1.1.0 → dataenginex-1.1.2}/.gitleaks.toml +0 -0
  139. {dataenginex-1.1.0 → dataenginex-1.1.2}/.pre-commit-config.yaml +0 -0
  140. {dataenginex-1.1.0 → dataenginex-1.1.2}/.python-version +0 -0
  141. {dataenginex-1.1.0 → dataenginex-1.1.2}/CLAUDE.md +0 -0
  142. {dataenginex-1.1.0 → dataenginex-1.1.2}/CODEOWNERS +0 -0
  143. {dataenginex-1.1.0 → dataenginex-1.1.2}/CODE_OF_CONDUCT.md +0 -0
  144. {dataenginex-1.1.0 → dataenginex-1.1.2}/CONTRIBUTING.md +0 -0
  145. {dataenginex-1.1.0 → dataenginex-1.1.2}/Dockerfile +0 -0
  146. {dataenginex-1.1.0 → dataenginex-1.1.2}/LICENSE +0 -0
  147. {dataenginex-1.1.0 → dataenginex-1.1.2}/SECURITY.md +0 -0
  148. {dataenginex-1.1.0 → dataenginex-1.1.2}/docker-compose.test.yml +0 -0
  149. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/adr/0000-template.md +0 -0
  150. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/adr/0001-medallion-architecture.md +0 -0
  151. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/core.md +0 -0
  152. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/data.md +0 -0
  153. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/index.md +0 -0
  154. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/lakehouse.md +0 -0
  155. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/middleware.md +0 -0
  156. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/plugins.md +0 -0
  157. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/api-reference/warehouse.md +0 -0
  158. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/contributing.md +0 -0
  159. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/index.md +0 -0
  160. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/sdlc.md +0 -0
  161. {dataenginex-1.1.0 → dataenginex-1.1.2}/docs/security-scanning.md +0 -0
  162. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/01_hello_pipeline.py +0 -0
  163. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/03_quality_gate.py +0 -0
  164. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/04_ml_training.py +0 -0
  165. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/05_rag_demo.py +0 -0
  166. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/06_llm_quickstart.py +0 -0
  167. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/07_api_ingestion.py +0 -0
  168. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/08_spark_ml.py +0 -0
  169. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/09_feature_engineering.py +0 -0
  170. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/10_model_analysis.py +0 -0
  171. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dashboard/dashboard_config.yaml +0 -0
  172. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dashboard/run_dashboard.py +0 -0
  173. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/data/events.csv +0 -0
  174. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/data/users.csv +0 -0
  175. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/dex.yaml +0 -0
  176. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/customers.csv +0 -0
  177. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/orders.csv +0 -0
  178. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/data/products.csv +0 -0
  179. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/dex.yaml +0 -0
  180. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/ecommerce/run_all.py +0 -0
  181. {dataenginex-1.1.0 → dataenginex-1.1.2}/examples/movies.csv +0 -0
  182. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/RELEASE_NOTES.md +0 -0
  183. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/agents/__init__.py +0 -0
  184. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/__init__.py +0 -0
  185. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/base.py +0 -0
  186. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/episodic.py +0 -0
  187. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/memory/long_term.py +0 -0
  188. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/__init__.py +0 -0
  189. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/audit.py +0 -0
  190. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/cost.py +0 -0
  191. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/observability/metrics.py +0 -0
  192. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/__init__.py +0 -0
  193. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/builtin.py +0 -0
  194. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/graph.py +0 -0
  195. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/__init__.py +0 -0
  196. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/anthropic.py +0 -0
  197. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/huggingface.py +0 -0
  198. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/ollama.py +0 -0
  199. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/openai.py +0 -0
  200. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/routing/router.py +0 -0
  201. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/__init__.py +0 -0
  202. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
  203. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/executor.py +0 -0
  204. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/sandbox.py +0 -0
  205. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/tools/__init__.py +0 -0
  206. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/tools/builtin.py +0 -0
  207. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/__init__.py +0 -0
  208. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/conditions.py +0 -0
  209. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/dag.py +0 -0
  210. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/human_loop.py +0 -0
  211. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/api/pagination.py +0 -0
  212. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/api/schemas.py +0 -0
  213. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/cli/__init__.py +0 -0
  214. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/__init__.py +0 -0
  215. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/config/loader.py +0 -0
  216. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/__init__.py +0 -0
  217. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/exceptions.py +0 -0
  218. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/interfaces.py +0 -0
  219. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/medallion_architecture.py +0 -0
  220. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/quality.py +0 -0
  221. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/registry.py +0 -0
  222. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/schemas.py +0 -0
  223. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/core/validators.py +0 -0
  224. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/__init__.py +0 -0
  225. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/__init__.py +0 -0
  226. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/csv.py +0 -0
  227. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/duckdb.py +0 -0
  228. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/connectors/legacy.py +0 -0
  229. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/__init__.py +0 -0
  230. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/dag.py +0 -0
  231. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/run_history.py +0 -0
  232. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/profiler.py +0 -0
  233. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/__init__.py +0 -0
  234. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/quality/spark.py +0 -0
  235. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/registry.py +0 -0
  236. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/transforms/__init__.py +0 -0
  237. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/data/transforms/sql.py +0 -0
  238. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/__init__.py +0 -0
  239. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/catalog.py +0 -0
  240. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/partitioning.py +0 -0
  241. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/lakehouse/storage.py +0 -0
  242. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/domain_metrics.py +0 -0
  243. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/logging_config.py +0 -0
  244. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/middleware/metrics.py +0 -0
  245. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/drift.py +0 -0
  246. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/features/__init__.py +0 -0
  247. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/features/builtin.py +0 -0
  248. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/metrics.py +0 -0
  249. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/mlflow_registry.py +0 -0
  250. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/registry.py +0 -0
  251. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving.py +0 -0
  252. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
  253. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/__init__.py +0 -0
  254. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/builtin.py +0 -0
  255. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/ml/training.py +0 -0
  256. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/orchestration/builtin.py +0 -0
  257. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/plugins/__init__.py +0 -0
  258. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/plugins/registry.py +0 -0
  259. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/py.typed +0 -0
  260. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/__init__.py +0 -0
  261. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/audit.py +0 -0
  262. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/gate.py +0 -0
  263. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/masking.py +0 -0
  264. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/secops/pii.py +0 -0
  265. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/__init__.py +0 -0
  266. {dataenginex-1.1.0 → dataenginex-1.1.2}/src/dataenginex/warehouse/transforms.py +0 -0
  267. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/__init__.py +0 -0
  268. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/__init__.py +0 -0
  269. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_connector.py +0 -0
  270. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_feature_store.py +0 -0
  271. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_tracker.py +0 -0
  272. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conformance/test_transform.py +0 -0
  273. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/conftest.py +0 -0
  274. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/__init__.py +0 -0
  275. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_data.py +0 -0
  276. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.csv +0 -0
  277. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.json +0 -0
  278. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/__init__.py +0 -0
  279. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_ai_integration.py +0 -0
  280. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_cli_run.py +0 -0
  281. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_config_cli.py +0 -0
  282. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_lineage_integration.py +0 -0
  283. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_ml_integration.py +0 -0
  284. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_pipeline_e2e.py +0 -0
  285. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_secops_integration.py +0 -0
  286. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/integration/test_storage_real.py +0 -0
  287. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/load/__init__.py +0 -0
  288. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/__init__.py +0 -0
  289. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_agent_runtime.py +0 -0
  290. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_ai_modules.py +0 -0
  291. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_pagination.py +0 -0
  292. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_schemas.py +0 -0
  293. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_api_validators.py +0 -0
  294. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_agent.py +0 -0
  295. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_feature_store.py +0 -0
  296. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_builtin_tracker.py +0 -0
  297. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_cli_train.py +0 -0
  298. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_exceptions.py +0 -0
  299. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_interfaces.py +0 -0
  300. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_core_registry.py +0 -0
  301. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_csv_connector.py +0 -0
  302. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_data.py +0 -0
  303. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_domain_metrics.py +0 -0
  304. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_duckdb_connector.py +0 -0
  305. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_lakehouse.py +0 -0
  306. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_logging.py +0 -0
  307. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_medallion.py +0 -0
  308. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_ml.py +0 -0
  309. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_pipeline_dag.py +0 -0
  310. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_pipeline_runner.py +0 -0
  311. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_plugins.py +0 -0
  312. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_quality_gates.py +0 -0
  313. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_quality_spark.py +0 -0
  314. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_retriever.py +0 -0
  315. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_retriever_graph.py +0 -0
  316. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_run_history.py +0 -0
  317. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_scheduler.py +0 -0
  318. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_secops.py +0 -0
  319. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_spark_fixtures.py +0 -0
  320. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_sql_transforms.py +0 -0
  321. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_storage_abstraction.py +0 -0
  322. {dataenginex-1.1.0 → dataenginex-1.1.2}/tests/unit/test_warehouse.py +0 -0
@@ -0,0 +1,23 @@
1
+ name: Auto PR
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - dev
7
+ - 'feature/**'
8
+ - 'fix/**'
9
+
10
+ permissions:
11
+ contents: write
12
+ pull-requests: write
13
+
14
+ jobs:
15
+ feature-to-dev:
16
+ if: startsWith(github.ref, 'refs/heads/feature/') || startsWith(github.ref, 'refs/heads/fix/')
17
+ uses: TheDataEngineX/.github/.github/workflows/auto-pr-to-dev.yml@main
18
+ secrets: inherit
19
+
20
+ dev-to-main:
21
+ if: github.ref == 'refs/heads/dev'
22
+ uses: TheDataEngineX/.github/.github/workflows/auto-pr-dev-to-main.yml@main
23
+ secrets: inherit
@@ -20,7 +20,7 @@ jobs:
20
20
  runs-on: ubuntu-latest
21
21
  steps:
22
22
  - uses: actions/checkout@v6
23
- - uses: astral-sh/setup-uv@v7
23
+ - uses: astral-sh/setup-uv@v8.1.0
24
24
  with:
25
25
  version: "latest"
26
26
  python-version: "3.13"
@@ -35,7 +35,7 @@ jobs:
35
35
  needs: quality
36
36
  steps:
37
37
  - uses: actions/checkout@v6
38
- - uses: astral-sh/setup-uv@v7
38
+ - uses: astral-sh/setup-uv@v8.1.0
39
39
  with:
40
40
  version: "latest"
41
41
  python-version: "3.13"
@@ -43,7 +43,7 @@ jobs:
43
43
  env:
44
44
  UV_PROJECT_ENVIRONMENT: .venv
45
45
  - run: uv run poe test-cov-core
46
- - uses: codecov/codecov-action@v6
46
+ - uses: codecov/codecov-action@v5
47
47
  with:
48
48
  flags: dataenginex
49
49
  fail_ci_if_error: false
@@ -59,7 +59,7 @@ jobs:
59
59
  python-version: ["3.11", "3.12"]
60
60
  steps:
61
61
  - uses: actions/checkout@v6
62
- - uses: astral-sh/setup-uv@v7
62
+ - uses: astral-sh/setup-uv@v8.1.0
63
63
  with:
64
64
  version: "latest"
65
65
  python-version: ${{ matrix.python-version }}
@@ -0,0 +1,69 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - 'v[0-9]+.[0-9]+.[0-9]+'
7
+
8
+ jobs:
9
+ build:
10
+ name: Build
11
+ runs-on: ubuntu-latest
12
+ permissions:
13
+ contents: read
14
+ steps:
15
+ - uses: actions/checkout@v6
16
+ - uses: astral-sh/setup-uv@v8.1.0
17
+ with:
18
+ version: "latest"
19
+ python-version: "3.13"
20
+ - run: uv build
21
+ - uses: actions/upload-artifact@v7
22
+ with:
23
+ name: dist
24
+ path: dist/
25
+
26
+ publish-pypi:
27
+ name: Publish to PyPI
28
+ needs: build
29
+ runs-on: ubuntu-latest
30
+ environment: ${{ vars.PYPI_ENVIRONMENT }}
31
+ permissions:
32
+ contents: read
33
+ id-token: write
34
+ steps:
35
+ - uses: actions/download-artifact@v8
36
+ with:
37
+ name: dist
38
+ path: dist/
39
+ - uses: pypa/gh-action-pypi-publish@release/v1
40
+
41
+ github-release:
42
+ name: GitHub Release + SBOM
43
+ needs: build
44
+ runs-on: ubuntu-latest
45
+ permissions:
46
+ contents: write
47
+ steps:
48
+ - uses: actions/checkout@v6
49
+ - uses: astral-sh/setup-uv@v8.1.0
50
+ with:
51
+ version: "latest"
52
+ - name: Extract version
53
+ id: version
54
+ run: |
55
+ VERSION=$(grep -m 1 "^version = " pyproject.toml | sed 's/version = "//;s/".*//')
56
+ echo "version=$VERSION" >> "$GITHUB_OUTPUT"
57
+ - name: Generate CycloneDX SBOM
58
+ run: |
59
+ uvx --from cyclonedx-bom cyclonedx-py environment \
60
+ -o sbom-dex-${{ steps.version.outputs.version }}.json \
61
+ --of json
62
+ - name: Create GitHub release + attach SBOM
63
+ env:
64
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
65
+ run: |
66
+ gh release create ${{ github.ref_name }} \
67
+ --title "DEX ${{ github.ref_name }}" \
68
+ --generate-notes \
69
+ "sbom-dex-${{ steps.version.outputs.version }}.json"
@@ -65,6 +65,7 @@ desktop.ini
65
65
  # Logs and databases
66
66
  *.log
67
67
  *.sqlite3
68
+ *.duckdb
68
69
  logs/
69
70
  .dex/
70
71
 
@@ -5,6 +5,13 @@ All notable changes to `dataenginex` will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [1.1.1](https://github.com/TheDataEngineX/dex/compare/v1.1.0...v1.1.1) (2026-05-07)
9
+
10
+
11
+ ### Bug Fixes
12
+
13
+ * rich reflex compat ([#232](https://github.com/TheDataEngineX/dex/issues/232)) ([#233](https://github.com/TheDataEngineX/dex/issues/233)) ([7d33c05](https://github.com/TheDataEngineX/dex/commit/7d33c05d9d3b041567a5539930dabc30294a0d8d))
14
+
8
15
  ## [1.1.0](https://github.com/TheDataEngineX/dex/compare/v1.0.3...v1.1.0) (2026-05-06)
9
16
 
10
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataenginex
3
- Version: 1.1.0
3
+ Version: 1.1.2
4
4
  Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
5
5
  Author-email: Jay <jayapal.myaka99@gmail.com>
6
6
  License: MIT License
@@ -31,28 +31,17 @@ Requires-Dist: asyncpg>=0.31.0
31
31
  Requires-Dist: click>=8.3.3
32
32
  Requires-Dist: croniter>=6.2.2
33
33
  Requires-Dist: duckdb>=1.5.2
34
- Requires-Dist: email-validator>=2.3.0
35
- Requires-Dist: fastapi>=0.136.1
36
34
  Requires-Dist: httpx>=0.28.1
37
- Requires-Dist: opentelemetry-api>=1.41.1
38
- Requires-Dist: opentelemetry-exporter-otlp>=1.41.1
39
- Requires-Dist: opentelemetry-instrumentation-fastapi>=0.62b1
40
- Requires-Dist: opentelemetry-sdk>=1.41.1
41
35
  Requires-Dist: prometheus-client>=0.25.0
42
36
  Requires-Dist: pyarrow>=23.0.1
43
37
  Requires-Dist: pydantic>=2.13.4
44
38
  Requires-Dist: python-dotenv>=1.2.2
45
39
  Requires-Dist: pyyaml>=6.0.3
46
- Requires-Dist: qdrant-client>=1.17.1
40
+ Requires-Dist: qdrant-client>=1.18.0
47
41
  Requires-Dist: redis>=5.3.1
48
- Requires-Dist: rich>=15.0.0
49
42
  Requires-Dist: structlog>=25.5.0
50
- Requires-Dist: uvicorn>=0.46.0
51
- Provides-Extra: auth
52
- Requires-Dist: ldap3>=2.9.1; extra == 'auth'
53
- Requires-Dist: pyjwt[crypto]>=2.12.1; extra == 'auth'
54
43
  Provides-Extra: cloud
55
- Requires-Dist: boto3>=1.43.4; extra == 'cloud'
44
+ Requires-Dist: boto3>=1.43.7; extra == 'cloud'
56
45
  Requires-Dist: google-cloud-bigquery>=3.41.0; extra == 'cloud'
57
46
  Requires-Dist: google-cloud-storage>=3.10.1; extra == 'cloud'
58
47
  Provides-Extra: observability
@@ -61,44 +50,72 @@ Description-Content-Type: text/markdown
61
50
 
62
51
  # dataenginex
63
52
 
64
- Unified Data + ML + AI framework. Config-driven, self-hosted, production-ready.
53
+ Unified Data + ML + AI **library**. Config-driven, self-hosted, production-ready.
54
+
55
+ `dataenginex` is a pure Python library — no HTTP server. Your application owns the server layer.
65
56
 
66
57
  ## Install
67
58
 
68
59
  ```bash
69
- # Core (DuckDB, FastAPI, structlog, Pydantic, Click, Rich)
60
+ # Core (DuckDB, structlog, Pydantic, Click, arq, asyncpg, qdrant-client)
70
61
  pip install dataenginex
71
62
 
72
- # With optional extras
73
- pip install dataenginex[dagster] # Dagster orchestration
74
- pip install dataenginex[mlflow] # MLflow experiment tracking
75
- pip install dataenginex[agents] # LangGraph agent runtime
76
- pip install dataenginex[vectors] # Qdrant + LanceDB vector stores
77
- pip install dataenginex[embeddings] # sentence-transformers + ONNX
78
- pip install dataenginex[spark] # PySpark transforms
79
- pip install dataenginex[cloud] # S3 + GCS storage backends
80
- pip install dataenginex[all] # Everything
63
+ # Optional extras
64
+ pip install "dataenginex[cloud]" # S3 + GCS + BigQuery storage backends
65
+ pip install "dataenginex[observability]" # Langfuse LLM call tracing
81
66
  ```
82
67
 
68
+ > **LiteLLM:** Install separately — it pins `python-dotenv==1.0.1`, which conflicts
69
+ > with dataenginex's `python-dotenv>=1.2.2`.
70
+ > ```bash
71
+ > pip install 'litellm>=1.83.3' --no-deps
72
+ > ```
73
+
83
74
  ## Submodules
84
75
 
85
- | Module | Requires Extra | Description |
86
- |--------|---------------|-------------|
87
- | `dataenginex.config` | — | dex.yaml schema, loader, env var resolution, layering |
88
- | `dataenginex.core` | — | Exceptions, interfaces (10 Base* ABCs), backend registry |
89
- | `dataenginex.cli` | — | `dex` CLI (validate, version, init, serve) |
90
- | `dataenginex.api` | — | FastAPI app, auth (JWT), health, rate limiting |
91
- | `dataenginex.data` | | Connectors, schema registry, profiler |
92
- | `dataenginex.ml` | | Training, model registry, serving, drift detection |
93
- | `dataenginex.middleware` | | Structured logging, Prometheus metrics, tracing |
94
- | `dataenginex.lakehouse` | optional `[cloud]` | Storage backends (local, S3, GCS), catalog |
95
- | `dataenginex.warehouse` | | SQL/Spark transforms, lineage |
96
- | `dataenginex.plugins` | | Plugin system (entry-point discovery) |
76
+ | Module | Description |
77
+ |--------|-------------|
78
+ | `dataenginex.engine` | `DexEngine` single entry point; loads config, inits store, wires all backends |
79
+ | `dataenginex.store` | `DexStore` DuckDB-backed persistence (`.dex/store.duckdb`) |
80
+ | `dataenginex.config` | `dex.yaml` schema, loader, env var resolution, layering |
81
+ | `dataenginex.core` | Exceptions, `Base*` ABCs, `BackendRegistry` |
82
+ | `dataenginex.cli` | `dex` CLI (`validate`, `version`, `init`) |
83
+ | `dataenginex.api` | HTTP helpers: error types, response models (no server bundled) |
84
+ | `dataenginex.data` | Connectors, pipeline runner, schema registry, profiler |
85
+ | `dataenginex.ml` | Classical ML: training, model registry, serving, drift detection |
86
+ | `dataenginex.ai` | LLM providers, agents, RAG, vectorstore, memory, observability |
87
+ | `dataenginex.orchestration` | `DriftScheduler`, background scheduling |
88
+ | `dataenginex.middleware` | structlog config, Prometheus metrics |
89
+ | `dataenginex.lakehouse` | Storage backends (local, S3, GCS), catalog, partitioning |
90
+ | `dataenginex.warehouse` | SQL transforms, lineage tracking |
91
+ | `dataenginex.plugins` | Entry-point plugin discovery |
97
92
 
98
93
  ## Quick Usage
99
94
 
100
95
  ```python
101
- # Config system
96
+ from pathlib import Path
97
+ from dataenginex.engine import DexEngine
98
+
99
+ # Load config and initialize all backends
100
+ engine = DexEngine(Path("dex.yaml"))
101
+
102
+ # Data
103
+ engine.run_pipeline("clean_users")
104
+ sources = list(engine.config.data.sources.keys())
105
+
106
+ # ML
107
+ models = engine.model_registry.list_models()
108
+ result = engine.model_registry.predict("churn_model", features)
109
+
110
+ # AI
111
+ response = engine.agents["assistant"].chat("summarize the latest run")
112
+
113
+ # Persistence (DuckDB)
114
+ runs = engine.store.list_pipeline_runs(limit=10)
115
+ ```
116
+
117
+ ```python
118
+ # Config system only (needs `from pathlib import Path`, as in the example above)
102
119
  from dataenginex.config import load_config
103
120
  cfg = load_config(Path("dex.yaml"))
104
121
 
@@ -106,15 +123,12 @@ cfg = load_config(Path("dex.yaml"))
106
123
  from dataenginex.core.interfaces import BaseConnector
107
124
  from dataenginex.core.registry import BackendRegistry
108
125
 
109
- # Exceptions
110
- from dataenginex.core.exceptions import DataEngineXError, BackendNotInstalledError
111
-
112
126
  # ML
113
- from dataenginex.ml import ModelRegistry
127
+ from dataenginex.ml import ModelRegistry, SklearnTrainer
114
128
 
115
- # CLI
116
- # dex validate dex.yaml
117
- # dex version
129
+ # AI
130
+ from dataenginex.ai.llm import get_llm_provider
131
+ from dataenginex.ai.vectorstore import VectorStoreBackend
118
132
  ```
119
133
 
120
134
  ## Source and Docs
@@ -0,0 +1,161 @@
1
+ # DEX — Data + ML + AI Framework
2
+
3
+ [![CI](https://github.com/TheDataEngineX/DEX/actions/workflows/ci.yml/badge.svg?branch=dev)](https://github.com/TheDataEngineX/DEX/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/dataenginex)](https://pypi.org/project/dataenginex/)
5
+ [![Python 3.13+](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+
8
+ Unified Data + ML + AI **library**. One `dex.yaml` defines your entire project — from data ingestion through ML training to AI agents. Self-hosted, config-driven, production-ready.
9
+
10
+ `dataenginex` is a **pure Python library**. It has no HTTP server. Your application owns the server layer.
11
+
12
+ ______________________________________________________________________
13
+
14
+ ## Quick Start
15
+
16
+ ```bash
17
+ pip install dataenginex
18
+ ```
19
+
20
+ ```python
21
+ from dataenginex.engine import DexEngine
22
+
23
+ engine = DexEngine("dex.yaml") # loads config, inits DuckDB store
24
+ engine.run_pipeline("clean_users") # execute a pipeline
25
+ models = engine.model_registry.list_models()
26
+ ```
27
+
28
+ ```bash
29
+ # Development
30
+ git clone https://github.com/TheDataEngineX/DEX && cd DEX
31
+ uv run poe check-all # lint + typecheck + tests
32
+ dex validate dex.yaml # validate a config file
33
+ dex version # show version + environment
34
+ ```
35
+
36
+ ______________________________________________________________________
37
+
38
+ ## What It Does
39
+
40
+ ```
41
+ dex.yaml
42
+ ├── data: CSV/Parquet/DuckDB → transforms → quality checks
43
+ ├── ml: Experiment tracking → training → serving → drift detection
44
+ ├── ai: LLM providers → retrieval (BM25/dense/hybrid) → agents
45
+ └── observability: structlog + Prometheus metrics
46
+ ```
47
+
48
+ **Opinionated defaults, swappable backends.** Everything works out of the box with
49
+ built-in implementations. Swap any layer for industry tools via optional extras:
50
+
51
+ ```bash
52
+ pip install "dataenginex[cloud]" # S3 + GCS + BigQuery storage backends
53
+ pip install "dataenginex[observability]" # Langfuse LLM tracing
54
+ ```
55
+
56
+ ______________________________________________________________________
57
+
58
+ ## Project Structure
59
+
60
+ ```
61
+ dataenginex/
62
+ ├── src/dataenginex/
63
+ │ ├── cli/ # dex CLI (validate, version, init)
64
+ │ ├── config/ # dex.yaml schema, loader, env var resolution
65
+ │ ├── core/ # Exceptions, interfaces (Base* ABCs), registry
66
+ │ ├── engine.py # DexEngine — single entry point for applications
67
+ │ ├── store.py # DexStore — DuckDB-backed persistence (.dex/store.duckdb)
68
+ │ ├── api/ # HTTP helpers: error types, response models (no server)
69
+ │ ├── data/ # Connectors, schema registry, profiler, pipeline runner
70
+ │ ├── ml/ # Classical ML: training, registry, serving, drift
71
+ │ ├── ai/ # LLM providers, agents, RAG, vectorstore, observability
72
+ │ ├── orchestration/ # DriftScheduler, background workers
73
+ │ ├── middleware/ # structlog config, Prometheus metrics (library use)
74
+ │ ├── lakehouse/ # Catalog, partitioning, storage backends
75
+ │ ├── warehouse/ # SQL transforms, lineage
76
+ │ └── plugins/ # Plugin system (entry-point discovery)
77
+
78
+ ├── examples/ # Runnable examples + dex.yaml templates
79
+ ├── tests/ # Unit + integration tests
80
+ ├── docs/ # MkDocs documentation
81
+ └── pyproject.toml # Package config (version source of truth)
82
+ ```
83
+
84
+ ______________________________________________________________________
85
+
86
+ ## Architecture
87
+
88
+ ```
89
+ dex.yaml → DexEngine.__init__
90
+
91
+ ├── config/ load + validate → DexConfig
92
+ ├── store/ DexStore (.dex/store.duckdb)
93
+ ├── data/ register sources + pipelines
94
+ ├── ml/ model registry + serving
95
+ └── ai/ LLM providers + agents
96
+ ```
97
+
98
+ **Backend Registry Pattern:** Every subsystem has a `Base*` ABC + `BackendRegistry`.
99
+ Built-in backends work out of the box. Extras implement the same interface.
100
+
101
+ **Tech Stack:**
102
+
103
+ | Layer | Built-in | Optional Extra |
104
+ |-------|----------|----------------|
105
+ | Data Engine | DuckDB | PySpark |
106
+ | Orchestration | croniter scheduler | Dagster |
107
+ | ML Tracking | JSON-based tracker | MLflow |
108
+ | Model Serving | Built-in predictor | — |
109
+ | LLM | Ollama / LiteLLM / vLLM | Any OpenAI-compatible |
110
+ | Vector Store | DuckDB VSS | Qdrant |
111
+ | Retrieval | BM25 + Dense + Hybrid | — |
112
+ | Persistence | DuckDB (`.dex/store.duckdb`) | — |
113
+ | Logging | structlog | — |
114
+
115
+ ______________________________________________________________________
116
+
117
+ ## Development
118
+
119
+ See [docs/development.md](docs/development.md) for full setup.
120
+
121
+ ```bash
122
+ uv run poe check-all # lint + typecheck + tests
123
+ uv run poe lint-fix # auto-fix lint issues
124
+ uv run poe test-cov # tests + coverage report
125
+ ```
126
+
127
+ ______________________________________________________________________
128
+
129
+ ## Documentation
130
+
131
+ | Guide | Description |
132
+ |-------|-------------|
133
+ | [Quickstart](docs/quickstart.md) | Get running in 5 minutes |
134
+ | [Architecture](docs/architecture.md) | System design and patterns |
135
+ | [Development](docs/development.md) | Local setup and workflow |
136
+ | [API Reference](docs/api-reference/index.md) | Auto-generated module docs |
137
+
138
+ > Docs: [docs.thedataenginex.org](https://docs.thedataenginex.org)
139
+
140
+ ______________________________________________________________________
141
+
142
+ ## The DEX Ecosystem
143
+
144
+ ```
145
+ TheDataEngineX/
146
+ ├── dataenginex — Core library (this repo, PyPI)
147
+ ├── dex-studio — Web UI (FastAPI + Jinja2) — single pane of glass
148
+ └── infradex — Terraform + Helm + K3s deployment
149
+ ```
150
+
151
+ dex-studio imports `dataenginex` directly — no HTTP server required.
152
+
153
+ ______________________________________________________________________
154
+
155
+ ## License
156
+
157
+ MIT License. See [LICENSE](LICENSE).
158
+
159
+ ______________________________________________________________________
160
+
161
+ **Version**: [![PyPI](https://img.shields.io/pypi/v/dataenginex)](https://pypi.org/project/dataenginex/) | **License**: MIT | **Python**: 3.13+
@@ -0,0 +1,63 @@
1
+ # dataenginex.api
2
+
3
+ HTTP helpers — error types, response models, and shared utilities.
4
+
5
+ `dataenginex` does **not** bundle a FastAPI server. This module provides the building blocks
6
+ for applications (like DEX Studio or your own FastAPI app) that want to expose DEX functionality
7
+ over HTTP.
8
+
9
+ ## Error Types
10
+
11
+ ```python
12
+ from dataenginex.api.errors import (
13
+ NotFoundError,
14
+ ValidationError,
15
+ ConflictError,
16
+ ServiceUnavailableError,
17
+ )
18
+ ```
19
+
20
+ Standard Pydantic response models for HTTP error responses, usable in any FastAPI app:
21
+
22
+ ```python
23
+ from fastapi import FastAPI
24
+ from dataenginex.api.errors import NotFoundError
25
+
26
+ app = FastAPI()
27
+
28
+ @app.get("/pipelines/{name}")
29
+ def get_pipeline(name: str):
30
+ pipeline = engine.config.data.pipelines.get(name)
31
+ if pipeline is None:
32
+ raise NotFoundError(detail=f"Pipeline '{name}' not found")
33
+ return pipeline
34
+ ```
35
+
36
+ ## Building an HTTP Layer
37
+
38
+ To expose DEX functionality over HTTP, create a FastAPI app in your application
39
+ and call `DexEngine` directly:
40
+
41
+ ```python
42
+ from fastapi import FastAPI
43
+ from dataenginex.engine import DexEngine
44
+
45
+ engine = DexEngine("dex.yaml")
46
+ app = FastAPI()
47
+
48
+ @app.get("/health")
49
+ def health():
50
+ return engine.health()
51
+
52
+ @app.get("/pipelines")
53
+ def list_pipelines():
54
+ return list(engine.config.data.pipelines.keys())
55
+
56
+ @app.post("/pipelines/{name}/run")
57
+ def run_pipeline(name: str):
58
+ return engine.run_pipeline(name)
59
+ ```
60
+
61
+ See `examples/02_api_quickstart.py` for a minimal working example.
62
+
63
+ ::: dataenginex.api
@@ -0,0 +1,8 @@
1
+ ---
2
+ title: dashboard (removed)
3
+ description: The Streamlit dashboard module was removed in v1.0
4
+ ---
5
+
6
+ The `dataenginex.dashboard` Streamlit module was removed in **v1.0**.
7
+
8
+ Use [DEX Studio](https://github.com/TheDataEngineX/dex-studio) instead — a full-featured Reflex web UI for data pipelines, ML experiments, and AI agents.
@@ -0,0 +1,45 @@
1
+ # dataenginex.ml
2
+
3
+ Classical ML — training, model registry, drift detection, and model serving.
4
+
5
+ LLM providers, vector stores, agents, and RAG live in `dataenginex.ai`.
6
+ The drift scheduler lives in `dataenginex.orchestration`.
7
+
8
+ ## Module Split
9
+
10
+ | Concern | Module |
11
+ |---------|--------|
12
+ | Training, registry, serving, drift | `dataenginex.ml` |
13
+ | LLM providers, chat, embeddings | `dataenginex.ai.llm` |
14
+ | Vector stores | `dataenginex.ai.vectorstore` |
15
+ | Background drift scheduling | `dataenginex.orchestration.scheduler` |
16
+
17
+ ## Quick Usage
18
+
19
+ ```python
20
+ from dataenginex.ml import (
21
+ SklearnTrainer, TrainingResult,
22
+ ModelRegistry, ModelArtifact, ModelStage,
23
+ DriftDetector, DriftReport,
24
+ ModelServer, PredictionRequest, PredictionResponse,
25
+ )
26
+
27
+ # Train
28
+ trainer = SklearnTrainer(experiment_name="churn")
29
+ result: TrainingResult = trainer.train(X_train, y_train)
30
+
31
+ # Register
32
+ registry = ModelRegistry()
33
+ registry.register(result.model, name="churn_v1", stage=ModelStage.STAGING)
34
+
35
+ # Drift
36
+ detector = DriftDetector(reference=X_train)
37
+ report: DriftReport = detector.detect(X_new)
38
+
39
+ # Serve
40
+ server = ModelServer()
41
+ server.load("churn_v1", stage=ModelStage.PRODUCTION)  # assumes churn_v1 was promoted to PRODUCTION after registration
42
+ resp = server.predict(PredictionRequest(features={"age": 35}))
43
+ ```
44
+
45
+ ::: dataenginex.ml