dataenginex 1.1.1__tar.gz → 1.1.2__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (315)
  1. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/workflows/ci.yml +4 -4
  2. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/workflows/release.yml +4 -4
  3. {dataenginex-1.1.1 → dataenginex-1.1.2}/.gitignore +1 -0
  4. {dataenginex-1.1.1 → dataenginex-1.1.2}/PKG-INFO +59 -44
  5. dataenginex-1.1.2/README.md +161 -0
  6. dataenginex-1.1.2/docs/api-reference/api.md +63 -0
  7. dataenginex-1.1.2/docs/api-reference/dashboard.md +8 -0
  8. dataenginex-1.1.2/docs/api-reference/ml.md +45 -0
  9. dataenginex-1.1.2/docs/architecture.md +178 -0
  10. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/ci-cd.md +65 -114
  11. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/development.md +7 -8
  12. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/observability.md +20 -22
  13. dataenginex-1.1.2/docs/quickstart.md +110 -0
  14. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/release-notes.md +65 -0
  15. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/02_api_quickstart.py +8 -19
  16. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/GUIDE.md +26 -19
  17. {dataenginex-1.1.1 → dataenginex-1.1.2}/pyproject.toml +12 -27
  18. dataenginex-1.1.2/scripts/localstack/init.sh +7 -0
  19. dataenginex-1.1.2/src/dataenginex/README.md +88 -0
  20. dataenginex-1.1.2/src/dataenginex/__init__.py +96 -0
  21. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/__init__.py +38 -0
  22. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/agents/builtin.py +2 -2
  23. {dataenginex-1.1.1/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/llm.py +1 -1
  24. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/observability/langfuse.py +1 -1
  25. {dataenginex-1.1.1/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/ai}/vectorstore.py +3 -3
  26. dataenginex-1.1.2/src/dataenginex/api/__init__.py +36 -0
  27. dataenginex-1.1.2/src/dataenginex/api/errors.py +43 -0
  28. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/cli/main.py +0 -3
  29. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/config/defaults.py +0 -4
  30. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/config/schema.py +0 -23
  31. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/runner.py +5 -3
  32. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/quality/gates.py +4 -3
  33. dataenginex-1.1.2/src/dataenginex/engine.py +803 -0
  34. dataenginex-1.1.2/src/dataenginex/middleware/__init__.py +43 -0
  35. dataenginex-1.1.2/src/dataenginex/ml/__init__.py +56 -0
  36. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/builtin.py +2 -3
  37. dataenginex-1.1.2/src/dataenginex/orchestration/__init__.py +23 -0
  38. {dataenginex-1.1.1/src/dataenginex/ml → dataenginex-1.1.2/src/dataenginex/orchestration}/scheduler.py +2 -3
  39. dataenginex-1.1.2/src/dataenginex/store.py +814 -0
  40. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/warehouse/lineage.py +13 -1
  41. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/worker.py +1 -1
  42. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_builtin_serving.py +2 -2
  43. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_config_loader.py +4 -5
  44. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_config_schema.py +2 -3
  45. dataenginex-1.1.2/tests/unit/test_dex_engine.py +144 -0
  46. dataenginex-1.1.2/tests/unit/test_dex_store.py +338 -0
  47. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_domain_metrics_wiring.py +2 -45
  48. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_drift_scheduler.py +1 -1
  49. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_llm.py +1 -1
  50. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_llm_litellm_vllm.py +3 -3
  51. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_observability_langfuse.py +1 -1
  52. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_vectorstore.py +1 -1
  53. {dataenginex-1.1.1 → dataenginex-1.1.2}/uv.lock +751 -866
  54. dataenginex-1.1.1/.github/workflows/docker-build-push.yml +0 -63
  55. dataenginex-1.1.1/.github/workflows/docs-notify.yml +0 -18
  56. dataenginex-1.1.1/.github/workflows/label-sync.yml +0 -31
  57. dataenginex-1.1.1/.release-please-manifest.json +0 -3
  58. dataenginex-1.1.1/README.md +0 -157
  59. dataenginex-1.1.1/docs/api-reference/api.md +0 -5
  60. dataenginex-1.1.1/docs/api-reference/dashboard.md +0 -7
  61. dataenginex-1.1.1/docs/api-reference/ml.md +0 -5
  62. dataenginex-1.1.1/docs/architecture.md +0 -150
  63. dataenginex-1.1.1/docs/quickstart.md +0 -85
  64. dataenginex-1.1.1/release-please-config.json +0 -23
  65. dataenginex-1.1.1/sonar-project.properties +0 -22
  66. dataenginex-1.1.1/src/dataenginex/README.md +0 -63
  67. dataenginex-1.1.1/src/dataenginex/__init__.py +0 -85
  68. dataenginex-1.1.1/src/dataenginex/api/__init__.py +0 -73
  69. dataenginex-1.1.1/src/dataenginex/api/auth.py +0 -242
  70. dataenginex-1.1.1/src/dataenginex/api/errors.py +0 -77
  71. dataenginex-1.1.1/src/dataenginex/api/factory.py +0 -245
  72. dataenginex-1.1.1/src/dataenginex/api/health.py +0 -147
  73. dataenginex-1.1.1/src/dataenginex/api/jwks.py +0 -155
  74. dataenginex-1.1.1/src/dataenginex/api/ldap_sync.py +0 -170
  75. dataenginex-1.1.1/src/dataenginex/api/rate_limit.py +0 -131
  76. dataenginex-1.1.1/src/dataenginex/api/rbac.py +0 -91
  77. dataenginex-1.1.1/src/dataenginex/api/routers/__init__.py +0 -9
  78. dataenginex-1.1.1/src/dataenginex/api/routers/ai.py +0 -192
  79. dataenginex-1.1.1/src/dataenginex/api/routers/data.py +0 -154
  80. dataenginex-1.1.1/src/dataenginex/api/routers/health.py +0 -26
  81. dataenginex-1.1.1/src/dataenginex/api/routers/ml.py +0 -182
  82. dataenginex-1.1.1/src/dataenginex/api/routers/pipelines.py +0 -194
  83. dataenginex-1.1.1/src/dataenginex/api/routers/root.py +0 -25
  84. dataenginex-1.1.1/src/dataenginex/api/routers/system.py +0 -48
  85. dataenginex-1.1.1/src/dataenginex/api/scim.py +0 -629
  86. dataenginex-1.1.1/src/dataenginex/cli/serve.py +0 -48
  87. dataenginex-1.1.1/src/dataenginex/middleware/__init__.py +0 -67
  88. dataenginex-1.1.1/src/dataenginex/middleware/metrics_middleware.py +0 -87
  89. dataenginex-1.1.1/src/dataenginex/middleware/request_logging.py +0 -88
  90. dataenginex-1.1.1/src/dataenginex/middleware/tracing.py +0 -104
  91. dataenginex-1.1.1/src/dataenginex/ml/__init__.py +0 -105
  92. dataenginex-1.1.1/src/dataenginex/orchestration/__init__.py +0 -10
  93. dataenginex-1.1.1/tests/integration/test_api_middleware_integration.py +0 -375
  94. dataenginex-1.1.1/tests/integration/test_full_app.py +0 -89
  95. dataenginex-1.1.1/tests/unit/test_ai_router.py +0 -96
  96. dataenginex-1.1.1/tests/unit/test_ai_router_extended.py +0 -273
  97. dataenginex-1.1.1/tests/unit/test_api_auth.py +0 -100
  98. dataenginex-1.1.1/tests/unit/test_api_factory.py +0 -168
  99. dataenginex-1.1.1/tests/unit/test_api_jwks.py +0 -179
  100. dataenginex-1.1.1/tests/unit/test_api_rate_limit.py +0 -86
  101. dataenginex-1.1.1/tests/unit/test_api_rbac.py +0 -109
  102. dataenginex-1.1.1/tests/unit/test_api_scim.py +0 -162
  103. dataenginex-1.1.1/tests/unit/test_data_router.py +0 -117
  104. dataenginex-1.1.1/tests/unit/test_errors.py +0 -64
  105. dataenginex-1.1.1/tests/unit/test_health.py +0 -126
  106. dataenginex-1.1.1/tests/unit/test_metrics.py +0 -139
  107. dataenginex-1.1.1/tests/unit/test_middleware.py +0 -62
  108. dataenginex-1.1.1/tests/unit/test_ml_router.py +0 -122
  109. dataenginex-1.1.1/tests/unit/test_pipeline_router_extended.py +0 -252
  110. dataenginex-1.1.1/tests/unit/test_security_extended.py +0 -379
  111. dataenginex-1.1.1/tests/unit/test_system_router.py +0 -65
  112. dataenginex-1.1.1/tests/unit/test_tracing.py +0 -90
  113. {dataenginex-1.1.1 → dataenginex-1.1.2}/.claude/commands/new-feature.md +0 -0
  114. {dataenginex-1.1.1 → dataenginex-1.1.2}/.claude/commands/validate.md +0 -0
  115. {dataenginex-1.1.1 → dataenginex-1.1.2}/.claude/settings.json +0 -0
  116. {dataenginex-1.1.1 → dataenginex-1.1.2}/.env.template +0 -0
  117. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  118. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  119. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  120. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  121. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/dependabot.yml +0 -0
  122. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/labels.yml +0 -0
  123. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/release-pr-template.md +0 -0
  124. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/workflows/auto-pr.yml +0 -0
  125. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/workflows/enforce-dev-to-main.yml +0 -0
  126. {dataenginex-1.1.1 → dataenginex-1.1.2}/.github/workflows/security.yml +0 -0
  127. {dataenginex-1.1.1 → dataenginex-1.1.2}/.gitleaks.toml +0 -0
  128. {dataenginex-1.1.1 → dataenginex-1.1.2}/.pre-commit-config.yaml +0 -0
  129. {dataenginex-1.1.1 → dataenginex-1.1.2}/.python-version +0 -0
  130. {dataenginex-1.1.1 → dataenginex-1.1.2}/CHANGELOG.md +0 -0
  131. {dataenginex-1.1.1 → dataenginex-1.1.2}/CLAUDE.md +0 -0
  132. {dataenginex-1.1.1 → dataenginex-1.1.2}/CODEOWNERS +0 -0
  133. {dataenginex-1.1.1 → dataenginex-1.1.2}/CODE_OF_CONDUCT.md +0 -0
  134. {dataenginex-1.1.1 → dataenginex-1.1.2}/CONTRIBUTING.md +0 -0
  135. {dataenginex-1.1.1 → dataenginex-1.1.2}/Dockerfile +0 -0
  136. {dataenginex-1.1.1 → dataenginex-1.1.2}/LICENSE +0 -0
  137. {dataenginex-1.1.1 → dataenginex-1.1.2}/SECURITY.md +0 -0
  138. {dataenginex-1.1.1 → dataenginex-1.1.2}/docker-compose.test.yml +0 -0
  139. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/adr/0000-template.md +0 -0
  140. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/adr/0001-medallion-architecture.md +0 -0
  141. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/core.md +0 -0
  142. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/data.md +0 -0
  143. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/index.md +0 -0
  144. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/lakehouse.md +0 -0
  145. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/middleware.md +0 -0
  146. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/plugins.md +0 -0
  147. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/api-reference/warehouse.md +0 -0
  148. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/contributing.md +0 -0
  149. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/index.md +0 -0
  150. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/sdlc.md +0 -0
  151. {dataenginex-1.1.1 → dataenginex-1.1.2}/docs/security-scanning.md +0 -0
  152. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/01_hello_pipeline.py +0 -0
  153. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/03_quality_gate.py +0 -0
  154. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/04_ml_training.py +0 -0
  155. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/05_rag_demo.py +0 -0
  156. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/06_llm_quickstart.py +0 -0
  157. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/07_api_ingestion.py +0 -0
  158. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/08_spark_ml.py +0 -0
  159. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/09_feature_engineering.py +0 -0
  160. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/10_model_analysis.py +0 -0
  161. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/dashboard/dashboard_config.yaml +0 -0
  162. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/dashboard/run_dashboard.py +0 -0
  163. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/data/events.csv +0 -0
  164. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/data/users.csv +0 -0
  165. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/dex.yaml +0 -0
  166. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/ecommerce/data/customers.csv +0 -0
  167. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/ecommerce/data/orders.csv +0 -0
  168. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/ecommerce/data/products.csv +0 -0
  169. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/ecommerce/dex.yaml +0 -0
  170. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/ecommerce/run_all.py +0 -0
  171. {dataenginex-1.1.1 → dataenginex-1.1.2}/examples/movies.csv +0 -0
  172. {dataenginex-1.1.1 → dataenginex-1.1.2}/poe_tasks.toml +0 -0
  173. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/RELEASE_NOTES.md +0 -0
  174. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/agents/__init__.py +0 -0
  175. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/memory/__init__.py +0 -0
  176. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/memory/base.py +0 -0
  177. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/memory/episodic.py +0 -0
  178. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/memory/long_term.py +0 -0
  179. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/observability/__init__.py +0 -0
  180. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/observability/audit.py +0 -0
  181. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/observability/cost.py +0 -0
  182. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/observability/metrics.py +0 -0
  183. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/__init__.py +0 -0
  184. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/builtin.py +0 -0
  185. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/retrieval/graph.py +0 -0
  186. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/__init__.py +0 -0
  187. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/anthropic.py +0 -0
  188. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/huggingface.py +0 -0
  189. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/ollama.py +0 -0
  190. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/openai.py +0 -0
  191. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/routing/router.py +0 -0
  192. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/__init__.py +0 -0
  193. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
  194. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/executor.py +0 -0
  195. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/runtime/sandbox.py +0 -0
  196. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/tools/__init__.py +0 -0
  197. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/tools/builtin.py +0 -0
  198. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/__init__.py +0 -0
  199. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/conditions.py +0 -0
  200. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/dag.py +0 -0
  201. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ai/workflows/human_loop.py +0 -0
  202. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/api/pagination.py +0 -0
  203. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/api/schemas.py +0 -0
  204. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/cli/__init__.py +0 -0
  205. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/cli/run.py +0 -0
  206. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/cli/train.py +0 -0
  207. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/config/__init__.py +0 -0
  208. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/config/loader.py +0 -0
  209. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/__init__.py +0 -0
  210. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/exceptions.py +0 -0
  211. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/interfaces.py +0 -0
  212. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/medallion_architecture.py +0 -0
  213. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/quality.py +0 -0
  214. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/registry.py +0 -0
  215. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/schemas.py +0 -0
  216. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/core/validators.py +0 -0
  217. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/__init__.py +0 -0
  218. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/connectors/__init__.py +0 -0
  219. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/connectors/csv.py +0 -0
  220. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/connectors/duckdb.py +0 -0
  221. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/connectors/legacy.py +0 -0
  222. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/__init__.py +0 -0
  223. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/dag.py +0 -0
  224. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/pipeline/run_history.py +0 -0
  225. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/profiler.py +0 -0
  226. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/quality/__init__.py +0 -0
  227. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/quality/spark.py +0 -0
  228. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/registry.py +0 -0
  229. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/transforms/__init__.py +0 -0
  230. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/data/transforms/sql.py +0 -0
  231. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/lakehouse/__init__.py +0 -0
  232. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/lakehouse/catalog.py +0 -0
  233. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/lakehouse/partitioning.py +0 -0
  234. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/lakehouse/storage.py +0 -0
  235. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/middleware/domain_metrics.py +0 -0
  236. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/middleware/logging_config.py +0 -0
  237. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/middleware/metrics.py +0 -0
  238. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/drift.py +0 -0
  239. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/features/__init__.py +0 -0
  240. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/features/builtin.py +0 -0
  241. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/metrics.py +0 -0
  242. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/mlflow_registry.py +0 -0
  243. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/registry.py +0 -0
  244. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/serving.py +0 -0
  245. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
  246. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/__init__.py +0 -0
  247. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/tracking/builtin.py +0 -0
  248. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/ml/training.py +0 -0
  249. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/orchestration/builtin.py +0 -0
  250. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/plugins/__init__.py +0 -0
  251. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/plugins/registry.py +0 -0
  252. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/py.typed +0 -0
  253. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/secops/__init__.py +0 -0
  254. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/secops/audit.py +0 -0
  255. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/secops/gate.py +0 -0
  256. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/secops/masking.py +0 -0
  257. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/secops/pii.py +0 -0
  258. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/warehouse/__init__.py +0 -0
  259. {dataenginex-1.1.1 → dataenginex-1.1.2}/src/dataenginex/warehouse/transforms.py +0 -0
  260. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/__init__.py +0 -0
  261. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conformance/__init__.py +0 -0
  262. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conformance/test_connector.py +0 -0
  263. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conformance/test_feature_store.py +0 -0
  264. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conformance/test_tracker.py +0 -0
  265. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conformance/test_transform.py +0 -0
  266. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/conftest.py +0 -0
  267. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/fixtures/__init__.py +0 -0
  268. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/fixtures/sample_data.py +0 -0
  269. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.csv +0 -0
  270. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/fixtures/sample_jobs.json +0 -0
  271. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/__init__.py +0 -0
  272. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_ai_integration.py +0 -0
  273. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_cli_run.py +0 -0
  274. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_config_cli.py +0 -0
  275. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_lineage_integration.py +0 -0
  276. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_ml_integration.py +0 -0
  277. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_pipeline_e2e.py +0 -0
  278. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_secops_integration.py +0 -0
  279. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/integration/test_storage_real.py +0 -0
  280. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/load/__init__.py +0 -0
  281. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/__init__.py +0 -0
  282. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_agent_runtime.py +0 -0
  283. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_ai_modules.py +0 -0
  284. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_api_pagination.py +0 -0
  285. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_api_schemas.py +0 -0
  286. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_api_validators.py +0 -0
  287. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_builtin_agent.py +0 -0
  288. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_builtin_feature_store.py +0 -0
  289. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_builtin_tracker.py +0 -0
  290. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_cli_train.py +0 -0
  291. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_core_exceptions.py +0 -0
  292. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_core_interfaces.py +0 -0
  293. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_core_registry.py +0 -0
  294. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_csv_connector.py +0 -0
  295. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_data.py +0 -0
  296. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_domain_metrics.py +0 -0
  297. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_duckdb_connector.py +0 -0
  298. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_lakehouse.py +0 -0
  299. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_logging.py +0 -0
  300. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_medallion.py +0 -0
  301. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_ml.py +0 -0
  302. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_pipeline_dag.py +0 -0
  303. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_pipeline_runner.py +0 -0
  304. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_plugins.py +0 -0
  305. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_quality_gates.py +0 -0
  306. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_quality_spark.py +0 -0
  307. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_retriever.py +0 -0
  308. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_retriever_graph.py +0 -0
  309. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_run_history.py +0 -0
  310. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_scheduler.py +0 -0
  311. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_secops.py +0 -0
  312. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_spark_fixtures.py +0 -0
  313. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_sql_transforms.py +0 -0
  314. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_storage_abstraction.py +0 -0
  315. {dataenginex-1.1.1 → dataenginex-1.1.2}/tests/unit/test_warehouse.py +0 -0
@@ -20,7 +20,7 @@ jobs:
20
20
  runs-on: ubuntu-latest
21
21
  steps:
22
22
  - uses: actions/checkout@v6
23
- - uses: astral-sh/setup-uv@v7
23
+ - uses: astral-sh/setup-uv@v8.1.0
24
24
  with:
25
25
  version: "latest"
26
26
  python-version: "3.13"
@@ -35,7 +35,7 @@ jobs:
35
35
  needs: quality
36
36
  steps:
37
37
  - uses: actions/checkout@v6
38
- - uses: astral-sh/setup-uv@v7
38
+ - uses: astral-sh/setup-uv@v8.1.0
39
39
  with:
40
40
  version: "latest"
41
41
  python-version: "3.13"
@@ -43,7 +43,7 @@ jobs:
43
43
  env:
44
44
  UV_PROJECT_ENVIRONMENT: .venv
45
45
  - run: uv run poe test-cov-core
46
- - uses: codecov/codecov-action@v6
46
+ - uses: codecov/codecov-action@v5
47
47
  with:
48
48
  flags: dataenginex
49
49
  fail_ci_if_error: false
@@ -59,7 +59,7 @@ jobs:
59
59
  python-version: ["3.11", "3.12"]
60
60
  steps:
61
61
  - uses: actions/checkout@v6
62
- - uses: astral-sh/setup-uv@v7
62
+ - uses: astral-sh/setup-uv@v8.1.0
63
63
  with:
64
64
  version: "latest"
65
65
  python-version: ${{ matrix.python-version }}
@@ -13,12 +13,12 @@ jobs:
13
13
  contents: read
14
14
  steps:
15
15
  - uses: actions/checkout@v6
16
- - uses: astral-sh/setup-uv@v7
16
+ - uses: astral-sh/setup-uv@v8.1.0
17
17
  with:
18
18
  version: "latest"
19
19
  python-version: "3.13"
20
20
  - run: uv build
21
- - uses: actions/upload-artifact@v4
21
+ - uses: actions/upload-artifact@v7
22
22
  with:
23
23
  name: dist
24
24
  path: dist/
@@ -32,7 +32,7 @@ jobs:
32
32
  contents: read
33
33
  id-token: write
34
34
  steps:
35
- - uses: actions/download-artifact@v4
35
+ - uses: actions/download-artifact@v8
36
36
  with:
37
37
  name: dist
38
38
  path: dist/
@@ -46,7 +46,7 @@ jobs:
46
46
  contents: write
47
47
  steps:
48
48
  - uses: actions/checkout@v6
49
- - uses: astral-sh/setup-uv@v7
49
+ - uses: astral-sh/setup-uv@v8.1.0
50
50
  with:
51
51
  version: "latest"
52
52
  - name: Extract version
@@ -65,6 +65,7 @@ desktop.ini
65
65
  # Logs and databases
66
66
  *.log
67
67
  *.sqlite3
68
+ *.duckdb
68
69
  logs/
69
70
  .dex/
70
71
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataenginex
3
- Version: 1.1.1
3
+ Version: 1.1.2
4
4
  Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
5
5
  Author-email: Jay <jayapal.myaka99@gmail.com>
6
6
  License: MIT License
@@ -31,27 +31,17 @@ Requires-Dist: asyncpg>=0.31.0
31
31
  Requires-Dist: click>=8.3.3
32
32
  Requires-Dist: croniter>=6.2.2
33
33
  Requires-Dist: duckdb>=1.5.2
34
- Requires-Dist: email-validator>=2.3.0
35
- Requires-Dist: fastapi>=0.136.1
36
34
  Requires-Dist: httpx>=0.28.1
37
- Requires-Dist: opentelemetry-api>=1.41.1
38
- Requires-Dist: opentelemetry-exporter-otlp>=1.41.1
39
- Requires-Dist: opentelemetry-instrumentation-fastapi>=0.62b1
40
- Requires-Dist: opentelemetry-sdk>=1.41.1
41
35
  Requires-Dist: prometheus-client>=0.25.0
42
36
  Requires-Dist: pyarrow>=23.0.1
43
37
  Requires-Dist: pydantic>=2.13.4
44
38
  Requires-Dist: python-dotenv>=1.2.2
45
39
  Requires-Dist: pyyaml>=6.0.3
46
- Requires-Dist: qdrant-client>=1.17.1
40
+ Requires-Dist: qdrant-client>=1.18.0
47
41
  Requires-Dist: redis>=5.3.1
48
42
  Requires-Dist: structlog>=25.5.0
49
- Requires-Dist: uvicorn>=0.46.0
50
- Provides-Extra: auth
51
- Requires-Dist: ldap3>=2.9.1; extra == 'auth'
52
- Requires-Dist: pyjwt[crypto]>=2.12.1; extra == 'auth'
53
43
  Provides-Extra: cloud
54
- Requires-Dist: boto3>=1.43.4; extra == 'cloud'
44
+ Requires-Dist: boto3>=1.43.7; extra == 'cloud'
55
45
  Requires-Dist: google-cloud-bigquery>=3.41.0; extra == 'cloud'
56
46
  Requires-Dist: google-cloud-storage>=3.10.1; extra == 'cloud'
57
47
  Provides-Extra: observability
@@ -60,44 +50,72 @@ Description-Content-Type: text/markdown
60
50
 
61
51
  # dataenginex
62
52
 
63
- Unified Data + ML + AI framework. Config-driven, self-hosted, production-ready.
53
+ Unified Data + ML + AI **library**. Config-driven, self-hosted, production-ready.
54
+
55
+ `dataenginex` is a pure Python library — no HTTP server. Your application owns the server layer.
64
56
 
65
57
  ## Install
66
58
 
67
59
  ```bash
68
- # Core (DuckDB, FastAPI, structlog, Pydantic, Click, Rich)
60
+ # Core (DuckDB, structlog, Pydantic, Click, arq, asyncpg, qdrant-client)
69
61
  pip install dataenginex
70
62
 
71
- # With optional extras
72
- pip install dataenginex[dagster] # Dagster orchestration
73
- pip install dataenginex[mlflow] # MLflow experiment tracking
74
- pip install dataenginex[agents] # LangGraph agent runtime
75
- pip install dataenginex[vectors] # Qdrant + LanceDB vector stores
76
- pip install dataenginex[embeddings] # sentence-transformers + ONNX
77
- pip install dataenginex[spark] # PySpark transforms
78
- pip install dataenginex[cloud] # S3 + GCS storage backends
79
- pip install dataenginex[all] # Everything
63
+ # Optional extras
64
+ pip install "dataenginex[cloud]" # S3 + GCS + BigQuery storage backends
65
+ pip install "dataenginex[observability]" # Langfuse LLM call tracing
80
66
  ```
81
67
 
68
+ > **LiteLLM:** Install separately — it pins `python-dotenv==1.0.1` which conflicts
69
+ > with dataenginex's `python-dotenv>=1.2.2`.
70
+ > ```bash
71
+ > pip install 'litellm>=1.83.3' --no-deps
72
+ > ```
73
+
82
74
  ## Submodules
83
75
 
84
- | Module | Requires Extra | Description |
85
- |--------|---------------|-------------|
86
- | `dataenginex.config` | — | dex.yaml schema, loader, env var resolution, layering |
87
- | `dataenginex.core` | — | Exceptions, interfaces (10 Base* ABCs), backend registry |
88
- | `dataenginex.cli` | — | `dex` CLI (validate, version, init, serve) |
89
- | `dataenginex.api` | — | FastAPI app, auth (JWT), health, rate limiting |
90
- | `dataenginex.data` | | Connectors, schema registry, profiler |
91
- | `dataenginex.ml` | | Training, model registry, serving, drift detection |
92
- | `dataenginex.middleware` | | Structured logging, Prometheus metrics, tracing |
93
- | `dataenginex.lakehouse` | optional `[cloud]` | Storage backends (local, S3, GCS), catalog |
94
- | `dataenginex.warehouse` | | SQL/Spark transforms, lineage |
95
- | `dataenginex.plugins` | | Plugin system (entry-point discovery) |
76
+ | Module | Description |
77
+ |--------|-------------|
78
+ | `dataenginex.engine` | `DexEngine` single entry point; loads config, inits store, wires all backends |
79
+ | `dataenginex.store` | `DexStore` DuckDB-backed persistence (`.dex/store.duckdb`) |
80
+ | `dataenginex.config` | `dex.yaml` schema, loader, env var resolution, layering |
81
+ | `dataenginex.core` | Exceptions, `Base*` ABCs, `BackendRegistry` |
82
+ | `dataenginex.cli` | `dex` CLI (`validate`, `version`, `init`) |
83
+ | `dataenginex.api` | HTTP helpers: error types, response models (no server bundled) |
84
+ | `dataenginex.data` | Connectors, pipeline runner, schema registry, profiler |
85
+ | `dataenginex.ml` | Classical ML: training, model registry, serving, drift detection |
86
+ | `dataenginex.ai` | LLM providers, agents, RAG, vectorstore, memory, observability |
87
+ | `dataenginex.orchestration` | `DriftScheduler`, background scheduling |
88
+ | `dataenginex.middleware` | structlog config, Prometheus metrics |
89
+ | `dataenginex.lakehouse` | Storage backends (local, S3, GCS), catalog, partitioning |
90
+ | `dataenginex.warehouse` | SQL transforms, lineage tracking |
91
+ | `dataenginex.plugins` | Entry-point plugin discovery |
96
92
 
97
93
  ## Quick Usage
98
94
 
99
95
  ```python
100
- # Config system
96
+ from pathlib import Path
97
+ from dataenginex.engine import DexEngine
98
+
99
+ # Load config and initialize all backends
100
+ engine = DexEngine(Path("dex.yaml"))
101
+
102
+ # Data
103
+ engine.run_pipeline("clean_users")
104
+ sources = list(engine.config.data.sources.keys())
105
+
106
+ # ML
107
+ models = engine.model_registry.list_models()
108
+ result = engine.model_registry.predict("churn_model", features)
109
+
110
+ # AI
111
+ response = engine.agents["assistant"].chat("summarize the latest run")
112
+
113
+ # Persistence (DuckDB)
114
+ runs = engine.store.list_pipeline_runs(limit=10)
115
+ ```
116
+
117
+ ```python
118
+ # Config system only
101
119
  from dataenginex.config import load_config
102
120
  cfg = load_config(Path("dex.yaml"))
103
121
 
@@ -105,15 +123,12 @@ cfg = load_config(Path("dex.yaml"))
105
123
  from dataenginex.core.interfaces import BaseConnector
106
124
  from dataenginex.core.registry import BackendRegistry
107
125
 
108
- # Exceptions
109
- from dataenginex.core.exceptions import DataEngineXError, BackendNotInstalledError
110
-
111
126
  # ML
112
- from dataenginex.ml import ModelRegistry
127
+ from dataenginex.ml import ModelRegistry, SklearnTrainer
113
128
 
114
- # CLI
115
- # dex validate dex.yaml
116
- # dex version
129
+ # AI
130
+ from dataenginex.ai.llm import get_llm_provider
131
+ from dataenginex.ai.vectorstore import VectorStoreBackend
117
132
  ```
118
133
 
119
134
  ## Source and Docs
@@ -0,0 +1,161 @@
1
+ # DEX — Data + ML + AI Framework
2
+
3
+ [![CI](https://github.com/TheDataEngineX/DEX/actions/workflows/ci.yml/badge.svg?branch=dev)](https://github.com/TheDataEngineX/DEX/actions/workflows/ci.yml)
4
+ [![PyPI](https://img.shields.io/pypi/v/dataenginex)](https://pypi.org/project/dataenginex/)
5
+ [![Python 3.13+](https://img.shields.io/badge/python-3.13+-blue.svg)](https://www.python.org/downloads/)
6
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
7
+
8
+ Unified Data + ML + AI **library**. One `dex.yaml` defines your entire project — from data ingestion through ML training to AI agents. Self-hosted, config-driven, production-ready.
9
+
10
+ `dataenginex` is a **pure Python library**. It has no HTTP server. Your application owns the server layer.
11
+
12
+ ______________________________________________________________________
13
+
14
+ ## Quick Start
15
+
16
+ ```bash
17
+ pip install dataenginex
18
+ ```
19
+
20
+ ```python
21
+ from dataenginex.engine import DexEngine
22
+
23
+ engine = DexEngine("dex.yaml") # loads config, inits DuckDB store
24
+ engine.run_pipeline("clean_users") # execute a pipeline
25
+ models = engine.model_registry.list_models()
26
+ ```
27
+
28
+ ```bash
29
+ # Development
30
+ git clone https://github.com/TheDataEngineX/DEX && cd DEX
31
+ uv run poe check-all # lint + typecheck + tests
32
+ dex validate dex.yaml # validate a config file
33
+ dex version # show version + environment
34
+ ```
35
+
36
+ ______________________________________________________________________
37
+
38
+ ## What It Does
39
+
40
+ ```
41
+ dex.yaml
42
+ ├── data: CSV/Parquet/DuckDB → transforms → quality checks
43
+ ├── ml: Experiment tracking → training → serving → drift detection
44
+ ├── ai: LLM providers → retrieval (BM25/dense/hybrid) → agents
45
+ └── observability: structlog + Prometheus metrics
46
+ ```
47
+
48
+ **Opinionated defaults, swappable backends.** Everything works out of the box with
49
+ built-in implementations. Swap any layer for industry tools via optional extras:
50
+
51
+ ```bash
52
+ pip install "dataenginex[cloud]" # S3 + GCS storage backends
53
+ pip install "dataenginex[observability]" # Langfuse LLM tracing
54
+ ```
55
+
56
+ ______________________________________________________________________
57
+
58
+ ## Project Structure
59
+
60
+ ```
61
+ dataenginex/
62
+ ├── src/dataenginex/
63
+ │ ├── cli/ # dex CLI (validate, version, init)
64
+ │ ├── config/ # dex.yaml schema, loader, env var resolution
65
+ │ ├── core/ # Exceptions, interfaces (Base* ABCs), registry
66
+ │ ├── engine.py # DexEngine — single entry point for applications
67
+ │ ├── store.py # DexStore — DuckDB-backed persistence (.dex/store.duckdb)
68
+ │ ├── api/ # HTTP helpers: error types, response models (no server)
69
+ │ ├── data/ # Connectors, schema registry, profiler, pipeline runner
70
+ │ ├── ml/ # Classical ML: training, registry, serving, drift
71
+ │ ├── ai/ # LLM providers, agents, RAG, vectorstore, observability
72
+ │ ├── orchestration/ # DriftScheduler, background workers
73
+ │ ├── middleware/ # structlog config, Prometheus metrics (library use)
74
+ │ ├── lakehouse/ # Catalog, partitioning, storage backends
75
+ │ ├── warehouse/ # SQL transforms, lineage
76
+ │ └── plugins/ # Plugin system (entry-point discovery)
77
+
78
+ ├── examples/ # Runnable examples + dex.yaml templates
79
+ ├── tests/ # Unit + integration tests
80
+ ├── docs/ # MkDocs documentation
81
+ └── pyproject.toml # Package config (version source of truth)
82
+ ```
83
+
84
+ ______________________________________________________________________
85
+
86
+ ## Architecture
87
+
88
+ ```
89
+ dex.yaml → DexEngine.__init__
90
+
91
+ ├── config/ load + validate → DexConfig
92
+ ├── store.py DexStore (.dex/store.duckdb)
93
+ ├── data/ register sources + pipelines
94
+ ├── ml/ model registry + serving
95
+ └── ai/ LLM providers + agents
96
+ ```
97
+
98
+ **Backend Registry Pattern:** Every subsystem has a `Base*` ABC + `BackendRegistry`.
99
+ Built-in backends work out of the box. Extras implement the same interface.
100
+
101
+ **Tech Stack:**
102
+
103
+ | Layer | Built-in | Optional Extra |
104
+ |-------|----------|----------------|
105
+ | Data Engine | DuckDB | PySpark |
106
+ | Orchestration | croniter scheduler | Dagster |
107
+ | ML Tracking | JSON-based tracker | MLflow |
108
+ | Model Serving | Built-in predictor | — |
109
+ | LLM | Ollama / LiteLLM / vLLM | Any OpenAI-compatible |
110
+ | Vector Store | DuckDB VSS | Qdrant |
111
+ | Retrieval | BM25 + Dense + Hybrid | — |
112
+ | Persistence | DuckDB (`.dex/store.duckdb`) | — |
113
+ | Logging | structlog | — |
114
+
115
+ ______________________________________________________________________
116
+
117
+ ## Development
118
+
119
+ See [docs/development.md](docs/development.md) for full setup.
120
+
121
+ ```bash
122
+ uv run poe check-all # lint + typecheck + tests
123
+ uv run poe lint-fix # auto-fix lint issues
124
+ uv run poe test-cov # tests + coverage report
125
+ ```
126
+
127
+ ______________________________________________________________________
128
+
129
+ ## Documentation
130
+
131
+ | Guide | Description |
132
+ |-------|-------------|
133
+ | [Quickstart](docs/quickstart.md) | Get running in 5 minutes |
134
+ | [Architecture](docs/architecture.md) | System design and patterns |
135
+ | [Development](docs/development.md) | Local setup and workflow |
136
+ | [API Reference](docs/api-reference/index.md) | Auto-generated module docs |
137
+
138
+ > Docs: [docs.thedataenginex.org](https://docs.thedataenginex.org)
139
+
140
+ ______________________________________________________________________
141
+
142
+ ## The DEX Ecosystem
143
+
144
+ ```
145
+ TheDataEngineX/
146
+ ├── dataenginex — Core library (this repo, PyPI)
147
+ ├── dex-studio — Web UI (FastAPI + Jinja2) — single pane of glass
148
+ └── infradex — Terraform + Helm + K3s deployment
149
+ ```
150
+
151
+ dex-studio imports `dataenginex` directly — no HTTP server required.
152
+
153
+ ______________________________________________________________________
154
+
155
+ ## License
156
+
157
+ MIT License. See [LICENSE](LICENSE).
158
+
159
+ ______________________________________________________________________
160
+
161
+ **Version**: [![PyPI](https://img.shields.io/pypi/v/dataenginex)](https://pypi.org/project/dataenginex/) | **License**: MIT | **Python**: 3.13+
@@ -0,0 +1,63 @@
1
+ # dataenginex.api
2
+
3
+ HTTP helpers — error types, response models, and shared utilities.
4
+
5
+ `dataenginex` does **not** bundle a FastAPI server. This module provides the building blocks
6
+ for applications (like DEX Studio or your own FastAPI app) that want to expose DEX functionality
7
+ over HTTP.
8
+
9
+ ## Error Types
10
+
11
+ ```python
12
+ from dataenginex.api.errors import (
13
+ NotFoundError,
14
+ ValidationError,
15
+ ConflictError,
16
+ ServiceUnavailableError,
17
+ )
18
+ ```
19
+
20
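+ Standard error types for HTTP error responses, usable in any FastAPI app. In the example below they are raised from a route handler, and `engine` is a `DexEngine` instance (created as shown under "Building an HTTP Layer"):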
21
+
22
+ ```python
23
+ from fastapi import FastAPI
24
+ from dataenginex.api.errors import NotFoundError
25
+
26
+ app = FastAPI()
27
+
28
+ @app.get("/pipelines/{name}")
29
+ def get_pipeline(name: str):
30
+ pipeline = engine.config.data.pipelines.get(name)
31
+ if pipeline is None:
32
+ raise NotFoundError(detail=f"Pipeline '{name}' not found")
33
+ return pipeline
34
+ ```
35
+
36
+ ## Building an HTTP Layer
37
+
38
+ To expose DEX functionality over HTTP, create a FastAPI app in your application
39
+ and call `DexEngine` directly:
40
+
41
+ ```python
42
+ from fastapi import FastAPI
43
+ from dataenginex.engine import DexEngine
44
+
45
+ engine = DexEngine("dex.yaml")
46
+ app = FastAPI()
47
+
48
+ @app.get("/health")
49
+ def health():
50
+ return engine.health()
51
+
52
+ @app.get("/pipelines")
53
+ def list_pipelines():
54
+ return list(engine.config.data.pipelines.keys())
55
+
56
+ @app.post("/pipelines/{name}/run")
57
+ def run_pipeline(name: str):
58
+ return engine.run_pipeline(name)
59
+ ```
60
+
61
+ See `examples/02_api_quickstart.py` for a minimal working example.
62
+
63
+ ::: dataenginex.api
@@ -0,0 +1,8 @@
1
+ ---
2
+ title: dashboard (removed)
3
+ description: The Streamlit dashboard module was removed in v1.0
4
+ ---
5
+
6
+ The `dataenginex.dashboard` Streamlit module was removed in **v1.0**.
7
+
8
+ Use [DEX Studio](https://github.com/TheDataEngineX/dex-studio) instead — a full-featured web UI for data pipelines, ML experiments, and AI agents.
@@ -0,0 +1,45 @@
1
+ # dataenginex.ml
2
+
3
+ Classical ML — training, model registry, drift detection, and model serving.
4
+
5
+ LLM providers, vector stores, agents, and RAG live in `dataenginex.ai`.
6
+ The drift scheduler lives in `dataenginex.orchestration`.
7
+
8
+ ## Module Split
9
+
10
+ | Concern | Module |
11
+ |---------|--------|
12
+ | Training, registry, serving, drift | `dataenginex.ml` |
13
+ | LLM providers, chat, embeddings | `dataenginex.ai.llm` |
14
+ | Vector stores | `dataenginex.ai.vectorstore` |
15
+ | Background drift scheduling | `dataenginex.orchestration.scheduler` |
16
+
17
+ ## Quick Usage
18
+
19
+ ```python
20
+ from dataenginex.ml import (
21
+ SklearnTrainer, TrainingResult,
22
+ ModelRegistry, ModelArtifact, ModelStage,
23
+ DriftDetector, DriftReport,
24
+ ModelServer, PredictionRequest, PredictionResponse,
25
+ )
26
+
27
+ # Train
28
+ trainer = SklearnTrainer(experiment_name="churn")
29
+ result: TrainingResult = trainer.train(X_train, y_train)
30
+
31
+ # Register
32
+ registry = ModelRegistry()
33
+ registry.register(result.model, name="churn_v1", stage=ModelStage.STAGING)
34
+
35
+ # Drift
36
+ detector = DriftDetector(reference=X_train)
37
+ report: DriftReport = detector.detect(X_new)
38
+
39
+ # Serve (assumes "churn_v1" has been promoted from STAGING to PRODUCTION beforehand)
40
+ server = ModelServer()
41
+ server.load("churn_v1", stage=ModelStage.PRODUCTION)
42
+ resp = server.predict(PredictionRequest(features={"age": 35}))
43
+ ```
44
+
45
+ ::: dataenginex.ml