dataenginex 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. {dataenginex-1.0.0 → dataenginex-1.0.2}/.gitignore +1 -0
  2. {dataenginex-1.0.0 → dataenginex-1.0.2}/PKG-INFO +20 -15
  3. {dataenginex-1.0.0 → dataenginex-1.0.2}/pyproject.toml +43 -23
  4. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/agents/builtin.py +10 -0
  5. dataenginex-1.0.2/src/dataenginex/ai/observability/__init__.py +19 -0
  6. dataenginex-1.0.2/src/dataenginex/ai/observability/langfuse.py +191 -0
  7. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/retrieval/__init__.py +3 -0
  8. dataenginex-1.0.2/src/dataenginex/ai/retrieval/graph.py +159 -0
  9. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/__init__.py +13 -1
  10. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/auth.py +64 -4
  11. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/factory.py +8 -0
  12. dataenginex-1.0.2/src/dataenginex/api/jwks.py +155 -0
  13. dataenginex-1.0.2/src/dataenginex/api/ldap_sync.py +170 -0
  14. dataenginex-1.0.2/src/dataenginex/api/rbac.py +91 -0
  15. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/ai.py +10 -2
  16. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/ml.py +25 -4
  17. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/pipelines.py +20 -4
  18. dataenginex-1.0.2/src/dataenginex/api/scim.py +391 -0
  19. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/runner.py +11 -0
  20. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/__init__.py +21 -0
  21. dataenginex-1.0.2/src/dataenginex/middleware/domain_metrics.py +77 -0
  22. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/llm.py +158 -2
  23. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/scheduler.py +7 -0
  24. dataenginex-1.0.2/tests/integration/test_ai_integration.py +427 -0
  25. dataenginex-1.0.2/tests/integration/test_api_middleware_integration.py +375 -0
  26. dataenginex-1.0.2/tests/integration/test_lineage_integration.py +391 -0
  27. dataenginex-1.0.2/tests/integration/test_ml_integration.py +277 -0
  28. dataenginex-1.0.2/tests/integration/test_secops_integration.py +316 -0
  29. dataenginex-1.0.2/tests/unit/test_ai_router_extended.py +273 -0
  30. dataenginex-1.0.2/tests/unit/test_api_jwks.py +179 -0
  31. dataenginex-1.0.2/tests/unit/test_api_rbac.py +109 -0
  32. dataenginex-1.0.2/tests/unit/test_api_scim.py +162 -0
  33. dataenginex-1.0.2/tests/unit/test_domain_metrics.py +98 -0
  34. dataenginex-1.0.2/tests/unit/test_domain_metrics_wiring.py +138 -0
  35. dataenginex-1.0.2/tests/unit/test_llm_litellm_vllm.py +102 -0
  36. dataenginex-1.0.2/tests/unit/test_observability_langfuse.py +116 -0
  37. dataenginex-1.0.2/tests/unit/test_pipeline_router_extended.py +247 -0
  38. dataenginex-1.0.2/tests/unit/test_retriever_graph.py +89 -0
  39. dataenginex-1.0.2/tests/unit/test_security_extended.py +379 -0
  40. dataenginex-1.0.2/uv.lock +4929 -0
  41. dataenginex-1.0.0/src/dataenginex/ai/observability/__init__.py +0 -9
  42. dataenginex-1.0.0/uv.lock +0 -4376
  43. {dataenginex-1.0.0 → dataenginex-1.0.2}/.claude/commands/new-feature.md +0 -0
  44. {dataenginex-1.0.0 → dataenginex-1.0.2}/.claude/commands/validate.md +0 -0
  45. {dataenginex-1.0.0 → dataenginex-1.0.2}/.claude/settings.json +0 -0
  46. {dataenginex-1.0.0 → dataenginex-1.0.2}/.env.template +0 -0
  47. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  48. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  49. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  50. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  51. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/dependabot.yml +0 -0
  52. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/labels.yml +0 -0
  53. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/release-pr-template.md +0 -0
  54. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/auto-pr-dev-to-main.yml +0 -0
  55. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/auto-pr-to-dev.yml +0 -0
  56. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/ci.yml +0 -0
  57. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/claude.yml +0 -0
  58. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/docker-build-push.yml +0 -0
  59. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/docs-notify.yml +0 -0
  60. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/enforce-dev-to-main.yml +0 -0
  61. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/label-sync.yml +0 -0
  62. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/pypi-publish.yml +0 -0
  63. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/release-dex.yml +0 -0
  64. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/release-please.yml +0 -0
  65. {dataenginex-1.0.0 → dataenginex-1.0.2}/.github/workflows/security.yml +0 -0
  66. {dataenginex-1.0.0 → dataenginex-1.0.2}/.gitleaks.toml +0 -0
  67. {dataenginex-1.0.0 → dataenginex-1.0.2}/.pre-commit-config.yaml +0 -0
  68. {dataenginex-1.0.0 → dataenginex-1.0.2}/.python-version +0 -0
  69. {dataenginex-1.0.0 → dataenginex-1.0.2}/.release-please-manifest.json +0 -0
  70. {dataenginex-1.0.0 → dataenginex-1.0.2}/CHANGELOG.md +0 -0
  71. {dataenginex-1.0.0 → dataenginex-1.0.2}/CLAUDE.md +0 -0
  72. {dataenginex-1.0.0 → dataenginex-1.0.2}/CODEOWNERS +0 -0
  73. {dataenginex-1.0.0 → dataenginex-1.0.2}/CODE_OF_CONDUCT.md +0 -0
  74. {dataenginex-1.0.0 → dataenginex-1.0.2}/CONTRIBUTING.md +0 -0
  75. {dataenginex-1.0.0 → dataenginex-1.0.2}/Dockerfile +0 -0
  76. {dataenginex-1.0.0 → dataenginex-1.0.2}/LICENSE +0 -0
  77. {dataenginex-1.0.0 → dataenginex-1.0.2}/README.md +0 -0
  78. {dataenginex-1.0.0 → dataenginex-1.0.2}/SECURITY.md +0 -0
  79. {dataenginex-1.0.0 → dataenginex-1.0.2}/docker-compose.test.yml +0 -0
  80. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/adr/0000-template.md +0 -0
  81. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/adr/0001-medallion-architecture.md +0 -0
  82. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/api.md +0 -0
  83. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/core.md +0 -0
  84. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/dashboard.md +0 -0
  85. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/data.md +0 -0
  86. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/index.md +0 -0
  87. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/lakehouse.md +0 -0
  88. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/middleware.md +0 -0
  89. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/ml.md +0 -0
  90. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/plugins.md +0 -0
  91. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/api-reference/warehouse.md +0 -0
  92. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/architecture.md +0 -0
  93. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/ci-cd.md +0 -0
  94. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/contributing.md +0 -0
  95. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/development.md +0 -0
  96. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/index.md +0 -0
  97. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/observability.md +0 -0
  98. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/quickstart.md +0 -0
  99. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/release-notes.md +0 -0
  100. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/roadmap/project-roadmap.csv +0 -0
  101. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/roadmap/project-roadmap.json +0 -0
  102. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/sdlc.md +0 -0
  103. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/security-scanning.md +0 -0
  104. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-21-phase-0-foundation.md +0 -0
  105. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-dataenginex-1.0-master-plan.md +0 -0
  106. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-1-data-layer.md +0 -0
  107. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-6a-dex-engine-integration.md +0 -0
  108. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-22-phase-6b-dex-studio-redesign.md +0 -0
  109. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-23-careerdex-example.md +0 -0
  110. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-23-dex-studio-direct-import.md +0 -0
  111. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-24-dex-naming-architecture-dry.md +0 -0
  112. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/plans/2026-03-28-unified-docs-and-tooling.md +0 -0
  113. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-21-dataenginex-v2-system-redesign.md +0 -0
  114. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-22-phase-6-integration-design.md +0 -0
  115. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-23-dex-studio-direct-import-design.md +0 -0
  116. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-24-dex-naming-architecture-dry-design.md +0 -0
  117. {dataenginex-1.0.0 → dataenginex-1.0.2}/docs/superpowers/specs/2026-03-28-unified-docs-and-tooling-design.md +0 -0
  118. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/01_hello_pipeline.py +0 -0
  119. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/02_api_quickstart.py +0 -0
  120. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/03_quality_gate.py +0 -0
  121. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/04_ml_training.py +0 -0
  122. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/05_rag_demo.py +0 -0
  123. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/06_llm_quickstart.py +0 -0
  124. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/07_api_ingestion.py +0 -0
  125. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/08_spark_ml.py +0 -0
  126. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/09_feature_engineering.py +0 -0
  127. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/10_model_analysis.py +0 -0
  128. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/GUIDE.md +0 -0
  129. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/dashboard/dashboard_config.yaml +0 -0
  130. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/dashboard/run_dashboard.py +0 -0
  131. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/dex.yaml +0 -0
  132. {dataenginex-1.0.0 → dataenginex-1.0.2}/examples/movies.csv +0 -0
  133. {dataenginex-1.0.0 → dataenginex-1.0.2}/poe_tasks.toml +0 -0
  134. {dataenginex-1.0.0 → dataenginex-1.0.2}/release-please-config.json +0 -0
  135. {dataenginex-1.0.0 → dataenginex-1.0.2}/scripts/GUIDE.md +0 -0
  136. {dataenginex-1.0.0 → dataenginex-1.0.2}/scripts/localstack/create-buckets.sh +0 -0
  137. {dataenginex-1.0.0 → dataenginex-1.0.2}/scripts/promote.sh +0 -0
  138. {dataenginex-1.0.0 → dataenginex-1.0.2}/sonar-project.properties +0 -0
  139. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/README.md +0 -0
  140. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/RELEASE_NOTES.md +0 -0
  141. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/__init__.py +0 -0
  142. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/__init__.py +0 -0
  143. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/agents/__init__.py +0 -0
  144. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/memory/__init__.py +0 -0
  145. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/memory/base.py +0 -0
  146. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/memory/episodic.py +0 -0
  147. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/memory/long_term.py +0 -0
  148. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/observability/audit.py +0 -0
  149. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/observability/cost.py +0 -0
  150. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/observability/metrics.py +0 -0
  151. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/retrieval/builtin.py +0 -0
  152. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/__init__.py +0 -0
  153. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/anthropic.py +0 -0
  154. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/huggingface.py +0 -0
  155. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/ollama.py +0 -0
  156. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/openai.py +0 -0
  157. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/routing/router.py +0 -0
  158. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/runtime/__init__.py +0 -0
  159. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/runtime/checkpoint.py +0 -0
  160. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/runtime/executor.py +0 -0
  161. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/runtime/sandbox.py +0 -0
  162. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/tools/__init__.py +0 -0
  163. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/tools/builtin.py +0 -0
  164. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/workflows/__init__.py +0 -0
  165. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/workflows/conditions.py +0 -0
  166. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/workflows/dag.py +0 -0
  167. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ai/workflows/human_loop.py +0 -0
  168. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/errors.py +0 -0
  169. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/health.py +0 -0
  170. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/pagination.py +0 -0
  171. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/rate_limit.py +0 -0
  172. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/__init__.py +0 -0
  173. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/data.py +0 -0
  174. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/health.py +0 -0
  175. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/root.py +0 -0
  176. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/routers/system.py +0 -0
  177. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/api/schemas.py +0 -0
  178. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/cli/__init__.py +0 -0
  179. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/cli/main.py +0 -0
  180. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/cli/run.py +0 -0
  181. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/cli/serve.py +0 -0
  182. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/cli/train.py +0 -0
  183. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/config/__init__.py +0 -0
  184. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/config/defaults.py +0 -0
  185. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/config/loader.py +0 -0
  186. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/config/schema.py +0 -0
  187. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/__init__.py +0 -0
  188. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/exceptions.py +0 -0
  189. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/interfaces.py +0 -0
  190. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/medallion_architecture.py +0 -0
  191. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/quality.py +0 -0
  192. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/registry.py +0 -0
  193. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/schemas.py +0 -0
  194. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/core/validators.py +0 -0
  195. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/__init__.py +0 -0
  196. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/app.py +0 -0
  197. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/dashboard/panels.py +0 -0
  198. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/__init__.py +0 -0
  199. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/__init__.py +0 -0
  200. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/csv.py +0 -0
  201. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/duckdb.py +0 -0
  202. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/connectors/legacy.py +0 -0
  203. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/__init__.py +0 -0
  204. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/dag.py +0 -0
  205. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/pipeline/run_history.py +0 -0
  206. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/profiler.py +0 -0
  207. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/__init__.py +0 -0
  208. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/gates.py +0 -0
  209. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/quality/spark.py +0 -0
  210. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/registry.py +0 -0
  211. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/transforms/__init__.py +0 -0
  212. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/data/transforms/sql.py +0 -0
  213. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/__init__.py +0 -0
  214. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/catalog.py +0 -0
  215. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/partitioning.py +0 -0
  216. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/lakehouse/storage.py +0 -0
  217. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/logging_config.py +0 -0
  218. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/metrics.py +0 -0
  219. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/metrics_middleware.py +0 -0
  220. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/request_logging.py +0 -0
  221. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/middleware/tracing.py +0 -0
  222. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/__init__.py +0 -0
  223. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/drift.py +0 -0
  224. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/features/__init__.py +0 -0
  225. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/features/builtin.py +0 -0
  226. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/metrics.py +0 -0
  227. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/mlflow_registry.py +0 -0
  228. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/registry.py +0 -0
  229. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving.py +0 -0
  230. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
  231. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
  232. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/tracking/__init__.py +0 -0
  233. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/tracking/builtin.py +0 -0
  234. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/training.py +0 -0
  235. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/ml/vectorstore.py +0 -0
  236. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/orchestration/__init__.py +0 -0
  237. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/orchestration/builtin.py +0 -0
  238. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/plugins/__init__.py +0 -0
  239. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/plugins/registry.py +0 -0
  240. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/py.typed +0 -0
  241. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/secops/__init__.py +0 -0
  242. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/secops/audit.py +0 -0
  243. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/secops/gate.py +0 -0
  244. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/secops/masking.py +0 -0
  245. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/secops/pii.py +0 -0
  246. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/__init__.py +0 -0
  247. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/lineage.py +0 -0
  248. {dataenginex-1.0.0 → dataenginex-1.0.2}/src/dataenginex/warehouse/transforms.py +0 -0
  249. {dataenginex-1.0.0 → dataenginex-1.0.2}/tasks/findings.md +0 -0
  250. {dataenginex-1.0.0 → dataenginex-1.0.2}/tasks/lessons.md +0 -0
  251. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/__init__.py +0 -0
  252. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conformance/__init__.py +0 -0
  253. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conformance/test_connector.py +0 -0
  254. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conformance/test_feature_store.py +0 -0
  255. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conformance/test_tracker.py +0 -0
  256. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conformance/test_transform.py +0 -0
  257. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/conftest.py +0 -0
  258. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/fixtures/__init__.py +0 -0
  259. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/fixtures/sample_data.py +0 -0
  260. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/fixtures/sample_jobs.csv +0 -0
  261. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/fixtures/sample_jobs.json +0 -0
  262. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/__init__.py +0 -0
  263. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/test_cli_run.py +0 -0
  264. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/test_config_cli.py +0 -0
  265. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/test_full_app.py +0 -0
  266. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/test_pipeline_e2e.py +0 -0
  267. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/integration/test_storage_real.py +0 -0
  268. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/load/__init__.py +0 -0
  269. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/__init__.py +0 -0
  270. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_agent_runtime.py +0 -0
  271. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_ai_modules.py +0 -0
  272. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_ai_router.py +0 -0
  273. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_auth.py +0 -0
  274. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_factory.py +0 -0
  275. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_pagination.py +0 -0
  276. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_rate_limit.py +0 -0
  277. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_schemas.py +0 -0
  278. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_api_validators.py +0 -0
  279. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_builtin_agent.py +0 -0
  280. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_builtin_feature_store.py +0 -0
  281. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_builtin_serving.py +0 -0
  282. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_builtin_tracker.py +0 -0
  283. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_cli_train.py +0 -0
  284. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_config_loader.py +0 -0
  285. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_config_schema.py +0 -0
  286. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_core_exceptions.py +0 -0
  287. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_core_interfaces.py +0 -0
  288. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_core_registry.py +0 -0
  289. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_csv_connector.py +0 -0
  290. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_data.py +0 -0
  291. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_data_router.py +0 -0
  292. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_drift_scheduler.py +0 -0
  293. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_duckdb_connector.py +0 -0
  294. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_errors.py +0 -0
  295. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_health.py +0 -0
  296. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_lakehouse.py +0 -0
  297. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_llm.py +0 -0
  298. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_logging.py +0 -0
  299. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_medallion.py +0 -0
  300. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_metrics.py +0 -0
  301. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_middleware.py +0 -0
  302. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_ml.py +0 -0
  303. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_ml_router.py +0 -0
  304. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_pipeline_dag.py +0 -0
  305. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_pipeline_runner.py +0 -0
  306. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_plugins.py +0 -0
  307. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_quality_gates.py +0 -0
  308. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_quality_spark.py +0 -0
  309. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_retriever.py +0 -0
  310. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_run_history.py +0 -0
  311. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_scheduler.py +0 -0
  312. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_secops.py +0 -0
  313. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_spark_fixtures.py +0 -0
  314. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_sql_transforms.py +0 -0
  315. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_storage_abstraction.py +0 -0
  316. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_system_router.py +0 -0
  317. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_tracing.py +0 -0
  318. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_vectorstore.py +0 -0
  319. {dataenginex-1.0.0 → dataenginex-1.0.2}/tests/unit/test_warehouse.py +0 -0
@@ -90,3 +90,4 @@ Notes.txt
90
90
 
91
91
  # AI Coding Assistant
92
92
  .continue/
93
+ .playwright-mcp/
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataenginex
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
5
5
  Author-email: Jay <jayapal.myaka99@gmail.com>
6
6
  License: MIT License
@@ -26,28 +26,33 @@ License: MIT License
26
26
  SOFTWARE.
27
27
  License-File: LICENSE
28
28
  Requires-Python: >=3.13
29
- Requires-Dist: click>=8.3.1
30
- Requires-Dist: croniter>=6.0.0
31
- Requires-Dist: duckdb>=1.5.0
29
+ Requires-Dist: click>=8.3.2
30
+ Requires-Dist: croniter>=6.2.2
31
+ Requires-Dist: duckdb>=1.5.2
32
32
  Requires-Dist: email-validator>=2.3.0
33
- Requires-Dist: fastapi>=0.135.1
34
- Requires-Dist: httpx>=0.28.0
35
- Requires-Dist: opentelemetry-api>=1.40.0
36
- Requires-Dist: opentelemetry-exporter-otlp>=1.40.0
37
- Requires-Dist: opentelemetry-instrumentation-fastapi>=0.61b0
38
- Requires-Dist: opentelemetry-sdk>=1.40.0
39
- Requires-Dist: prometheus-client>=0.24.1
33
+ Requires-Dist: fastapi>=0.136.0
34
+ Requires-Dist: httpx>=0.28.1
35
+ Requires-Dist: opentelemetry-api>=1.41.0
36
+ Requires-Dist: opentelemetry-exporter-otlp>=1.41.0
37
+ Requires-Dist: opentelemetry-instrumentation-fastapi>=0.62b0
38
+ Requires-Dist: opentelemetry-sdk>=1.41.0
39
+ Requires-Dist: prometheus-client>=0.25.0
40
40
  Requires-Dist: pyarrow>=23.0.1
41
- Requires-Dist: pydantic>=2.10.0
42
- Requires-Dist: python-dotenv>=1.2.1
41
+ Requires-Dist: pydantic>=2.13.2
42
+ Requires-Dist: python-dotenv>=1.2.2
43
43
  Requires-Dist: pyyaml>=6.0.3
44
- Requires-Dist: rich>=14.3.3
44
+ Requires-Dist: rich>=15.0.0
45
45
  Requires-Dist: structlog>=25.5.0
46
- Requires-Dist: uvicorn>=0.42.0
46
+ Requires-Dist: uvicorn>=0.44.0
47
+ Provides-Extra: auth
48
+ Requires-Dist: ldap3>=2.9.1; extra == 'auth'
49
+ Requires-Dist: pyjwt[crypto]>=2.9.0; extra == 'auth'
47
50
  Provides-Extra: cloud
48
51
  Requires-Dist: boto3>=1.42.0; extra == 'cloud'
49
52
  Requires-Dist: google-cloud-bigquery>=3.40.0; extra == 'cloud'
50
53
  Requires-Dist: google-cloud-storage>=3.0.0; extra == 'cloud'
54
+ Provides-Extra: observability
55
+ Requires-Dist: langfuse>=4.3.1; extra == 'observability'
51
56
  Description-Content-Type: text/markdown
52
57
 
53
58
  # dataenginex
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dataenginex"
3
- version = "1.0.0"
3
+ version = "1.0.2"
4
4
  description = "DataEngineX - Core framework for AI/ML/Data engineering projects"
5
5
  authors = [
6
6
  {name = "Jay", email = "jayapal.myaka99@gmail.com"}
@@ -9,24 +9,24 @@ readme = "src/dataenginex/README.md"
9
9
  requires-python = ">=3.13"
10
10
  license = {file = "LICENSE"}
11
11
  dependencies = [
12
- "pydantic>=2.10.0",
13
- "python-dotenv>=1.2.1",
12
+ "pydantic>=2.13.2",
13
+ "python-dotenv>=1.2.2",
14
14
  "pyyaml>=6.0.3",
15
- "click>=8.3.1",
16
- "rich>=14.3.3",
17
- "duckdb>=1.5.0",
18
- "croniter>=6.0.0",
19
- "httpx>=0.28.0",
20
- "prometheus-client>=0.24.1",
15
+ "click>=8.3.2",
16
+ "rich>=15.0.0",
17
+ "duckdb>=1.5.2",
18
+ "croniter>=6.2.2",
19
+ "httpx>=0.28.1",
20
+ "prometheus-client>=0.25.0",
21
21
  # API framework
22
- "fastapi>=0.135.1",
23
- "uvicorn>=0.42.0",
22
+ "fastapi>=0.136.0",
23
+ "uvicorn>=0.44.0",
24
24
  "structlog>=25.5.0",
25
25
  # OpenTelemetry
26
- "opentelemetry-api>=1.40.0",
27
- "opentelemetry-sdk>=1.40.0",
28
- "opentelemetry-instrumentation-fastapi>=0.61b0",
29
- "opentelemetry-exporter-otlp>=1.40.0",
26
+ "opentelemetry-api>=1.41.0",
27
+ "opentelemetry-sdk>=1.41.0",
28
+ "opentelemetry-instrumentation-fastapi>=0.62b0",
29
+ "opentelemetry-exporter-otlp>=1.41.0",
30
30
  "email-validator>=2.3.0",
31
31
  # Data formats
32
32
  "pyarrow>=23.0.1",
@@ -41,22 +41,36 @@ cloud = [
41
41
  "google-cloud-storage>=3.0.0",
42
42
  "google-cloud-bigquery>=3.40.0",
43
43
  ]
44
+ auth = [
45
+ "pyjwt[crypto]>=2.9.0",
46
+ "ldap3>=2.9.1",
47
+ ]
48
+ # NOTE: litellm is NOT declared here because it pins python-dotenv==1.0.1, which
49
+ # conflicts with our python-dotenv>=1.2.2. Install separately for LiteLLMProvider:
50
+ # pip install 'litellm>=1.83.3' --no-deps
51
+ # or manage via a dedicated venv. The provider is lazy-imported; DEX runs fine
52
+ # without it.
53
+ observability = [
54
+ "langfuse>=4.3.1",
55
+ ]
44
56
 
45
57
  [dependency-groups]
46
58
  dev = [
47
- "pytest>=9.0.2",
48
- "pytest-cov>=7.0.0",
59
+ "pytest>=9.0.3",
60
+ "pytest-cov>=7.1.0",
49
61
  "pytest-asyncio>=1.3.0",
50
- "ruff>=0.15.6",
51
- "mypy>=1.19.1",
62
+ "ruff>=0.15.11",
63
+ "mypy>=1.20.1",
52
64
  "types-pyyaml>=6.0.12.20250915",
53
65
  "types-requests>=2.32.4.20260107",
54
- "poethepoet>=0.42.1",
55
- "uv>=0.10.0",
56
- "mkdocs-material>=9.7.0",
57
- "mkdocstrings[python]>=0.29.0",
66
+ "poethepoet>=0.44.0",
67
+ "uv>=0.11.6",
68
+ "mkdocs-material>=9.7.6",
69
+ "mkdocstrings[python]>=1.0.4",
58
70
  "pre-commit>=4.5.1",
59
71
  "pip-audit>=2.10.0",
72
+ "pyjwt[crypto]>=2.12.1",
73
+ "cryptography>=46.0.7",
60
74
  ]
61
75
  data = [
62
76
  "pyspark>=4.1.1",
@@ -141,6 +155,10 @@ ignore_missing_imports = true
141
155
  module = ["sentence_transformers.*"]
142
156
  ignore_missing_imports = true
143
157
 
158
+ [[tool.mypy.overrides]]
159
+ module = ["litellm.*", "langfuse.*", "ldap3.*"]
160
+ ignore_missing_imports = true
161
+
144
162
  [[tool.mypy.overrides]]
145
163
  module = ["pyspark.*"]
146
164
  ignore_missing_imports = true
@@ -169,6 +187,8 @@ filterwarnings = [
169
187
  "error", # Treat all warnings as errors by default
170
188
  "ignore::DeprecationWarning:websockets\\.legacy", # uvicorn uses deprecated websockets.legacy API
171
189
  "ignore::DeprecationWarning:uvicorn\\.protocols", # uvicorn imports deprecated WebSocketServerProtocol
190
+ "ignore:unclosed.*socket:ResourceWarning", # Starlette TestClient GC timing
191
+ "ignore:unclosed event loop:ResourceWarning", # Starlette TestClient GC timing
172
192
  ]
173
193
 
174
194
  [tool.coverage.run]
@@ -17,6 +17,10 @@ from dataenginex.ai.agents import agent_registry
17
17
  from dataenginex.ai.tools import ToolRegistry, tool_registry
18
18
  from dataenginex.ai.tools.builtin import register_builtin_tools
19
19
  from dataenginex.core.interfaces import BaseAgentRuntime
20
+ from dataenginex.middleware.domain_metrics import (
21
+ ai_agent_iterations,
22
+ ai_tool_calls_total,
23
+ )
20
24
 
21
25
  logger = structlog.get_logger()
22
26
 
@@ -38,12 +42,14 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
38
42
  system_prompt: str = "You are a helpful data engineering assistant.",
39
43
  tools: ToolRegistry | None = None,
40
44
  max_iterations: int = 10,
45
+ name: str = "builtin",
41
46
  **kwargs: Any,
42
47
  ) -> None:
43
48
  self._llm = llm
44
49
  self._system_prompt = system_prompt
45
50
  self._tools = tools or tool_registry
46
51
  self._max_iterations = max_iterations
52
+ self._name = name
47
53
  self._history: list[dict[str, str]] = []
48
54
  register_builtin_tools()
49
55
 
@@ -64,6 +70,7 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
64
70
  if step_result.get("done", False):
65
71
  response = str(step_result.get("response", ""))
66
72
  self._history.append({"role": "assistant", "content": response})
73
+ ai_agent_iterations.labels(agent=self._name).observe(iterations)
67
74
  return {"response": response, "iterations": iterations, "tool_calls": tool_calls}
68
75
 
69
76
  # If tool was called, continue the loop
@@ -73,6 +80,7 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
73
80
  # Hit max iterations
74
81
  final = "I've reached my reasoning limit. Here's what I have so far."
75
82
  self._history.append({"role": "assistant", "content": final})
83
+ ai_agent_iterations.labels(agent=self._name).observe(self._max_iterations)
76
84
  return {"response": final, "iterations": self._max_iterations, "tool_calls": tool_calls}
77
85
 
78
86
  async def step(self, message: str, **kwargs: Any) -> dict[str, Any]:
@@ -134,8 +142,10 @@ class BuiltinAgentRuntime(BaseAgentRuntime):
134
142
  try:
135
143
  result = self._tools.call(tool_name, **args)
136
144
  observation = f"Tool '{tool_name}' returned: {result}"
145
+ ai_tool_calls_total.labels(tool=tool_name, status="ok").inc()
137
146
  except Exception as e:
138
147
  observation = f"Tool '{tool_name}' failed: {e}"
148
+ ai_tool_calls_total.labels(tool=tool_name, status="error").inc()
139
149
 
140
150
  self._history.append(
141
151
  {"role": "assistant", "content": f"[tool: {tool_name}] {observation}"},
@@ -0,0 +1,19 @@
1
+ """Observability — audit logging, cost tracking, metrics, Langfuse tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataenginex.ai.observability.audit import AuditEntry, AuditLog
6
+ from dataenginex.ai.observability.cost import CostTracker, TokenUsage
7
+ from dataenginex.ai.observability.langfuse import LangfuseSink, get_sink, trace_generation
8
+ from dataenginex.ai.observability.metrics import AgentMetrics
9
+
10
+ __all__ = [
11
+ "AgentMetrics",
12
+ "AuditEntry",
13
+ "AuditLog",
14
+ "CostTracker",
15
+ "LangfuseSink",
16
+ "TokenUsage",
17
+ "get_sink",
18
+ "trace_generation",
19
+ ]
@@ -0,0 +1,191 @@
1
+ """Langfuse tracing sink — ship LLM traces to self-hosted or cloud Langfuse.
2
+
3
+ Records :class:`LLMResponse` calls as generation spans in Langfuse. Falls
4
+ back to a no-op when the ``langfuse`` optional dependency is missing or the
5
+ sink is disabled via env var.
6
+
7
+ Configuration (env vars)::
8
+
9
+ DEX_LANGFUSE_ENABLED — "true" to activate the sink (default "false")
10
+ LANGFUSE_PUBLIC_KEY — project public key
11
+ LANGFUSE_SECRET_KEY — project secret key
12
+ LANGFUSE_HOST — endpoint (default https://cloud.langfuse.com)
13
+
14
+ Install::
15
+
16
+ uv sync --extra observability
17
+
18
+ Example::
19
+
20
+ from dataenginex.ai.observability.langfuse import trace_generation
21
+
22
+ with trace_generation(name="summarise", model="gpt-4o") as ctx:
23
+ ctx["input"] = prompt
24
+ ctx["response"] = llm.generate(prompt)
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import os
30
+ import time
31
+ from collections.abc import Iterator
32
+ from contextlib import contextmanager
33
+ from typing import Any
34
+
35
+ import structlog
36
+
37
+ from dataenginex.ml.llm import LLMResponse
38
+
39
+ logger = structlog.get_logger()
40
+
41
+ __all__ = ["LangfuseSink", "get_sink", "trace_generation"]
42
+
43
+
44
+ class LangfuseSink:
45
+ """Thin wrapper over the Langfuse v4 SDK client.
46
+
47
+ All public methods are safe to call unconditionally — when langfuse is
48
+ not installed or the sink is disabled, they are no-ops.
49
+
50
+ Args:
51
+ public_key: Langfuse project public key; falls back to
52
+ ``LANGFUSE_PUBLIC_KEY`` env var.
53
+ secret_key: Langfuse project secret key; falls back to
54
+ ``LANGFUSE_SECRET_KEY`` env var.
55
+ host: Langfuse endpoint; falls back to ``LANGFUSE_HOST`` env var
56
+ (default ``https://cloud.langfuse.com``).
57
+ enabled: Override the ``DEX_LANGFUSE_ENABLED`` env var.
58
+ """
59
+
60
+ def __init__(
61
+ self,
62
+ public_key: str | None = None,
63
+ secret_key: str | None = None,
64
+ host: str | None = None,
65
+ enabled: bool | None = None,
66
+ ) -> None:
67
+ self._enabled = self._resolve_enabled(enabled)
68
+ self._client: Any = None
69
+ if not self._enabled:
70
+ return
71
+ try:
72
+ from langfuse import Langfuse
73
+ except ImportError:
74
+ logger.warning(
75
+ "langfuse not installed — tracing disabled; "
76
+ "install with: uv sync --group observability",
77
+ )
78
+ self._enabled = False
79
+ return
80
+ try:
81
+ self._client = Langfuse(
82
+ public_key=public_key or os.getenv("LANGFUSE_PUBLIC_KEY", ""),
83
+ secret_key=secret_key or os.getenv("LANGFUSE_SECRET_KEY", ""),
84
+ host=host or os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
85
+ )
86
+ logger.info(
87
+ "langfuse sink initialised",
88
+ host=host or os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
89
+ )
90
+ except Exception as exc: # noqa: BLE001 — langfuse init surface varies
91
+ logger.warning("langfuse init failed", error=str(exc))
92
+ self._enabled = False
93
+
94
+ @staticmethod
95
+ def _resolve_enabled(explicit: bool | None) -> bool:
96
+ if explicit is not None:
97
+ return explicit
98
+ return os.getenv("DEX_LANGFUSE_ENABLED", "false").lower() == "true"
99
+
100
+ @property
101
+ def enabled(self) -> bool:
102
+ """Whether the sink will emit traces on call."""
103
+ return self._enabled and self._client is not None
104
+
105
+ def trace_generation(
106
+ self,
107
+ *,
108
+ name: str,
109
+ model: str,
110
+ input_messages: list[dict[str, str]] | str,
111
+ response: LLMResponse,
112
+ metadata: dict[str, Any] | None = None,
113
+ user_id: str | None = None,
114
+ ) -> None:
115
+ """Record a single generation event in Langfuse."""
116
+ if not self.enabled:
117
+ return
118
+ try:
119
+ with self._client.start_as_current_generation(
120
+ name=name,
121
+ model=model,
122
+ input=input_messages,
123
+ ) as gen:
124
+ gen.update(
125
+ output=response.text,
126
+ usage_details={
127
+ "input": response.prompt_tokens,
128
+ "output": response.completion_tokens,
129
+ "total": response.total_tokens,
130
+ },
131
+ metadata=metadata or {},
132
+ )
133
+ if user_id:
134
+ gen.update_trace(user_id=user_id)
135
+ except Exception: # noqa: BLE001 — never let tracing break the request
136
+ logger.exception("langfuse trace failed", name=name)
137
+
138
+ def flush(self) -> None:
139
+ """Flush buffered traces (safe to call on shutdown)."""
140
+ if not self.enabled:
141
+ return
142
+ try:
143
+ self._client.flush()
144
+ except Exception: # noqa: BLE001
145
+ logger.exception("langfuse flush failed")
146
+
147
+
148
+ _GLOBAL_SINK: LangfuseSink | None = None
149
+
150
+
151
+ def get_sink() -> LangfuseSink:
152
+ """Return the process-global :class:`LangfuseSink`, creating it lazily."""
153
+ global _GLOBAL_SINK # noqa: PLW0603 — process-wide singleton
154
+ if _GLOBAL_SINK is None:
155
+ _GLOBAL_SINK = LangfuseSink()
156
+ return _GLOBAL_SINK
157
+
158
+
159
+ @contextmanager
160
+ def trace_generation(
161
+ name: str,
162
+ model: str,
163
+ **metadata: Any,
164
+ ) -> Iterator[dict[str, Any]]:
165
+ """Context manager that emits a Langfuse trace on exit.
166
+
167
+ The caller populates ``ctx["input"]`` and ``ctx["response"]`` inside the
168
+ block. On exit the trace is shipped if the sink is enabled and
169
+ ``ctx["response"]`` is an :class:`LLMResponse` — even when the block raises; exceptions propagate unchanged.
170
+
171
+ Example::
172
+
173
+ with trace_generation("summarise", model="gpt-4o", agent="builtin") as ctx:
174
+ ctx["input"] = prompt
175
+ ctx["response"] = llm.generate(prompt)
176
+ """
177
+ ctx: dict[str, Any] = {"_start": time.monotonic(), "metadata": dict(metadata)}
178
+ try:
179
+ yield ctx
180
+ finally:
181
+ sink = get_sink()
182
+ response = ctx.get("response")
183
+ if sink.enabled and isinstance(response, LLMResponse):
184
+ sink.trace_generation(
185
+ name=name,
186
+ model=model,
187
+ input_messages=ctx.get("input", ""),
188
+ response=response,
189
+ metadata=ctx.get("metadata", {}),
190
+ user_id=ctx.get("user_id"),
191
+ )
@@ -1,6 +1,7 @@
1
1
  """Retriever registry.
2
2
 
3
3
  Built-in retriever supports dense, sparse (BM25), and hybrid strategies.
4
+ Graph retriever adds LightRAG-style dual-level (entity + semantic) retrieval.
4
5
  """
5
6
 
6
7
  from __future__ import annotations
@@ -9,3 +10,5 @@ from dataenginex.core.interfaces import BaseRetriever
9
10
  from dataenginex.core.registry import BackendRegistry
10
11
 
11
12
  retriever_registry: BackendRegistry[BaseRetriever] = BackendRegistry("retriever")
13
+
14
+ __all__ = ["retriever_registry"]
@@ -0,0 +1,159 @@
1
+ """Graph-augmented retriever — dual-level (entity + semantic) RAG.
2
+
3
+ Inspired by LightRAG (EMNLP 2025). Retrieves at two complementary levels
4
+ and fuses the rankings via reciprocal rank fusion (RRF):
5
+
6
+ 1. **Entity-level** (high-level, high recall) — pull docs mentioning the
7
+ named entities of the query.
8
+ 2. **Semantic-level** (low-level, high precision) — dense vector similarity
9
+ against the embedded query.
10
+
11
+ Entity extraction defaults to a lightweight rule-based extractor
12
+ (capitalised tokens and runs of up to four capitalised words). Supply ``entity_extractor`` for LLM-driven
13
+ NER or domain-specific extraction.
14
+
15
+ Example::
16
+
17
+ from dataenginex.ai.retrieval.graph import GraphRetriever
18
+
19
+ retriever = GraphRetriever(
20
+ vector_store=store,
21
+ embed_fn=embed_fn,
22
+ documents=docs,
23
+ )
24
+ hits = retriever.retrieve("How does Snowflake handle schema drift?", top_k=5)
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import re
30
+ from collections import Counter
31
+ from collections.abc import Callable
32
+ from typing import Any
33
+
34
+ import structlog
35
+
36
+ from dataenginex.ai.retrieval import retriever_registry
37
+ from dataenginex.ai.retrieval.builtin import _BM25, _rrf
38
+ from dataenginex.core.interfaces import BaseRetriever
39
+
40
+ logger = structlog.get_logger()
41
+
42
+ __all__ = ["GraphRetriever", "default_extract_entities"]
43
+
44
+
45
+ _ENTITY_REGEX = re.compile(r"\b[A-Z][a-zA-Z0-9\-]+(?:\s+[A-Z][a-zA-Z0-9\-]+){0,3}\b")
46
+ _STOPWORDS: frozenset[str] = frozenset(
47
+ {"The", "This", "That", "A", "An", "Is", "Are", "Was", "Were", "I", "We", "You", "It", "If"},
48
+ )
49
+
50
+
51
+ def default_extract_entities(text: str) -> list[str]:
52
+ """Rule-based entity extractor — capitalised tokens and multi-word phrases."""
53
+ return [m for m in _ENTITY_REGEX.findall(text) if m not in _STOPWORDS]
54
+
55
+
56
+ @retriever_registry.decorator("graph")
57
+ class GraphRetriever(BaseRetriever):
58
+ """Dual-level (entity + semantic) retriever.
59
+
60
+ Args:
61
+ vector_store: :class:`BaseVectorStore` for dense search. When it or
62
+ ``embed_fn`` is ``None`` the semantic level falls back to BM25.
63
+ embed_fn: Callable mapping text → embedding vector.
64
+ entity_extractor: Entity extractor. Defaults to
65
+ :func:`default_extract_entities`.
66
+ documents: Initial documents to index (each dict must have ``"text"``).
67
+ """
68
+
69
+ def __init__(
70
+ self,
71
+ vector_store: Any = None,
72
+ embed_fn: Any = None,
73
+ entity_extractor: Callable[[str], list[str]] | None = None,
74
+ documents: list[dict[str, Any]] | None = None,
75
+ **_kwargs: Any,
76
+ ) -> None:
77
+ self._store = vector_store
78
+ self._embed_fn = embed_fn
79
+ self._extract = entity_extractor or default_extract_entities
80
+ self._docs: list[dict[str, Any]] = []
81
+ self._entity_index: dict[str, set[int]] = {}
82
+ self._bm25 = _BM25()
83
+ if documents:
84
+ self.index(documents)
85
+
86
+ def index(self, documents: list[dict[str, Any]]) -> None:
87
+ """Index documents for dual-level retrieval."""
88
+ self._docs = documents
89
+ self._entity_index = {}
90
+ for idx, doc in enumerate(documents):
91
+ for ent in self._extract(doc.get("text", "")):
92
+ self._entity_index.setdefault(ent.lower(), set()).add(idx)
93
+ self._bm25.index(documents)
94
+ if self._store is not None and self._embed_fn is not None:
95
+ for doc in documents:
96
+ embedding = self._embed_fn(doc.get("text", ""))
97
+ self._store.add(
98
+ ids=[doc.get("id", str(id(doc)))],
99
+ embeddings=[embedding],
100
+ documents=[doc.get("text", "")],
101
+ metadata=[doc.get("metadata", {})],
102
+ )
103
+ logger.info(
104
+ "graph retriever indexed",
105
+ docs=len(documents),
106
+ entities=len(self._entity_index),
107
+ )
108
+
109
+ def retrieve(
110
+ self,
111
+ query: str,
112
+ top_k: int = 10,
113
+ **kwargs: Any,
114
+ ) -> list[dict[str, Any]]:
115
+ """Retrieve top_k docs by fusing entity-level and semantic-level rankings."""
116
+ pool = max(top_k * 2, top_k)
117
+ entity_ranks = self._entity_level(query, pool)
118
+ semantic_ranks = self._semantic_level(query, pool)
119
+ fused = _rrf(entity_ranks, semantic_ranks)[:top_k]
120
+ return [
121
+ {**self._docs[idx], "score": score, "method": "graph"}
122
+ for idx, score in fused
123
+ if 0 <= idx < len(self._docs)
124
+ ]
125
+
126
+ def _entity_level(self, query: str, top_k: int) -> list[tuple[int, float]]:
127
+ """High-level ranking: docs containing query entities, weighted by overlap."""
128
+ query_entities = [e.lower() for e in self._extract(query)]
129
+ if not query_entities:
130
+ return self._bm25.score(query, top_k)
131
+ counts: Counter[int] = Counter()
132
+ for ent in query_entities:
133
+ for doc_idx in self._entity_index.get(ent, ()):
134
+ counts[doc_idx] += 1
135
+ if not counts:
136
+ return self._bm25.score(query, top_k)
137
+ ranked = sorted(counts.items(), key=lambda x: x[1], reverse=True)[:top_k]
138
+ return [(idx, float(c)) for idx, c in ranked]
139
+
140
+ def _semantic_level(self, query: str, top_k: int) -> list[tuple[int, float]]:
141
+ """Low-level ranking: dense vector similarity (falls back to BM25)."""
142
+ if self._store is None or self._embed_fn is None:
143
+ return self._bm25.score(query, top_k)
144
+ embedding = self._embed_fn(query)
145
+ dense = self._store.search(embedding, top_k=top_k)
146
+ mapped: list[tuple[int, float]] = []
147
+ for r in dense:
148
+ doc_text = r.get("document", r.get("text", ""))
149
+ score = float(r.get("score", 0.0))
150
+ for i, doc in enumerate(self._docs):
151
+ if doc.get("text", "") == doc_text:
152
+ mapped.append((i, score))
153
+ break
154
+ return mapped
155
+
156
+ @property
157
+ def entities(self) -> dict[str, set[int]]:
158
+ """Expose the entity → doc-indices index (the live internal dict; callers must not mutate it)."""
159
+ return self._entity_index
@@ -20,7 +20,13 @@ Requires the ``[api]`` extra::
20
20
  from __future__ import annotations
21
21
 
22
22
  try:
23
- from .auth import AuthMiddleware, AuthUser, create_token, decode_token
23
+ from .auth import (
24
+ AuthMiddleware,
25
+ AuthUser,
26
+ create_token,
27
+ decode_token,
28
+ decode_token_auto,
29
+ )
24
30
  from .errors import (
25
31
  APIHTTPException,
26
32
  BadRequestError,
@@ -30,6 +36,7 @@ try:
30
36
  from .health import ComponentHealth, HealthChecker, HealthStatus
31
37
  from .pagination import PaginatedResponse, PaginationMeta, paginate
32
38
  from .rate_limit import RateLimiter, RateLimitMiddleware
39
+ from .rbac import Role, has_role, require_role
33
40
  except ImportError as _exc:
34
41
  _MISSING_MSG = (
35
42
  "dataenginex.api requires the [api] extra. Install it with: pip install dataenginex[api]"
@@ -42,6 +49,7 @@ __all__ = [
42
49
  "AuthUser",
43
50
  "create_token",
44
51
  "decode_token",
52
+ "decode_token_auto",
45
53
  # Errors
46
54
  "APIHTTPException",
47
55
  "BadRequestError",
@@ -58,4 +66,8 @@ __all__ = [
58
66
  # Rate limiting
59
67
  "RateLimiter",
60
68
  "RateLimitMiddleware",
69
+ # RBAC
70
+ "Role",
71
+ "has_role",
72
+ "require_role",
61
73
  ]