dataenginex 0.10.0__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/ci.yml +23 -1
  2. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/docs-notify.yml +1 -1
  3. {dataenginex-0.10.0 → dataenginex-1.0.0}/CHANGELOG.md +39 -0
  4. {dataenginex-0.10.0 → dataenginex-1.0.0}/CLAUDE.md +1 -1
  5. dataenginex-1.0.0/CODE_OF_CONDUCT.md +57 -0
  6. {dataenginex-0.10.0 → dataenginex-1.0.0}/PKG-INFO +1 -2
  7. {dataenginex-0.10.0 → dataenginex-1.0.0}/README.md +1 -1
  8. dataenginex-1.0.0/SECURITY.md +71 -0
  9. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/ci-cd.md +1 -1
  10. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/index.md +1 -1
  11. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/observability.md +1 -1
  12. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/sdlc.md +1 -1
  13. {dataenginex-0.10.0 → dataenginex-1.0.0}/pyproject.toml +2 -2
  14. dataenginex-1.0.0/src/dataenginex/ai/__init__.py +79 -0
  15. dataenginex-1.0.0/src/dataenginex/ai/memory/__init__.py +16 -0
  16. dataenginex-1.0.0/src/dataenginex/ai/memory/base.py +59 -0
  17. dataenginex-1.0.0/src/dataenginex/ai/memory/episodic.py +40 -0
  18. dataenginex-1.0.0/src/dataenginex/ai/memory/long_term.py +52 -0
  19. dataenginex-1.0.0/src/dataenginex/ai/observability/__init__.py +9 -0
  20. dataenginex-1.0.0/src/dataenginex/ai/observability/audit.py +34 -0
  21. dataenginex-1.0.0/src/dataenginex/ai/observability/cost.py +57 -0
  22. dataenginex-1.0.0/src/dataenginex/ai/observability/metrics.py +26 -0
  23. dataenginex-1.0.0/src/dataenginex/ai/routing/__init__.py +7 -0
  24. dataenginex-1.0.0/src/dataenginex/ai/routing/anthropic.py +58 -0
  25. dataenginex-1.0.0/src/dataenginex/ai/routing/huggingface.py +36 -0
  26. dataenginex-1.0.0/src/dataenginex/ai/routing/ollama.py +48 -0
  27. dataenginex-1.0.0/src/dataenginex/ai/routing/openai.py +58 -0
  28. dataenginex-1.0.0/src/dataenginex/ai/routing/router.py +56 -0
  29. dataenginex-1.0.0/src/dataenginex/ai/runtime/__init__.py +27 -0
  30. dataenginex-1.0.0/src/dataenginex/ai/runtime/checkpoint.py +31 -0
  31. dataenginex-1.0.0/src/dataenginex/ai/runtime/executor.py +173 -0
  32. dataenginex-1.0.0/src/dataenginex/ai/runtime/sandbox.py +220 -0
  33. dataenginex-1.0.0/src/dataenginex/ai/workflows/__init__.py +9 -0
  34. dataenginex-1.0.0/src/dataenginex/ai/workflows/conditions.py +48 -0
  35. dataenginex-1.0.0/src/dataenginex/ai/workflows/dag.py +124 -0
  36. dataenginex-1.0.0/src/dataenginex/ai/workflows/human_loop.py +47 -0
  37. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/run_history.py +1 -0
  38. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/gates.py +1 -2
  39. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/spark.py +1 -2
  40. dataenginex-1.0.0/tests/unit/test_ai_modules.py +755 -0
  41. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_llm.py +24 -4
  42. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_quality_gates.py +4 -12
  43. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_run_history.py +1 -0
  44. {dataenginex-0.10.0 → dataenginex-1.0.0}/uv.lock +1 -12
  45. dataenginex-0.10.0/src/dataenginex/ai/__init__.py +0 -13
  46. {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/commands/new-feature.md +0 -0
  47. {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/commands/validate.md +0 -0
  48. {dataenginex-0.10.0 → dataenginex-1.0.0}/.claude/settings.json +0 -0
  49. {dataenginex-0.10.0 → dataenginex-1.0.0}/.env.template +0 -0
  50. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  51. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  52. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  53. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
  54. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/dependabot.yml +0 -0
  55. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/labels.yml +0 -0
  56. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/release-pr-template.md +0 -0
  57. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/auto-pr-dev-to-main.yml +0 -0
  58. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/auto-pr-to-dev.yml +0 -0
  59. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/claude.yml +0 -0
  60. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/docker-build-push.yml +0 -0
  61. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/enforce-dev-to-main.yml +0 -0
  62. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/label-sync.yml +0 -0
  63. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/pypi-publish.yml +0 -0
  64. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/release-dex.yml +0 -0
  65. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/release-please.yml +0 -0
  66. {dataenginex-0.10.0 → dataenginex-1.0.0}/.github/workflows/security.yml +0 -0
  67. {dataenginex-0.10.0 → dataenginex-1.0.0}/.gitignore +0 -0
  68. {dataenginex-0.10.0 → dataenginex-1.0.0}/.gitleaks.toml +0 -0
  69. {dataenginex-0.10.0 → dataenginex-1.0.0}/.pre-commit-config.yaml +0 -0
  70. {dataenginex-0.10.0 → dataenginex-1.0.0}/.python-version +0 -0
  71. {dataenginex-0.10.0 → dataenginex-1.0.0}/.release-please-manifest.json +0 -0
  72. {dataenginex-0.10.0 → dataenginex-1.0.0}/CODEOWNERS +0 -0
  73. {dataenginex-0.10.0 → dataenginex-1.0.0}/CONTRIBUTING.md +0 -0
  74. {dataenginex-0.10.0 → dataenginex-1.0.0}/Dockerfile +0 -0
  75. {dataenginex-0.10.0 → dataenginex-1.0.0}/LICENSE +0 -0
  76. {dataenginex-0.10.0 → dataenginex-1.0.0}/docker-compose.test.yml +0 -0
  77. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/adr/0000-template.md +0 -0
  78. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/adr/0001-medallion-architecture.md +0 -0
  79. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/api.md +0 -0
  80. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/core.md +0 -0
  81. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/dashboard.md +0 -0
  82. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/data.md +0 -0
  83. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/index.md +0 -0
  84. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/lakehouse.md +0 -0
  85. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/middleware.md +0 -0
  86. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/ml.md +0 -0
  87. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/plugins.md +0 -0
  88. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/api-reference/warehouse.md +0 -0
  89. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/architecture.md +0 -0
  90. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/contributing.md +0 -0
  91. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/development.md +0 -0
  92. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/quickstart.md +0 -0
  93. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/release-notes.md +0 -0
  94. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/roadmap/project-roadmap.csv +0 -0
  95. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/roadmap/project-roadmap.json +0 -0
  96. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/security-scanning.md +0 -0
  97. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-21-phase-0-foundation.md +0 -0
  98. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-dataenginex-1.0-master-plan.md +0 -0
  99. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-1-data-layer.md +0 -0
  100. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-6a-dex-engine-integration.md +0 -0
  101. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-22-phase-6b-dex-studio-redesign.md +0 -0
  102. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-23-careerdex-example.md +0 -0
  103. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-23-dex-studio-direct-import.md +0 -0
  104. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-24-dex-naming-architecture-dry.md +0 -0
  105. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/plans/2026-03-28-unified-docs-and-tooling.md +0 -0
  106. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-21-dataenginex-v2-system-redesign.md +0 -0
  107. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-22-phase-6-integration-design.md +0 -0
  108. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-23-dex-studio-direct-import-design.md +0 -0
  109. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-24-dex-naming-architecture-dry-design.md +0 -0
  110. {dataenginex-0.10.0 → dataenginex-1.0.0}/docs/superpowers/specs/2026-03-28-unified-docs-and-tooling-design.md +0 -0
  111. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/01_hello_pipeline.py +0 -0
  112. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/02_api_quickstart.py +0 -0
  113. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/03_quality_gate.py +0 -0
  114. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/04_ml_training.py +0 -0
  115. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/05_rag_demo.py +0 -0
  116. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/06_llm_quickstart.py +0 -0
  117. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/07_api_ingestion.py +0 -0
  118. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/08_spark_ml.py +0 -0
  119. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/09_feature_engineering.py +0 -0
  120. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/10_model_analysis.py +0 -0
  121. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/GUIDE.md +0 -0
  122. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dashboard/dashboard_config.yaml +0 -0
  123. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dashboard/run_dashboard.py +0 -0
  124. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/dex.yaml +0 -0
  125. {dataenginex-0.10.0 → dataenginex-1.0.0}/examples/movies.csv +0 -0
  126. {dataenginex-0.10.0 → dataenginex-1.0.0}/poe_tasks.toml +0 -0
  127. {dataenginex-0.10.0 → dataenginex-1.0.0}/release-please-config.json +0 -0
  128. {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/GUIDE.md +0 -0
  129. {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/localstack/create-buckets.sh +0 -0
  130. {dataenginex-0.10.0 → dataenginex-1.0.0}/scripts/promote.sh +0 -0
  131. {dataenginex-0.10.0 → dataenginex-1.0.0}/sonar-project.properties +0 -0
  132. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/README.md +0 -0
  133. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/RELEASE_NOTES.md +0 -0
  134. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/__init__.py +0 -0
  135. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/agents/__init__.py +0 -0
  136. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/agents/builtin.py +0 -0
  137. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/retrieval/__init__.py +0 -0
  138. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/retrieval/builtin.py +0 -0
  139. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/tools/__init__.py +0 -0
  140. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ai/tools/builtin.py +0 -0
  141. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/__init__.py +0 -0
  142. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/auth.py +0 -0
  143. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/errors.py +0 -0
  144. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/factory.py +0 -0
  145. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/health.py +0 -0
  146. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/pagination.py +0 -0
  147. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/rate_limit.py +0 -0
  148. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/__init__.py +0 -0
  149. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/ai.py +0 -0
  150. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/data.py +0 -0
  151. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/health.py +0 -0
  152. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/ml.py +0 -0
  153. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/pipelines.py +0 -0
  154. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/root.py +0 -0
  155. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/routers/system.py +0 -0
  156. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/api/schemas.py +0 -0
  157. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/__init__.py +0 -0
  158. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/main.py +0 -0
  159. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/run.py +0 -0
  160. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/serve.py +0 -0
  161. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/cli/train.py +0 -0
  162. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/__init__.py +0 -0
  163. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/defaults.py +0 -0
  164. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/loader.py +0 -0
  165. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/config/schema.py +0 -0
  166. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/__init__.py +0 -0
  167. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/exceptions.py +0 -0
  168. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/interfaces.py +0 -0
  169. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/medallion_architecture.py +0 -0
  170. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/quality.py +0 -0
  171. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/registry.py +0 -0
  172. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/schemas.py +0 -0
  173. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/core/validators.py +0 -0
  174. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/__init__.py +0 -0
  175. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/app.py +0 -0
  176. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/dashboard/panels.py +0 -0
  177. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/__init__.py +0 -0
  178. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/__init__.py +0 -0
  179. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/csv.py +0 -0
  180. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/duckdb.py +0 -0
  181. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/connectors/legacy.py +0 -0
  182. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/__init__.py +0 -0
  183. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/dag.py +0 -0
  184. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/pipeline/runner.py +0 -0
  185. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/profiler.py +0 -0
  186. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/quality/__init__.py +0 -0
  187. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/registry.py +0 -0
  188. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/transforms/__init__.py +0 -0
  189. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/data/transforms/sql.py +0 -0
  190. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/__init__.py +0 -0
  191. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/catalog.py +0 -0
  192. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/partitioning.py +0 -0
  193. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/lakehouse/storage.py +0 -0
  194. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/__init__.py +0 -0
  195. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/logging_config.py +0 -0
  196. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/metrics.py +0 -0
  197. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/metrics_middleware.py +0 -0
  198. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/request_logging.py +0 -0
  199. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/middleware/tracing.py +0 -0
  200. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/__init__.py +0 -0
  201. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/drift.py +0 -0
  202. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/features/__init__.py +0 -0
  203. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/features/builtin.py +0 -0
  204. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/llm.py +0 -0
  205. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/metrics.py +0 -0
  206. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/mlflow_registry.py +0 -0
  207. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/registry.py +0 -0
  208. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/scheduler.py +0 -0
  209. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving.py +0 -0
  210. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving_engine/__init__.py +0 -0
  211. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/serving_engine/builtin.py +0 -0
  212. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/tracking/__init__.py +0 -0
  213. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/tracking/builtin.py +0 -0
  214. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/training.py +0 -0
  215. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/ml/vectorstore.py +0 -0
  216. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/orchestration/__init__.py +0 -0
  217. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/orchestration/builtin.py +0 -0
  218. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/plugins/__init__.py +0 -0
  219. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/plugins/registry.py +0 -0
  220. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/py.typed +0 -0
  221. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/__init__.py +0 -0
  222. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/audit.py +0 -0
  223. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/gate.py +0 -0
  224. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/masking.py +0 -0
  225. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/secops/pii.py +0 -0
  226. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/__init__.py +0 -0
  227. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/lineage.py +0 -0
  228. {dataenginex-0.10.0 → dataenginex-1.0.0}/src/dataenginex/warehouse/transforms.py +0 -0
  229. {dataenginex-0.10.0 → dataenginex-1.0.0}/tasks/findings.md +0 -0
  230. {dataenginex-0.10.0 → dataenginex-1.0.0}/tasks/lessons.md +0 -0
  231. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/__init__.py +0 -0
  232. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/__init__.py +0 -0
  233. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_connector.py +0 -0
  234. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_feature_store.py +0 -0
  235. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_tracker.py +0 -0
  236. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conformance/test_transform.py +0 -0
  237. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/conftest.py +0 -0
  238. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/__init__.py +0 -0
  239. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_data.py +0 -0
  240. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_jobs.csv +0 -0
  241. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/fixtures/sample_jobs.json +0 -0
  242. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/__init__.py +0 -0
  243. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_cli_run.py +0 -0
  244. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_config_cli.py +0 -0
  245. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_full_app.py +0 -0
  246. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_pipeline_e2e.py +0 -0
  247. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/integration/test_storage_real.py +0 -0
  248. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/load/__init__.py +0 -0
  249. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/__init__.py +0 -0
  250. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_agent_runtime.py +0 -0
  251. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ai_router.py +0 -0
  252. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_auth.py +0 -0
  253. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_factory.py +0 -0
  254. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_pagination.py +0 -0
  255. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_rate_limit.py +0 -0
  256. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_schemas.py +0 -0
  257. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_api_validators.py +0 -0
  258. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_agent.py +0 -0
  259. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_feature_store.py +0 -0
  260. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_serving.py +0 -0
  261. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_builtin_tracker.py +0 -0
  262. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_cli_train.py +0 -0
  263. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_config_loader.py +0 -0
  264. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_config_schema.py +0 -0
  265. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_exceptions.py +0 -0
  266. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_interfaces.py +0 -0
  267. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_core_registry.py +0 -0
  268. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_csv_connector.py +0 -0
  269. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_data.py +0 -0
  270. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_data_router.py +0 -0
  271. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_drift_scheduler.py +0 -0
  272. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_duckdb_connector.py +0 -0
  273. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_errors.py +0 -0
  274. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_health.py +0 -0
  275. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_lakehouse.py +0 -0
  276. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_logging.py +0 -0
  277. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_medallion.py +0 -0
  278. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_metrics.py +0 -0
  279. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_middleware.py +0 -0
  280. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ml.py +0 -0
  281. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_ml_router.py +0 -0
  282. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_pipeline_dag.py +0 -0
  283. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_pipeline_runner.py +0 -0
  284. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_plugins.py +0 -0
  285. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_quality_spark.py +0 -0
  286. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_retriever.py +0 -0
  287. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_scheduler.py +0 -0
  288. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_secops.py +0 -0
  289. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_spark_fixtures.py +0 -0
  290. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_sql_transforms.py +0 -0
  291. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_storage_abstraction.py +0 -0
  292. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_system_router.py +0 -0
  293. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_tracing.py +0 -0
  294. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_vectorstore.py +0 -0
  295. {dataenginex-0.10.0 → dataenginex-1.0.0}/tests/unit/test_warehouse.py +0 -0
@@ -7,6 +7,10 @@ on:
7
7
  branches: [main, dev]
8
8
  workflow_dispatch:
9
9
 
10
+ schedule:
11
+ # Weekly Python version compatibility check
12
+ - cron: '0 0 * * 0'
13
+
10
14
  permissions:
11
15
  contents: read
12
16
 
@@ -26,7 +30,7 @@ jobs:
26
30
  - run: uv run poe quality
27
31
 
28
32
  test:
29
- name: Tests
33
+ name: Tests (Python 3.13)
30
34
  runs-on: ubuntu-latest
31
35
  needs: quality
32
36
  steps:
@@ -45,3 +49,21 @@ jobs:
45
49
  fail_ci_if_error: false
46
50
  env:
47
51
  CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
52
+
53
+ test-compat:
54
+ name: Python Compatibility
55
+ runs-on: ubuntu-latest
56
+ if: github.event_name == 'schedule'
57
+ strategy:
58
+ matrix:
59
+ python-version: ["3.11", "3.12"]
60
+ steps:
61
+ - uses: actions/checkout@v6
62
+ - uses: astral-sh/setup-uv@v7
63
+ with:
64
+ version: "latest"
65
+ python-version: ${{ matrix.python-version }}
66
+ - run: uv sync --group ml
67
+ env:
68
+ UV_PROJECT_ENVIRONMENT: .venv
69
+ - run: uv run poe check-all
@@ -9,7 +9,7 @@ jobs:
9
9
  notify:
10
10
  runs-on: ubuntu-latest
11
11
  steps:
12
- - uses: peter-evans/repository-dispatch@v3
12
+ - uses: peter-evans/repository-dispatch@v4
13
13
  with:
14
14
  token: ${{ secrets.DOCS_DISPATCH_TOKEN }}
15
15
  repository: TheDataEngineX/docs
@@ -28,6 +28,45 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
28
28
 
29
29
  ## [Unreleased]
30
30
 
31
+ ## [1.0.0] - 2026-04-07
32
+
33
+ ### Highlights
34
+
35
+ - **Complete Data + ML + AI Framework**: All phases from the v1.0 master plan implemented — config-driven pipeline via `dex.yaml`, BackendRegistry pattern for swappable backends, unified CLI.
36
+ - **Data Layer**: DuckDB connector (default), CSV connector, PipelineRunner with DAG resolution, transforms (filter, derive, cast, deduplicate), quality gates (completeness, uniqueness), column-level lineage tracking, built-in cron scheduler.
37
+ - **ML Layer**: SQLite-backed experiment tracker, DuckDB feature store, sklearn/xgboost training integration, model registry with versioning (dev → staging → production), built-in model serving via FastAPI, PSI drift detection.
38
+ - **AI Layer**: Built-in ReAct agent runtime, Ollama LLM provider (default), tool registry (sql_query, predict, search), BM25 sparse retrieval (DuckDB FTS), dense vector retrieval (DuckDB VSS HNSW), hybrid retrieval with RRF fusion, agent memory (short-term + episodic).
39
+ - **CLI Commands**: `dex init`, `dex validate`, `dex version`, `dex serve`, `dex run`, `dex train`, `dex agent`, `dex query`.
40
+ - **API**: FastAPI app factory, JWT auth, rate limiting, health endpoints, project CRUD, pipeline run/status, data explorer, ML experiments/models, agent chat/manage, WebSocket for live logs and streaming.
41
+ - **Backend Registry Pattern**: Every subsystem follows ABC + BackendRegistry[T] pattern with built-in implementations and optional extras (Dagster, MLflow, Qdrant, LanceDB, sentence-transformers, PySpark).
42
+
43
+ ### Breaking Changes
44
+
45
+ - **FastAPI now optional**: Core install (`pip install dataenginex`) includes only lightweight deps. Install `[api]` extra for FastAPI/uvicorn: `pip install dataenginex[api]`
46
+ - **Cloud SDKs now optional**: Core install no longer requires boto3/google-cloud-storage/google-cloud-bigquery. Install `[cloud]` extra: `pip install dataenginex[cloud]`
47
+ - **Routers moved**: API routers moved to application packages. Use `from dataenginex.api import ...` directly (requires `[api]` extra)
48
+ - **Root `__init__.py` slimmed**: Re-exports removed. Import from submodules directly: `from dataenginex.api import HealthChecker` etc.
49
+
50
+ ### Added
51
+
52
+ - **Full project templates**: `dex init --template [minimal|data-pipeline|ml-project|ai-agent|full-stack|career-intelligence]`
53
+ - **Docker support**: Multi-stage Dockerfile (`ghcr.io/thedataenginex/dex`), docker-compose.yml for full stack
54
+ - **SecOps**: PII scanning in pipelines, masking, audit trail
55
+ - **Quality schema**: Spark audit integration for data quality validation
56
+ - **Examples**: 10 runnable examples in `examples/` directory
57
+
58
+ ### Verification checklist
59
+
60
+ 1. `uv run poe lint` — Ruff checks clean
61
+ 2. `uv run poe typecheck` — mypy strict (all modules)
62
+ 3. `uv run poe test` — 663 passed, 36 skipped
63
+ 4. `pip install dataenginex` — installs successfully
64
+ 5. `dex validate dex.yaml` — validates config
65
+ 6. `dex version` — shows version
66
+
67
+ [Unreleased]: https://github.com/TheDataEngineX/DEX/compare/v1.0.0...HEAD
68
+ [1.0.0]: https://github.com/TheDataEngineX/DEX/releases/tag/v1.0.0
69
+
31
70
  ## [0.7.1] - 2026-03-17
32
71
 
33
72
  ### Fixed
@@ -11,7 +11,7 @@ Goal is to save Claude code tokens for lower cost without losing quality.
11
11
 
12
12
  | Package | Location | Purpose |
13
13
  |---------|----------|---------|
14
- | `dataenginex` | `src/dataenginex/` | Core framework — config system, backend registry, CLI, API, ML, AI agents |
14
+ | `dataenginex` | `src/dataenginex/` | Core framework — config, registry, CLI, API, ML, AI (routing, runtime, memory, observability, workflows) |
15
15
 
16
16
  **Stack:** Python 3.13+ · FastAPI · DuckDB · structlog · Pydantic · Click · Rich · uv · Ruff · mypy strict · pytest
17
17
 
@@ -0,0 +1,57 @@
1
+ # Contributor Covenant Code of Conduct
2
+
3
+ ## Our Pledge
4
+
5
+ We as members, contributors, and leaders pledge to make participation in our
6
+ community a harassment-free experience for everyone, regardless of age, body
7
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
8
+ identity and expression, level of experience, education, socio-economic status,
9
+ nationality, personal appearance, race, religion, or sexual identity and
10
+ orientation.
11
+
12
+ We pledge to act and interact in ways that contribute to an open, welcoming,
13
+ diverse, inclusive, and healthy community.
14
+
15
+ ## Our Standards
16
+
17
+ Examples of behavior that contributes to a positive environment:
18
+
19
+ - Using welcoming and inclusive language
20
+ - Being respectful of differing viewpoints and experiences
21
+ - Gracefully accepting constructive criticism
22
+ - Focusing on what is best for the community
23
+ - Showing empathy towards other community members
24
+
25
+ Examples of unacceptable behavior:
26
+
27
+ - The use of sexualized language or imagery, and sexual attention or advances
28
+ - Trolling, insulting or derogatory comments, and personal or political attacks
29
+ - Public or private harassment
30
+ - Publishing others' private information without explicit permission
31
+ - Other conduct which could reasonably be considered inappropriate
32
+
33
+ ## Enforcement Responsibilities
34
+
35
+ Community leaders are responsible for clarifying and enforcing our standards of
36
+ acceptable behavior and will take appropriate and fair corrective action in
37
+ response to any behavior that they deem inappropriate, threatening, offensive,
38
+ or harmful.
39
+
40
+ ## Scope
41
+
42
+ This Code of Conduct applies within all community spaces, and also applies when
43
+ an individual is officially representing the community in public spaces.
44
+
45
+ ## Enforcement
46
+
47
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
48
+ reported to the community leaders responsible for enforcement at
49
+ **conduct@thedataenginex.dev**.
50
+
51
+ All complaints will be reviewed and investigated promptly and fairly.
52
+
53
+ ## Attribution
54
+
55
+ This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org),
56
+ version 2.1, available at
57
+ <https://www.contributor-covenant.org/version/2/1/code_of_conduct.html>.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dataenginex
3
- Version: 0.10.0
3
+ Version: 1.0.0
4
4
  Summary: DataEngineX - Core framework for AI/ML/Data engineering projects
5
5
  Author-email: Jay <jayapal.myaka99@gmail.com>
6
6
  License: MIT License
@@ -40,7 +40,6 @@ Requires-Dist: prometheus-client>=0.24.1
40
40
  Requires-Dist: pyarrow>=23.0.1
41
41
  Requires-Dist: pydantic>=2.10.0
42
42
  Requires-Dist: python-dotenv>=1.2.1
43
- Requires-Dist: python-json-logger>=4.0.0
44
43
  Requires-Dist: pyyaml>=6.0.3
45
44
  Requires-Dist: rich>=14.3.3
46
45
  Requires-Dist: structlog>=25.5.0
@@ -112,7 +112,7 @@ ______________________________________________________________________
112
112
 
113
113
  ## Development
114
114
 
115
- See [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) for full setup.
115
+ See [docs/development.md](docs/development.md) for full setup.
116
116
 
117
117
  ```bash
118
118
  uv run poe check-all # lint + typecheck + tests
@@ -0,0 +1,71 @@
1
+ # Security Policy
2
+
3
+ ## Supported Versions
4
+
5
+ | Version | Supported |
6
+ |---------|-----------|
7
+ | Latest minor release (1.0.x) | ✅ |
8
+ | Previous minor release | ✅ (security fixes only) |
9
+ | Older versions | ❌ |
10
+
11
+ ## Reporting a Vulnerability
12
+
13
+ **Do NOT open a public issue for security vulnerabilities.**
14
+
15
+ Instead, please report them via one of these channels:
16
+
17
+ 1. **Email**: security@thedataenginex.dev
18
+ 2. **GitHub Security Advisories**: Use the "Report a vulnerability" button on the Security tab
19
+
20
+ ### What to Include
21
+
22
+ - Description of the vulnerability
23
+ - Steps to reproduce
24
+ - Potential impact assessment
25
+ - Suggested fix (if any)
26
+
27
+ ### Response Timeline
28
+
29
+ | Stage | Timeline |
30
+ |-------|----------|
31
+ | Acknowledgment | Within 48 hours |
32
+ | Initial assessment | Within 5 business days |
33
+ | Fix development | Within 30 days (critical), 90 days (non-critical) |
34
+ | Public disclosure | After fix is released |
35
+
36
+ ## Disclosure Policy
37
+
38
+ We follow [coordinated disclosure](https://en.wikipedia.org/wiki/Coordinated_vulnerability_disclosure).
39
+ We will credit reporters in the security advisory unless they prefer to remain anonymous.
40
+
41
+ ## Security Practices
42
+
43
+ DataEngineX follows these security practices:
44
+
45
+ - **No hardcoded secrets** — all credentials via environment variables
46
+ - **Parameterized queries** — never SQL concatenation
47
+ - **Input validation** — Pydantic models at API boundaries
48
+ - **Dependency auditing** — automated via `uv run poe security`
49
+ - **Pickle safety** — SafeUnpickler with HMAC verification for model loading
50
+ - **Container security** — non-root users, minimal base images
51
+ - **HTTPS only** — all production traffic encrypted
52
+ - **Least privilege** — minimal permissions for service accounts
53
+
54
+ ## Security-Related Dependencies
55
+
56
+ | Dependency | Purpose | Security Note |
57
+ |------------|---------|---------------|
58
+ | pydantic | Config validation | Validates all inputs |
59
+ | python-dotenv | Env var loading | Never commit .env files |
60
+ | httpx | HTTP client | Timeout configured |
61
+ | structlog | Logging | No PII in logs by default |
62
+
63
+ ## Auditing
64
+
65
+ Run security audits locally:
66
+
67
+ ```bash
68
+ uv run poe security # pip-audit for vulnerabilities
69
+ ```
70
+
71
+ CI runs `pip-audit` and dependency scanning on every PR.
@@ -407,4 +407,4 @@ gh workflow run pypi-publish.yml -f tag=v<version>
407
407
 
408
408
  ______________________________________________________________________
409
409
 
410
- **[← Back to Documentation Hub](docs-hub.md)**
410
+ **[← Back to Documentation](index.md)**
@@ -4,7 +4,7 @@ Welcome to the DEX docs site.
4
4
 
5
5
  Start here:
6
6
 
7
- - [Documentation Hub](docs-hub.md)
8
7
  - [Development Setup](development.md)
9
8
  - [Contributing](contributing.md)
10
9
  - [Architecture](architecture.md)
10
+ - [Quickstart](quickstart.md)
@@ -795,4 +795,4 @@ ______________________________________________________________________
795
795
 
796
796
  ______________________________________________________________________
797
797
 
798
- **[← Back to Documentation Hub](docs-hub.md)**
798
+ **[← Back to Documentation](index.md)**
@@ -203,4 +203,4 @@ ______________________________________________________________________
203
203
 
204
204
  ______________________________________________________________________
205
205
 
206
- **[← Back to Documentation Hub](docs-hub.md)**
206
+ **[← Back to Documentation](index.md)**
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "dataenginex"
3
- version = "0.10.0"
3
+ version = "1.0.0"
4
4
  description = "DataEngineX - Core framework for AI/ML/Data engineering projects"
5
5
  authors = [
6
6
  {name = "Jay", email = "jayapal.myaka99@gmail.com"}
@@ -22,7 +22,7 @@ dependencies = [
22
22
  "fastapi>=0.135.1",
23
23
  "uvicorn>=0.42.0",
24
24
  "structlog>=25.5.0",
25
- "python-json-logger>=4.0.0",
25
+ # OpenTelemetry
26
26
  "opentelemetry-api>=1.40.0",
27
27
  "opentelemetry-sdk>=1.40.0",
28
28
  "opentelemetry-instrumentation-fastapi>=0.61b0",
@@ -0,0 +1,79 @@
1
+ """AI layer — agents, retrieval, tools, routing, runtime, memory, observability, workflows.
2
+
3
+ Public API::
4
+
5
+ from dataenginex.ai import (
6
+ retriever_registry, agent_registry, tool_registry,
7
+ BuiltinRetriever, BuiltinAgentRuntime,
8
+ ModelRouter, BaseProvider,
9
+ Sandbox, SandboxConfig,
10
+ AuditLog, CostTracker,
11
+ AgentDAG, Condition,
12
+ ShortTermMemory, LongTermMemory, EpisodicMemory,
13
+ CheckpointManager, AgentExecutor, AgentConfig,
14
+ AgentMetrics,
15
+ )
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from dataenginex.ai.agents import agent_registry
21
+ from dataenginex.ai.agents.builtin import BuiltinAgentRuntime
22
+ from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
23
+ from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
24
+ from dataenginex.ai.memory.long_term import LongTermMemory
25
+ from dataenginex.ai.observability.audit import AuditEntry, AuditLog
26
+ from dataenginex.ai.observability.cost import CostTracker, TokenUsage
27
+ from dataenginex.ai.observability.metrics import AgentMetrics
28
+ from dataenginex.ai.retrieval import retriever_registry
29
+ from dataenginex.ai.retrieval.builtin import BuiltinRetriever
30
+ from dataenginex.ai.routing.router import BaseProvider, ModelRouter
31
+ from dataenginex.ai.runtime.checkpoint import Checkpoint, CheckpointManager
32
+ from dataenginex.ai.runtime.executor import AgentConfig, AgentExecutor, AgentResponse
33
+ from dataenginex.ai.runtime.sandbox import Sandbox, SandboxConfig, SandboxResult
34
+ from dataenginex.ai.tools import ToolRegistry, ToolSpec, tool_registry
35
+ from dataenginex.ai.workflows.conditions import Condition
36
+ from dataenginex.ai.workflows.dag import AgentDAG
37
+ from dataenginex.ai.workflows.human_loop import ApprovalGate
38
+
39
+ __all__ = [
40
+ # Registries
41
+ "agent_registry",
42
+ "retriever_registry",
43
+ "tool_registry",
44
+ # Agents
45
+ "BuiltinAgentRuntime",
46
+ "BuiltinRetriever",
47
+ # Tools
48
+ "ToolRegistry",
49
+ "ToolSpec",
50
+ # Memory
51
+ "BaseMemory",
52
+ "MemoryEntry",
53
+ "ShortTermMemory",
54
+ "LongTermMemory",
55
+ "EpisodicMemory",
56
+ "Episode",
57
+ # Observability
58
+ "AuditEntry",
59
+ "AuditLog",
60
+ "CostTracker",
61
+ "TokenUsage",
62
+ "AgentMetrics",
63
+ # Routing
64
+ "BaseProvider",
65
+ "ModelRouter",
66
+ # Runtime
67
+ "AgentConfig",
68
+ "AgentExecutor",
69
+ "AgentResponse",
70
+ "Checkpoint",
71
+ "CheckpointManager",
72
+ "Sandbox",
73
+ "SandboxConfig",
74
+ "SandboxResult",
75
+ # Workflows
76
+ "AgentDAG",
77
+ "ApprovalGate",
78
+ "Condition",
79
+ ]
@@ -0,0 +1,16 @@
1
+ """Agent memory — short-term, long-term, and episodic memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataenginex.ai.memory.base import BaseMemory, MemoryEntry, ShortTermMemory
6
+ from dataenginex.ai.memory.episodic import Episode, EpisodicMemory
7
+ from dataenginex.ai.memory.long_term import LongTermMemory
8
+
9
+ __all__ = [
10
+ "BaseMemory",
11
+ "Episode",
12
+ "EpisodicMemory",
13
+ "LongTermMemory",
14
+ "MemoryEntry",
15
+ "ShortTermMemory",
16
+ ]
@@ -0,0 +1,59 @@
1
+ """Agent memory system — short-term, long-term, and episodic memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+
10
@dataclass
class MemoryEntry:
    """One item held in an agent memory store.

    ``content`` and ``role`` are required; ``metadata`` and ``timestamp``
    are optional and fall back to an empty dict and ``0.0`` respectively.
    """

    # Text payload of the entry.
    content: str
    # Origin tag; expected values are "user", "assistant", "system", "tool"
    # (not validated here).
    role: str
    # Free-form extra data; a fresh dict is created for every instance.
    metadata: dict[str, Any] = field(default_factory=dict)
    # Defaults to 0.0 when the caller does not provide a value.
    timestamp: float = 0.0
18
+
19
+
20
class BaseMemory(ABC):
    """Abstract interface that every agent memory backend implements.

    All four operations below are abstract, so a subclass can only be
    instantiated once it overrides each of them.
    """

    @abstractmethod
    def add(self, entry: MemoryEntry) -> None:
        """Store *entry* in this memory."""

    @abstractmethod
    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return up to *top_k* stored entries that match *query*.

        The matching strategy is implementation-defined (for example
        substring or keyword-overlap matching); this interface does not
        guarantee any particular similarity model.
        """

    @abstractmethod
    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return up to *n* of the most recent entries."""

    @abstractmethod
    def clear(self) -> None:
        """Remove every stored entry."""
38
+
39
+
40
class ShortTermMemory(BaseMemory):
    """Session/conversation memory — lives in-process, lost on restart.

    Entries are kept in insertion order in a list capped at ``max_entries``;
    when an insertion pushes the list past the cap, the oldest entry is
    dropped.
    """

    def __init__(self, max_entries: int = 100) -> None:
        """Create an empty memory that retains at most *max_entries* entries."""
        self._entries: list[MemoryEntry] = []
        self._max = max_entries

    def add(self, entry: MemoryEntry) -> None:
        """Append *entry*, evicting the oldest entry if the cap is exceeded."""
        self._entries.append(entry)
        if len(self._entries) > self._max:
            self._entries.pop(0)

    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return up to *top_k* entries whose content contains *query*.

        Matching is a case-insensitive substring test; results are returned
        in insertion order (oldest first).
        """
        needle = query.lower()  # lowered once instead of once per entry
        return [e for e in self._entries if needle in e.content.lower()][:top_k]

    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return the last *n* entries (oldest first); empty when ``n <= 0``."""
        if n <= 0:
            # ``self._entries[-0:]`` is the whole list, not an empty one.
            return []
        return self._entries[-n:]

    def clear(self) -> None:
        """Remove every stored entry."""
        self._entries.clear()
@@ -0,0 +1,40 @@
1
+ """Episodic memory — experience replay for task-based learning."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
class Episode(BaseModel):
    """Record of one complete agent task execution and how it turned out.

    Every field is required.
    """

    # Description of the task that was attempted.
    task: str
    # Ordered list of step records; each step is an arbitrary string-keyed dict.
    steps: list[dict[str, Any]]
    # Outcome of the episode, as text.
    outcome: str
    # Numeric score for the episode.
    reward: float
    # Time of the episode as a float.
    timestamp: float
18
+
19
+
20
class EpisodicMemory:
    """Experience replay memory — stores and retrieves past episodes."""

    def __init__(self) -> None:
        """Start with no recorded episodes."""
        self._episodes: list[Episode] = []

    def add_episode(self, episode: Episode) -> None:
        """Append *episode* to the store."""
        self._episodes.append(episode)

    def recall_similar(self, task: str, top_k: int = 5) -> list[Episode]:
        """Return up to *top_k* episodes whose task text overlaps with *task*.

        The score of an episode is the number of whitespace-separated words
        of *task* that occur (case-insensitively, as substrings) in the
        episode's task text. Episodes scoring zero are excluded. Results are
        ordered by descending score; ties keep insertion order because the
        sort is stable.
        """
        # Both the word list and each lowered task text are loop-invariant,
        # so compute them once rather than per (word, episode) pair.
        words = task.lower().split()
        scored: list[tuple[int, Episode]] = []
        for ep in self._episodes:
            haystack = ep.task.lower()
            score = sum(1 for word in words if word in haystack)
            if score > 0:
                scored.append((score, ep))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [ep for _, ep in scored[:top_k]]

    def best_episodes(self, top_k: int = 5) -> list[Episode]:
        """Return up to *top_k* episodes with the highest ``reward``, best first."""
        return sorted(self._episodes, key=lambda e: e.reward, reverse=True)[:top_k]
@@ -0,0 +1,52 @@
1
+ """Long-term memory — keyword-searchable persistent memory store."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import time
7
+ from dataclasses import asdict
8
+ from pathlib import Path
9
+
10
+ from dataenginex.ai.memory.base import BaseMemory, MemoryEntry
11
+
12
+
13
class LongTermMemory(BaseMemory):
    """Persistent memory with keyword search — no external vector DB required.

    Data is stored as a flat list and scored by keyword overlap.
    Call :meth:`persist` to write to disk and :meth:`load_from_file` to restore.
    """

    def __init__(self) -> None:
        """Start with an empty in-memory entry list."""
        self._entries: list[MemoryEntry] = []

    def add(self, entry: MemoryEntry) -> None:
        """Append *entry*, stamping it with the current time if unset.

        A falsy ``timestamp`` is overwritten in place with ``time.time()``,
        so the caller's object is mutated.
        """
        if not entry.timestamp:
            entry.timestamp = time.time()
        self._entries.append(entry)

    def search(self, query: str, top_k: int = 5) -> list[MemoryEntry]:
        """Return up to *top_k* entries ranked by keyword overlap with *query*.

        The score of an entry is the number of whitespace-separated words of
        *query* that occur (case-insensitively, as substrings) in its
        content. Entries scoring zero are excluded; ties keep insertion
        order because the sort is stable.
        """
        # Split the query once and lower each entry's content once, instead
        # of repeating both for every (word, entry) pair.
        words = query.lower().split()
        scored: list[tuple[int, MemoryEntry]] = []
        for entry in self._entries:
            content = entry.content.lower()
            score = sum(1 for word in words if word in content)
            if score > 0:
                scored.append((score, entry))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [entry for _, entry in scored[:top_k]]

    def recent(self, n: int = 10) -> list[MemoryEntry]:
        """Return the last *n* entries (oldest first); empty when ``n <= 0``."""
        if n <= 0:
            # ``self._entries[-0:]`` is the whole list, not an empty one.
            return []
        return self._entries[-n:]

    def clear(self) -> None:
        """Remove every in-memory entry (files already persisted are untouched)."""
        self._entries.clear()

    def persist(self, path: str) -> None:
        """Persist all memory entries to a JSON file at *path*."""
        data = [asdict(e) for e in self._entries]
        Path(path).write_text(json.dumps(data, indent=2), encoding="utf-8")

    def load_from_file(self, path: str) -> None:
        """Replace in-memory entries with those from a JSON file at *path*."""
        raw: list[dict[str, object]] = json.loads(Path(path).read_text(encoding="utf-8"))
        self._entries = [MemoryEntry(**item) for item in raw]  # type: ignore[arg-type]
@@ -0,0 +1,9 @@
1
+ """Observability — audit logging, cost tracking, and metrics."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataenginex.ai.observability.audit import AuditEntry, AuditLog
6
+ from dataenginex.ai.observability.cost import CostTracker, TokenUsage
7
+ from dataenginex.ai.observability.metrics import AgentMetrics
8
+
9
+ __all__ = ["AgentMetrics", "AuditEntry", "AuditLog", "CostTracker", "TokenUsage"]
@@ -0,0 +1,34 @@
1
+ """Audit logging — track every agent action for compliance and debugging."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pydantic import BaseModel
6
+
7
+
8
class AuditEntry(BaseModel):
    """One recorded agent action.

    All fields are required except ``reasoning``, which defaults to an
    empty string.
    """

    # Name of the agent that performed the action.
    agent_name: str
    # What the agent did.
    action: str
    # Input given to the action, as text.
    input: str
    # Output produced by the action, as text.
    output: str
    # Time of the action as a float.
    timestamp: float
    # Optional explanation for the action.
    reasoning: str = ""
17
+
18
+
19
class AuditLog:
    """In-memory audit log for agent actions."""

    def __init__(self) -> None:
        """Start with an empty log."""
        self._entries: list[AuditEntry] = []

    def log(self, entry: AuditEntry) -> None:
        """Record an audit entry."""
        self._entries.append(entry)

    def get_entries(self, agent_name: str | None = None, limit: int = 100) -> list[AuditEntry]:
        """Return the most recent audit entries, oldest first.

        Args:
            agent_name: When given, only entries whose ``agent_name`` equals
                this value are considered; ``None`` means all entries.
            limit: Maximum number of entries to return, counted from the
                newest. A non-positive limit yields an empty list.

        Returns:
            A new list holding at most *limit* matching entries.
        """
        if limit <= 0:
            # ``entries[-0:]`` would return everything rather than nothing.
            return []
        if agent_name is None:
            return self._entries[-limit:]
        filtered = [e for e in self._entries if e.agent_name == agent_name]
        return filtered[-limit:]
@@ -0,0 +1,57 @@
1
+ """Token usage and cost tracking for LLM calls."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
class TokenUsage(BaseModel):
    """Token counts and cost recorded for one LLM call.

    All fields are required except ``agent_name``, which defaults to an
    empty string.
    """

    # Identifier of the model that served the call.
    model: str
    # Number of input tokens.
    tokens_in: int
    # Number of output tokens.
    tokens_out: int
    # Cost of the call in US dollars.
    cost_usd: float
    # Agent the call is attributed to, if any.
    agent_name: str = ""
18
+
19
+
20
class CostTracker:
    """Tracks cumulative token usage and costs across agents."""

    def __init__(self) -> None:
        """Start with no usage records."""
        self._records: list[TokenUsage] = []

    def record(self, usage: TokenUsage) -> None:
        """Record a token usage entry."""
        self._records.append(usage)

    def total_cost(self, agent_name: str | None = None) -> float:
        """Return the total cost in USD, optionally restricted to one agent.

        Args:
            agent_name: When given, only records whose ``agent_name`` equals
                this value are summed; ``None`` sums every record.

        Returns:
            The summed ``cost_usd``; ``0.0`` when nothing matches.
        """
        costs = (
            r.cost_usd
            for r in self._records
            if agent_name is None or r.agent_name == agent_name
        )
        # A float start value keeps the result a float even with no records
        # (plain ``sum`` of an empty iterable is the int 0).
        return sum(costs, 0.0)

    def summary(self) -> dict[str, Any]:
        """Return aggregate record count, token totals, cost, and per-model breakdown."""
        total_in = sum(r.tokens_in for r in self._records)
        total_out = sum(r.tokens_out for r in self._records)
        return {
            "total_records": len(self._records),
            "total_tokens_in": total_in,
            "total_tokens_out": total_out,
            "total_cost_usd": self.total_cost(),
            "by_model": self._by_model(),
        }

    def _by_model(self) -> dict[str, dict[str, Any]]:
        """Break down token counts and cost by model name."""
        result: dict[str, dict[str, Any]] = {}
        for r in self._records:
            bucket = result.setdefault(
                r.model, {"tokens_in": 0, "tokens_out": 0, "cost_usd": 0.0}
            )
            bucket["tokens_in"] += r.tokens_in
            bucket["tokens_out"] += r.tokens_out
            bucket["cost_usd"] += r.cost_usd
        return result