dao-ai 0.1.18__tar.gz → 0.1.19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (319) hide show
  1. {dao_ai-0.1.18 → dao_ai-0.1.19}/PKG-INFO +3 -2
  2. {dao_ai-0.1.18 → dao_ai-0.1.19}/README.md +2 -1
  3. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/README.md +75 -15
  4. dao_ai-0.1.19/config/examples/04_genie/genie_in_memory_semantic_cache.yaml +148 -0
  5. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/README.md +2 -1
  6. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/examples.md +3 -2
  7. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/key-capabilities.md +69 -5
  8. {dao_ai-0.1.18 → dao_ai-0.1.19}/pyproject.toml +1 -1
  9. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/config.py +99 -0
  10. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/__init__.py +2 -0
  11. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/core.py +1 -1
  12. dao_ai-0.1.19/src/dao_ai/genie/cache/in_memory_semantic.py +871 -0
  13. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/lru.py +15 -11
  14. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/semantic.py +52 -18
  15. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/genie.py +28 -3
  16. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie.py +8 -9
  17. dao_ai-0.1.19/tests/dao_ai/test_in_memory_semantic_cache.py +1144 -0
  18. {dao_ai-0.1.18 → dao_ai-0.1.19}/.gitignore +0 -0
  19. {dao_ai-0.1.18 → dao_ai-0.1.19}/.python-version +0 -0
  20. {dao_ai-0.1.18 → dao_ai-0.1.19}/CHANGELOG.md +0 -0
  21. {dao_ai-0.1.18 → dao_ai-0.1.19}/CONTRIBUTING.md +0 -0
  22. {dao_ai-0.1.18 → dao_ai-0.1.19}/CONTRIBUTORS.md +0 -0
  23. {dao_ai-0.1.18 → dao_ai-0.1.19}/LICENSE +0 -0
  24. {dao_ai-0.1.18 → dao_ai-0.1.19}/Makefile +0 -0
  25. {dao_ai-0.1.18 → dao_ai-0.1.19}/app.yaml +0 -0
  26. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/01_getting_started/README.md +0 -0
  27. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/01_getting_started/minimal.yaml +0 -0
  28. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/README.md +0 -0
  29. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/custom_mcp.yaml +0 -0
  30. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/external_mcp.yaml +0 -0
  31. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/filtered_mcp.yaml +0 -0
  32. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/managed_mcp.yaml +0 -0
  33. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/slack_integration.yaml +0 -0
  34. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/README.md +0 -0
  35. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/instruction_aware_reranking.yaml +0 -0
  36. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/vector_search_with_reranking.yaml +0 -0
  37. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_basic.yaml +0 -0
  38. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_lru_cache.yaml +0 -0
  39. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_semantic_cache.yaml +0 -0
  40. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_with_conversation_id.yaml +0 -0
  41. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/README.md +0 -0
  42. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/conversation_summarization.yaml +0 -0
  43. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/in_memory_basic.yaml +0 -0
  44. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/lakebase_persistence.yaml +0 -0
  45. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/postgres_persistence.yaml +0 -0
  46. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/06_on_behalf_of_user/README.md +0 -0
  47. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/06_on_behalf_of_user/obo_basic.yaml +0 -0
  48. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/07_human_in_the_loop/README.md +0 -0
  49. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/07_human_in_the_loop/human_in_the_loop.yaml +0 -0
  50. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/08_guardrails/README.md +0 -0
  51. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/08_guardrails/guardrails_basic.yaml +0 -0
  52. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/09_structured_output/README.md +0 -0
  53. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/09_structured_output/structured_output.yaml +0 -0
  54. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/README.md +0 -0
  55. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/agent_bricks.yaml +0 -0
  56. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/kasal.yaml +0 -0
  57. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/README.md +0 -0
  58. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/prompt_optimization.yaml +0 -0
  59. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/prompt_registry.yaml +0 -0
  60. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/README.md +0 -0
  61. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/combined_middleware.yaml +0 -0
  62. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/context_management.yaml +0 -0
  63. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/custom_field_validation.yaml +0 -0
  64. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/limit_middleware.yaml +0 -0
  65. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/logging_middleware.yaml +0 -0
  66. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/pii_middleware.yaml +0 -0
  67. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/retry_middleware.yaml +0 -0
  68. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/tool_selector_middleware.yaml +0 -0
  69. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/README.md +0 -0
  70. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/supervisor_pattern.yaml +0 -0
  71. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/swarm_pattern.yaml +0 -0
  72. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/14_basic_tools/README.md +0 -0
  73. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/14_basic_tools/sql_tool_example.yaml +0 -0
  74. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/README.md +0 -0
  75. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/brick_store.yaml +0 -0
  76. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/deep_research.yaml +0 -0
  77. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/executive_assistant.yaml +0 -0
  78. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/genie_and_genie_mcp.yaml +0 -0
  79. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/genie_vector_search_hybrid.yaml +0 -0
  80. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store.yaml +0 -0
  81. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_instructed.yaml +0 -0
  82. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_lakebase.yaml +0 -0
  83. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_swarm.yaml +0 -0
  84. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/quick_serve_restaurant.yaml +0 -0
  85. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/reservations_system.yaml +0 -0
  86. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/README.md +0 -0
  87. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/full_pipeline.yaml +0 -0
  88. {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/instructed_retriever.yaml +0 -0
  89. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/appointments.sql +0 -0
  90. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/appointments_data.sql +0 -0
  91. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_data.sql +0 -0
  92. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_queries.sql +0 -0
  93. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_tables.sql +0 -0
  94. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_validation.sql +0 -0
  95. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/customers.sql +0 -0
  96. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/customers_data.sql +0 -0
  97. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/dim_stores.sql +0 -0
  98. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/dim_stores_data.sql +0 -0
  99. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_performance.sql +0 -0
  100. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_performance_data.sql +0 -0
  101. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_tasks.sql +0 -0
  102. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_tasks_data.sql +0 -0
  103. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/inventory.sql +0 -0
  104. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/inventory_data.sql +0 -0
  105. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/managers.sql +0 -0
  106. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/managers_data.sql +0 -0
  107. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/product_data.sql +0 -0
  108. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/products.sql +0 -0
  109. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/task_assignments.sql +0 -0
  110. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/inventory.snappy.parquet +0 -0
  111. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/inventory.sql +0 -0
  112. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/products.snappy.parquet +0 -0
  113. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/products.sql +0 -0
  114. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/.gitkeep +0 -0
  115. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/fulfil_item_orders.sql +0 -0
  116. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_description.csv +0 -0
  117. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_description.sql +0 -0
  118. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_raw.csv +0 -0
  119. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_raw.sql +0 -0
  120. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/orders_raw.csv +0 -0
  121. {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/orders_raw.sql +0 -0
  122. {dao_ai-0.1.18 → dao_ai-0.1.19}/databricks.yaml.template +0 -0
  123. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/architecture.md +0 -0
  124. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/cli-reference.md +0 -0
  125. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/configuration-reference.md +0 -0
  126. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/contributing.md +0 -0
  127. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/faq.md +0 -0
  128. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/README.md +0 -0
  129. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/retail_supervisor.png +0 -0
  130. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/retail_swarm.png +0 -0
  131. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/images/genie.png +0 -0
  132. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/python-api.md +0 -0
  133. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/quick_serve_restaurant/.gitkeep +0 -0
  134. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/quick_serve_restaurant/quick-serve-restaurant.png +0 -0
  135. {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/why-dao.md +0 -0
  136. {dao_ai-0.1.18 → dao_ai-0.1.19}/environment.yaml +0 -0
  137. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/dais2025/examples.yaml +0 -0
  138. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/deep_research/examples.yaml +0 -0
  139. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/executive_assistant/examples.yaml +0 -0
  140. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/hardware_store/examples.yaml +0 -0
  141. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/quick_serve_restaurant/.gitkeep +0 -0
  142. {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/quick_serve_restaurant/examples.yaml +0 -0
  143. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/extract_store_numbers.sql +0 -0
  144. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_inventory_by_sku.sql +0 -0
  145. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_inventory_by_upc.sql +0 -0
  146. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_product_by_sku.sql +0 -0
  147. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_product_by_upc.sql +0 -0
  148. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_by_number.sql +0 -0
  149. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_inventory_by_sku.sql +0 -0
  150. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_inventory_by_upc.sql +0 -0
  151. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_inventory_by_sku.sql +0 -0
  152. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_inventory_by_upc.sql +0 -0
  153. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_product_by_sku.sql +0 -0
  154. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_product_by_upc.sql +0 -0
  155. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_store_inventory_by_sku.sql +0 -0
  156. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_store_inventory_by_upc.sql +0 -0
  157. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/.gitkeep +0 -0
  158. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/insert_coffee_order.sql +0 -0
  159. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/lookup_items_by_descriptions.sql +0 -0
  160. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/match_historical_item_order_by_date.sql +0 -0
  161. {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/match_item_by_description_and_price.sql +0 -0
  162. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/01_ingest_and_transform.py +0 -0
  163. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/02_provision_vector_search.py +0 -0
  164. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/03_provision_lakebase.py +0 -0
  165. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/04_unity_catalog_tools.py +0 -0
  166. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/05_deploy_agent.py +0 -0
  167. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/06_generate_evaluation_data.py +0 -0
  168. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/07_run_evaluation.py +0 -0
  169. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/08_run_examples.py +0 -0
  170. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/09_evaluate_inferences.py +0 -0
  171. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/10_optimize_prompts.py +0 -0
  172. {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/99_scratchpad.py +0 -0
  173. {dao_ai-0.1.18 → dao_ai-0.1.19}/requirements.txt +0 -0
  174. {dao_ai-0.1.18 → dao_ai-0.1.19}/schemas/bundle_config_schema.json +0 -0
  175. {dao_ai-0.1.18 → dao_ai-0.1.19}/schemas/model_config_schema.json +0 -0
  176. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/__init__.py +0 -0
  177. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/__init__.py +0 -0
  178. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/handlers.py +0 -0
  179. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/model_serving.py +0 -0
  180. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/resources.py +0 -0
  181. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/server.py +0 -0
  182. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/catalog.py +0 -0
  183. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/cli.py +0 -0
  184. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/evaluation.py +0 -0
  185. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/__init__.py +0 -0
  186. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/base.py +0 -0
  187. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/core.py +0 -0
  188. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/graph.py +0 -0
  189. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/hooks/__init__.py +0 -0
  190. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/hooks/core.py +0 -0
  191. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/logging.py +0 -0
  192. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/__init__.py +0 -0
  193. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/base.py +0 -0
  194. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/core.py +0 -0
  195. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/databricks.py +0 -0
  196. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/postgres.py +0 -0
  197. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/messages.py +0 -0
  198. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/__init__.py +0 -0
  199. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/assertions.py +0 -0
  200. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/base.py +0 -0
  201. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/context_editing.py +0 -0
  202. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/core.py +0 -0
  203. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/guardrails.py +0 -0
  204. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/human_in_the_loop.py +0 -0
  205. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/message_validation.py +0 -0
  206. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/model_call_limit.py +0 -0
  207. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/model_retry.py +0 -0
  208. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/pii.py +0 -0
  209. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/summarization.py +0 -0
  210. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_call_limit.py +0 -0
  211. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_retry.py +0 -0
  212. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_selector.py +0 -0
  213. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/models.py +0 -0
  214. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/nodes.py +0 -0
  215. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/optimization.py +0 -0
  216. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/__init__.py +0 -0
  217. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/core.py +0 -0
  218. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/supervisor.py +0 -0
  219. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/swarm.py +0 -0
  220. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/__init__.py +0 -0
  221. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/instructed_retriever_decomposition.yaml +0 -0
  222. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/instruction_reranker.yaml +0 -0
  223. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/router.yaml +0 -0
  224. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/verifier.yaml +0 -0
  225. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/__init__.py +0 -0
  226. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/base.py +0 -0
  227. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/databricks.py +0 -0
  228. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/state.py +0 -0
  229. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/__init__.py +0 -0
  230. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/agent.py +0 -0
  231. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/core.py +0 -0
  232. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/email.py +0 -0
  233. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/instructed_retriever.py +0 -0
  234. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/instruction_reranker.py +0 -0
  235. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/mcp.py +0 -0
  236. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/memory.py +0 -0
  237. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/python.py +0 -0
  238. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/router.py +0 -0
  239. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/search.py +0 -0
  240. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/slack.py +0 -0
  241. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/sql.py +0 -0
  242. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/time.py +0 -0
  243. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/unity_catalog.py +0 -0
  244. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/vector_search.py +0 -0
  245. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/verifier.py +0 -0
  246. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/types.py +0 -0
  247. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/utils.py +0 -0
  248. {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/vector_search.py +0 -0
  249. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/config/test_model_config.yaml +0 -0
  250. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/conftest.py +0 -0
  251. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_context_editing.py +0 -0
  252. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_model_call_limit.py +0 -0
  253. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_model_retry.py +0 -0
  254. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_pii.py +0 -0
  255. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_call_limit.py +0 -0
  256. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_retry.py +0 -0
  257. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_selector.py +0 -0
  258. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_agent_response_format.py +0 -0
  259. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_assertions_middleware.py +0 -0
  260. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_catalog.py +0 -0
  261. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_chat_history.py +0 -0
  262. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_config.py +0 -0
  263. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_databricks.py +0 -0
  264. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_evaluation.py +0 -0
  265. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_function_parsing.py +0 -0
  266. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_conversation_ids_in_outputs.py +0 -0
  267. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_databricks_integration.py +0 -0
  268. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_room_model.py +0 -0
  269. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_guardrail_retry.py +0 -0
  270. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hitl_config_model.py +0 -0
  271. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hitl_responses_agent.py +0 -0
  272. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hooks.py +0 -0
  273. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_human_in_the_loop.py +0 -0
  274. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_inference.py +0 -0
  275. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_inference_integration.py +0 -0
  276. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_input_output_structure.py +0 -0
  277. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instructed_retriever.py +0 -0
  278. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instruction_reranker.py +0 -0
  279. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instruction_reranker_integration.py +0 -0
  280. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_interrupt_type.py +0 -0
  281. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_llm_interrupt_handling.py +0 -0
  282. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp.py +0 -0
  283. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_filtering.py +0 -0
  284. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_filtering_integration.py +0 -0
  285. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_function_model.py +0 -0
  286. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_message_validation_middleware.py +0 -0
  287. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_messages.py +0 -0
  288. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_models.py +0 -0
  289. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_optimization.py +0 -0
  290. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_postgres_integration.py +0 -0
  291. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_prompt_optimizations.py +0 -0
  292. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_prompts.py +0 -0
  293. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_reranking.py +0 -0
  294. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_reranking_integration.py +0 -0
  295. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_resources_model_genie_integration.py +0 -0
  296. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_response_format.py +0 -0
  297. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_responses_agent_structured_output_unit.py +0 -0
  298. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_router.py +0 -0
  299. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_semantic_cache_context.py +0 -0
  300. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_sql_tool.py +0 -0
  301. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_sql_tool_integration.py +0 -0
  302. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_state.py +0 -0
  303. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_summarization_inference.py +0 -0
  304. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_swarm_middleware.py +0 -0
  305. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_tools.py +0 -0
  306. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_types.py +0 -0
  307. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_unity_catalog.py +0 -0
  308. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_utils.py +0 -0
  309. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_utils_type_from_fqn.py +0 -0
  310. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_vector_search.py +0 -0
  311. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_verifier.py +0 -0
  312. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_warehouse_model.py +0 -0
  313. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/weather_server_mcp.py +0 -0
  314. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/hardware_store/.gitkeep +0 -0
  315. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/hardware_store/test_graph.py +0 -0
  316. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/images/doritos_upc.png +0 -0
  317. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/images/lays_upc.png +0 -0
  318. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/quick_serve_restaurant/.gitkeep +0 -0
  319. {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/test_mcp_app_auth.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dao-ai
3
- Version: 0.1.18
3
+ Version: 0.1.19
4
4
  Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
5
5
  Project-URL: Homepage, https://github.com/natefleming/dao-ai
6
6
  Project-URL: Documentation, https://natefleming.github.io/dao-ai
@@ -409,7 +409,8 @@ The `config/examples/` directory contains ready-to-use configurations organized
409
409
 
410
410
  - `01_getting_started/minimal.yaml` - Simplest possible agent
411
411
  - `03_reranking/vector_search_with_reranking.yaml` - RAG with improved accuracy
412
- - `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with two-tier caching
412
+ - `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with PostgreSQL semantic caching
413
+ - `04_genie/genie_in_memory_semantic_cache.yaml` - NL-to-SQL with in-memory semantic caching (no database)
413
414
  - `05_memory/conversation_summarization.yaml` - Long conversation handling
414
415
  - `06_on_behalf_of_user/obo_basic.yaml` - User-level access control
415
416
  - `07_human_in_the_loop/human_in_the_loop.yaml` - Approval workflows
@@ -330,7 +330,8 @@ The `config/examples/` directory contains ready-to-use configurations organized
330
330
 
331
331
  - `01_getting_started/minimal.yaml` - Simplest possible agent
332
332
  - `03_reranking/vector_search_with_reranking.yaml` - RAG with improved accuracy
333
- - `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with two-tier caching
333
+ - `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with PostgreSQL semantic caching
334
+ - `04_genie/genie_in_memory_semantic_cache.yaml` - NL-to-SQL with in-memory semantic caching (no database)
334
335
  - `05_memory/conversation_summarization.yaml` - Long conversation handling
335
336
  - `06_on_behalf_of_user/obo_basic.yaml` - User-level access control
336
337
  - `07_human_in_the_loop/human_in_the_loop.yaml` - Approval workflows
@@ -52,10 +52,20 @@ flowchart TB
52
52
 
53
53
  | File | Description |
54
54
  |------|-------------|
55
- | [`genie_cached.yaml`](./genie_cached.yaml) | Two-tier caching with LRU and semantic cache |
55
+ | [`genie_cached.yaml`](./genie_cached.yaml) | Two-tier caching with LRU and PostgreSQL semantic cache |
56
+ | [`genie_in_memory_semantic_cache.yaml`](./genie_in_memory_semantic_cache.yaml) | In-memory semantic cache (no database required) |
56
57
 
57
58
  ## Cache Tiers
58
59
 
60
+ DAO provides two L2 semantic cache implementations:
61
+
62
+ | Implementation | Best For | Database Required |
63
+ |----------------|----------|-------------------|
64
+ | **PostgreSQL Semantic Cache** | Production multi-instance deployments, large cache sizes (thousands+), cross-instance sharing | Yes (PostgreSQL with pgvector) |
65
+ | **In-Memory Semantic Cache** | Single-instance deployments, dev/test, no database access, moderate cache sizes (hundreds to low thousands) | No (in-memory only) |
66
+
67
+ Both implementations use the same L2 (Euclidean) distance metric for similarity matching and support conversation context awareness, so they behave consistently.
68
+
59
69
  ```mermaid
60
70
  %%{init: {'theme': 'base'}}%%
61
71
  graph TB
@@ -70,8 +80,9 @@ graph TB
70
80
  subgraph L2["🧠 L2: Semantic Cache"]
71
81
  SEM1["<b>Type:</b> Similarity match"]
72
82
  SEM2["<b>Speed:</b> ~50ms"]
73
- SEM3["<b>Threshold:</b> 0.95"]
74
- SEM4["<b>TTL:</b> ttl: 3600 (1 hour)"]
83
+ SEM3["<b>Options:</b> PostgreSQL or In-Memory"]
84
+ SEM4["<b>Threshold:</b> 0.85-0.95"]
85
+ SEM5["<b>TTL:</b> ttl: 3600 (1 hour)"]
75
86
  end
76
87
  end
77
88
 
@@ -81,21 +92,56 @@ graph TB
81
92
 
82
93
  ## Configuration
83
94
 
95
+ ### PostgreSQL Semantic Cache (Multi-Instance)
96
+
84
97
  ```yaml
85
- resources:
86
- genie_rooms:
87
- retail_genie_room: &retail_genie_room
88
- space_id: "01efabcd1234567890abcdef12345678"
98
+ genie_tool:
99
+ function:
100
+ type: factory
101
+ name: dao_ai.tools.create_genie_tool
102
+ args:
103
+ genie_room: *retail_genie_room
89
104
 
90
105
  # ⚡ L1: LRU Cache - Exact match
91
- lru_cache:
92
- maxsize: 100 # Max cached queries
106
+ lru_cache_parameters:
107
+ warehouse: *warehouse
108
+ capacity: 100
109
+ time_to_live_seconds: 3600
110
+
111
+ # 🧠 L2: PostgreSQL Semantic Cache - Similar queries
112
+ semantic_cache_parameters:
113
+ database: *postgres_db
114
+ warehouse: *warehouse
115
+ embedding_model: *embedding_model
116
+ similarity_threshold: 0.85
117
+ time_to_live_seconds: 3600
118
+ context_window_size: 3
119
+ ```
120
+
121
+ ### In-Memory Semantic Cache (Single-Instance)
122
+
123
+ ```yaml
124
+ genie_tool:
125
+ function:
126
+ type: factory
127
+ name: dao_ai.tools.create_genie_tool
128
+ args:
129
+ genie_room: *retail_genie_room
130
+
131
+ # Optional L1: LRU Cache - Exact match
132
+ # lru_cache_parameters:
133
+ # warehouse: *warehouse
134
+ # capacity: 100
135
+ # time_to_live_seconds: 3600
93
136
 
94
- # 🧠 L2: Semantic Cache - Similar queries
95
- semantic_cache:
96
- similarity_threshold: 0.95 # How similar (0.0-1.0)
97
- ttl: 3600 # Time-to-live in seconds
98
- max_results: 1000 # Max cached embeddings
137
+ # 🧠 In-Memory Semantic Cache - No database required
138
+ in_memory_semantic_cache_parameters:
139
+ warehouse: *warehouse
140
+ embedding_model: *embedding_model
141
+ similarity_threshold: 0.85
142
+ time_to_live_seconds: 604800 # 1 week
143
+ capacity: 1000 # LRU eviction when full
144
+ context_window_size: 3
99
145
  ```
100
146
 
101
147
  ## Cache Flow
@@ -210,8 +256,10 @@ agents:
210
256
 
211
257
  ## Quick Start
212
258
 
259
+ ### PostgreSQL Semantic Cache
260
+
213
261
  ```bash
214
- # Run with caching enabled
262
+ # Run with PostgreSQL semantic cache
215
263
  dao-ai chat -c config/examples/04_genie/genie_cached.yaml
216
264
 
217
265
  # Test caching behavior
@@ -220,6 +268,18 @@ dao-ai chat -c config/examples/04_genie/genie_cached.yaml
220
268
  > Show me Q4 revenue # Semantic cache hit (~50ms)
221
269
  ```
222
270
 
271
+ ### In-Memory Semantic Cache
272
+
273
+ ```bash
274
+ # Run with in-memory semantic cache (no database required)
275
+ dao-ai chat -c config/examples/04_genie/genie_in_memory_semantic_cache.yaml
276
+
277
+ # Test caching behavior
278
+ > What are the total sales for Q4? # First query - Genie hit
279
+ > What are the total sales for Q4? # Semantic cache hit (~50ms)
280
+ > Show me Q4 revenue # Semantic cache hit (~50ms)
281
+ ```
282
+
223
283
  ## Cache Monitoring
224
284
 
225
285
  ```bash
@@ -0,0 +1,148 @@
1
+ # yaml-language-server: $schema=../../../schemas/model_config_schema.json
2
+ #
3
+ # Example configuration for Genie with in-memory semantic caching:
4
+ # - In-Memory Semantic Cache: Similarity search without external database
5
+ # - Optional LRU Cache (L1): Fast O(1) exact match lookup
6
+ #
7
+ # This configuration is ideal for:
8
+ # - Environments without access to PostgreSQL or Databricks Lakebase
9
+ # - Single-instance deployments (cache not shared across instances)
10
+ # - Moderate cache sizes (hundreds to low thousands of entries)
11
+ # - Cases where cache persistence across restarts is not required
12
+ #
13
+ # Cache flow: Question → LRU (exact match) → In-Memory Semantic (similarity) → Genie API
14
+ # On cache hit, the cached SQL is re-executed against the warehouse for fresh data.
15
+
16
+
17
+ schemas:
18
+
19
+ quick_serve_restaurant_schema: &quick_serve_restaurant_schema
20
+ catalog_name: retail_consumer_goods # Unity Catalog name
21
+ schema_name: quick_serve_restaurant # Schema within the catalog
22
+
23
+ resources:
24
+ llms:
25
+ # Primary LLM for general tasks
26
+ default_llm: &default_llm
27
+ name: databricks-claude-sonnet-4
28
+ temperature: 0.1 # Low temperature for consistent responses
29
+ max_tokens: 8192 # Maximum tokens per response
30
+ on_behalf_of_user: False
31
+
32
+ # Embedding model for semantic similarity search
33
+ embedding_model: &embedding_model
34
+ name: databricks-gte-large-en # Text embedding model
35
+ on_behalf_of_user: False
36
+
37
+ warehouses:
38
+ # Warehouse for executing SQL queries (used by semantic cache)
39
+ shared_endpoint_warehouse: &shared_endpoint_warehouse
40
+ name: "Shared Endpoint Warehouse" # Human-readable name
41
+ description: "A warehouse for shared endpoints" # Description
42
+ warehouse_id: 148ccb90800933a1 # Databricks warehouse ID
43
+ on_behalf_of_user: False
44
+
45
+ genie_rooms:
46
+ # Genie space for retail data queries
47
+ retail_genie_room: &retail_genie_room
48
+ name: "Retail AI Genie Room" # Human-readable name
49
+ description: "A room for Genie agents to interact" # Description
50
+ space_id:
51
+ env: RETAIL_AI_GENIE_SPACE_ID
52
+ default_value: 01f01c91f1f414d59daaefd2b7ec82ea
53
+
54
+
55
+ # =============================================================================
56
+ # MEMORY CONFIGURATION
57
+ # =============================================================================
58
+ # Configure in-memory storage for agent conversations and state persistence
59
+
60
+ memory: &memory
61
+ # Conversation checkpointing for state persistence
62
+ checkpointer:
63
+ name: default_checkpointer # Checkpointer identifier (type inferred as memory - no database)
64
+
65
+
66
+ tools:
67
+ genie_tool: &genie_tool
68
+ name: genie
69
+ function:
70
+ type: factory # Tool type: factory function
71
+ name: dao_ai.tools.create_genie_tool # Factory function path
72
+ args: # Arguments passed to factory
73
+ name: my_genie_tool
74
+ description: Answers questions about retail products and inventory
75
+ genie_room: *retail_genie_room # Reference to Genie room config
76
+
77
+ # Optional L1 Cache: LRU (Least Recently Used) - Fast exact match
78
+ # Uncomment to enable LRU cache in front of semantic cache
79
+ # lru_cache_parameters:
80
+ # warehouse: *shared_endpoint_warehouse # Warehouse to re-execute cached SQL
81
+ # capacity: 100 # Maximum number of cached entries
82
+ # time_to_live_seconds: 3600 # Cache entries expire after 1 hour
83
+
84
+ # In-Memory Semantic Cache: Similarity-based lookup with LRU eviction (NO database required)
85
+ # Default settings optimized for ~30 users on 8GB machine:
86
+ # - Capacity: 10,000 entries (~200MB, ~330 queries/user)
87
+ # - Eviction: LRU (Least Recently Used) keeps hot queries cached
88
+ # - TTL: 1 week (accommodates weekly work patterns)
89
+ # - Memory: ~4-5% of 8GB system
90
+ in_memory_semantic_cache_parameters:
91
+ warehouse: *shared_endpoint_warehouse # Warehouse used to re-execute cached SQL
92
+ embedding_model: *embedding_model # Reference to embedding model
93
+ # embedding_dims: 1024 # Auto-detected if omitted (recommended)
94
+ similarity_threshold: 0.85 # Minimum similarity for question matching (L2 distance converted to a 0-1 scale)
95
+ context_similarity_threshold: 0.80 # Minimum similarity for context matching
96
+ # time_to_live_seconds: 604800 # Cache entries expire after 1 week (default)
97
+ # capacity: 10000 # Max cache entries, LRU eviction when full (default: 10000, ~200MB)
98
+ # # Adjust for different scenarios:
99
+ # # - Small (5-10 users): capacity: 1000 (~20MB)
100
+ # # - Medium (30 users): capacity: 10000 (~200MB, default)
101
+ # # - Large (100 users): capacity: 30000 (~600MB)
102
+ # # - Unlimited: capacity: null (not recommended - unbounded memory)
103
+ context_window_size: 3 # Number of previous conversation turns to include
104
+ # max_context_tokens: 2000 # Maximum context length (default: 2000)
105
+ # question_weight: 0.6 # Weight for question similarity (default: 0.6)
106
+ # context_weight: 0.4 # Weight for context similarity (default: 0.4)
107
+ # Note: question_weight + context_weight must equal 1.0
108
+
109
+ persist_conversation: true
110
+
111
+
112
+ agents:
113
+ genie: &genie
114
+ name: genie # Agent identifier
115
+ description: "Genie Agent with In-Memory Semantic Cache"
116
+ model: *default_llm # Reference to LLM configuration
117
+ tools: # Tools available to this agent
118
+ - *genie_tool
119
+ prompt: | # System prompt defining agent behavior
120
+ Answers questions about retail products and inventory using natural language.
121
+ You have access to a semantic cache that remembers similar questions to provide faster responses.
122
+
123
+
124
+ app:
125
+ name: genie_in_memory_semantic_cache_dao # Application name
126
+ description: "Multi-agent system that talks to genie with in-memory semantic caching (no database required)"
127
+ log_level: DEBUG # Logging level for the application
128
+ environment_vars: # Secrets to inject at runtime
129
+ RETAIL_AI_DATABRICKS_CLIENT_ID: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_CLIENT_ID}}"
130
+ RETAIL_AI_DATABRICKS_CLIENT_SECRET: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_CLIENT_SECRET}}"
131
+ RETAIL_AI_DATABRICKS_HOST: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_HOST}}"
132
+ registered_model: # MLflow registered model configuration
133
+ schema: *quick_serve_restaurant_schema # Schema where model will be registered
134
+ name: dao_genie_in_memory_semantic_cache # Model name in MLflow registry
135
+ endpoint_name: dao_genie_in_memory_semantic_cache # Model serving endpoint name
136
+ tags: # Tags for resource organization
137
+ business: rcg # Business unit identifier
138
+ streaming: true # Indicates streaming capabilities
139
+ permissions: # Model serving permissions
140
+ - principals: [users] # Grant access to all users
141
+ entitlements:
142
+ - CAN_QUERY # Query permissions
143
+ agents: # List of agents included in the system
144
+ - *genie # Genie agent with in-memory cache
145
+ orchestration: # Agent orchestration configuration
146
+ memory: *memory # In-memory conversation persistence
147
+ swarm: # Swarm orchestration pattern
148
+ default_agent: *genie # Default agent for routing
@@ -52,7 +52,8 @@ Or jump directly to the category that matches your current need.
52
52
  **Natural language to SQL**
53
53
  - Basic Genie integration
54
54
  - LRU caching for performance
55
- - Semantic caching with embeddings
55
+ - PostgreSQL semantic caching with embeddings
56
+ - In-memory semantic caching (no database required)
56
57
 
57
58
  👉 Query data with natural language, optimized with caching
58
59
 
@@ -120,9 +120,10 @@ Improve performance and reduce costs through intelligent caching.
120
120
  | Example | Description |
121
121
  |---------|-------------|
122
122
  | `genie_lru_cache.yaml` | LRU (Least Recently Used) caching for Genie |
123
- | `genie_semantic_cache.yaml` | Two-tier semantic caching with embeddings |
123
+ | `genie_semantic_cache.yaml` | Two-tier semantic caching with PostgreSQL embeddings |
124
+ | `genie_in_memory_semantic_cache.yaml` | In-memory semantic caching (no database required) |
124
125
 
125
- **Prerequisites:** PostgreSQL or Lakebase for semantic cache
126
+ **Prerequisites:** PostgreSQL or Lakebase required for `genie_semantic_cache.yaml` only
126
127
  **Next:** Add persistence in `05_memory/`
127
128
 
128
129
  ---
@@ -202,7 +202,7 @@ graph TB
202
202
  l1_cache["L1: LRU Cache (In-Memory)<br/>• Capacity: 1000 entries<br/>• Hash-based lookup<br/>• O(1) exact string match"]
203
203
  l1_hit{Hit?}
204
204
 
205
- l2_cache["L2: Semantic Cache (PostgreSQL)<br/>• pg_vector embeddings<br/>• Conversation context aware<br/>• L2 distance similarity<br/>• Partitioned by Genie space ID"]
205
+ l2_cache["L2: Semantic Cache<br/>• PostgreSQL (pg_vector) OR In-Memory<br/>• Dual embeddings (question + context)<br/>• L2 distance similarity<br/>• Conversation context aware<br/>• Partitioned by Genie space ID"]
206
206
  l2_hit{Hit?}
207
207
 
208
208
  genie["Genie API<br/>(Expensive call)<br/>Natural language to SQL"]
@@ -247,7 +247,11 @@ The **LRU (Least Recently Used) Cache** provides instant lookups for exact quest
247
247
 
248
248
  ### Semantic Cache (L2)
249
249
 
250
- The **Semantic Cache** uses PostgreSQL with pg_vector to find similar questions even when worded differently. It includes **conversation context awareness** to improve matching in multi-turn conversations:
250
+ The **Semantic Cache** finds similar questions even when worded differently using vector embeddings and similarity search. It includes **conversation context awareness** to improve matching in multi-turn conversations. DAO provides two implementations:
251
+
252
+ #### PostgreSQL-Based Semantic Cache
253
+
254
+ Uses PostgreSQL with pg_vector for persistent, multi-instance shared caching:
251
255
 
252
256
  | Parameter | Default | Description |
253
257
  |-----------|---------|-------------|
@@ -259,6 +263,62 @@ The **Semantic Cache** uses PostgreSQL with pg_vector to find similar questions
259
263
  | `table_name` | `genie_semantic_cache` | Table name for cache storage |
260
264
  | `context_window_size` | 3 | Number of previous conversation turns to include |
261
265
  | `context_similarity_threshold` | 0.80 | Minimum similarity for conversation context |
266
+ | `question_weight` | 0.6 | Weight for question similarity in combined score (0.0-1.0) |
267
+ | `context_weight` | 0.4 | Weight for context similarity (computed as 1 - question_weight if not set) |
268
+ | `embedding_dims` | Auto-detected | Embedding vector dimensions (auto-detected from model if not specified) |
269
+ | `max_context_tokens` | 2000 | Maximum token length for conversation context embeddings |
270
+
271
+ **Best for:** Production deployments with multiple instances, large cache sizes (thousands+), and cross-instance cache sharing
272
+
273
+ #### In-Memory Semantic Cache
274
+
275
+ Uses in-memory storage without external database dependencies:
276
+
277
+ ```yaml
278
+ genie_tool:
279
+ function:
280
+ type: factory
281
+ name: dao_ai.tools.create_genie_tool
282
+ args:
283
+ genie_room: *retail_genie_room
284
+
285
+ # In-memory semantic cache (no database required)
286
+ in_memory_semantic_cache_parameters:
287
+ warehouse: *warehouse
288
+ embedding_model: *embedding_model # Default: databricks-gte-large-en
289
+ similarity_threshold: 0.85 # 0.0-1.0 (default: 0.85)
290
+ time_to_live_seconds: 86400 # 1 day (default: 604800 = 1 week); use a negative value or null to never expire
291
+ capacity: 1000 # Max cache entries (LRU eviction when full)
292
+ context_window_size: 3 # Number of previous conversation turns
293
+ context_similarity_threshold: 0.80 # Minimum context similarity
294
+ question_weight: 0.6 # Weight for question similarity
295
+ context_weight: 0.4 # Weight for context similarity
296
+ embedding_dims: null # Auto-detected from model
297
+ max_context_tokens: 2000 # Max context token length
298
+ ```
299
+
300
+ | Parameter | Default | Description |
301
+ |-----------|---------|-------------|
302
+ | `similarity_threshold` | 0.85 | Minimum similarity for cache hit (0.0-1.0) |
303
+ | `time_to_live_seconds` | 604800 | Cache entry lifetime in seconds (default: 1 week; negative or null = never expire) |
304
+ | `embedding_model` | `databricks-gte-large-en` | Model for generating question embeddings |
305
+ | `warehouse` | Required | Databricks warehouse for SQL execution |
306
+ | `capacity` | 10000 | Maximum cache entries (LRU eviction when full; null = unlimited, not recommended) |
307
+ | `context_window_size` | 3 | Number of previous conversation turns to include |
308
+ | `context_similarity_threshold` | 0.80 | Minimum similarity for conversation context |
309
+ | `question_weight` | 0.6 | Weight for question similarity in combined score (0.0-1.0) |
310
+ | `context_weight` | 0.4 | Weight for context similarity (computed as 1 - question_weight if not set) |
311
+ | `embedding_dims` | Auto-detected | Embedding vector dimensions (auto-detected from model if not specified) |
312
+ | `max_context_tokens` | 2000 | Maximum token length for conversation context embeddings |
313
+
314
+ **Best for:** Single-instance deployments, development/testing, scenarios without database access, moderate cache sizes (hundreds to low thousands)
315
+
316
+ **Key Differences:**
317
+ - ✅ **No external database required** - Simpler setup and deployment
318
+ - ✅ **Same L2 distance algorithm** - Consistent behavior with PostgreSQL version
319
+ - ⚠️ **Per-instance cache** - Each replica has its own cache (not shared)
320
+ - ⚠️ **No persistence** - Cache is lost on restart
321
+ - ⚠️ **Memory-bound** - Limited by available RAM; use capacity limits
262
322
 
263
323
  **Best for:** Catching rephrased questions like:
264
324
  - "What's our inventory status?" ≈ "Show me stock levels"
@@ -271,6 +331,12 @@ The semantic cache tracks conversation history to resolve ambiguous references:
271
331
 
272
332
  This works by embedding both the current question *and* recent conversation turns, then computing a weighted similarity score. This dramatically improves cache hits in multi-turn conversations where users naturally use pronouns and references.
273
333
 
334
+ **Weight Configuration:**
335
+ The `question_weight` and `context_weight` parameters control how question vs conversation context similarity are combined into the final score:
336
+ - Both weights must sum to 1.0 (if only one is provided, the other is computed automatically)
337
+ - Higher `question_weight` prioritizes matching the exact question wording
338
+ - Higher `context_weight` prioritizes matching the conversation context, useful for multi-turn conversations with pronouns and references
339
+
274
340
  ### Cache Behavior
275
341
 
276
342
  1. **SQL Caching, Not Results**: The cache stores the *generated SQL query*, not the query results. On a cache hit, the SQL is re-executed against your warehouse, ensuring **data freshness**.
@@ -283,12 +349,10 @@ This works by embedding both the current question *and* recent conversation turn
283
349
  - Genie generates fresh SQL
284
350
  - The new SQL is cached
285
351
 
286
- 4. **Multi-Instance Aware**: Each LRU cache is per-instance (in Model Serving, each replica has its own). The semantic cache is shared across all instances via PostgreSQL.
352
+ 4. **Multi-Instance Aware**: Each LRU cache is per-instance (in Model Serving, each replica has its own). The PostgreSQL semantic cache is shared across all instances. The in-memory semantic cache is per-instance (not shared).
287
353
 
288
354
  5. **Space ID Partitioning**: Cache entries are isolated per Genie space, preventing cross-space cache pollution.
289
355
 
290
- For more details on semantic cache configuration, see [docs/semantic_cache_weight_configuration.md](semantic_cache_weight_configuration.md).
291
-
292
356
  ## 5. Vector Search Reranking
293
357
 
294
358
  **The problem:** Vector search (semantic similarity) is fast but sometimes returns loosely related results. It's like a librarian who quickly grabs 50 books that *might* be relevant.
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "dao-ai"
7
- version = "0.1.18"
7
+ version = "0.1.19"
8
8
  description = "DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -1773,6 +1773,105 @@ class GenieSemanticCacheParametersModel(BaseModel):
1773
1773
  return self
1774
1774
 
1775
1775
 
1776
+ # Memory estimation for capacity planning:
1777
+ # - Each entry: ~20KB (8KB question embedding + 8KB context embedding + 4KB strings/overhead)
1778
+ # - 1,000 entries: ~20MB (0.4% of 8GB)
1779
+ # - 5,000 entries: ~100MB (2% of 8GB)
1780
+ # - 10,000 entries: ~200MB (4-5% of 8GB) - default for ~30 users
1781
+ # - 20,000 entries: ~400MB (8-10% of 8GB)
1782
+ # Default 10,000 entries provides ~330 queries per user for 30 users.
1783
+ class GenieInMemorySemanticCacheParametersModel(BaseModel):
1784
+ """
1785
+ Configuration for in-memory semantic cache (no database required).
1786
+
1787
+ This cache stores embeddings and cache entries entirely in memory, providing
1788
+ semantic similarity matching without requiring external database dependencies
1789
+ like PostgreSQL or Databricks Lakebase.
1790
+
1791
+ Default settings are tuned for ~30 users on an 8GB machine:
1792
+ - Capacity: 10,000 entries (~200MB memory, ~330 queries per user)
1793
+ - Eviction: LRU (Least Recently Used) - keeps frequently accessed queries
1794
+ - TTL: 1 week (accommodates weekly work patterns and batch jobs)
1795
+ - Memory overhead: ~4-5% of 8GB system
1796
+
1797
+ The LRU eviction strategy ensures hot queries stay cached while cold queries
1798
+ are evicted, providing better hit rates than FIFO eviction.
1799
+
1800
+ For larger deployments or memory-constrained environments, adjust capacity and TTL accordingly.
1801
+
1802
+ Use this when:
1803
+ - No external database access is available
1804
+ - Single-instance deployments (cache not shared across instances)
1805
+ - Cache persistence across restarts is not required
1806
+ - Cache sizes are moderate (hundreds to low thousands of entries)
1807
+
1808
+ For multi-instance deployments or large cache sizes, use GenieSemanticCacheParametersModel
1809
+ with PostgreSQL backend instead.
1810
+ """
1811
+
1812
+ model_config = ConfigDict(use_enum_values=True, extra="forbid")
1813
+ time_to_live_seconds: int | None = (
1814
+ 60 * 60 * 24 * 7
1815
+ ) # 1 week default (604800 seconds), None or negative = never expires
1816
+ similarity_threshold: float = 0.85 # Minimum similarity for question matching (L2 distance converted to 0-1 scale)
1817
+ context_similarity_threshold: float = 0.80 # Minimum similarity for context matching (L2 distance converted to 0-1 scale)
1818
+ question_weight: Optional[float] = (
1819
+ 0.6 # Weight for question similarity in combined score (0-1). If not provided, computed as 1 - context_weight
1820
+ )
1821
+ context_weight: Optional[float] = (
1822
+ None # Weight for context similarity in combined score (0-1). If not provided, computed as 1 - question_weight
1823
+ )
1824
+ embedding_model: str | LLMModel = "databricks-gte-large-en"
1825
+ embedding_dims: int | None = None # Auto-detected if None
1826
+ warehouse: WarehouseModel
1827
+ capacity: int | None = (
1828
+ 10000 # Maximum cache entries. ~200MB for 10000 entries (1024-dim embeddings). LRU eviction when full. None = unlimited (not recommended for production).
1829
+ )
1830
+ context_window_size: int = 3 # Number of previous turns to include for context
1831
+ max_context_tokens: int = (
1832
+ 2000 # Maximum context length to prevent extremely long embeddings
1833
+ )
1834
+
1835
+ @model_validator(mode="after")
1836
+ def compute_and_validate_weights(self) -> Self:
1837
+ """
1838
+ Compute missing weight and validate that question_weight + context_weight = 1.0.
1839
+
1840
+ Either question_weight or context_weight (or both) can be provided.
1841
+ The missing one will be computed as 1.0 - provided_weight.
1842
+ If both are provided, they must sum to 1.0.
1843
+ """
1844
+ if self.question_weight is None and self.context_weight is None:
1845
+ # Both missing - use defaults
1846
+ self.question_weight = 0.6
1847
+ self.context_weight = 0.4
1848
+ elif self.question_weight is None:
1849
+ # Compute question_weight from context_weight
1850
+ if not (0.0 <= self.context_weight <= 1.0):
1851
+ raise ValueError(
1852
+ f"context_weight must be between 0.0 and 1.0, got {self.context_weight}"
1853
+ )
1854
+ self.question_weight = 1.0 - self.context_weight
1855
+ elif self.context_weight is None:
1856
+ # Compute context_weight from question_weight
1857
+ if not (0.0 <= self.question_weight <= 1.0):
1858
+ raise ValueError(
1859
+ f"question_weight must be between 0.0 and 1.0, got {self.question_weight}"
1860
+ )
1861
+ self.context_weight = 1.0 - self.question_weight
1862
+ else:
1863
+ # Both provided - validate they sum to 1.0
1864
+ total_weight = self.question_weight + self.context_weight
1865
+ if not abs(total_weight - 1.0) < 0.0001: # Allow small floating point error
1866
+ raise ValueError(
1867
+ f"question_weight ({self.question_weight}) + context_weight ({self.context_weight}) "
1868
+ f"must equal 1.0 (got {total_weight}). These weights determine the relative importance "
1869
+ f"of question vs context similarity in the combined score."
1870
+ )
1871
+
1872
+ return self
1873
+
1874
+
1776
1875
  class SearchParametersModel(BaseModel):
1777
1876
  model_config = ConfigDict(use_enum_values=True, extra="forbid")
1778
1877
  num_results: Optional[int] = 10
@@ -28,6 +28,7 @@ from dao_ai.genie.cache.base import (
28
28
  SQLCacheEntry,
29
29
  )
30
30
  from dao_ai.genie.cache.core import execute_sql_via_warehouse
31
+ from dao_ai.genie.cache.in_memory_semantic import InMemorySemanticCacheService
31
32
  from dao_ai.genie.cache.lru import LRUCacheService
32
33
  from dao_ai.genie.cache.semantic import SemanticCacheService
33
34
 
@@ -38,6 +39,7 @@ __all__ = [
38
39
  "SQLCacheEntry",
39
40
  "execute_sql_via_warehouse",
40
41
  # Cache implementations
42
+ "InMemorySemanticCacheService",
41
43
  "LRUCacheService",
42
44
  "SemanticCacheService",
43
45
  ]
@@ -38,7 +38,7 @@ def execute_sql_via_warehouse(
38
38
  w: WorkspaceClient = warehouse.workspace_client
39
39
  warehouse_id: str = str(warehouse.warehouse_id)
40
40
 
41
- logger.trace("Executing cached SQL", layer=layer_name, sql_prefix=sql[:100])
41
+ logger.trace("Executing cached SQL", layer=layer_name, sql=sql[:100])
42
42
 
43
43
  statement_response: StatementResponse = w.statement_execution.execute_statement(
44
44
  statement=sql,