dao-ai 0.1.18__tar.gz → 0.1.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dao_ai-0.1.18 → dao_ai-0.1.19}/PKG-INFO +3 -2
- {dao_ai-0.1.18 → dao_ai-0.1.19}/README.md +2 -1
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/README.md +75 -15
- dao_ai-0.1.19/config/examples/04_genie/genie_in_memory_semantic_cache.yaml +148 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/README.md +2 -1
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/examples.md +3 -2
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/key-capabilities.md +69 -5
- {dao_ai-0.1.18 → dao_ai-0.1.19}/pyproject.toml +1 -1
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/config.py +99 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/__init__.py +2 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/core.py +1 -1
- dao_ai-0.1.19/src/dao_ai/genie/cache/in_memory_semantic.py +871 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/lru.py +15 -11
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/semantic.py +52 -18
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/genie.py +28 -3
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie.py +8 -9
- dao_ai-0.1.19/tests/dao_ai/test_in_memory_semantic_cache.py +1144 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/.gitignore +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/.python-version +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/CHANGELOG.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/CONTRIBUTING.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/CONTRIBUTORS.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/LICENSE +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/Makefile +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/app.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/01_getting_started/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/01_getting_started/minimal.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/custom_mcp.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/external_mcp.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/filtered_mcp.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/managed_mcp.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/02_mcp/slack_integration.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/instruction_aware_reranking.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/03_reranking/vector_search_with_reranking.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_basic.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_lru_cache.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_semantic_cache.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/04_genie/genie_with_conversation_id.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/conversation_summarization.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/in_memory_basic.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/lakebase_persistence.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/05_memory/postgres_persistence.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/06_on_behalf_of_user/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/06_on_behalf_of_user/obo_basic.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/07_human_in_the_loop/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/07_human_in_the_loop/human_in_the_loop.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/08_guardrails/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/08_guardrails/guardrails_basic.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/09_structured_output/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/09_structured_output/structured_output.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/agent_bricks.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/10_agent_integrations/kasal.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/prompt_optimization.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/11_prompt_engineering/prompt_registry.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/combined_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/context_management.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/custom_field_validation.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/limit_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/logging_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/pii_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/retry_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/12_middleware/tool_selector_middleware.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/supervisor_pattern.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/13_orchestration/swarm_pattern.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/14_basic_tools/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/14_basic_tools/sql_tool_example.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/brick_store.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/deep_research.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/executive_assistant.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/genie_and_genie_mcp.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/genie_vector_search_hybrid.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_instructed.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_lakebase.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/hardware_store_swarm.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/quick_serve_restaurant.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/15_complete_applications/reservations_system.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/full_pipeline.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/config/examples/16_instructed_retriever/instructed_retriever.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/appointments.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/appointments_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_queries.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_tables.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/brand_rep_demo_validation.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/customers.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/customers_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/dim_stores.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/dim_stores_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_performance.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_performance_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_tasks.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/employee_tasks_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/inventory.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/inventory_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/managers.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/managers_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/product_data.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/products.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/dais2025/task_assignments.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/inventory.snappy.parquet +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/inventory.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/products.snappy.parquet +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/hardware_store/products.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/fulfil_item_orders.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_description.csv +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_description.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_raw.csv +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/items_raw.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/orders_raw.csv +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/data/quick_serve_restaurant/orders_raw.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/databricks.yaml.template +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/architecture.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/cli-reference.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/configuration-reference.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/contributing.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/faq.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/README.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/retail_supervisor.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/hardware_store/retail_swarm.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/images/genie.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/python-api.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/quick_serve_restaurant/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/quick_serve_restaurant/quick-serve-restaurant.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/docs/why-dao.md +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/environment.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/dais2025/examples.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/deep_research/examples.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/executive_assistant/examples.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/hardware_store/examples.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/quick_serve_restaurant/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/examples/quick_serve_restaurant/examples.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/extract_store_numbers.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_inventory_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_inventory_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_product_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_product_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_by_number.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_inventory_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/dais2025/find_store_inventory_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_inventory_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_inventory_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_product_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_product_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_store_inventory_by_sku.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/hardware_store/find_store_inventory_by_upc.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/insert_coffee_order.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/lookup_items_by_descriptions.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/match_historical_item_order_by_date.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/functions/quick_serve_restaurant/match_item_by_description_and_price.sql +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/01_ingest_and_transform.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/02_provision_vector_search.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/03_provision_lakebase.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/04_unity_catalog_tools.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/05_deploy_agent.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/06_generate_evaluation_data.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/07_run_evaluation.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/08_run_examples.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/09_evaluate_inferences.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/10_optimize_prompts.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/notebooks/99_scratchpad.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/requirements.txt +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/schemas/bundle_config_schema.json +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/schemas/model_config_schema.json +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/handlers.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/model_serving.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/resources.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/apps/server.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/catalog.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/cli.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/evaluation.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/cache/base.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/genie/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/graph.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/hooks/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/hooks/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/logging.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/base.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/databricks.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/memory/postgres.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/messages.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/assertions.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/base.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/context_editing.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/guardrails.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/human_in_the_loop.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/message_validation.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/model_call_limit.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/model_retry.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/pii.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/summarization.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_call_limit.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_retry.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/middleware/tool_selector.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/models.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/nodes.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/optimization.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/supervisor.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/orchestration/swarm.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/instructed_retriever_decomposition.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/instruction_reranker.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/router.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/prompts/verifier.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/base.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/providers/databricks.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/state.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/__init__.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/agent.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/core.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/email.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/instructed_retriever.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/instruction_reranker.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/mcp.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/memory.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/python.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/router.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/search.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/slack.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/sql.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/time.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/unity_catalog.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/vector_search.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/tools/verifier.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/types.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/utils.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/src/dao_ai/vector_search.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/config/test_model_config.yaml +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/conftest.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_context_editing.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_model_call_limit.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_model_retry.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_pii.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_call_limit.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_retry.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/middleware/test_tool_selector.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_agent_response_format.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_assertions_middleware.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_catalog.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_chat_history.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_config.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_databricks.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_evaluation.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_function_parsing.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_conversation_ids_in_outputs.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_databricks_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_genie_room_model.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_guardrail_retry.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hitl_config_model.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hitl_responses_agent.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_hooks.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_human_in_the_loop.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_inference.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_inference_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_input_output_structure.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instructed_retriever.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instruction_reranker.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_instruction_reranker_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_interrupt_type.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_llm_interrupt_handling.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_filtering.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_filtering_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_mcp_function_model.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_message_validation_middleware.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_messages.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_models.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_optimization.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_postgres_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_prompt_optimizations.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_prompts.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_reranking.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_reranking_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_resources_model_genie_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_response_format.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_responses_agent_structured_output_unit.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_router.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_semantic_cache_context.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_sql_tool.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_sql_tool_integration.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_state.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_summarization_inference.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_swarm_middleware.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_tools.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_types.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_unity_catalog.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_utils.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_utils_type_from_fqn.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_vector_search.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_verifier.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/test_warehouse_model.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/dao_ai/weather_server_mcp.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/hardware_store/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/hardware_store/test_graph.py +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/images/doritos_upc.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/images/lays_upc.png +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/quick_serve_restaurant/.gitkeep +0 -0
- {dao_ai-0.1.18 → dao_ai-0.1.19}/tests/test_mcp_app_auth.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dao-ai
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.19
|
|
4
4
|
Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
|
|
5
5
|
Project-URL: Homepage, https://github.com/natefleming/dao-ai
|
|
6
6
|
Project-URL: Documentation, https://natefleming.github.io/dao-ai
|
|
@@ -409,7 +409,8 @@ The `config/examples/` directory contains ready-to-use configurations organized
|
|
|
409
409
|
|
|
410
410
|
- `01_getting_started/minimal.yaml` - Simplest possible agent
|
|
411
411
|
- `02_tools/vector_search_with_reranking.yaml` - RAG with improved accuracy
|
|
412
|
-
- `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with
|
|
412
|
+
- `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with PostgreSQL semantic caching
|
|
413
|
+
- `04_genie/genie_in_memory_semantic_cache.yaml` - NL-to-SQL with in-memory semantic caching (no database)
|
|
413
414
|
- `05_memory/conversation_summarization.yaml` - Long conversation handling
|
|
414
415
|
- `06_on_behalf_of_user/obo_basic.yaml` - User-level access control
|
|
415
416
|
- `07_human_in_the_loop/human_in_the_loop.yaml` - Approval workflows
|
|
@@ -330,7 +330,8 @@ The `config/examples/` directory contains ready-to-use configurations organized
|
|
|
330
330
|
|
|
331
331
|
- `01_getting_started/minimal.yaml` - Simplest possible agent
|
|
332
332
|
- `02_tools/vector_search_with_reranking.yaml` - RAG with improved accuracy
|
|
333
|
-
- `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with
|
|
333
|
+
- `04_genie/genie_semantic_cache.yaml` - NL-to-SQL with PostgreSQL semantic caching
|
|
334
|
+
- `04_genie/genie_in_memory_semantic_cache.yaml` - NL-to-SQL with in-memory semantic caching (no database)
|
|
334
335
|
- `05_memory/conversation_summarization.yaml` - Long conversation handling
|
|
335
336
|
- `06_on_behalf_of_user/obo_basic.yaml` - User-level access control
|
|
336
337
|
- `07_human_in_the_loop/human_in_the_loop.yaml` - Approval workflows
|
|
@@ -52,10 +52,20 @@ flowchart TB
|
|
|
52
52
|
|
|
53
53
|
| File | Description |
|
|
54
54
|
|------|-------------|
|
|
55
|
-
| [`genie_cached.yaml`](./genie_cached.yaml) | Two-tier caching with LRU and semantic cache |
|
|
55
|
+
| [`genie_cached.yaml`](./genie_cached.yaml) | Two-tier caching with LRU and PostgreSQL semantic cache |
|
|
56
|
+
| [`genie_in_memory_semantic_cache.yaml`](./genie_in_memory_semantic_cache.yaml) | In-memory semantic cache (no database required) |
|
|
56
57
|
|
|
57
58
|
## Cache Tiers
|
|
58
59
|
|
|
60
|
+
DAO provides two L2 semantic cache implementations:
|
|
61
|
+
|
|
62
|
+
| Implementation | Best For | Database Required |
|
|
63
|
+
|----------------|----------|-------------------|
|
|
64
|
+
| **PostgreSQL Semantic Cache** | Production multi-instance deployments, large cache sizes (thousands+), cross-instance sharing | Yes (PostgreSQL with pg_vector) |
|
|
65
|
+
| **In-Memory Semantic Cache** | Single-instance deployments, dev/test, no database access, moderate cache sizes (hundreds to low thousands) | No (in-memory only) |
|
|
66
|
+
|
|
67
|
+
Both use the same L2 distance algorithm and support conversation context awareness for consistent behavior.
|
|
68
|
+
|
|
59
69
|
```mermaid
|
|
60
70
|
%%{init: {'theme': 'base'}}%%
|
|
61
71
|
graph TB
|
|
@@ -70,8 +80,9 @@ graph TB
|
|
|
70
80
|
subgraph L2["🧠 L2: Semantic Cache"]
|
|
71
81
|
SEM1["<b>Type:</b> Similarity match"]
|
|
72
82
|
SEM2["<b>Speed:</b> ~50ms"]
|
|
73
|
-
SEM3["<b>
|
|
74
|
-
SEM4["<b>
|
|
83
|
+
SEM3["<b>Options:</b> PostgreSQL or In-Memory"]
|
|
84
|
+
SEM4["<b>Threshold:</b> 0.85-0.95"]
|
|
85
|
+
SEM5["<b>TTL:</b> ttl: 3600 (1 hour)"]
|
|
75
86
|
end
|
|
76
87
|
end
|
|
77
88
|
|
|
@@ -81,21 +92,56 @@ graph TB
|
|
|
81
92
|
|
|
82
93
|
## Configuration
|
|
83
94
|
|
|
95
|
+
### PostgreSQL Semantic Cache (Multi-Instance)
|
|
96
|
+
|
|
84
97
|
```yaml
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
98
|
+
genie_tool:
|
|
99
|
+
function:
|
|
100
|
+
type: factory
|
|
101
|
+
name: dao_ai.tools.create_genie_tool
|
|
102
|
+
args:
|
|
103
|
+
genie_room: *retail_genie_room
|
|
89
104
|
|
|
90
105
|
# ⚡ L1: LRU Cache - Exact match
|
|
91
|
-
|
|
92
|
-
|
|
106
|
+
lru_cache_parameters:
|
|
107
|
+
warehouse: *warehouse
|
|
108
|
+
capacity: 100
|
|
109
|
+
time_to_live_seconds: 3600
|
|
110
|
+
|
|
111
|
+
# 🧠 L2: PostgreSQL Semantic Cache - Similar queries
|
|
112
|
+
semantic_cache_parameters:
|
|
113
|
+
database: *postgres_db
|
|
114
|
+
warehouse: *warehouse
|
|
115
|
+
embedding_model: *embedding_model
|
|
116
|
+
similarity_threshold: 0.85
|
|
117
|
+
time_to_live_seconds: 3600
|
|
118
|
+
context_window_size: 3
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### In-Memory Semantic Cache (Single-Instance)
|
|
122
|
+
|
|
123
|
+
```yaml
|
|
124
|
+
genie_tool:
|
|
125
|
+
function:
|
|
126
|
+
type: factory
|
|
127
|
+
name: dao_ai.tools.create_genie_tool
|
|
128
|
+
args:
|
|
129
|
+
genie_room: *retail_genie_room
|
|
130
|
+
|
|
131
|
+
# Optional L1: LRU Cache - Exact match
|
|
132
|
+
# lru_cache_parameters:
|
|
133
|
+
# warehouse: *warehouse
|
|
134
|
+
# capacity: 100
|
|
135
|
+
# time_to_live_seconds: 3600
|
|
93
136
|
|
|
94
|
-
# 🧠
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
137
|
+
# 🧠 In-Memory Semantic Cache - No database required
|
|
138
|
+
in_memory_semantic_cache_parameters:
|
|
139
|
+
warehouse: *warehouse
|
|
140
|
+
embedding_model: *embedding_model
|
|
141
|
+
similarity_threshold: 0.85
|
|
142
|
+
time_to_live_seconds: 604800 # 1 week
|
|
143
|
+
capacity: 1000 # LRU eviction when full
|
|
144
|
+
context_window_size: 3
|
|
99
145
|
```
|
|
100
146
|
|
|
101
147
|
## Cache Flow
|
|
@@ -210,8 +256,10 @@ agents:
|
|
|
210
256
|
|
|
211
257
|
## Quick Start
|
|
212
258
|
|
|
259
|
+
### PostgreSQL Semantic Cache
|
|
260
|
+
|
|
213
261
|
```bash
|
|
214
|
-
# Run with
|
|
262
|
+
# Run with PostgreSQL semantic cache
|
|
215
263
|
dao-ai chat -c config/examples/04_genie/genie_cached.yaml
|
|
216
264
|
|
|
217
265
|
# Test caching behavior
|
|
@@ -220,6 +268,18 @@ dao-ai chat -c config/examples/04_genie/genie_cached.yaml
|
|
|
220
268
|
> Show me Q4 revenue # Semantic cache hit (~50ms)
|
|
221
269
|
```
|
|
222
270
|
|
|
271
|
+
### In-Memory Semantic Cache
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
# Run with in-memory semantic cache (no database required)
|
|
275
|
+
dao-ai chat -c config/examples/04_genie/genie_in_memory_semantic_cache.yaml
|
|
276
|
+
|
|
277
|
+
# Test caching behavior
|
|
278
|
+
> What are the total sales for Q4? # First query - Genie hit
|
|
279
|
+
> What are the total sales for Q4? # Semantic cache hit (~50ms)
|
|
280
|
+
> Show me Q4 revenue # Semantic cache hit (~50ms)
|
|
281
|
+
```
|
|
282
|
+
|
|
223
283
|
## Cache Monitoring
|
|
224
284
|
|
|
225
285
|
```bash
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# yaml-language-server: $schema=../../../schemas/model_config_schema.json
|
|
2
|
+
#
|
|
3
|
+
# Example configuration for Genie with in-memory semantic caching:
|
|
4
|
+
# - In-Memory Semantic Cache: Similarity search without external database
|
|
5
|
+
# - Optional LRU Cache (L1): Fast O(1) exact match lookup
|
|
6
|
+
#
|
|
7
|
+
# This configuration is ideal for:
|
|
8
|
+
# - Environments without access to PostgreSQL or Databricks Lakebase
|
|
9
|
+
# - Single-instance deployments (cache not shared across instances)
|
|
10
|
+
# - Moderate cache sizes (hundreds to low thousands of entries)
|
|
11
|
+
# - Cases where cache persistence across restarts is not required
|
|
12
|
+
#
|
|
13
|
+
# Cache flow: Question → LRU (exact match) → In-Memory Semantic (similarity) → Genie API
|
|
14
|
+
# On cache hit, the cached SQL is re-executed against the warehouse for fresh data.
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
schemas:
|
|
18
|
+
|
|
19
|
+
quick_serve_restaurant_schema: &quick_serve_restaurant_schema
|
|
20
|
+
catalog_name: retail_consumer_goods # Unity Catalog name
|
|
21
|
+
schema_name: quick_serve_restaurant # Schema within the catalog
|
|
22
|
+
|
|
23
|
+
resources:
|
|
24
|
+
llms:
|
|
25
|
+
# Primary LLM for general tasks
|
|
26
|
+
default_llm: &default_llm
|
|
27
|
+
name: databricks-claude-sonnet-4
|
|
28
|
+
temperature: 0.1 # Low temperature for consistent responses
|
|
29
|
+
max_tokens: 8192 # Maximum tokens per response
|
|
30
|
+
on_behalf_of_user: False
|
|
31
|
+
|
|
32
|
+
# Embedding model for semantic similarity search
|
|
33
|
+
embedding_model: &embedding_model
|
|
34
|
+
name: databricks-gte-large-en # Text embedding model
|
|
35
|
+
on_behalf_of_user: False
|
|
36
|
+
|
|
37
|
+
warehouses:
|
|
38
|
+
# Warehouse for executing SQL queries (used by semantic cache)
|
|
39
|
+
shared_endpoint_warehouse: &shared_endpoint_warehouse
|
|
40
|
+
name: "Shared Endpoint Warehouse" # Human-readable name
|
|
41
|
+
description: "A warehouse for shared endpoints" # Description
|
|
42
|
+
warehouse_id: 148ccb90800933a1 # Databricks warehouse ID
|
|
43
|
+
on_behalf_of_user: False
|
|
44
|
+
|
|
45
|
+
genie_rooms:
|
|
46
|
+
# Genie space for retail data queries
|
|
47
|
+
retail_genie_room: &retail_genie_room
|
|
48
|
+
name: "Retail AI Genie Room" # Human-readable name
|
|
49
|
+
description: "A room for Genie agents to interact" # Description
|
|
50
|
+
space_id:
|
|
51
|
+
env: RETAIL_AI_GENIE_SPACE_ID
|
|
52
|
+
default_value: 01f01c91f1f414d59daaefd2b7ec82ea
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# =============================================================================
|
|
56
|
+
# MEMORY CONFIGURATION
|
|
57
|
+
# =============================================================================
|
|
58
|
+
# Configure in-memory storage for agent conversations and state persistence
|
|
59
|
+
|
|
60
|
+
memory: &memory
|
|
61
|
+
# Conversation checkpointing for state persistence
|
|
62
|
+
checkpointer:
|
|
63
|
+
name: default_checkpointer # Checkpointer identifier (type inferred as memory - no database)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
tools:
|
|
67
|
+
genie_tool: &genie_tool
|
|
68
|
+
name: genie
|
|
69
|
+
function:
|
|
70
|
+
type: factory # Tool type: factory function
|
|
71
|
+
name: dao_ai.tools.create_genie_tool # Factory function path
|
|
72
|
+
args: # Arguments passed to factory
|
|
73
|
+
name: my_genie_tool
|
|
74
|
+
description: Answers questions about retail products and inventory
|
|
75
|
+
genie_room: *retail_genie_room # Reference to Genie room config
|
|
76
|
+
|
|
77
|
+
# Optional L1 Cache: LRU (Least Recently Used) - Fast exact match
|
|
78
|
+
# Uncomment to enable LRU cache in front of semantic cache
|
|
79
|
+
# lru_cache_parameters:
|
|
80
|
+
# warehouse: *shared_endpoint_warehouse # Warehouse to re-execute cached SQL
|
|
81
|
+
# capacity: 100 # Maximum number of cached entries
|
|
82
|
+
# time_to_live_seconds: 3600 # Cache entries expire after 1 hour
|
|
83
|
+
|
|
84
|
+
# In-Memory Semantic Cache: Similarity-based lookup with LRU eviction (NO database required)
|
|
85
|
+
# Default settings optimized for ~30 users on 8GB machine:
|
|
86
|
+
# - Capacity: 10,000 entries (~200MB, ~330 queries/user)
|
|
87
|
+
# - Eviction: LRU (Least Recently Used) keeps hot queries cached
|
|
88
|
+
# - TTL: 1 week (accommodates weekly work patterns)
|
|
89
|
+
# - Memory: ~4-5% of 8GB system
|
|
90
|
+
in_memory_semantic_cache_parameters:
|
|
91
|
+
warehouse: *shared_endpoint_warehouse # Warehouse used to re-execute cached SQL
|
|
92
|
+
embedding_model: *embedding_model # Reference to embedding model
|
|
93
|
+
# embedding_dims: 1024 # Auto-detected if omitted (recommended)
|
|
94
|
+
similarity_threshold: 0.85 # Minimum similarity for question matching (L2 distance to 0-1)
|
|
95
|
+
context_similarity_threshold: 0.80 # Minimum similarity for context matching
|
|
96
|
+
# time_to_live_seconds: 604800 # Cache entries expire after 1 week (default)
|
|
97
|
+
# capacity: 10000 # Max cache entries, LRU eviction when full (default: 10000, ~200MB)
|
|
98
|
+
# # Adjust for different scenarios:
|
|
99
|
+
# # - Small (5-10 users): capacity: 1000 (~20MB)
|
|
100
|
+
# # - Medium (30 users): capacity: 10000 (~200MB, default)
|
|
101
|
+
# # - Large (100 users): capacity: 30000 (~600MB)
|
|
102
|
+
# # - Unlimited: capacity: null (not recommended - unbounded memory)
|
|
103
|
+
context_window_size: 3 # Number of previous conversation turns to include
|
|
104
|
+
# max_context_tokens: 2000 # Maximum context length (default: 2000)
|
|
105
|
+
# question_weight: 0.6 # Weight for question similarity (default: 0.6)
|
|
106
|
+
# context_weight: 0.4 # Weight for context similarity (default: 0.4)
|
|
107
|
+
# Note: question_weight + context_weight must equal 1.0
|
|
108
|
+
|
|
109
|
+
persist_conversation: true
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
agents:
|
|
113
|
+
genie: &genie
|
|
114
|
+
name: genie # Agent identifier
|
|
115
|
+
description: "Genie Agent with In-Memory Semantic Cache"
|
|
116
|
+
model: *default_llm # Reference to LLM configuration
|
|
117
|
+
tools: # Tools available to this agent
|
|
118
|
+
- *genie_tool
|
|
119
|
+
prompt: | # System prompt defining agent behavior
|
|
120
|
+
Answers questions about retail products and inventory using natural language.
|
|
121
|
+
You have access to a semantic cache that remembers similar questions to provide faster responses.
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
app:
|
|
125
|
+
name: genie_in_memory_semantic_cache_dao # Application name
|
|
126
|
+
description: "Multi-agent system that talks to genie with in-memory semantic caching (no database required)"
|
|
127
|
+
log_level: DEBUG # Logging level for the application
|
|
128
|
+
environment_vars: # Secrets to inject at runtime
|
|
129
|
+
RETAIL_AI_DATABRICKS_CLIENT_ID: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_CLIENT_ID}}"
|
|
130
|
+
RETAIL_AI_DATABRICKS_CLIENT_SECRET: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_CLIENT_SECRET}}"
|
|
131
|
+
RETAIL_AI_DATABRICKS_HOST: "{{secrets/retail_consumer_goods/RETAIL_AI_DATABRICKS_HOST}}"
|
|
132
|
+
registered_model: # MLflow registered model configuration
|
|
133
|
+
schema: *quick_serve_restaurant_schema # Schema where model will be registered
|
|
134
|
+
name: dao_genie_in_memory_semantic_cache # Model name in MLflow registry
|
|
135
|
+
endpoint_name: dao_genie_in_memory_semantic_cache # Model serving endpoint name
|
|
136
|
+
tags: # Tags for resource organization
|
|
137
|
+
business: rcg # Business unit identifier
|
|
138
|
+
streaming: true # Indicates streaming capabilities
|
|
139
|
+
permissions: # Model serving permissions
|
|
140
|
+
- principals: [users] # Grant access to all users
|
|
141
|
+
entitlements:
|
|
142
|
+
- CAN_QUERY # Query permissions
|
|
143
|
+
agents: # List of agents included in the system
|
|
144
|
+
- *genie # Genie agent with in-memory cache
|
|
145
|
+
orchestration: # Agent orchestration configuration
|
|
146
|
+
memory: *memory # In-memory conversation persistence
|
|
147
|
+
swarm: # Swarm orchestration pattern
|
|
148
|
+
default_agent: *genie # Default agent for routing
|
|
@@ -52,7 +52,8 @@ Or jump directly to the category that matches your current need.
|
|
|
52
52
|
**Natural language to SQL**
|
|
53
53
|
- Basic Genie integration
|
|
54
54
|
- LRU caching for performance
|
|
55
|
-
-
|
|
55
|
+
- PostgreSQL semantic caching with embeddings
|
|
56
|
+
- In-memory semantic caching (no database required)
|
|
56
57
|
|
|
57
58
|
👉 Query data with natural language, optimized with caching
|
|
58
59
|
|
|
@@ -120,9 +120,10 @@ Improve performance and reduce costs through intelligent caching.
|
|
|
120
120
|
| Example | Description |
|
|
121
121
|
|---------|-------------|
|
|
122
122
|
| `genie_lru_cache.yaml` | LRU (Least Recently Used) caching for Genie |
|
|
123
|
-
| `genie_semantic_cache.yaml` | Two-tier semantic caching with embeddings |
|
|
123
|
+
| `genie_semantic_cache.yaml` | Two-tier semantic caching with PostgreSQL embeddings |
|
|
124
|
+
| `genie_in_memory_semantic_cache.yaml` | In-memory semantic caching (no database required) |
|
|
124
125
|
|
|
125
|
-
**Prerequisites:** PostgreSQL or Lakebase for
|
|
126
|
+
**Prerequisites:** PostgreSQL or Lakebase required for `genie_semantic_cache.yaml` only
|
|
126
127
|
**Next:** Add persistence in `05_memory/`
|
|
127
128
|
|
|
128
129
|
---
|
|
@@ -202,7 +202,7 @@ graph TB
|
|
|
202
202
|
l1_cache["L1: LRU Cache (In-Memory)<br/>• Capacity: 1000 entries<br/>• Hash-based lookup<br/>• O(1) exact string match"]
|
|
203
203
|
l1_hit{Hit?}
|
|
204
204
|
|
|
205
|
-
l2_cache["L2: Semantic Cache
|
|
205
|
+
l2_cache["L2: Semantic Cache<br/>• PostgreSQL (pg_vector) OR In-Memory<br/>• Dual embeddings (question + context)<br/>• L2 distance similarity<br/>• Conversation context aware<br/>• Partitioned by Genie space ID"]
|
|
206
206
|
l2_hit{Hit?}
|
|
207
207
|
|
|
208
208
|
genie["Genie API<br/>(Expensive call)<br/>Natural language to SQL"]
|
|
@@ -247,7 +247,11 @@ The **LRU (Least Recently Used) Cache** provides instant lookups for exact quest
|
|
|
247
247
|
|
|
248
248
|
### Semantic Cache (L2)
|
|
249
249
|
|
|
250
|
-
The **Semantic Cache** uses PostgreSQL with pg_vector to find similar questions
|
|
250
|
+
The **Semantic Cache** finds similar questions even when worded differently using vector embeddings and similarity search. It includes **conversation context awareness** to improve matching in multi-turn conversations. DAO provides two implementations:
|
|
251
|
+
|
|
252
|
+
#### PostgreSQL-Based Semantic Cache
|
|
253
|
+
|
|
254
|
+
Uses PostgreSQL with pg_vector for persistent, multi-instance shared caching:
|
|
251
255
|
|
|
252
256
|
| Parameter | Default | Description |
|
|
253
257
|
|-----------|---------|-------------|
|
|
@@ -259,6 +263,62 @@ The **Semantic Cache** uses PostgreSQL with pg_vector to find similar questions
|
|
|
259
263
|
| `table_name` | `genie_semantic_cache` | Table name for cache storage |
|
|
260
264
|
| `context_window_size` | 3 | Number of previous conversation turns to include |
|
|
261
265
|
| `context_similarity_threshold` | 0.80 | Minimum similarity for conversation context |
|
|
266
|
+
| `question_weight` | 0.6 | Weight for question similarity in combined score (0.0-1.0) |
|
|
267
|
+
| `context_weight` | 0.4 | Weight for context similarity (computed as 1 - question_weight if not set) |
|
|
268
|
+
| `embedding_dims` | Auto-detected | Embedding vector dimensions (auto-detected from model if not specified) |
|
|
269
|
+
| `max_context_tokens` | 2000 | Maximum token length for conversation context embeddings |
|
|
270
|
+
|
|
271
|
+
**Best for:** Production deployments with multiple instances, large cache sizes (thousands+), and cross-instance cache sharing
|
|
272
|
+
|
|
273
|
+
#### In-Memory Semantic Cache
|
|
274
|
+
|
|
275
|
+
Uses in-memory storage without external database dependencies:
|
|
276
|
+
|
|
277
|
+
```yaml
|
|
278
|
+
genie_tool:
|
|
279
|
+
function:
|
|
280
|
+
type: factory
|
|
281
|
+
name: dao_ai.tools.create_genie_tool
|
|
282
|
+
args:
|
|
283
|
+
genie_room: *retail_genie_room
|
|
284
|
+
|
|
285
|
+
# In-memory semantic cache (no database required)
|
|
286
|
+
in_memory_semantic_cache_parameters:
|
|
287
|
+
warehouse: *warehouse
|
|
288
|
+
embedding_model: *embedding_model # Default: databricks-gte-large-en
|
|
289
|
+
similarity_threshold: 0.85 # 0.0-1.0 (default: 0.85)
|
|
290
|
+
time_to_live_seconds: 86400 # 1 day (default), use -1 or None for never expire
|
|
291
|
+
capacity: 1000 # Max cache entries (LRU eviction when full)
|
|
292
|
+
context_window_size: 3 # Number of previous conversation turns
|
|
293
|
+
context_similarity_threshold: 0.80 # Minimum context similarity
|
|
294
|
+
question_weight: 0.6 # Weight for question similarity
|
|
295
|
+
context_weight: 0.4 # Weight for context similarity
|
|
296
|
+
embedding_dims: null # Auto-detected from model
|
|
297
|
+
max_context_tokens: 2000 # Max context token length
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
| Parameter | Default | Description |
|
|
301
|
+
|-----------|---------|-------------|
|
|
302
|
+
| `similarity_threshold` | 0.85 | Minimum similarity for cache hit (0.0-1.0) |
|
|
303
|
+
| `time_to_live_seconds` | 86400 | Cache entry lifetime (-1 = never expire) |
|
|
304
|
+
| `embedding_model` | `databricks-gte-large-en` | Model for generating question embeddings |
|
|
305
|
+
| `warehouse` | Required | Databricks warehouse for SQL execution |
|
|
306
|
+
| `capacity` | 1000 | Maximum cache entries (LRU eviction when full) |
|
|
307
|
+
| `context_window_size` | 3 | Number of previous conversation turns to include |
|
|
308
|
+
| `context_similarity_threshold` | 0.80 | Minimum similarity for conversation context |
|
|
309
|
+
| `question_weight` | 0.6 | Weight for question similarity in combined score (0.0-1.0) |
|
|
310
|
+
| `context_weight` | 0.4 | Weight for context similarity (computed as 1 - question_weight if not set) |
|
|
311
|
+
| `embedding_dims` | Auto-detected | Embedding vector dimensions (auto-detected from model if not specified) |
|
|
312
|
+
| `max_context_tokens` | 2000 | Maximum token length for conversation context embeddings |
|
|
313
|
+
|
|
314
|
+
**Best for:** Single-instance deployments, development/testing, scenarios without database access, moderate cache sizes (hundreds to low thousands)
|
|
315
|
+
|
|
316
|
+
**Key Differences:**
|
|
317
|
+
- ✅ **No external database required** - Simpler setup and deployment
|
|
318
|
+
- ✅ **Same L2 distance algorithm** - Consistent behavior with PostgreSQL version
|
|
319
|
+
- ⚠️ **Per-instance cache** - Each replica has its own cache (not shared)
|
|
320
|
+
- ⚠️ **No persistence** - Cache is lost on restart
|
|
321
|
+
- ⚠️ **Memory-bound** - Limited by available RAM; use capacity limits
|
|
262
322
|
|
|
263
323
|
**Best for:** Catching rephrased questions like:
|
|
264
324
|
- "What's our inventory status?" ≈ "Show me stock levels"
|
|
@@ -271,6 +331,12 @@ The semantic cache tracks conversation history to resolve ambiguous references:
|
|
|
271
331
|
|
|
272
332
|
This works by embedding both the current question *and* recent conversation turns, then computing a weighted similarity score. This dramatically improves cache hits in multi-turn conversations where users naturally use pronouns and references.
|
|
273
333
|
|
|
334
|
+
**Weight Configuration:**
|
|
335
|
+
The `question_weight` and `context_weight` parameters control how question vs conversation context similarity are combined into the final score:
|
|
336
|
+
- Both weights must sum to 1.0 (if only one is provided, the other is computed automatically)
|
|
337
|
+
- Higher `question_weight` prioritizes matching the exact question wording
|
|
338
|
+
- Higher `context_weight` prioritizes matching the conversation context, useful for multi-turn conversations with pronouns and references
|
|
339
|
+
|
|
274
340
|
### Cache Behavior
|
|
275
341
|
|
|
276
342
|
1. **SQL Caching, Not Results**: The cache stores the *generated SQL query*, not the query results. On a cache hit, the SQL is re-executed against your warehouse, ensuring **data freshness**.
|
|
@@ -283,12 +349,10 @@ This works by embedding both the current question *and* recent conversation turn
|
|
|
283
349
|
- Genie generates fresh SQL
|
|
284
350
|
- The new SQL is cached
|
|
285
351
|
|
|
286
|
-
4. **Multi-Instance Aware**: Each LRU cache is per-instance (in Model Serving, each replica has its own). The semantic cache is shared across all instances
|
|
352
|
+
4. **Multi-Instance Aware**: Each LRU cache is per-instance (in Model Serving, each replica has its own). The PostgreSQL semantic cache is shared across all instances. The in-memory semantic cache is per-instance (not shared).
|
|
287
353
|
|
|
288
354
|
5. **Space ID Partitioning**: Cache entries are isolated per Genie space, preventing cross-space cache pollution.
|
|
289
355
|
|
|
290
|
-
For more details on semantic cache configuration, see [docs/semantic_cache_weight_configuration.md](semantic_cache_weight_configuration.md).
|
|
291
|
-
|
|
292
356
|
## 5. Vector Search Reranking
|
|
293
357
|
|
|
294
358
|
**The problem:** Vector search (semantic similarity) is fast but sometimes returns loosely related results. It's like a librarian who quickly grabs 50 books that *might* be relevant.
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "dao-ai"
|
|
7
|
-
version = "0.1.18"
|
|
7
|
+
version = "0.1.19"
|
|
8
8
|
description = "DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -1773,6 +1773,105 @@ class GenieSemanticCacheParametersModel(BaseModel):
|
|
|
1773
1773
|
return self
|
|
1774
1774
|
|
|
1775
1775
|
|
|
1776
|
+
# Memory estimation for capacity planning:
|
|
1777
|
+
# - Each entry: ~20KB (8KB question embedding + 8KB context embedding + 4KB strings/overhead)
|
|
1778
|
+
# - 1,000 entries: ~20MB (0.4% of 8GB)
|
|
1779
|
+
# - 5,000 entries: ~100MB (2% of 8GB)
|
|
1780
|
+
# - 10,000 entries: ~200MB (4-5% of 8GB) - default for ~30 users
|
|
1781
|
+
# - 20,000 entries: ~400MB (8-10% of 8GB)
|
|
1782
|
+
# Default 10,000 entries provides ~330 queries per user for 30 users.
|
|
1783
|
+
class GenieInMemorySemanticCacheParametersModel(BaseModel):
|
|
1784
|
+
"""
|
|
1785
|
+
Configuration for in-memory semantic cache (no database required).
|
|
1786
|
+
|
|
1787
|
+
This cache stores embeddings and cache entries entirely in memory, providing
|
|
1788
|
+
semantic similarity matching without requiring external database dependencies
|
|
1789
|
+
like PostgreSQL or Databricks Lakebase.
|
|
1790
|
+
|
|
1791
|
+
Default settings are tuned for ~30 users on an 8GB machine:
|
|
1792
|
+
- Capacity: 10,000 entries (~200MB memory, ~330 queries per user)
|
|
1793
|
+
- Eviction: LRU (Least Recently Used) - keeps frequently accessed queries
|
|
1794
|
+
- TTL: 1 week (accommodates weekly work patterns and batch jobs)
|
|
1795
|
+
- Memory overhead: ~4-5% of 8GB system
|
|
1796
|
+
|
|
1797
|
+
The LRU eviction strategy ensures hot queries stay cached while cold queries
|
|
1798
|
+
are evicted, providing better hit rates than FIFO eviction.
|
|
1799
|
+
|
|
1800
|
+
For larger deployments or memory-constrained environments, adjust capacity and TTL accordingly.
|
|
1801
|
+
|
|
1802
|
+
Use this when:
|
|
1803
|
+
- No external database access is available
|
|
1804
|
+
- Single-instance deployments (cache not shared across instances)
|
|
1805
|
+
- Cache persistence across restarts is not required
|
|
1806
|
+
- Cache sizes are moderate (hundreds to low thousands of entries)
|
|
1807
|
+
|
|
1808
|
+
For multi-instance deployments or large cache sizes, use GenieSemanticCacheParametersModel
|
|
1809
|
+
with PostgreSQL backend instead.
|
|
1810
|
+
"""
|
|
1811
|
+
|
|
1812
|
+
model_config = ConfigDict(use_enum_values=True, extra="forbid")
|
|
1813
|
+
time_to_live_seconds: int | None = (
|
|
1814
|
+
60 * 60 * 24 * 7
|
|
1815
|
+
) # 1 week default (604800 seconds), None or negative = never expires
|
|
1816
|
+
similarity_threshold: float = 0.85 # Minimum similarity for question matching (L2 distance converted to 0-1 scale)
|
|
1817
|
+
context_similarity_threshold: float = 0.80 # Minimum similarity for context matching (L2 distance converted to 0-1 scale)
|
|
1818
|
+
question_weight: Optional[float] = (
|
|
1819
|
+
0.6 # Weight for question similarity in combined score (0-1). If not provided, computed as 1 - context_weight
|
|
1820
|
+
)
|
|
1821
|
+
context_weight: Optional[float] = (
|
|
1822
|
+
None # Weight for context similarity in combined score (0-1). If not provided, computed as 1 - question_weight
|
|
1823
|
+
)
|
|
1824
|
+
embedding_model: str | LLMModel = "databricks-gte-large-en"
|
|
1825
|
+
embedding_dims: int | None = None # Auto-detected if None
|
|
1826
|
+
warehouse: WarehouseModel
|
|
1827
|
+
capacity: int | None = (
|
|
1828
|
+
10000 # Maximum cache entries. ~200MB for 10000 entries (1024-dim embeddings). LRU eviction when full. None = unlimited (not recommended for production).
|
|
1829
|
+
)
|
|
1830
|
+
context_window_size: int = 3 # Number of previous turns to include for context
|
|
1831
|
+
max_context_tokens: int = (
|
|
1832
|
+
2000 # Maximum context length to prevent extremely long embeddings
|
|
1833
|
+
)
|
|
1834
|
+
|
|
1835
|
+
@model_validator(mode="after")
|
|
1836
|
+
def compute_and_validate_weights(self) -> Self:
|
|
1837
|
+
"""
|
|
1838
|
+
Compute missing weight and validate that question_weight + context_weight = 1.0.
|
|
1839
|
+
|
|
1840
|
+
Either question_weight or context_weight (or both) can be provided.
|
|
1841
|
+
The missing one will be computed as 1.0 - provided_weight.
|
|
1842
|
+
If both are provided, they must sum to 1.0.
|
|
1843
|
+
"""
|
|
1844
|
+
if self.question_weight is None and self.context_weight is None:
|
|
1845
|
+
# Both missing - use defaults
|
|
1846
|
+
self.question_weight = 0.6
|
|
1847
|
+
self.context_weight = 0.4
|
|
1848
|
+
elif self.question_weight is None:
|
|
1849
|
+
# Compute question_weight from context_weight
|
|
1850
|
+
if not (0.0 <= self.context_weight <= 1.0):
|
|
1851
|
+
raise ValueError(
|
|
1852
|
+
f"context_weight must be between 0.0 and 1.0, got {self.context_weight}"
|
|
1853
|
+
)
|
|
1854
|
+
self.question_weight = 1.0 - self.context_weight
|
|
1855
|
+
elif self.context_weight is None:
|
|
1856
|
+
# Compute context_weight from question_weight
|
|
1857
|
+
if not (0.0 <= self.question_weight <= 1.0):
|
|
1858
|
+
raise ValueError(
|
|
1859
|
+
f"question_weight must be between 0.0 and 1.0, got {self.question_weight}"
|
|
1860
|
+
)
|
|
1861
|
+
self.context_weight = 1.0 - self.question_weight
|
|
1862
|
+
else:
|
|
1863
|
+
# Both provided - validate they sum to 1.0
|
|
1864
|
+
total_weight = self.question_weight + self.context_weight
|
|
1865
|
+
if not abs(total_weight - 1.0) < 0.0001: # Allow small floating point error
|
|
1866
|
+
raise ValueError(
|
|
1867
|
+
f"question_weight ({self.question_weight}) + context_weight ({self.context_weight}) "
|
|
1868
|
+
f"must equal 1.0 (got {total_weight}). These weights determine the relative importance "
|
|
1869
|
+
f"of question vs context similarity in the combined score."
|
|
1870
|
+
)
|
|
1871
|
+
|
|
1872
|
+
return self
|
|
1873
|
+
|
|
1874
|
+
|
|
1776
1875
|
class SearchParametersModel(BaseModel):
|
|
1777
1876
|
model_config = ConfigDict(use_enum_values=True, extra="forbid")
|
|
1778
1877
|
num_results: Optional[int] = 10
|
|
@@ -28,6 +28,7 @@ from dao_ai.genie.cache.base import (
|
|
|
28
28
|
SQLCacheEntry,
|
|
29
29
|
)
|
|
30
30
|
from dao_ai.genie.cache.core import execute_sql_via_warehouse
|
|
31
|
+
from dao_ai.genie.cache.in_memory_semantic import InMemorySemanticCacheService
|
|
31
32
|
from dao_ai.genie.cache.lru import LRUCacheService
|
|
32
33
|
from dao_ai.genie.cache.semantic import SemanticCacheService
|
|
33
34
|
|
|
@@ -38,6 +39,7 @@ __all__ = [
|
|
|
38
39
|
"SQLCacheEntry",
|
|
39
40
|
"execute_sql_via_warehouse",
|
|
40
41
|
# Cache implementations
|
|
42
|
+
"InMemorySemanticCacheService",
|
|
41
43
|
"LRUCacheService",
|
|
42
44
|
"SemanticCacheService",
|
|
43
45
|
]
|
|
@@ -38,7 +38,7 @@ def execute_sql_via_warehouse(
|
|
|
38
38
|
w: WorkspaceClient = warehouse.workspace_client
|
|
39
39
|
warehouse_id: str = str(warehouse.warehouse_id)
|
|
40
40
|
|
|
41
|
-
logger.trace("Executing cached SQL", layer=layer_name,
|
|
41
|
+
logger.trace("Executing cached SQL", layer=layer_name, sql=sql[:100])
|
|
42
42
|
|
|
43
43
|
statement_response: StatementResponse = w.statement_execution.execute_statement(
|
|
44
44
|
statement=sql,
|