dao-ai 0.0.36__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. dao_ai/__init__.py +29 -0
  2. dao_ai/cli.py +195 -30
  3. dao_ai/config.py +770 -244
  4. dao_ai/genie/__init__.py +1 -22
  5. dao_ai/genie/cache/__init__.py +1 -2
  6. dao_ai/genie/cache/base.py +20 -70
  7. dao_ai/genie/cache/core.py +75 -0
  8. dao_ai/genie/cache/lru.py +44 -21
  9. dao_ai/genie/cache/semantic.py +390 -109
  10. dao_ai/genie/core.py +35 -0
  11. dao_ai/graph.py +27 -253
  12. dao_ai/hooks/__init__.py +9 -6
  13. dao_ai/hooks/core.py +22 -190
  14. dao_ai/memory/__init__.py +10 -0
  15. dao_ai/memory/core.py +23 -5
  16. dao_ai/memory/databricks.py +389 -0
  17. dao_ai/memory/postgres.py +2 -2
  18. dao_ai/messages.py +6 -4
  19. dao_ai/middleware/__init__.py +125 -0
  20. dao_ai/middleware/assertions.py +778 -0
  21. dao_ai/middleware/base.py +50 -0
  22. dao_ai/middleware/core.py +61 -0
  23. dao_ai/middleware/guardrails.py +415 -0
  24. dao_ai/middleware/human_in_the_loop.py +228 -0
  25. dao_ai/middleware/message_validation.py +554 -0
  26. dao_ai/middleware/summarization.py +192 -0
  27. dao_ai/models.py +1177 -108
  28. dao_ai/nodes.py +118 -161
  29. dao_ai/optimization.py +664 -0
  30. dao_ai/orchestration/__init__.py +52 -0
  31. dao_ai/orchestration/core.py +287 -0
  32. dao_ai/orchestration/supervisor.py +264 -0
  33. dao_ai/orchestration/swarm.py +226 -0
  34. dao_ai/prompts.py +126 -29
  35. dao_ai/providers/databricks.py +126 -381
  36. dao_ai/state.py +139 -21
  37. dao_ai/tools/__init__.py +8 -5
  38. dao_ai/tools/core.py +57 -4
  39. dao_ai/tools/email.py +280 -0
  40. dao_ai/tools/genie.py +47 -24
  41. dao_ai/tools/mcp.py +4 -3
  42. dao_ai/tools/memory.py +50 -0
  43. dao_ai/tools/python.py +4 -12
  44. dao_ai/tools/search.py +14 -0
  45. dao_ai/tools/slack.py +1 -1
  46. dao_ai/tools/unity_catalog.py +8 -6
  47. dao_ai/tools/vector_search.py +16 -9
  48. dao_ai/utils.py +72 -8
  49. dao_ai-0.1.1.dist-info/METADATA +1878 -0
  50. dao_ai-0.1.1.dist-info/RECORD +62 -0
  51. dao_ai/chat_models.py +0 -204
  52. dao_ai/guardrails.py +0 -112
  53. dao_ai/tools/genie/__init__.py +0 -236
  54. dao_ai/tools/human_in_the_loop.py +0 -100
  55. dao_ai-0.0.36.dist-info/METADATA +0 -951
  56. dao_ai-0.0.36.dist-info/RECORD +0 -47
  57. {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/WHEEL +0 -0
  58. {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/entry_points.txt +0 -0
  59. {dao_ai-0.0.36.dist-info → dao_ai-0.1.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1878 @@
1
+ Metadata-Version: 2.4
2
+ Name: dao-ai
3
+ Version: 0.1.1
4
+ Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
5
+ Project-URL: Homepage, https://github.com/natefleming/dao-ai
6
+ Project-URL: Documentation, https://natefleming.github.io/dao-ai
7
+ Project-URL: Repository, https://github.com/natefleming/dao-ai
8
+ Project-URL: Issues, https://github.com/natefleming/dao-ai/issues
9
+ Project-URL: Changelog, https://github.com/natefleming/dao-ai/blob/main/CHANGELOG.md
10
+ Author-email: Nate Fleming <nate.fleming@databricks.com>, Nate Fleming <nate.fleming@gmail.com>
11
+ Maintainer-email: Nate Fleming <nate.fleming@databricks.com>
12
+ License: MIT
13
+ License-File: LICENSE
14
+ Keywords: agents,ai,databricks,langchain,langgraph,llm,multi-agent,orchestration,vector-search,workflow
15
+ Classifier: Development Status :: 3 - Alpha
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: 3.13
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
26
+ Classifier: Topic :: System :: Distributed Computing
27
+ Requires-Python: >=3.11
28
+ Requires-Dist: databricks-agents>=1.9.0
29
+ Requires-Dist: databricks-langchain[memory]>=0.12.1
30
+ Requires-Dist: databricks-mcp>=0.5.0
31
+ Requires-Dist: databricks-sdk[openai]>=0.76.0
32
+ Requires-Dist: ddgs>=9.10.0
33
+ Requires-Dist: dspy>=2.6.27
34
+ Requires-Dist: flashrank>=0.2.10
35
+ Requires-Dist: gepa>=0.0.22
36
+ Requires-Dist: grandalf>=0.8
37
+ Requires-Dist: langchain-community>=0.3.26
38
+ Requires-Dist: langchain-mcp-adapters>=0.2.1
39
+ Requires-Dist: langchain-tavily>=0.2.15
40
+ Requires-Dist: langchain>=1.2.0
41
+ Requires-Dist: langgraph-checkpoint-postgres>=3.0.2
42
+ Requires-Dist: langgraph>=1.0.5
43
+ Requires-Dist: langmem>=0.0.30
44
+ Requires-Dist: loguru>=0.7.3
45
+ Requires-Dist: mcp>=1.24.0
46
+ Requires-Dist: mlflow>=3.8.1
47
+ Requires-Dist: nest-asyncio>=1.6.0
48
+ Requires-Dist: openevals>=0.1.3
49
+ Requires-Dist: openpyxl>=3.1.5
50
+ Requires-Dist: psycopg[binary,pool]>=3.3.2
51
+ Requires-Dist: pydantic>=2.12.5
52
+ Requires-Dist: python-dotenv>=1.2.1
53
+ Requires-Dist: pyyaml>=6.0.2
54
+ Requires-Dist: rich>=14.2.0
55
+ Requires-Dist: scipy>=1.14.0
56
+ Requires-Dist: sqlparse>=0.5.4
57
+ Requires-Dist: tomli>=2.3.0
58
+ Requires-Dist: unitycatalog-ai[databricks]>=0.3.2
59
+ Provides-Extra: databricks
60
+ Requires-Dist: databricks-connect>=16.0.0; extra == 'databricks'
61
+ Requires-Dist: databricks-vectorsearch>=0.63; extra == 'databricks'
62
+ Requires-Dist: pyspark>=3.5.0; extra == 'databricks'
63
+ Provides-Extra: dev
64
+ Requires-Dist: mypy>=1.19.1; extra == 'dev'
65
+ Requires-Dist: pre-commit>=4.5.1; extra == 'dev'
66
+ Requires-Dist: pytest>=9.0.2; extra == 'dev'
67
+ Requires-Dist: ruff>=0.14.9; extra == 'dev'
68
+ Provides-Extra: docs
69
+ Requires-Dist: mkdocs-material>=9.7.1; extra == 'docs'
70
+ Requires-Dist: mkdocs>=1.6.1; extra == 'docs'
71
+ Requires-Dist: mkdocstrings[python]>=1.0.0; extra == 'docs'
72
+ Provides-Extra: test
73
+ Requires-Dist: pytest-cov>=7.0.0; extra == 'test'
74
+ Requires-Dist: pytest-mock>=3.15.1; extra == 'test'
75
+ Requires-Dist: pytest>=9.0.2; extra == 'test'
76
+ Description-Content-Type: text/markdown
77
+
78
+ # DAO: Declarative Agent Orchestration
79
+
80
+ [![Version](https://img.shields.io/badge/version-0.1.1-blue.svg)](CHANGELOG.md)
81
+ [![Python](https://img.shields.io/badge/python-3.11+-green.svg)](https://www.python.org/)
82
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
83
+
84
+ **Production-grade AI agents defined in YAML, powered by LangGraph, deployed on Databricks.**
85
+
86
+ DAO is an **infrastructure-as-code framework** for building, deploying, and managing multi-agent AI systems. Instead of writing boilerplate Python code to wire up agents, tools, and orchestration, you define everything declaratively in YAML configuration files.
87
+
88
+ ```yaml
89
+ # Define an agent in 10 lines of YAML
90
+ agents:
91
+ product_expert:
92
+ name: product_expert
93
+ model: *claude_sonnet
94
+ tools:
95
+ - *vector_search_tool
96
+ - *genie_tool
97
+ prompt: |
98
+ You are a product expert. Answer questions about inventory and pricing.
99
+ ```
100
+
101
+ ### 🎨 Visual Configuration Studio
102
+
103
+ Prefer a visual interface? Check out **[DAO AI Builder](https://github.com/natefleming/dao-ai-builder)** — a React-based web application that provides a graphical interface for creating and editing DAO configurations. Perfect for:
104
+
105
+ - **Exploring** DAO's capabilities through an intuitive UI
106
+ - **Learning** the configuration structure with guided forms
107
+ - **Building** agents visually without writing YAML manually
108
+ - **Importing** and editing existing configurations
109
+
110
+ DAO AI Builder generates valid YAML configurations that work seamlessly with this framework. Use whichever workflow suits you best — visual builder or direct YAML editing.
111
+
112
+ <p align="center">
113
+ <img src="https://raw.githubusercontent.com/natefleming/dao-ai-builder/6ca07d2b977d9509b75edfb2e0f45681c840a931/docs/images/dao-ai-builder-screenshot.png" width="700" alt="DAO AI Builder Screenshot">
114
+ </p>
115
+
116
+ ---
117
+
118
+ ## 📚 Table of Contents
119
+
120
+ - [Why DAO?](#why-dao)
121
+ - [For Newcomers to AI Agents](#for-newcomers-to-ai-agents)
122
+ - [Comparing Databricks AI Agent Platforms](#comparing-databricks-ai-agent-platforms)
123
+ - [When to Use DAO](#when-to-use-dao)
124
+ - [When to Use Agent Bricks](#when-to-use-agent-bricks)
125
+ - [When to Use Kasal](#when-to-use-kasal)
126
+ - [Using All Three Together](#using-all-three-together)
127
+ - [Architecture](#architecture)
128
+ - [Orchestration Patterns](#orchestration-patterns)
129
+ - [Key Capabilities](#key-capabilities)
130
+ - [1. Multi-Tool Support](#1-multi-tool-support)
131
+ - [2. On-Behalf-Of User Support](#2-on-behalf-of-user-support)
132
+ - [3. Advanced Caching (Genie Queries)](#3-advanced-caching-genie-queries)
133
+ - [4. Vector Search Reranking](#4-vector-search-reranking)
134
+ - [5. Human-in-the-Loop Approvals](#5-human-in-the-loop-approvals)
135
+ - [6. Memory & State Persistence](#6-memory--state-persistence)
136
+ - [7. MLflow Prompt Registry Integration](#7-mlflow-prompt-registry-integration)
137
+ - [8. Automated Prompt Optimization](#8-automated-prompt-optimization)
138
+ - [9. Guardrails & Response Quality Middleware](#9-guardrails--response-quality-middleware)
139
+ - [10. Conversation Summarization](#10-conversation-summarization)
140
+ - [11. Structured Output (Response Format)](#11-structured-output-response-format)
141
+ - [12. Custom Input & Custom Output Support](#12-custom-input--custom-output-support)
142
+ - [13. Hook System](#13-hook-system)
143
+ - [Quick Start](#quick-start)
144
+ - [Prerequisites](#prerequisites)
145
+ - [Installation](#installation)
146
+ - [Your First Agent](#your-first-agent)
147
+ - [Configuration Reference](#configuration-reference)
148
+ - [Example Configurations](#example-configurations)
149
+ - [CLI Reference](#cli-reference)
150
+ - [Python API](#python-api)
151
+ - [Project Structure](#project-structure)
152
+ - [Common Questions](#common-questions)
153
+ - [Contributing](#contributing)
154
+ - [License](#license)
155
+
156
+ ---
157
+
158
+ ## Why DAO?
159
+
160
+ ### For Newcomers to AI Agents
161
+
162
+ **What is an AI Agent?**
163
+ Think of an AI agent as an intelligent assistant that can actually *do things*, not just chat. Here's the difference:
164
+
165
+ - **Chatbot**: "The temperature in San Francisco is 65°F" (just talks)
166
+ - **AI Agent**: Checks weather APIs, searches your calendar, books a restaurant, and sends you a reminder (takes action)
167
+
168
+ An AI agent can:
169
+ - **Reason** about what steps are needed to accomplish a goal
170
+ - **Use tools** like databases, APIs, and search engines to gather information
171
+ - **Make decisions** about which actions to take next
172
+ - **Coordinate** with other specialized agents to handle complex requests
173
+
174
+ **Real-world example:**
175
+ *"Find products that are low on stock and email the warehouse manager"*
176
+
177
+ - A chatbot would say: *"You should check inventory and contact the warehouse manager"*
178
+ - An AI agent would: Query the database, identify low-stock items, compose an email with the list, and send it
179
+
180
+ **What is Databricks?**
181
+ Databricks is a cloud platform where companies store and analyze their data. Think of it as a combination of:
182
+ - **Data warehouse** (where your business data lives)
183
+ - **AI/ML platform** (where you train and deploy models)
184
+ - **Governance layer** (controlling who can access what data)
185
+
186
+ Databricks provides several tools that DAO integrates with:
187
+ - **Unity Catalog**: Your organization's data catalog with security and permissions
188
+ - **Model Serving**: Turns AI models into APIs that applications can call
189
+ - **Vector Search**: Finds relevant information using semantic similarity (understanding meaning, not just keywords)
190
+ - **Genie**: Lets people ask questions in plain English and automatically generates SQL queries
191
+ - **MLflow**: Tracks experiments, versions models, and manages deployments
192
+
193
+ **Why DAO?**
194
+ DAO brings all these Databricks capabilities together into a unified framework for building AI agent systems. Instead of writing hundreds of lines of Python code to connect everything, you describe what you want in a YAML configuration file, and DAO handles the wiring for you.
195
+
196
+ **Think of it as:**
197
+ - **Traditional approach**: Building with LEGO bricks one by one (writing Python code)
198
+ - **DAO approach**: Using a blueprint that tells you exactly how to assemble the pieces (YAML configuration)
199
+
200
+ ---
201
+
202
+ ## Comparing Databricks AI Agent Platforms
203
+
204
+ Databricks offers **three complementary approaches** to building AI agents. Each is powerful and purpose-built for different use cases, teams, and workflows.
205
+
206
+ | Aspect | **DAO** (This Framework) | **Databricks Agent Bricks** | **Kasal** |
207
+ |--------|--------------------------|----------------------------|----------------------|
208
+ | **Interface** | YAML configuration files | Visual GUI (AI Playground) | Visual workflow designer (drag-and-drop canvas) |
209
+ | **Workflow** | Code-first, Git-native | UI-driven, wizard-based | Visual flowchart design with real-time monitoring |
210
+ | **Target Users** | ML Engineers, Platform Teams, DevOps | Data Analysts, Citizen Developers, Business Users | Business analysts, workflow designers, operations teams |
211
+ | **Learning Curve** | Moderate (requires YAML/config knowledge) | Low (guided wizards and templates) | Low (visual drag-and-drop, no coding required) |
212
+ | **Underlying Engine** | LangGraph (state graph orchestration) | Databricks-managed agent runtime | CrewAI (role-based agent collaboration) |
213
+ | **Orchestration** | Multi-agent patterns (Supervisor, Swarm) | Multi-agent Supervisor | CrewAI sequential/hierarchical processes |
214
+ | **Agent Philosophy** | State-driven workflows with graph execution | Automated optimization and template-based | Role-based agents with defined tasks and goals |
215
+ | **Tool Support** | Python, Factory, UC Functions, MCP, Agent Endpoints, Genie | UC Functions, MCP, Genie, Agent Endpoints | Genie, Custom APIs, UC Functions, Data connectors |
216
+ | **Advanced Caching** | LRU + Semantic caching (Genie SQL caching) | Standard platform caching | Standard platform caching |
217
+ | **Memory/State** | PostgreSQL, Lakebase, In-Memory, Custom backends | Built-in ephemeral state per conversation | Built-in conversation state (entity memory with limitations) |
218
+ | **Middleware/Hooks** | Assert/Suggest/Refine, Custom lifecycle hooks, Guardrails | None (optimization via automated tuning) | None (workflow-level control via UI) |
219
+ | **Deployment** | Databricks Asset Bundles, MLflow, CI/CD pipelines | One-click deployment to Model Serving | Databricks Marketplace or deploy from source |
220
+ | **Version Control** | Full Git integration, code review, branches | Workspace-based (not Git-native) | Source-based (Git available if deployed from source) |
221
+ | **Customization** | Unlimited (Python code, custom tools) | Template-based workflows | Workflow-level customization via visual designer |
222
+ | **Configuration** | Declarative YAML, infrastructure-as-code | Visual configuration in UI | Visual workflow canvas with property panels |
223
+ | **Monitoring** | MLflow tracking, custom logging | Built-in evaluation dashboard | Real-time execution tracking with detailed logs |
224
+ | **Evaluation** | Custom evaluation frameworks | Automated benchmarking and optimization | Visual execution traces and performance insights |
225
+ | **Best For** | Production multi-agent systems with complex requirements | Rapid prototyping and automated optimization | Visual workflow design and operational monitoring |
226
+
227
+ ### When to Use DAO
228
+
229
+ ✅ **Code-first workflow** — You prefer infrastructure-as-code with full Git integration, code reviews, and CI/CD pipelines
230
+ ✅ **Advanced caching** — You need LRU + semantic caching for Genie queries to optimize costs at scale
231
+ ✅ **Custom middleware** — You require assertion/validation logic, custom lifecycle hooks, or human-in-the-loop workflows
232
+ ✅ **Custom tools** — You're building proprietary Python tools or integrating with internal systems beyond standard integrations
233
+ ✅ **Swarm orchestration** — You need peer-to-peer agent handoffs (not just top-down supervisor routing)
234
+ ✅ **Stateful memory** — You require persistent conversation state in PostgreSQL, Lakebase, or custom backends
235
+ ✅ **Configuration reuse** — You want to maintain YAML templates, share them across teams, and version them in Git
236
+ ✅ **Regulated environments** — You need deterministic, auditable, and reproducible configurations for compliance
237
+ ✅ **Complex state management** — Your workflows require sophisticated state graphs with conditional branching and loops
238
+
239
+ ### When to Use Agent Bricks
240
+
241
+ ✅ **Rapid prototyping** — You want to build and test an agent in minutes using guided wizards
242
+ ✅ **No-code/low-code** — You prefer GUI-based configuration over writing YAML or designing workflows
243
+ ✅ **Automated optimization** — You want the platform to automatically tune prompts, models, and benchmarks for you
244
+ ✅ **Business user access** — Non-technical stakeholders (analysts, product managers) need to build or modify agents
245
+ ✅ **Getting started** — You're new to AI agents and want pre-built templates (Information Extraction, Knowledge Assistant, Custom LLM)
246
+ ✅ **Standard use cases** — Your needs are met by UC Functions, MCP servers, Genie, and agent endpoints
247
+ ✅ **Multi-agent supervisor** — You need top-down orchestration with a supervisor routing to specialists
248
+ ✅ **Quality optimization** — You want automated benchmarking and continuous improvement based on feedback
249
+
250
+ ### When to Use Kasal
251
+
252
+ ✅ **Visual workflow design** — You want to see and design agent interactions as a flowchart diagram
253
+ ✅ **Operational monitoring** — You need real-time visibility into agent execution with detailed logs and traces
254
+ ✅ **Role-based agents** — Your use case fits the CrewAI model of agents with specific roles, goals, and tasks
255
+ ✅ **Business process automation** — You're automating workflows where agents collaborate sequentially or hierarchically
256
+ ✅ **Data analysis pipelines** — You need agents to query, analyze, and visualize data with clear execution paths
257
+ ✅ **Content generation workflows** — Your agents collaborate on research, writing, and content creation tasks
258
+ ✅ **Team visibility** — Operations teams need to monitor and understand what agents are doing in real-time
259
+ ✅ **Quick deployment** — You want to deploy from Databricks Marketplace with minimal setup
260
+ ✅ **Drag-and-drop simplicity** — You prefer designing workflows visually rather than writing configuration files
261
+
262
+ ### Using All Three Together
263
+
264
+ Many teams use **multiple approaches** in their workflow, playing to each platform's strengths:
265
+
266
+ #### Progressive Sophistication Path
267
+
268
+ 1. **Design in Kasal** → Visually prototype workflows and validate agent collaboration patterns
269
+ 2. **Optimize in Agent Bricks** → Take validated use cases and let Agent Bricks auto-tune them
270
+ 3. **Productionize in DAO** → For complex systems needing advanced features, rebuild in DAO with full control
271
+
272
+ #### Hybrid Architecture Patterns
273
+
274
+ **Pattern 1: Division by Audience**
275
+ - **Kasal**: Operations teams design and monitor customer support workflows
276
+ - **Agent Bricks**: Data analysts create optimized information extraction agents
277
+ - **DAO**: ML engineers build the underlying orchestration layer with custom tools
278
+
279
+ **Pattern 2: Composition via Endpoints**
280
+ - **Agent Bricks**: Creates a Knowledge Assistant for HR policies (optimized automatically)
281
+ - **Kasal**: Designs a visual workflow for employee onboarding that calls the HR agent
282
+ - **DAO**: Orchestrates enterprise-wide employee support with custom payroll tools, approval workflows, and the agents from both platforms
283
+
284
+ **Pattern 3: Development Lifecycle**
285
+ - **Week 1**: Rapid prototype in Agent Bricks to validate business value
286
+ - **Week 2**: Redesign workflow visually in Kasal for team review and monitoring
287
+ - **Week 3**: Productionize in DAO with advanced caching, middleware, and CI/CD
288
+
289
+ #### Real-World Example: Customer Support System
290
+
291
+ ```
292
+ ┌─────────────────────────────────────────────────────────┐
293
+ │ DAO (Orchestration Layer) │
294
+ │ • Advanced caching for FAQ queries │
295
+ │ • Custom middleware for compliance checking │
296
+ │ • Swarm pattern for complex issue routing │
297
+ └─────────────┬───────────────────────────┬───────────────┘
298
+ │ │
299
+ ▼ ▼
300
+ ┌──────────────────────────┐ ┌──────────────────────────┐
301
+ │ Agent Bricks Agents │ │ Kasal Workflows │
302
+ │ • Product Info Agent │ │ • Escalation Process │
303
+ │ (auto-optimized) │ │ • Order Tracking Flow │
304
+ │ • Returns Policy Agent │ │ (visual monitoring) │
305
+ │ (benchmarked) │ │ │
306
+ └──────────────────────────┘ └──────────────────────────┘
307
+ ```
308
+
309
+ #### Interoperability
310
+
311
+ All three platforms can call each other via **agent endpoints**:
312
+ - Deploy any agent to Databricks Model Serving
313
+ - Reference it as a tool using the `factory` tool type with `create_agent_endpoint_tool`
314
+ - Compose complex systems across platform boundaries
315
+
316
+ **Example:**
317
+ ```yaml
318
+ # In DAO configuration
319
+ resources:
320
+ llms:
321
+ external_agent: &external_agent
322
+ name: agent-bricks-hr-assistant # Agent Bricks endpoint name
323
+
324
+ tools:
325
+ hr_assistant:
326
+ function:
327
+ type: factory
328
+ name: dao_ai.tools.create_agent_endpoint_tool
329
+ args:
330
+ llm: *external_agent
331
+ name: hr_assistant
332
+ description: "HR assistant built in Agent Bricks"
333
+
334
+ workflow_monitor:
335
+ function:
336
+ type: factory
337
+ name: dao_ai.tools.create_agent_endpoint_tool
338
+ args:
339
+ llm:
340
+ name: kasal-workflow-monitor # Kasal endpoint name
341
+ name: workflow_monitor
342
+ description: "Workflow monitor built in Kasal"
343
+ ```
344
+
345
+ ---
346
+
347
+ ## Architecture
348
+
349
+ ### How It Works (Simple Explanation)
350
+
351
+ Think of DAO as a three-layer cake:
352
+
353
+ **1. Your Configuration (Top Layer)** 🎂
354
+ You write a YAML file describing what you want: which AI models, what data to access, what tools agents can use.
355
+
356
+ **2. DAO Framework (Middle Layer)** 🔧
357
+ DAO reads your YAML and automatically wires everything together using LangGraph (a workflow engine for AI agents).
358
+
359
+ **3. Databricks Platform (Bottom Layer)** ☁️
360
+ Your deployed agent runs on Databricks, accessing Unity Catalog data, calling AI models, and using other Databricks services.
361
+
362
+ ### Technical Architecture Diagram
363
+
364
+ For developers and architects, here's the detailed view:
365
+
366
+ ```
367
+ ┌─────────────────────────────────────────────────────────────────────────────┐
368
+ │ YAML Configuration │
369
+ │ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────┐ ┌─────────────────┐ │
370
+ │ │ Schemas │ │ Resources│ │ Tools │ │ Agents │ │ Orchestration │ │
371
+ │ └─────────┘ └─────────┘ └─────────┘ └─────────┘ └─────────────────┘ │
372
+ └─────────────────────────────────────────────────────────────────────────────┘
373
+
374
+
375
+ ┌─────────────────────────────────────────────────────────────────────────────┐
376
+ │ DAO Framework (Python) │
377
+ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────────┐ │
378
+ │ │ Config │ │ Graph │ │ Nodes │ │ Tool Factory │ │
379
+ │ │ Loader │ │ Builder │ │ Factory │ │ │ │
380
+ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────────┘ │
381
+ └─────────────────────────────────────────────────────────────────────────────┘
382
+
383
+
384
+ ┌─────────────────────────────────────────────────────────────────────────────┐
385
+ │ LangGraph Runtime │
386
+ │ ┌─────────────────────────────────────────────────────────────────────┐ │
387
+ │ │ Compiled State Graph │ │
388
+ │ │ ┌─────────┐ ┌─────────────┐ ┌─────────────────────────┐ │ │
389
+ │ │ │ Message │───▶│ Supervisor/ │───▶│ Specialized Agents │ │ │
390
+ │ │ │ Hook │ │ Swarm │ │ (Product, Orders, DIY) │ │ │
391
+ │ │ └─────────┘ └─────────────┘ └─────────────────────────┘ │ │
392
+ │ └─────────────────────────────────────────────────────────────────────┘ │
393
+ └─────────────────────────────────────────────────────────────────────────────┘
394
+
395
+
396
+ ┌─────────────────────────────────────────────────────────────────────────────┐
397
+ │ Databricks Platform │
398
+ │ ┌─────────┐ ┌─────────────┐ ┌─────────────┐ ┌──────────┐ ┌─────────┐ │
399
+ │ │ Model │ │ Unity │ │ Vector │ │ Genie │ │ MLflow │ │
400
+ │ │ Serving │ │ Catalog │ │ Search │ │ Spaces │ │ │ │
401
+ │ └─────────┘ └─────────────┘ └─────────────┘ └──────────┘ └─────────┘ │
402
+ └─────────────────────────────────────────────────────────────────────────────┘
403
+ ```
404
+
405
+ ### Orchestration Patterns
406
+
407
+ When you have multiple specialized agents, you need to decide how they work together. DAO supports two patterns:
408
+
409
+ **Think of it like a company:**
410
+ - **Supervisor Pattern** = Traditional hierarchy (manager assigns tasks to specialists)
411
+ - **Swarm Pattern** = Collaborative team (specialists hand off work to each other)
412
+
413
+ DAO supports both approaches for multi-agent coordination:
414
+
415
+ #### 1. Supervisor Pattern
416
+
417
+ **Best for:** Clear separation of responsibilities with centralized control
418
+
419
+ A central "supervisor" agent reads each user request and decides which specialist agent should handle it. Think of it like a call center manager routing calls to different departments.
420
+
421
+ **Example use case:** Hardware store assistant
422
+ - User asks about product availability → Routes to **Product Agent**
423
+ - User asks about order status → Routes to **Orders Agent**
424
+ - User asks for DIY advice → Routes to **DIY Agent**
425
+
426
+ **Configuration:**
427
+
428
+ ```yaml
429
+ orchestration:
430
+ supervisor:
431
+ model: *router_llm
432
+ prompt: |
433
+ Route queries to the appropriate specialist agent based on the content.
434
+ ```
435
+
436
+ ```
437
+ ┌─────────────┐
438
+ │ Supervisor │
439
+ └──────┬──────┘
440
+ ┌───────────────┼───────────────┐
441
+ ▼ ▼ ▼
442
+ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐
443
+ │ Product │ │ Orders │ │ DIY │
444
+ │ Agent │ │ Agent │ │ Agent │
445
+ └─────────────┘ └─────────────┘ └─────────────┘
446
+ ```
447
+
448
+ #### 2. Swarm Pattern
449
+
450
+ **Best for:** Complex, multi-step workflows where agents need to collaborate
451
+
452
+ Agents work more autonomously and can directly hand off tasks to each other. Think of it like a team of specialists who know when to involve their colleagues.
453
+
454
+ **Example use case:** Complex customer inquiry
455
+ 1. User: *"I need a drill for a home project, do we have any in stock, and can you suggest how to use it?"*
456
+ 2. **Product Agent** checks inventory → Finds drill in stock → Hands off to **DIY Agent**
457
+ 3. **DIY Agent** provides usage instructions → Done
458
+
459
+ No central supervisor needed — agents decide collaboratively.
460
+
461
+ **Configuration:**
462
+
463
+ ```yaml
464
+ orchestration:
465
+ swarm:
466
+ model: *default_llm
467
+ default_agent: *general_agent # Where to start
468
+ handoffs:
469
+ product_agent: [orders_agent, diy_agent] # Product agent can hand off to these
470
+ orders_agent: [product_agent] # Orders agent can hand off to Product
471
+ ```
472
+
473
+ ```
474
+ ┌─────────────┐ handoff ┌─────────────┐
475
+ │ Product │◄───────────────▶│ Orders │
476
+ │ Agent │ │ Agent │
477
+ └──────┬──────┘ └──────┬──────┘
478
+ │ handoff │
479
+ └──────────────┬────────────────┘
480
+
481
+ ┌─────────────┐
482
+ │ DIY │
483
+ │ Agent │
484
+ └─────────────┘
485
+ ```
486
+
487
+ ---
488
+
489
+ ## Key Capabilities
490
+
491
+ These are the powerful features that make DAO production-ready. Don't worry if some seem complex — you can start simple and add these capabilities as you need them.
492
+
493
+ ### 1. Multi-Tool Support
494
+
495
+ **What are tools?** Tools are actions an agent can perform — like querying a database, calling an API, or running custom code.
496
+
497
+ DAO supports four types of tools, each suited for different use cases:
498
+
499
+ | Tool Type | Use Case | Example |
500
+ |-----------|----------|---------|
501
+ | **Python** | Custom business logic | `dao_ai.tools.current_time_tool` |
502
+ | **Factory** | Complex initialization with config | `create_vector_search_tool(retriever=...)`, `create_agent_endpoint_tool(llm=...)` |
503
+ | **Unity Catalog** | Governed SQL functions | `catalog.schema.find_product_by_sku` |
504
+ | **MCP** | External services via Model Context Protocol | GitHub, Slack, custom APIs |
505
+
506
+ ```yaml
507
+ tools:
508
+ # Python function - direct import
509
+ time_tool:
510
+ function:
511
+ type: python
512
+ name: dao_ai.tools.current_time_tool
513
+
514
+ # Factory - initialized with config
515
+ search_tool:
516
+ function:
517
+ type: factory
518
+ name: dao_ai.tools.create_vector_search_tool
519
+ args:
520
+ retriever: *products_retriever
521
+
522
+ # Unity Catalog - governed SQL function
523
+ sku_lookup:
524
+ function:
525
+ type: unity_catalog
526
+ name: find_product_by_sku
527
+ schema: *retail_schema
528
+
529
+ # MCP - external service integration
530
+ github_mcp:
531
+ function:
532
+ type: mcp
533
+ transport: streamable_http
534
+ connection: *github_connection
535
+ ```
536
+
537
+ ### 2. On-Behalf-Of User Support
538
+
539
+ **What is this?** Many Databricks resources (like SQL warehouses, Genie spaces, and LLMs) can operate "on behalf of" the end user, using their permissions instead of the agent's service account credentials.
540
+
541
+ **Why this matters:**
542
+ - **Security**: Users can only access data they're authorized to see
543
+ - **Compliance**: Audit logs show the actual user who made the request, not a service account
544
+ - **Governance**: Unity Catalog permissions are enforced at the user level
545
+ - **Flexibility**: No need to grant broad permissions to a service account
546
+
547
+ **How it works:** When `on_behalf_of_user: true` is set, the resource inherits the calling user's identity and permissions from the API request.
548
+
549
+ **Supported resources:**
550
+ ```yaml
551
+ resources:
552
+ # LLMs - use caller's permissions for model access
553
+ llms:
554
+ claude: &claude
555
+ name: databricks-claude-3-7-sonnet
556
+ on_behalf_of_user: true # Inherits caller's model access
557
+
558
+ # Warehouses - execute SQL as the calling user
559
+ warehouses:
560
+ analytics: &analytics_warehouse
561
+ warehouse_id: abc123def456
562
+ on_behalf_of_user: true # Queries run with user's data permissions
563
+
564
+ # Genie - natural language queries with user's context
565
+ genie_rooms:
566
+ sales_genie: &sales_genie
567
+ space_id: xyz789
568
+ on_behalf_of_user: true # Genie uses caller's data access
569
+ ```
570
+
571
+ **Real-world example:**
572
+ Your agent helps employees query HR data. With `on_behalf_of_user: true`:
573
+ - Managers can see their team's salary data
574
+ - Individual contributors can only see their own data
575
+ - HR admins can see all data
576
+
577
+ The same agent code enforces different permissions for each user automatically.
578
+
579
+ **Important notes:**
580
+ - The calling application must pass the user's identity in the API request
581
+ - The user must have the necessary permissions on the underlying resources
582
+ - Not all Databricks resources support on-behalf-of functionality
583
+
584
+ ### 3. Advanced Caching (Genie Queries)
585
+
586
+ **Why caching matters:** When users ask similar questions repeatedly, you don't want to pay for the same AI processing over and over. Caching stores results so you can reuse them.
587
+
588
+ **What makes DAO's caching special:** Instead of just storing old answers (which become stale), DAO stores the **SQL query** that Genie generated. When a similar question comes in, DAO re-runs the SQL to get **fresh data** without calling the expensive Genie API again.
589
+
590
+ **💰 Cost savings:** If users frequently ask "What's our inventory?", the first query costs $X (Genie API call). Subsequent similar queries cost only pennies (just running SQL).
591
+
592
+ DAO provides **two-tier caching** for Genie natural language queries, dramatically reducing costs and latency:
593
+
594
+ ```yaml
595
+ genie_tool:
596
+ function:
597
+ type: factory
598
+ name: dao_ai.tools.create_genie_tool
599
+ args:
600
+ genie_room: *retail_genie_room
601
+
602
+ # L1: Fast O(1) exact match lookup
603
+ lru_cache_parameters:
604
+ warehouse: *warehouse
605
+ capacity: 1000 # Max cached queries (default: 1000)
606
+ time_to_live_seconds: 86400 # 1 day (default), use -1 or None for never expire
607
+
608
+ # L2: Semantic similarity search via pg_vector
609
+ semantic_cache_parameters:
610
+ database: *postgres_db
611
+ warehouse: *warehouse
612
+ embedding_model: *embedding_model # Default: databricks-gte-large-en
613
+ similarity_threshold: 0.85 # 0.0-1.0 (default: 0.85), higher = stricter
614
+ time_to_live_seconds: 86400 # 1 day (default), use -1 or None for never expire
615
+ table_name: genie_semantic_cache # Optional, default: genie_semantic_cache
616
+ ```
617
+
618
+ #### Cache Architecture
619
+
620
+ ```
621
+ ┌─────────────────────────────────────────────────────────────────────────────┐
622
+ │ Two-Tier Cache Flow │
623
+ ├─────────────────────────────────────────────────────────────────────────────┤
624
+ │ │
625
+ │ Question: "What products are low on stock?" │
626
+ │ │ │
627
+ │ ▼ │
628
+ │ ┌──────────────────────────────────────┐ │
629
+ │ │ L1: LRU Cache (In-Memory) │ ◄── O(1) exact string match │
630
+ │ │ • Capacity: 1000 entries │ Fastest lookup │
631
+ │ │ • Hash-based lookup │ │
632
+ │ └──────────────────────────────────────┘ │
633
+ │ │ Miss │
634
+ │ ▼ │
635
+ │ ┌──────────────────────────────────────┐ │
636
+ │ │ L2: Semantic Cache (PostgreSQL) │ ◄── Vector similarity search │
637
+ │ │ • pg_vector embeddings │ Catches rephrased questions │
638
+ │ │ • Conversation context aware │ Handles pronouns/references │
639
+ │ │ • L2 distance similarity │ │
640
+ │ │ • Partitioned by Genie space ID │ │
641
+ │ └──────────────────────────────────────┘ │
642
+ │ │ Miss │
643
+ │ ▼ │
644
+ │ ┌──────────────────────────────────────┐ │
645
+ │ │ Genie API │ ◄── Natural language to SQL │
646
+ │ │ (Expensive call) │ │
647
+ │ └──────────────────────────────────────┘ │
648
+ │ │ │
649
+ │ ▼ │
650
+ │ ┌──────────────────────────────────────┐ │
651
+ │ │ Execute SQL via Warehouse │ ◄── Always fresh data! │
652
+ │ └──────────────────────────────────────┘ │
653
+ │ │
654
+ └─────────────────────────────────────────────────────────────────────────────┘
655
+ ```
656
+
657
+ #### LRU Cache (L1)
658
+
659
+ The **LRU (Least Recently Used) Cache** provides instant lookups for exact question matches:
660
+
661
+ | Parameter | Default | Description |
662
+ |-----------|---------|-------------|
663
+ | `capacity` | 1000 | Maximum number of cached queries |
664
+ | `time_to_live_seconds` | 86400 | Cache entry lifetime (-1 = never expire) |
665
+ | `warehouse` | Required | Databricks warehouse for SQL execution |
666
+
667
+ **Best for:** Repeated exact queries, chatbot interactions, dashboard refreshes
668
+
669
+ #### Semantic Cache (L2)
670
+
671
+ The **Semantic Cache** uses PostgreSQL with pg_vector to find similar questions even when worded differently. It includes **conversation context awareness** to improve matching in multi-turn conversations:
672
+
673
+ | Parameter | Default | Description |
674
+ |-----------|---------|-------------|
675
+ | `similarity_threshold` | 0.85 | Minimum similarity for cache hit (0.0-1.0) |
676
+ | `time_to_live_seconds` | 86400 | Cache entry lifetime (-1 = never expire) |
677
+ | `embedding_model` | `databricks-gte-large-en` | Model for generating question embeddings |
678
+ | `database` | Required | PostgreSQL with pg_vector extension |
679
+ | `warehouse` | Required | Databricks warehouse for SQL execution |
680
+ | `table_name` | `genie_semantic_cache` | Table name for cache storage |
681
+ | `context_window_size` | 3 | Number of previous conversation turns to include |
682
+ | `context_similarity_threshold` | 0.80 | Minimum similarity for conversation context |
683
+
684
+ **Best for:** Catching rephrased questions like:
685
+ - "What's our inventory status?" ≈ "Show me stock levels"
686
+ - "Top selling products this month" ≈ "Best sellers in December"
687
+
688
+ **Conversation Context Awareness:**
689
+ The semantic cache tracks conversation history to resolve ambiguous references:
690
+ - **User:** "Show me products with low stock"
691
+ - **User:** "What about *them* in the warehouse?" ← Uses context to understand "them" = low stock products
692
+
693
+ This works by embedding both the current question *and* recent conversation turns, then computing a weighted similarity score. This dramatically improves cache hits in multi-turn conversations where users naturally use pronouns and references.
694
+
695
+ #### Cache Behavior
696
+
697
+ 1. **SQL Caching, Not Results**: The cache stores the *generated SQL query*, not the query results. On a cache hit, the SQL is re-executed against your warehouse, ensuring **data freshness**.
698
+
699
+ 2. **Conversation-Aware Matching**: The semantic cache uses a rolling window of recent conversation turns to provide context for similarity matching. This helps resolve pronouns and references like "them", "that", or "the same products" by considering what was discussed previously.
700
+
701
+ 3. **Refresh on Hit**: When a semantic cache entry is found but expired:
702
+ - The expired entry is deleted
703
+ - A cache miss is returned
704
+ - Genie generates fresh SQL
705
+ - The new SQL is cached
706
+
707
+ 4. **Multi-Instance Aware**: Each LRU cache is per-instance (in Model Serving, each replica has its own). The semantic cache is shared across all instances via PostgreSQL.
708
+
709
+ 5. **Space ID Partitioning**: Cache entries are isolated per Genie space, preventing cross-space cache pollution.
710
+
711
+ ### 4. Vector Search Reranking
712
+
713
+ **The problem:** Vector search (semantic similarity) is fast but sometimes returns loosely related results. It's like a librarian who quickly grabs 50 books that *might* be relevant.
714
+
715
+ **The solution:** Reranking is like having an expert review those 50 books and pick the best 5 that *actually* answer your question.
716
+
717
+ **Benefits:**
718
+ - ✅ More accurate search results
719
+ - ✅ Better user experience (relevant answers)
720
+ - ✅ No external API calls (runs locally with FlashRank)
721
+
722
+ DAO supports **two-stage retrieval** with FlashRank reranking to improve search relevance without external API calls:
723
+
724
+ ```yaml
725
+ retrievers:
726
+ products_retriever: &products_retriever
727
+ vector_store: *products_vector_store
728
+ columns: [product_id, name, description, price]
729
+ search_parameters:
730
+ num_results: 50 # Retrieve more candidates
731
+ query_type: ANN
732
+ rerank:
733
+ model: ms-marco-MiniLM-L-12-v2 # Local cross-encoder model
734
+ top_n: 5 # Return top 5 after reranking
735
+ ```
736
+
737
+ #### How It Works
738
+
739
+ ```
740
+ ┌─────────────────────────────────────────────────────────────────────────────┐
741
+ │ Two-Stage Retrieval Flow │
742
+ ├─────────────────────────────────────────────────────────────────────────────┤
743
+ │ │
744
+ │ Query: "heavy duty outdoor extension cord" │
745
+ │ │ │
746
+ │ ▼ │
747
+ │ ┌──────────────────────────────────────┐ │
748
+ │ │ Stage 1: Vector Similarity Search │ ◄── Fast, approximate matching │
749
+ │ │ • Returns 50 candidates │ Uses embedding similarity │
750
+ │ │ • Milliseconds latency │ │
751
+ │ └──────────────────────────────────────┘ │
752
+ │ │ │
753
+ │ ▼ 50 documents │
754
+ │ ┌──────────────────────────────────────┐ │
755
+ │ │ Stage 2: Cross-Encoder Rerank │ ◄── Precise relevance scoring │
756
+ │ │ • FlashRank (local, no API) │ Query-document interaction │
757
+ │ │ • Returns top 5 most relevant │ │
758
+ │ └──────────────────────────────────────┘ │
759
+ │ │ │
760
+ │ ▼ 5 documents (reordered by relevance) │
761
+ │ │
762
+ └─────────────────────────────────────────────────────────────────────────────┘
763
+ ```
764
+
765
+ #### Why Reranking?
766
+
767
+ | Approach | Pros | Cons |
768
+ |----------|------|------|
769
+ | **Vector Search Only** | Fast, scalable | Embedding similarity ≠ relevance |
770
+ | **Reranking** | More accurate relevance | Slightly higher latency |
771
+ | **Both (Two-Stage)** | Best of both worlds | Optimal quality/speed tradeoff |
772
+
773
+ Vector embeddings capture semantic similarity but may rank loosely related documents highly. Cross-encoder reranking evaluates query-document pairs directly, dramatically improving result quality for the end user.
774
+
775
+ #### Available Models
776
+
777
+ | Model | Speed | Quality | Use Case |
778
+ |-------|-------|---------|----------|
779
+ | `ms-marco-TinyBERT-L-2-v2` | ⚡⚡⚡ Fastest | Good | High-throughput, latency-sensitive |
780
+ | `ms-marco-MiniLM-L-6-v2` | ⚡⚡ Fast | Better | Balanced performance |
781
+ | `ms-marco-MiniLM-L-12-v2` | ⚡ Moderate | Best | Default, recommended |
782
+ | `rank-T5-flan` | Slower | Excellent | Maximum accuracy |
783
+
784
+ #### Configuration Options
785
+
786
+ ```yaml
787
+ rerank:
788
+ model: ms-marco-MiniLM-L-12-v2 # FlashRank model name
789
+ top_n: 10 # Documents to return (default: all)
790
+ cache_dir: /tmp/flashrank_cache # Model weights cache location
791
+ columns: [description, name] # Columns for Databricks Reranker (optional)
792
+ ```
793
+
794
+ **Note:** Model weights are downloaded automatically on first use (~20MB for MiniLM-L-12-v2).
795
+
796
+ ### 5. Human-in-the-Loop Approvals
797
+
798
+ **Why this matters:** Some actions are too important to automate completely. For example, you might want human approval before an agent:
799
+ - Deletes data
800
+ - Sends external communications
801
+ - Places large orders
802
+ - Modifies production systems
803
+
804
+ **How it works:** Add a simple configuration to any tool, and the agent will pause and ask for human approval before executing it.
805
+
806
+ Add approval gates to sensitive tool calls without changing tool code:
807
+
808
+ ```yaml
809
+ tools:
810
+ dangerous_operation:
811
+ function:
812
+ type: python
813
+ name: my_package.dangerous_function
814
+ human_in_the_loop:
815
+ review_prompt: "This operation will modify production data. Approve?"
816
+ ```
817
+
818
+ ### 6. Memory & State Persistence
819
+
820
+ **What is memory?** Your agent needs to remember past conversations. When a user asks "What about size XL?" the agent should remember they were talking about shirts.
821
+
822
+ **Memory backend options:**
823
+ 1. **In-Memory**: Fast but temporary (resets when agent restarts). Good for testing and development.
824
+ 2. **PostgreSQL**: Persistent relational storage (survives restarts). Good for production systems requiring conversation history and user preferences.
825
+ 3. **Lakebase**: Databricks-native persistence layer built on Delta Lake. Good for production deployments that want to stay within the Databricks ecosystem.
826
+
827
+ **Why Lakebase?**
828
+ - **Native Databricks integration** - No external database required
829
+ - **Built on Delta Lake** - ACID transactions, time travel, scalability
830
+ - **Unified governance** - Same Unity Catalog permissions as your data
831
+ - **Cost-effective** - Uses existing Databricks storage and compute
832
+
833
+ Configure conversation memory with in-memory, PostgreSQL, or Lakebase backends:
834
+
835
+ ```yaml
836
+ memory:
837
+ # Option 1: PostgreSQL (external database)
838
+ checkpointer:
839
+ name: conversation_checkpointer
840
+ type: postgres
841
+ database: *postgres_db
842
+
843
+ store:
844
+ name: user_preferences_store
845
+ type: postgres
846
+ database: *postgres_db
847
+ embedding_model: *embedding_model
848
+
849
+ # Option 2: Lakebase (Databricks-native) — alternative to Option 1; configure only one backend
850
+ memory:
851
+ checkpointer:
852
+ name: conversation_checkpointer
853
+ type: lakebase
854
+ schema: *my_schema # Unity Catalog schema
855
+ table_name: agent_checkpoints # Delta table for conversation state
856
+
857
+ store:
858
+ name: user_preferences_store
859
+ type: lakebase
860
+ schema: *my_schema
861
+ table_name: agent_store # Delta table for key-value storage
862
+ embedding_model: *embedding_model
863
+ ```
864
+
865
+ **Choosing a backend:**
866
+ - **In-Memory**: Development and testing only
867
+ - **PostgreSQL**: When you need external database features or already have PostgreSQL infrastructure
868
+ - **Lakebase**: When you want Databricks-native persistence with Unity Catalog governance
869
+
870
+ ### 7. MLflow Prompt Registry Integration
871
+
872
+ **The problem:** Prompts (instructions you give to AI models) need constant refinement. Hardcoding them in YAML means every change requires redeployment.
873
+
874
+ **The solution:** Store prompts in MLflow's Prompt Registry. Now prompt engineers can:
875
+ - Update prompts without touching code
876
+ - Version prompts (v1, v2, v3...)
877
+ - A/B test different prompts
878
+ - Roll back to previous versions if needed
879
+
880
+ **Real-world example:**
881
+ Your marketing team wants to make the agent's tone more friendly. With the prompt registry, they update it in MLflow, and the agent uses the new prompt immediately — no code deployment required.
882
+
883
+ Store and version prompts externally, enabling prompt engineers to iterate without code changes:
884
+
885
+ ```yaml
886
+ prompts:
887
+ product_expert_prompt:
888
+ schema: *retail_schema
889
+ name: product_expert_prompt
890
+ alias: production # or version: 3
891
+ default_template: |
892
+ You are a product expert...
893
+ tags:
894
+ team: retail
895
+ environment: production
896
+
897
+ agents:
898
+ product_expert:
899
+ prompt: *product_expert_prompt # Loaded from MLflow registry
900
+ ```
901
+
902
+ ### 8. Automated Prompt Optimization
903
+
904
+ **What is this?** Instead of manually tweaking prompts through trial and error, DAO can automatically test variations and find the best one.
905
+
906
+ **How it works:** Using GEPA (Generative Evolution of Prompts and Agents):
907
+ 1. You provide a training dataset with example questions
908
+ 2. DAO generates multiple prompt variations
909
+ 3. Each variation is tested against your examples
910
+ 4. The best-performing prompt is selected
911
+
912
+ **Think of it like:** A/B testing for AI prompts, but automated.
913
+
914
+ Use GEPA (Generative Evolution of Prompts and Agents) to automatically improve prompts:
915
+
916
+ ```yaml
917
+ optimizations:
918
+ prompt_optimizations:
919
+ optimize_diy_prompt:
920
+ prompt: *diy_prompt
921
+ agent: *diy_agent
922
+ dataset: *training_dataset
923
+ reflection_model: "openai:/gpt-4"
924
+ num_candidates: 5
925
+ ```
926
+
927
+ ### 9. Guardrails & Response Quality Middleware
928
+
929
+ **What are guardrails?** Safety and quality controls that validate agent responses before they reach users. Think of them as quality assurance checkpoints.
930
+
931
+ **Why this matters:** AI models can sometimes generate responses that are:
932
+ - Inappropriate or unsafe
933
+ - Too long or too short
934
+ - Missing required information (like citations)
935
+ - In the wrong format or tone
936
+ - Off-topic or irrelevant
937
+ - Containing sensitive keywords that should be blocked
938
+
939
+ DAO provides two complementary middleware systems for response quality control:
940
+
941
+ ---
942
+
943
+ #### A. Guardrail Middleware (Content Safety & Quality)
944
+
945
+ **GuardrailMiddleware** uses LLM-as-judge to evaluate responses against custom criteria, with automatic retry and improvement loops.
946
+
947
+ **Use cases:**
948
+ - Professional tone validation
949
+ - Completeness checks (did the agent fully answer the question?)
950
+ - Accuracy verification
951
+ - Brand voice consistency
952
+ - Custom business rules
953
+
954
+ **How it works:**
955
+ 1. Agent generates a response
956
+ 2. LLM judge evaluates against your criteria (prompt-based)
957
+ 3. If fails: Provides feedback and asks agent to try again
958
+ 4. If passes: Response goes to user
959
+ 5. After max retries: Falls back or raises error
960
+
961
+ ```yaml
962
+ agents:
963
+ customer_service_agent:
964
+ model: *default_llm
965
+ guardrails:
966
+ # Professional tone check
967
+ - name: professional_tone
968
+ model: *judge_llm
969
+ prompt: *professional_tone_prompt # From MLflow Prompt Registry
970
+ num_retries: 3
971
+
972
+ # Completeness validation
973
+ - name: completeness_check
974
+ model: *judge_llm
975
+ prompt: |
976
+ Does the response fully address the user's question?
977
+ Score 1 if yes, 0 if no. Explain your reasoning.
978
+ num_retries: 2
979
+ ```
980
+
981
+ **Additional guardrail types:**
982
+
983
+ ```yaml
984
+ # Content Filter - Deterministic keyword blocking
985
+ guardrails:
986
+ - name: sensitive_content_filter
987
+ type: content_filter
988
+ blocked_keywords:
989
+ - password
990
+ - credit_card
991
+ - ssn
992
+ case_sensitive: false
993
+ on_failure: fallback
994
+ fallback_message: "I cannot provide that information."
995
+
996
+ # Safety Guardrail - Model-based safety evaluation
997
+ guardrails:
998
+ - name: safety_check
999
+ type: safety
1000
+ model: *safety_model
1001
+ categories:
1002
+ - violence
1003
+ - hate_speech
1004
+ - self_harm
1005
+ threshold: 0.7 # Sensitivity threshold
1006
+ num_retries: 1
1007
+ ```
1008
+
1009
+ **Real-world example:**
1010
+ Your customer service agent must maintain a professional tone and never discuss competitor products:
1011
+
1012
+ ```yaml
1013
+ agents:
1014
+ support_agent:
1015
+ guardrails:
1016
+ - name: professional_tone
1017
+ model: *judge_llm
1018
+ prompt: *professional_tone_prompt
1019
+ num_retries: 3
1020
+
1021
+ - name: no_competitors
1022
+ type: content_filter
1023
+ blocked_keywords: [competitor_a, competitor_b, competitor_c]
1024
+ on_failure: fallback
1025
+ fallback_message: "I can only discuss our own products and services."
1026
+ ```
1027
+
1028
+ ---
1029
+
1030
+ #### B. DSPy-Style Assertion Middleware (Programmatic Validation)
1031
+
1032
+ **Assertion middleware** provides programmatic, code-based validation inspired by DSPy's assertion mechanisms. Best for deterministic checks and custom logic.
1033
+
1034
+ | Middleware | Behavior | Use Case |
1035
+ |------------|----------|----------|
1036
+ | **AssertMiddleware** | Hard constraint - retries until satisfied or fails | Required output formats, mandatory citations, length constraints |
1037
+ | **SuggestMiddleware** | Soft constraint - logs feedback, optional single retry | Style preferences, quality suggestions, optional improvements |
1038
+ | **RefineMiddleware** | Iterative improvement - generates N attempts, selects best | Optimizing response quality, A/B testing variations |
1039
+
1040
+ ```yaml
1041
+ # Configure via middleware in agents
1042
+ agents:
1043
+ research_agent:
1044
+ middleware:
1045
+ # Hard constraint: Must include citations
1046
+ - type: assert
1047
+ constraint: has_citations
1048
+ max_retries: 3
1049
+ on_failure: fallback
1050
+ fallback_message: "Unable to provide cited response."
1051
+
1052
+ # Soft suggestion: Prefer concise responses
1053
+ - type: suggest
1054
+ constraint: length_under_500
1055
+ allow_one_retry: true
1056
+ ```
1057
+
1058
+ **Programmatic usage:**
1059
+
1060
+ ```python
1061
+ from dao_ai.middleware.assertions import (
1062
+ create_assert_middleware,
1063
+ create_suggest_middleware,
1064
+ create_refine_middleware,
1065
+ LengthConstraint,
1066
+ KeywordConstraint,
1067
+ )
1068
+
1069
+ # Hard constraint: response must be between 100-500 chars
1070
+ assert_middleware = create_assert_middleware(
1071
+ constraint=LengthConstraint(min_length=100, max_length=500),
1072
+ max_retries=3,
1073
+ on_failure="fallback",
1074
+ )
1075
+
1076
+ # Soft constraint: suggest professional tone
1077
+ suggest_middleware = create_suggest_middleware(
1078
+ constraint=lambda response, ctx: "professional" in response.lower(),
1079
+ allow_one_retry=True,
1080
+ )
1081
+
1082
+ # Iterative refinement: generate 3 attempts, pick best
1083
+ def quality_score(response: str, ctx: dict) -> float:
1084
+ # Score based on length, keywords, structure
1085
+ score = 0.0
1086
+ if 100 <= len(response) <= 500:
1087
+ score += 0.5
1088
+ if "please" in response.lower() or "thank you" in response.lower():
1089
+ score += 0.3
1090
+ if response.endswith(".") or response.endswith("!"):
1091
+ score += 0.2
1092
+ return score
1093
+
1094
+ refine_middleware = create_refine_middleware(
1095
+ reward_fn=quality_score,
1096
+ threshold=0.8,
1097
+ max_iterations=3,
1098
+ )
1099
+ ```
1100
+
1101
+ ---
1102
+
1103
+ #### When to Use Which?
1104
+
1105
+ | Use Case | Recommended Middleware |
1106
+ |----------|------------------------|
1107
+ | **Tone/style validation** | GuardrailMiddleware (LLM judge) |
1108
+ | **Safety checks** | SafetyGuardrailMiddleware |
1109
+ | **Keyword blocking** | ContentFilterMiddleware |
1110
+ | **Length constraints** | AssertMiddleware (deterministic) |
1111
+ | **Citation requirements** | AssertMiddleware or GuardrailMiddleware |
1112
+ | **Custom business logic** | AssertMiddleware (programmable) |
1113
+ | **Quality optimization** | RefineMiddleware (generates multiple attempts) |
1114
+ | **Soft suggestions** | SuggestMiddleware |
1115
+
1116
+ **Best practice:** Combine these approaches in layers:
1117
+ - **ContentFilter** for fast, deterministic blocking
1118
+ - **AssertMiddleware** for programmatic constraints
1119
+ - **GuardrailMiddleware** for nuanced, LLM-based evaluation
1120
+
1121
+ ```yaml
1122
+ agents:
1123
+ production_agent:
1124
+ middleware:
1125
+ # Layer 1: Fast keyword blocking
1126
+ - type: content_filter
1127
+ blocked_keywords: [password, ssn]
1128
+
1129
+ # Layer 2: Deterministic length check
1130
+ - type: assert
1131
+ constraint: length_range
1132
+ min_length: 50
1133
+ max_length: 1000
1134
+
1135
+ # Layer 3: LLM-based quality evaluation
1136
+ - type: guardrail
1137
+ name: professional_tone
1138
+ model: *judge_llm
1139
+ prompt: *professional_tone_prompt
1140
+ ```
1141
+
1142
+ ### 10. Conversation Summarization
1143
+
1144
+ **The problem:** AI models have a maximum amount of text they can process (the "context window"). Long conversations eventually exceed this limit.
1145
+
1146
+ **The solution:** When conversations get too long, DAO automatically:
1147
+ 1. Summarizes the older parts of the conversation
1148
+ 2. Keeps recent messages as-is (for accuracy)
1149
+ 3. Continues the conversation with the condensed history
1150
+
1151
+ **Example:**
1152
+ After 20 messages about product recommendations, the agent summarizes: *"User is looking for power tools, prefers cordless, budget around $200."* This summary replaces the old messages, freeing up space for the conversation to continue.
1153
+
1154
+ Automatically summarize long conversation histories to stay within context limits:
1155
+
1156
+ ```yaml
1157
+ chat_history:
1158
+ max_tokens: 4096 # Max tokens for summarized history
1159
+ max_tokens_before_summary: 8000 # Trigger summarization at this threshold
1160
+ max_messages_before_summary: 20 # Or trigger at this message count
1161
+ ```
1162
+
1163
+ The `LoggingSummarizationMiddleware` provides detailed observability:
1164
+
1165
+ ```
1166
+ INFO | Summarization: BEFORE 25 messages (~12500 tokens) → AFTER 3 messages (~2100 tokens) | Reduced by ~10400 tokens
1167
+ ```
1168
+
1169
+ ### 11. Structured Output (Response Format)
1170
+
1171
+ **What is this?** A way to force your agent to return data in a specific JSON structure, making responses machine-readable and predictable.
1172
+
1173
+ **Why it matters:**
1174
+ - **Data extraction**: Extract structured information (product details, contact info) from text
1175
+ - **API integration**: Return data that other systems can consume directly
1176
+ - **Form filling**: Populate forms or databases automatically
1177
+ - **Consistent parsing**: No need to write brittle text parsing code
1178
+
1179
+ **How it works:** Define a schema (Pydantic model, dataclass, or JSON schema) and the agent will return data matching that structure.
1180
+
1181
+ ```yaml
1182
+ agents:
1183
+ contact_extractor:
1184
+ name: contact_extractor
1185
+ model: *default_llm
1186
+ prompt: |
1187
+ Extract contact information from the user's message.
1188
+ response_format:
1189
+ response_schema: |
1190
+ {
1191
+ "type": "object",
1192
+ "properties": {
1193
+ "name": {"type": "string"},
1194
+ "email": {"type": "string"},
1195
+ "phone": {"type": ["string", "null"]}
1196
+ },
1197
+ "required": ["name", "email"]
1198
+ }
1199
+ use_tool: true # Use function calling strategy (recommended for Databricks)
1200
+ ```
1201
+
1202
+ **Real-world example:**
1203
+ User: *"John Doe, john.doe@example.com, (555) 123-4567"*
1204
+ Agent returns:
1205
+ ```json
1206
+ {
1207
+ "name": "John Doe",
1208
+ "email": "john.doe@example.com",
1209
+ "phone": "(555) 123-4567"
1210
+ }
1211
+ ```
1212
+
1213
+ **Options:**
1214
+ - `response_schema`: Can be a JSON schema string, Pydantic model type, or fully qualified class name
1215
+ - `use_tool`: `true` (function calling), `false` (native), or `null` (auto-detect)
1216
+
1217
+ See `config/examples/structured_output.yaml` for a complete example.
1218
+
1219
+ ---
1220
+
1221
+ ### 12. Custom Input & Custom Output Support
1222
+
1223
+ **What is this?** A flexible system for passing custom configuration values to your agents and receiving enriched output with runtime state.
1224
+
1225
+ **Why it matters:**
1226
+ - **Pass context to prompts**: Any key in `configurable` becomes available as a template variable in your prompts
1227
+ - **Personalize responses**: Use `user_id`, `store_num`, or any custom field to tailor agent behavior
1228
+ - **Track conversations**: Maintain state across multiple interactions with `thread_id`/`conversation_id`
1229
+ - **Capture runtime state**: Output includes accumulated state like Genie conversation IDs, cache hits, and more
1230
+ - **Debug production issues**: Full context visibility for troubleshooting
1231
+
1232
+ **Key concepts:**
1233
+ - `configurable`: Custom key-value pairs passed to your agent (available in prompt templates)
1234
+ - `thread_id` / `conversation_id`: Identifies a specific conversation thread
1235
+ - `user_id`: Identifies who's asking questions
1236
+ - `session`: Runtime state that accumulates during the conversation (returned in output)
1237
+
1238
+ DAO uses a structured format for passing custom inputs and returning enriched outputs:
1239
+
1240
+ ```python
1241
+ # Input format
1242
+ custom_inputs = {
1243
+ "configurable": {
1244
+ "thread_id": "uuid-123", # LangGraph thread ID
1245
+ "conversation_id": "uuid-123", # Databricks-style (takes precedence)
1246
+ "user_id": "user@example.com",
1247
+ "store_num": "12345",
1248
+ },
1249
+ "session": {
1250
+ # Accumulated runtime state (optional in input)
1251
+ }
1252
+ }
1253
+
1254
+ # Output format includes session state
1255
+ custom_outputs = {
1256
+ "configurable": {
1257
+ "thread_id": "uuid-123",
1258
+ "conversation_id": "uuid-123",
1259
+ "user_id": "user@example.com",
1260
+ },
1261
+ "session": {
1262
+ "genie": {
1263
+ "spaces": {
1264
+ "space_abc": {
1265
+ "conversation_id": "genie-conv-456",
1266
+ "cache_hit": True,
1267
+ "follow_up_questions": ["What about pricing?"]
1268
+ }
1269
+ }
1270
+ }
1271
+ }
1272
+ }
1273
+ ```
1274
+
1275
+ **Using configurable values in prompts:**
1276
+
1277
+ Any key in the `configurable` dictionary becomes available as a template variable in your agent prompts:
1278
+
1279
+ ```yaml
1280
+ agents:
1281
+ personalized_agent:
1282
+ prompt: |
1283
+ You are a helpful assistant for {user_id}.
1284
+ Store location: {store_num}
1285
+
1286
+ Provide personalized recommendations based on the user's context.
1287
+ ```
1288
+
1289
+ When invoked with the `custom_inputs` above, the prompt automatically populates:
1290
+ - `{user_id}` → `"user@example.com"`
1291
+ - `{store_num}` → `"12345"`
1292
+
1293
+ **Key features:**
1294
+ - `conversation_id` and `thread_id` are interchangeable (conversation_id takes precedence)
1295
+ - If neither is provided, a UUID is auto-generated
1296
+ - `user_id` is normalized (dots replaced with underscores for memory namespaces)
1297
+ - All `configurable` keys are available as prompt template variables
1298
+ - `session` state is automatically maintained and returned in `custom_outputs`
1299
+ - Backward compatible with legacy flat custom_inputs format
1300
+
1301
+ ### 13. Hook System
1302
+
1303
+ **What are hooks?** Hooks let you run custom code at specific moments in your agent's lifecycle — like "before starting" or "when shutting down".
1304
+
1305
+ **Common use cases:**
1306
+ - Warm up caches on startup
1307
+ - Initialize database connections
1308
+ - Clean up resources on shutdown
1309
+ - Load configuration or credentials
1310
+
1311
+ **For per-message logic** (logging requests, checking permissions, etc.), use **middleware** instead. Middleware provides much more flexibility and control over the agent execution flow.
1312
+
1313
+ Inject custom logic at key points in the agent lifecycle:
1314
+
1315
+ ```yaml
1316
+ app:
1317
+ # Run on startup
1318
+ initialization_hooks:
1319
+ - my_package.hooks.setup_connections
1320
+ - my_package.hooks.warmup_caches
1321
+
1322
+ # Run on shutdown
1323
+ shutdown_hooks:
1324
+ - my_package.hooks.cleanup_resources
1325
+
1326
+ agents:
1327
+ my_agent:
1328
+ # For per-agent logic, use middleware
1329
+ middleware:
1330
+ - my_package.middleware.log_requests
1331
+ - my_package.middleware.check_permissions
1332
+ ```
1333
+
1334
+ ---
1335
+
1336
+ ## Quick Start
1337
+
1338
+ ### Choose Your Path
1339
+
1340
+ **Path 1: Visual Interface** → Use [DAO AI Builder](https://github.com/natefleming/dao-ai-builder) for a graphical, point-and-click experience
1341
+
1342
+ **Path 2: Code/Config** → Follow the instructions below to work with YAML files directly
1343
+
1344
+ Both paths produce the same result — choose what's comfortable for you!
1345
+
1346
+ ---
1347
+
1348
+ ### Prerequisites
1349
+
1350
+ Before you begin, you'll need:
1351
+
1352
+ - **Python 3.11 or newer** installed on your computer ([download here](https://www.python.org/downloads/))
1353
+ - **A Databricks workspace** (ask your IT team or see [Databricks docs](https://docs.databricks.com/))
1354
+ - Access to **Unity Catalog** (your organization's data catalog)
1355
+ - **Model Serving** enabled (for deploying AI agents)
1356
+ - *Optional*: Vector Search, Genie (for advanced features)
1357
+
1358
+ **Not sure if you have access?** Your Databricks administrator can grant you permissions.
1359
+
1360
+ ### Installation
1361
+
1362
+ **Option 1: For developers familiar with Git**
1363
+
1364
+ ```bash
1365
+ # Clone this repository
1366
+ git clone <repo-url>
1367
+ cd dao-ai
1368
+
1369
+ # Create an isolated Python environment
1370
+ uv venv
1371
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
1372
+
1373
+ # Install DAO and its dependencies
1374
+ make install
1375
+ ```
1376
+
1377
+ **Option 2: For those new to development**
1378
+
1379
+ 1. Download this project as a ZIP file (click the green "Code" button on GitHub → Download ZIP)
1380
+ 2. Extract the ZIP file to a folder on your computer
1381
+ 3. Open a terminal/command prompt and navigate to that folder
1382
+ 4. Run these commands:
1383
+
1384
+ ```bash
1385
+ # On Mac/Linux:
1386
+ python3 -m venv .venv
1387
+ source .venv/bin/activate
1388
+ pip install -e .
1389
+
1390
+ # On Windows:
1391
+ python -m venv .venv
1392
+ .venv\Scripts\activate
1393
+ pip install -e .
1394
+ ```
1395
+
1396
+ **Verification:** Run `dao-ai --version` to confirm the installation succeeded.
1397
+
1398
+ ### Your First Agent
1399
+
1400
+ Let's build a simple AI assistant in 5 steps. This agent will use a language model from Databricks to answer questions.
1401
+
1402
+ **Step 1: Create a configuration file**
1403
+
1404
+ Create a new file called `config/my_agent.yaml` and paste this content:
1405
+
1406
+ ```yaml
1407
+ schemas:
1408
+ my_schema: &my_schema
1409
+ catalog_name: my_catalog # Replace with your Unity Catalog name
1410
+ schema_name: my_schema # Replace with your schema name
1411
+
1412
+ resources:
1413
+ llms:
1414
+ default_llm: &default_llm
1415
+ name: databricks-meta-llama-3-3-70b-instruct # The AI model to use
1416
+
1417
+ agents:
1418
+ assistant: &assistant
1419
+ name: assistant
1420
+ model: *default_llm
1421
+ prompt: |
1422
+ You are a helpful assistant.
1423
+
1424
+ app:
1425
+ name: my_first_agent
1426
+ registered_model:
1427
+ schema: *my_schema
1428
+ name: my_first_agent
1429
+ agents:
1430
+ - *assistant
1431
+ orchestration:
1432
+ swarm:
1433
+ model: *default_llm
1434
+ ```
1435
+
1436
+ **💡 What's happening here?**
1437
+ - `schemas`: Points to your Unity Catalog location (where the agent will be registered)
1438
+ - `resources`: Defines the AI model (Llama 3.3 70B in this case)
1439
+ - `agents`: Describes your assistant agent and its behavior
1440
+ - `app`: Configures how the agent is deployed and orchestrated
1441
+
1442
+ **Step 2: Validate your configuration**
1443
+
1444
+ This checks for errors in your YAML file:
1445
+
1446
+ ```bash
1447
+ dao-ai validate -c config/my_agent.yaml
1448
+ ```
1449
+
1450
+ You should see: ✅ `Configuration is valid!`
1451
+
1452
+ **Step 3: Visualize the agent workflow** (optional)
1453
+
1454
+ Generate a diagram showing how your agent works:
1455
+
1456
+ ```bash
1457
+ dao-ai graph -c config/my_agent.yaml -o my_agent.png
1458
+ ```
1459
+
1460
+ This creates `my_agent.png` — open it to see a visual representation of your agent.
1461
+
1462
+ **Step 4: Deploy to Databricks**
1463
+
1464
+ **Option A: Using Python** (programmatic deployment)
1465
+
1466
+ ```python
1467
+ from dao_ai.config import AppConfig
1468
+
1469
+ # Load your configuration
1470
+ config = AppConfig.from_file("config/my_agent.yaml")
1471
+
1472
+ # Package the agent as an MLflow model
1473
+ config.create_agent()
1474
+
1475
+ # Deploy to Databricks Model Serving
1476
+ config.deploy_agent()
1477
+ ```
1478
+
1479
+ **Option B: Using the CLI** (one command)
1480
+
1481
+ ```bash
1482
+ dao-ai bundle --deploy --run -c config/my_agent.yaml
1483
+ ```
1484
+
1485
+ This single command:
1486
+ 1. Validates your configuration
1487
+ 2. Packages the agent
1488
+ 3. Deploys it to Databricks
1489
+ 4. Creates a serving endpoint
1490
+
1491
+ **Step 5: Interact with your agent**
1492
+
1493
+ Once deployed, you can chat with your agent using Python:
1494
+
1495
+ ```python
1496
+ from mlflow.deployments import get_deploy_client
1497
+
1498
+ # Connect to your Databricks workspace
1499
+ client = get_deploy_client("databricks")
1500
+
1501
+ # Send a message to your agent
1502
+ response = client.predict(
1503
+ endpoint="my_first_agent",
1504
+ inputs={
1505
+ "messages": [{"role": "user", "content": "Hello! What can you help me with?"}],
1506
+ "configurable": {
1507
+ "thread_id": "1", # Conversation ID
1508
+ "user_id": "demo_user" # User identifier
1509
+ }
1510
+ }
1511
+ )
1512
+
1513
+ # Print the agent's response
1514
+ print(response["message"]["content"])
1515
+ ```
1516
+
1517
+ **🎉 Congratulations!** You've built and deployed your first AI agent with DAO.
1518
+
1519
+ **Next steps:**
1520
+ - Explore the [`config/examples/`](config/examples/) folder for more advanced configurations
1521
+ - Try the [DAO AI Builder](https://github.com/natefleming/dao-ai-builder) visual interface
1522
+ - Add tools to your agent (database access, APIs, Vector Search)
1523
+ - Set up multi-agent orchestration (Supervisor or Swarm patterns)
1524
+
1525
+ ---
1526
+
1527
+ ## Configuration Reference
1528
+
1529
+ ### Full Configuration Structure
1530
+
1531
+ ```yaml
1532
+ # Schema definitions for Unity Catalog
1533
+ schemas:
1534
+ my_schema: &my_schema
1535
+ catalog_name: string
1536
+ schema_name: string
1537
+
1538
+ # Reusable variables (secrets, env vars)
1539
+ variables:
1540
+ api_key: &api_key
1541
+ options:
1542
+ - env: MY_API_KEY
1543
+ - scope: my_scope
1544
+ secret: api_key
1545
+
1546
+ # Infrastructure resources
1547
+ resources:
1548
+ llms:
1549
+ model_name: &model_name
1550
+ name: string # Databricks endpoint name
1551
+ temperature: float # 0.0 - 2.0
1552
+ max_tokens: int
1553
+ fallbacks: [string] # Fallback model names
1554
+ on_behalf_of_user: bool # Use caller's permissions
1555
+
1556
+ vector_stores:
1557
+ store_name: &store_name
1558
+ endpoint:
1559
+ name: string
1560
+ type: STANDARD | OPTIMIZED_STORAGE
1561
+ index:
1562
+ schema: *my_schema
1563
+ name: string
1564
+ source_table:
1565
+ schema: *my_schema
1566
+ name: string
1567
+ embedding_model: *embedding_model
1568
+ embedding_source_column: string
1569
+ columns: [string]
1570
+
1571
+ databases:
1572
+ postgres_db: &postgres_db
1573
+ instance_name: string
1574
+ client_id: *api_key # OAuth credentials
1575
+ client_secret: *secret
1576
+ workspace_host: string
1577
+
1578
+ warehouses:
1579
+ warehouse: &warehouse
1580
+ warehouse_id: string
1581
+ on_behalf_of_user: bool
1582
+
1583
+ genie_rooms:
1584
+ genie: &genie
1585
+ space_id: string
1586
+
1587
+ # Retriever configurations
1588
+ retrievers:
1589
+ retriever_name: &retriever_name
1590
+ vector_store: *store_name
1591
+ columns: [string]
1592
+ search_parameters:
1593
+ num_results: int
1594
+ query_type: ANN | HYBRID
1595
+
1596
+ # Tool definitions
1597
+ tools:
1598
+ tool_name: &tool_name
1599
+ name: string
1600
+ function:
1601
+ type: python | factory | unity_catalog | mcp
1602
+ name: string # Import path or UC function name
1603
+ args: {} # For factory tools
1604
+ schema: *my_schema # For UC tools
1605
+ human_in_the_loop: # Optional approval gate
1606
+ review_prompt: string
1607
+
1608
+ # Agent definitions
1609
+ agents:
1610
+ agent_name: &agent_name
1611
+ name: string
1612
+ description: string
1613
+ model: *model_name
1614
+ tools: [*tool_name]
1615
+ guardrails: [*guardrail_ref]
1616
+ prompt: string | *prompt_ref
1617
+ handoff_prompt: string # For swarm routing
1618
+ middleware: [*middleware_ref]
1619
+ response_format: *response_format_ref | string | null
1620
+
1621
+ # Prompt definitions (MLflow registry)
1622
+ prompts:
1623
+ prompt_name: &prompt_name
1624
+ schema: *my_schema
1625
+ name: string
1626
+ alias: string | null # e.g., "production"
1627
+ version: int | null
1628
+ default_template: string
1629
+ tags: {}
1630
+
1631
+ # Response format (structured output)
1632
+ response_formats:
1633
+ format_name: &format_name
1634
+ response_schema: string | type # JSON schema string or type reference
1635
+ use_tool: bool | null # null=auto, true=ToolStrategy, false=ProviderStrategy
1636
+
1637
+ # Memory configuration
1638
+ memory: &memory
1639
+ checkpointer:
1640
+ name: string
1641
+ type: memory | postgres | lakebase
1642
+ database: *postgres_db # For postgres
1643
+ schema: *my_schema # For lakebase
1644
+ table_name: string # For lakebase
1645
+ store:
1646
+ name: string
1647
+ type: memory | postgres | lakebase
1648
+ database: *postgres_db # For postgres
1649
+ schema: *my_schema # For lakebase
1650
+ table_name: string # For lakebase
1651
+ embedding_model: *embedding_model
1652
+
1653
+ # Application configuration
1654
+ app:
1655
+ name: string
1656
+ description: string
1657
+ log_level: DEBUG | INFO | WARNING | ERROR
1658
+
1659
+ registered_model:
1660
+ schema: *my_schema
1661
+ name: string
1662
+
1663
+ endpoint_name: string
1664
+
1665
+ agents: [*agent_name]
1666
+
1667
+ orchestration:
1668
+ supervisor: # OR swarm, not both
1669
+ model: *model_name
1670
+ prompt: string
1671
+ swarm:
1672
+ model: *model_name
1673
+ default_agent: *agent_name
1674
+ handoffs:
1675
+ agent_a: [agent_b, agent_c]
1676
+ memory: *memory
1677
+
1678
+ initialization_hooks: [string]
1679
+ shutdown_hooks: [string]
1680
+
1681
+ permissions:
1682
+ - principals: [users]
1683
+ entitlements: [CAN_QUERY]
1684
+
1685
+ environment_vars:
1686
+ KEY: "{{secrets/scope/secret}}"
1687
+ ```
1688
+
1689
+ ---
1690
+
1691
+ ## Example Configurations
1692
+
1693
+ The `config/examples/` directory contains ready-to-use configurations:
1694
+
1695
+ | Example | Description |
1696
+ |---------|-------------|
1697
+ | `minimal.yaml` | Simplest possible agent configuration |
1698
+ | `genie.yaml` | Natural language to SQL with Genie |
1699
+ | `genie_with_lru_cache.yaml` | Genie with LRU caching |
1700
+ | `genie_with_semantic_cache.yaml` | Genie with two-tier caching |
1701
+ | `conversation_summarization.yaml` | Long conversation summarization with PostgreSQL persistence |
1702
+ | `structured_output.yaml` | **NEW** Structured output / response format with JSON schema |
1703
+ | `human_in_the_loop.yaml` | Tool approval workflows |
1704
+ | `mcp.yaml` | External service integration via MCP |
1705
+ | `prompt_optimization.yaml` | Automated prompt tuning with GEPA |
1706
+ | `prompt_registry.yaml` | MLflow prompt registry integration |
1707
+ | `vector_search_with_reranking.yaml` | RAG with reranking |
1708
+ | `deep_research.yaml` | Multi-step research agent |
1709
+ | `slack.yaml` | Slack integration |
1710
+ | `reservations.yaml` | Restaurant reservation system |
1711
+
1712
+ ---
1713
+
1714
+ ## CLI Reference
1715
+
1716
+ ```bash
1717
+ # Validate configuration
1718
+ dao-ai validate -c config/my_config.yaml
1719
+
1720
+ # Generate JSON schema for IDE support
1721
+ dao-ai schema > schemas/model_config_schema.json
1722
+
1723
+ # Visualize agent workflow
1724
+ dao-ai graph -c config/my_config.yaml -o workflow.png
1725
+
1726
+ # Deploy with Databricks Asset Bundles
1727
+ dao-ai bundle --deploy --run -c config/my_config.yaml --profile DEFAULT
1728
+
1729
+ # Interactive chat with agent
1730
+ dao-ai chat -c config/my_config.yaml
1731
+
1732
+ # Verbose output (-v through -vvvv)
1733
+ dao-ai -vvvv validate -c config/my_config.yaml
1734
+ ```
1735
+
1736
+ ---
1737
+
1738
+ ## Python API
1739
+
1740
+ ```python
1741
+ from dao_ai.config import AppConfig
1742
+
1743
+ # Load configuration
1744
+ config = AppConfig.from_file("config/my_config.yaml")
1745
+
1746
+ # Access components
1747
+ agents = config.find_agents()
1748
+ tools = config.find_tools()
1749
+ vector_stores = config.resources.vector_stores
1750
+
1751
+ # Create infrastructure
1752
+ for name, vs in vector_stores.items():
1753
+ vs.create()
1754
+
1755
+ # Package and deploy
1756
+ config.create_agent(
1757
+ additional_pip_reqs=["custom-package==1.0.0"],
1758
+ additional_code_paths=["./my_modules"]
1759
+ )
1760
+ config.deploy_agent()
1761
+
1762
+ # Visualize
1763
+ config.display_graph()
1764
+ config.save_image("docs/architecture.png")
1765
+ ```
1766
+
1767
+ ---
1768
+
1769
+ ## Project Structure
1770
+
1771
+ ```
1772
+ dao-ai/
1773
+ ├── src/dao_ai/
1774
+ │ ├── config.py # Pydantic configuration models
1775
+ │ ├── graph.py # LangGraph workflow builder
1776
+ │ ├── nodes.py # Agent node factories
1777
+ │ ├── state.py # State management
1778
+ │ ├── optimization.py # GEPA-based prompt optimization
1779
+ │ ├── tools/ # Tool implementations
1780
+ │ │ ├── genie.py # Genie tool with caching
1781
+ │ │ ├── mcp.py # MCP integrations
1782
+ │ │ ├── vector_search.py
1783
+ │ │ └── ...
1784
+ │ ├── middleware/ # Agent middleware
1785
+ │ │ ├── assertions.py # Assert, Suggest, Refine middleware
1786
+ │ │ ├── summarization.py # Conversation summarization
1787
+ │ │ ├── guardrails.py # Content filtering and safety
1788
+ │ │ └── ...
1789
+ │ ├── orchestration/ # Multi-agent orchestration
1790
+ │ │ ├── supervisor.py # Supervisor pattern
1791
+ │ │ ├── swarm.py # Swarm pattern
1792
+ │ │ └── ...
1793
+ │ ├── genie/
1794
+ │ │ └── cache/ # LRU and Semantic cache
1795
+ │ ├── memory/ # Checkpointer and store
1796
+ │ └── hooks/ # Lifecycle hooks
1797
+ ├── config/
1798
+ │ ├── examples/ # Example configurations
1799
+ │ └── hardware_store/ # Reference implementation
1800
+ ├── tests/ # Test suite
1801
+ └── schemas/ # JSON schemas for validation
1802
+ ```
1803
+
1804
+ ---
1805
+
1806
+ ## Common Questions
1807
+
1808
+ ### How is this different from LangChain/LangGraph directly?
1809
+
1810
+ DAO is **built on top of** LangChain and LangGraph. Instead of writing Python code to configure agents, you use YAML files. Think of it as:
1811
+ - **LangChain/LangGraph**: The engine
1812
+ - **DAO**: The blueprint system that configures the engine
1813
+
1814
+ Benefits:
1815
+ - ✅ No Python coding required (just YAML)
1816
+ - ✅ Configurations are easier to review and version control
1817
+ - ✅ Databricks-specific integrations work out-of-the-box
1818
+ - ✅ Reusable patterns across your organization
1819
+
1820
+ ### Do I need to learn Python?
1821
+
1822
+ **For basic usage:** No. You only need to write YAML configuration files.
1823
+
1824
+ **For advanced usage:** Some Python knowledge helps if you want to:
1825
+ - Create custom tools
1826
+ - Write middleware hooks
1827
+ - Build complex business logic
1828
+
1829
+ Most users stick to YAML and use pre-built tools.
1830
+
1831
+ ### Can I test locally before deploying?
1832
+
1833
+ Yes! DAO includes a local testing mode:
1834
+
1835
+ ```python
1836
+ from dao_ai.config import AppConfig
1837
+
1838
+ config = AppConfig.from_file("config/my_agent.yaml")
1839
+ agent = config.as_runnable()
1840
+
1841
+ # Test locally
1842
+ response = agent.invoke({
1843
+ "messages": [{"role": "user", "content": "Test question"}]
1844
+ })
1845
+ print(response)
1846
+ ```
1847
+
1848
+ ### What's the learning curve?
1849
+
1850
+ **If you're new to AI agents:** 1-2 weeks to understand concepts and build your first agent
1851
+
1852
+ **If you know LangChain:** 1-2 days to translate your knowledge to YAML configs
1853
+
1854
+ **If you're a business user:** Consider starting with [DAO AI Builder](https://github.com/natefleming/dao-ai-builder) (visual interface)
1855
+
1856
+ ### How do I get help?
1857
+
1858
+ 1. Check the [`config/examples/`](config/examples/) directory for working examples
1859
+ 2. Review this README for detailed explanations
1860
+ 3. Read the [Configuration Reference](#configuration-reference) section
1861
+ 4. Open an issue on GitHub
1862
+
1863
+ ---
1864
+
1865
+ ## Contributing
1866
+
1867
+ 1. Fork the repository
1868
+ 2. Create a feature branch
1869
+ 3. Make your changes
1870
+ 4. Run tests: `make test`
1871
+ 5. Format code: `make format`
1872
+ 6. Submit a pull request
1873
+
1874
+ ---
1875
+
1876
+ ## License
1877
+
1878
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.