dao-ai 0.0.25__py3-none-any.whl → 0.0.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dao_ai/agent_as_code.py +3 -0
- dao_ai/config.py +431 -27
- dao_ai/graph.py +29 -4
- dao_ai/nodes.py +29 -20
- dao_ai/providers/databricks.py +536 -35
- dao_ai/tools/genie.py +2 -3
- dao_ai/tools/mcp.py +46 -27
- dao_ai/tools/vector_search.py +232 -22
- dao_ai/utils.py +57 -1
- {dao_ai-0.0.25.dist-info → dao_ai-0.0.28.dist-info}/METADATA +6 -3
- {dao_ai-0.0.25.dist-info → dao_ai-0.0.28.dist-info}/RECORD +14 -14
- {dao_ai-0.0.25.dist-info → dao_ai-0.0.28.dist-info}/WHEEL +1 -1
- {dao_ai-0.0.25.dist-info → dao_ai-0.0.28.dist-info}/entry_points.txt +0 -0
- {dao_ai-0.0.25.dist-info → dao_ai-0.0.28.dist-info}/licenses/LICENSE +0 -0
dao_ai/tools/genie.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import bisect
|
|
2
2
|
import json
|
|
3
|
-
import logging
|
|
4
3
|
import os
|
|
5
4
|
import time
|
|
6
5
|
from dataclasses import asdict, dataclass
|
|
@@ -184,7 +183,7 @@ class Genie:
|
|
|
184
183
|
conversation_id, result, query_str, description
|
|
185
184
|
)
|
|
186
185
|
elif state in ["RUNNING", "PENDING"]:
|
|
187
|
-
|
|
186
|
+
logger.debug("Waiting for query result...")
|
|
188
187
|
time.sleep(self.poll_interval)
|
|
189
188
|
else:
|
|
190
189
|
return GenieResponse(
|
|
@@ -250,7 +249,7 @@ class Genie:
|
|
|
250
249
|
)
|
|
251
250
|
# includes EXECUTING_QUERY, Genie can retry after this status
|
|
252
251
|
else:
|
|
253
|
-
|
|
252
|
+
logger.debug(f"Waiting...: {resp['status']}")
|
|
254
253
|
time.sleep(self.poll_interval)
|
|
255
254
|
return GenieResponse(
|
|
256
255
|
conversation_id,
|
dao_ai/tools/mcp.py
CHANGED
|
@@ -5,7 +5,6 @@ from databricks_mcp import DatabricksOAuthClientProvider
|
|
|
5
5
|
from langchain_core.runnables.base import RunnableLike
|
|
6
6
|
from langchain_core.tools import tool as create_tool
|
|
7
7
|
from langchain_mcp_adapters.client import MultiServerMCPClient
|
|
8
|
-
from langchain_mcp_adapters.tools import load_mcp_tools
|
|
9
8
|
from loguru import logger
|
|
10
9
|
from mcp import ClientSession
|
|
11
10
|
from mcp.client.streamable_http import streamablehttp_client
|
|
@@ -31,26 +30,17 @@ def create_mcp_tools(
|
|
|
31
30
|
"""
|
|
32
31
|
logger.debug(f"create_mcp_tools: {function}")
|
|
33
32
|
|
|
33
|
+
# Get MCP URL - handles all convenience objects (connection, genie_room, warehouse, etc.)
|
|
34
|
+
mcp_url = function.mcp_url
|
|
35
|
+
logger.debug(f"Using MCP URL: {mcp_url}")
|
|
36
|
+
|
|
34
37
|
# Check if using UC Connection or direct MCP connection
|
|
35
38
|
if function.connection:
|
|
36
39
|
# Use UC Connection approach with DatabricksOAuthClientProvider
|
|
37
40
|
logger.debug(f"Using UC Connection for MCP: {function.connection.name}")
|
|
38
41
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
mcp_url = function.url
|
|
42
|
-
logger.debug(f"Using provided MCP URL: {mcp_url}")
|
|
43
|
-
else:
|
|
44
|
-
# Construct URL from workspace host and connection name
|
|
45
|
-
# Pattern: https://{workspace_host}/api/2.0/mcp/external/{connection_name}
|
|
46
|
-
workspace_client = function.connection.workspace_client
|
|
47
|
-
workspace_host = workspace_client.config.host
|
|
48
|
-
connection_name = function.connection.name
|
|
49
|
-
mcp_url = f"{workspace_host}/api/2.0/mcp/external/{connection_name}"
|
|
50
|
-
logger.debug(f"Constructed MCP URL from connection: {mcp_url}")
|
|
51
|
-
|
|
52
|
-
async def _get_tools_with_connection():
|
|
53
|
-
"""Get tools using DatabricksOAuthClientProvider."""
|
|
42
|
+
async def _list_tools_with_connection():
|
|
43
|
+
"""List available tools using DatabricksOAuthClientProvider."""
|
|
54
44
|
workspace_client = function.connection.workspace_client
|
|
55
45
|
|
|
56
46
|
async with streamablehttp_client(
|
|
@@ -59,20 +49,16 @@ def create_mcp_tools(
|
|
|
59
49
|
async with ClientSession(read_stream, write_stream) as session:
|
|
60
50
|
# Initialize and list tools
|
|
61
51
|
await session.initialize()
|
|
62
|
-
|
|
63
|
-
return tools
|
|
52
|
+
return await session.list_tools()
|
|
64
53
|
|
|
65
54
|
try:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
f"Retrieved {len(langchain_tools)} MCP tools via UC Connection"
|
|
55
|
+
mcp_tools: list[Tool] | ListToolsResult = asyncio.run(
|
|
56
|
+
_list_tools_with_connection()
|
|
69
57
|
)
|
|
58
|
+
if isinstance(mcp_tools, ListToolsResult):
|
|
59
|
+
mcp_tools = mcp_tools.tools
|
|
70
60
|
|
|
71
|
-
|
|
72
|
-
wrapped_tools = [
|
|
73
|
-
as_human_in_the_loop(tool, function) for tool in langchain_tools
|
|
74
|
-
]
|
|
75
|
-
return wrapped_tools
|
|
61
|
+
logger.debug(f"Retrieved {len(mcp_tools)} MCP tools via UC Connection")
|
|
76
62
|
|
|
77
63
|
except Exception as e:
|
|
78
64
|
logger.error(f"Failed to get tools from MCP server via UC Connection: {e}")
|
|
@@ -80,6 +66,39 @@ def create_mcp_tools(
|
|
|
80
66
|
f"Failed to list MCP tools for function '{function.name}' via UC Connection '{function.connection.name}': {e}"
|
|
81
67
|
)
|
|
82
68
|
|
|
69
|
+
# Create wrapper tools with fresh session per invocation
|
|
70
|
+
def _create_tool_wrapper_with_connection(mcp_tool: Tool) -> RunnableLike:
|
|
71
|
+
@create_tool(
|
|
72
|
+
mcp_tool.name,
|
|
73
|
+
description=mcp_tool.description or f"MCP tool: {mcp_tool.name}",
|
|
74
|
+
args_schema=mcp_tool.inputSchema,
|
|
75
|
+
)
|
|
76
|
+
async def tool_wrapper(**kwargs):
|
|
77
|
+
"""Execute MCP tool with fresh UC Connection session."""
|
|
78
|
+
logger.debug(
|
|
79
|
+
f"Invoking MCP tool {mcp_tool.name} with fresh UC Connection session"
|
|
80
|
+
)
|
|
81
|
+
workspace_client = function.connection.workspace_client
|
|
82
|
+
|
|
83
|
+
try:
|
|
84
|
+
async with streamablehttp_client(
|
|
85
|
+
mcp_url, auth=DatabricksOAuthClientProvider(workspace_client)
|
|
86
|
+
) as (read_stream, write_stream, _):
|
|
87
|
+
async with ClientSession(read_stream, write_stream) as session:
|
|
88
|
+
await session.initialize()
|
|
89
|
+
result = await session.call_tool(mcp_tool.name, kwargs)
|
|
90
|
+
logger.debug(
|
|
91
|
+
f"MCP tool {mcp_tool.name} completed successfully"
|
|
92
|
+
)
|
|
93
|
+
return result
|
|
94
|
+
except Exception as e:
|
|
95
|
+
logger.error(f"MCP tool {mcp_tool.name} failed: {e}")
|
|
96
|
+
raise
|
|
97
|
+
|
|
98
|
+
return as_human_in_the_loop(tool_wrapper, function)
|
|
99
|
+
|
|
100
|
+
return [_create_tool_wrapper_with_connection(tool) for tool in mcp_tools]
|
|
101
|
+
|
|
83
102
|
else:
|
|
84
103
|
# Use direct MCP connection with MultiServerMCPClient
|
|
85
104
|
logger.debug("Using direct MCP connection with MultiServerMCPClient")
|
|
@@ -119,7 +138,7 @@ def create_mcp_tools(
|
|
|
119
138
|
logger.debug("Using existing authentication token")
|
|
120
139
|
|
|
121
140
|
return {
|
|
122
|
-
"url":
|
|
141
|
+
"url": mcp_url, # Use the resolved MCP URL
|
|
123
142
|
"transport": function.transport,
|
|
124
143
|
"headers": headers,
|
|
125
144
|
}
|
dao_ai/tools/vector_search.py
CHANGED
|
@@ -1,10 +1,21 @@
|
|
|
1
|
-
from typing import Any, Optional, Sequence
|
|
1
|
+
from typing import Annotated, Any, Callable, List, Optional, Sequence
|
|
2
2
|
|
|
3
3
|
import mlflow
|
|
4
|
-
from
|
|
5
|
-
from
|
|
4
|
+
from databricks.vector_search.reranker import DatabricksReranker
|
|
5
|
+
from databricks_ai_bridge.vector_search_retriever_tool import (
|
|
6
|
+
FilterItem,
|
|
7
|
+
VectorSearchRetrieverToolInput,
|
|
8
|
+
)
|
|
9
|
+
from databricks_langchain.vectorstores import DatabricksVectorSearch
|
|
10
|
+
from flashrank import Ranker, RerankRequest
|
|
11
|
+
from langchain_core.documents import Document
|
|
12
|
+
from langchain_core.tools import InjectedToolCallId, tool
|
|
13
|
+
from langgraph.prebuilt import InjectedState
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from mlflow.entities import SpanType
|
|
6
16
|
|
|
7
17
|
from dao_ai.config import (
|
|
18
|
+
RerankParametersModel,
|
|
8
19
|
RetrieverModel,
|
|
9
20
|
VectorStoreModel,
|
|
10
21
|
)
|
|
@@ -14,13 +25,13 @@ def create_vector_search_tool(
|
|
|
14
25
|
retriever: RetrieverModel | dict[str, Any],
|
|
15
26
|
name: Optional[str] = None,
|
|
16
27
|
description: Optional[str] = None,
|
|
17
|
-
) ->
|
|
28
|
+
) -> Callable:
|
|
18
29
|
"""
|
|
19
30
|
Create a Vector Search tool for retrieving documents from a Databricks Vector Search index.
|
|
20
31
|
|
|
21
32
|
This function creates a tool that enables semantic search over product information,
|
|
22
|
-
documentation, or other content
|
|
23
|
-
|
|
33
|
+
documentation, or other content using the @tool decorator pattern. It supports optional
|
|
34
|
+
reranking of results using FlashRank for improved relevance.
|
|
24
35
|
|
|
25
36
|
Args:
|
|
26
37
|
retriever: Configuration details for the vector search retriever, including:
|
|
@@ -32,32 +43,69 @@ def create_vector_search_tool(
|
|
|
32
43
|
- vector_store: Dictionary with 'endpoint_name' and 'index' for vector search
|
|
33
44
|
- columns: List of columns to retrieve from the vector store
|
|
34
45
|
- search_parameters: Additional parameters for customizing the search behavior
|
|
46
|
+
- rerank: Optional rerank configuration for result reranking
|
|
47
|
+
name: Optional custom name for the tool
|
|
48
|
+
description: Optional custom description for the tool
|
|
35
49
|
|
|
36
50
|
Returns:
|
|
37
|
-
A
|
|
51
|
+
A LangChain tool that performs vector search with optional reranking
|
|
38
52
|
"""
|
|
39
53
|
|
|
40
54
|
if isinstance(retriever, dict):
|
|
41
55
|
retriever = RetrieverModel(**retriever)
|
|
42
56
|
|
|
43
|
-
|
|
57
|
+
vector_store_config: VectorStoreModel = retriever.vector_store
|
|
58
|
+
|
|
59
|
+
# Index is required for vector search
|
|
60
|
+
if vector_store_config.index is None:
|
|
61
|
+
raise ValueError("vector_store.index is required for vector search")
|
|
44
62
|
|
|
45
|
-
index_name: str =
|
|
46
|
-
columns: Sequence[str] = retriever.columns
|
|
63
|
+
index_name: str = vector_store_config.index.full_name
|
|
64
|
+
columns: Sequence[str] = retriever.columns or []
|
|
47
65
|
search_parameters: dict[str, Any] = retriever.search_parameters.model_dump()
|
|
48
|
-
primary_key: str =
|
|
49
|
-
doc_uri: str =
|
|
50
|
-
text_column: str =
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
66
|
+
primary_key: str = vector_store_config.primary_key or ""
|
|
67
|
+
doc_uri: str = vector_store_config.doc_uri or ""
|
|
68
|
+
text_column: str = vector_store_config.embedding_source_column
|
|
69
|
+
|
|
70
|
+
# Extract reranker configuration
|
|
71
|
+
reranker_config: Optional[RerankParametersModel] = retriever.rerank
|
|
72
|
+
|
|
73
|
+
# Initialize FlashRank ranker once if reranking is enabled
|
|
74
|
+
# This is expensive (loads model weights), so we do it once and reuse across invocations
|
|
75
|
+
ranker: Optional[Ranker] = None
|
|
76
|
+
if reranker_config:
|
|
77
|
+
logger.debug(
|
|
78
|
+
f"Creating vector search tool with reranking: '{name}' "
|
|
79
|
+
f"(model: {reranker_config.model}, top_n: {reranker_config.top_n or 'auto'})"
|
|
80
|
+
)
|
|
81
|
+
try:
|
|
82
|
+
ranker = Ranker(
|
|
83
|
+
model_name=reranker_config.model, cache_dir=reranker_config.cache_dir
|
|
84
|
+
)
|
|
85
|
+
logger.info(
|
|
86
|
+
f"FlashRank ranker initialized successfully (model: {reranker_config.model})"
|
|
87
|
+
)
|
|
88
|
+
except Exception as e:
|
|
89
|
+
logger.warning(
|
|
90
|
+
f"Failed to initialize FlashRank ranker during tool creation: {e}. "
|
|
91
|
+
"Reranking will be disabled for this tool."
|
|
92
|
+
)
|
|
93
|
+
# Set reranker_config to None so we don't attempt reranking
|
|
94
|
+
reranker_config = None
|
|
95
|
+
else:
|
|
96
|
+
logger.debug(
|
|
97
|
+
f"Creating vector search tool without reranking: '{name}' (standard similarity search only)"
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Initialize the vector store
|
|
101
|
+
# Note: text_column is only required for self-managed embeddings
|
|
102
|
+
# For Databricks-managed embeddings, it's automatically determined from the index
|
|
103
|
+
vector_store: DatabricksVectorSearch = DatabricksVectorSearch(
|
|
57
104
|
index_name=index_name,
|
|
105
|
+
text_column=None, # Let DatabricksVectorSearch determine this from the index
|
|
58
106
|
columns=columns,
|
|
59
|
-
|
|
60
|
-
workspace_client=
|
|
107
|
+
include_score=True,
|
|
108
|
+
workspace_client=vector_store_config.workspace_client,
|
|
61
109
|
)
|
|
62
110
|
|
|
63
111
|
# Register the retriever schema with MLflow for model serving integration
|
|
@@ -66,7 +114,169 @@ def create_vector_search_tool(
|
|
|
66
114
|
primary_key=primary_key,
|
|
67
115
|
text_column=text_column,
|
|
68
116
|
doc_uri=doc_uri,
|
|
69
|
-
other_columns=columns,
|
|
117
|
+
other_columns=list(columns),
|
|
118
|
+
)
|
|
119
|
+
|
|
120
|
+
# Helper function to perform vector similarity search
|
|
121
|
+
@mlflow.trace(name="find_documents", span_type=SpanType.RETRIEVER)
|
|
122
|
+
def _find_documents(
|
|
123
|
+
query: str, filters: Optional[List[FilterItem]] = None
|
|
124
|
+
) -> List[Document]:
|
|
125
|
+
"""Perform vector similarity search."""
|
|
126
|
+
# Convert filters to dict format
|
|
127
|
+
filters_dict: dict[str, Any] = {}
|
|
128
|
+
if filters:
|
|
129
|
+
for item in filters:
|
|
130
|
+
item_dict = dict(item)
|
|
131
|
+
filters_dict[item_dict["key"]] = item_dict["value"]
|
|
132
|
+
|
|
133
|
+
# Merge with any configured filters
|
|
134
|
+
combined_filters: dict[str, Any] = {
|
|
135
|
+
**filters_dict,
|
|
136
|
+
**search_parameters.get("filters", {}),
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
# Perform similarity search
|
|
140
|
+
num_results: int = search_parameters.get("num_results", 10)
|
|
141
|
+
query_type: str = search_parameters.get("query_type", "ANN")
|
|
142
|
+
|
|
143
|
+
logger.debug(
|
|
144
|
+
f"Performing vector search: query='{query[:50]}...', k={num_results}, filters={combined_filters}"
|
|
145
|
+
)
|
|
146
|
+
|
|
147
|
+
# Build similarity search kwargs
|
|
148
|
+
search_kwargs = {
|
|
149
|
+
"query": query,
|
|
150
|
+
"k": num_results,
|
|
151
|
+
"filter": combined_filters if combined_filters else None,
|
|
152
|
+
"query_type": query_type,
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
# Add DatabricksReranker if configured with columns
|
|
156
|
+
if reranker_config and reranker_config.columns:
|
|
157
|
+
search_kwargs["reranker"] = DatabricksReranker(
|
|
158
|
+
columns_to_rerank=reranker_config.columns
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
documents: List[Document] = vector_store.similarity_search(**search_kwargs)
|
|
162
|
+
|
|
163
|
+
logger.debug(f"Retrieved {len(documents)} documents from vector search")
|
|
164
|
+
return documents
|
|
165
|
+
|
|
166
|
+
# Helper function to rerank documents
|
|
167
|
+
@mlflow.trace(name="rerank_documents", span_type=SpanType.RETRIEVER)
|
|
168
|
+
def _rerank_documents(query: str, documents: List[Document]) -> List[Document]:
|
|
169
|
+
"""Rerank documents using FlashRank.
|
|
170
|
+
|
|
171
|
+
Uses the ranker instance initialized at tool creation time (captured in closure).
|
|
172
|
+
This avoids expensive model loading on every invocation.
|
|
173
|
+
"""
|
|
174
|
+
if not reranker_config or ranker is None:
|
|
175
|
+
return documents
|
|
176
|
+
|
|
177
|
+
logger.debug(
|
|
178
|
+
f"Starting reranking for {len(documents)} documents using model '{reranker_config.model}'"
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
# Prepare passages for reranking
|
|
182
|
+
passages: List[dict[str, Any]] = [
|
|
183
|
+
{"text": doc.page_content, "meta": doc.metadata} for doc in documents
|
|
184
|
+
]
|
|
185
|
+
|
|
186
|
+
# Create reranking request
|
|
187
|
+
rerank_request: RerankRequest = RerankRequest(query=query, passages=passages)
|
|
188
|
+
|
|
189
|
+
# Perform reranking
|
|
190
|
+
logger.debug(f"Reranking {len(passages)} passages for query: '{query[:50]}...'")
|
|
191
|
+
results: List[dict[str, Any]] = ranker.rerank(rerank_request)
|
|
192
|
+
|
|
193
|
+
# Apply top_n filtering
|
|
194
|
+
top_n: int = reranker_config.top_n or len(documents)
|
|
195
|
+
results = results[:top_n]
|
|
196
|
+
logger.debug(
|
|
197
|
+
f"Reranking complete. Filtered to top {top_n} results from {len(documents)} candidates"
|
|
198
|
+
)
|
|
199
|
+
|
|
200
|
+
# Convert back to Document objects with reranking scores
|
|
201
|
+
reranked_docs: List[Document] = []
|
|
202
|
+
for result in results:
|
|
203
|
+
# Find original document by matching text
|
|
204
|
+
orig_doc: Optional[Document] = next(
|
|
205
|
+
(doc for doc in documents if doc.page_content == result["text"]), None
|
|
206
|
+
)
|
|
207
|
+
if orig_doc:
|
|
208
|
+
# Add reranking score to metadata
|
|
209
|
+
reranked_doc: Document = Document(
|
|
210
|
+
page_content=orig_doc.page_content,
|
|
211
|
+
metadata={
|
|
212
|
+
**orig_doc.metadata,
|
|
213
|
+
"reranker_score": result["score"],
|
|
214
|
+
},
|
|
215
|
+
)
|
|
216
|
+
reranked_docs.append(reranked_doc)
|
|
217
|
+
|
|
218
|
+
logger.debug(
|
|
219
|
+
f"Reranked {len(documents)} documents → {len(reranked_docs)} results "
|
|
220
|
+
f"(model: {reranker_config.model}, top score: {reranked_docs[0].metadata.get('reranker_score', 0):.4f})"
|
|
221
|
+
if reranked_docs
|
|
222
|
+
else f"Reranking completed with {len(reranked_docs)} results"
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
return reranked_docs
|
|
226
|
+
|
|
227
|
+
# Create the main vector search tool using @tool decorator
|
|
228
|
+
# Note: args_schema provides descriptions for query and filters,
|
|
229
|
+
# so Annotated is only needed for injected LangGraph parameters
|
|
230
|
+
@tool(
|
|
231
|
+
name_or_callable=name or index_name,
|
|
232
|
+
description=description or "Search for documents using vector similarity",
|
|
233
|
+
args_schema=VectorSearchRetrieverToolInput,
|
|
70
234
|
)
|
|
235
|
+
def vector_search_tool(
|
|
236
|
+
query: str,
|
|
237
|
+
filters: Optional[List[FilterItem]] = None,
|
|
238
|
+
state: Annotated[dict, InjectedState] = None,
|
|
239
|
+
tool_call_id: Annotated[str, InjectedToolCallId] = None,
|
|
240
|
+
) -> list[dict[str, Any]]:
|
|
241
|
+
"""
|
|
242
|
+
Search for documents using vector similarity with optional reranking.
|
|
243
|
+
|
|
244
|
+
This tool performs a two-stage retrieval process:
|
|
245
|
+
1. Vector similarity search to find candidate documents
|
|
246
|
+
2. Optional reranking using cross-encoder model for improved relevance
|
|
247
|
+
|
|
248
|
+
Both stages are traced in MLflow for observability.
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
Command with ToolMessage containing the retrieved documents
|
|
252
|
+
"""
|
|
253
|
+
logger.debug(
|
|
254
|
+
f"Vector search tool called: query='{query[:50]}...', reranking={reranker_config is not None}"
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
# Step 1: Perform vector similarity search
|
|
258
|
+
documents: List[Document] = _find_documents(query, filters)
|
|
259
|
+
|
|
260
|
+
# Step 2: If reranking is enabled, rerank the documents
|
|
261
|
+
if reranker_config:
|
|
262
|
+
logger.debug(
|
|
263
|
+
f"Reranking enabled (model: '{reranker_config.model}', top_n: {reranker_config.top_n or 'all'})"
|
|
264
|
+
)
|
|
265
|
+
documents = _rerank_documents(query, documents)
|
|
266
|
+
logger.debug(f"Returning {len(documents)} reranked documents")
|
|
267
|
+
else:
|
|
268
|
+
logger.debug("Reranking disabled, returning original vector search results")
|
|
269
|
+
|
|
270
|
+
# Return Command with ToolMessage containing the documents
|
|
271
|
+
# Serialize documents to dicts for proper ToolMessage handling
|
|
272
|
+
serialized_docs: list[dict[str, Any]] = [
|
|
273
|
+
{
|
|
274
|
+
"page_content": doc.page_content,
|
|
275
|
+
"metadata": doc.metadata,
|
|
276
|
+
}
|
|
277
|
+
for doc in documents
|
|
278
|
+
]
|
|
279
|
+
|
|
280
|
+
return serialized_docs
|
|
71
281
|
|
|
72
282
|
return vector_search_tool
|
dao_ai/utils.py
CHANGED
|
@@ -3,7 +3,8 @@ import importlib.metadata
|
|
|
3
3
|
import os
|
|
4
4
|
import re
|
|
5
5
|
import site
|
|
6
|
-
from importlib.metadata import version
|
|
6
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
7
|
+
from pathlib import Path
|
|
7
8
|
from typing import Any, Callable, Sequence
|
|
8
9
|
|
|
9
10
|
from loguru import logger
|
|
@@ -37,6 +38,59 @@ def normalize_name(name: str) -> str:
|
|
|
37
38
|
return normalized.strip("_")
|
|
38
39
|
|
|
39
40
|
|
|
41
|
+
def dao_ai_version() -> str:
|
|
42
|
+
"""
|
|
43
|
+
Get the dao-ai package version, with fallback for source installations.
|
|
44
|
+
|
|
45
|
+
Tries to get the version from installed package metadata first. If the package
|
|
46
|
+
is not installed (e.g., running from source), falls back to reading from
|
|
47
|
+
pyproject.toml. Returns "dev" if neither method works.
|
|
48
|
+
|
|
49
|
+
Returns:
|
|
50
|
+
str: The version string, or "dev" if version cannot be determined
|
|
51
|
+
"""
|
|
52
|
+
try:
|
|
53
|
+
# Try to get version from installed package metadata
|
|
54
|
+
return version("dao-ai")
|
|
55
|
+
except PackageNotFoundError:
|
|
56
|
+
# Package not installed, try reading from pyproject.toml
|
|
57
|
+
logger.debug(
|
|
58
|
+
"dao-ai package not installed, attempting to read version from pyproject.toml"
|
|
59
|
+
)
|
|
60
|
+
try:
|
|
61
|
+
import tomllib # Python 3.11+
|
|
62
|
+
except ImportError:
|
|
63
|
+
try:
|
|
64
|
+
import tomli as tomllib # Fallback for Python < 3.11
|
|
65
|
+
except ImportError:
|
|
66
|
+
logger.warning(
|
|
67
|
+
"Cannot determine dao-ai version: package not installed and tomllib/tomli not available"
|
|
68
|
+
)
|
|
69
|
+
return "dev"
|
|
70
|
+
|
|
71
|
+
try:
|
|
72
|
+
# Find pyproject.toml relative to this file
|
|
73
|
+
project_root = Path(__file__).parents[2]
|
|
74
|
+
pyproject_path = project_root / "pyproject.toml"
|
|
75
|
+
|
|
76
|
+
if not pyproject_path.exists():
|
|
77
|
+
logger.warning(
|
|
78
|
+
f"Cannot determine dao-ai version: pyproject.toml not found at {pyproject_path}"
|
|
79
|
+
)
|
|
80
|
+
return "dev"
|
|
81
|
+
|
|
82
|
+
with open(pyproject_path, "rb") as f:
|
|
83
|
+
pyproject_data = tomllib.load(f)
|
|
84
|
+
pkg_version = pyproject_data.get("project", {}).get("version", "dev")
|
|
85
|
+
logger.debug(
|
|
86
|
+
f"Read version {pkg_version} from pyproject.toml at {pyproject_path}"
|
|
87
|
+
)
|
|
88
|
+
return pkg_version
|
|
89
|
+
except Exception as e:
|
|
90
|
+
logger.warning(f"Cannot determine dao-ai version from pyproject.toml: {e}")
|
|
91
|
+
return "dev"
|
|
92
|
+
|
|
93
|
+
|
|
40
94
|
def get_installed_packages() -> dict[str, str]:
|
|
41
95
|
"""Get all installed packages with versions"""
|
|
42
96
|
|
|
@@ -46,6 +100,7 @@ def get_installed_packages() -> dict[str, str]:
|
|
|
46
100
|
f"databricks-mcp=={version('databricks-mcp')}",
|
|
47
101
|
f"databricks-sdk[openai]=={version('databricks-sdk')}",
|
|
48
102
|
f"duckduckgo-search=={version('duckduckgo-search')}",
|
|
103
|
+
f"flashrank=={version('flashrank')}",
|
|
49
104
|
f"langchain=={version('langchain')}",
|
|
50
105
|
f"langchain-mcp-adapters=={version('langchain-mcp-adapters')}",
|
|
51
106
|
f"langchain-openai=={version('langchain-openai')}",
|
|
@@ -65,6 +120,7 @@ def get_installed_packages() -> dict[str, str]:
|
|
|
65
120
|
f"psycopg[binary,pool]=={version('psycopg')}",
|
|
66
121
|
f"pydantic=={version('pydantic')}",
|
|
67
122
|
f"pyyaml=={version('pyyaml')}",
|
|
123
|
+
f"tomli=={version('tomli')}",
|
|
68
124
|
f"unitycatalog-ai[databricks]=={version('unitycatalog-ai')}",
|
|
69
125
|
f"unitycatalog-langchain[databricks]=={version('unitycatalog-langchain')}",
|
|
70
126
|
]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dao-ai
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.28
|
|
4
4
|
Summary: DAO AI: A modular, multi-agent orchestration framework for complex AI workflows. Supports agent handoff, tool integration, and dynamic configuration via YAML.
|
|
5
5
|
Project-URL: Homepage, https://github.com/natefleming/dao-ai
|
|
6
6
|
Project-URL: Documentation, https://natefleming.github.io/dao-ai
|
|
@@ -29,6 +29,8 @@ Requires-Dist: databricks-langchain>=0.8.1
|
|
|
29
29
|
Requires-Dist: databricks-mcp>=0.3.0
|
|
30
30
|
Requires-Dist: databricks-sdk[openai]>=0.67.0
|
|
31
31
|
Requires-Dist: duckduckgo-search>=8.0.2
|
|
32
|
+
Requires-Dist: flashrank>=0.2.8
|
|
33
|
+
Requires-Dist: gepa>=0.0.17
|
|
32
34
|
Requires-Dist: grandalf>=0.8
|
|
33
35
|
Requires-Dist: langchain-mcp-adapters>=0.1.10
|
|
34
36
|
Requires-Dist: langchain-tavily>=0.2.11
|
|
@@ -40,7 +42,7 @@ Requires-Dist: langgraph>=0.6.10
|
|
|
40
42
|
Requires-Dist: langmem>=0.0.29
|
|
41
43
|
Requires-Dist: loguru>=0.7.3
|
|
42
44
|
Requires-Dist: mcp>=1.17.0
|
|
43
|
-
Requires-Dist: mlflow>=3.
|
|
45
|
+
Requires-Dist: mlflow>=3.5.1
|
|
44
46
|
Requires-Dist: nest-asyncio>=1.6.0
|
|
45
47
|
Requires-Dist: openevals>=0.0.19
|
|
46
48
|
Requires-Dist: openpyxl>=3.1.5
|
|
@@ -51,10 +53,11 @@ Requires-Dist: pyyaml>=6.0.2
|
|
|
51
53
|
Requires-Dist: rich>=14.0.0
|
|
52
54
|
Requires-Dist: scipy<=1.15
|
|
53
55
|
Requires-Dist: sqlparse>=0.5.3
|
|
56
|
+
Requires-Dist: tomli>=2.3.0
|
|
54
57
|
Requires-Dist: unitycatalog-ai[databricks]>=0.3.0
|
|
55
58
|
Provides-Extra: databricks
|
|
56
59
|
Requires-Dist: databricks-connect>=15.0.0; extra == 'databricks'
|
|
57
|
-
Requires-Dist: databricks-vectorsearch>=0.
|
|
60
|
+
Requires-Dist: databricks-vectorsearch>=0.63; extra == 'databricks'
|
|
58
61
|
Requires-Dist: pyspark>=3.5.0; extra == 'databricks'
|
|
59
62
|
Provides-Extra: dev
|
|
60
63
|
Requires-Dist: mypy>=1.0.0; extra == 'dev'
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
dao_ai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
dao_ai/agent_as_code.py,sha256=
|
|
2
|
+
dao_ai/agent_as_code.py,sha256=sviZQV7ZPxE5zkZ9jAbfegI681nra5i8yYxw05e3X7U,552
|
|
3
3
|
dao_ai/catalog.py,sha256=sPZpHTD3lPx4EZUtIWeQV7VQM89WJ6YH__wluk1v2lE,4947
|
|
4
4
|
dao_ai/chat_models.py,sha256=uhwwOTeLyHWqoTTgHrs4n5iSyTwe4EQcLKnh3jRxPWI,8626
|
|
5
5
|
dao_ai/cli.py,sha256=gq-nsapWxDA1M6Jua3vajBvIwf0Oa6YLcB58lEtMKUo,22503
|
|
6
|
-
dao_ai/config.py,sha256=
|
|
7
|
-
dao_ai/graph.py,sha256=
|
|
6
|
+
dao_ai/config.py,sha256=qqBdYV-ElIMYDKzY4lBejyJ6ysmwnBkqyGMWRBwWGVo,72507
|
|
7
|
+
dao_ai/graph.py,sha256=9kjJx0oFZKq5J9-Kpri4-0VCJILHYdYyhqQnj0_noxQ,8913
|
|
8
8
|
dao_ai/guardrails.py,sha256=4TKArDONRy8RwHzOT1plZ1rhy3x9GF_aeGpPCRl6wYA,4016
|
|
9
9
|
dao_ai/messages.py,sha256=xl_3-WcFqZKCFCiov8sZOPljTdM3gX3fCHhxq-xFg2U,7005
|
|
10
10
|
dao_ai/models.py,sha256=8r8GIG3EGxtVyWsRNI56lVaBjiNrPkzh4HdwMZRq8iw,31689
|
|
11
|
-
dao_ai/nodes.py,sha256=
|
|
11
|
+
dao_ai/nodes.py,sha256=iQ_5vL6mt1UcRnhwgz-l1D8Ww4CMQrSMVnP_Lu7fFjU,8781
|
|
12
12
|
dao_ai/prompts.py,sha256=7Hcstmv514P0s9s-TVoIlbkDV2XXOphGCW6gcPeyUYE,1628
|
|
13
13
|
dao_ai/state.py,sha256=_lF9krAYYjvFDMUwZzVKOn0ZnXKcOrbjWKdre0C5B54,1137
|
|
14
14
|
dao_ai/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
dao_ai/utils.py,sha256=
|
|
15
|
+
dao_ai/utils.py,sha256=4FV9y0EVn0tmxfkn4EdUAkOewoAF_T0pHDAHe6hJx-M,6708
|
|
16
16
|
dao_ai/vector_search.py,sha256=jlaFS_iizJ55wblgzZmswMM3UOL-qOp2BGJc0JqXYSg,2839
|
|
17
17
|
dao_ai/hooks/__init__.py,sha256=LlHGIuiZt6vGW8K5AQo1XJEkBP5vDVtMhq0IdjcLrD4,417
|
|
18
18
|
dao_ai/hooks/core.py,sha256=ZShHctUSoauhBgdf1cecy9-D7J6-sGn-pKjuRMumW5U,6663
|
|
@@ -22,20 +22,20 @@ dao_ai/memory/core.py,sha256=DnEjQO3S7hXr3CDDd7C2eE7fQUmcCS_8q9BXEgjPH3U,4271
|
|
|
22
22
|
dao_ai/memory/postgres.py,sha256=vvI3osjx1EoU5GBA6SCUstTBKillcmLl12hVgDMjfJY,15346
|
|
23
23
|
dao_ai/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
dao_ai/providers/base.py,sha256=-fjKypCOk28h6vioPfMj9YZSw_3Kcbi2nMuAyY7vX9k,1383
|
|
25
|
-
dao_ai/providers/databricks.py,sha256=
|
|
25
|
+
dao_ai/providers/databricks.py,sha256=W_lXSMbPTULMAx-KW7zBJfP7LtkcPGRnEfGcSYuu708,65824
|
|
26
26
|
dao_ai/tools/__init__.py,sha256=G5-5Yi6zpQOH53b5IzLdtsC6g0Ep6leI5GxgxOmgw7Q,1203
|
|
27
27
|
dao_ai/tools/agent.py,sha256=WbQnyziiT12TLMrA7xK0VuOU029tdmUBXbUl-R1VZ0Q,1886
|
|
28
28
|
dao_ai/tools/core.py,sha256=Kei33S8vrmvPOAyrFNekaWmV2jqZ-IPS1QDSvU7RZF0,1984
|
|
29
|
-
dao_ai/tools/genie.py,sha256=
|
|
29
|
+
dao_ai/tools/genie.py,sha256=R9Vl72s6NUtfTrQcdfhNtJFOw-BrKs_5JGt-3dAQGjA,15034
|
|
30
30
|
dao_ai/tools/human_in_the_loop.py,sha256=yk35MO9eNETnYFH-sqlgR-G24TrEgXpJlnZUustsLkI,3681
|
|
31
|
-
dao_ai/tools/mcp.py,sha256=
|
|
31
|
+
dao_ai/tools/mcp.py,sha256=5aQoRtx2z4xm6zgRslc78rSfEQe-mfhqov2NsiybYfc,8416
|
|
32
32
|
dao_ai/tools/python.py,sha256=XcQiTMshZyLUTVR5peB3vqsoUoAAy8gol9_pcrhddfI,1831
|
|
33
33
|
dao_ai/tools/slack.py,sha256=SCvyVcD9Pv_XXPXePE_fSU1Pd8VLTEkKDLvoGTZWy2Y,4775
|
|
34
34
|
dao_ai/tools/time.py,sha256=Y-23qdnNHzwjvnfkWvYsE7PoWS1hfeKy44tA7sCnNac,8759
|
|
35
35
|
dao_ai/tools/unity_catalog.py,sha256=uX_h52BuBAr4c9UeqSMI7DNz3BPRLeai5tBVW4sJqRI,13113
|
|
36
|
-
dao_ai/tools/vector_search.py,sha256=
|
|
37
|
-
dao_ai-0.0.
|
|
38
|
-
dao_ai-0.0.
|
|
39
|
-
dao_ai-0.0.
|
|
40
|
-
dao_ai-0.0.
|
|
41
|
-
dao_ai-0.0.
|
|
36
|
+
dao_ai/tools/vector_search.py,sha256=h6yCgEtOA3h-anJG0hGB3VvcmC3Os7_qnhz8xxRjv1E,11346
|
|
37
|
+
dao_ai-0.0.28.dist-info/METADATA,sha256=xVgC8iAxjfXmuRz_FzITQTXgZgbTcmEBAiY-DYvGWSk,42727
|
|
38
|
+
dao_ai-0.0.28.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
39
|
+
dao_ai-0.0.28.dist-info/entry_points.txt,sha256=Xa-UFyc6gWGwMqMJOt06ZOog2vAfygV_DSwg1AiP46g,43
|
|
40
|
+
dao_ai-0.0.28.dist-info/licenses/LICENSE,sha256=YZt3W32LtPYruuvHE9lGk2bw6ZPMMJD8yLrjgHybyz4,1069
|
|
41
|
+
dao_ai-0.0.28.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|