llama-stack 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in the supported public registries. It is provided for informational purposes only.
- llama_stack/core/library_client.py +80 -3
- llama_stack/core/routing_tables/common.py +11 -0
- llama_stack/core/routing_tables/vector_stores.py +4 -0
- llama_stack/core/stack.py +16 -1
- llama_stack/core/storage/kvstore/kvstore.py +11 -0
- llama_stack/core/storage/kvstore/mongodb/mongodb.py +5 -0
- llama_stack/core/storage/kvstore/postgres/postgres.py +8 -0
- llama_stack/core/storage/kvstore/redis/redis.py +5 -0
- llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py +8 -0
- llama_stack/core/storage/sqlstore/sqlstore.py +8 -0
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +60 -34
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +4 -0
- llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py +9 -1
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +13 -1
- llama_stack/providers/utils/inference/embedding_mixin.py +20 -16
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +33 -0
- llama_stack/providers/utils/tools/mcp.py +258 -16
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/METADATA +2 -2
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/RECORD +29 -29
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/WHEEL +1 -1
- llama_stack_api/internal/kvstore.py +2 -0
- llama_stack_api/internal/sqlstore.py +2 -0
- llama_stack_api/llama_stack_api/internal/kvstore.py +2 -0
- llama_stack_api/llama_stack_api/internal/sqlstore.py +2 -0
- llama_stack_api/llama_stack_api/vector_stores.py +2 -0
- llama_stack_api/vector_stores.py +2 -0
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/top_level.txt +0 -0
llama_stack/core/library_client.py
CHANGED

@@ -161,6 +161,45 @@ class LlamaStackAsLibraryClient(LlamaStackClient):
         """
         pass

+    def shutdown(self) -> None:
+        """Shutdown the client and release all resources.
+
+        This method should be called when you're done using the client to properly
+        close database connections and release other resources. Failure to call this
+        method may result in the program hanging on exit while waiting for background
+        threads to complete.
+
+        This method is idempotent and can be called multiple times safely.
+
+        Example:
+            client = LlamaStackAsLibraryClient("starter")
+            # ... use the client ...
+            client.shutdown()
+        """
+        loop = self.loop
+        asyncio.set_event_loop(loop)
+        try:
+            loop.run_until_complete(self.async_client.shutdown())
+        finally:
+            loop.close()
+            asyncio.set_event_loop(None)
+
+    def __enter__(self) -> "LlamaStackAsLibraryClient":
+        """Enter the context manager.
+
+        The client is already initialized in __init__, so this just returns self.
+
+        Example:
+            with LlamaStackAsLibraryClient("starter") as client:
+                response = client.models.list()
+            # Client is automatically shut down here
+        """
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the context manager and shut down the client."""
+        self.shutdown()
+
     def request(self, *args, **kwargs):
         loop = self.loop
         asyncio.set_event_loop(loop)

@@ -224,6 +263,7 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         self.custom_provider_registry = custom_provider_registry
         self.provider_data = provider_data
         self.route_impls: RouteImpls | None = None  # Initialize to None to prevent AttributeError
+        self.stack: Stack | None = None

     def _remove_root_logger_handlers(self):
         """

@@ -246,9 +286,9 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         try:
             self.route_impls = None

-            stack = Stack(self.config, self.custom_provider_registry)
-            await stack.initialize()
-            self.impls = stack.impls
+            self.stack = Stack(self.config, self.custom_provider_registry)
+            await self.stack.initialize()
+            self.impls = self.stack.impls
         except ModuleNotFoundError as _e:
             cprint(_e.msg, color="red", file=sys.stderr)
             cprint(

@@ -283,6 +323,43 @@ class AsyncLlamaStackAsLibraryClient(AsyncLlamaStackClient):
         self.route_impls = initialize_route_impls(self.impls)
         return True

+    async def shutdown(self) -> None:
+        """Shutdown the client and release all resources.
+
+        This method should be called when you're done using the client to properly
+        close database connections and release other resources. Failure to call this
+        method may result in the program hanging on exit while waiting for background
+        threads to complete.
+
+        This method is idempotent and can be called multiple times safely.
+
+        Example:
+            client = AsyncLlamaStackAsLibraryClient("starter")
+            await client.initialize()
+            # ... use the client ...
+            await client.shutdown()
+        """
+        if self.stack:
+            await self.stack.shutdown()
+            self.stack = None
+
+    async def __aenter__(self) -> "AsyncLlamaStackAsLibraryClient":
+        """Enter the async context manager.
+
+        Initializes the client and returns it.
+
+        Example:
+            async with AsyncLlamaStackAsLibraryClient("starter") as client:
+                response = await client.models.list()
+            # Client is automatically shut down here
+        """
+        await self.initialize()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
+        """Exit the async context manager and shut down the client."""
+        await self.shutdown()
+
     async def request(
         self,
         cast_to: Any,
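The new context-manager support means callers no longer need to pair initialization and shutdown by hand. A minimal usage sketch based on the docstrings above, assuming a locally available "starter" distribution (the distribution name is illustrative):

```python
import asyncio

from llama_stack.core.library_client import (
    AsyncLlamaStackAsLibraryClient,
    LlamaStackAsLibraryClient,
)

# Sync: __enter__ returns the already-initialized client; __exit__ calls shutdown().
with LlamaStackAsLibraryClient("starter") as client:
    models = client.models.list()

# Async: __aenter__ runs initialize(); __aexit__ awaits shutdown().
async def main() -> None:
    async with AsyncLlamaStackAsLibraryClient("starter") as client:
        models = await client.models.list()

asyncio.run(main())
```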
llama_stack/core/routing_tables/common.py
CHANGED

@@ -209,6 +209,17 @@ class CommonRoutingTableImpl(RoutingTable):
             logger.info(f"Setting owner for {obj.type} '{obj.identifier}' to {obj.owner.principal}")

         registered_obj = await register_object_with_provider(obj, p)
+
+        # Ensure OpenAI metadata exists for vector stores
+        if obj.type == ResourceType.vector_store.value:
+            if hasattr(p, "_ensure_openai_metadata_exists"):
+                await p._ensure_openai_metadata_exists(obj)
+            else:
+                logger.warning(
+                    f"Provider {obj.provider_id} does not support OpenAI metadata creation. "
+                    f"Vector store {obj.identifier} may not work with OpenAI-compatible APIs."
+                )
+
         # TODO: This needs to be fixed for all APIs once they return the registered object
         if obj.type == ResourceType.model.value:
             await self.dist_registry.register(registered_obj)
llama_stack/core/routing_tables/vector_stores.py
CHANGED

@@ -55,6 +55,10 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):

     # Internal methods only - no public API exposure

+    async def list_vector_stores(self) -> list[VectorStoreWithOwner]:
+        """List all registered vector stores."""
+        return await self.get_all_with_type(ResourceType.vector_store.value)
+
     async def register_vector_store(
         self,
         vector_store_id: str,
llama_stack/core/stack.py
CHANGED

@@ -108,6 +108,7 @@ RESOURCES = [
     ),
     ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
     ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
+    ("vector_stores", Api.vector_stores, "register_vector_store", "list_vector_stores", None),
 ]


@@ -620,7 +621,7 @@ class Stack:
     async def shutdown(self):
         for impl in self.impls.values():
             impl_name = impl.__class__.__name__
-            logger.
+            logger.debug(f"Shutting down {impl_name}")
             try:
                 if hasattr(impl, "shutdown"):
                     await asyncio.wait_for(impl.shutdown(), timeout=5)

@@ -642,6 +643,20 @@
     if REGISTRY_REFRESH_TASK:
         REGISTRY_REFRESH_TASK.cancel()

+    # Shutdown storage backends
+    from llama_stack.core.storage.kvstore.kvstore import shutdown_kvstore_backends
+    from llama_stack.core.storage.sqlstore.sqlstore import shutdown_sqlstore_backends
+
+    try:
+        await shutdown_kvstore_backends()
+    except Exception as e:
+        logger.exception(f"Failed to shutdown KV store backends: {e}")
+
+    try:
+        await shutdown_sqlstore_backends()
+    except Exception as e:
+        logger.exception(f"Failed to shutdown SQL store backends: {e}")
+

 async def refresh_registry_once(impls: dict[Api, Any]):
     logger.debug("refreshing registry")
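The shutdown path now bounds each provider's cleanup and always tears down storage afterwards. A distilled sketch of the same bounded-shutdown pattern in isolation (the function and dict here are illustrative, not the Stack API):

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

async def shutdown_all(impls: dict[str, object], timeout: float = 5.0) -> None:
    # Give each implementation a bounded window to clean up, so one hung
    # provider cannot stall the whole process exit.
    for name, impl in impls.items():
        shutdown = getattr(impl, "shutdown", None)
        if shutdown is None:
            continue
        try:
            await asyncio.wait_for(shutdown(), timeout=timeout)
        except TimeoutError:
            logger.warning(f"{name} did not shut down within {timeout}s")
```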
llama_stack/core/storage/kvstore/kvstore.py
CHANGED

@@ -62,6 +62,9 @@ class InmemoryKVStoreImpl(KVStore):
     async def delete(self, key: str) -> None:
         del self._store[key]

+    async def shutdown(self) -> None:
+        self._store.clear()
+

 _KVSTORE_BACKENDS: dict[str, KVStoreConfig] = {}
 _KVSTORE_INSTANCES: dict[tuple[str, str], KVStore] = {}

@@ -126,3 +129,11 @@ async def kvstore_impl(reference: KVStoreReference) -> KVStore:
     await impl.initialize()
     _KVSTORE_INSTANCES[cache_key] = impl
     return impl
+
+
+async def shutdown_kvstore_backends() -> None:
+    """Shutdown all cached KV store instances."""
+    global _KVSTORE_INSTANCES
+    for instance in _KVSTORE_INSTANCES.values():
+        await instance.shutdown()
+    _KVSTORE_INSTANCES.clear()
llama_stack/core/storage/kvstore/postgres/postgres.py
CHANGED

@@ -123,3 +123,11 @@ class PostgresKVStoreImpl(KVStore):
             (start_key, end_key),
         )
         return [row[0] for row in cursor.fetchall()]
+
+    async def shutdown(self) -> None:
+        if self._cursor:
+            self._cursor.close()
+            self._cursor = None
+        if self._conn:
+            self._conn.close()
+            self._conn = None
llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py
CHANGED

@@ -107,6 +107,14 @@ class SqlAlchemySqlStoreImpl(SqlStore):

         return engine

+    async def shutdown(self) -> None:
+        """Dispose the session maker's engine and close all connections."""
+        # The async_session holds a reference to the engine created in __init__
+        if self.async_session:
+            engine = self.async_session.kw.get("bind")
+            if engine:
+                await engine.dispose()
+
     async def create_table(
         self,
         table: str,
llama_stack/core/storage/sqlstore/sqlstore.py
CHANGED

@@ -85,3 +85,11 @@ def register_sqlstore_backends(backends: dict[str, StorageBackendConfig]) -> None:
     _SQLSTORE_LOCKS.clear()
     for name, cfg in backends.items():
         _SQLSTORE_BACKENDS[name] = cfg
+
+
+async def shutdown_sqlstore_backends() -> None:
+    """Shutdown all cached SQL store instances."""
+    global _SQLSTORE_INSTANCES
+    for instance in _SQLSTORE_INSTANCES.values():
+        await instance.shutdown()
+    _SQLSTORE_INSTANCES.clear()
llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py
CHANGED

@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
 import re
 import time
 import uuid

@@ -16,6 +17,7 @@ from llama_stack.providers.utils.responses.responses_store import (
     ResponsesStore,
     _OpenAIResponseObjectWithInputAndMessages,
 )
+from llama_stack.providers.utils.tools.mcp import MCPSessionManager
 from llama_stack_api import (
     ConversationItem,
     Conversations,

@@ -489,6 +491,19 @@ class OpenAIResponsesImpl:
         response_id = f"resp_{uuid.uuid4()}"
         created_at = int(time.time())

+        # Create a per-request MCP session manager for session reuse (fix for #4452)
+        # This avoids redundant tools/list calls when making multiple MCP tool invocations
+        mcp_session_manager = MCPSessionManager()
+
+        # Create a per-request ToolExecutor with the session manager
+        request_tool_executor = ToolExecutor(
+            tool_groups_api=self.tool_groups_api,
+            tool_runtime_api=self.tool_runtime_api,
+            vector_io_api=self.vector_io_api,
+            vector_stores_config=self.tool_executor.vector_stores_config,
+            mcp_session_manager=mcp_session_manager,
+        )
+
         orchestrator = StreamingResponseOrchestrator(
             inference_api=self.inference_api,
             ctx=ctx,

@@ -498,7 +513,7 @@ class OpenAIResponsesImpl:
             text=text,
             max_infer_iters=max_infer_iters,
             parallel_tool_calls=parallel_tool_calls,
-            tool_executor=
+            tool_executor=request_tool_executor,
             safety_api=self.safety_api,
             guardrail_ids=guardrail_ids,
             instructions=instructions,

@@ -513,41 +528,52 @@ class OpenAIResponsesImpl:

         # Type as ConversationItem to avoid list invariance issues
         output_items: list[ConversationItem] = []
-        if store:
-            # TODO: we really should work off of output_items instead of "final_messages"
-            await self._store_response(
-                response=final_response,
-                input=all_input,
-                messages=messages_to_store,
+        try:
+            async for stream_chunk in orchestrator.create_response():
+                match stream_chunk.type:
+                    case "response.completed" | "response.incomplete":
+                        final_response = stream_chunk.response
+                    case "response.failed":
+                        failed_response = stream_chunk.response
+                    case "response.output_item.done":
+                        item = stream_chunk.item
+                        output_items.append(item)
+                    case _:
+                        pass  # Other event types
+
+                # Store and sync before yielding terminal events
+                # This ensures the storage/syncing happens even if the consumer breaks after receiving the event
+                if (
+                    stream_chunk.type in {"response.completed", "response.incomplete"}
+                    and final_response
+                    and failed_response is None
+                ):
+                    messages_to_store = list(
+                        filter(lambda x: not isinstance(x, OpenAISystemMessageParam), orchestrator.final_messages)
                     )
+                    if store:
+                        # TODO: we really should work off of output_items instead of "final_messages"
+                        await self._store_response(
+                            response=final_response,
+                            input=all_input,
+                            messages=messages_to_store,
+                        )

+                    if conversation:
+                        await self._sync_response_to_conversation(conversation, input, output_items)
+                        await self.responses_store.store_conversation_messages(conversation, messages_to_store)
+
+                yield stream_chunk
+        finally:
+            # Clean up MCP sessions at the end of the request (fix for #4452)
+            # Use shield() to prevent cancellation from interrupting cleanup and leaking resources
+            # Wrap in try/except as cleanup errors should not mask the original response
+            try:
+                await asyncio.shield(mcp_session_manager.close_all())
+            except BaseException as e:
+                # Debug level - cleanup errors are expected in streaming scenarios where
+                # anyio cancel scopes may be in a different task context
+                logger.debug(f"Error during MCP session cleanup: {e}")

     async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
         return await self.responses_store.delete_response_object(response_id)
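The `finally` block shields the session cleanup so that a cancelled streaming consumer still releases MCP connections. A standalone sketch of the pattern, with a placeholder `close_all` coroutine standing in for `mcp_session_manager.close_all()`:

```python
import asyncio

async def close_all() -> None:
    # Placeholder for resource cleanup (e.g., closing cached MCP sessions).
    await asyncio.sleep(0)

async def stream_with_cleanup():
    try:
        for chunk in ("a", "b", "c"):
            yield chunk
    finally:
        # shield() keeps the cleanup coroutine running even if the enclosing
        # task is cancelled mid-stream (e.g., the HTTP client disconnects);
        # the await itself may still raise CancelledError, which we swallow
        # so cleanup errors never mask the original response.
        try:
            await asyncio.shield(close_all())
        except BaseException:
            pass
```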
llama_stack/providers/inline/agents/meta_reference/responses/streaming.py
CHANGED

@@ -1200,6 +1200,9 @@ class StreamingResponseOrchestrator:
             "mcp_list_tools_id": list_id,
         }

+        # Get session manager from tool_executor if available (fix for #4452)
+        session_manager = getattr(self.tool_executor, "mcp_session_manager", None)
+
         # TODO: follow semantic conventions for Open Telemetry tool spans
         # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
         with tracer.start_as_current_span("list_mcp_tools", attributes=attributes):

@@ -1207,6 +1210,7 @@ class StreamingResponseOrchestrator:
                 endpoint=mcp_tool.server_url,
                 headers=mcp_tool.headers,
                 authorization=mcp_tool.authorization,
+                session_manager=session_manager,
             )

             # Create the MCP list tools message
llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py
CHANGED

@@ -54,11 +54,14 @@ class ToolExecutor:
         tool_runtime_api: ToolRuntime,
         vector_io_api: VectorIO,
         vector_stores_config=None,
+        mcp_session_manager=None,
     ):
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.vector_io_api = vector_io_api
         self.vector_stores_config = vector_stores_config
+        # Optional MCPSessionManager for session reuse within a request (fix for #4452)
+        self.mcp_session_manager = mcp_session_manager

     async def execute_tool_call(
         self,

@@ -233,6 +236,7 @@ class ToolExecutor:
                 "document_ids": [r.file_id for r in search_results],
                 "chunks": [r.content[0].text if r.content else "" for r in search_results],
                 "scores": [r.score for r in search_results],
+                "attributes": [r.attributes or {} for r in search_results],
                 "citation_files": citation_files,
             },
         )

@@ -327,12 +331,14 @@ class ToolExecutor:
             # TODO: follow semantic conventions for Open Telemetry tool spans
             # https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-spans/#execute-tool-span
             with tracer.start_as_current_span("invoke_mcp_tool", attributes=attributes):
+                # Pass session_manager for session reuse within request (fix for #4452)
                 result = await invoke_mcp_tool(
                     endpoint=mcp_tool.server_url,
                     tool_name=function_name,
                     kwargs=tool_kwargs,
                     headers=mcp_tool.headers,
                     authorization=mcp_tool.authorization,
+                    session_manager=self.mcp_session_manager,
                 )
         elif function_name == "knowledge_search":
             response_file_search_tool = (

@@ -464,16 +470,18 @@ class ToolExecutor:
             )
             if result and (metadata := getattr(result, "metadata", None)) and "document_ids" in metadata:
                 message.results = []
+                attributes_list = metadata.get("attributes", [])
                 for i, doc_id in enumerate(metadata["document_ids"]):
                     text = metadata["chunks"][i] if "chunks" in metadata else None
                     score = metadata["scores"][i] if "scores" in metadata else None
+                    attrs = attributes_list[i] if i < len(attributes_list) else {}
                     message.results.append(
                         OpenAIResponseOutputMessageFileSearchToolCallResults(
                             file_id=doc_id,
                             filename=doc_id,
                             text=text if text is not None else "",
                             score=score if score is not None else 0.0,
-                            attributes=
+                            attributes=attrs,
                         )
                     )
             if has_error:
llama_stack/providers/remote/vector_io/pgvector/pgvector.py
CHANGED

@@ -10,6 +10,7 @@ from typing import Any

 import psycopg2
 from numpy.typing import NDArray
 from psycopg2 import sql
+from psycopg2.extensions import cursor
 from psycopg2.extras import Json, execute_values
 from pydantic import BaseModel, TypeAdapter

@@ -54,6 +55,17 @@ def check_extension_version(cur):
     return result[0] if result else None


+def create_vector_extension(cur: cursor) -> None:
+    try:
+        log.info("Vector extension not found, creating...")
+        cur.execute("CREATE EXTENSION vector;")
+        log.info("Vector extension created successfully")
+        log.info(f"Vector extension version: {check_extension_version(cur)}")
+
+    except psycopg2.Error as e:
+        raise RuntimeError(f"Failed to create vector extension for PGVector: {e}") from e
+
+
 def upsert_models(conn, keys_models: list[tuple[str, BaseModel]]):
     with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         query = sql.SQL(

@@ -364,7 +376,7 @@ class PGVectorVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProt
             if version:
                 log.info(f"Vector extension version: {version}")
             else:
+                create_vector_extension(cur)

             cur.execute(
                 """
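Hoisting extension creation into `create_vector_extension` makes the check-then-create flow explicit. A sketch of the same flow against a raw psycopg2 connection (the connection parameters are placeholders for your environment):

```python
import psycopg2

conn = psycopg2.connect(host="localhost", dbname="llamastack", user="postgres")
with conn.cursor() as cur:
    # pgvector registers itself in pg_extension; no row means it is not installed.
    cur.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector'")
    row = cur.fetchone()
    if row is None:
        # Requires superuser or CREATE privilege on the database.
        cur.execute("CREATE EXTENSION vector;")
conn.commit()
```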
llama_stack/providers/utils/inference/embedding_mixin.py
CHANGED

@@ -25,7 +25,8 @@ from llama_stack_api import (
     OpenAIEmbeddingUsage,
 )

-EMBEDDING_MODELS = {}
+EMBEDDING_MODELS: dict[str, "SentenceTransformer"] = {}
+EMBEDDING_MODELS_LOCK = asyncio.Lock()

 DARWIN = "Darwin"

@@ -76,26 +77,29 @@ class SentenceTransformerEmbeddingMixin:
         )

     async def _load_sentence_transformer_model(self, model: str) -> "SentenceTransformer":
-        global EMBEDDING_MODELS
-
         loaded_model = EMBEDDING_MODELS.get(model)
         if loaded_model is not None:
             return loaded_model

+        async with EMBEDDING_MODELS_LOCK:
+            loaded_model = EMBEDDING_MODELS.get(model)
+            if loaded_model is not None:
+                return loaded_model
+
+            log.info(f"Loading sentence transformer for {model}...")

+            def _load_model():
+                from sentence_transformers import SentenceTransformer

+                platform_name = platform.system()
+                if platform_name == DARWIN:
+                    # PyTorch's OpenMP kernels can segfault on macOS when spawned from background
+                    # threads with the default parallel settings, so force a single-threaded CPU run.
+                    log.debug(f"Constraining torch threads on {platform_name} to a single worker")
+                    torch.set_num_threads(1)

+                return SentenceTransformer(model, trust_remote_code=True)

+            loaded_model = await asyncio.to_thread(_load_model)
+            EMBEDDING_MODELS[model] = loaded_model
+            return loaded_model
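The loading change is the classic double-checked pattern adapted to asyncio: a lock-free fast path, a re-check under the lock, and the blocking load pushed to a worker thread via `asyncio.to_thread` so the event loop stays responsive. A distilled sketch, with a generic `load()` standing in for the SentenceTransformer constructor:

```python
import asyncio

_CACHE: dict[str, object] = {}
_CACHE_LOCK = asyncio.Lock()

def load(name: str) -> object:
    # Stand-in for an expensive, blocking load (e.g., reading model weights).
    return object()

async def get_model(name: str) -> object:
    if (cached := _CACHE.get(name)) is not None:
        return cached  # fast path, no lock taken
    async with _CACHE_LOCK:
        if (cached := _CACHE.get(name)) is not None:
            return cached  # another coroutine loaded it while we waited
        model = await asyncio.to_thread(load, name)
        _CACHE[name] = model
        return model
```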
llama_stack/providers/utils/memory/openai_vector_store_mixin.py
CHANGED

@@ -122,6 +122,39 @@ class OpenAIVectorStoreMixin(ABC):
         # update in-memory cache
         self.openai_vector_stores[store_id] = store_info

+    async def _ensure_openai_metadata_exists(self, vector_store: VectorStore, name: str | None = None) -> None:
+        """
+        Ensure OpenAI-compatible metadata exists for a vector store.
+        """
+        if vector_store.identifier not in self.openai_vector_stores:
+            store_info = {
+                "id": vector_store.identifier,
+                "object": "vector_store",
+                "created_at": int(time.time()),
+                "name": name or vector_store.vector_store_name or vector_store.identifier,
+                "usage_bytes": 0,
+                "file_counts": VectorStoreFileCounts(
+                    cancelled=0,
+                    completed=0,
+                    failed=0,
+                    in_progress=0,
+                    total=0,
+                ).model_dump(),
+                "status": "completed",
+                "expires_after": None,
+                "expires_at": None,
+                "last_active_at": int(time.time()),
+                "file_ids": [],
+                "chunking_strategy": None,
+                "metadata": {
+                    "provider_id": vector_store.provider_id,
+                    "provider_vector_store_id": vector_store.provider_resource_id,
+                    "embedding_model": vector_store.embedding_model,
+                    "embedding_dimension": str(vector_store.embedding_dimension),
+                },
+            }
+            await self._save_openai_vector_store(vector_store.identifier, store_info)
+
     async def _load_openai_vector_stores(self) -> dict[str, dict[str, Any]]:
         """Load all vector store metadata from persistent storage."""
         assert self.kvstore
llama_stack/providers/utils/tools/mcp.py
CHANGED

@@ -4,6 +4,8 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import asyncio
+import hashlib
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
 from enum import Enum

@@ -73,6 +75,207 @@ class MCPProtol(Enum):
     SSE = 2


+class MCPSessionManager:
+    """Manages MCP session lifecycle within a request scope.
+
+    This class caches MCP sessions by (endpoint, headers_hash) to avoid redundant
+    connection establishment and tools/list calls when making multiple tool
+    invocations to the same MCP server within a single request.
+
+    Fix for GitHub issue #4452: MCP tools/list called redundantly before every
+    tool invocation.
+
+    Usage:
+        async with MCPSessionManager() as session_manager:
+            # Multiple tool calls will reuse the same session
+            result1 = await invoke_mcp_tool(..., session_manager=session_manager)
+            result2 = await invoke_mcp_tool(..., session_manager=session_manager)
+    """
+
+    def __init__(self):
+        # Cache of active sessions: key -> (session, client_context, session_context)
+        self._sessions: dict[str, tuple[ClientSession, Any, Any]] = {}
+        # Locks to prevent concurrent session creation for the same key
+        self._locks: dict[str, asyncio.Lock] = {}
+        # Global lock for managing the locks dict
+        self._global_lock = asyncio.Lock()
+
+    def _make_key(self, endpoint: str, headers: dict[str, str]) -> str:
+        """Create a cache key from endpoint and headers."""
+        # Sort headers for consistent hashing
+        headers_str = str(sorted(headers.items()))
+        headers_hash = hashlib.sha256(headers_str.encode()).hexdigest()[:16]
+        return f"{endpoint}:{headers_hash}"
+
+    async def _get_lock(self, key: str) -> asyncio.Lock:
+        """Get or create a lock for a specific cache key."""
+        async with self._global_lock:
+            if key not in self._locks:
+                self._locks[key] = asyncio.Lock()
+            return self._locks[key]
+
+    async def get_session(self, endpoint: str, headers: dict[str, str]) -> ClientSession:
+        """Get or create an MCP session for the given endpoint and headers.
+
+        Args:
+            endpoint: MCP server endpoint URL
+            headers: Headers including authorization
+
+        Returns:
+            An initialized ClientSession ready for tool calls
+        """
+        key = self._make_key(endpoint, headers)
+
+        # Check if session already exists (fast path)
+        if key in self._sessions:
+            session, _, _ = self._sessions[key]
+            return session
+
+        # Acquire lock for this specific key to prevent concurrent creation
+        lock = await self._get_lock(key)
+        async with lock:
+            # Double-check after acquiring lock
+            if key in self._sessions:
+                session, _, _ = self._sessions[key]
+                return session
+
+            # Create new session
+            session, client_ctx, session_ctx = await self._create_session(endpoint, headers)
+            self._sessions[key] = (session, client_ctx, session_ctx)
+            logger.debug(f"Created new MCP session for {endpoint} (key: {key[:32]}...)")
+            return session
+
+    async def _create_session(self, endpoint: str, headers: dict[str, str]) -> tuple[ClientSession, Any, Any]:
+        """Create a new MCP session.
+
+        Returns:
+            Tuple of (session, client_context, session_context) for lifecycle management
+        """
+        # Use the same protocol detection logic as client_wrapper
+        connection_strategies = [MCPProtol.STREAMABLE_HTTP, MCPProtol.SSE]
+        mcp_protocol = protocol_cache.get(endpoint, default=MCPProtol.UNKNOWN)
+        if mcp_protocol == MCPProtol.SSE:
+            connection_strategies = [MCPProtol.SSE, MCPProtol.STREAMABLE_HTTP]
+
+        last_exception: BaseException | None = None
+
+        for i, strategy in enumerate(connection_strategies):
+            try:
+                client = streamablehttp_client
+                if strategy == MCPProtol.SSE:
+                    client = cast(Any, sse_client)
+
+                # Enter the client context manager manually
+                client_ctx = client(endpoint, headers=headers)
+                client_streams = await client_ctx.__aenter__()
+
+                try:
+                    # Enter the session context manager manually
+                    session = ClientSession(read_stream=client_streams[0], write_stream=client_streams[1])
+                    session_ctx = session
+                    await session.__aenter__()
+
+                    try:
+                        await session.initialize()
+                        protocol_cache[endpoint] = strategy
+                        return session, client_ctx, session_ctx
+                    except BaseException:
+                        await session.__aexit__(None, None, None)
+                        raise
+                except BaseException:
+                    await client_ctx.__aexit__(None, None, None)
+                    raise
+
+            except* httpx.HTTPStatusError as eg:
+                for exc in eg.exceptions:
+                    err = cast(httpx.HTTPStatusError, exc)
+                    if err.response.status_code == 401:
+                        raise AuthenticationRequiredError(exc) from exc
+                if i == len(connection_strategies) - 1:
+                    raise
+                last_exception = eg
+            except* httpx.ConnectError as eg:
+                if i == len(connection_strategies) - 1:
+                    error_msg = f"Failed to connect to MCP server at {endpoint}: Connection refused"
+                    logger.error(f"MCP connection error: {error_msg}")
+                    raise ConnectionError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"failed to connect to MCP server at {endpoint} via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* httpx.TimeoutException as eg:
+                if i == len(connection_strategies) - 1:
+                    error_msg = f"MCP server at {endpoint} timed out"
+                    logger.error(f"MCP timeout error: {error_msg}")
+                    raise TimeoutError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"MCP server at {endpoint} timed out via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* httpx.RequestError as eg:
+                if i == len(connection_strategies) - 1:
+                    exc_msg = str(eg.exceptions[0]) if eg.exceptions else "Unknown error"
+                    error_msg = f"Network error connecting to MCP server at {endpoint}: {exc_msg}"
+                    logger.error(f"MCP network error: {error_msg}")
+                    raise ConnectionError(error_msg) from eg
+                else:
+                    logger.warning(
+                        f"network error connecting to MCP server at {endpoint} via {strategy.name}, "
+                        f"falling back to {connection_strategies[i + 1].name}"
+                    )
+                    last_exception = eg
+            except* McpError:
+                if i < len(connection_strategies) - 1:
+                    logger.warning(
+                        f"failed to connect via {strategy.name}, falling back to {connection_strategies[i + 1].name}"
+                    )
+                else:
+                    raise
+
+        # Should not reach here, but just in case
+        if last_exception:
+            raise last_exception
+        raise RuntimeError(f"Failed to create MCP session for {endpoint}")
+
+    async def close_all(self) -> None:
+        """Close all cached sessions.
+
+        Should be called at the end of a request to clean up resources.
+
+        Note: We catch BaseException (not just Exception) because:
+        1. CancelledError is a BaseException and can occur during cleanup
+        2. anyio cancel scope errors can occur if cleanup runs in a different
+           task context than where the session was created
+        These are expected in streaming response scenarios and are handled gracefully.
+        """
+        errors = []
+        session_count = len(self._sessions)
+        for key, (session, client_ctx, _) in list(self._sessions.items()):
+            try:
+                await session.__aexit__(None, None, None)
+            except BaseException as e:
+                # Debug level since these errors are expected in streaming scenarios
+                # where cleanup runs in a different async context than session creation
+                logger.debug(f"Error closing MCP session {key}: {e}")
+                errors.append(e)
+            try:
+                await client_ctx.__aexit__(None, None, None)
+            except BaseException as e:
+                logger.debug(f"Error closing MCP client context {key}: {e}")
+                errors.append(e)
+
+        self._sessions.clear()
+        self._locks.clear()
+        logger.debug(f"Closed {session_count} MCP sessions")
+
+        if errors:
+            logger.debug(f"Encountered {len(errors)} errors while closing MCP sessions (expected in streaming)")
+
+
 @asynccontextmanager
 async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
     # we use a ttl'd dict to cache the happy path protocol for each endpoint

@@ -151,6 +354,7 @@ async def list_mcp_tools(
     endpoint: str,
     headers: dict[str, str] | None = None,
     authorization: str | None = None,
+    session_manager: MCPSessionManager | None = None,
 ) -> ListToolDefsResponse:
     """List tools available from an MCP server.

@@ -158,6 +362,10 @@ async def list_mcp_tools(
         endpoint: MCP server endpoint URL
         headers: Optional base headers to include
         authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+        session_manager: Optional MCPSessionManager for session reuse within a request.
+            When provided, sessions are cached and reused, avoiding redundant session
+            creation when list_mcp_tools and invoke_mcp_tool are called for the same
+            server within a request. (Fix for #4452)

     Returns:
         List of tool definitions from the MCP server

@@ -169,7 +377,9 @@ async def list_mcp_tools(
     final_headers = prepare_mcp_headers(headers, authorization)

     tools = []
+
+    # Helper function to process session and list tools
+    async def _list_tools_from_session(session):
         tools_result = await session.list_tools()
         for tool in tools_result.tools:
             tools.append(

@@ -183,15 +393,51 @@ async def list_mcp_tools(
                 },
             )
         )
+
+    # If a session manager is provided, use it for session reuse (fix for #4452)
+    if session_manager is not None:
+        session = await session_manager.get_session(endpoint, final_headers)
+        await _list_tools_from_session(session)
+    else:
+        # Fallback to original behavior: create a new session for this call
+        async with client_wrapper(endpoint, final_headers) as session:
+            await _list_tools_from_session(session)
+
     return ListToolDefsResponse(data=tools)


+def _parse_mcp_result(result) -> ToolInvocationResult:
+    """Parse MCP tool call result into ToolInvocationResult.
+
+    Args:
+        result: The raw MCP tool call result
+
+    Returns:
+        ToolInvocationResult with parsed content
+    """
+    content: list[InterleavedContentItem] = []
+    for item in result.content:
+        if isinstance(item, mcp_types.TextContent):
+            content.append(TextContentItem(text=item.text))
+        elif isinstance(item, mcp_types.ImageContent):
+            content.append(ImageContentItem(image=_URLOrData(data=item.data)))
+        elif isinstance(item, mcp_types.EmbeddedResource):
+            logger.warning(f"EmbeddedResource is not supported: {item}")
+        else:
+            raise ValueError(f"Unknown content type: {type(item)}")
+    return ToolInvocationResult(
+        content=content,
+        error_code=1 if result.isError else 0,
+    )
+
+
 async def invoke_mcp_tool(
     endpoint: str,
     tool_name: str,
     kwargs: dict[str, Any],
     headers: dict[str, str] | None = None,
     authorization: str | None = None,
+    session_manager: MCPSessionManager | None = None,
 ) -> ToolInvocationResult:
     """Invoke an MCP tool with the given arguments.

@@ -201,6 +447,9 @@ async def invoke_mcp_tool(
         kwargs: Tool invocation arguments
         headers: Optional base headers to include
         authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+        session_manager: Optional MCPSessionManager for session reuse within a request.
+            When provided, sessions are cached and reused for multiple tool calls to
+            the same endpoint, avoiding redundant tools/list calls. (Fix for #4452)

     Returns:
         Tool invocation result with content and error information

@@ -211,20 +460,13 @@ async def invoke_mcp_tool(
     # Prepare headers with authorization handling
     final_headers = prepare_mcp_headers(headers, authorization)

+    # If a session manager is provided, use it for session reuse (fix for #4452)
+    if session_manager is not None:
+        session = await session_manager.get_session(endpoint, final_headers)
         result = await session.call_tool(tool_name, kwargs)
+        return _parse_mcp_result(result)

-        elif isinstance(item, mcp_types.ImageContent):
-            content.append(ImageContentItem(image=_URLOrData(data=item.data)))
-        elif isinstance(item, mcp_types.EmbeddedResource):
-            logger.warning(f"EmbeddedResource is not supported: {item}")
-        else:
-            raise ValueError(f"Unknown content type: {type(item)}")
-        return ToolInvocationResult(
-            content=content,
-            error_code=1 if result.isError else 0,
-        )
+    # Fallback to original behavior: create a new session for each call
+    async with client_wrapper(endpoint, final_headers) as session:
+        result = await session.call_tool(tool_name, kwargs)
+        return _parse_mcp_result(result)
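openai_responses.py above shows the intended lifecycle: one manager per request, explicit `close_all()` in a `finally`. A condensed usage sketch, where the endpoint URL, tool name, and arguments are placeholders:

```python
from llama_stack.providers.utils.tools.mcp import (
    MCPSessionManager,
    invoke_mcp_tool,
    list_mcp_tools,
)

async def run_two_calls() -> None:
    manager = MCPSessionManager()
    try:
        # Both calls hit the same (endpoint, headers) cache key, so the
        # session is established and initialized exactly once.
        await list_mcp_tools("http://localhost:8000/mcp", session_manager=manager)
        await invoke_mcp_tool(
            "http://localhost:8000/mcp",
            tool_name="echo",
            kwargs={"text": "hi"},
            session_manager=manager,
        )
    finally:
        await manager.close_all()
```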
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.2
+Version: 0.4.3
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT

@@ -46,7 +46,7 @@ Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
 Requires-Dist: urllib3>=2.6.3
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.2; extra == "client"
+Requires-Dist: llama-stack-client==0.4.3; extra == "client"
 Dynamic: license-file

 # Llama Stack
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/RECORD
CHANGED

@@ -30,11 +30,11 @@ llama_stack/core/distribution.py,sha256=pASA0KJ_KTTRQbWP0H5OSvp1ZFQvprbMfE6OLBTq
 llama_stack/core/external.py,sha256=_UTuHkqMzDM07CMAGcPeschNm8NfMkr63iSrLLYc5lg,1869
 llama_stack/core/id_generation.py,sha256=HyTuFALhY0FTpHrYSZbykpPgoNShBeKfFexJz5xEFU8,1185
 llama_stack/core/inspect.py,sha256=0L_Em3kYuvqLQFa4Nn-47rzQtpzhuuWtcnpixCfQuv4,7250
-llama_stack/core/library_client.py,sha256=
+llama_stack/core/library_client.py,sha256=Lk1uNUCgnbx8WsCPmtVcwyjwcyU20_CH-hStAR93Fc0,24156
 llama_stack/core/providers.py,sha256=EblMlsWJKGHsXCTmVo-doCJ64JEpBy7-2DoupFkaTUo,5134
 llama_stack/core/request_headers.py,sha256=tUt-RvzUrl7yxbYKBe7nN5YBCgWxShz4cemLvl7XGxc,3692
 llama_stack/core/resolver.py,sha256=IRPPwi60uAe5mlj-NjAR41laP9Dp1WvAI3A-bTMB-mk,19383
-llama_stack/core/stack.py,sha256=
+llama_stack/core/stack.py,sha256=cPvwjgldE3L9fEQEReKIIOtHNWEUtUtZx6DL17zIi34,28588
 llama_stack/core/start_stack.sh,sha256=3snlFzur13NS1_UnJQ6t8zK7R5DCRFJKJrz9YTJmWVA,2834
 llama_stack/core/testing_context.py,sha256=TIWetol6Sb2BSiqkq5X0knb0chG03GSpmjByFwVfY60,1438
 llama_stack/core/access_control/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200

@@ -54,13 +54,13 @@ llama_stack/core/routers/tool_runtime.py,sha256=bRfPMlLxtdRQ7ad5BPTetw7fi_QT_xV5
 llama_stack/core/routers/vector_io.py,sha256=QqloV8ljTAvjvAytWRBCPnr5Aqm3LidLXalLxDh0W54,21972
 llama_stack/core/routing_tables/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/core/routing_tables/benchmarks.py,sha256=hxHRQyk8MRVBpdLYIjy4Cim28sdAbPjf9tuZ0msddMg,2552
-llama_stack/core/routing_tables/common.py,sha256=
+llama_stack/core/routing_tables/common.py,sha256=1jPbybKPYQAllN9HeN5EKxCrLeVHG1kGvRfk2Uy3aQM,10847
 llama_stack/core/routing_tables/datasets.py,sha256=xWhcVUg6TuIcBMRbcYf8_keNc5PADv8PfVEBWmbabJ4,3025
 llama_stack/core/routing_tables/models.py,sha256=PyEYEHlB9uarnSpr6Mnw-bVDOPrANMO06I0dl2orbPg,11388
 llama_stack/core/routing_tables/scoring_functions.py,sha256=gICo9oTTwqT3Ifrbi84wmdcLf2ipiaU3RCoPO6IBt-Y,2464
 llama_stack/core/routing_tables/shields.py,sha256=KjfusJknyZ28evzhdJJLaOuiMJmjkzEPuilIlLH6AcY,2209
 llama_stack/core/routing_tables/toolgroups.py,sha256=X-Uh78mFtfhIpt1RMKHgaam3PD-hWzvmHu0YQj_EEWk,5602
-llama_stack/core/routing_tables/vector_stores.py,sha256=
+llama_stack/core/routing_tables/vector_stores.py,sha256=ZLv1SGh_Z2N6eZjCp9_HV5L5nA_b8sio3XeZelP3JdQ,12560
 llama_stack/core/server/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/core/server/auth.py,sha256=dCyBRg72nplOhKx0xvIiwcptVQmK6140VVCtGjSsvhw,7768
 llama_stack/core/server/auth_providers.py,sha256=1b9eBmvaz2aBQlNkkODaEQHYIF6WejS4bjc3gyr8sg8,20876

@@ -72,19 +72,19 @@ llama_stack/core/storage/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZh
 llama_stack/core/storage/datatypes.py,sha256=WZqYV_Pgy2nKwEEgjJR1_pST0VUHqY_rnPaWZ8-bjIU,9810
 llama_stack/core/storage/kvstore/__init__.py,sha256=uTrLgzQMAgEUWthwT5LUfHnPAp41Std0zJzlTtPgP-A,309
 llama_stack/core/storage/kvstore/config.py,sha256=XSPhkugrvA9hxBc8DXclD2ktZzNGTQypIbwoH3FXGmI,1436
-llama_stack/core/storage/kvstore/kvstore.py,sha256=
+llama_stack/core/storage/kvstore/kvstore.py,sha256=q1LNtQrAxlBBSuVVl6jnyd7v0JHcDpS_7U2h2QmWwtU,4601
 llama_stack/core/storage/kvstore/mongodb/__init__.py,sha256=1LZfkl1Mo3ObnKm7s8619xYiaEZzy95o8pvEifawBQ4,275
-llama_stack/core/storage/kvstore/mongodb/mongodb.py,sha256=
+llama_stack/core/storage/kvstore/mongodb/mongodb.py,sha256=U8gCCzGhM0KzinwVa9huFzDNZgyVZ4TH7RmHCpH5RJc,3579
 llama_stack/core/storage/kvstore/postgres/__init__.py,sha256=JPP4RZZRUMKOnh9hsnB1mxQkftVfLvo9l2QcXZlUHbY,262
-llama_stack/core/storage/kvstore/postgres/postgres.py,sha256
+llama_stack/core/storage/kvstore/postgres/postgres.py,sha256=ohEcChgHBjBvkOzIBBtV7Kp1N8kL9CzWRSdSW2-zmMw,4648
 llama_stack/core/storage/kvstore/redis/__init__.py,sha256=FGi8DzDTHkWJufWWoJAXW2zjs2Ji5nsha2cGF84HZis,251
-llama_stack/core/storage/kvstore/redis/redis.py,sha256=
+llama_stack/core/storage/kvstore/redis/redis.py,sha256=pA41GajiVle-8rbSq1SNExcIdexTpSnk_1vE6zydlfM,3649
 llama_stack/core/storage/kvstore/sqlite/__init__.py,sha256=tnKA5tuANI8w1wIJhhyTYpV5NUxyl5XypjdiHnr0PaY,253
 llama_stack/core/storage/kvstore/sqlite/sqlite.py,sha256=Aecv8Jk8XPOYB_yoU_XkjYUfxqqv2pO7xhVp6F_Urdg,7234
 llama_stack/core/storage/sqlstore/__init__.py,sha256=m-8Cg5fNJnxWZw2KVAHlkYPpGuMcrQliGWiYBQChl3Y,493
 llama_stack/core/storage/sqlstore/authorized_sqlstore.py,sha256=-HWmTl_rQx_97DhFQPx1mREXJr8X3IyplqrpELdEgVc,14474
-llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py,sha256=
-llama_stack/core/storage/sqlstore/sqlstore.py,sha256=
+llama_stack/core/storage/sqlstore/sqlalchemy_sqlstore.py,sha256=J3pNnZUpUoU3IhPmQpHbLMK7BXguPnCKjQZCYR_wayQ,15248
+llama_stack/core/storage/sqlstore/sqlstore.py,sha256=rGuYGZerCrZZ_G8vMbeP5_ef4jnMFAcXFv-8jniNjno,3228
 llama_stack/core/store/__init__.py,sha256=6rFevukvm0CQxaf3uV3lr5vJTZRB-igWXK2Bd32SNsk,244
 llama_stack/core/store/registry.py,sha256=QSoJYCZ3VjHN3NkqECYGwvv0FJpozuh_9k22Lt_y2tY,7377
 llama_stack/core/utils/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200

@@ -200,9 +200,9 @@ llama_stack/providers/inline/agents/meta_reference/agents.py,sha256=Q1fqMRZ2Yfb4
 llama_stack/providers/inline/agents/meta_reference/config.py,sha256=OY82xSuMfh8bMDfV7BZO4P-MMzzjUaMxOayNn4hNPj4,1375
 llama_stack/providers/inline/agents/meta_reference/safety.py,sha256=e5taz2bA9Ks3nlRgLp0CGHsgU_Rv_6ed4bGemQlGEv8,1684
 llama_stack/providers/inline/agents/meta_reference/responses/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py,sha256=
-llama_stack/providers/inline/agents/meta_reference/responses/streaming.py,sha256=
-llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py,sha256=
+llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py,sha256=TPuSd7g6sA95b6-oR4ailQ_v6Te7HNqpcVzLFwEFwtU,25192
+llama_stack/providers/inline/agents/meta_reference/responses/streaming.py,sha256=PsHByA2JUBU2KESWXpHq4rBfV4wMBwZ4JMSLZR_kxns,70310
+llama_stack/providers/inline/agents/meta_reference/responses/tool_executor.py,sha256=2aEeqhWP8kCEubFwj9zFj_QzfXD1MssHDedhjvuMZLs,23202
 llama_stack/providers/inline/agents/meta_reference/responses/types.py,sha256=VFUUSbaH15cHzy72pYcxDfuEdKbUxT3F3jg5X-42kbE,8721
 llama_stack/providers/inline/agents/meta_reference/responses/utils.py,sha256=yUGkUZcitTmb6mePDK8Zvw2FDB5FIvxVCx0b887Gqiw,22977
 llama_stack/providers/inline/batches/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200

@@ -464,7 +464,7 @@ llama_stack/providers/remote/vector_io/milvus/config.py,sha256=ZRf00ifVSEC5te8Ww
 llama_stack/providers/remote/vector_io/milvus/milvus.py,sha256=epkA43GUBu6u5-uGw1Dk-MNcETTjve4GCGo52p46wnY,16406
 llama_stack/providers/remote/vector_io/pgvector/__init__.py,sha256=yAFgSkT7gwkRtO-xWtm4nkP99QQxZAxsynDUEybIf9Y,564
 llama_stack/providers/remote/vector_io/pgvector/config.py,sha256=yMVFQf4fJDev8ShnbnOicXnLF9r45MIxCKYcbmDFyfw,1540
-llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=
+llama_stack/providers/remote/vector_io/pgvector/pgvector.py,sha256=LD9SsKHexAg438rBo6Ro479aAWDb1U5o_zTObjr3GI8,20040
 llama_stack/providers/remote/vector_io/qdrant/__init__.py,sha256=Tim4k1BhOROl5BCHuS1YzH6_MilgNdNrv1qe4zFcla4,554
 llama_stack/providers/remote/vector_io/qdrant/config.py,sha256=4akCc4YbYYIBCENj8NRUm3OI-ix0zTOmbgq8RG3nIWU,1115
 llama_stack/providers/remote/vector_io/qdrant/qdrant.py,sha256=VlA-y7F52LC4paHEV6BRQyxWAdBBzh0gWH1hUUs7JMQ,15404

@@ -485,7 +485,7 @@ llama_stack/providers/utils/datasetio/url_utils.py,sha256=Jhxw-bRs5PbkG-pCSRKqoK
 llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/files/form_data.py,sha256=-yRXfeSf3AfyRShz6HiSlTPFSa1y4qdlRDxwQOLMK40,2284
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
-llama_stack/providers/utils/inference/embedding_mixin.py,sha256=
+llama_stack/providers/utils/inference/embedding_mixin.py,sha256=n94bHl0YItWOpRhrCstnz0tr-ZABEJ3DTEGOyWD4t_4,3592
 llama_stack/providers/utils/inference/inference_store.py,sha256=DG_W3FCwA6fjpz9GGko_ylm-4-kNBkxltcXNKOW52OI,9944
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=MDxQi5KZ1fIjxdpDhvcA7_9Rvu3HNrSQWTTqLrOkUII,13621
 llama_stack/providers/utils/inference/model_registry.py,sha256=CEanQgkbESbAxwczpXQnPHjSa9pGzQBq43tF8t7S9kk,8157

@@ -495,7 +495,7 @@ llama_stack/providers/utils/inference/prompt_adapter.py,sha256=qE2y1EpY0zfYAN6We
 llama_stack/providers/utils/inference/stream_utils.py,sha256=WdM3SPMh9xfOAcpd67_Ld0AaNKrvoYIdJ4nrFOTMmp8,675
 llama_stack/providers/utils/memory/__init__.py,sha256=pA4yikPZUO-A0K2nscz5tEp1yYSBtvglbgC5pe-FGKE,214
 llama_stack/providers/utils/memory/file_utils.py,sha256=MsjispuPO0cMXmRqAoTJ-dwM9uzgYn4aiRFBM-aHP9w,712
-llama_stack/providers/utils/memory/openai_vector_store_mixin.py,sha256=
+llama_stack/providers/utils/memory/openai_vector_store_mixin.py,sha256=7yVvuq_uc9DxOd7SLqcJOVmu2c0AkBGmPmXZDaM1TgQ,60115
 llama_stack/providers/utils/memory/vector_store.py,sha256=HiNCtZ4OzvPk4RGuRNks7CnbrDoppYyrJdjST-emnZQ,11941
 llama_stack/providers/utils/responses/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/responses/responses_store.py,sha256=4ziPwlqxMS-mrlB2rL2M8LU9sYewmInH9zN5WPHK00U,10397

@@ -504,7 +504,7 @@ llama_stack/providers/utils/scoring/aggregation_utils.py,sha256=S2XR7DLXEVf7FCTO
 llama_stack/providers/utils/scoring/base_scoring_fn.py,sha256=Upd-Hu8ftoZArfAuHx00uDkvEtmVKWg8U9tgkArro4o,4098
 llama_stack/providers/utils/scoring/basic_scoring_utils.py,sha256=JmGA65N55raHR7rmcdWdTQPaZy4X7I69KFDvfN6716A,714
 llama_stack/providers/utils/tools/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/utils/tools/mcp.py,sha256=
+llama_stack/providers/utils/tools/mcp.py,sha256=0P0VQgrx7WWoomrmi-1QZFSMlb8DTDLxx8u8TJgMH84,20244
 llama_stack/providers/utils/tools/ttl_dict.py,sha256=4Bv3Nri9HM2FSckfaJJbqzICpO2S_yOXcsgVj_yvsoA,2021
 llama_stack/providers/utils/vector_io/__init__.py,sha256=fGP7xUTCZ3E77v3FtEuGyW2k3S5Tn9x0Kk1aEIafoxA,552
 llama_stack/providers/utils/vector_io/vector_utils.py,sha256=l1asZcxbtlRIaZUi_LbXagclCAveD-k6w28LfOZwqBk,7147

@@ -513,7 +513,7 @@ llama_stack/telemetry/constants.py,sha256=LtXE61xwNL3cBYZXKcXcbwD_Uh1jazP3V8a0od
 llama_stack/telemetry/helpers.py,sha256=7uarMIHL5ngOUXQZxkH96corFxE7Jk5JaizRQ8Z8Ok0,1694
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=oGGTrzzBYNNvOIcvcFZenNPthr0yziJ7hlGPtckx460,39240
-llama_stack-0.4.
+llama_stack-0.4.3.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
 llama_stack_api/__init__.py,sha256=5XNQGpundjXTutLgnYp6B1t6KITWXH_of626GciNma4,28103
 llama_stack_api/agents.py,sha256=u0sg3AoWCip5o8T4DMTM8uqP3BsdbkKbor3PmxKTg0g,7143
 llama_stack_api/connectors.py,sha256=PcAwndbVQC6pm5HGSlNprqYFTZzhCM7SYHPyRkSIoaQ,4644

@@ -537,7 +537,7 @@ llama_stack_api/scoring_functions.py,sha256=0lP_ZENUh12i12ibg-_XNNPKLHi_TvB8H5Ly
 llama_stack_api/shields.py,sha256=9dNMyTVL0xcR8_BXCHb_zuAJC7Cz8pX8htRwW2-EDSw,2823
 llama_stack_api/tools.py,sha256=eCyZx806VfpBJgsuJF9R3urA8ljF3g0kLapNpx9YRzY,7518
 llama_stack_api/vector_io.py,sha256=3tYy8xLhVvx_rMtfi5Pxv0GwTMm1TfMYwq82tFqRz1U,36517
-llama_stack_api/vector_stores.py,sha256=
+llama_stack_api/vector_stores.py,sha256=mILSO3k2X-Hg4G3YEdq54fKAenCuAzRAXqpNg-_D_Ng,1832
 llama_stack_api/version.py,sha256=V3jdW3iFPdfOt4jWzJA-di7v0zHLYsn11hNtRzkY7uQ,297
 llama_stack_api/admin/__init__.py,sha256=VnJn9fbk-dFkRrm1P5UWlAOcZDA2jf6dx9W5nt-WgOY,1049
 llama_stack_api/admin/api.py,sha256=m14f4iBUJf-G0qITj66o-TFKCSUiD9U12XRnZ1Slr_w,1961

@@ -575,8 +575,8 @@ llama_stack_api/inspect_api/api.py,sha256=XkdM7jJ3_UlEIE4woEVi5mO2O1aNn9_FPtb18N
 llama_stack_api/inspect_api/fastapi_routes.py,sha256=I7R8roy6einYDzrPN8wNjrRokpoSNZi9zrtmLHS1vDw,2575
 llama_stack_api/inspect_api/models.py,sha256=EW69EHkOG8i0GS8KW8Kz6WaPZV74hzwad8dGXWrrKhs,683
 llama_stack_api/internal/__init__.py,sha256=hZiF7mONpu54guvMUTW9XpfkETUO55u6hqYOYkz8Bt0,307
-llama_stack_api/internal/kvstore.py,sha256=
-llama_stack_api/internal/sqlstore.py,sha256=
+llama_stack_api/internal/kvstore.py,sha256=mgNJz6r8_ju3I3JT2Pz5fSX_9DLv_OupsS2NnJe3usY,833
+llama_stack_api/internal/sqlstore.py,sha256=FBIQhG7VOVMMSTe24uMigfxEWXnarY0hzx9HjrNXVnI,2262
 llama_stack_api/llama_stack_api/__init__.py,sha256=5XNQGpundjXTutLgnYp6B1t6KITWXH_of626GciNma4,28103
 llama_stack_api/llama_stack_api/agents.py,sha256=u0sg3AoWCip5o8T4DMTM8uqP3BsdbkKbor3PmxKTg0g,7143
 llama_stack_api/llama_stack_api/connectors.py,sha256=PcAwndbVQC6pm5HGSlNprqYFTZzhCM7SYHPyRkSIoaQ,4644

@@ -600,7 +600,7 @@ llama_stack_api/llama_stack_api/scoring_functions.py,sha256=0lP_ZENUh12i12ibg-_X
 llama_stack_api/llama_stack_api/shields.py,sha256=9dNMyTVL0xcR8_BXCHb_zuAJC7Cz8pX8htRwW2-EDSw,2823
 llama_stack_api/llama_stack_api/tools.py,sha256=eCyZx806VfpBJgsuJF9R3urA8ljF3g0kLapNpx9YRzY,7518
 llama_stack_api/llama_stack_api/vector_io.py,sha256=3tYy8xLhVvx_rMtfi5Pxv0GwTMm1TfMYwq82tFqRz1U,36517
-llama_stack_api/llama_stack_api/vector_stores.py,sha256=
+llama_stack_api/llama_stack_api/vector_stores.py,sha256=mILSO3k2X-Hg4G3YEdq54fKAenCuAzRAXqpNg-_D_Ng,1832
 llama_stack_api/llama_stack_api/version.py,sha256=V3jdW3iFPdfOt4jWzJA-di7v0zHLYsn11hNtRzkY7uQ,297
 llama_stack_api/llama_stack_api/admin/__init__.py,sha256=VnJn9fbk-dFkRrm1P5UWlAOcZDA2jf6dx9W5nt-WgOY,1049
 llama_stack_api/llama_stack_api/admin/api.py,sha256=m14f4iBUJf-G0qITj66o-TFKCSUiD9U12XRnZ1Slr_w,1961

@@ -638,8 +638,8 @@ llama_stack_api/llama_stack_api/inspect_api/api.py,sha256=XkdM7jJ3_UlEIE4woEVi5m
 llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py,sha256=I7R8roy6einYDzrPN8wNjrRokpoSNZi9zrtmLHS1vDw,2575
 llama_stack_api/llama_stack_api/inspect_api/models.py,sha256=EW69EHkOG8i0GS8KW8Kz6WaPZV74hzwad8dGXWrrKhs,683
 llama_stack_api/llama_stack_api/internal/__init__.py,sha256=hZiF7mONpu54guvMUTW9XpfkETUO55u6hqYOYkz8Bt0,307
-llama_stack_api/llama_stack_api/internal/kvstore.py,sha256=
-llama_stack_api/llama_stack_api/internal/sqlstore.py,sha256=
+llama_stack_api/llama_stack_api/internal/kvstore.py,sha256=mgNJz6r8_ju3I3JT2Pz5fSX_9DLv_OupsS2NnJe3usY,833
+llama_stack_api/llama_stack_api/internal/sqlstore.py,sha256=FBIQhG7VOVMMSTe24uMigfxEWXnarY0hzx9HjrNXVnI,2262
 llama_stack_api/llama_stack_api/providers/__init__.py,sha256=a_187ghsdPNYJ5xLizqKYREJJLBa-lpcIhLp8spgsH8,841
 llama_stack_api/llama_stack_api/providers/api.py,sha256=ytwxri9s6p8j9ClFKgN9mfa1TF0VZh1o8W5cVZR49rc,534
 llama_stack_api/llama_stack_api/providers/fastapi_routes.py,sha256=jb1yrXEk1MdtcgWCToSZtaB-wjKqv5uVKIkvduXoKlM,1962

@@ -648,8 +648,8 @@ llama_stack_api/providers/__init__.py,sha256=a_187ghsdPNYJ5xLizqKYREJJLBa-lpcIhL
 llama_stack_api/providers/api.py,sha256=ytwxri9s6p8j9ClFKgN9mfa1TF0VZh1o8W5cVZR49rc,534
 llama_stack_api/providers/fastapi_routes.py,sha256=jb1yrXEk1MdtcgWCToSZtaB-wjKqv5uVKIkvduXoKlM,1962
 llama_stack_api/providers/models.py,sha256=nqBzh9je_dou35XFjYGD43hwKgjWy6HIRmGWUrcGqOw,653
-llama_stack-0.4.
-llama_stack-0.4.
-llama_stack-0.4.
-llama_stack-0.4.
-llama_stack-0.4.
+llama_stack-0.4.3.dist-info/METADATA,sha256=i3L_soHo8hFMex4qrg30BHHC4f79XoW3jdR3Zza76Yk,12464
+llama_stack-0.4.3.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+llama_stack-0.4.3.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.4.3.dist-info/top_level.txt,sha256=pyNYneZU5w62BaExic-GC1ph5kk8JI2mJFwzqiZy2cU,28
+llama_stack-0.4.3.dist-info/RECORD,,
llama_stack_api/llama_stack_api/vector_stores.py
CHANGED

@@ -42,6 +42,7 @@ class VectorStoreInput(BaseModel):
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
     :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    :param vector_store_name: (Optional) Human-readable name for the vector store
     """

     vector_store_id: str

@@ -49,3 +50,4 @@ class VectorStoreInput(BaseModel):
     embedding_dimension: int
     provider_id: str | None = None
     provider_vector_store_id: str | None = None
+    vector_store_name: str | None = None
llama_stack_api/vector_stores.py
CHANGED

@@ -42,6 +42,7 @@ class VectorStoreInput(BaseModel):
     :param embedding_model: Name of the embedding model to use for vector generation
     :param embedding_dimension: Dimension of the embedding vectors
     :param provider_vector_store_id: (Optional) Provider-specific identifier for the vector store
+    :param vector_store_name: (Optional) Human-readable name for the vector store
     """

     vector_store_id: str

@@ -49,3 +50,4 @@ class VectorStoreInput(BaseModel):
     embedding_dimension: int
     provider_id: str | None = None
     provider_vector_store_id: str | None = None
+    vector_store_name: str | None = None
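With the new field, pre-registered vector stores can carry a display name. A minimal construction sketch (the model name, dimension, and identifiers are illustrative values):

```python
from llama_stack_api.vector_stores import VectorStoreInput

store = VectorStoreInput(
    vector_store_id="vs_docs",
    embedding_model="all-MiniLM-L6-v2",
    embedding_dimension=384,
    vector_store_name="Product documentation",  # new optional display name
)
```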
{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/entry_points.txt
File without changes

{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/licenses/LICENSE
File without changes

{llama_stack-0.4.2.dist-info → llama_stack-0.4.3.dist-info}/top_level.txt
File without changes