llama-stack 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -391,6 +391,9 @@ async def instantiate_provider(
         method = "get_adapter_impl"
         args = [config, deps]

+        if "policy" in inspect.signature(getattr(module, method)).parameters:
+            args.append(policy)
+
     elif isinstance(provider_spec, AutoRoutedProviderSpec):
         method = "get_auto_router_impl"

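Note on the hunk above (it comes from llama_stack/core/resolver.py, judging by the RECORD changes at the end of this diff): the new lines pass the access-control policy to a provider's `get_adapter_impl` entry point only when that function declares a `policy` parameter, so providers that never accept one keep working unchanged. A minimal, self-contained sketch of that signature-based dispatch; the `get_adapter_impl` here is a hypothetical stand-in, not a real provider module:

```python
import asyncio
import inspect


# Hypothetical provider entry point; real providers live in their own modules.
async def get_adapter_impl(config: dict, deps: dict, policy: list | None = None):
    return {"config": config, "deps": deps, "policy": policy}


async def instantiate(module_fn, config: dict, deps: dict, policy: list):
    # Mirror the resolver change: only append `policy` if the target accepts it.
    args = [config, deps]
    if "policy" in inspect.signature(module_fn).parameters:
        args.append(policy)
    return await module_fn(*args)


if __name__ == "__main__":
    impl = asyncio.run(instantiate(get_adapter_impl, {"url": "..."}, {}, policy=[]))
    print(impl["policy"])  # [] -> policy was forwarded because the signature accepts it
```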
@@ -49,10 +49,17 @@ from llama_stack.apis.inference import (
 )
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
+from llama_stack.core.access_control.access_control import is_action_allowed
+from llama_stack.core.datatypes import ModelWithOwner
+from llama_stack.core.request_headers import get_authenticated_user
 from llama_stack.log import get_logger
 from llama_stack.models.llama.llama3.chat_format import ChatFormat
 from llama_stack.models.llama.llama3.tokenizer import Tokenizer
-from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
+from llama_stack.providers.datatypes import (
+    HealthResponse,
+    HealthStatus,
+    RoutingTable,
+)
 from llama_stack.providers.utils.inference.inference_store import InferenceStore
 from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span

@@ -186,15 +193,41 @@ class InferenceRouter(Inference):
             provider = await self.routing_table.get_provider_impl(model.identifier)
             return provider, model.provider_resource_id

+        # Handles cases where clients use the provider format directly
+        return await self._get_provider_by_fallback(model_id, expected_model_type)
+
+    async def _get_provider_by_fallback(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
+        """
+        Handle fallback case where model_id is in provider_id/provider_resource_id format.
+        """
         splits = model_id.split("/", maxsplit=1)
         if len(splits) != 2:
             raise ModelNotFoundError(model_id)

         provider_id, provider_resource_id = splits
+
+        # Check if provider exists
         if provider_id not in self.routing_table.impls_by_provider_id:
             logger.warning(f"Provider {provider_id} not found for model {model_id}")
             raise ModelNotFoundError(model_id)

+        # Create a temporary model object for RBAC check
+        temp_model = ModelWithOwner(
+            identifier=model_id,
+            provider_id=provider_id,
+            provider_resource_id=provider_resource_id,
+            model_type=expected_model_type,
+            metadata={},  # Empty metadata for temporary object
+        )
+
+        # Perform RBAC check
+        user = get_authenticated_user()
+        if not is_action_allowed(self.routing_table.policy, "read", temp_model, user):
+            logger.debug(
+                f"Access denied to model '{model_id}' via fallback path for user {user.principal if user else 'anonymous'}"
+            )
+            raise ModelNotFoundError(model_id)
+
         return self.routing_table.impls_by_provider_id[provider_id], provider_resource_id

     async def openai_completion(
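This hunk (llama_stack/core/routers/inference.py, per the RECORD changes) closes a gap in the fallback routing path: when a client passes a raw "provider_id/provider_resource_id" id instead of a registered model, the router now builds a temporary `ModelWithOwner` and runs the same "read" access check as the normal path, and a denial is reported as `ModelNotFoundError` so callers cannot distinguish a hidden model from a missing one. A standalone sketch of that flow; the access-check callable here is a stand-in for `is_action_allowed`, not the real implementation:

```python
from typing import Callable


class ModelNotFound(Exception):
    """Stand-in for llama_stack's ModelNotFoundError."""


def resolve_fallback(
    model_id: str,
    providers: dict[str, object],
    can_read: Callable[[str | None, str], bool],
    user: str | None,
) -> tuple[object, str]:
    """Route a "provider_id/provider_resource_id" id, but only after an access check."""
    parts = model_id.split("/", maxsplit=1)
    if len(parts) != 2:
        raise ModelNotFound(model_id)

    provider_id, resource_id = parts
    if provider_id not in providers:
        raise ModelNotFound(model_id)

    # Denials surface as "not found", so unauthorized callers cannot tell a
    # hidden model apart from a missing one.
    if not can_read(user, model_id):
        raise ModelNotFound(model_id)

    return providers[provider_id], resource_id


impl, resource = resolve_fallback("vllm/llama-3-8b", {"vllm": object()}, lambda u, m: True, "alice")
print(resource)  # llama-3-8b
```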
@@ -23,7 +23,7 @@ def available_providers() -> list[ProviderSpec]:
                 "pillow",
                 "pandas",
                 "scikit-learn",
-                "mcp>=1.8.1",
+                "mcp>=1.23.0",
             ]
             + kvstore_dependencies(),  # TODO make this dynamic based on the kvstore config
             module="llama_stack.providers.inline.agents.meta_reference",
@@ -80,7 +80,7 @@ def available_providers() -> list[ProviderSpec]:
            provider_type="remote::model-context-protocol",
            module="llama_stack.providers.remote.tool_runtime.model_context_protocol",
            config_class="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderConfig",
-            pip_packages=["mcp>=1.23.0"],
+            pip_packages=["mcp>=1.23.0"],
            provider_data_validator="llama_stack.providers.remote.tool_runtime.model_context_protocol.config.MCPProviderDataValidator",
            description="Model Context Protocol (MCP) tool for standardized tool calling and context management.",
        ),
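Both registry hunks above raise the MCP dependency floor from `mcp>=1.8.1` to `mcp>=1.23.0` for the inline agents provider and the remote MCP tool-runtime provider. If it helps, here is a small stdlib-only check that an installed environment satisfies the new floor; this is illustrative, not part of the package, and the naive comparison assumes plain "X.Y.Z" versions:

```python
from importlib.metadata import PackageNotFoundError, version


def parse(v: str) -> tuple[int, ...]:
    # Naive numeric parse, sufficient for plain "X.Y.Z" release versions.
    return tuple(int(part) for part in v.split("."))


try:
    installed = version("mcp")
except PackageNotFoundError:
    installed = None

if installed is None or parse(installed) < parse("1.23.0"):
    print(f"mcp {installed!r} does not satisfy mcp>=1.23.0; upgrade before using MCP providers")
else:
    print(f"mcp {installed} satisfies the new pin")
```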
@@ -283,8 +283,8 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
            # ...
            provider_resource_id = f"{self.__provider_id__}/{model_spec['model_id']}"
            if "embedding" in functions:
-                embedding_dimension = model_spec["model_limits"]["embedding_dimension"]
-                context_length = model_spec["model_limits"]["max_sequence_length"]
+                embedding_dimension = model_spec.get("model_limits", {}).get("embedding_dimension", 0)
+                context_length = model_spec.get("model_limits", {}).get("max_sequence_length", 0)
                embedding_metadata = {
                    "embedding_dimension": embedding_dimension,
                    "context_length": context_length,
@@ -306,10 +306,6 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
                    metadata={},
                    model_type=ModelType.llm,
                )
-            # In theory, I guess it is possible that a model could be both an embedding model and a text chat model.
-            # In that case, the cache will record the generator Model object, and the list which we return will have
-            # both the generator Model object and the text chat Model object. That's fine because the cache is
-            # only used for check_model_availability() anyway.
            self._model_cache[provider_resource_id] = model
            models.append(model)
        return models
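The `model_limits` change two hunks up (in the watsonx adapter) swaps direct indexing for chained `.get()` calls, so a model spec without a `model_limits` block, or without one of its keys, yields `0` instead of raising `KeyError` while listing models. A minimal illustration with made-up spec dicts; the real specs come from the watsonx SDK:

```python
# Hypothetical model specs for illustration only.
spec_full = {"model_limits": {"embedding_dimension": 768, "max_sequence_length": 512}}
spec_missing = {}  # no "model_limits" at all

for spec in (spec_full, spec_missing):
    # Old style: spec["model_limits"]["embedding_dimension"] raises KeyError on spec_missing.
    dim = spec.get("model_limits", {}).get("embedding_dimension", 0)
    ctx = spec.get("model_limits", {}).get("max_sequence_length", 0)
    print(dim, ctx)  # prints "768 512", then "0 0"
```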
@@ -56,7 +56,7 @@ class InferenceStore:
            logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")

        await self.sql_store.create_table(
-            "chat_completions",
+            self.reference.table_name,
            {
                "id": ColumnDefinition(type=ColumnType.STRING, primary_key=True),
                "created": ColumnType.INTEGER,
@@ -66,14 +66,6 @@ class InferenceStore:
            },
        )

-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Inference store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
    async def shutdown(self) -> None:
        if not self._worker_tasks:
            return
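This hunk (and the matching one in the responses store further down) drops the initialization path that created an `asyncio.Queue` plus background writer tasks in `InferenceStore`. For reference, the removed code followed the usual asyncio producer/worker pattern, roughly like this sketch; the names are illustrative, not the package's:

```python
import asyncio


class WriteQueue:
    """Background writers draining a bounded queue; must be built inside a running loop."""

    def __init__(self, num_writers: int = 2, max_size: int = 1000):
        self.queue: asyncio.Queue = asyncio.Queue(maxsize=max_size)
        self.workers = [asyncio.create_task(self._worker()) for _ in range(num_writers)]

    async def _worker(self) -> None:
        # Each worker drains items and performs the (slow) database write.
        while True:
            item = await self.queue.get()
            try:
                await self._write(item)
            finally:
                self.queue.task_done()

    async def _write(self, item: dict) -> None:
        await asyncio.sleep(0)  # placeholder for the real INSERT

    async def shutdown(self) -> None:
        await self.queue.join()
        for task in self.workers:
            task.cancel()


async def main() -> None:
    wq = WriteQueue()
    await wq.queue.put({"id": "chatcmpl-1"})
    await wq.shutdown()


asyncio.run(main())
```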
@@ -161,7 +153,7 @@ class InferenceStore:

        try:
            await self.sql_store.insert(
-                table="chat_completions",
+                table=self.reference.table_name,
                data=record_data,
            )
        except IntegrityError as e:
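Across the inference-store hunks, the hard-coded `"chat_completions"` table name is replaced with `self.reference.table_name`, so the table is driven by the store's configured reference rather than a string literal scattered through the file. A toy version of that pattern; the reference class below is a stand-in, not llama_stack's storage reference types:

```python
from dataclasses import dataclass


@dataclass
class StoreReference:
    backend: str
    table_name: str = "chat_completions"  # default keeps the historical name


class ToyStore:
    def __init__(self, reference: StoreReference):
        self.reference = reference

    def create_table_sql(self) -> str:
        # Every statement derives the table from the reference, never a literal.
        return f"CREATE TABLE IF NOT EXISTS {self.reference.table_name} (id TEXT PRIMARY KEY)"


print(ToyStore(StoreReference(backend="sqlite")).create_table_sql())
print(ToyStore(StoreReference(backend="postgres", table_name="chat_completions_v2")).create_table_sql())
```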
@@ -173,7 +165,7 @@ class InferenceStore:
            error_message = str(e.orig) if e.orig else str(e)
            if self._is_unique_constraint_error(error_message):
                # Update the existing record instead
-                await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
+                await self.sql_store.update(table=self.reference.table_name, data=record_data, where={"id": data["id"]})
            else:
                # Re-raise if it's not a unique constraint error
                raise
@@ -217,7 +209,7 @@ class InferenceStore:
            where_conditions["model"] = model

        paginated_result = await self.sql_store.fetch_all(
-            table="chat_completions",
+            table=self.reference.table_name,
            where=where_conditions if where_conditions else None,
            order_by=[("created", order.value)],
            cursor=("id", after) if after else None,
@@ -246,7 +238,7 @@ class InferenceStore:
            raise ValueError("Inference store is not initialized")

        row = await self.sql_store.fetch_one(
-            table="chat_completions",
+            table=self.reference.table_name,
            where={"id": completion_id},
        )

@@ -3,7 +3,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-import asyncio

 from llama_stack.apis.agents import (
     Order,
@@ -18,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import (
 )
 from llama_stack.apis.inference import OpenAIMessageParam
 from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
+from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
 from llama_stack.log import get_logger

 from ..sqlstore.api import ColumnDefinition, ColumnType
 from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+from ..sqlstore.sqlstore import sqlstore_impl

 logger = get_logger(name=__name__, category="openai_responses")

@@ -60,13 +59,6 @@ class ResponsesStore:
        base_store = sqlstore_impl(self.reference)
        self.sql_store = AuthorizedSqlStore(base_store, self.policy)

-        # Disable write queue for SQLite since WAL mode handles concurrency
-        # Keep it enabled for other backends (like Postgres) for performance
-        backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
-        if backend_config and backend_config.type == StorageBackendType.SQL_SQLITE:
-            self.enable_write_queue = False
-            logger.debug("Write queue disabled for SQLite (WAL mode handles concurrency)")
-
        await self.sql_store.create_table(
            "openai_responses",
            {
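The block removed above had special-cased SQLite, disabling the responses store's write queue because write-ahead logging already serializes writes. For background, enabling WAL in SQLite is a single pragma; the snippet below uses the standard library's sqlite3 module purely as a neutral illustration of that setting, not llama_stack's storage layer:

```python
import sqlite3

conn = sqlite3.connect("responses.db")
# WAL lets readers proceed while a single writer appends to the log, which is
# why an application-level write queue adds little for a SQLite backend.
conn.execute("PRAGMA journal_mode=WAL;")
print(conn.execute("PRAGMA journal_mode;").fetchone())  # ('wal',)
conn.close()
```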
@@ -85,14 +77,6 @@ class ResponsesStore:
            },
        )

-        if self.enable_write_queue:
-            self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-            for _ in range(self._num_writers):
-                self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-            logger.debug(
-                f"Responses store write queue enabled with {self._num_writers} writers, max queue size {self._max_write_queue_size}"
-            )
-
    async def shutdown(self) -> None:
        return

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.3.3
+Version: 0.3.5
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -22,7 +22,7 @@ Requires-Dist: fire
 Requires-Dist: httpx
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: jsonschema
-Requires-Dist: llama-stack-client>=0.3.3
+Requires-Dist: llama-stack-client==0.3.5
 Requires-Dist: openai>=1.107
 Requires-Dist: prompt-toolkit
 Requires-Dist: python-dotenv
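In this hunk and the one that follows, the client dependency changes from a floor (`llama-stack-client>=0.3.3`) to an exact pin (`llama-stack-client==0.3.5`), so the server and client packages move in lockstep. A small, purely illustrative runtime check of that alignment, assuming the two versions are meant to match exactly as they do here:

```python
from importlib.metadata import PackageNotFoundError, version


def installed(name: str) -> str | None:
    try:
        return version(name)
    except PackageNotFoundError:
        return None


stack, client = installed("llama_stack"), installed("llama_stack_client")
if stack and client and stack != client:
    print(f"llama_stack {stack} pins llama-stack-client=={stack}, but {client} is installed")
```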
@@ -41,10 +41,11 @@ Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.30.0
 Requires-Dist: aiosqlite>=0.21.0
 Requires-Dist: asyncpg
 Requires-Dist: sqlalchemy[asyncio]>=2.0.41
+Requires-Dist: starlette>=0.49.1
 Provides-Extra: ui
 Requires-Dist: streamlit; extra == "ui"
 Requires-Dist: pandas; extra == "ui"
-Requires-Dist: llama-stack-client>=0.3.3; extra == "ui"
+Requires-Dist: llama-stack-client==0.3.5; extra == "ui"
 Requires-Dist: streamlit-option-menu; extra == "ui"
 Dynamic: license-file

@@ -60,83 +61,6 @@ Dynamic: license-file
 [**Quick Start**](https://llamastack.github.io/docs/getting_started/quickstart) | [**Documentation**](https://llamastack.github.io/docs) | [**Colab Notebook**](./docs/getting_started.ipynb) | [**Discord**](https://discord.gg/llama-stack)


-### ✨🎉 Llama 4 Support 🎉✨
-We released [Version 0.2.0](https://github.com/meta-llama/llama-stack/releases/tag/v0.2.0) with support for the Llama 4 herd of models released by Meta.
-
-<details>
-
-<summary>👋 Click here to see how to run Llama 4 models on Llama Stack </summary>
-
-\
-*Note you need 8xH100 GPU-host to run these models*
-
-```bash
-pip install -U llama_stack
-
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-# get meta url from llama.com
-huggingface-cli download meta-llama/$MODEL --local-dir ~/.llama/$MODEL
-
-# install dependencies for the distribution
-llama stack list-deps meta-reference-gpu | xargs -L1 uv pip install
-
-# start a llama stack server
-INFERENCE_MODEL=meta-llama/$MODEL llama stack run meta-reference-gpu
-
-# install client to interact with the server
-pip install llama-stack-client
-```
-### CLI
-```bash
-# Run a chat completion
-MODEL="Llama-4-Scout-17B-16E-Instruct"
-
-llama-stack-client --endpoint http://localhost:8321 \
-inference chat-completion \
---model-id meta-llama/$MODEL \
---message "write a haiku for meta's llama 4 models"
-
-OpenAIChatCompletion(
-    ...
-    choices=[
-        OpenAIChatCompletionChoice(
-            finish_reason='stop',
-            index=0,
-            message=OpenAIChatCompletionChoiceMessageOpenAIAssistantMessageParam(
-                role='assistant',
-                content='...**Silent minds awaken,** \n**Whispers of billions of words,** \n**Reasoning breaks the night.** \n\n— \n*This haiku blends the essence of LLaMA 4\'s capabilities with nature-inspired metaphor, evoking its vast training data and transformative potential.*',
-                ...
-            ),
-            ...
-        )
-    ],
-    ...
-)
-```
-### Python SDK
-```python
-from llama_stack_client import LlamaStackClient
-
-client = LlamaStackClient(base_url=f"http://localhost:8321")
-
-model_id = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
-prompt = "Write a haiku about coding"
-
-print(f"User> {prompt}")
-response = client.chat.completions.create(
-    model=model_id,
-    messages=[
-        {"role": "system", "content": "You are a helpful assistant."},
-        {"role": "user", "content": prompt},
-    ],
-)
-print(f"Assistant> {response.choices[0].message.content}")
-```
-As more providers start supporting Llama 4, you can use them in Llama Stack as well. We are adding to the list. Stay tuned!
-
-
-</details>
-
 ### 🚀 One-Line Installer 🚀

 To try Llama Stack locally, run:
@@ -93,7 +93,7 @@ llama_stack/core/inspect.py,sha256=ODW460YweB7-Xwe2wTuvCxO-SzJT4dO2nmB5hviVIfc,2
 llama_stack/core/library_client.py,sha256=cfBzHiQkTbCjGjDO99BZAelc5C6pnHDpybAkk_D8rJI,20463
 llama_stack/core/providers.py,sha256=kLrOvCD1xEFuvPxTR6eux9c-EquIWeJ0RcMIXYQBjxE,5109
 llama_stack/core/request_headers.py,sha256=tUt-RvzUrl7yxbYKBe7nN5YBCgWxShz4cemLvl7XGxc,3692
-llama_stack/core/resolver.py,sha256=SSEk2gfL5NqrGLt2U_aTxbi3h7L1QeqWh8l0uZbk4gc,19919
+llama_stack/core/resolver.py,sha256=22TSTIy-ZCCMOmqjAFVJ95V3Hph_k7KH65NzrFs4Vko,20030
 llama_stack/core/stack.py,sha256=P9IGkarUwEKk1AvTgo1U0l_mp8LBg98G47zG0O828cE,21469
 llama_stack/core/start_stack.sh,sha256=3snlFzur13NS1_UnJQ6t8zK7R5DCRFJKJrz9YTJmWVA,2834
 llama_stack/core/testing_context.py,sha256=TIWetol6Sb2BSiqkq5X0knb0chG03GSpmjByFwVfY60,1438
@@ -108,7 +108,7 @@ llama_stack/core/prompts/prompts.py,sha256=bk9XjxeTnNt3CIGmKaELkEaAMZfvu7DdAUeBq
 llama_stack/core/routers/__init__.py,sha256=ue96Y4Dh1Mxu1bWMzOX7Fra2ICvykCMkhlJgPS8sa9U,3631
 llama_stack/core/routers/datasets.py,sha256=79tYXa4cSNPzG1DCWL1I8k54HSeAW5EE_r0WtlBkTh0,2361
 llama_stack/core/routers/eval_scoring.py,sha256=N6_UyX8vL672bHDVKJ_r8bQ7wpI1JbD74h0aemqLR0o,5052
-llama_stack/core/routers/inference.py,sha256=EdDWgddaRtgug6buOS6PAlMf0SYNLOGNeBs49e0vGkE,28019
+llama_stack/core/routers/inference.py,sha256=JNxCDgvo0nDbMMsEC9rAVR8Isrm4LEkmncgmvEe_50g,29360
 llama_stack/core/routers/safety.py,sha256=xyPbwRLxMSIrifQ8qj4GeonWiJBwmlH6Pcpc5f9-PP0,3071
 llama_stack/core/routers/tool_runtime.py,sha256=Xp_eKCz73o6eDMlY4dhZl-msbh3Z8Md8kdAfECO1438,3232
 llama_stack/core/routers/vector_io.py,sha256=nriMtO_vFKFVJZrtc1Zq9-ji7XYfnJBJOiTYegCRV08,18683
@@ -403,7 +403,7 @@ llama_stack/providers/inline/vector_io/sqlite_vec/__init__.py,sha256=_gP1I0fHxY3
 llama_stack/providers/inline/vector_io/sqlite_vec/config.py,sha256=zwHR-7oXMSbTnXeO2pJ1BmXWGL9NBkQ76A5Wb3fjqZQ,937
 llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py,sha256=jhBqfQI4E4EqijQR-2-f99YDQ4WENqHrkgoRirsVhd0,20511
 llama_stack/providers/registry/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/registry/agents.py,sha256=brI3222FyMoFtN4ooWJsoqMRNK9s0L87xkBISqrd2I8,1416
+llama_stack/providers/registry/agents.py,sha256=PaLSYCm-Ssz-96qazd85XNBg4rof1y8NAP8lHFAabbc,1417
 llama_stack/providers/registry/batches.py,sha256=KElTpHCiLl4J7pa5BOkVYNK7woC9JB96JU0IcEjpdHI,901
 llama_stack/providers/registry/datasetio.py,sha256=IZSNequAMiL7wokqUvNPF8Bd4JbnLw6--v34oQFMj_Y,1902
 llama_stack/providers/registry/eval.py,sha256=q9DqUuMUcGPiYSliQLph8ehoHxBruZZttAElmZMDa4Y,1748
@@ -412,7 +412,7 @@ llama_stack/providers/registry/inference.py,sha256=Oc8sZhPPz5fjPShakch6Rn6JeDeou
 llama_stack/providers/registry/post_training.py,sha256=HPA_Qa7llPvsiDq2ff4uy7XqFKGh07Nc5f0k3mTVLVI,2956
 llama_stack/providers/registry/safety.py,sha256=ZQr1P4RlDh2skKXdAotExo62bOAkE3GcjwH8SsT2meQ,3417
 llama_stack/providers/registry/scoring.py,sha256=qWO6ze2FkO89q0WdN3KeuSBPLwqew5LF6GfjgRPe4I4,2109
-llama_stack/providers/registry/tool_runtime.py,sha256=SJgMPXd3wpPnOsIgc4ZuoY6vSjS2yVjn56K2zb3-E34,4433
+llama_stack/providers/registry/tool_runtime.py,sha256=oET0KHFSZp9V0An1WAnfWoWeHArSHWJbil9YXRELji8,4434
 llama_stack/providers/registry/vector_io.py,sha256=QGr41Sf7cLUtyvzFbyNB7598NIUvwmGr5aAz8S3-dHU,30096
 llama_stack/providers/remote/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/remote/agents/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
@@ -492,7 +492,7 @@ llama_stack/providers/remote/inference/vllm/config.py,sha256=qN_djLcdnmknwLnLSYT
 llama_stack/providers/remote/inference/vllm/vllm.py,sha256=0IfNDNtLXuYDTsp29b9hM1zTfJoqH-QRjCg_w0NpDbg,4101
 llama_stack/providers/remote/inference/watsonx/__init__.py,sha256=05ruRuPiOkniMYQwvAWFIAtPJg2E7IoxPQU79sxogkA,480
 llama_stack/providers/remote/inference/watsonx/config.py,sha256=kH5hLq1EfO1T2iuRBwCVPiwtSTi1xUtE2HJs3t5qeZQ,1404
-llama_stack/providers/remote/inference/watsonx/watsonx.py,sha256=sr18UqVc4ZJebHg-wsSlm8lgKUugXG72Xi3SYqds7Ps,14524
+llama_stack/providers/remote/inference/watsonx/watsonx.py,sha256=ts_LhCGGYfhs01kbco5gXeehega6fwCDqmK4T57WQdw,14129
 llama_stack/providers/remote/post_training/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/remote/post_training/nvidia/__init__.py,sha256=jChqHlLy0TY5fz4eHip0aujp9wH8SUn7JvyinQGfPzM,637
 llama_stack/providers/remote/post_training/nvidia/config.py,sha256=xpIKIqRwNPaUJZxoZcGfJQq9UVh8dppHSdWp9uT_rKc,3447
@@ -556,7 +556,7 @@ llama_stack/providers/utils/files/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8us
 llama_stack/providers/utils/files/form_data.py,sha256=oLDS9gsOWpUnqX51qczjNGTfHJBrZ0SFZbEHFtsfqCs,2291
 llama_stack/providers/utils/inference/__init__.py,sha256=Ocwqyn7ytwdt1vMFXsPBoa5D6uhA1fIljF-HiIsVvKw,1089
 llama_stack/providers/utils/inference/embedding_mixin.py,sha256=Ur9A0VJB0BEDh00Er8Ua-Mc08Sa69YAQW_cCcAdxB88,3336
-llama_stack/providers/utils/inference/inference_store.py,sha256=zNscOx7uiIspV8UoAdSlciWvupOWrLDBEtoros5tlpk,10273
+llama_stack/providers/utils/inference/inference_store.py,sha256=p9GwdiWGQw9Tnb-xL7kqNi0odOnecyIhxsrg6VoI-3U,9891
 llama_stack/providers/utils/inference/litellm_openai_mixin.py,sha256=tcRCccOd4fR61TIQjFGb-B6Qybu5q-pklK5fo87Ji3I,13094
 llama_stack/providers/utils/inference/model_registry.py,sha256=ElaDfW67XphDvVLYBBghwSB-2A704ELqpJpm42Hdpc8,8250
 llama_stack/providers/utils/inference/openai_compat.py,sha256=kTjea5GUmaD8UfA6UgoPD8wvmWNBnAwuWLkmNUwy-as,49768
@@ -580,7 +580,7 @@ llama_stack/providers/utils/memory/file_utils.py,sha256=1Lz7FTR4eV1OYPgD3oABRCho
 llama_stack/providers/utils/memory/openai_vector_store_mixin.py,sha256=XbmaUW7srqYbx1UZGn8h0NjCe3o9j_afeG-sdrYYaME,53335
 llama_stack/providers/utils/memory/vector_store.py,sha256=bAnoHLa68Z9Zsaufpovkwb5wlAvoyXVHRG33gvTcjls,12023
 llama_stack/providers/utils/responses/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
-llama_stack/providers/utils/responses/responses_store.py,sha256=cJF93RVEyeGvd7-YJK9HK9NpfkcWzZ507bEK0D9Z8XI,11651
+llama_stack/providers/utils/responses/responses_store.py,sha256=r1SxlhSvT4XeZm-2zqI4A17LISjI9yzWxEewAlUyEJw,10734
 llama_stack/providers/utils/scoring/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/providers/utils/scoring/aggregation_utils.py,sha256=vNtkQbyEg71tWLCwibOHJyNGHqk5GBNB6uSMnlDaqJs,2775
 llama_stack/providers/utils/scoring/base_scoring_fn.py,sha256=q4KZZxU1TVBKO21bTcO5bnXu2LuzjjYzQZ492i_DfhA,4153
@@ -617,9 +617,9 @@ llama_stack/strong_typing/topological.py,sha256=I2YyhYW62PBM2wpfn6mbeCRxKGl_oa5t
 llama_stack/testing/__init__.py,sha256=vUvqRS2CXhASaFzYVspRYa5q8usSCzjKUlZhzNLuiKg,200
 llama_stack/testing/api_recorder.py,sha256=jt5Fq8HOPTA4rDzwIWWdBQJjxtivhbqoghFql3D--A0,38423
 llama_stack/ui/node_modules/flatted/python/flatted.py,sha256=UYburBDqkySaTfSpntPCUJRxiBGcplusJM7ECX8FEgA,3860
-llama_stack-0.3.3.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
-llama_stack-0.3.3.dist-info/METADATA,sha256=-2v1yFVpGA-OAXZICSSE1aB-XqIRLEDoPD2w6AHSsUI,15124
-llama_stack-0.3.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-llama_stack-0.3.3.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
-llama_stack-0.3.3.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
-llama_stack-0.3.3.dist-info/RECORD,,
+llama_stack-0.3.5.dist-info/licenses/LICENSE,sha256=42g1gBn9gHYdBt5e6e1aFYhnc-JT9trU9qBD84oUAlY,1087
+llama_stack-0.3.5.dist-info/METADATA,sha256=pQ_p1RWmVzNAMznjofhtlzytTyPOBCdOYinnHHLItHg,12817
+llama_stack-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+llama_stack-0.3.5.dist-info/entry_points.txt,sha256=E5xoyAM9064aW_y96eSSwZCNT_ANctrvrhLMJnMQlw0,141
+llama_stack-0.3.5.dist-info/top_level.txt,sha256=2-nbQ1CAn4_w76YD_O6N6ofvjmk4DX5NFaBuApSx5N0,12
+llama_stack-0.3.5.dist-info/RECORD,,