llama-stack 0.4.4__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +49 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  57. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  58. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  59. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  60. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  61. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  62. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  63. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  64. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  65. llama_stack/providers/registry/agents.py +1 -0
  66. llama_stack/providers/registry/inference.py +1 -9
  67. llama_stack/providers/registry/vector_io.py +136 -16
  68. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  69. llama_stack/providers/remote/files/s3/config.py +5 -3
  70. llama_stack/providers/remote/files/s3/files.py +2 -2
  71. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  72. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  73. llama_stack/providers/remote/inference/together/together.py +4 -0
  74. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  75. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  76. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  77. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  78. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  79. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  80. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  81. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  82. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  83. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  84. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  85. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  86. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  87. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  88. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  89. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  90. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  91. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  92. llama_stack/providers/utils/bedrock/client.py +3 -3
  93. llama_stack/providers/utils/bedrock/config.py +7 -7
  94. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  95. llama_stack/providers/utils/inference/http_client.py +239 -0
  96. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  97. llama_stack/providers/utils/inference/model_registry.py +148 -2
  98. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  99. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  100. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  101. llama_stack/providers/utils/memory/vector_store.py +46 -19
  102. llama_stack/providers/utils/responses/responses_store.py +7 -7
  103. llama_stack/providers/utils/safety.py +114 -0
  104. llama_stack/providers/utils/tools/mcp.py +44 -3
  105. llama_stack/testing/api_recorder.py +9 -3
  106. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  107. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +111 -144
  108. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  109. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  110. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  111. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  112. llama_stack/models/llama/hadamard_utils.py +0 -88
  113. llama_stack/models/llama/llama3/args.py +0 -74
  114. llama_stack/models/llama/llama3/dog.jpg +0 -0
  115. llama_stack/models/llama/llama3/generation.py +0 -378
  116. llama_stack/models/llama/llama3/model.py +0 -304
  117. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  118. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  119. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  120. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  121. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  122. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  123. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  124. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  125. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  126. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  127. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  128. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  129. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  130. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  131. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  132. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  133. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  134. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  135. llama_stack/models/llama/llama4/args.py +0 -107
  136. llama_stack/models/llama/llama4/ffn.py +0 -58
  137. llama_stack/models/llama/llama4/moe.py +0 -214
  138. llama_stack/models/llama/llama4/preprocess.py +0 -435
  139. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  140. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  141. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  142. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  143. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  144. llama_stack/models/llama/quantize_impls.py +0 -316
  145. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  146. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  147. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  148. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  149. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  150. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  151. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  152. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  153. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  154. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
  155. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0rc1.dist-info}/top_level.txt +0 -0
llama_stack/core/routing_tables/shields.py CHANGED
@@ -4,13 +4,19 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any
-
 from llama_stack.core.datatypes import (
     ShieldWithOwner,
 )
 from llama_stack.log import get_logger
-from llama_stack_api import ListShieldsResponse, ResourceType, Shield, Shields
+from llama_stack_api import (
+    GetShieldRequest,
+    ListShieldsResponse,
+    RegisterShieldRequest,
+    ResourceType,
+    Shield,
+    Shields,
+    UnregisterShieldRequest,
+)
 
 from .common import CommonRoutingTableImpl
 
@@ -21,21 +27,17 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
     async def list_shields(self) -> ListShieldsResponse:
         return ListShieldsResponse(data=await self.get_all_with_type(ResourceType.shield.value))
 
-    async def get_shield(self, identifier: str) -> Shield:
-        shield = await self.get_object_by_identifier("shield", identifier)
+    async def get_shield(self, request: GetShieldRequest) -> Shield:
+        shield = await self.get_object_by_identifier("shield", request.identifier)
         if shield is None:
-            raise ValueError(f"Shield '{identifier}' not found")
+            raise ValueError(f"Shield '{request.identifier}' not found")
         return shield
 
-    async def register_shield(
-        self,
-        shield_id: str,
-        provider_shield_id: str | None = None,
-        provider_id: str | None = None,
-        params: dict[str, Any] | None = None,
-    ) -> Shield:
+    async def register_shield(self, request: RegisterShieldRequest) -> Shield:
+        provider_shield_id = request.provider_shield_id
         if provider_shield_id is None:
-            provider_shield_id = shield_id
+            provider_shield_id = request.shield_id
+        provider_id = request.provider_id
         if provider_id is None:
             # If provider_id not specified, use the only provider if it supports this shield type
             if len(self.impls_by_provider_id) == 1:
@@ -44,10 +46,11 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
                 raise ValueError(
                     "No provider specified and multiple providers available. Please specify a provider_id."
                 )
+        params = request.params
         if params is None:
            params = {}
        shield = ShieldWithOwner(
-            identifier=shield_id,
+            identifier=request.shield_id,
            provider_resource_id=provider_shield_id,
            provider_id=provider_id,
            params=params,
@@ -55,6 +58,6 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields):
         await self.register_object(shield)
         return shield
 
-    async def unregister_shield(self, identifier: str) -> None:
-        existing_shield = await self.get_shield(identifier)
+    async def unregister_shield(self, request: UnregisterShieldRequest) -> None:
+        existing_shield = await self.get_shield(GetShieldRequest(identifier=request.identifier))
         await self.unregister_object(existing_shield)
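
The shields table above is part of a broader 0.5.0 move from loose keyword arguments to typed request objects. A minimal caller-side sketch of the before/after, assuming the new request classes are plain Pydantic models whose fields mirror the old parameters (the `table` variable and the provider id below are illustrative, not from this diff):

```python
from llama_stack_api import GetShieldRequest, RegisterShieldRequest, UnregisterShieldRequest

async def migrate_shield_calls(table) -> None:
    # 0.4.x style: await table.register_shield(shield_id="llama-guard", provider_id=...)
    shield = await table.register_shield(
        RegisterShieldRequest(shield_id="llama-guard", provider_id="llama-guard-provider")
    )

    # 0.4.x style: await table.get_shield("llama-guard")
    shield = await table.get_shield(GetShieldRequest(identifier=shield.identifier))

    # 0.4.x style: await table.unregister_shield("llama-guard")
    await table.unregister_shield(UnregisterShieldRequest(identifier=shield.identifier))
```
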
llama_stack/core/routing_tables/vector_stores.py CHANGED
@@ -24,10 +24,13 @@ from llama_stack_api import (
     SearchRankingOptions,
     VectorStoreChunkingStrategy,
     VectorStoreDeleteResponse,
+    VectorStoreFileBatchObject,
     VectorStoreFileContentResponse,
     VectorStoreFileDeleteResponse,
     VectorStoreFileObject,
+    VectorStoreFilesListInBatchResponse,
     VectorStoreFileStatus,
+    VectorStoreListFilesResponse,
     VectorStoreObject,
     VectorStoreSearchResponsePage,
 )
@@ -205,7 +208,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         after: str | None = None,
         before: str | None = None,
         filter: VectorStoreFileStatus | None = None,
-    ) -> list[VectorStoreFileObject]:
+    ) -> VectorStoreListFilesResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store(
@@ -276,7 +279,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         vector_store_id: str,
         params: OpenAICreateVectorStoreFileBatchRequestWithExtraBody,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_create_vector_store_file_batch(
@@ -288,7 +291,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         batch_id: str,
         vector_store_id: str,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_retrieve_vector_store_file_batch(
@@ -305,7 +308,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         filter: str | None = None,
         limit: int | None = 20,
         order: str | None = "desc",
-    ):
+    ) -> VectorStoreFilesListInBatchResponse:
         await self.assert_action_allowed("read", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_list_files_in_vector_store_file_batch(
@@ -322,7 +325,7 @@ class VectorStoresRoutingTable(CommonRoutingTableImpl):
         self,
         batch_id: str,
         vector_store_id: str,
-    ):
+    ) -> VectorStoreFileBatchObject:
         await self.assert_action_allowed("update", "vector_store", vector_store_id)
         provider = await self.get_provider_impl(vector_store_id)
         return await provider.openai_cancel_vector_store_file_batch(
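
The file-batch endpoints previously returned untyped values; they are now annotated with `VectorStoreFileBatchObject`. A hedged polling sketch built on the retrieve method above (the `table` argument and the `status`/`in_progress` field values follow the OpenAI file-batch schema and are assumptions here, not taken from this diff):

```python
import asyncio

async def wait_for_file_batch(table, vector_store_id: str, batch_id: str, interval: float = 1.0):
    """Poll a vector-store file batch until it leaves the in-progress state."""
    while True:
        batch = await table.openai_retrieve_vector_store_file_batch(
            batch_id=batch_id,
            vector_store_id=vector_store_id,
        )
        # The new return annotation makes `.status` a statically checked
        # attribute access rather than an untyped one.
        if batch.status != "in_progress":
            return batch
        await asyncio.sleep(interval)
```
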
llama_stack/core/server/auth.py CHANGED
@@ -9,6 +9,8 @@ import json
 import httpx
 from aiohttp import hdrs
 
+from llama_stack.core.access_control.conditions import parse_conditions
+from llama_stack.core.access_control.datatypes import RouteAccessRule
 from llama_stack.core.datatypes import AuthenticationConfig, User
 from llama_stack.core.request_headers import user_from_scope
 from llama_stack.core.server.auth_providers import create_auth_provider
@@ -152,16 +154,6 @@ class AuthenticationMiddleware:
             f"Authentication successful: {validation_result.principal} with {len(validation_result.attributes)} attributes"
         )
 
-        # Scope-based API access control
-        if webmethod and webmethod.required_scope:
-            user = user_from_scope(scope)
-            if not _has_required_scope(webmethod.required_scope, user):
-                return await self._send_auth_error(
-                    send,
-                    f"Access denied: user does not have required scope: {webmethod.required_scope}",
-                    status=403,
-                )
-
         return await self.app(scope, receive, send)
 
     async def _send_auth_error(self, send, message, status=401):
@@ -177,13 +169,196 @@ class AuthenticationMiddleware:
         await send({"type": "http.response.body", "body": error_msg})
 
 
-def _has_required_scope(required_scope: str, user: User | None) -> bool:
-    # if no user, assume auth is not enabled
-    if not user:
-        return True
+class RouteAuthorizationMiddleware:
+    """Middleware that enforces route-level access control.
+
+    This middleware runs after authentication and checks if the authenticated user
+    has permission to access the requested API route based on route_policy rules.
+
+    """
+
+    def __init__(self, app, route_policy: list[RouteAccessRule]):
+        self.app = app
+        self.route_policy = route_policy
+
+    async def __call__(self, scope, receive, send):
+        # Only process HTTP requests
+        if scope["type"] != "http":
+            return await self.app(scope, receive, send)
+
+        # If no route policy configured, allow all routes (backward compatible)
+        if not self.route_policy:
+            return await self.app(scope, receive, send)
+
+        route = scope.get("path", "")
+        # Normalize route: remove trailing slash (except for root "/")
+        if route != "/" and route.endswith("/"):
+            route = route.rstrip("/")
+
+        # Get authenticated user from scope (set by AuthenticationMiddleware if present)
+        user = user_from_scope(scope)
+
+        # Check if user has permission to access this route
+        if not self._is_route_allowed(route, user):
+            return await self._send_error(
+                send, f"Access denied: insufficient permissions for route {route}", status=403
+            )
+
+        return await self.app(scope, receive, send)
+
+    def _is_route_allowed(self, route: str, user: User | None) -> bool:
+        """Check if the user is allowed to access the given route.
+
+        Rules are evaluated in order. First matching rule determines access.
+        If no rule matches, access is denied.
+
+        Args:
+            route: The route being accessed
+            user: The authenticated user, or None if no authentication is configured
+        """
+        user_str = user.principal if user else "anonymous"
+
+        for index, rule in enumerate(self.route_policy):
+            if self._rule_matches(rule, route, user):
+                # Check if this is a permit or forbid rule
+                if rule.permit:
+                    decision = "APPROVED"
+                    reason = rule.description or ""
+                    logger.debug(
+                        f"ROUTE_AUTHZ,decision={decision},user={user_str},"
+                        f"route={route},rule_index={index},reason={reason!r}"
+                    )
+                    return True
+                else:  # forbid
+                    decision = "DENIED"
+                    reason = rule.description or ""
+                    logger.debug(
+                        f"ROUTE_AUTHZ,decision={decision},user={user_str},"
+                        f"route={route},rule_index={index},reason={reason!r}"
+                    )
+                    return False
+
+        # No matching rule found - deny by default
+        decision = "DENIED"
+        reason = "no matching rule"
+        logger.debug(f"ROUTE_AUTHZ,decision={decision},user={user_str},route={route},rule_index=-1,reason={reason!r}")
+        return False
+
+    def _rule_matches(self, rule: RouteAccessRule, route: str, user: User | None) -> bool:
+        """Check if a rule matches the given route and user.
+
+        Args:
+            rule: The rule to evaluate
+            route: The route being accessed
+            user: The authenticated user, or None if no authentication is configured
+        """
+        # Get the scope (permit or forbid)
+        scope = rule.permit if rule.permit else rule.forbid
+        if not scope:
+            return False
+
+        # Check if route matches
+        if not self._route_matches(route, scope.paths):
+            return False
+
+        # Evaluate conditions
+        return self._evaluate_conditions(rule, user)
+
+    def _route_matches(self, request_route: str, rule_patterns: str | list[str]) -> bool:
+        """Check if request route matches any of the rule patterns.
+
+        Supports:
+        - Exact match: "/v1/chat/completions"
+        - Prefix wildcard: "/v1/files*" matches "/v1/files", "/v1/files/upload", "/v1/files/list", etc.
+        - Full wildcard: "*" matches all routes
+        """
+        patterns = [rule_patterns] if isinstance(rule_patterns, str) else rule_patterns
+
+        for pattern in patterns:
+            if pattern == "*":
+                # Full wildcard matches everything
+                return True
+            elif pattern.endswith("*"):
+                # Prefix wildcard: check if request route starts with the prefix
+                prefix = pattern[:-1]  # Remove "*"
+                if request_route.startswith(prefix):
+                    return True
+            elif pattern == request_route:
+                # Exact match
+                return True
 
-    if not user.attributes:
         return False
 
-    user_scopes = user.attributes.get("scopes", [])
-    return required_scope in user_scopes
+    def _evaluate_conditions(self, rule: RouteAccessRule, user: User | None) -> bool:
+        """Evaluate when/unless conditions for the rule.
+
+        Reuses the existing condition parsing from access_control.conditions.
+
+        Args:
+            rule: The rule whose conditions to evaluate
+            user: The authenticated user, or None if no authentication is configured
+
+        Returns:
+            True if conditions are met (or no conditions exist), False otherwise
+        """
+        # If rule has conditions but no user is available, conditions cannot be met
+        if (rule.when or rule.unless) and not user:
+            return False
+
+        if rule.when:
+            # At this point, if rule.when exists and we got past the check above,
+            # user is guaranteed to be non-None
+            assert user is not None
+            conditions_list = rule.when if isinstance(rule.when, list) else [rule.when]
+            conditions = parse_conditions(conditions_list)
+            # For 'when', all conditions must match (AND logic)
+            # Note: Since we're checking route access, we don't have a resource,
+            # so we create a context object to satisfy the interface
+            route_context = _RouteContext()
+            for condition in conditions:
+                if not condition.matches(route_context, user):
+                    return False
+            return True
+
+        if rule.unless:
+            # At this point, if rule.unless exists and we got past the check above,
+            # user is guaranteed to be non-None
+            assert user is not None
+            conditions_list = rule.unless if isinstance(rule.unless, list) else [rule.unless]
+            conditions = parse_conditions(conditions_list)
+            # For 'unless', no conditions should match (NOT logic)
+            route_context = _RouteContext()
+            for condition in conditions:
+                if condition.matches(route_context, user):
+                    return False
+            return True
+
+        # No conditions specified - rule applies regardless of user
+        return True
+
+    async def _send_error(self, send, message: str, status: int = 403):
+        """Send an error response."""
+        await send(
+            {
+                "type": "http.response.start",
+                "status": status,
+                "headers": [[b"content-type", b"application/json"]],
+            }
+        )
+        error_key = "message" if status == 401 else "detail"
+        error_msg = json.dumps({"error": {error_key: message}}).encode()
+        await send({"type": "http.response.body", "body": error_msg})
+
+
+class _RouteContext:
+    """Placeholder resource for route-level condition evaluation.
+
+    Route rules don't operate on actual resources, so we use this context object
+    to satisfy the condition.matches() interface. Route conditions typically check
+    user attributes (e.g., "user with admin in roles") and don't require resource properties.
+    """
+
+    def __init__(self):
+        self.type = "route"
+        self.identifier = "route"
+        self.owner = None
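
The route matcher above recognizes three pattern shapes and the middleware denies by default when no rule matches. Restated as a standalone function for quick experimentation (the name and the asserts are ours, not the middleware's):

```python
def route_matches(request_route: str, rule_patterns: str | list[str]) -> bool:
    """Mirror of the _route_matches logic: exact, prefix-wildcard, or full wildcard."""
    patterns = [rule_patterns] if isinstance(rule_patterns, str) else rule_patterns
    for pattern in patterns:
        if pattern == "*":
            return True  # full wildcard matches every route
        if pattern.endswith("*"):
            if request_route.startswith(pattern[:-1]):
                return True  # prefix wildcard
        elif pattern == request_route:
            return True  # exact match
    return False

assert route_matches("/v1/files/upload", "/v1/files*")
assert route_matches("/v1/chat/completions", ["/v1/models", "*"])
assert not route_matches("/v1/models", "/v1/chat/completions")
```
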
llama_stack/core/server/fastapi_router_registry.py CHANGED
@@ -16,20 +16,55 @@ from typing import Any, cast
 from fastapi import APIRouter
 from fastapi.routing import APIRoute
 
-from llama_stack_api import admin, batches, benchmarks, datasets, files, inspect_api, providers
+from llama_stack_api import (
+    admin,
+    agents,
+    batches,
+    benchmarks,
+    connectors,
+    conversations,
+    datasetio,
+    datasets,
+    eval,
+    files,
+    inference,
+    inspect_api,
+    models,
+    post_training,
+    prompts,
+    providers,
+    safety,
+    scoring,
+    scoring_functions,
+    shields,
+    vector_io,
+)
+from llama_stack_api.datatypes import Api
 
 # Router factories for APIs that have FastAPI routers
 # Add new APIs here as they are migrated to the router system
-from llama_stack_api.datatypes import Api
-
 _ROUTER_FACTORIES: dict[str, Callable[[Any], APIRouter]] = {
     "admin": admin.fastapi_routes.create_router,
+    "agents": agents.fastapi_routes.create_router,
     "batches": batches.fastapi_routes.create_router,
     "benchmarks": benchmarks.fastapi_routes.create_router,
+    "connectors": connectors.fastapi_routes.create_router,
+    "conversations": conversations.fastapi_routes.create_router,
+    "datasetio": datasetio.fastapi_routes.create_router,
     "datasets": datasets.fastapi_routes.create_router,
-    "providers": providers.fastapi_routes.create_router,
-    "inspect": inspect_api.fastapi_routes.create_router,
+    "eval": eval.fastapi_routes.create_router,
     "files": files.fastapi_routes.create_router,
+    "inference": inference.fastapi_routes.create_router,
+    "inspect": inspect_api.fastapi_routes.create_router,
+    "models": models.fastapi_routes.create_router,
+    "post_training": post_training.fastapi_routes.create_router,
+    "prompts": prompts.fastapi_routes.create_router,
+    "providers": providers.fastapi_routes.create_router,
+    "safety": safety.fastapi_routes.create_router,
+    "scoring": scoring.fastapi_routes.create_router,
+    "scoring_functions": scoring_functions.fastapi_routes.create_router,
+    "shields": shields.fastapi_routes.create_router,
+    "vector_io": vector_io.fastapi_routes.create_router,
 }
 
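With most APIs now registered here, mounting an API reduces to a dictionary lookup. A hypothetical sketch of how a factory might be consumed (the real wiring goes through this module's `build_fastapi_router`; `mount_api` and `impl` are illustrative names, and the factory signature `impl -> APIRouter` is taken from the type annotation above):

```python
from fastapi import FastAPI

def mount_api(app: FastAPI, api_name: str, impl) -> bool:
    """Look up an API's router factory and mount the router it builds."""
    factory = _ROUTER_FACTORIES.get(api_name)
    if factory is None:
        return False  # API not yet migrated to the router system
    app.include_router(factory(impl))
    return True
```
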
llama_stack/core/server/server.py CHANGED
@@ -48,7 +48,7 @@ from llama_stack.core.server.fastapi_router_registry import build_fastapi_router
 from llama_stack.core.server.routes import get_all_api_routes
 from llama_stack.core.stack import (
     Stack,
-    cast_image_name_to_string,
+    cast_distro_name_to_string,
     replace_env_vars,
 )
 from llama_stack.core.utils.config import redact_sensitive_fields
@@ -57,7 +57,7 @@ from llama_stack.core.utils.context import preserve_contexts_async_generator
 from llama_stack.log import LoggingConfig, get_logger
 from llama_stack_api import Api, ConflictError, PaginatedResponse, ResourceNotFoundError
 
-from .auth import AuthenticationMiddleware
+from .auth import AuthenticationMiddleware, RouteAuthorizationMiddleware
 from .quota import QuotaMiddleware
 
 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -88,6 +88,13 @@ async def global_exception_handler(request: Request, exc: Exception):
     traceback.print_exception(type(exc), exc, exc.__traceback__)
     http_exc = translate_exception(exc)
 
+    # OpenAI-compat Vector Stores endpoints treat many "not found" conditions as 400s.
+    # Our core exceptions model these as ResourceNotFoundError (mapped to 404 by default),
+    # but integration tests (and OpenAI client behavior expectations in this repo)
+    # assert they surface as BadRequestError instead.
+    if isinstance(exc, ResourceNotFoundError) and request.url.path.startswith("/v1/vector_stores"):
+        http_exc = HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
+
     return JSONResponse(status_code=http_exc.status_code, content={"error": {"detail": http_exc.detail}})
 
 
@@ -396,7 +403,7 @@ def create_app() -> StackApp:
     logger = get_logger(name=__name__, category="core::server", config=logger_config)
 
     config = replace_env_vars(config_contents)
-    config = StackConfig(**cast_image_name_to_string(config))
+    config = StackConfig(**cast_distro_name_to_string(config))
 
     _log_run_config(run_config=config)
 
@@ -416,8 +423,19 @@ def create_app() -> StackApp:
     impls = app.stack.impls
 
     if config.server.auth:
-        logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}")
-        app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth, impls=impls)
+        # Add route authorization middleware if route_policy is configured
+        # This can work independently of authentication
+        # NOTE: Add this FIRST because middleware wraps in reverse order (last added runs first)
+        # We want: Request → Auth → RouteAuth → App
+        if config.server.auth.route_policy:
+            logger.info(f"Enabling route-level authorization with {len(config.server.auth.route_policy)} rules")
+            app.add_middleware(RouteAuthorizationMiddleware, route_policy=config.server.auth.route_policy)
+
+        # Add authentication middleware only if provider is configured
+        # This runs FIRST in the middleware chain (last added = first to run)
+        if config.server.auth.provider_config:
+            logger.info(f"Enabling authentication with provider: {config.server.auth.provider_config.type.value}")
+            app.add_middleware(AuthenticationMiddleware, auth_config=config.server.auth, impls=impls)
     else:
         if config.server.quota:
             quota = config.server.quota
@@ -474,6 +492,7 @@ def create_app() -> StackApp:
         apis_to_serve.add("providers")
         apis_to_serve.add("prompts")
         apis_to_serve.add("conversations")
+        apis_to_serve.add("connectors")
 
     for api_str in apis_to_serve:
         api = Api(api_str)
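
The ordering comments above rely on Starlette's `add_middleware` semantics: each call wraps the existing stack, so the last middleware added is the first to see a request. A self-contained sketch demonstrating that order, independent of llama-stack:

```python
from fastapi import FastAPI, Request

class TagMiddleware:
    """Append a name to the ASGI scope so entry order can be observed."""

    def __init__(self, app, name: str):
        self.app = app
        self.name = name

    async def __call__(self, scope, receive, send):
        if scope["type"] == "http":
            scope.setdefault("order", []).append(self.name)
        await self.app(scope, receive, send)

app = FastAPI()
app.add_middleware(TagMiddleware, name="route_auth")  # added first -> runs second
app.add_middleware(TagMiddleware, name="auth")        # added last  -> runs first

@app.get("/order")
def order(request: Request) -> list[str]:
    return request.scope["order"]  # -> ["auth", "route_auth"]
```
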
llama_stack/core/stack.py CHANGED
@@ -16,6 +16,7 @@ import yaml
 from pydantic import BaseModel
 
 from llama_stack.core.admin import AdminImpl, AdminImplConfig
+from llama_stack.core.connectors.connectors import ConnectorServiceConfig, ConnectorServiceImpl
 from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
 from llama_stack.core.datatypes import Provider, QualifiedModel, SafetyConfig, StackConfig, VectorStoresConfig
 from llama_stack.core.distribution import get_provider_registry
@@ -42,6 +43,7 @@ from llama_stack_api import (
     Api,
     Batches,
     Benchmarks,
+    Connectors,
     Conversations,
     DatasetIO,
     Datasets,
@@ -54,6 +56,9 @@ from llama_stack_api import (
     Prompts,
     Providers,
     RegisterBenchmarkRequest,
+    RegisterModelRequest,
+    RegisterScoringFunctionRequest,
+    RegisterShieldRequest,
     Safety,
     Scoring,
     ScoringFunctions,
@@ -89,6 +94,7 @@ class LlamaStack(
     Files,
     Prompts,
     Conversations,
+    Connectors,
 ):
     pass
 
@@ -96,15 +102,15 @@
 # Resources to register based on configuration.
 # If a request class is specified, the configuration object will be converted to this class before invoking the registration method.
 RESOURCES = [
-    ("models", Api.models, "register_model", "list_models", None),
-    ("shields", Api.shields, "register_shield", "list_shields", None),
+    ("models", Api.models, "register_model", "list_models", RegisterModelRequest),
+    ("shields", Api.shields, "register_shield", "list_shields", RegisterShieldRequest),
     ("datasets", Api.datasets, "register_dataset", "list_datasets", RegisterDatasetRequest),
     (
         "scoring_fns",
         Api.scoring_functions,
         "register_scoring_function",
         "list_scoring_functions",
-        None,
+        RegisterScoringFunctionRequest,
     ),
     ("benchmarks", Api.benchmarks, "register_benchmark", "list_benchmarks", RegisterBenchmarkRequest),
     ("tool_groups", Api.tool_groups, "register_tool_group", "list_tool_groups", None),
@@ -242,6 +248,34 @@ async def register_resources(run_config: StackConfig, impls: dict[Api, Any]):
     )
 
 
+async def register_connectors(run_config: StackConfig, impls: dict[Api, Any]):
+    """Register connectors from config"""
+    if Api.connectors not in impls:
+        return
+
+    connectors_impl = impls[Api.connectors]
+
+    # Get connector IDs from config
+    config_connector_ids = {c.connector_id for c in run_config.connectors}
+
+    # Register/Update config connectors
+    for connector in run_config.connectors:
+        logger.debug(f"Registering connector: {connector.connector_id}")
+        await connectors_impl.register_connector(
+            connector_id=connector.connector_id,
+            connector_type=connector.connector_type,
+            url=connector.url,
+            server_label=connector.server_label,
+        )
+
+    # Remove connectors not in config (orphan cleanup)
+    existing_connectors = await connectors_impl.list_connectors()
+    for connector in existing_connectors.data:
+        if connector.connector_id not in config_connector_ids:
+            logger.info(f"Removing orphaned connector: {connector.connector_id}")
+            await connectors_impl.unregister_connector(connector.connector_id)
+
+
 async def validate_vector_stores_config(vector_stores_config: VectorStoresConfig | None, impls: dict[Api, Any]):
     """Validate vector stores configuration."""
     if vector_stores_config is None:
@@ -276,7 +310,8 @@ async def _validate_embedding_model(embedding_model: QualifiedModel, impls: dict
             f"Embedding model '{model_identifier}' not found. Available embedding models: {list(models_list.keys())}"
         )
 
-    embedding_dimension = model.metadata.get("embedding_dimension")
+    # if not in metadata, fetch from config default
+    embedding_dimension = model.metadata.get("embedding_dimension", embedding_model.embedding_dimensions)
     if embedding_dimension is None:
         raise ValueError(f"Embedding model '{model_identifier}' is missing 'embedding_dimension' in metadata")
 
@@ -489,10 +524,10 @@ def _convert_string_to_proper_type(value: str) -> Any:
     return value
 
 
-def cast_image_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
-    """Ensure that any value for a key 'image_name' in a config_dict is a string"""
-    if "image_name" in config_dict and config_dict["image_name"] is not None:
-        config_dict["image_name"] = str(config_dict["image_name"])
+def cast_distro_name_to_string(config_dict: dict[str, Any]) -> dict[str, Any]:
+    """Ensure that any value for a key 'distro_name' in a config_dict is a string"""
+    if "distro_name" in config_dict and config_dict["distro_name"] is not None:
+        config_dict["distro_name"] = str(config_dict["distro_name"])
     return config_dict
 
 
@@ -532,6 +567,11 @@ def add_internal_implementations(impls: dict[Api, Any], config: StackConfig) ->
     )
     impls[Api.conversations] = conversations_impl
 
+    connectors_impl = ConnectorServiceImpl(
+        ConnectorServiceConfig(config=config),
+    )
+    impls[Api.connectors] = connectors_impl
+
 
 def _initialize_storage(run_config: StackConfig):
     kv_backends: dict[str, StorageBackendConfig] = {}
@@ -574,7 +614,7 @@ class Stack:
         stores = self.run_config.storage.stores
         if not stores.metadata:
             raise ValueError("storage.stores.metadata must be configured with a kv_* backend")
-        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.image_name)
+        dist_registry, _ = await create_dist_registry(stores.metadata, self.run_config.distro_name)
         policy = self.run_config.server.auth.access_policy if self.run_config.server.auth else []
 
         internal_impls = {}
@@ -592,8 +632,11 @@ class Stack:
         await impls[Api.prompts].initialize()
         if Api.conversations in impls:
             await impls[Api.conversations].initialize()
+        if Api.connectors in impls:
+            await impls[Api.connectors].initialize()
 
         await register_resources(self.run_config, impls)
+        await register_connectors(self.run_config, impls)
         await refresh_registry_once(impls)
         await validate_vector_stores_config(self.run_config.vector_stores, impls)
         await validate_safety_config(self.run_config.safety, impls)
@@ -727,7 +770,7 @@ def run_config_from_adhoc_config_spec(
         )
     ]
     config = StackConfig(
-        image_name="distro-test",
+        distro_name="distro-test",
        apis=list(provider_configs_by_api.keys()),
        providers=provider_configs_by_api,
        storage=StorageConfig(
@@ -740,6 +783,7 @@ def run_config_from_adhoc_config_spec(
             inference=InferenceStoreReference(backend="sql_default", table_name="inference_store"),
             conversations=SqlStoreReference(backend="sql_default", table_name="openai_conversations"),
             prompts=KVStoreReference(backend="kv_default", namespace="prompts"),
+            connectors=KVStoreReference(backend="kv_default", namespace="connectors"),
         ),
     ),
 )
llama_stack/core/storage/datatypes.py CHANGED
@@ -255,6 +255,11 @@
 class ResponsesStoreReference(InferenceStoreReference):
     """Responses store configuration with queue tuning."""
 
+    table_name: str = Field(
+        default="openai_responses",
+        description="Name of the table to use for storing OpenAI responses",
+    )
+
 
 class ServerStoresConfig(BaseModel):
     metadata: KVStoreReference | None = Field(
@@ -286,6 +291,10 @@
         default=KVStoreReference(backend="kv_default", namespace="prompts"),
         description="Prompts store configuration (uses KV backend)",
     )
+    connectors: KVStoreReference | None = Field(
+        default=KVStoreReference(backend="kv_default", namespace="connectors"),
+        description="Connectors store configuration (uses KV backend)",
+    )
 
 
 class StorageConfig(BaseModel):
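
Both new fields default to the shared `kv_default` backend under their own namespaces. A short sketch of overriding the connectors store reference, assuming the remaining `ServerStoresConfig` fields all carry defaults as the ones shown here do (the custom namespace is illustrative):

```python
from llama_stack.core.storage.datatypes import KVStoreReference, ServerStoresConfig

# Point the connectors store at a custom namespace; every other store keeps
# its default (e.g., prompts stays on kv_default/prompts).
stores = ServerStoresConfig(
    connectors=KVStoreReference(backend="kv_default", namespace="my_connectors"),
)
```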