llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  57. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  58. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  59. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  60. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  61. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  62. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  63. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  64. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  65. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  66. llama_stack/providers/registry/agents.py +1 -0
  67. llama_stack/providers/registry/inference.py +1 -9
  68. llama_stack/providers/registry/vector_io.py +136 -16
  69. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  70. llama_stack/providers/remote/files/s3/config.py +5 -3
  71. llama_stack/providers/remote/files/s3/files.py +2 -2
  72. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  73. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  74. llama_stack/providers/remote/inference/together/together.py +4 -0
  75. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  76. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  77. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  78. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  79. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  80. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  81. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  82. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  83. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  84. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  85. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  86. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  87. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  88. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  89. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  90. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  91. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  92. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  93. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  94. llama_stack/providers/utils/bedrock/client.py +3 -3
  95. llama_stack/providers/utils/bedrock/config.py +7 -7
  96. llama_stack/providers/utils/inference/__init__.py +0 -25
  97. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  98. llama_stack/providers/utils/inference/http_client.py +239 -0
  99. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  100. llama_stack/providers/utils/inference/model_registry.py +148 -2
  101. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  102. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  103. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  104. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  105. llama_stack/providers/utils/memory/vector_store.py +46 -19
  106. llama_stack/providers/utils/responses/responses_store.py +7 -7
  107. llama_stack/providers/utils/safety.py +114 -0
  108. llama_stack/providers/utils/tools/mcp.py +44 -3
  109. llama_stack/testing/api_recorder.py +9 -3
  110. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  111. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
  112. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  113. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  114. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  115. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  116. llama_stack/models/llama/hadamard_utils.py +0 -88
  117. llama_stack/models/llama/llama3/args.py +0 -74
  118. llama_stack/models/llama/llama3/dog.jpg +0 -0
  119. llama_stack/models/llama/llama3/generation.py +0 -378
  120. llama_stack/models/llama/llama3/model.py +0 -304
  121. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  122. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  123. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  124. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  125. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  126. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  127. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  128. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  129. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  130. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  131. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  132. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  133. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  134. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  135. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  136. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  137. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  138. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  139. llama_stack/models/llama/llama4/args.py +0 -107
  140. llama_stack/models/llama/llama4/ffn.py +0 -58
  141. llama_stack/models/llama/llama4/moe.py +0 -214
  142. llama_stack/models/llama/llama4/preprocess.py +0 -435
  143. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  144. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  145. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  146. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  147. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  148. llama_stack/models/llama/quantize_impls.py +0 -316
  149. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  150. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  151. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  152. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  153. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  154. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  155. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  156. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  157. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  158. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
  159. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0

llama_stack/providers/utils/safety.py (new file)
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import uuid
+from typing import TYPE_CHECKING
+
+from llama_stack_api import (
+    ModerationObject,
+    ModerationObjectResults,
+    OpenAIUserMessageParam,
+    RunModerationRequest,
+    RunShieldRequest,
+    RunShieldResponse,
+)
+
+if TYPE_CHECKING:
+    # Type stub for mypy - actual implementation provided by provider class
+    class _RunShieldProtocol:
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+
+class ShieldToModerationMixin:
+    """
+    Mixin that provides run_moderation implementation by delegating to run_shield.
+
+    Providers must implement run_shield(request: RunShieldRequest) for this mixin to work.
+    Providers with custom run_moderation implementations will override this automatically.
+    """
+
+    if TYPE_CHECKING:
+        # Type hint for mypy - run_shield is provided by the mixed-in class
+        async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse: ...
+
+    async def run_moderation(self, request: RunModerationRequest) -> ModerationObject:
+        """
+        Run moderation by converting input to messages and delegating to run_shield.
+
+        Args:
+            request: RunModerationRequest with input and model
+
+        Returns:
+            ModerationObject with results for each input
+
+        Raises:
+            ValueError: If model is None
+        """
+        if request.model is None:
+            raise ValueError(f"{self.__class__.__name__} moderation requires a model identifier")
+
+        inputs = request.input if isinstance(request.input, list) else [request.input]
+        results = []
+
+        for text_input in inputs:
+            # Convert string to OpenAI message format
+            message = OpenAIUserMessageParam(content=text_input)
+
+            # Call run_shield (must be implemented by the provider)
+            shield_request = RunShieldRequest(
+                shield_id=request.model,
+                messages=[message],
+            )
+            shield_response = await self.run_shield(shield_request)
+
+            # Convert RunShieldResponse to ModerationObjectResults
+            results.append(self._shield_response_to_moderation_result(shield_response))
+
+        return ModerationObject(
+            id=f"modr-{uuid.uuid4()}",
+            model=request.model,
+            results=results,
+        )
+
+    def _shield_response_to_moderation_result(
+        self,
+        shield_response: RunShieldResponse,
+    ) -> ModerationObjectResults:
+        """Convert RunShieldResponse to ModerationObjectResults.
+
+        Args:
+            shield_response: The response from run_shield
+
+        Returns:
+            ModerationObjectResults with appropriate fields set
+        """
+        if shield_response.violation is None:
+            # Safe content
+            return ModerationObjectResults(
+                flagged=False,
+                categories={},
+                category_scores={},
+                category_applied_input_types={},
+                user_message=None,
+                metadata={},
+            )
+
+        # Unsafe content - extract violation details
+        v = shield_response.violation
+        violation_type = v.metadata.get("violation_type", "unsafe")
+
+        # Ensure violation_type is a string (metadata values can be Any)
+        if not isinstance(violation_type, str):
+            violation_type = "unsafe"
+
+        return ModerationObjectResults(
+            flagged=True,
+            categories={violation_type: True},
+            category_scores={violation_type: 1.0},
+            category_applied_input_types={violation_type: ["text"]},
+            user_message=v.user_message,
+            metadata=v.metadata,
+        )
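
For context, a minimal usage sketch of the new mixin. The provider class, shield id, and the keyword arguments passed to the `llama_stack_api` models below are assumptions for illustration, not code from the package:

```python
# Hypothetical provider sketch: only run_shield is implemented; run_moderation
# comes from ShieldToModerationMixin. Assumes RunShieldResponse accepts
# violation=None and RunModerationRequest accepts input/model as keywords.
import asyncio

from llama_stack_api import RunModerationRequest, RunShieldRequest, RunShieldResponse

from llama_stack.providers.utils.safety import ShieldToModerationMixin


class ToySafetyProvider(ShieldToModerationMixin):
    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
        # A real provider would evaluate request.messages against a shield here;
        # returning no violation means the content is treated as safe.
        return RunShieldResponse(violation=None)


async def main() -> None:
    provider = ToySafetyProvider()
    moderation = await provider.run_moderation(
        RunModerationRequest(input=["hello world"], model="toy-shield")
    )
    print(moderation.results[0].flagged)  # expected: False


if __name__ == "__main__":
    asyncio.run(main())
```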

llama_stack/providers/utils/tools/mcp.py
@@ -8,6 +8,7 @@ import asyncio
 import hashlib
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
+from dataclasses import dataclass
 from enum import Enum
 from typing import Any, cast
 
@@ -241,10 +242,12 @@ class MCPSessionManager:
         raise last_exception
         raise RuntimeError(f"Failed to create MCP session for {endpoint}")
 
-    async def close_all(self) -> None:
-        """Close all cached sessions.
+    async def __aenter__(self):
+        """Enter the async context manager."""
+        return self
 
-        Should be called at the end of a request to clean up resources.
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Exit the async context manager and cleanup all sessions.
 
         Note: We catch BaseException (not just Exception) because:
         1. CancelledError is a BaseException and can occur during cleanup
@@ -275,6 +278,8 @@ class MCPSessionManager:
         if errors:
             logger.debug(f"Encountered {len(errors)} errors while closing MCP sessions (expected in streaming)")
 
+        return False
+
 
 @asynccontextmanager
 async def client_wrapper(endpoint: str, headers: dict[str, str]) -> AsyncGenerator[ClientSession, Any]:
@@ -470,3 +475,39 @@ async def invoke_mcp_tool(
     async with client_wrapper(endpoint, final_headers) as session:
         result = await session.call_tool(tool_name, kwargs)
         return _parse_mcp_result(result)
+
+
+@dataclass
+class MCPServerInfo:
+    """Server information from an MCP server."""
+
+    name: str
+    version: str
+    title: str | None = None
+    description: str | None = None
+
+
+async def get_mcp_server_info(
+    endpoint: str,
+    headers: dict[str, str] | None = None,
+    authorization: str | None = None,
+) -> MCPServerInfo:
+    """Get server info from an MCP server.
+    Args:
+        endpoint: MCP server endpoint URL
+        headers: Optional base headers to include
+        authorization: Optional OAuth access token (just the token, not "Bearer <token>")
+    Returns:
+        MCPServerInfo containing name, version, title, and description
+    """
+    final_headers = prepare_mcp_headers(headers, authorization)
+
+    async with client_wrapper(endpoint, final_headers) as session:
+        init_result = await session.initialize()
+
+        return MCPServerInfo(
+            name=init_result.serverInfo.name,
+            version=init_result.serverInfo.version,
+            title=init_result.serverInfo.title,
+            description=init_result.instructions,
+        )
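
A hedged sketch of how the new `get_mcp_server_info` helper might be called; the endpoint URL and token below are placeholders, and a reachable MCP server is assumed:

```python
# Illustrative only: placeholder endpoint and token, using the signature shown
# in the hunk above (endpoint, optional headers, optional raw OAuth token).
import asyncio

from llama_stack.providers.utils.tools.mcp import get_mcp_server_info


async def main() -> None:
    info = await get_mcp_server_info(
        endpoint="http://localhost:8000/mcp",  # placeholder MCP server URL
        authorization="my-oauth-token",        # raw token, not "Bearer <token>"
    )
    print(f"{info.name} {info.version}" + (f" - {info.title}" if info.title else ""))


if __name__ == "__main__":
    asyncio.run(main())
```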

llama_stack/testing/api_recorder.py
@@ -77,11 +77,14 @@ def _normalize_numeric_literal_strings(value: str) -> str:
     return _FLOAT_IN_STRING_PATTERN.sub(_replace, value)
 
 
-def _normalize_body_for_hash(value: Any) -> Any:
+def _normalize_body_for_hash(value: Any, exclude_stream_options: bool = False) -> Any:
     """Recursively normalize a JSON-like value to improve hash stability."""
 
     if isinstance(value, dict):
-        return {key: _normalize_body_for_hash(item) for key, item in value.items()}
+        normalized = {key: _normalize_body_for_hash(item) for key, item in value.items()}
+        if exclude_stream_options and "stream_options" in normalized:
+            del normalized["stream_options"]
+        return normalized
     if isinstance(value, list):
         return [_normalize_body_for_hash(item) for item in value]
     if isinstance(value, tuple):
@@ -146,7 +149,10 @@ def normalize_inference_request(method: str, url: str, headers: dict[str, Any],
 
     parsed = urlparse(url)
 
-    body_for_hash = _normalize_body_for_hash(body)
+    # Bedrock's OpenAI-compatible endpoint includes stream_options that vary between
+    # runs but don't affect the logical request. Exclude it for stable hashing.
+    is_bedrock = "bedrock" in parsed.netloc
+    body_for_hash = _normalize_body_for_hash(body, exclude_stream_options=is_bedrock)
 
     test_id = get_test_context()
     normalized: dict[str, Any] = {
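
The intent of this change is easier to see with a standalone sketch of the hashing behavior; this is illustrative code, not the package's internal helpers:

```python
# Sketch: two request bodies that differ only in "stream_options" should hash
# identically when that key is excluded, as is now done for Bedrock requests.
import hashlib
import json
from typing import Any


def _normalize(value: Any, exclude_stream_options: bool = False) -> Any:
    # Mirrors the idea above: drop stream_options at the top level only.
    if isinstance(value, dict):
        normalized = {k: _normalize(v) for k, v in value.items()}
        if exclude_stream_options:
            normalized.pop("stream_options", None)
        return normalized
    if isinstance(value, list):
        return [_normalize(v) for v in value]
    return value


def body_hash(body: dict[str, Any], is_bedrock: bool) -> str:
    canonical = json.dumps(_normalize(body, exclude_stream_options=is_bedrock), sort_keys=True)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


a = {"model": "m", "messages": [], "stream_options": {"include_usage": True}}
b = {"model": "m", "messages": [], "stream_options": {"include_usage": False}}
assert body_hash(a, is_bedrock=True) == body_hash(b, is_bedrock=True)    # stream_options ignored
assert body_hash(a, is_bedrock=False) != body_hash(b, is_bedrock=False)  # otherwise it matters
```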

{llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: llama_stack
-Version: 0.4.4
+Version: 0.5.0
 Summary: Llama Stack
 Author-email: Meta Llama <llama-oss@meta.com>
 License: MIT
@@ -45,8 +45,12 @@ Requires-Dist: starlette>=0.49.1
 Requires-Dist: psycopg2-binary
 Requires-Dist: tornado>=6.5.3
 Requires-Dist: urllib3>=2.6.3
+Requires-Dist: oracledb>=3.4.1
+Requires-Dist: oci>=2.165.0
+Requires-Dist: numpy>=2.3.2
+Requires-Dist: mcp>=1.23.0
 Provides-Extra: client
-Requires-Dist: llama-stack-client==0.4.4; extra == "client"
+Requires-Dist: llama-stack-client==0.5.0; extra == "client"
 Dynamic: license-file
 
 # Llama Stack
@@ -158,6 +162,7 @@ Please checkout our [Documentation](https://llamastack.github.io/docs) page for
 * A [Zero-to-Hero Guide](https://github.com/meta-llama/llama-stack/tree/main/docs/zero_to_hero_guide) that guide you through all the key components of llama stack with code samples.
 * [Contributing](CONTRIBUTING.md)
 * [Adding a new API Provider](https://llamastack.github.io/docs/contributing/new_api_provider) to walk-through how to add a new API provider.
+* [Release Process](RELEASE_PROCESS.md) for information about release schedules and versioning.
 
 ### Llama Stack Client SDKs
 
@@ -172,6 +177,13 @@ Check out our client SDKs for connecting to a Llama Stack server in your preferr
 
 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
 
+## Community
+
+We hold regular community calls to discuss the latest developments and get feedback from the community.
+
+- Date: every Thursday
+- Time: 09:00 AM PST (check the [Community Event on Discord](https://discord.com/events/1257833999603335178/1413266296748900513) for the latest details)
+
 ## 🌟 GitHub Star History
 ## Star History