llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/remote/inference/vertexai/vertexai.py

@@ -40,9 +40,12 @@ class VertexAIInferenceAdapter(OpenAIMixin):
         Get the Vertex AI OpenAI-compatible API base URL.

         Returns the Vertex AI OpenAI-compatible endpoint URL.
-        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
+        Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/start/openai
         """
-
+        if not self.config.location or self.config.location == "global":
+            return f"https://aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/global/endpoints/openapi"
+        else:
+            return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"

     async def list_provider_model_ids(self) -> Iterable[str]:
         """
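The hunk above makes the OpenAI-compatible base URL location-aware: a missing or "global" location keeps the global endpoint, anything else routes to the matching regional host. As a quick illustration only, the sketch below mirrors those two f-strings with made-up `project` and `location` values; it does not call the adapter itself.

```python
# Sketch: reproduces the URL selection added above, with hypothetical inputs.
def vertex_openai_base_url(project: str, location: str | None) -> str:
    if not location or location == "global":
        return f"https://aiplatform.googleapis.com/v1/projects/{project}/locations/global/endpoints/openapi"
    return f"https://{location}-aiplatform.googleapis.com/v1/projects/{project}/locations/{location}/endpoints/openapi"


print(vertex_openai_base_url("my-project", None))           # global endpoint
print(vertex_openai_base_url("my-project", "us-central1"))  # regional endpoint
```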
llama_stack/providers/remote/inference/vllm/config.py

@@ -4,11 +4,16 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import warnings
 from pathlib import Path

-from pydantic import Field, HttpUrl, SecretStr,
+from pydantic import Field, HttpUrl, SecretStr, model_validator

-from llama_stack.providers.utils.inference.model_registry import
+from llama_stack.providers.utils.inference.model_registry import (
+    NetworkConfig,
+    RemoteInferenceProviderConfig,
+    TLSConfig,
+)
 from llama_stack_api import json_schema_type

@@ -27,23 +32,33 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
         alias="api_token",
         description="The API token",
     )
-    tls_verify: bool | str = Field(
-        default=
-
+    tls_verify: bool | str | None = Field(
+        default=None,
+        deprecated=True,
+        description="DEPRECATED: Use 'network.tls.verify' instead. Whether to verify TLS certificates. "
+        "Can be a boolean or a path to a CA certificate file.",
     )

-    @
-
-
-    if
-
-
-
-
-
-
-
+    @model_validator(mode="after")
+    def migrate_tls_verify_to_network(self) -> "VLLMInferenceAdapterConfig":
+        """Migrate legacy tls_verify to network.tls.verify for backward compatibility."""
+        if self.tls_verify is not None:
+            warnings.warn(
+                "The 'tls_verify' config option is deprecated. Please use 'network.tls.verify' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            # Convert string path to Path if needed
+            if isinstance(self.tls_verify, str):
+                verify_value: bool | Path = Path(self.tls_verify)
+            else:
+                verify_value = self.tls_verify
+
+            if self.network is None:
+                self.network = NetworkConfig(tls=TLSConfig(verify=verify_value))
+            elif self.network.tls is None:
+                self.network.tls = TLSConfig(verify=verify_value)
+        return self

     @classmethod
     def sample_run_config(
@@ -55,5 +70,9 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
             "base_url": base_url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
-            "
+            "network": {
+                "tls": {
+                    "verify": "${env.VLLM_TLS_VERIFY:=true}",
+                },
+            },
         }
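Taken together, the config.py hunks above deprecate the flat `tls_verify` field in favor of a nested `network.tls.verify` setting, with an "after" model validator that copies a legacy value forward and emits a `DeprecationWarning` (the matching removal of the per-request httpx override appears in the vllm.py hunk below). The following is a self-contained sketch of that migration pattern using toy pydantic models, not the real `VLLMInferenceAdapterConfig`; the real class has additional fields and defaults that are not represented here.

```python
# Standalone sketch of the deprecation shim above (toy models, not the real config class).
import warnings
from pathlib import Path

from pydantic import BaseModel, Field, model_validator


class TLSConfig(BaseModel):
    verify: bool | Path = True


class NetworkConfig(BaseModel):
    tls: TLSConfig | None = None


class DemoConfig(BaseModel):
    network: NetworkConfig | None = None
    tls_verify: bool | str | None = Field(default=None, deprecated=True)

    @model_validator(mode="after")
    def migrate_tls_verify_to_network(self) -> "DemoConfig":
        # Mirror the migration: warn, coerce a string path to Path, then fill network.tls.
        if self.tls_verify is not None:
            warnings.warn("'tls_verify' is deprecated; use 'network.tls.verify'", DeprecationWarning, stacklevel=2)
            verify = Path(self.tls_verify) if isinstance(self.tls_verify, str) else self.tls_verify
            if self.network is None:
                self.network = NetworkConfig(tls=TLSConfig(verify=verify))
            elif self.network.tls is None:
                self.network.tls = TLSConfig(verify=verify)
        return self


cfg = DemoConfig(tls_verify="/etc/ssl/certs/my-ca.pem")
print(cfg.network.tls.verify)  # /etc/ssl/certs/my-ca.pem (coerced to a Path)
```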
llama_stack/providers/remote/inference/vllm/vllm.py

@@ -73,9 +73,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")

-    def get_extra_client_params(self):
-        return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
-
     async def check_model_availability(self, model: str) -> bool:
         """
         Skip the check when running without authentication.
llama_stack/providers/remote/inference/watsonx/watsonx.py

@@ -23,6 +23,7 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
+    validate_embeddings_input_is_text,
 )

 logger = get_logger(name=__name__, category="providers::remote::watsonx")
@@ -147,6 +148,9 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         """
         Override parent method to add watsonx-specific parameters.
         """
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         model_obj = await self.model_store.get_model(params.model)

         # Convert input to list if it's a string
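With the `validate_embeddings_input_is_text` guard above, the watsonx embeddings path now rejects pre-tokenized (token-array) input up front instead of passing it through. A hedged client-side sketch of the distinction follows; the base URL, API key, and model id are placeholders rather than values from this diff, and the exact exception surfaced for token-array input is provider-defined.

```python
# Sketch: text input embeds normally, token-id arrays are what the new guard rejects.
from openai import OpenAI

# Placeholder endpoint for a running stack's OpenAI-compatible routes (assumption).
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

ok = client.embeddings.create(model="your-embedding-model", input=["hello world"])
print(len(ok.data[0].embedding))

try:
    client.embeddings.create(model="your-embedding-model", input=[[101, 2023, 102]])
except Exception as exc:  # the specific error type/status is not specified by this diff
    print(f"rejected: {exc}")
```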
llama_stack/providers/remote/post_training/nvidia/README.md

@@ -0,0 +1,151 @@
# NVIDIA Post-Training Provider for LlamaStack

This provider enables fine-tuning of LLMs using NVIDIA's NeMo Customizer service.

## Features

- Supervised fine-tuning of Llama models
- LoRA fine-tuning support
- Job management and status tracking

## Getting Started

### Prerequisites

- LlamaStack with NVIDIA configuration
- Access to Hosted NVIDIA NeMo Customizer service
- Dataset registered in the Hosted NVIDIA NeMo Customizer service
- Base model downloaded and available in the Hosted NVIDIA NeMo Customizer service

### Setup

Build the NVIDIA environment:

```bash
uv pip install llama-stack-client
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```

### Basic Usage using the LlamaStack Python Client

### Create Customization Job

#### Initialize the client

```python
import os

os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
os.environ["NVIDIA_PROJECT_ID"] = "test-project"
os.environ["NVIDIA_OUTPUT_MODEL_DIR"] = "test-example-model@v1"

from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```

#### Configure fine-tuning parameters

```python
from llama_stack_client.types.post_training_supervised_fine_tune_params import (
    TrainingConfig,
    TrainingConfigDataConfig,
    TrainingConfigOptimizerConfig,
)
from llama_stack_client.types.algorithm_config_param import LoraFinetuningConfig
```

#### Set up LoRA configuration

```python
algorithm_config = LoraFinetuningConfig(type="LoRA", adapter_dim=16)
```

#### Configure training data

```python
data_config = TrainingConfigDataConfig(
    dataset_id="your-dataset-id",  # Use client.datasets.list() to see available datasets
    batch_size=16,
)
```

#### Configure optimizer

```python
optimizer_config = TrainingConfigOptimizerConfig(
    lr=0.0001,
)
```

#### Set up training configuration

```python
training_config = TrainingConfig(
    n_epochs=2,
    data_config=data_config,
    optimizer_config=optimizer_config,
)
```

#### Start fine-tuning job

```python
training_job = client.post_training.supervised_fine_tune(
    job_uuid="unique-job-id",
    model="meta-llama/Llama-3.1-8B-Instruct",
    checkpoint_dir="",
    algorithm_config=algorithm_config,
    training_config=training_config,
    logger_config={},
    hyperparam_search_config={},
)
```

### List all jobs

```python
jobs = client.post_training.job.list()
```

### Check job status

```python
job_status = client.post_training.job.status(job_uuid="your-job-id")
```

### Cancel a job

```python
client.post_training.job.cancel(job_uuid="your-job-id")
```

### Inference with the fine-tuned model

#### 1. Register the model

```python
from llama_stack_api.models import Model, ModelType

client.models.register(
    model_id="test-example-model@v1",
    provider_id="nvidia",
    provider_model_id="test-example-model@v1",
    model_type=ModelType.llm,
)
```

#### 2. Inference with the fine-tuned model

```python
response = client.completions.create(
    prompt="Complete the sentence using one word: Roses are red, violets are ",
    stream=False,
    model="test-example-model@v1",
    max_tokens=50,
)
print(response.choices[0].text)
```
llama_stack/providers/remote/post_training/nvidia/models.py

@@ -5,23 +5,15 @@
 # the root directory of this source tree.


-from llama_stack.
-from llama_stack.providers.utils.inference.model_registry import (
-    ProviderModelEntry,
-    build_hf_repo_model_entry,
-)
+from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry

 _MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama-3.1-8b-instruct",
-
+        "Llama3.1-8B-Instruct",
     ),
     build_hf_repo_model_entry(
         "meta/llama-3.2-1b-instruct",
-
+        "Llama3.2-1B-Instruct",
     ),
 ]
-
-
-def get_model_entries() -> list[ProviderModelEntry]:
-    return _MODEL_ENTRIES
llama_stack/providers/remote/post_training/nvidia/post_training.py

@@ -14,13 +14,15 @@ from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostT
 from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
-
-
+    CancelTrainingJobRequest,
+    GetTrainingJobArtifactsRequest,
+    GetTrainingJobStatusRequest,
     JobStatus,
     PostTrainingJob,
     PostTrainingJobArtifactsResponse,
     PostTrainingJobStatusResponse,
-
+    PreferenceOptimizeRequest,
+    SupervisedFineTuneRequest,
 )

 from .models import _MODEL_ENTRIES
@@ -156,7 +158,9 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):

         return ListNvidiaPostTrainingJobs(data=jobs)

-    async def get_training_job_status(
+    async def get_training_job_status(
+        self, request: GetTrainingJobStatusRequest
+    ) -> NvidiaPostTrainingJobStatusResponse:
         """Get the status of a customization job.
         Updated the base class return type from PostTrainingJobResponse to NvidiaPostTrainingJob.

@@ -178,8 +182,8 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
         """
         response = await self._make_request(
             "GET",
-            f"/v1/customization/jobs/{job_uuid}/status",
-            params={"job_id": job_uuid},
+            f"/v1/customization/jobs/{request.job_uuid}/status",
+            params={"job_id": request.job_uuid},
         )

         api_status = response.pop("status").lower()
@@ -187,18 +191,20 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):

         return NvidiaPostTrainingJobStatusResponse(
             status=JobStatus(mapped_status),
-            job_uuid=job_uuid,
+            job_uuid=request.job_uuid,
             started_at=datetime.fromisoformat(response.pop("created_at")),
             updated_at=datetime.fromisoformat(response.pop("updated_at")),
             **response,
         )

-    async def cancel_training_job(self,
+    async def cancel_training_job(self, request: CancelTrainingJobRequest) -> None:
         await self._make_request(
-            method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid}
+            method="POST", path=f"/v1/customization/jobs/{request.job_uuid}/cancel", params={"job_id": request.job_uuid}
         )

-    async def get_training_job_artifacts(
+    async def get_training_job_artifacts(
+        self, request: GetTrainingJobArtifactsRequest
+    ) -> PostTrainingJobArtifactsResponse:
         raise NotImplementedError("Job artifacts are not implemented yet")

     async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
@@ -206,13 +212,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):

     async def supervised_fine_tune(
         self,
-
-        training_config: dict[str, Any],
-        hyperparam_search_config: dict[str, Any],
-        logger_config: dict[str, Any],
-        model: str,
-        checkpoint_dir: str | None,
-        algorithm_config: AlgorithmConfig | None = None,
+        request: SupervisedFineTuneRequest,
     ) -> NvidiaPostTrainingJob:
         """
         Fine-tunes a model on a dataset.
@@ -300,13 +300,16 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
         User is informed about unsupported parameters via warnings.
         """

+        # Convert training_config to dict for internal processing
+        training_config = request.training_config.model_dump()
+
         # Check for unsupported method parameters
         unsupported_method_params = []
-        if checkpoint_dir:
-            unsupported_method_params.append(f"checkpoint_dir={checkpoint_dir}")
-        if hyperparam_search_config:
+        if request.checkpoint_dir:
+            unsupported_method_params.append(f"checkpoint_dir={request.checkpoint_dir}")
+        if request.hyperparam_search_config:
             unsupported_method_params.append("hyperparam_search_config")
-        if logger_config:
+        if request.logger_config:
             unsupported_method_params.append("logger_config")

         if unsupported_method_params:
@@ -344,7 +347,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):

         # Prepare base job configuration
         job_config = {
-            "config": model,
+            "config": request.model,
             "dataset": {
                 "name": training_config["data_config"]["dataset_id"],
                 "namespace": self.config.dataset_namespace,
@@ -388,14 +391,14 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
             job_config["hyperparameters"].pop("sft")

         # Handle LoRA-specific configuration
-        if algorithm_config:
-            if algorithm_config.type == "LoRA":
-                warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config")
+        if request.algorithm_config:
+            if request.algorithm_config.type == "LoRA":
+                warn_unsupported_params(request.algorithm_config, supported_params["lora_config"], "LoRA config")
                 job_config["hyperparameters"]["lora"] = {
-                    k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None
+                    k: v for k, v in {"alpha": request.algorithm_config.alpha}.items() if v is not None
                 }
             else:
-                raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}")
+                raise NotImplementedError(f"Unsupported algorithm config: {request.algorithm_config}")

         # Create the customization job
         response = await self._make_request(
@@ -416,12 +419,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):

     async def preference_optimize(
         self,
-
-        finetuned_model: str,
-        algorithm_config: DPOAlignmentConfig,
-        training_config: TrainingConfig,
-        hyperparam_search_config: dict[str, Any],
-        logger_config: dict[str, Any],
+        request: PreferenceOptimizeRequest,
     ) -> PostTrainingJob:
         """Optimize a model based on preference data."""
         raise NotImplementedError("Preference optimization is not implemented yet")
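The hunks above switch the adapter's methods from loose keyword arguments to typed request objects imported from `llama_stack_api` (`SupervisedFineTuneRequest`, `GetTrainingJobStatusRequest`, `CancelTrainingJobRequest`, and so on); the client-side flow in the README above is unchanged. A hedged sketch of driving the adapter directly with the new surface follows; `adapter` stands for an already-constructed `NvidiaPostTrainingAdapter`, only fields visible in the hunks are used, and the request constructors may accept or require more than is shown.

```python
# Sketch: the request-object call pattern implied by the hunks above (field names from the diff).
from llama_stack_api import CancelTrainingJobRequest, GetTrainingJobStatusRequest


async def check_and_maybe_cancel(adapter, job_uuid: str) -> None:
    # Poll the customization job's status via the typed request.
    status = await adapter.get_training_job_status(GetTrainingJobStatusRequest(job_uuid=job_uuid))
    print(status.status, status.started_at, status.updated_at)

    # Cancel with the matching request type; the status values checked here are assumptions.
    if status.status.value in ("scheduled", "in_progress"):
        await adapter.cancel_training_job(CancelTrainingJobRequest(job_uuid=job_uuid))
```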
llama_stack/providers/remote/safety/bedrock/bedrock.py

@@ -5,12 +5,13 @@
 # the root directory of this source tree.

 import json
-from typing import Any

 from llama_stack.log import get_logger
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
+from llama_stack.providers.utils.safety import ShieldToModerationMixin
 from llama_stack_api import (
-
+    GetShieldRequest,
+    RunShieldRequest,
     RunShieldResponse,
     Safety,
     SafetyViolation,
@@ -24,7 +25,7 @@ from .config import BedrockSafetyConfig
 logger = get_logger(name=__name__, category="safety::bedrock")


-class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
+class BedrockSafetyAdapter(ShieldToModerationMixin, Safety, ShieldsProtocolPrivate):
     def __init__(self, config: BedrockSafetyConfig) -> None:
         self.config = config
         self.registered_shields = []
@@ -55,49 +56,31 @@ class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
     async def unregister_shield(self, identifier: str) -> None:
         pass

-    async def run_shield(
-
-    ) -> RunShieldResponse:
-        shield = await self.shield_store.get_shield(shield_id)
+    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+        shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
         if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
-
-        """
-        This is the implementation for the bedrock guardrails. The input to the guardrails is to be of this format
-        ```content = [
-            {
-                "text": {
-                    "text": "Is the AB503 Product a better investment than the S&P 500?"
-                }
-            }
-        ]```
-        Incoming messages contain content, role . For now we will extract the content and
-        default the "qualifiers": ["query"]
-        """
+            raise ValueError(f"Shield {request.shield_id} not found")

         shield_params = shield.params
-        logger.debug(f"run_shield::{shield_params}::messages={messages}")
+        logger.debug(f"run_shield::{shield_params}::messages={request.messages}")

-        # - convert the messages into format Bedrock expects
         content_messages = []
-        for message in messages:
+        for message in request.messages:
             content_messages.append({"text": {"text": message.content}})
         logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:")

         response = self.bedrock_runtime_client.apply_guardrail(
             guardrailIdentifier=shield.provider_resource_id,
             guardrailVersion=shield_params["guardrailVersion"],
-            source="OUTPUT",
+            source="OUTPUT",
             content=content_messages,
         )
         if response["action"] == "GUARDRAIL_INTERVENED":
             user_message = ""
             metadata = {}
             for output in response["outputs"]:
-                # guardrails returns a list - however for this implementation we will leverage the last values
                 user_message = output["text"]
             for assessment in response["assessments"]:
-                # guardrails returns a list - however for this implementation we will leverage the last values
                 metadata = dict(assessment)

         return RunShieldResponse(
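Both safety adapters touched in this release (Bedrock above, NVIDIA below) now take a single `RunShieldRequest`, resolve shields via `GetShieldRequest`, and inherit `run_moderation` from the new shared `ShieldToModerationMixin`. A hedged sketch of calling an adapter directly with the new request type follows; `adapter` is assumed to be an already-initialized safety adapter, and the dict-shaped message is an assumption (the diff only shows that each message exposes `.content`, while the client-facing call shown in the README below still passes `shield_id` plus `messages`).

```python
# Sketch: provider-level run_shield call with the request objects introduced above.
from llama_stack_api import RunShieldRequest


async def moderate(adapter, shield_id: str, user_text: str) -> None:
    request = RunShieldRequest(
        shield_id=shield_id,
        messages=[{"role": "user", "content": user_text}],  # message shape is an assumption
    )
    response = await adapter.run_shield(request)
    if response.violation:
        print(f"violation: {response.violation.user_message}")
    else:
        print("no safety violation")
```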
llama_stack/providers/remote/safety/nvidia/README.md

@@ -0,0 +1,78 @@
# NVIDIA Safety Provider for LlamaStack

This provider enables safety checks and guardrails for LLM interactions using NVIDIA's NeMo Guardrails service.

## Features

- Run safety checks for messages

## Getting Started

### Prerequisites

- LlamaStack with NVIDIA configuration
- Access to NVIDIA NeMo Guardrails service
- NIM for model to use for safety check is deployed

### Setup

Build the NVIDIA environment:

```bash
uv pip install llama-stack-client
uv run llama stack list-deps nvidia | xargs -L1 uv pip install
```

### Basic Usage using the LlamaStack Python Client

#### Initialize the client

```python
import os

os.environ["NVIDIA_API_KEY"] = "your-api-key"
os.environ["NVIDIA_GUARDRAILS_URL"] = "http://guardrails.test"

from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("nvidia")
client.initialize()
```

#### Create a safety shield

```python
from llama_stack_api.safety import Shield
from llama_stack_api.inference import Message

# Create a safety shield
shield = Shield(
    shield_id="your-shield-id",
    provider_resource_id="safety-model-id",  # The model to use for safety checks
    description="Safety checks for content moderation",
)

# Register the shield
await client.safety.register_shield(shield)
```

#### Run safety checks

```python
# Messages to check
messages = [Message(role="user", content="Your message to check")]

# Run safety check
response = await client.safety.run_shield(
    shield_id="your-shield-id",
    messages=messages,
)

# Check for violations
if response.violation:
    print(f"Safety violation detected: {response.violation.user_message}")
    print(f"Violation level: {response.violation.violation_level}")
    print(f"Metadata: {response.violation.metadata}")
else:
    print("No safety violations detected")
```
llama_stack/providers/remote/safety/nvidia/nvidia.py

@@ -9,9 +9,11 @@ from typing import Any
 import requests

 from llama_stack.log import get_logger
+from llama_stack.providers.utils.safety import ShieldToModerationMixin
 from llama_stack_api import (
-
+    GetShieldRequest,
     OpenAIMessageParam,
+    RunShieldRequest,
     RunShieldResponse,
     Safety,
     SafetyViolation,
@@ -25,7 +27,7 @@ from .config import NVIDIASafetyConfig
 logger = get_logger(name=__name__, category="safety::nvidia")


-class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
+class NVIDIASafetyAdapter(ShieldToModerationMixin, Safety, ShieldsProtocolPrivate):
     def __init__(self, config: NVIDIASafetyConfig) -> None:
         """
         Initialize the NVIDIASafetyAdapter with a given safety configuration.
@@ -48,32 +50,14 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
     async def unregister_shield(self, identifier: str) -> None:
         pass

-    async def run_shield(
-
-
-        """
-        Run a safety shield check against the provided messages.
-
-        Args:
-            shield_id (str): The unique identifier for the shield to be used.
-            messages (List[Message]): A list of Message objects representing the conversation history.
-            params (Optional[dict[str, Any]]): Additional parameters for the shield check.
-
-        Returns:
-            RunShieldResponse: The response containing safety violation details if any.
-
-        Raises:
-            ValueError: If the shield with the provided shield_id is not found.
-        """
-        shield = await self.shield_store.get_shield(shield_id)
+    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+        """Run a safety shield check against the provided messages."""
+        shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
         if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
+            raise ValueError(f"Shield {request.shield_id} not found")

         self.shield = NeMoGuardrails(self.config, shield.shield_id)
-        return await self.shield.run(messages)
-
-    async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
-        raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation")
+        return await self.shield.run(request.messages)


 class NeMoGuardrails:
|