llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to a supported public registry. It is provided for informational purposes only.
Files changed (159)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  57. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  58. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  59. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  60. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  61. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  62. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  63. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  64. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  65. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  66. llama_stack/providers/registry/agents.py +1 -0
  67. llama_stack/providers/registry/inference.py +1 -9
  68. llama_stack/providers/registry/vector_io.py +136 -16
  69. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  70. llama_stack/providers/remote/files/s3/config.py +5 -3
  71. llama_stack/providers/remote/files/s3/files.py +2 -2
  72. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  73. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  74. llama_stack/providers/remote/inference/together/together.py +4 -0
  75. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  76. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  77. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  78. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  79. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  80. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  81. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  82. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  83. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  84. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  85. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  86. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  87. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  88. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  89. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  90. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  91. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  92. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  93. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  94. llama_stack/providers/utils/bedrock/client.py +3 -3
  95. llama_stack/providers/utils/bedrock/config.py +7 -7
  96. llama_stack/providers/utils/inference/__init__.py +0 -25
  97. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  98. llama_stack/providers/utils/inference/http_client.py +239 -0
  99. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  100. llama_stack/providers/utils/inference/model_registry.py +148 -2
  101. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  102. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  103. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  104. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  105. llama_stack/providers/utils/memory/vector_store.py +46 -19
  106. llama_stack/providers/utils/responses/responses_store.py +7 -7
  107. llama_stack/providers/utils/safety.py +114 -0
  108. llama_stack/providers/utils/tools/mcp.py +44 -3
  109. llama_stack/testing/api_recorder.py +9 -3
  110. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  111. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
  112. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  113. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  114. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  115. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  116. llama_stack/models/llama/hadamard_utils.py +0 -88
  117. llama_stack/models/llama/llama3/args.py +0 -74
  118. llama_stack/models/llama/llama3/dog.jpg +0 -0
  119. llama_stack/models/llama/llama3/generation.py +0 -378
  120. llama_stack/models/llama/llama3/model.py +0 -304
  121. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  122. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  123. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  124. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  125. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  126. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  127. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  128. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  129. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  130. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  131. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  132. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  133. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  134. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  135. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  136. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  137. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  138. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  139. llama_stack/models/llama/llama4/args.py +0 -107
  140. llama_stack/models/llama/llama4/ffn.py +0 -58
  141. llama_stack/models/llama/llama4/moe.py +0 -214
  142. llama_stack/models/llama/llama4/preprocess.py +0 -435
  143. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  144. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  145. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  146. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  147. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  148. llama_stack/models/llama/quantize_impls.py +0 -316
  149. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  150. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  151. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  152. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  153. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  154. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  155. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  156. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  157. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  158. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
  159. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0
llama_stack/providers/remote/eval/nvidia/eval.py
@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     Agents,
     Benchmark,
-    BenchmarkConfig,
     BenchmarksProtocolPrivate,
     DatasetIO,
     Datasets,
     Eval,
     EvaluateResponse,
+    EvaluateRowsRequest,
     Inference,
     Job,
+    JobCancelRequest,
+    JobResultRequest,
     JobStatus,
+    JobStatusRequest,
+    RunEvalRequest,
     Scoring,
     ScoringResult,
 )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(
 
     async def run_eval(
         self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        request: RunEvalRequest,
     ) -> Job:
         """Run an evaluation job for a benchmark."""
         model = (
-            benchmark_config.eval_candidate.model
-            if benchmark_config.eval_candidate.type == "model"
-            else benchmark_config.eval_candidate.config.model
+            request.benchmark_config.eval_candidate.model
+            if request.benchmark_config.eval_candidate.type == "model"
+            else request.benchmark_config.eval_candidate.config.model
         )
         nvidia_model = self.get_provider_model_id(model) or model
 
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
-                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                 "target": {"type": "model", "model": nvidia_model},
             },
         )
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(
 
     async def evaluate_rows(
         self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
+        request: EvaluateRowsRequest,
     ) -> EvaluateResponse:
         raise NotImplementedError()
 
-    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
+    async def job_status(self, request: JobStatusRequest) -> Job:
         """Get the status of an evaluation job.
 
         EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
         JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
         """
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
         result_status = result["status"]
 
         job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
         elif result_status in ["cancelled"]:
             job_status = JobStatus.cancelled
 
-        return Job(job_id=job_id, status=job_status)
+        return Job(job_id=request.job_id, status=job_status)
 
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    async def job_cancel(self, request: JobCancelRequest) -> None:
         """Cancel the evaluation job."""
-        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+        await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})
 
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
         """Returns the results of the evaluation job."""
 
-        job = await self.job_status(benchmark_id, job_id)
+        job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+        job = await self.job_status(job_status_request)
         status = job.status
         if not status or status != JobStatus.completed:
-            raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+            raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")
 
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")
 
         return EvaluateResponse(
             # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
             generations=[],
             scores={
-                benchmark_id: ScoringResult(
+                request.benchmark_id: ScoringResult(
                     score_rows=[],
                     aggregated_results=result,
                 )
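
The eval endpoints now take request objects rather than positional arguments. A minimal usage sketch of the new call pattern, assuming an already-initialized NVIDIAEvalImpl instance (here eval_impl) and that RunEvalRequest and JobStatusRequest expose only the fields referenced in the hunks above:

from llama_stack_api import JobStatusRequest, RunEvalRequest

async def run_and_poll(eval_impl, benchmark_id, benchmark_config):
    # Submit the evaluation job with a single request object.
    job = await eval_impl.run_eval(
        RunEvalRequest(benchmark_id=benchmark_id, benchmark_config=benchmark_config)
    )
    # job_status also takes a request object now.
    return await eval_impl.job_status(
        JobStatusRequest(benchmark_id=benchmark_id, job_id=job.job_id)
    )
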
llama_stack/providers/remote/files/s3/config.py
@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.core.storage.datatypes import SqlStoreReference
 
@@ -16,8 +16,10 @@ class S3FilesImplConfig(BaseModel):
 
     bucket_name: str = Field(description="S3 bucket name to store files")
     region: str = Field(default="us-east-1", description="AWS region where the bucket is located")
-    aws_access_key_id: str | None = Field(default=None, description="AWS access key ID (optional if using IAM roles)")
-    aws_secret_access_key: str | None = Field(
+    aws_access_key_id: SecretStr | None = Field(
+        default=None, description="AWS access key ID (optional if using IAM roles)"
+    )
+    aws_secret_access_key: SecretStr | None = Field(
         default=None, description="AWS secret access key (optional if using IAM roles)"
     )
     endpoint_url: str | None = Field(default=None, description="Custom S3 endpoint URL (for MinIO, LocalStack, etc.)")
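
The S3 credentials are now pydantic SecretStr fields, so they are masked when the config is printed or serialized and must be read explicitly with get_secret_value(), as the next hunk shows. A minimal sketch of that behavior (standard pydantic v2, independent of llama-stack):

from pydantic import BaseModel, SecretStr

class Example(BaseModel):
    aws_secret_access_key: SecretStr | None = None

cfg = Example(aws_secret_access_key="not-a-real-key")
print(cfg)  # aws_secret_access_key=SecretStr('**********') -- masked in repr and logs
print(cfg.aws_secret_access_key.get_secret_value())  # "not-a-real-key" -- explicit access only
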
llama_stack/providers/remote/files/s3/files.py
@@ -57,8 +57,8 @@ def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
     if config.aws_access_key_id and config.aws_secret_access_key:
         s3_config.update(
             {
-                "aws_access_key_id": config.aws_access_key_id,
-                "aws_secret_access_key": config.aws_secret_access_key,
+                "aws_access_key_id": config.aws_access_key_id.get_secret_value(),
+                "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(),
             }
         )
 
llama_stack/providers/remote/inference/gemini/gemini.py
@@ -12,6 +12,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 from .config import GeminiConfig
@@ -37,6 +38,9 @@ class GeminiInferenceAdapter(OpenAIMixin):
         Override embeddings method to handle Gemini's missing usage statistics.
         Gemini's embedding API doesn't return usage information, so we provide default values.
         """
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         # Build request params conditionally to avoid NotGiven/Omit type mismatch
         request_params: dict[str, Any] = {
             "model": await self._get_provider_model_id(params.model),
llama_stack/providers/remote/inference/openai/openai.py
@@ -24,6 +24,8 @@ class OpenAIInferenceAdapter(OpenAIMixin):
 
     provider_data_api_key_field: str = "openai_api_key"
 
+    supports_tokenized_embeddings_input: bool = True
+
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
         "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
llama_stack/providers/remote/inference/together/together.py
@@ -18,6 +18,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 from .config import TogetherImplConfig
@@ -74,6 +75,9 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         - does not support user param, returns 400 Unrecognized request arguments supplied: user
         - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
         """
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         # Together support ticket #13332 -> will not fix
         if params.user is not None:
             raise ValueError("Together's embeddings endpoint does not support user param.")
llama_stack/providers/remote/inference/vertexai/config.py
@@ -19,7 +19,7 @@ class VertexAIProviderDataValidator(BaseModel):
     )
     vertex_location: str | None = Field(
         default=None,
-        description="Google Cloud location for Vertex AI (e.g., us-central1)",
+        description="Google Cloud location for Vertex AI (e.g., global)",
     )
 
 
@@ -31,7 +31,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
         description="Google Cloud project ID for Vertex AI",
     )
     location: str = Field(
-        default="us-central1",
+        default="global",
         description="Google Cloud location for Vertex AI",
     )
 
@@ -39,7 +39,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
     def sample_run_config(
         cls,
         project: str = "${env.VERTEX_AI_PROJECT:=}",
-        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+        location: str = "${env.VERTEX_AI_LOCATION:=global}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
llama_stack/providers/remote/inference/vertexai/vertexai.py
@@ -40,9 +40,12 @@ class VertexAIInferenceAdapter(OpenAIMixin):
         Get the Vertex AI OpenAI-compatible API base URL.
 
         Returns the Vertex AI OpenAI-compatible endpoint URL.
-        Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
+        Source: https://docs.cloud.google.com/vertex-ai/generative-ai/docs/start/openai
         """
-        return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
+        if not self.config.location or self.config.location == "global":
+            return f"https://aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/global/endpoints/openapi"
+        else:
+            return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
 
     async def list_provider_model_ids(self) -> Iterable[str]:
         """
llama_stack/providers/remote/inference/vllm/config.py
@@ -4,11 +4,16 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import warnings
 from pathlib import Path
 
-from pydantic import Field, HttpUrl, SecretStr, field_validator
+from pydantic import Field, HttpUrl, SecretStr, model_validator
 
-from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
+from llama_stack.providers.utils.inference.model_registry import (
+    NetworkConfig,
+    RemoteInferenceProviderConfig,
+    TLSConfig,
+)
 from llama_stack_api import json_schema_type
 
 
@@ -27,23 +32,33 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
         alias="api_token",
         description="The API token",
     )
-    tls_verify: bool | str = Field(
-        default=True,
-        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
+    tls_verify: bool | str | None = Field(
+        default=None,
+        deprecated=True,
+        description="DEPRECATED: Use 'network.tls.verify' instead. Whether to verify TLS certificates. "
+        "Can be a boolean or a path to a CA certificate file.",
    )
 
-    @field_validator("tls_verify")
-    @classmethod
-    def validate_tls_verify(cls, v):
-        if isinstance(v, str):
-            # Otherwise, treat it as a cert path
-            cert_path = Path(v).expanduser().resolve()
-            if not cert_path.exists():
-                raise ValueError(f"TLS certificate file does not exist: {v}")
-            if not cert_path.is_file():
-                raise ValueError(f"TLS certificate path is not a file: {v}")
-            return v
-        return v
+    @model_validator(mode="after")
+    def migrate_tls_verify_to_network(self) -> "VLLMInferenceAdapterConfig":
+        """Migrate legacy tls_verify to network.tls.verify for backward compatibility."""
+        if self.tls_verify is not None:
+            warnings.warn(
+                "The 'tls_verify' config option is deprecated. Please use 'network.tls.verify' instead.",
+                DeprecationWarning,
+                stacklevel=2,
+            )
+            # Convert string path to Path if needed
+            if isinstance(self.tls_verify, str):
+                verify_value: bool | Path = Path(self.tls_verify)
+            else:
+                verify_value = self.tls_verify
+
+            if self.network is None:
+                self.network = NetworkConfig(tls=TLSConfig(verify=verify_value))
+            elif self.network.tls is None:
+                self.network.tls = TLSConfig(verify=verify_value)
+        return self
 
     @classmethod
     def sample_run_config(
@@ -55,5 +70,9 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
             "base_url": base_url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
-            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",
+            "network": {
+                "tls": {
+                    "verify": "${env.VLLM_TLS_VERIFY:=true}",
+                },
+            },
         }
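
For reference, a minimal sketch of how the deprecated flag is expected to migrate into the new nested setting, assuming VLLMInferenceAdapterConfig can be constructed with just a base_url plus the legacy field (other required fields may exist in practice):

import warnings

from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Legacy spelling: a CA bundle path passed via the deprecated field.
    config = VLLMInferenceAdapterConfig(
        base_url="https://vllm.internal.example:8000/v1",
        tls_verify="/etc/ssl/certs/internal-ca.pem",
    )

print(config.network.tls.verify)  # the CA bundle path, now under network.tls.verify
for w in caught:
    print(w.category.__name__, "-", w.message)  # includes the deprecation warning
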
llama_stack/providers/remote/inference/vllm/vllm.py
@@ -73,9 +73,6 @@ class VLLMInferenceAdapter(OpenAIMixin):
         except Exception as e:
             return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
 
-    def get_extra_client_params(self):
-        return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
-
     async def check_model_availability(self, model: str) -> bool:
         """
         Skip the check when running without authentication.
llama_stack/providers/remote/inference/watsonx/watsonx.py
@@ -23,6 +23,7 @@ from llama_stack_api import (
     OpenAICompletionRequestWithExtraBody,
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
+    validate_embeddings_input_is_text,
 )
 
 logger = get_logger(name=__name__, category="providers::remote::watsonx")
@@ -147,6 +148,9 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
         """
         Override parent method to add watsonx-specific parameters.
         """
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         model_obj = await self.model_store.get_model(params.model)
 
         # Convert input to list if it's a string
llama_stack/providers/remote/post_training/nvidia/models.py
@@ -5,23 +5,15 @@
 # the root directory of this source tree.
 
 
-from llama_stack.models.llama.sku_types import CoreModelId
-from llama_stack.providers.utils.inference.model_registry import (
-    ProviderModelEntry,
-    build_hf_repo_model_entry,
-)
+from llama_stack.providers.utils.inference.model_registry import build_hf_repo_model_entry
 
 _MODEL_ENTRIES = [
     build_hf_repo_model_entry(
         "meta/llama-3.1-8b-instruct",
-        CoreModelId.llama3_1_8b_instruct.value,
+        "Llama3.1-8B-Instruct",
     ),
     build_hf_repo_model_entry(
         "meta/llama-3.2-1b-instruct",
-        CoreModelId.llama3_2_1b_instruct.value,
+        "Llama3.2-1B-Instruct",
     ),
 ]
-
-
-def get_model_entries() -> list[ProviderModelEntry]:
-    return _MODEL_ENTRIES
llama_stack/providers/remote/post_training/nvidia/post_training.py
@@ -14,13 +14,15 @@ from llama_stack.providers.remote.post_training.nvidia.config import NvidiaPostT
 from llama_stack.providers.remote.post_training.nvidia.utils import warn_unsupported_params
 from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
-    AlgorithmConfig,
-    DPOAlignmentConfig,
+    CancelTrainingJobRequest,
+    GetTrainingJobArtifactsRequest,
+    GetTrainingJobStatusRequest,
     JobStatus,
     PostTrainingJob,
     PostTrainingJobArtifactsResponse,
     PostTrainingJobStatusResponse,
-    TrainingConfig,
+    PreferenceOptimizeRequest,
+    SupervisedFineTuneRequest,
 )
 
 from .models import _MODEL_ENTRIES
@@ -156,7 +158,9 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
 
         return ListNvidiaPostTrainingJobs(data=jobs)
 
-    async def get_training_job_status(self, job_uuid: str) -> NvidiaPostTrainingJobStatusResponse:
+    async def get_training_job_status(
+        self, request: GetTrainingJobStatusRequest
+    ) -> NvidiaPostTrainingJobStatusResponse:
         """Get the status of a customization job.
         Updated the base class return type from PostTrainingJobResponse to NvidiaPostTrainingJob.
 
@@ -178,8 +182,8 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
         """
         response = await self._make_request(
             "GET",
-            f"/v1/customization/jobs/{job_uuid}/status",
-            params={"job_id": job_uuid},
+            f"/v1/customization/jobs/{request.job_uuid}/status",
+            params={"job_id": request.job_uuid},
         )
 
         api_status = response.pop("status").lower()
@@ -187,18 +191,20 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
 
         return NvidiaPostTrainingJobStatusResponse(
             status=JobStatus(mapped_status),
-            job_uuid=job_uuid,
+            job_uuid=request.job_uuid,
             started_at=datetime.fromisoformat(response.pop("created_at")),
             updated_at=datetime.fromisoformat(response.pop("updated_at")),
             **response,
         )
 
-    async def cancel_training_job(self, job_uuid: str) -> None:
+    async def cancel_training_job(self, request: CancelTrainingJobRequest) -> None:
         await self._make_request(
-            method="POST", path=f"/v1/customization/jobs/{job_uuid}/cancel", params={"job_id": job_uuid}
+            method="POST", path=f"/v1/customization/jobs/{request.job_uuid}/cancel", params={"job_id": request.job_uuid}
         )
 
-    async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
+    async def get_training_job_artifacts(
+        self, request: GetTrainingJobArtifactsRequest
+    ) -> PostTrainingJobArtifactsResponse:
         raise NotImplementedError("Job artifacts are not implemented yet")
 
     async def get_post_training_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
@@ -206,13 +212,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
 
     async def supervised_fine_tune(
         self,
-        job_uuid: str,
-        training_config: dict[str, Any],
-        hyperparam_search_config: dict[str, Any],
-        logger_config: dict[str, Any],
-        model: str,
-        checkpoint_dir: str | None,
-        algorithm_config: AlgorithmConfig | None = None,
+        request: SupervisedFineTuneRequest,
     ) -> NvidiaPostTrainingJob:
         """
         Fine-tunes a model on a dataset.
@@ -300,13 +300,16 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
         User is informed about unsupported parameters via warnings.
         """
 
+        # Convert training_config to dict for internal processing
+        training_config = request.training_config.model_dump()
+
         # Check for unsupported method parameters
         unsupported_method_params = []
-        if checkpoint_dir:
-            unsupported_method_params.append(f"checkpoint_dir={checkpoint_dir}")
-        if hyperparam_search_config:
+        if request.checkpoint_dir:
+            unsupported_method_params.append(f"checkpoint_dir={request.checkpoint_dir}")
+        if request.hyperparam_search_config:
             unsupported_method_params.append("hyperparam_search_config")
-        if logger_config:
+        if request.logger_config:
             unsupported_method_params.append("logger_config")
 
         if unsupported_method_params:
@@ -344,7 +347,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
 
         # Prepare base job configuration
         job_config = {
-            "config": model,
+            "config": request.model,
             "dataset": {
                 "name": training_config["data_config"]["dataset_id"],
                 "namespace": self.config.dataset_namespace,
@@ -388,14 +391,14 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
             job_config["hyperparameters"].pop("sft")
 
         # Handle LoRA-specific configuration
-        if algorithm_config:
-            if algorithm_config.type == "LoRA":
-                warn_unsupported_params(algorithm_config, supported_params["lora_config"], "LoRA config")
+        if request.algorithm_config:
+            if request.algorithm_config.type == "LoRA":
+                warn_unsupported_params(request.algorithm_config, supported_params["lora_config"], "LoRA config")
                 job_config["hyperparameters"]["lora"] = {
-                    k: v for k, v in {"alpha": algorithm_config.alpha}.items() if v is not None
+                    k: v for k, v in {"alpha": request.algorithm_config.alpha}.items() if v is not None
                 }
             else:
-                raise NotImplementedError(f"Unsupported algorithm config: {algorithm_config}")
+                raise NotImplementedError(f"Unsupported algorithm config: {request.algorithm_config}")
 
         # Create the customization job
         response = await self._make_request(
@@ -416,12 +419,7 @@ class NvidiaPostTrainingAdapter(ModelRegistryHelper):
 
     async def preference_optimize(
         self,
-        job_uuid: str,
-        finetuned_model: str,
-        algorithm_config: DPOAlignmentConfig,
-        training_config: TrainingConfig,
-        hyperparam_search_config: dict[str, Any],
-        logger_config: dict[str, Any],
+        request: PreferenceOptimizeRequest,
     ) -> PostTrainingJob:
         """Optimize a model based on preference data."""
         raise NotImplementedError("Preference optimization is not implemented yet")
llama_stack/providers/remote/safety/bedrock/bedrock.py
@@ -5,12 +5,13 @@
 # the root directory of this source tree.
 
 import json
-from typing import Any
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
+from llama_stack.providers.utils.safety import ShieldToModerationMixin
 from llama_stack_api import (
-    OpenAIMessageParam,
+    GetShieldRequest,
+    RunShieldRequest,
     RunShieldResponse,
     Safety,
     SafetyViolation,
@@ -24,7 +25,7 @@ from .config import BedrockSafetyConfig
 logger = get_logger(name=__name__, category="safety::bedrock")
 
 
-class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
+class BedrockSafetyAdapter(ShieldToModerationMixin, Safety, ShieldsProtocolPrivate):
     def __init__(self, config: BedrockSafetyConfig) -> None:
         self.config = config
         self.registered_shields = []
@@ -55,49 +56,31 @@ class BedrockSafetyAdapter(Safety, ShieldsProtocolPrivate):
     async def unregister_shield(self, identifier: str) -> None:
         pass
 
-    async def run_shield(
-        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] = None
-    ) -> RunShieldResponse:
-        shield = await self.shield_store.get_shield(shield_id)
+    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+        shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
         if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
-
-        """
-        This is the implementation for the bedrock guardrails. The input to the guardrails is to be of this format
-        ```content = [
-            {
-                "text": {
-                    "text": "Is the AB503 Product a better investment than the S&P 500?"
-                }
-            }
-        ]```
-        Incoming messages contain content, role . For now we will extract the content and
-        default the "qualifiers": ["query"]
-        """
+            raise ValueError(f"Shield {request.shield_id} not found")
 
         shield_params = shield.params
-        logger.debug(f"run_shield::{shield_params}::messages={messages}")
+        logger.debug(f"run_shield::{shield_params}::messages={request.messages}")
 
-        # - convert the messages into format Bedrock expects
         content_messages = []
-        for message in messages:
+        for message in request.messages:
             content_messages.append({"text": {"text": message.content}})
         logger.debug(f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:")
 
         response = self.bedrock_runtime_client.apply_guardrail(
             guardrailIdentifier=shield.provider_resource_id,
             guardrailVersion=shield_params["guardrailVersion"],
-            source="OUTPUT",  # or 'INPUT' depending on your use case
+            source="OUTPUT",
             content=content_messages,
         )
         if response["action"] == "GUARDRAIL_INTERVENED":
             user_message = ""
             metadata = {}
             for output in response["outputs"]:
-                # guardrails returns a list - however for this implementation we will leverage the last values
                 user_message = output["text"]
             for assessment in response["assessments"]:
-                # guardrails returns a list - however for this implementation we will leverage the last values
                 metadata = dict(assessment)
 
         return RunShieldResponse(
llama_stack/providers/remote/safety/nvidia/nvidia.py
@@ -9,9 +9,11 @@ from typing import Any
 import requests
 
 from llama_stack.log import get_logger
+from llama_stack.providers.utils.safety import ShieldToModerationMixin
 from llama_stack_api import (
-    ModerationObject,
+    GetShieldRequest,
     OpenAIMessageParam,
+    RunShieldRequest,
     RunShieldResponse,
     Safety,
     SafetyViolation,
@@ -25,7 +27,7 @@ from .config import NVIDIASafetyConfig
 logger = get_logger(name=__name__, category="safety::nvidia")
 
 
-class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
+class NVIDIASafetyAdapter(ShieldToModerationMixin, Safety, ShieldsProtocolPrivate):
     def __init__(self, config: NVIDIASafetyConfig) -> None:
         """
         Initialize the NVIDIASafetyAdapter with a given safety configuration.
@@ -48,32 +50,14 @@ class NVIDIASafetyAdapter(Safety, ShieldsProtocolPrivate):
     async def unregister_shield(self, identifier: str) -> None:
         pass
 
-    async def run_shield(
-        self, shield_id: str, messages: list[OpenAIMessageParam], params: dict[str, Any] | None = None
-    ) -> RunShieldResponse:
-        """
-        Run a safety shield check against the provided messages.
-
-        Args:
-            shield_id (str): The unique identifier for the shield to be used.
-            messages (List[Message]): A list of Message objects representing the conversation history.
-            params (Optional[dict[str, Any]]): Additional parameters for the shield check.
-
-        Returns:
-            RunShieldResponse: The response containing safety violation details if any.
-
-        Raises:
-            ValueError: If the shield with the provided shield_id is not found.
-        """
-        shield = await self.shield_store.get_shield(shield_id)
+    async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+        """Run a safety shield check against the provided messages."""
+        shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
         if not shield:
-            raise ValueError(f"Shield {shield_id} not found")
+            raise ValueError(f"Shield {request.shield_id} not found")
 
         self.shield = NeMoGuardrails(self.config, shield.shield_id)
-        return await self.shield.run(messages)
-
-    async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
-        raise NotImplementedError("NVIDIA safety provider currently does not implement run_moderation")
+        return await self.shield.run(request.messages)
 
 
 class NeMoGuardrails:
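
Across the safety providers, run_shield now takes a single RunShieldRequest. A minimal sketch of the new call pattern, assuming an already-initialized safety adapter and the same list of OpenAIMessageParam messages the old positional signature took:

from llama_stack_api import RunShieldRequest

async def moderate(adapter, shield_id, messages):
    # messages: list[OpenAIMessageParam], unchanged from the previous signature
    request = RunShieldRequest(shield_id=shield_id, messages=messages)
    return await adapter.run_shield(request)
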