llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
```diff
--- /dev/null
+++ b/llama_stack/providers/utils/inference/http_client.py
@@ -0,0 +1,239 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import ssl
+from pathlib import Path
+from typing import Any
+
+import httpx
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import (
+    NetworkConfig,
+    ProxyConfig,
+    TimeoutConfig,
+    TLSConfig,
+)
+
+logger = get_logger(name=__name__, category="providers::utils")
+
+
+def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | Path:
+    """
+    Build an SSL context from TLS configuration.
+
+    Returns:
+        - ssl.SSLContext if advanced options (min_version, ciphers, or mTLS) are configured
+        - Path if only a CA bundle path is specified
+        - bool if only verify is specified as boolean
+    """
+    has_advanced_options = (
+        tls_config.min_version is not None or tls_config.ciphers is not None or tls_config.client_cert is not None
+    )
+
+    if not has_advanced_options:
+        return tls_config.verify
+
+    ctx = ssl.create_default_context()
+
+    if isinstance(tls_config.verify, Path):
+        ctx.load_verify_locations(str(tls_config.verify))
+    elif not tls_config.verify:
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+
+    if tls_config.min_version:
+        if tls_config.min_version == "TLSv1.2":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+        elif tls_config.min_version == "TLSv1.3":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_3
+
+    if tls_config.ciphers:
+        ctx.set_ciphers(":".join(tls_config.ciphers))
+
+    if tls_config.client_cert and tls_config.client_key:
+        ctx.load_cert_chain(certfile=str(tls_config.client_cert), keyfile=str(tls_config.client_key))
+
+    return ctx
+
+
+def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPTransport] | None:
+    """
+    Build httpx proxy mounts from proxy configuration.
+
+    Returns:
+        Dictionary of proxy mounts for httpx, or None if no proxies configured
+    """
+    transport_kwargs: dict[str, Any] = {}
+    if proxy_config.cacert:
+        # Convert Path to string for httpx
+        transport_kwargs["verify"] = str(proxy_config.cacert)
+
+    if proxy_config.url:
+        # Convert HttpUrl to string for httpx
+        proxy_url = str(proxy_config.url)
+        return {
+            "http://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+            "https://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+        }
+
+    mounts = {}
+    if proxy_config.http:
+        mounts["http://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.http), **transport_kwargs)
+    if proxy_config.https:
+        mounts["https://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.https), **transport_kwargs)
+
+    return mounts if mounts else None
+
+
+def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient kwargs from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig, without creating the client itself.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor
+    """
+    if network_config is None:
+        return {}
+
+    client_kwargs: dict[str, Any] = {}
+
+    if network_config.tls:
+        ssl_context = _build_ssl_context(network_config.tls)
+        client_kwargs["verify"] = ssl_context
+
+    if network_config.proxy:
+        mounts = _build_proxy_mounts(network_config.proxy)
+        if mounts:
+            client_kwargs["mounts"] = mounts
+
+    if network_config.timeout is not None:
+        if isinstance(network_config.timeout, TimeoutConfig):
+            # httpx.Timeout requires all four parameters (connect, read, write, pool)
+            # to be set explicitly, or a default timeout value
+            timeout_kwargs: dict[str, float | None] = {
+                "connect": network_config.timeout.connect,
+                "read": network_config.timeout.read,
+                "write": None,
+                "pool": None,
+            }
+            client_kwargs["timeout"] = httpx.Timeout(**timeout_kwargs)
+        else:
+            client_kwargs["timeout"] = httpx.Timeout(network_config.timeout)
+
+    if network_config.headers:
+        client_kwargs["headers"] = network_config.headers
+
+    return client_kwargs
+
+
+def _extract_client_config(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> dict[str, Any]:
+    """
+    Extract configuration (auth, headers) from an existing http_client.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+
+    Returns:
+        Dictionary with extracted auth and headers, if available
+    """
+    config: dict[str, Any] = {}
+
+    # Extract from DefaultAsyncHttpxClient
+    if isinstance(existing_client, DefaultAsyncHttpxClient):
+        underlying_client = existing_client._client  # type: ignore[union-attr,attr-defined]
+        if hasattr(underlying_client, "_auth"):
+            config["auth"] = underlying_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+    else:
+        # Extract from plain httpx.AsyncClient
+        if hasattr(existing_client, "_auth"):
+            config["auth"] = existing_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+
+    return config
+
+
+def _merge_network_config_into_client(
+    existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient, network_config: NetworkConfig | None
+) -> httpx.AsyncClient | DefaultAsyncHttpxClient:
+    """
+    Merge network configuration into an existing http_client.
+
+    Extracts auth and headers from the existing client, merges with network config,
+    and creates a new client with all settings combined.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+        network_config: Network configuration to apply
+
+    Returns:
+        New client with network config applied, or original client if merge fails
+    """
+    if network_config is None:
+        return existing_client
+
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return existing_client
+
+    try:
+        # Extract existing client config (auth, headers)
+        existing_config = _extract_client_config(existing_client)
+
+        # Merge headers: existing headers first, then network config (network takes precedence)
+        if existing_config.get("headers") and network_kwargs.get("headers"):
+            merged_headers = dict(existing_config["headers"])
+            merged_headers.update(network_kwargs["headers"])
+            network_kwargs["headers"] = merged_headers
+        elif existing_config.get("headers"):
+            network_kwargs["headers"] = existing_config["headers"]
+
+        # Preserve auth from existing client
+        if existing_config.get("auth"):
+            network_kwargs["auth"] = existing_config["auth"]
+
+        # Create new client with merged config
+        new_client = httpx.AsyncClient(**network_kwargs)
+
+        # If original was DefaultAsyncHttpxClient, wrap the new client
+        if isinstance(existing_client, DefaultAsyncHttpxClient):
+            return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers"))  # type: ignore[call-arg]
+
+        return new_client
+    except Exception as e:
+        logger.debug(f"Could not merge network config into existing http_client: {e}. Using original client.")
+        return existing_client
+
+
+def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient parameters from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor,
+        wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI
+    """
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return {}
+
+    return {"http_client": httpx.AsyncClient(**network_kwargs)}
```
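The new `build_http_client` helper is meant to be unpacked directly into an OpenAI-compatible client constructor. A minimal usage sketch (the endpoint, API key, and header values are illustrative, not taken from this diff):

```python
from openai import AsyncOpenAI

from llama_stack.providers.utils.inference.http_client import build_http_client
from llama_stack.providers.utils.inference.model_registry import (
    NetworkConfig,
    TimeoutConfig,
    TLSConfig,
)

# Force TLS 1.2+ with split connect/read timeouts. verify=True keeps the
# system trust store; _build_ssl_context returns a full SSLContext here
# only because min_version is an "advanced option".
network = NetworkConfig(
    tls=TLSConfig(verify=True, min_version="TLSv1.2"),
    timeout=TimeoutConfig(connect=5.0, read=60.0),
    headers={"X-Request-Source": "llama-stack"},
)

# build_http_client returns {} when nothing is configured, or
# {"http_client": httpx.AsyncClient(...)} otherwise, so it can be
# unpacked straight into the AsyncOpenAI constructor.
client = AsyncOpenAI(
    base_url="https://inference.example.com/v1",  # illustrative endpoint
    api_key="sk-example",  # illustrative key
    **build_http_client(network),
)
```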
```diff
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -30,6 +30,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -146,6 +147,9 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         if not self.model_store:
             raise ValueError("Model store is not initialized")
 
@@ -270,6 +274,8 @@ class LiteLLMOpenAIMixin(
             top_logprobs=params.top_logprobs,
             top_p=params.top_p,
             user=params.user,
+            safety_identifier=params.safety_identifier,
+            reasoning_effort=params.reasoning_effort,
             api_key=self.get_api_key(),
             api_base=self.api_base,
             **self._litellm_extra_request_params(params),
```
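Judging only from the call added above, `validate_embeddings_input_is_text` guards the request object before it reaches LiteLLM. A hedged sketch of the intended behavior; the request field names and the exact exception type are assumptions, not confirmed by this diff:

```python
from llama_stack_api import (
    OpenAIEmbeddingsRequestWithExtraBody,
    validate_embeddings_input_is_text,
)

# Plain-text input: expected to pass the guard unchanged.
text_req = OpenAIEmbeddingsRequestWithExtraBody(
    model="text-embedding-3-small",  # illustrative model id
    input=["hello world"],
)
validate_embeddings_input_is_text(text_req)

# Token-array input (lists of ints, which the OpenAI embeddings API itself
# accepts): presumably the case this guard exists to reject.
token_req = OpenAIEmbeddingsRequestWithExtraBody(
    model="text-embedding-3-small",
    input=[[15339, 1917]],
)
try:
    validate_embeddings_input_is_text(token_req)
except ValueError as e:  # exact exception type is an assumption
    print(f"rejected: {e}")
```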
```diff
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any
+from pathlib import Path
+from typing import Any, Literal
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr, field_validator, model_validator
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference import (
@@ -17,6 +18,147 @@ from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, Unsupported
 logger = get_logger(name=__name__, category="providers::utils")
 
 
+class TLSConfig(BaseModel):
+    """TLS/SSL configuration for secure connections."""
+
+    verify: bool | Path = Field(
+        default=True,
+        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
+    )
+    min_version: Literal["TLSv1.2", "TLSv1.3"] | None = Field(
+        default=None,
+        description="Minimum TLS version to use. Defaults to system default if not specified.",
+    )
+    ciphers: list[str] | None = Field(
+        default=None,
+        description="List of allowed cipher suites (e.g., ['ECDHE+AESGCM', 'DHE+AESGCM']).",
+    )
+    client_cert: Path | None = Field(
+        default=None,
+        description="Path to client certificate file for mTLS authentication.",
+    )
+    client_key: Path | None = Field(
+        default=None,
+        description="Path to client private key file for mTLS authentication.",
+    )
+
+    @field_validator("verify", mode="before")
+    @classmethod
+    def validate_verify(cls, v: bool | str | Path) -> bool | Path:
+        if isinstance(v, bool):
+            return v
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"TLS certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"TLS certificate path is not a file: {v}")
+        return cert_path
+
+    @field_validator("client_cert", "client_key", mode="before")
+    @classmethod
+    def validate_cert_paths(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Certificate/key file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Certificate/key path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_mtls_pair(self) -> "TLSConfig":
+        if (self.client_cert is None) != (self.client_key is None):
+            raise ValueError("Both client_cert and client_key must be provided together for mTLS")
+        return self
+
+
+class ProxyConfig(BaseModel):
+    """Proxy configuration for HTTP connections."""
+
+    url: HttpUrl | None = Field(
+        default=None,
+        description="Single proxy URL for all connections (e.g., 'http://proxy.example.com:8080').",
+    )
+    http: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTP connections.",
+    )
+    https: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTPS connections.",
+    )
+    cacert: Path | None = Field(
+        default=None,
+        description="Path to CA certificate file for verifying the proxy's certificate. Required for proxies in interception mode.",
+    )
+    no_proxy: list[str] | None = Field(
+        default=None,
+        description="List of hosts that should bypass the proxy (e.g., ['localhost', '127.0.0.1', '.internal.corp']).",
+    )
+
+    @field_validator("cacert", mode="before")
+    @classmethod
+    def validate_cacert(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Proxy CA certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Proxy CA certificate path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_proxy_config(self) -> "ProxyConfig":
+        if self.url and (self.http or self.https):
+            raise ValueError("Cannot specify both 'url' and 'http'/'https' proxy settings")
+        return self
+
+
+class TimeoutConfig(BaseModel):
+    """Timeout configuration for HTTP connections."""
+
+    connect: float | None = Field(
+        default=None,
+        description="Connection timeout in seconds.",
+    )
+    read: float | None = Field(
+        default=None,
+        description="Read timeout in seconds.",
+    )
+
+
+class NetworkConfig(BaseModel):
+    """Network configuration for remote provider connections."""
+
+    tls: TLSConfig | None = Field(
+        default=None,
+        description="TLS/SSL configuration for secure connections.",
+    )
+    proxy: ProxyConfig | None = Field(
+        default=None,
+        description="Proxy configuration for HTTP connections.",
+    )
+    timeout: float | TimeoutConfig | None = Field(
+        default=None,
+        description="Timeout configuration. Can be a float (for both connect and read) or a TimeoutConfig object with separate connect and read timeouts.",
+    )
+    headers: dict[str, str] | None = Field(
+        default=None,
+        description="Additional HTTP headers to include in all requests.",
+    )
+
+
 class RemoteInferenceProviderConfig(BaseModel):
     allowed_models: list[str] | None = Field(
         default=None,
@@ -31,6 +173,10 @@ class RemoteInferenceProviderConfig(BaseModel):
         description="Authentication credential for the provider",
         alias="api_key",
    )
+    network: NetworkConfig | None = Field(
+        default=None,
+        description="Network configuration including TLS, proxy, and timeout settings.",
+    )
 
 
 # TODO: this class is more confusing than useful right now. We need to make it
```
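Taken together, these models validate network settings at construction time rather than at request time. A short sketch using the new classes (the proxy URL and header values are illustrative):

```python
from llama_stack.providers.utils.inference.model_registry import (
    NetworkConfig,
    ProxyConfig,
    TimeoutConfig,
    TLSConfig,
)

# Route traffic through a corporate proxy with a split connect/read timeout.
network = NetworkConfig(
    tls=TLSConfig(verify=True, min_version="TLSv1.3"),
    proxy=ProxyConfig(url="http://proxy.example.com:8080"),
    timeout=TimeoutConfig(connect=5.0, read=120.0),
    headers={"X-Env": "staging"},
)

# The model validators enforce invariants immediately: mixing the single
# `url` with per-scheme `http`/`https` proxies raises at construction
# (pydantic's ValidationError is a ValueError subclass).
try:
    ProxyConfig(url="http://proxy.example.com:8080", https="http://other:3128")
except ValueError as e:
    print(e)
```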
```diff
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -7,144 +7,14 @@ from typing import (
     Any,
 )
 
-from openai.types.chat import (
-    ChatCompletionMessageToolCall,
-)
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import (
-    BuiltinTool,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-)
+from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
 
 logger = get_logger(name=__name__, category="providers::utils")
 
 
-class OpenAICompatCompletionChoiceDelta(BaseModel):
-    content: str
-
-
-class OpenAICompatLogprobs(BaseModel):
-    text_offset: list[int] | None = None
-
-    token_logprobs: list[float] | None = None
-
-    tokens: list[str] | None = None
-
-    top_logprobs: list[dict[str, float]] | None = None
-
-
-class OpenAICompatCompletionChoice(BaseModel):
-    finish_reason: str | None = None
-    text: str | None = None
-    delta: OpenAICompatCompletionChoiceDelta | None = None
-    logprobs: OpenAICompatLogprobs | None = None
-
-
-class OpenAICompatCompletionResponse(BaseModel):
-    choices: list[OpenAICompatCompletionChoice]
-
-
-def text_from_choice(choice) -> str:
-    if hasattr(choice, "delta") and choice.delta:
-        return choice.delta.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-    if hasattr(choice, "message"):
-        return choice.message.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-    return choice.text  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-
-def get_stop_reason(finish_reason: str) -> StopReason:
-    if finish_reason in ["stop", "eos"]:
-        return StopReason.end_of_turn
-    elif finish_reason == "eom":
-        return StopReason.end_of_message
-    elif finish_reason == "length":
-        return StopReason.out_of_tokens
-
-    return StopReason.out_of_tokens
-
-
-class UnparseableToolCall(BaseModel):
-    """
-    A ToolCall with arguments that are not valid JSON.
-    Mirrors the ToolCall schema, but with arguments as a string.
-    """
-
-    call_id: str = ""
-    tool_name: str = ""
-    arguments: str = ""
-
-
-def convert_tool_call(
-    tool_call: ChatCompletionMessageToolCall,
-) -> ToolCall | UnparseableToolCall:
-    """
-    Convert a ChatCompletionMessageToolCall tool call to either a
-    ToolCall or UnparseableToolCall. Returns an UnparseableToolCall
-    if the tool call is not valid ToolCall.
-    """
-    try:
-        valid_tool_call = ToolCall(
-            call_id=tool_call.id,
-            tool_name=tool_call.function.name,
-            arguments=tool_call.function.arguments,
-        )
-    except Exception:
-        return UnparseableToolCall(
-            call_id=tool_call.id or "",
-            tool_name=tool_call.function.name or "",
-            arguments=tool_call.function.arguments or "",
-        )
-
-    return valid_tool_call
-
-
-PYTHON_TYPE_TO_LITELLM_TYPE = {
-    "int": "integer",
-    "float": "number",
-    "bool": "boolean",
-    "str": "string",
-}
-
-
-def to_openai_param_type(param_type: str) -> dict:
-    """
-    Convert Python type hints to OpenAI parameter type format.
-
-    Examples:
-        'str' -> {'type': 'string'}
-        'int' -> {'type': 'integer'}
-        'list[str]' -> {'type': 'array', 'items': {'type': 'string'}}
-        'list[int]' -> {'type': 'array', 'items': {'type': 'integer'}}
-    """
-    # Handle basic types first
-    basic_types = {
-        "str": "string",
-        "int": "integer",
-        "float": "number",
-        "bool": "boolean",
-    }
-
-    if param_type in basic_types:
-        return {"type": basic_types[param_type]}
-
-    # Handle list/array types
-    if param_type.startswith("list[") and param_type.endswith("]"):
-        inner_type = param_type[5:-1]
-        if inner_type in basic_types:
-            return {
-                "type": "array",
-                "items": {"type": basic_types.get(inner_type, inner_type)},
-            }
-
-    return {"type": param_type}
-
-
 def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
     """
     Convert a ToolDefinition to an OpenAI API-compatible dictionary.
@@ -210,33 +80,6 @@ async def prepare_openai_completion_params(**params):
     return completion_params
 
 
-def prepare_openai_embeddings_params(
-    model: str,
-    input: str | list[str],
-    encoding_format: str | None = "float",
-    dimensions: int | None = None,
-    user: str | None = None,
-):
-    if model is None:
-        raise ValueError("Model must be provided for embeddings")
-
-    input_list = [input] if isinstance(input, str) else input
-
-    params: dict[str, Any] = {
-        "model": model,
-        "input": input_list,
-    }
-
-    if encoding_format is not None:
-        params["encoding_format"] = encoding_format
-    if dimensions is not None:
-        params["dimensions"] = dimensions
-    if user is not None:
-        params["user"] = user
-
-    return params
-
-
 def get_stream_options_for_telemetry(
     stream_options: dict[str, Any] | None,
     is_streaming: bool,
```