llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
llama_stack/providers/utils/inference/http_client.py
@@ -0,0 +1,239 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import ssl
+from pathlib import Path
+from typing import Any
+
+import httpx
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import (
+    NetworkConfig,
+    ProxyConfig,
+    TimeoutConfig,
+    TLSConfig,
+)
+
+logger = get_logger(name=__name__, category="providers::utils")
+
+
+def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | Path:
+    """
+    Build an SSL context from TLS configuration.
+
+    Returns:
+        - ssl.SSLContext if advanced options (min_version, ciphers, or mTLS) are configured
+        - Path if only a CA bundle path is specified
+        - bool if only verify is specified as boolean
+    """
+    has_advanced_options = (
+        tls_config.min_version is not None or tls_config.ciphers is not None or tls_config.client_cert is not None
+    )
+
+    if not has_advanced_options:
+        return tls_config.verify
+
+    ctx = ssl.create_default_context()
+
+    if isinstance(tls_config.verify, Path):
+        ctx.load_verify_locations(str(tls_config.verify))
+    elif not tls_config.verify:
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+
+    if tls_config.min_version:
+        if tls_config.min_version == "TLSv1.2":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+        elif tls_config.min_version == "TLSv1.3":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_3
+
+    if tls_config.ciphers:
+        ctx.set_ciphers(":".join(tls_config.ciphers))
+
+    if tls_config.client_cert and tls_config.client_key:
+        ctx.load_cert_chain(certfile=str(tls_config.client_cert), keyfile=str(tls_config.client_key))
+
+    return ctx
+
+
+def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPTransport] | None:
+    """
+    Build httpx proxy mounts from proxy configuration.
+
+    Returns:
+        Dictionary of proxy mounts for httpx, or None if no proxies configured
+    """
+    transport_kwargs: dict[str, Any] = {}
+    if proxy_config.cacert:
+        # Convert Path to string for httpx
+        transport_kwargs["verify"] = str(proxy_config.cacert)
+
+    if proxy_config.url:
+        # Convert HttpUrl to string for httpx
+        proxy_url = str(proxy_config.url)
+        return {
+            "http://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+            "https://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+        }
+
+    mounts = {}
+    if proxy_config.http:
+        mounts["http://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.http), **transport_kwargs)
+    if proxy_config.https:
+        mounts["https://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.https), **transport_kwargs)
+
+    return mounts if mounts else None
+
+
+def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient kwargs from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig, without creating the client itself.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor
+    """
+    if network_config is None:
+        return {}
+
+    client_kwargs: dict[str, Any] = {}
+
+    if network_config.tls:
+        ssl_context = _build_ssl_context(network_config.tls)
+        client_kwargs["verify"] = ssl_context
+
+    if network_config.proxy:
+        mounts = _build_proxy_mounts(network_config.proxy)
+        if mounts:
+            client_kwargs["mounts"] = mounts
+
+    if network_config.timeout is not None:
+        if isinstance(network_config.timeout, TimeoutConfig):
+            # httpx.Timeout requires all four parameters (connect, read, write, pool)
+            # to be set explicitly, or a default timeout value
+            timeout_kwargs: dict[str, float | None] = {
+                "connect": network_config.timeout.connect,
+                "read": network_config.timeout.read,
+                "write": None,
+                "pool": None,
+            }
+            client_kwargs["timeout"] = httpx.Timeout(**timeout_kwargs)
+        else:
+            client_kwargs["timeout"] = httpx.Timeout(network_config.timeout)
+
+    if network_config.headers:
+        client_kwargs["headers"] = network_config.headers
+
+    return client_kwargs
+
+
+def _extract_client_config(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> dict[str, Any]:
+    """
+    Extract configuration (auth, headers) from an existing http_client.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+
+    Returns:
+        Dictionary with extracted auth and headers, if available
+    """
+    config: dict[str, Any] = {}
+
+    # Extract from DefaultAsyncHttpxClient
+    if isinstance(existing_client, DefaultAsyncHttpxClient):
+        underlying_client = existing_client._client  # type: ignore[union-attr,attr-defined]
+        if hasattr(underlying_client, "_auth"):
+            config["auth"] = underlying_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+    else:
+        # Extract from plain httpx.AsyncClient
+        if hasattr(existing_client, "_auth"):
+            config["auth"] = existing_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+
+    return config
+
+
+def _merge_network_config_into_client(
+    existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient, network_config: NetworkConfig | None
+) -> httpx.AsyncClient | DefaultAsyncHttpxClient:
+    """
+    Merge network configuration into an existing http_client.
+
+    Extracts auth and headers from the existing client, merges with network config,
+    and creates a new client with all settings combined.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+        network_config: Network configuration to apply
+
+    Returns:
+        New client with network config applied, or original client if merge fails
+    """
+    if network_config is None:
+        return existing_client
+
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return existing_client
+
+    try:
+        # Extract existing client config (auth, headers)
+        existing_config = _extract_client_config(existing_client)
+
+        # Merge headers: existing headers first, then network config (network takes precedence)
+        if existing_config.get("headers") and network_kwargs.get("headers"):
+            merged_headers = dict(existing_config["headers"])
+            merged_headers.update(network_kwargs["headers"])
+            network_kwargs["headers"] = merged_headers
+        elif existing_config.get("headers"):
+            network_kwargs["headers"] = existing_config["headers"]
+
+        # Preserve auth from existing client
+        if existing_config.get("auth"):
+            network_kwargs["auth"] = existing_config["auth"]
+
+        # Create new client with merged config
+        new_client = httpx.AsyncClient(**network_kwargs)
+
+        # If original was DefaultAsyncHttpxClient, wrap the new client
+        if isinstance(existing_client, DefaultAsyncHttpxClient):
+            return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers"))  # type: ignore[call-arg]
+
+        return new_client
+    except Exception as e:
+        logger.debug(f"Could not merge network config into existing http_client: {e}. Using original client.")
+        return existing_client
+
+
+def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient parameters from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor,
+        wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI
+    """
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return {}
+
+    return {"http_client": httpx.AsyncClient(**network_kwargs)}

llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -30,6 +30,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -146,6 +147,9 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         if not self.model_store:
             raise ValueError("Model store is not initialized")
 
@@ -270,6 +274,7 @@ class LiteLLMOpenAIMixin(
             top_logprobs=params.top_logprobs,
             top_p=params.top_p,
             user=params.user,
+            reasoning_effort=params.reasoning_effort,
             api_key=self.get_api_key(),
             api_base=self.api_base,
             **self._litellm_extra_request_params(params),
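
The new guard rejects pre-tokenized embeddings input before the request ever reaches LiteLLM. A rough sketch of the intended behavior, assuming OpenAIEmbeddingsRequestWithExtraBody mirrors the OpenAI embeddings request shape (model/input fields) and that the validator raises on token arrays; the model name is a placeholder:

    from llama_stack_api import (
        OpenAIEmbeddingsRequestWithExtraBody,
        validate_embeddings_input_is_text,
    )

    # Plain text input passes through unchanged.
    ok = OpenAIEmbeddingsRequestWithExtraBody(model="text-embed-example", input="hello world")
    validate_embeddings_input_is_text(ok)

    # Token-array input (list[int] / list[list[int]]) is rejected up front.
    tokens = OpenAIEmbeddingsRequestWithExtraBody(model="text-embed-example", input=[1, 2, 3])
    try:
        validate_embeddings_input_is_text(tokens)
    except Exception as exc:  # the exact exception type is not shown in this diff
        print(f"rejected: {exc}")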

llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any
+from pathlib import Path
+from typing import Any, Literal
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr, field_validator, model_validator
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference import (
@@ -17,6 +18,147 @@ from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, Unsupported
 logger = get_logger(name=__name__, category="providers::utils")
 
 
+class TLSConfig(BaseModel):
+    """TLS/SSL configuration for secure connections."""
+
+    verify: bool | Path = Field(
+        default=True,
+        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
+    )
+    min_version: Literal["TLSv1.2", "TLSv1.3"] | None = Field(
+        default=None,
+        description="Minimum TLS version to use. Defaults to system default if not specified.",
+    )
+    ciphers: list[str] | None = Field(
+        default=None,
+        description="List of allowed cipher suites (e.g., ['ECDHE+AESGCM', 'DHE+AESGCM']).",
+    )
+    client_cert: Path | None = Field(
+        default=None,
+        description="Path to client certificate file for mTLS authentication.",
+    )
+    client_key: Path | None = Field(
+        default=None,
+        description="Path to client private key file for mTLS authentication.",
+    )
+
+    @field_validator("verify", mode="before")
+    @classmethod
+    def validate_verify(cls, v: bool | str | Path) -> bool | Path:
+        if isinstance(v, bool):
+            return v
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"TLS certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"TLS certificate path is not a file: {v}")
+        return cert_path
+
+    @field_validator("client_cert", "client_key", mode="before")
+    @classmethod
+    def validate_cert_paths(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Certificate/key file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Certificate/key path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_mtls_pair(self) -> "TLSConfig":
+        if (self.client_cert is None) != (self.client_key is None):
+            raise ValueError("Both client_cert and client_key must be provided together for mTLS")
+        return self
+
+
+class ProxyConfig(BaseModel):
+    """Proxy configuration for HTTP connections."""
+
+    url: HttpUrl | None = Field(
+        default=None,
+        description="Single proxy URL for all connections (e.g., 'http://proxy.example.com:8080').",
+    )
+    http: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTP connections.",
+    )
+    https: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTPS connections.",
+    )
+    cacert: Path | None = Field(
+        default=None,
+        description="Path to CA certificate file for verifying the proxy's certificate. Required for proxies in interception mode.",
+    )
+    no_proxy: list[str] | None = Field(
+        default=None,
+        description="List of hosts that should bypass the proxy (e.g., ['localhost', '127.0.0.1', '.internal.corp']).",
+    )
+
+    @field_validator("cacert", mode="before")
+    @classmethod
+    def validate_cacert(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Proxy CA certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Proxy CA certificate path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_proxy_config(self) -> "ProxyConfig":
+        if self.url and (self.http or self.https):
+            raise ValueError("Cannot specify both 'url' and 'http'/'https' proxy settings")
+        return self
+
+
+class TimeoutConfig(BaseModel):
+    """Timeout configuration for HTTP connections."""
+
+    connect: float | None = Field(
+        default=None,
+        description="Connection timeout in seconds.",
+    )
+    read: float | None = Field(
+        default=None,
+        description="Read timeout in seconds.",
+    )
+
+
+class NetworkConfig(BaseModel):
+    """Network configuration for remote provider connections."""
+
+    tls: TLSConfig | None = Field(
+        default=None,
+        description="TLS/SSL configuration for secure connections.",
+    )
+    proxy: ProxyConfig | None = Field(
+        default=None,
+        description="Proxy configuration for HTTP connections.",
+    )
+    timeout: float | TimeoutConfig | None = Field(
+        default=None,
+        description="Timeout configuration. Can be a float (for both connect and read) or a TimeoutConfig object with separate connect and read timeouts.",
+    )
+    headers: dict[str, str] | None = Field(
+        default=None,
+        description="Additional HTTP headers to include in all requests.",
+    )
+
+
 class RemoteInferenceProviderConfig(BaseModel):
     allowed_models: list[str] | None = Field(
         default=None,
@@ -31,6 +173,10 @@ class RemoteInferenceProviderConfig(BaseModel):
         description="Authentication credential for the provider",
         alias="api_key",
     )
+    network: NetworkConfig | None = Field(
+        default=None,
+        description="Network configuration including TLS, proxy, and timeout settings.",
+    )
 
 
 # TODO: this class is more confusing than useful right now. We need to make it
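
With the network field in place, every RemoteInferenceProviderConfig subclass can carry TLS, proxy, and timeout settings declaratively. A hedged construction sketch (values are illustrative; api_key populates auth_credential via the alias declared above):

    from llama_stack.providers.utils.inference.model_registry import (
        NetworkConfig,
        ProxyConfig,
        RemoteInferenceProviderConfig,
        TimeoutConfig,
    )

    config = RemoteInferenceProviderConfig(
        api_key="example-key",  # alias for auth_credential
        network=NetworkConfig(
            # One proxy for all traffic; combining 'url' with 'http'/'https'
            # would trip validate_proxy_config() above.
            proxy=ProxyConfig(url="http://proxy.example.com:8080"),
            # Separate connect/read timeouts; write/pool fall back to None
            # when the kwargs are built in http_client.py.
            timeout=TimeoutConfig(connect=5.0, read=60.0),
            headers={"X-Request-Source": "llama-stack"},
        ),
    )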

llama_stack/providers/utils/inference/openai_compat.py
@@ -19,6 +19,7 @@ from llama_stack.models.llama.datatypes import (
     ToolCall,
     ToolDefinition,
 )
+from llama_stack_api import OpenAIFinishReason
 
 logger = get_logger(name=__name__, category="providers::utils")
 
@@ -38,7 +39,7 @@ class OpenAICompatLogprobs(BaseModel):
 
 
 class OpenAICompatCompletionChoice(BaseModel):
-    finish_reason: str | None = None
+    finish_reason: OpenAIFinishReason | None = None
     text: str | None = None
     delta: OpenAICompatCompletionChoiceDelta | None = None
     logprobs: OpenAICompatLogprobs | None = None

llama_stack/providers/utils/inference/openai_mixin.py
@@ -10,11 +10,16 @@ from abc import ABC, abstractmethod
 from collections.abc import AsyncIterator, Iterable
 from typing import Any
 
+import httpx
 from openai import AsyncOpenAI
 from pydantic import BaseModel, ConfigDict
 
 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.http_client import (
+    _build_network_client_kwargs,
+    _merge_network_config_into_client,
+)
 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack.providers.utils.inference.openai_compat import (
     get_stream_options_for_telemetry,
@@ -34,6 +39,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
     OpenAIMessageParam,
+    validate_embeddings_input_is_text,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -82,6 +88,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     # Set to False for providers that don't support stream_options (e.g., Ollama, vLLM)
     supports_stream_options: bool = True
 
+    # Allow subclasses to control whether the provider supports tokenized embeddings input
+    # Set to True for providers that support pre-tokenized input (list[int] and list[list[int]])
+    supports_tokenized_embeddings_input: bool = False
+
     # Embedding model metadata for this provider
     # Can be set by subclasses or instances to provide embedding models
     # Format: {"model_id": {"embedding_dimension": 1536, "context_length": 8192}}
@@ -121,7 +131,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         Get any extra parameters to pass to the AsyncOpenAI client.
 
         Child classes can override this method to provide additional parameters
-        such as timeout settings, proxies, etc.
+        such as custom http_client, timeout settings, proxies, etc.
+
+        Note: Network configuration from config.network is automatically applied
+        in the client property. This method is for provider-specific customizations.
 
         :return: A dictionary of extra parameters
         """
@@ -194,6 +207,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         Uses the abstract methods get_api_key() and get_base_url() which must be
         implemented by child classes.
 
+        Network configuration from config.network is automatically applied.
         Users can also provide the API key via the provider data header, which
        is used instead of any config API key.
         """
@@ -205,10 +219,30 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             message += f' Please provide a valid API key in the provider data header, e.g. x-llamastack-provider-data: {{"{self.provider_data_api_key_field}": "<API_KEY>"}}.'
             raise ValueError(message)
 
+        extra_params = self.get_extra_client_params()
+        network_kwargs = _build_network_client_kwargs(self.config.network)
+
+        # Handle http_client creation/merging:
+        # - If get_extra_client_params() provides an http_client (e.g., OCI with custom auth),
+        #   merge network config into it. The merge behavior:
+        #   * Preserves auth from get_extra_client_params() (provider-specific auth like OCI signer)
+        #   * Preserves headers from get_extra_client_params() as base
+        #   * Applies network config (TLS, proxy, timeout, headers) on top
+        #   * Network config headers take precedence over provider headers (allows override)
+        # - Otherwise, if network config exists, create http_client from it
+        # This allows providers with custom auth to still use standard network settings
+        if "http_client" in extra_params:
+            if network_kwargs:
+                extra_params["http_client"] = _merge_network_config_into_client(
+                    extra_params["http_client"], self.config.network
+                )
+        elif network_kwargs:
+            extra_params["http_client"] = httpx.AsyncClient(**network_kwargs)
+
         return AsyncOpenAI(
             api_key=api_key,
             base_url=self.get_base_url(),
-            **self.get_extra_client_params(),
+            **extra_params,
         )
 
     def _get_api_key_from_config_or_provider_data(self) -> str | None:
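
The comment block above encodes a precedence rule worth spelling out: provider-supplied headers form the base, network-config headers win on conflict, and provider auth survives the merge. A small sketch against the helper itself (the header names are invented for illustration):

    import asyncio

    import httpx

    from llama_stack.providers.utils.inference.http_client import (
        _merge_network_config_into_client,
    )
    from llama_stack.providers.utils.inference.model_registry import NetworkConfig

    async def main() -> None:
        # Provider-supplied client with its own headers (and possibly auth).
        base = httpx.AsyncClient(headers={"X-Team": "infra", "X-Env": "dev"})
        # Network config overriding one header and adding a timeout.
        net = NetworkConfig(timeout=15.0, headers={"X-Env": "prod"})
        merged = _merge_network_config_into_client(base, net)
        # Per the merge logic: X-Team kept from the base client,
        # X-Env overridden by the network config -> "infra prod".
        print(merged.headers["X-Team"], merged.headers["X-Env"])
        await merged.aclose()
        await base.aclose()

    asyncio.run(main())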
@@ -371,6 +405,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             top_logprobs=params.top_logprobs,
             top_p=params.top_p,
             user=params.user,
+            reasoning_effort=params.reasoning_effort,
         )
 
         if extra_body := params.model_extra:
@@ -386,6 +421,10 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         Direct OpenAI embeddings API call.
         """
+        # Validate token array support if provider doesn't support it
+        if not self.supports_tokenized_embeddings_input:
+            validate_embeddings_input_is_text(params)
+
         provider_model_id = await self._get_provider_model_id(params.model)
         self._validate_model_allowed(provider_model_id)
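
Providers whose upstream API accepts pre-tokenized embeddings input can opt out of the text-only check by flipping the new class attribute. A hedged subclass sketch (the provider class and its return values are invented; only the flag and the two abstract methods named in the docstrings above come from this diff, and their exact signatures are assumed):

    from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

    class ExampleProvider(OpenAIMixin):
        # Upstream accepts list[int] / list[list[int]] embeddings input, so
        # openai_embeddings() skips validate_embeddings_input_is_text().
        supports_tokenized_embeddings_input: bool = True

        def get_api_key(self) -> str:
            return "example-key"

        def get_base_url(self) -> str:
            return "https://api.example.com/v1"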