llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
```diff
--- /dev/null
+++ b/llama_stack/providers/utils/inference/http_client.py
@@ -0,0 +1,239 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import ssl
+from pathlib import Path
+from typing import Any
+
+import httpx
+from openai._base_client import DefaultAsyncHttpxClient
+
+from llama_stack.log import get_logger
+from llama_stack.providers.utils.inference.model_registry import (
+    NetworkConfig,
+    ProxyConfig,
+    TimeoutConfig,
+    TLSConfig,
+)
+
+logger = get_logger(name=__name__, category="providers::utils")
+
+
+def _build_ssl_context(tls_config: TLSConfig) -> ssl.SSLContext | bool | Path:
+    """
+    Build an SSL context from TLS configuration.
+
+    Returns:
+        - ssl.SSLContext if advanced options (min_version, ciphers, or mTLS) are configured
+        - Path if only a CA bundle path is specified
+        - bool if only verify is specified as boolean
+    """
+    has_advanced_options = (
+        tls_config.min_version is not None or tls_config.ciphers is not None or tls_config.client_cert is not None
+    )
+
+    if not has_advanced_options:
+        return tls_config.verify
+
+    ctx = ssl.create_default_context()
+
+    if isinstance(tls_config.verify, Path):
+        ctx.load_verify_locations(str(tls_config.verify))
+    elif not tls_config.verify:
+        ctx.check_hostname = False
+        ctx.verify_mode = ssl.CERT_NONE
+
+    if tls_config.min_version:
+        if tls_config.min_version == "TLSv1.2":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_2
+        elif tls_config.min_version == "TLSv1.3":
+            ctx.minimum_version = ssl.TLSVersion.TLSv1_3
+
+    if tls_config.ciphers:
+        ctx.set_ciphers(":".join(tls_config.ciphers))
+
+    if tls_config.client_cert and tls_config.client_key:
+        ctx.load_cert_chain(certfile=str(tls_config.client_cert), keyfile=str(tls_config.client_key))
+
+    return ctx
+
+
+def _build_proxy_mounts(proxy_config: ProxyConfig) -> dict[str, httpx.AsyncHTTPTransport] | None:
+    """
+    Build httpx proxy mounts from proxy configuration.
+
+    Returns:
+        Dictionary of proxy mounts for httpx, or None if no proxies configured
+    """
+    transport_kwargs: dict[str, Any] = {}
+    if proxy_config.cacert:
+        # Convert Path to string for httpx
+        transport_kwargs["verify"] = str(proxy_config.cacert)
+
+    if proxy_config.url:
+        # Convert HttpUrl to string for httpx
+        proxy_url = str(proxy_config.url)
+        return {
+            "http://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+            "https://": httpx.AsyncHTTPTransport(proxy=proxy_url, **transport_kwargs),
+        }
+
+    mounts = {}
+    if proxy_config.http:
+        mounts["http://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.http), **transport_kwargs)
+    if proxy_config.https:
+        mounts["https://"] = httpx.AsyncHTTPTransport(proxy=str(proxy_config.https), **transport_kwargs)
+
+    return mounts if mounts else None
+
+
+def _build_network_client_kwargs(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient kwargs from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig, without creating the client itself.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor
+    """
+    if network_config is None:
+        return {}
+
+    client_kwargs: dict[str, Any] = {}
+
+    if network_config.tls:
+        ssl_context = _build_ssl_context(network_config.tls)
+        client_kwargs["verify"] = ssl_context
+
+    if network_config.proxy:
+        mounts = _build_proxy_mounts(network_config.proxy)
+        if mounts:
+            client_kwargs["mounts"] = mounts
+
+    if network_config.timeout is not None:
+        if isinstance(network_config.timeout, TimeoutConfig):
+            # httpx.Timeout requires all four parameters (connect, read, write, pool)
+            # to be set explicitly, or a default timeout value
+            timeout_kwargs: dict[str, float | None] = {
+                "connect": network_config.timeout.connect,
+                "read": network_config.timeout.read,
+                "write": None,
+                "pool": None,
+            }
+            client_kwargs["timeout"] = httpx.Timeout(**timeout_kwargs)
+        else:
+            client_kwargs["timeout"] = httpx.Timeout(network_config.timeout)
+
+    if network_config.headers:
+        client_kwargs["headers"] = network_config.headers
+
+    return client_kwargs
+
+
+def _extract_client_config(existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient) -> dict[str, Any]:
+    """
+    Extract configuration (auth, headers) from an existing http_client.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+
+    Returns:
+        Dictionary with extracted auth and headers, if available
+    """
+    config: dict[str, Any] = {}
+
+    # Extract from DefaultAsyncHttpxClient
+    if isinstance(existing_client, DefaultAsyncHttpxClient):
+        underlying_client = existing_client._client  # type: ignore[union-attr,attr-defined]
+        if hasattr(underlying_client, "_auth"):
+            config["auth"] = underlying_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+    else:
+        # Extract from plain httpx.AsyncClient
+        if hasattr(existing_client, "_auth"):
+            config["auth"] = existing_client._auth  # type: ignore[attr-defined]
+        if hasattr(existing_client, "_headers"):
+            config["headers"] = existing_client._headers  # type: ignore[attr-defined]
+
+    return config
+
+
+def _merge_network_config_into_client(
+    existing_client: httpx.AsyncClient | DefaultAsyncHttpxClient, network_config: NetworkConfig | None
+) -> httpx.AsyncClient | DefaultAsyncHttpxClient:
+    """
+    Merge network configuration into an existing http_client.
+
+    Extracts auth and headers from the existing client, merges with network config,
+    and creates a new client with all settings combined.
+
+    Args:
+        existing_client: Existing httpx client (may be DefaultAsyncHttpxClient)
+        network_config: Network configuration to apply
+
+    Returns:
+        New client with network config applied, or original client if merge fails
+    """
+    if network_config is None:
+        return existing_client
+
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return existing_client
+
+    try:
+        # Extract existing client config (auth, headers)
+        existing_config = _extract_client_config(existing_client)
+
+        # Merge headers: existing headers first, then network config (network takes precedence)
+        if existing_config.get("headers") and network_kwargs.get("headers"):
+            merged_headers = dict(existing_config["headers"])
+            merged_headers.update(network_kwargs["headers"])
+            network_kwargs["headers"] = merged_headers
+        elif existing_config.get("headers"):
+            network_kwargs["headers"] = existing_config["headers"]
+
+        # Preserve auth from existing client
+        if existing_config.get("auth"):
+            network_kwargs["auth"] = existing_config["auth"]
+
+        # Create new client with merged config
+        new_client = httpx.AsyncClient(**network_kwargs)
+
+        # If original was DefaultAsyncHttpxClient, wrap the new client
+        if isinstance(existing_client, DefaultAsyncHttpxClient):
+            return DefaultAsyncHttpxClient(client=new_client, headers=network_kwargs.get("headers"))  # type: ignore[call-arg]
+
+        return new_client
+    except Exception as e:
+        logger.debug(f"Could not merge network config into existing http_client: {e}. Using original client.")
+        return existing_client
+
+
+def build_http_client(network_config: NetworkConfig | None) -> dict[str, Any]:
+    """
+    Build httpx.AsyncClient parameters from network configuration.
+
+    This function creates the appropriate kwargs to pass to httpx.AsyncClient
+    based on the provided NetworkConfig.
+
+    Args:
+        network_config: Network configuration including TLS, proxy, and timeout settings
+
+    Returns:
+        Dictionary of kwargs to pass to httpx.AsyncClient constructor,
+        wrapped in {"http_client": AsyncClient(...)} for use with AsyncOpenAI
+    """
+    network_kwargs = _build_network_client_kwargs(network_config)
+    if not network_kwargs:
+        return {}
+
+    return {"http_client": httpx.AsyncClient(**network_kwargs)}
```
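The new `build_http_client` helper is meant to be unpacked directly into an OpenAI-compatible client constructor. A minimal usage sketch (the endpoint, API key, and header values are illustrative, not taken from this diff):

```python
from openai import AsyncOpenAI

from llama_stack.providers.utils.inference.http_client import build_http_client
from llama_stack.providers.utils.inference.model_registry import (
    NetworkConfig,
    TimeoutConfig,
    TLSConfig,
)

# Force TLS 1.2+ with split connect/read timeouts. verify=True keeps the
# system trust store; _build_ssl_context returns a full SSLContext here
# only because min_version is an "advanced option".
network = NetworkConfig(
    tls=TLSConfig(verify=True, min_version="TLSv1.2"),
    timeout=TimeoutConfig(connect=5.0, read=60.0),
    headers={"X-Request-Source": "llama-stack"},
)

# build_http_client returns {} when nothing is configured, or
# {"http_client": httpx.AsyncClient(...)} otherwise, so it can be
# unpacked straight into the AsyncOpenAI constructor.
client = AsyncOpenAI(
    base_url="https://inference.example.com/v1",  # illustrative endpoint
    api_key="sk-example",  # illustrative key
    **build_http_client(network),
)
```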
```diff
--- a/llama_stack/providers/utils/inference/litellm_openai_mixin.py
+++ b/llama_stack/providers/utils/inference/litellm_openai_mixin.py
@@ -30,6 +30,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 logger = get_logger(name=__name__, category="providers::utils")
@@ -146,6 +147,9 @@ class LiteLLMOpenAIMixin(
         self,
         params: OpenAIEmbeddingsRequestWithExtraBody,
     ) -> OpenAIEmbeddingsResponse:
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         if not self.model_store:
             raise ValueError("Model store is not initialized")
 
@@ -270,6 +274,8 @@ class LiteLLMOpenAIMixin(
             top_logprobs=params.top_logprobs,
             top_p=params.top_p,
             user=params.user,
+            safety_identifier=params.safety_identifier,
+            reasoning_effort=params.reasoning_effort,
             api_key=self.get_api_key(),
             api_base=self.api_base,
             **self._litellm_extra_request_params(params),
```
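Judging only from the call added above, `validate_embeddings_input_is_text` guards the request object before it reaches LiteLLM. A hedged sketch of the intended behavior; the request field names and the exact exception type are assumptions, not confirmed by this diff:

```python
from llama_stack_api import (
    OpenAIEmbeddingsRequestWithExtraBody,
    validate_embeddings_input_is_text,
)

# Plain-text input: expected to pass the guard unchanged.
text_req = OpenAIEmbeddingsRequestWithExtraBody(
    model="text-embedding-3-small",  # illustrative model id
    input=["hello world"],
)
validate_embeddings_input_is_text(text_req)

# Token-array input (lists of ints, which the OpenAI embeddings API itself
# accepts): presumably the case this guard exists to reject.
token_req = OpenAIEmbeddingsRequestWithExtraBody(
    model="text-embedding-3-small",
    input=[[15339, 1917]],
)
try:
    validate_embeddings_input_is_text(token_req)
except ValueError as e:  # exact exception type is an assumption
    print(f"rejected: {e}")
```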
```diff
--- a/llama_stack/providers/utils/inference/model_registry.py
+++ b/llama_stack/providers/utils/inference/model_registry.py
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import Any
+from pathlib import Path
+from typing import Any, Literal
 
-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr, field_validator, model_validator
 
 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference import (
@@ -17,6 +18,147 @@ from llama_stack_api import Model, ModelsProtocolPrivate, ModelType, Unsupported
 logger = get_logger(name=__name__, category="providers::utils")
 
 
+class TLSConfig(BaseModel):
+    """TLS/SSL configuration for secure connections."""
+
+    verify: bool | Path = Field(
+        default=True,
+        description="Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file.",
+    )
+    min_version: Literal["TLSv1.2", "TLSv1.3"] | None = Field(
+        default=None,
+        description="Minimum TLS version to use. Defaults to system default if not specified.",
+    )
+    ciphers: list[str] | None = Field(
+        default=None,
+        description="List of allowed cipher suites (e.g., ['ECDHE+AESGCM', 'DHE+AESGCM']).",
+    )
+    client_cert: Path | None = Field(
+        default=None,
+        description="Path to client certificate file for mTLS authentication.",
+    )
+    client_key: Path | None = Field(
+        default=None,
+        description="Path to client private key file for mTLS authentication.",
+    )
+
+    @field_validator("verify", mode="before")
+    @classmethod
+    def validate_verify(cls, v: bool | str | Path) -> bool | Path:
+        if isinstance(v, bool):
+            return v
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"TLS certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"TLS certificate path is not a file: {v}")
+        return cert_path
+
+    @field_validator("client_cert", "client_key", mode="before")
+    @classmethod
+    def validate_cert_paths(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Certificate/key file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Certificate/key path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_mtls_pair(self) -> "TLSConfig":
+        if (self.client_cert is None) != (self.client_key is None):
+            raise ValueError("Both client_cert and client_key must be provided together for mTLS")
+        return self
+
+
+class ProxyConfig(BaseModel):
+    """Proxy configuration for HTTP connections."""
+
+    url: HttpUrl | None = Field(
+        default=None,
+        description="Single proxy URL for all connections (e.g., 'http://proxy.example.com:8080').",
+    )
+    http: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTP connections.",
+    )
+    https: HttpUrl | None = Field(
+        default=None,
+        description="Proxy URL for HTTPS connections.",
+    )
+    cacert: Path | None = Field(
+        default=None,
+        description="Path to CA certificate file for verifying the proxy's certificate. Required for proxies in interception mode.",
+    )
+    no_proxy: list[str] | None = Field(
+        default=None,
+        description="List of hosts that should bypass the proxy (e.g., ['localhost', '127.0.0.1', '.internal.corp']).",
+    )
+
+    @field_validator("cacert", mode="before")
+    @classmethod
+    def validate_cacert(cls, v: str | Path | None) -> Path | None:
+        if v is None:
+            return None
+        if isinstance(v, str):
+            cert_path = Path(v).expanduser().resolve()
+        else:
+            cert_path = v.expanduser().resolve()
+        if not cert_path.exists():
+            raise ValueError(f"Proxy CA certificate file does not exist: {v}")
+        if not cert_path.is_file():
+            raise ValueError(f"Proxy CA certificate path is not a file: {v}")
+        return cert_path
+
+    @model_validator(mode="after")
+    def validate_proxy_config(self) -> "ProxyConfig":
+        if self.url and (self.http or self.https):
+            raise ValueError("Cannot specify both 'url' and 'http'/'https' proxy settings")
+        return self
+
+
+class TimeoutConfig(BaseModel):
+    """Timeout configuration for HTTP connections."""
+
+    connect: float | None = Field(
+        default=None,
+        description="Connection timeout in seconds.",
+    )
+    read: float | None = Field(
+        default=None,
+        description="Read timeout in seconds.",
+    )
+
+
+class NetworkConfig(BaseModel):
+    """Network configuration for remote provider connections."""
+
+    tls: TLSConfig | None = Field(
+        default=None,
+        description="TLS/SSL configuration for secure connections.",
+    )
+    proxy: ProxyConfig | None = Field(
+        default=None,
+        description="Proxy configuration for HTTP connections.",
+    )
+    timeout: float | TimeoutConfig | None = Field(
+        default=None,
+        description="Timeout configuration. Can be a float (for both connect and read) or a TimeoutConfig object with separate connect and read timeouts.",
+    )
+    headers: dict[str, str] | None = Field(
+        default=None,
+        description="Additional HTTP headers to include in all requests.",
+    )
+
+
 class RemoteInferenceProviderConfig(BaseModel):
     allowed_models: list[str] | None = Field(
         default=None,
@@ -31,6 +173,10 @@ class RemoteInferenceProviderConfig(BaseModel):
         description="Authentication credential for the provider",
         alias="api_key",
    )
+    network: NetworkConfig | None = Field(
+        default=None,
+        description="Network configuration including TLS, proxy, and timeout settings.",
+    )
 
 
 # TODO: this class is more confusing than useful right now. We need to make it
```
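Taken together, these models validate network settings at construction time rather than at request time. A short sketch using the new classes (the proxy URL and header values are illustrative):

```python
from llama_stack.providers.utils.inference.model_registry import (
    NetworkConfig,
    ProxyConfig,
    TimeoutConfig,
    TLSConfig,
)

# Route traffic through a corporate proxy with a split connect/read timeout.
network = NetworkConfig(
    tls=TLSConfig(verify=True, min_version="TLSv1.3"),
    proxy=ProxyConfig(url="http://proxy.example.com:8080"),
    timeout=TimeoutConfig(connect=5.0, read=120.0),
    headers={"X-Env": "staging"},
)

# The model validators enforce invariants immediately: mixing the single
# `url` with per-scheme `http`/`https` proxies raises at construction
# (pydantic's ValidationError is a ValueError subclass).
try:
    ProxyConfig(url="http://proxy.example.com:8080", https="http://other:3128")
except ValueError as e:
    print(e)
```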
```diff
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@@ -7,144 +7,14 @@ from typing import (
     Any,
 )
 
-from openai.types.chat import (
-    ChatCompletionMessageToolCall,
-)
 from pydantic import BaseModel
 
 from llama_stack.log import get_logger
-from llama_stack.models.llama.datatypes import (
-    BuiltinTool,
-    StopReason,
-    ToolCall,
-    ToolDefinition,
-)
+from llama_stack.models.llama.datatypes import BuiltinTool, ToolDefinition
 
 logger = get_logger(name=__name__, category="providers::utils")
 
 
-class OpenAICompatCompletionChoiceDelta(BaseModel):
-    content: str
-
-
-class OpenAICompatLogprobs(BaseModel):
-    text_offset: list[int] | None = None
-
-    token_logprobs: list[float] | None = None
-
-    tokens: list[str] | None = None
-
-    top_logprobs: list[dict[str, float]] | None = None
-
-
-class OpenAICompatCompletionChoice(BaseModel):
-    finish_reason: str | None = None
-    text: str | None = None
-    delta: OpenAICompatCompletionChoiceDelta | None = None
-    logprobs: OpenAICompatLogprobs | None = None
-
-
-class OpenAICompatCompletionResponse(BaseModel):
-    choices: list[OpenAICompatCompletionChoice]
-
-
-def text_from_choice(choice) -> str:
-    if hasattr(choice, "delta") and choice.delta:
-        return choice.delta.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-    if hasattr(choice, "message"):
-        return choice.message.content  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-    return choice.text  # type: ignore[no-any-return] # external OpenAI types lack precise annotations
-
-
-def get_stop_reason(finish_reason: str) -> StopReason:
-    if finish_reason in ["stop", "eos"]:
-        return StopReason.end_of_turn
-    elif finish_reason == "eom":
-        return StopReason.end_of_message
-    elif finish_reason == "length":
-        return StopReason.out_of_tokens
-
-    return StopReason.out_of_tokens
-
-
-class UnparseableToolCall(BaseModel):
-    """
-    A ToolCall with arguments that are not valid JSON.
-    Mirrors the ToolCall schema, but with arguments as a string.
-    """
-
-    call_id: str = ""
-    tool_name: str = ""
-    arguments: str = ""
-
-
-def convert_tool_call(
-    tool_call: ChatCompletionMessageToolCall,
-) -> ToolCall | UnparseableToolCall:
-    """
-    Convert a ChatCompletionMessageToolCall tool call to either a
-    ToolCall or UnparseableToolCall. Returns an UnparseableToolCall
-    if the tool call is not valid ToolCall.
-    """
-    try:
-        valid_tool_call = ToolCall(
-            call_id=tool_call.id,
-            tool_name=tool_call.function.name,
-            arguments=tool_call.function.arguments,
-        )
-    except Exception:
-        return UnparseableToolCall(
-            call_id=tool_call.id or "",
-            tool_name=tool_call.function.name or "",
-            arguments=tool_call.function.arguments or "",
-        )
-
-    return valid_tool_call
-
-
-PYTHON_TYPE_TO_LITELLM_TYPE = {
-    "int": "integer",
-    "float": "number",
-    "bool": "boolean",
-    "str": "string",
-}
-
-
-def to_openai_param_type(param_type: str) -> dict:
-    """
-    Convert Python type hints to OpenAI parameter type format.
-
-    Examples:
-        'str' -> {'type': 'string'}
-        'int' -> {'type': 'integer'}
-        'list[str]' -> {'type': 'array', 'items': {'type': 'string'}}
-        'list[int]' -> {'type': 'array', 'items': {'type': 'integer'}}
-    """
-    # Handle basic types first
-    basic_types = {
-        "str": "string",
-        "int": "integer",
-        "float": "number",
-        "bool": "boolean",
-    }
-
-    if param_type in basic_types:
-        return {"type": basic_types[param_type]}
-
-    # Handle list/array types
-    if param_type.startswith("list[") and param_type.endswith("]"):
-        inner_type = param_type[5:-1]
-        if inner_type in basic_types:
-            return {
-                "type": "array",
-                "items": {"type": basic_types.get(inner_type, inner_type)},
-            }
-
-    return {"type": param_type}
-
-
 def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
     """
     Convert a ToolDefinition to an OpenAI API-compatible dictionary.
@@ -210,33 +80,6 @@ async def prepare_openai_completion_params(**params):
     return completion_params
 
 
-def prepare_openai_embeddings_params(
-    model: str,
-    input: str | list[str],
-    encoding_format: str | None = "float",
-    dimensions: int | None = None,
-    user: str | None = None,
-):
-    if model is None:
-        raise ValueError("Model must be provided for embeddings")
-
-    input_list = [input] if isinstance(input, str) else input
-
-    params: dict[str, Any] = {
-        "model": model,
-        "input": input_list,
-    }
-
-    if encoding_format is not None:
-        params["encoding_format"] = encoding_format
-    if dimensions is not None:
-        params["dimensions"] = dimensions
-    if user is not None:
-        params["user"] = user
-
-    return params
-
-
 def get_stream_options_for_telemetry(
     stream_options: dict[str, Any] | None,
     is_streaming: bool,
```