llama-stack 0.4.3__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +187 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/__init__.py +0 -25
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +1 -158
- llama_stack/providers/utils/inference/openai_mixin.py +42 -2
- llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/RECORD +135 -279
- llama_stack-0.5.0.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0

llama_stack/providers/remote/eval/nvidia/eval.py

@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
 from llama_stack_api import (
     Agents,
     Benchmark,
-    BenchmarkConfig,
     BenchmarksProtocolPrivate,
     DatasetIO,
     Datasets,
     Eval,
     EvaluateResponse,
+    EvaluateRowsRequest,
     Inference,
     Job,
+    JobCancelRequest,
+    JobResultRequest,
     JobStatus,
+    JobStatusRequest,
+    RunEvalRequest,
     Scoring,
     ScoringResult,
 )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(
 
     async def run_eval(
         self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        request: RunEvalRequest,
     ) -> Job:
         """Run an evaluation job for a benchmark."""
         model = (
-            benchmark_config.eval_candidate.model
-            if benchmark_config.eval_candidate.type == "model"
-            else benchmark_config.eval_candidate.config.model
+            request.benchmark_config.eval_candidate.model
+            if request.benchmark_config.eval_candidate.type == "model"
+            else request.benchmark_config.eval_candidate.config.model
         )
         nvidia_model = self.get_provider_model_id(model) or model
 
         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
-                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                 "target": {"type": "model", "model": nvidia_model},
             },
         )
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(
 
     async def evaluate_rows(
         self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
+        request: EvaluateRowsRequest,
     ) -> EvaluateResponse:
         raise NotImplementedError()
 
-    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
+    async def job_status(self, request: JobStatusRequest) -> Job:
         """Get the status of an evaluation job.
 
         EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
         JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
         """
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
        result_status = result["status"]
 
         job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
         elif result_status in ["cancelled"]:
             job_status = JobStatus.cancelled
 
-        return Job(job_id=job_id, status=job_status)
+        return Job(job_id=request.job_id, status=job_status)
 
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    async def job_cancel(self, request: JobCancelRequest) -> None:
         """Cancel the evaluation job."""
-        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+        await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})
 
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
         """Returns the results of the evaluation job."""
 
-        job = await self.job_status(benchmark_id, job_id)
+        job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+        job = await self.job_status(job_status_request)
         status = job.status
         if not status or status != JobStatus.completed:
-            raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+            raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")
 
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")
 
         return EvaluateResponse(
             # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
             generations=[],
             scores={
-                benchmark_id: ScoringResult(
+                request.benchmark_id: ScoringResult(
                     score_rows=[],
                     aggregated_results=result,
                 )
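
The four hunks above show the 0.5.0 migration pattern in miniature: each Eval method loses its positional parameters and takes a single typed request model instead. For illustration only, a minimal sketch of how a caller might drive the new surface; the request classes and their `benchmark_id`/`benchmark_config`/`job_id` fields come from the diff above, while the `impl` handle, the enum attribute names for non-terminal states, and the polling cadence are assumptions:

```python
# Hypothetical caller for the request-object Eval API; not part of this diff.
import asyncio

from llama_stack_api import JobResultRequest, JobStatus, JobStatusRequest, RunEvalRequest


async def run_and_collect(impl, benchmark_id: str, benchmark_config):
    # 0.4.3 equivalent: await impl.run_eval(benchmark_id, benchmark_config)
    job = await impl.run_eval(RunEvalRequest(benchmark_id=benchmark_id, benchmark_config=benchmark_config))

    # Poll until the job leaves its non-terminal states.
    status_request = JobStatusRequest(benchmark_id=benchmark_id, job_id=job.job_id)
    while (await impl.job_status(status_request)).status in (JobStatus.scheduled, JobStatus.in_progress):
        await asyncio.sleep(5)

    # EvaluateResponse.scores is keyed by benchmark_id (see the job_result hunk above).
    response = await impl.job_result(JobResultRequest(benchmark_id=benchmark_id, job_id=job.job_id))
    return response.scores[benchmark_id].aggregated_results
```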

llama_stack/providers/remote/files/s3/README.md (new file)

@@ -0,0 +1,266 @@
+# S3 Files Provider
+
+A remote S3-based implementation of the Llama Stack Files API that provides scalable cloud file storage with metadata persistence.
+
+## Features
+
+- **AWS S3 Storage**: Store files in AWS S3 buckets for scalable, durable storage
+- **Metadata Management**: Uses SQL database for efficient file metadata queries
+- **OpenAI API Compatibility**: Full compatibility with OpenAI Files API endpoints
+- **Flexible Authentication**: Support for IAM roles and access keys
+- **Custom S3 Endpoints**: Support for MinIO and other S3-compatible services
+
+## Configuration
+
+### Basic Configuration
+
+```yaml
+api: files
+provider_type: remote::s3
+config:
+  bucket_name: my-llama-stack-files
+  region: us-east-1
+  metadata_store:
+    type: sqlite
+    db_path: ./s3_files_metadata.db
+```
+
+### Advanced Configuration
+
+```yaml
+api: files
+provider_type: remote::s3
+config:
+  bucket_name: my-llama-stack-files
+  region: us-east-1
+  aws_access_key_id: YOUR_ACCESS_KEY
+  aws_secret_access_key: YOUR_SECRET_KEY
+  endpoint_url: https://s3.amazonaws.com  # Optional for custom endpoints
+  metadata_store:
+    type: sqlite
+    db_path: ./s3_files_metadata.db
+```
+
+### Environment Variables
+
+The configuration supports environment variable substitution:
+
+```yaml
+config:
+  bucket_name: "${env.S3_BUCKET_NAME}"
+  region: "${env.AWS_REGION:=us-east-1}"
+  aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
+  aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
+  endpoint_url: "${env.S3_ENDPOINT_URL:=}"
+```
+
+Note: `S3_BUCKET_NAME` has no default value since S3 bucket names must be globally unique.
+
+## Authentication
+
+### IAM Roles (Recommended)
+
+For production deployments, use IAM roles:
+
+```yaml
+config:
+  bucket_name: my-bucket
+  region: us-east-1
+  # No credentials needed - will use IAM role
+```
+
+### Access Keys
+
+For development or specific use cases:
+
+```yaml
+config:
+  bucket_name: my-bucket
+  region: us-east-1
+  aws_access_key_id: AKIAIOSFODNN7EXAMPLE
+  aws_secret_access_key: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
+```
+
+## S3 Bucket Setup
+
+### Required Permissions
+
+The S3 provider requires the following permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:GetObject",
+        "s3:PutObject",
+        "s3:DeleteObject",
+        "s3:ListBucket"
+      ],
+      "Resource": [
+        "arn:aws:s3:::your-bucket-name",
+        "arn:aws:s3:::your-bucket-name/*"
+      ]
+    }
+  ]
+}
+```
+
+### Automatic Bucket Creation
+
+By default, the S3 provider expects the bucket to already exist. If you want the provider to automatically create the bucket when it doesn't exist, set `auto_create_bucket: true` in your configuration:
+
+```yaml
+config:
+  bucket_name: my-bucket
+  auto_create_bucket: true  # Will create bucket if it doesn't exist
+  region: us-east-1
+```
+
+**Note**: When `auto_create_bucket` is enabled, the provider will need additional permissions:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Effect": "Allow",
+      "Action": [
+        "s3:GetObject",
+        "s3:PutObject",
+        "s3:DeleteObject",
+        "s3:ListBucket",
+        "s3:CreateBucket"
+      ],
+      "Resource": [
+        "arn:aws:s3:::your-bucket-name",
+        "arn:aws:s3:::your-bucket-name/*"
+      ]
+    }
+  ]
+}
+```
+
+### Bucket Policy (Optional)
+
+For additional security, you can add a bucket policy:
+
+```json
+{
+  "Version": "2012-10-17",
+  "Statement": [
+    {
+      "Sid": "LlamaStackAccess",
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
+      },
+      "Action": [
+        "s3:GetObject",
+        "s3:PutObject",
+        "s3:DeleteObject"
+      ],
+      "Resource": "arn:aws:s3:::your-bucket-name/*"
+    },
+    {
+      "Sid": "LlamaStackBucketAccess",
+      "Effect": "Allow",
+      "Principal": {
+        "AWS": "arn:aws:iam::YOUR-ACCOUNT:role/LlamaStackRole"
+      },
+      "Action": [
+        "s3:ListBucket"
+      ],
+      "Resource": "arn:aws:s3:::your-bucket-name"
+    }
+  ]
+}
+```
+
+## Features
+
+### Metadata Persistence
+
+File metadata is stored in a SQL database for fast queries and OpenAI API compatibility. The metadata includes:
+
+- File ID
+- Original filename
+- Purpose (assistants, batch, etc.)
+- File size in bytes
+- Created and expiration timestamps
+
+### TTL and Cleanup
+
+Files currently have a fixed long expiration time (100 years).
+
+## Development and Testing
+
+### Using MinIO
+
+For self-hosted S3-compatible storage:
+
+```yaml
+config:
+  bucket_name: test-bucket
+  region: us-east-1
+  endpoint_url: http://localhost:9000
+  aws_access_key_id: minioadmin
+  aws_secret_access_key: minioadmin
+```
+
+### Using OCI Object Storage with S3 Compatibility
+[Official Object Storage Amazon S3 Compatibility API Documentation](https://docs.oracle.com/en-us/iaas/Content/Object/Tasks/s3compatibleapi.htm)
+
+OCI Object Storage can be utilized through the OCI S3 Compatibility API. Simply update the `config.yaml` and set the env-vars as below.
+
+#### config.yaml
+```yaml
+provider_type: remote::s3
+config:
+  bucket_name: "${env.S3_BUCKET_NAME}"
+  region: "${env.AWS_REGION:=us-east-1}"
+  aws_access_key_id: "${env.AWS_ACCESS_KEY_ID:=}"
+  aws_secret_access_key: "${env.AWS_SECRET_ACCESS_KEY:=}"
+  endpoint_url: "${env.S3_ENDPOINT_URL:=}"
+  metadata_store:
+    table_name: files_metadata
+    backend: sql_default
+```
+#### .env
+```
+AWS_ACCESS_KEY_ID=OCI_ACCESS_KEY
+AWS_SECRET_ACCESS_KEY=OCI_SECRET_KEY
+S3_BUCKET_NAME=OCI_BUCKET_NAME
+S3_ENDPOINT_URL=https://<namespace>.compat.objectstorage.<region>.oci.customer-oci.com
+AWS_REQUEST_CHECKSUM_CALCULATION=when_required
+AWS_RESPONSE_CHECKSUM_VALIDATION=when_required
+```
+
+
+## Monitoring and Logging
+
+The provider logs important operations and errors. For production deployments, consider:
+
+- CloudWatch monitoring for S3 operations
+- Custom metrics for file upload/download rates
+- Error rate monitoring
+- Performance metrics tracking
+
+## Error Handling
+
+The provider handles various error scenarios:
+
+- S3 connectivity issues
+- Bucket access permissions
+- File not found errors
+- Metadata consistency checks
+
+## Known Limitations
+
+- Fixed long TTL (100 years) instead of configurable expiration
+- No server-side encryption enabled by default
+- No support for AWS session tokens
+- No S3 key prefix organization support
+- No multipart upload support (all files uploaded as single objects)
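
Since the README above advertises OpenAI Files API compatibility, here is an illustrative sketch of exercising the provider through the stock `openai` client. The server URL, port, base path, API key, and filename are assumptions for a local Llama Stack deployment, not values taken from this diff:

```python
# Illustrative only: the S3 provider backs OpenAI-compatible /files endpoints.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="unused-locally")

# Upload: the bytes land in the configured S3 bucket; the metadata row
# (file ID, filename, purpose, size, timestamps) goes to the SQL metadata store.
with open("notes.txt", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

# List, fetch content, and delete mirror the upstream OpenAI Files API.
for item in client.files.list():
    print(item.id, item.filename, item.bytes)
content = client.files.content(uploaded.id).read()
client.files.delete(uploaded.id)
```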

llama_stack/providers/remote/files/s3/config.py

@@ -6,7 +6,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, SecretStr
 
 from llama_stack.core.storage.datatypes import SqlStoreReference
 
@@ -16,8 +16,10 @@ class S3FilesImplConfig(BaseModel):
 
     bucket_name: str = Field(description="S3 bucket name to store files")
     region: str = Field(default="us-east-1", description="AWS region where the bucket is located")
-    aws_access_key_id: str | None = Field(default=None, description="AWS access key ID (optional if using IAM roles)")
-    aws_secret_access_key: str | None = Field(
+    aws_access_key_id: SecretStr | None = Field(
+        default=None, description="AWS access key ID (optional if using IAM roles)"
+    )
+    aws_secret_access_key: SecretStr | None = Field(
         default=None, description="AWS secret access key (optional if using IAM roles)"
     )
     endpoint_url: str | None = Field(default=None, description="Custom S3 endpoint URL (for MinIO, LocalStack, etc.)")
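
The move from plain `str` to Pydantic's `SecretStr` keeps the AWS credentials out of reprs, logs, and `model_dump()` output; the value is only released by an explicit `get_secret_value()` call, which is exactly what the `files.py` hunk below adds before handing credentials to the S3 client. A standalone sketch of the behavior, independent of the llama-stack class:

```python
# SecretStr masks the value everywhere it could leak by accident.
from pydantic import BaseModel, SecretStr


class Creds(BaseModel):
    aws_secret_access_key: SecretStr | None = None


creds = Creds(aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY")
print(creds)  # aws_secret_access_key=SecretStr('**********')
print(creds.aws_secret_access_key.get_secret_value())  # the real key, e.g. for boto3
```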

llama_stack/providers/remote/files/s3/files.py

@@ -57,8 +57,8 @@ def _create_s3_client(config: S3FilesImplConfig) -> "S3Client":
     if config.aws_access_key_id and config.aws_secret_access_key:
         s3_config.update(
             {
-                "aws_access_key_id": config.aws_access_key_id,
-                "aws_secret_access_key": config.aws_secret_access_key,
+                "aws_access_key_id": config.aws_access_key_id.get_secret_value(),
+                "aws_secret_access_key": config.aws_secret_access_key.get_secret_value(),
             }
         )
 
@@ -12,6 +12,7 @@ from llama_stack_api import (
|
|
|
12
12
|
OpenAIEmbeddingsRequestWithExtraBody,
|
|
13
13
|
OpenAIEmbeddingsResponse,
|
|
14
14
|
OpenAIEmbeddingUsage,
|
|
15
|
+
validate_embeddings_input_is_text,
|
|
15
16
|
)
|
|
16
17
|
|
|
17
18
|
from .config import GeminiConfig
|
|
@@ -37,6 +38,9 @@ class GeminiInferenceAdapter(OpenAIMixin):
|
|
|
37
38
|
Override embeddings method to handle Gemini's missing usage statistics.
|
|
38
39
|
Gemini's embedding API doesn't return usage information, so we provide default values.
|
|
39
40
|
"""
|
|
41
|
+
# Validate that input contains only text, not token arrays
|
|
42
|
+
validate_embeddings_input_is_text(params)
|
|
43
|
+
|
|
40
44
|
# Build request params conditionally to avoid NotGiven/Omit type mismatch
|
|
41
45
|
request_params: dict[str, Any] = {
|
|
42
46
|
"model": await self._get_provider_model_id(params.model),
|
|
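
`validate_embeddings_input_is_text` is called here (and in the Together adapter further down) but its body is not part of this diff. A plausible sketch of what such a guard does, not the actual llama-stack implementation: OpenAI-style embeddings requests accept a string, a list of strings, a list of token IDs, or a list of token-ID lists, and backends that only take raw text must reject the tokenized forms up front:

```python
# Sketch only; the real validator lives in llama_stack_api and may differ.
def validate_embeddings_input_is_text_sketch(params) -> None:
    inp = params.input
    if isinstance(inp, str):
        return  # a single text string is always fine
    if isinstance(inp, list) and all(isinstance(item, str) for item in inp):
        return  # a batch of text strings is fine too
    # Anything else is token IDs (list[int]) or batched token IDs (list[list[int]]).
    raise ValueError("This provider only supports text input for embeddings, not token arrays.")
```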

llama_stack/providers/remote/inference/nvidia/NVIDIA.md (new file)

@@ -0,0 +1,203 @@
+# NVIDIA Inference Provider for LlamaStack
+
+This provider enables running inference using NVIDIA NIM.
+
+## Features
+- Endpoints for completions, chat completions, and embeddings for registered models
+
+## Getting Started
+
+### Prerequisites
+
+- LlamaStack with NVIDIA configuration
+- Access to NVIDIA NIM deployment
+- NIM for model to use for inference is deployed
+
+### Setup
+
+Build the NVIDIA environment:
+
+```bash
+uv pip install llama-stack-client
+uv run llama stack list-deps nvidia | xargs -L1 uv pip install
+```
+
+### Basic Usage using the LlamaStack Python Client
+
+#### Initialize the client
+
+```python
+import os
+
+os.environ["NVIDIA_API_KEY"] = (
+    ""  # Required if using hosted NIM endpoint. If self-hosted, not required.
+)
+os.environ["NVIDIA_BASE_URL"] = "http://nim.test"  # NIM URL
+
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+```
+
+### Create Chat Completion
+
+The following example shows how to create a chat completion for an NVIDIA NIM.
+
+```python
+response = client.chat.completions.create(
+    model="nvidia/meta/llama-3.1-8b-instruct",
+    messages=[
+        {
+            "role": "system",
+            "content": "You must respond to each message with only one word",
+        },
+        {
+            "role": "user",
+            "content": "Complete the sentence using one word: Roses are red, violets are:",
+        },
+    ],
+    stream=False,
+    max_tokens=50,
+)
+print(f"Response: {response.choices[0].message.content}")
+```
+
+### Tool Calling Example ###
+
+The following example shows how to do tool calling for an NVIDIA NIM.
+
+```python
+tool_definition = {
+    "type": "function",
+    "function": {
+        "name": "get_weather",
+        "description": "Get current weather information for a location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The city and state, e.g. San Francisco, CA",
+                },
+                "unit": {
+                    "type": "string",
+                    "description": "Temperature unit (celsius or fahrenheit)",
+                    "default": "celsius",
+                },
+            },
+            "required": ["location"],
+        },
+    },
+}
+
+tool_response = client.chat.completions.create(
+    model="nvidia/meta/llama-3.1-8b-instruct",
+    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
+    tools=[tool_definition],
+)
+
+print(f"Response content: {tool_response.choices[0].message.content}")
+if tool_response.choices[0].message.tool_calls:
+    for tool_call in tool_response.choices[0].message.tool_calls:
+        print(f"Tool Called: {tool_call.function.name}")
+        print(f"Arguments: {tool_call.function.arguments}")
+```
+
+### Structured Output Example
+
+The following example shows how to do structured output for an NVIDIA NIM.
+
+```python
+person_schema = {
+    "type": "object",
+    "properties": {
+        "name": {"type": "string"},
+        "age": {"type": "number"},
+        "occupation": {"type": "string"},
+    },
+    "required": ["name", "age", "occupation"],
+}
+
+structured_response = client.chat.completions.create(
+    model="nvidia/meta/llama-3.1-8b-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "Create a profile for a fictional person named Alice who is 30 years old and is a software engineer. ",
+        }
+    ],
+    extra_body={"nvext": {"guided_json": person_schema}},
+)
+print(f"Structured Response: {structured_response.choices[0].message.content}")
+```
+
+### Create Embeddings
+
+The following example shows how to create embeddings for an NVIDIA NIM.
+
+```python
+response = client.embeddings.create(
+    model="nvidia/nvidia/llama-3.2-nv-embedqa-1b-v2",
+    input=["What is the capital of France?"],
+    extra_body={"input_type": "query"},
+)
+print(f"Embeddings: {response.data}")
+```
+
+### Vision Language Models Example
+
+The following example shows how to run vision inference by using an NVIDIA NIM.
+
+```python
+def load_image_as_base64(image_path):
+    with open(image_path, "rb") as image_file:
+        img_bytes = image_file.read()
+        return base64.b64encode(img_bytes).decode("utf-8")
+
+
+image_path = {path_to_the_image}
+demo_image_b64 = load_image_as_base64(image_path)
+
+vlm_response = client.chat.completions.create(
+    model="nvidia/meta/llama-3.2-11b-vision-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/png;base64,{demo_image_b64}",
+                    },
+                },
+                {
+                    "type": "text",
+                    "text": "Please describe what you see in this image in detail.",
+                },
+            ],
+        }
+    ],
+)
+
+print(f"VLM Response: {vlm_response.choices[0].message.content}")
+```
+
+### Rerank Example
+
+The following example shows how to rerank documents using an NVIDIA NIM.
+
+```python
+rerank_response = client.alpha.inference.rerank(
+    model="nvidia/nvidia/llama-3.2-nv-rerankqa-1b-v2",
+    query="query",
+    items=[
+        "item_1",
+        "item_2",
+        "item_3",
+    ],
+)
+
+for i, result in enumerate(rerank_response):
+    print(f"{i+1}. [Index: {result.index}, " f"Score: {(result.relevance_score):.3f}]")
+```

llama_stack/providers/remote/inference/openai/openai.py

@@ -24,6 +24,8 @@ class OpenAIInferenceAdapter(OpenAIMixin):
 
     provider_data_api_key_field: str = "openai_api_key"
 
+    supports_tokenized_embeddings_input: bool = True
+
     embedding_model_metadata: dict[str, dict[str, int]] = {
         "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
         "text-embedding-3-large": {"embedding_dimension": 3072, "context_length": 8192},
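
The new `supports_tokenized_embeddings_input` flag marks OpenAI as a backend that does accept pre-tokenized embeddings input, in contrast to the Gemini and Together adapters in this release, which now reject it. For reference, this is roughly what tokenized input looks like against the upstream OpenAI API; the token IDs are arbitrary illustrations and `OPENAI_API_KEY` is assumed to be set:

```python
# Upstream OpenAI accepts token IDs in place of text for embeddings.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
resp = client.embeddings.create(
    model="text-embedding-3-small",
    input=[[791, 1989, 315, 279, 26436]],  # one pre-tokenized sequence
)
print(len(resp.data[0].embedding))  # 1536, matching embedding_model_metadata above
```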

llama_stack/providers/remote/inference/together/together.py

@@ -18,6 +18,7 @@ from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIEmbeddingsResponse,
     OpenAIEmbeddingUsage,
+    validate_embeddings_input_is_text,
 )
 
 from .config import TogetherImplConfig
@@ -74,6 +75,9 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
         - does not support user param, returns 400 Unrecognized request arguments supplied: user
         - does not support dimensions param, returns 400 Unrecognized request arguments supplied: dimensions
         """
+        # Validate that input contains only text, not token arrays
+        validate_embeddings_input_is_text(params)
+
         # Together support ticket #13332 -> will not fix
         if params.user is not None:
             raise ValueError("Together's embeddings endpoint does not support user param.")

llama_stack/providers/remote/inference/vertexai/config.py

@@ -19,7 +19,7 @@ class VertexAIProviderDataValidator(BaseModel):
     )
     vertex_location: str | None = Field(
         default=None,
-        description="Google Cloud location for Vertex AI (e.g., us-central1)",
+        description="Google Cloud location for Vertex AI (e.g., global)",
     )
 
 
@@ -31,7 +31,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
         description="Google Cloud project ID for Vertex AI",
     )
     location: str = Field(
-        default="us-central1",
+        default="global",
         description="Google Cloud location for Vertex AI",
     )
 
@@ -39,7 +39,7 @@ class VertexAIConfig(RemoteInferenceProviderConfig):
     def sample_run_config(
         cls,
         project: str = "${env.VERTEX_AI_PROJECT:=}",
-        location: str = "${env.VERTEX_AI_LOCATION:=us-central1}",
+        location: str = "${env.VERTEX_AI_LOCATION:=global}",
         **kwargs,
     ) -> dict[str, Any]:
         return {