llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/_list_deps.py +11 -7
- llama_stack/cli/stack/run.py +3 -25
- llama_stack/core/access_control/datatypes.py +78 -0
- llama_stack/core/configure.py +2 -2
- {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
- llama_stack/core/connectors/connectors.py +162 -0
- llama_stack/core/conversations/conversations.py +61 -58
- llama_stack/core/datatypes.py +54 -8
- llama_stack/core/library_client.py +60 -13
- llama_stack/core/prompts/prompts.py +43 -42
- llama_stack/core/routers/datasets.py +20 -17
- llama_stack/core/routers/eval_scoring.py +143 -53
- llama_stack/core/routers/inference.py +20 -9
- llama_stack/core/routers/safety.py +30 -42
- llama_stack/core/routers/vector_io.py +15 -7
- llama_stack/core/routing_tables/models.py +42 -3
- llama_stack/core/routing_tables/scoring_functions.py +19 -19
- llama_stack/core/routing_tables/shields.py +20 -17
- llama_stack/core/routing_tables/vector_stores.py +8 -5
- llama_stack/core/server/auth.py +192 -17
- llama_stack/core/server/fastapi_router_registry.py +40 -5
- llama_stack/core/server/server.py +24 -5
- llama_stack/core/stack.py +54 -10
- llama_stack/core/storage/datatypes.py +9 -0
- llama_stack/core/store/registry.py +1 -1
- llama_stack/core/utils/exec.py +2 -2
- llama_stack/core/utils/type_inspection.py +16 -2
- llama_stack/distributions/dell/config.yaml +4 -1
- llama_stack/distributions/dell/doc_template.md +209 -0
- llama_stack/distributions/dell/run-with-safety.yaml +4 -1
- llama_stack/distributions/nvidia/config.yaml +4 -1
- llama_stack/distributions/nvidia/doc_template.md +170 -0
- llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
- llama_stack/distributions/oci/config.yaml +4 -1
- llama_stack/distributions/oci/doc_template.md +140 -0
- llama_stack/distributions/open-benchmark/config.yaml +9 -1
- llama_stack/distributions/postgres-demo/config.yaml +1 -1
- llama_stack/distributions/starter/build.yaml +62 -0
- llama_stack/distributions/starter/config.yaml +22 -3
- llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/starter/starter.py +13 -1
- llama_stack/distributions/starter-gpu/build.yaml +62 -0
- llama_stack/distributions/starter-gpu/config.yaml +22 -3
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
- llama_stack/distributions/template.py +10 -2
- llama_stack/distributions/watsonx/config.yaml +4 -1
- llama_stack/log.py +1 -0
- llama_stack/models/llama/resources/dog.jpg +0 -0
- llama_stack/models/llama/resources/pasta.jpeg +0 -0
- llama_stack/models/llama/resources/small_dog.jpg +0 -0
- llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
- llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
- llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
- llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
- llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
- llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
- llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
- llama_stack/providers/inline/batches/reference/batches.py +2 -1
- llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
- llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
- llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
- llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
- llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
- llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
- llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
- llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
- llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
- llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
- llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
- llama_stack/providers/registry/agents.py +1 -0
- llama_stack/providers/registry/inference.py +1 -9
- llama_stack/providers/registry/vector_io.py +136 -16
- llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
- llama_stack/providers/remote/eval/nvidia/README.md +134 -0
- llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
- llama_stack/providers/remote/files/s3/README.md +266 -0
- llama_stack/providers/remote/files/s3/config.py +5 -3
- llama_stack/providers/remote/files/s3/files.py +2 -2
- llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
- llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
- llama_stack/providers/remote/inference/openai/openai.py +2 -0
- llama_stack/providers/remote/inference/together/together.py +4 -0
- llama_stack/providers/remote/inference/vertexai/config.py +3 -3
- llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
- llama_stack/providers/remote/inference/vllm/config.py +37 -18
- llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
- llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
- llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
- llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
- llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
- llama_stack/providers/remote/safety/nvidia/README.md +78 -0
- llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
- llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
- llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
- llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
- llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
- llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
- llama_stack/providers/remote/vector_io/oci/config.py +41 -0
- llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
- llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
- llama_stack/providers/utils/bedrock/client.py +3 -3
- llama_stack/providers/utils/bedrock/config.py +7 -7
- llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
- llama_stack/providers/utils/inference/http_client.py +239 -0
- llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
- llama_stack/providers/utils/inference/model_registry.py +148 -2
- llama_stack/providers/utils/inference/openai_compat.py +2 -1
- llama_stack/providers/utils/inference/openai_mixin.py +41 -2
- llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
- llama_stack/providers/utils/memory/vector_store.py +46 -19
- llama_stack/providers/utils/responses/responses_store.py +40 -6
- llama_stack/providers/utils/safety.py +114 -0
- llama_stack/providers/utils/tools/mcp.py +44 -3
- llama_stack/testing/api_recorder.py +9 -3
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
- llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
- llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
- llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
- llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
- llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
- llama_stack/models/llama/hadamard_utils.py +0 -88
- llama_stack/models/llama/llama3/args.py +0 -74
- llama_stack/models/llama/llama3/generation.py +0 -378
- llama_stack/models/llama/llama3/model.py +0 -304
- llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
- llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
- llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
- llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
- llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
- llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama3/quantization/loader.py +0 -316
- llama_stack/models/llama/llama3_1/__init__.py +0 -12
- llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
- llama_stack/models/llama/llama3_1/prompts.py +0 -258
- llama_stack/models/llama/llama3_2/__init__.py +0 -5
- llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
- llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
- llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
- llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
- llama_stack/models/llama/llama3_3/__init__.py +0 -5
- llama_stack/models/llama/llama3_3/prompts.py +0 -259
- llama_stack/models/llama/llama4/args.py +0 -107
- llama_stack/models/llama/llama4/ffn.py +0 -58
- llama_stack/models/llama/llama4/moe.py +0 -214
- llama_stack/models/llama/llama4/preprocess.py +0 -435
- llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
- llama_stack/models/llama/llama4/quantization/loader.py +0 -226
- llama_stack/models/llama/llama4/vision/__init__.py +0 -5
- llama_stack/models/llama/llama4/vision/embedding.py +0 -210
- llama_stack/models/llama/llama4/vision/encoder.py +0 -412
- llama_stack/models/llama/quantize_impls.py +0 -316
- llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
- llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
- llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
- llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
- llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
- llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
- llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
- llama_stack-0.4.3.dist-info/top_level.txt +0 -2
- llama_stack_api/__init__.py +0 -945
- llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/admin/api.py +0 -72
- llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/admin/models.py +0 -113
- llama_stack_api/agents.py +0 -173
- llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/batches/api.py +0 -53
- llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/batches/models.py +0 -78
- llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/common/errors.py +0 -95
- llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/common/responses.py +0 -77
- llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/connectors.py +0 -146
- llama_stack_api/conversations.py +0 -270
- llama_stack_api/datasetio.py +0 -55
- llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/datatypes.py +0 -373
- llama_stack_api/eval.py +0 -137
- llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/files/api.py +0 -51
- llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/files/models.py +0 -107
- llama_stack_api/inference.py +0 -1169
- llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/__init__.py +0 -945
- llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
- llama_stack_api/llama_stack_api/admin/api.py +0 -72
- llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
- llama_stack_api/llama_stack_api/admin/models.py +0 -113
- llama_stack_api/llama_stack_api/agents.py +0 -173
- llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
- llama_stack_api/llama_stack_api/batches/api.py +0 -53
- llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
- llama_stack_api/llama_stack_api/batches/models.py +0 -78
- llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
- llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
- llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
- llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
- llama_stack_api/llama_stack_api/common/__init__.py +0 -5
- llama_stack_api/llama_stack_api/common/content_types.py +0 -101
- llama_stack_api/llama_stack_api/common/errors.py +0 -95
- llama_stack_api/llama_stack_api/common/job_types.py +0 -38
- llama_stack_api/llama_stack_api/common/responses.py +0 -77
- llama_stack_api/llama_stack_api/common/training_types.py +0 -47
- llama_stack_api/llama_stack_api/common/type_system.py +0 -146
- llama_stack_api/llama_stack_api/connectors.py +0 -146
- llama_stack_api/llama_stack_api/conversations.py +0 -270
- llama_stack_api/llama_stack_api/datasetio.py +0 -55
- llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
- llama_stack_api/llama_stack_api/datasets/api.py +0 -35
- llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
- llama_stack_api/llama_stack_api/datasets/models.py +0 -152
- llama_stack_api/llama_stack_api/datatypes.py +0 -373
- llama_stack_api/llama_stack_api/eval.py +0 -137
- llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
- llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
- llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
- llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
- llama_stack_api/llama_stack_api/files/__init__.py +0 -35
- llama_stack_api/llama_stack_api/files/api.py +0 -51
- llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
- llama_stack_api/llama_stack_api/files/models.py +0 -107
- llama_stack_api/llama_stack_api/inference.py +0 -1169
- llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
- llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
- llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
- llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
- llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
- llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
- llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
- llama_stack_api/llama_stack_api/models.py +0 -171
- llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/llama_stack_api/post_training.py +0 -370
- llama_stack_api/llama_stack_api/prompts.py +0 -203
- llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/llama_stack_api/providers/api.py +0 -16
- llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/llama_stack_api/providers/models.py +0 -24
- llama_stack_api/llama_stack_api/py.typed +0 -0
- llama_stack_api/llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/llama_stack_api/resource.py +0 -37
- llama_stack_api/llama_stack_api/router_utils.py +0 -160
- llama_stack_api/llama_stack_api/safety.py +0 -132
- llama_stack_api/llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/llama_stack_api/scoring.py +0 -93
- llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/llama_stack_api/shields.py +0 -93
- llama_stack_api/llama_stack_api/tools.py +0 -226
- llama_stack_api/llama_stack_api/vector_io.py +0 -941
- llama_stack_api/llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/llama_stack_api/version.py +0 -9
- llama_stack_api/models.py +0 -171
- llama_stack_api/openai_responses.py +0 -1468
- llama_stack_api/post_training.py +0 -370
- llama_stack_api/prompts.py +0 -203
- llama_stack_api/providers/__init__.py +0 -33
- llama_stack_api/providers/api.py +0 -16
- llama_stack_api/providers/fastapi_routes.py +0 -57
- llama_stack_api/providers/models.py +0 -24
- llama_stack_api/py.typed +0 -0
- llama_stack_api/rag_tool.py +0 -168
- llama_stack_api/resource.py +0 -37
- llama_stack_api/router_utils.py +0 -160
- llama_stack_api/safety.py +0 -132
- llama_stack_api/schema_utils.py +0 -208
- llama_stack_api/scoring.py +0 -93
- llama_stack_api/scoring_functions.py +0 -211
- llama_stack_api/shields.py +0 -93
- llama_stack_api/tools.py +0 -226
- llama_stack_api/vector_io.py +0 -941
- llama_stack_api/vector_stores.py +0 -53
- llama_stack_api/version.py +0 -9
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -3,17 +3,18 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any

 from llama_stack_api import (
     DatasetIO,
     Datasets,
     Inference,
+    IterRowsRequest,
+    ScoreBatchRequest,
     ScoreBatchResponse,
+    ScoreRequest,
     ScoreResponse,
     Scoring,
     ScoringFn,
-    ScoringFnParams,
     ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
@@ -64,19 +65,15 @@ class LlmAsJudgeScoringImpl(

     async def score_batch(
         self,
-        dataset_id: str,
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
-        save_results_dataset: bool = False,
+        request: ScoreBatchRequest,
     ) -> ScoreBatchResponse:
-        all_rows = await self.datasetio_api.iterrows(
-            dataset_id=dataset_id,
-            limit=-1,
-        )
-        res = await self.score(
+        all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
+        score_request = ScoreRequest(
             input_rows=all_rows.data,
-            scoring_functions=scoring_functions,
+            scoring_functions=request.scoring_functions,
         )
-        if save_results_dataset:
+        res = await self.score(score_request)
+        if request.save_results_dataset:
             # TODO: persist and register dataset on to server for reading
             # self.datasets_api.register_dataset()
             raise NotImplementedError("Save results dataset not implemented yet")
@@ -87,14 +84,13 @@ class LlmAsJudgeScoringImpl(

     async def score(
         self,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
+        request: ScoreRequest,
     ) -> ScoreResponse:
         res = {}
-        for scoring_fn_id in scoring_functions.keys():
+        for scoring_fn_id in request.scoring_functions.keys():
             scoring_fn = self.llm_as_judge_fn
-            scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
-            score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
+            scoring_fn_params = request.scoring_functions.get(scoring_fn_id, None)
+            score_results = await scoring_fn.score(request.input_rows, scoring_fn_id, scoring_fn_params)
             agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
             res[scoring_fn_id] = ScoringResult(
                 score_rows=score_results,
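The two hunks above replace the flat keyword arguments of `score_batch` and `score` with single request objects. Below is a minimal caller-side sketch of the new convention; the field names (`dataset_id`, `scoring_functions`, `save_results_dataset`, `input_rows`) come from the diff itself, but the constructor calls and the `llm-as-judge::base` identifier are illustrative assumptions, not confirmed API.

```python
# Hedged sketch of the request-object calling convention shown in the hunks above.
# ScoreBatchRequest/ScoreRequest fields mirror how the methods unpack them; the
# scoring-function id and dataset id are placeholder values.
from llama_stack_api import ScoreBatchRequest, ScoreRequest


async def demo_scoring(scoring_impl) -> None:
    # Score every row of a registered dataset.
    batch_response = await scoring_impl.score_batch(
        ScoreBatchRequest(
            dataset_id="my-eval-dataset",
            scoring_functions={"llm-as-judge::base": None},
            save_results_dataset=False,
        )
    )
    # Or score ad-hoc rows directly.
    score_response = await scoring_impl.score(
        ScoreRequest(
            input_rows=[{"input_query": "2+2?", "generated_answer": "4"}],
            scoring_functions={"llm-as-judge::base": None},
        )
    )
    print(batch_response, score_response)
```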
@@ -59,7 +59,7 @@ def serialize_vector(vector: list[float]) -> bytes:
     return struct.pack(f"{len(vector)}f", *vector)


-def _create_sqlite_connection(db_path):
+def _create_sqlite_connection(db_path: str):
    """Create a SQLite connection with sqlite_vec extension loaded."""
     connection = sqlite3.connect(db_path)
     connection.enable_load_extension(True)
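For context, the helper above is what loads the sqlite_vec extension onto a plain `sqlite3` connection. Here is a standalone sketch of the same idea, assuming the `sqlite-vec` Python bindings expose `sqlite_vec.load()`; it is not the provider's own code beyond what the hunk shows.

```python
import sqlite3

import sqlite_vec  # assumption: the sqlite-vec Python package is installed


def create_connection(db_path: str) -> sqlite3.Connection:
    # Mirrors the helper in the hunk: open the database, temporarily allow
    # extension loading, pull in sqlite-vec, then lock extensions again.
    connection = sqlite3.connect(db_path)
    connection.enable_load_extension(True)
    sqlite_vec.load(connection)
    connection.enable_load_extension(False)
    return connection
```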
@@ -28,14 +28,6 @@ META_REFERENCE_DEPS = [

 def available_providers() -> list[ProviderSpec]:
     return [
-        InlineProviderSpec(
-            api=Api.inference,
-            provider_type="inline::meta-reference",
-            pip_packages=META_REFERENCE_DEPS,
-            module="llama_stack.providers.inline.inference.meta_reference",
-            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
-            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
-        ),
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
@@ -223,7 +215,7 @@ def available_providers() -> list[ProviderSpec]:

 Configuration:
 - Set VERTEX_AI_PROJECT environment variable (required)
-- Set VERTEX_AI_LOCATION environment variable (optional, defaults to
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to global)
 - Use Google Cloud Application Default Credentials or service account key

 Authentication Setup:
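The doc fix above changes the documented default for `VERTEX_AI_LOCATION` to `global`. A trivial sketch of exporting both variables from Python before starting the stack; the project id value is a placeholder.

```python
import os

# VERTEX_AI_PROJECT is required; VERTEX_AI_LOCATION is optional and, per the
# updated description, defaults to "global" when unset.
os.environ["VERTEX_AI_PROJECT"] = "my-gcp-project"  # placeholder value
os.environ.setdefault("VERTEX_AI_LOCATION", "global")
```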
@@ -419,6 +419,7 @@ There are three implementations of search for PGVectoIndex available:
 - Semantic understanding - finds documents similar in meaning even if they don't share keywords
 - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
 - Best for: Finding conceptually related content, handling synonyms, cross-language search
+- By default, Llama Stack creates a HNSW (Hierarchical Navigable Small Worlds) index on a column "embedding" in a vector store table enabling production-ready, performant and scalable vector search for large datasets out of the box.

 2. Keyword Search
 - How it works:
@@ -448,6 +449,7 @@ There are three implementations of search for PGVectoIndex available:
 - Best for: General-purpose search where you want both precision and recall

 4. Database Schema
+
 The PGVector implementation stores data optimized for all three search types:
 CREATE TABLE vector_store_xxx (
     id TEXT PRIMARY KEY,
@@ -457,9 +459,6 @@ CREATE TABLE vector_store_xxx (
     tokenized_content TSVECTOR -- For keyword search
 );

--- Indexes for performance
-CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
--- Vector index created automatically by pgvector

 ## Usage

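The bullet added above says an HNSW index is created on the `embedding` column by default. To illustrate what such an index looks like in SQL (using pgvector's documented `USING hnsw` syntax), here is a hedged sketch driven from Python; the table and index names are placeholders, `psycopg2` is an assumption, and this is not the provider's own code.

```python
import psycopg2  # assumption: psycopg2 is available for this illustration

conn = psycopg2.connect(
    host="localhost", port=5432, dbname="testvectordb", user="user", password="password"
)
with conn, conn.cursor() as cur:
    # pgvector HNSW index on the "embedding" column -- the shape of the index the
    # description says Llama Stack creates by default for vector-store tables.
    cur.execute(
        "CREATE INDEX IF NOT EXISTS vector_store_xxx_embedding_hnsw "
        "ON vector_store_xxx USING hnsw (embedding vector_cosine_ops);"
    )
```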
@@ -469,32 +468,55 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.

-## This is an example how you can set up your environment for using PGVector
+## This is an example how you can set up your environment for using PGVector (you can use either Podman or Docker)

-1. Export
+1. Export PGVector environment variables:
 ```bash
-export
+export PGVECTOR_DB=testvectordb
 export PGVECTOR_HOST=localhost
 export PGVECTOR_PORT=5432
-export
-export
-export PGVECTOR_PASSWORD=llamastack
+export PGVECTOR_USER=user
+export PGVECTOR_PASSWORD=password
 ```

-2.
+2. Pull pgvector image with that tag you want:
+
+Via Podman:
 ```bash
-
-psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
-psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+podman pull pgvector/pgvector:0.8.1-pg18-trixie
 ```

-
+Via Docker:
+```bash
+docker pull pgvector/pgvector:0.8.1-pg18-trixie
+```
+
+3. Run container with PGVector:

-
+Via Podman
+```bash
+podman run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
+```

+Via Docker
 ```bash
-docker
+docker run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
 ```
+
 ## Documentation
 See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
 """,
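The rewritten setup section above replaces the manual `psql` database-creation steps with ready-to-run Podman/Docker commands. Here is a small, hedged Python check that the resulting container is reachable and has the `vector` extension enabled; the connection values follow the environment variables in the example, and using `psycopg2` for the check is an assumption rather than a stated dependency.

```python
import psycopg2  # assumption: used here only as a quick connectivity check

conn = psycopg2.connect(
    host="localhost", port=5432, dbname="testvectordb", user="user", password="password"
)
with conn, conn.cursor() as cur:
    # The pgvector/pgvector image ships the extension; make sure it is enabled
    # in this database, then report the installed version.
    cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")
    cur.execute("SELECT extversion FROM pg_extension WHERE extname = 'vector';")
    print("pgvector version:", cur.fetchone()[0])
```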
@@ -823,6 +845,104 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
             optional_api_dependencies=[Api.files, Api.models],
             description="""
 Please refer to the remote provider documentation.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="elasticsearch",
+            provider_type="remote::elasticsearch",
+            pip_packages=["elasticsearch>=8.16.0,<9.0.0"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.elasticsearch",
+            config_class="llama_stack.providers.remote.vector_io.elasticsearch.ElasticsearchVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Elasticsearch](https://www.elastic.co/) is a vector database provider for Llama Stack.
+It allows you to store and query vectors directly within an Elasticsearch database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Elasticsearch supports:
+- Store embeddings and their metadata
+- Vector search
+- Full-text search
+- Fuzzy search
+- Hybrid search
+- Document storage
+- Metadata filtering
+- Inference service
+- Machine Learning integrations
+
+## Usage
+
+To use Elasticsearch in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Elasticsearch.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can test Elasticsearch locally by running this script in the terminal:
+
+```bash
+curl -fsSL https://elastic.co/start-local | sh
+```
+
+Or you can [start a free trial](https://www.elastic.co/cloud/cloud-trial-overview?utm_campaign=llama-stack-integration) on Elastic Cloud.
+For more information on how to deploy Elasticsearch, see the [official documentation](https://www.elastic.co/docs/deploy-manage/deploy).
+
+## Documentation
+See [Elasticsearch's documentation](https://www.elastic.co/docs/solutions/search) for more details about Elasticsearch in general.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="oci",
+            provider_type="remote::oci",
+            pip_packages=["oracledb", "numpy"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.oci",
+            config_class="llama_stack.providers.remote.vector_io.oci.OCI26aiVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Oracle 26ai](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly
+in an Oracle 26ai database.
+## Features
+- Easy to use
+- Fully integrated with Llama Stack
+- Supports vector search, keyword search, and hybrid search
+## Usage
+To use Oracle 26ai in your Llama Stack project, follow these steps:
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Oracle 26ai.
+3. Start storing and querying vectors.
+## Installation
+You can install the Oracle 26ai client using pip:
+```bash
+pip install oracledb
+```
+## Configuration
+```yaml
+vector_io:
+  - provider_id: oci
+    provider_type: remote::oci
+    config:
+      conn_str: "${env.OCI26AI_CONNECTION_STRING}"
+      user: "${env.OCI26AI_USER}"
+      password: "${env.OCI26AI_PASSWORD}"
+      tnsnames_loc: "${env.OCI26AI_TNSNAMES_LOC}"
+      ewallet_pem_loc: "${env.OCI26AI_EWALLET_PEM_LOC}"
+      ewallet_password: "${env.OCI26AI_EWALLET_PWD}"
+      vector_datatype: "${env.OCI26AI_VECTOR_DATATYPE:=FLOAT32}"
+      persistence:
+        namespace: vector_id::oci26ai
+        backend: kv_default
+```
+## Documentation
+See the [Oracle 26ai documentation](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+for more details about Oracle 26ai in general.
 """,
         ),
     ]
@@ -0,0 +1,74 @@
+# NVIDIA DatasetIO Provider for LlamaStack
+
+This provider enables dataset management using NVIDIA's NeMo Customizer service.
+
+## Features
+
+- Register datasets for fine-tuning LLMs
+- Unregister datasets
+
+## Getting Started
+
+### Prerequisites
+
+- LlamaStack with NVIDIA configuration
+- Access to Hosted NVIDIA NeMo Microservice
+- API key for authentication with the NVIDIA service
+
+### Setup
+
+Build the NVIDIA environment:
+
+```bash
+uv pip install llama-stack-client
+uv run llama stack list-deps nvidia | xargs -L1 uv pip install
+```
+
+### Basic Usage using the LlamaStack Python Client
+
+#### Initialize the client
+
+```python
+import os
+
+os.environ["NVIDIA_API_KEY"] = "your-api-key"
+os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
+os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
+os.environ["NVIDIA_PROJECT_ID"] = "test-project"
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+```
+
+#### Register a dataset
+
+```python
+client.datasets.register(
+    purpose="post-training/messages",
+    dataset_id="my-training-dataset",
+    source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"},
+    metadata={
+        "format": "json",
+        "description": "Dataset for LLM fine-tuning",
+        "provider": "nvidia",
+    },
+)
+```
+
+#### Get a list of all registered datasets
+
+```python
+datasets = client.datasets.list()
+for dataset in datasets:
+    print(f"Dataset ID: {dataset.identifier}")
+    print(f"Description: {dataset.metadata.get('description', '')}")
+    print(f"Source: {dataset.source.uri}")
+    print("---")
+```
+
+#### Unregister a dataset
+
+```python
+client.datasets.unregister(dataset_id="my-training-dataset")
+```
@@ -0,0 +1,134 @@
+# NVIDIA NeMo Evaluator Eval Provider
+
+
+## Overview
+
+For the first integration, Benchmarks are mapped to Evaluation Configs on in the NeMo Evaluator. The full evaluation config object is provided as part of the meta-data. The `dataset_id` and `scoring_functions` are not used.
+
+Below are a few examples of how to register a benchmark, which in turn will create an evaluation config in NeMo Evaluator and how to trigger an evaluation.
+
+### Example for register an academic benchmark
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "mmlu",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "mmlu"
+  }
+}
+```
+
+### Example for register a custom evaluation
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "custom",
+    "params": {
+      "parallelism": 8
+    },
+    "tasks": {
+      "qa": {
+        "type": "completion",
+        "params": {
+          "template": {
+            "prompt": "{{prompt}}",
+            "max_tokens": 200
+          }
+        },
+        "dataset": {
+          "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
+        },
+        "metrics": {
+          "bleu": {
+            "type": "bleu",
+            "params": {
+              "references": [
+                "{{ideal_response}}"
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Example for triggering a benchmark/custom evaluation
+
+```
+POST /eval/benchmarks/{benchmark_id}/jobs
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "benchmark_config": {
+    "eval_candidate": {
+      "type": "model",
+      "model": "meta-llama/Llama3.1-8B-Instruct",
+      "sampling_params": {
+        "max_tokens": 100,
+        "temperature": 0.7
+      }
+    },
+    "scoring_params": {}
+  }
+}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for getting the status of a job
+```
+GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for cancelling a job
+```
+POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
+```
+
+### Example for getting the results
+```
+GET /eval/benchmarks/{benchmark_id}/results
+```
+```json
+{
+  "generations": [],
+  "scores": {
+    "{benchmark_id}": {
+      "score_rows": [],
+      "aggregated_results": {
+        "tasks": {},
+        "groups": {}
+      }
+    }
+  }
+}
+```
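The README above documents the provider through raw REST routes. Below is a hedged end-to-end sketch of the same flow using `requests`; the base URL (a local Llama Stack server on port 8321 with a `/v1` prefix) is an assumption, while the paths and JSON bodies are copied from the examples above.

```python
import requests

BASE = "http://localhost:8321/v1"  # assumption: default local Llama Stack server

# Register the academic benchmark from the first example.
requests.post(
    f"{BASE}/eval/benchmarks",
    json={"benchmark_id": "mmlu", "dataset_id": "", "scoring_functions": [], "metadata": {"type": "mmlu"}},
).raise_for_status()

# Trigger an evaluation job for it.
job = requests.post(
    f"{BASE}/eval/benchmarks/mmlu/jobs",
    json={
        "benchmark_id": "mmlu",
        "benchmark_config": {
            "eval_candidate": {
                "type": "model",
                "model": "meta-llama/Llama3.1-8B-Instruct",
                "sampling_params": {"max_tokens": 100, "temperature": 0.7},
            },
            "scoring_params": {},
        },
    },
).json()

# Poll status, then fetch results once the job reports "completed".
status = requests.get(f"{BASE}/eval/benchmarks/mmlu/jobs/{job['job_id']}").json()
if status["status"] == "completed":
    results = requests.get(f"{BASE}/eval/benchmarks/mmlu/results").json()
```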
@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHe
 from llama_stack_api import (
     Agents,
     Benchmark,
-    BenchmarkConfig,
     BenchmarksProtocolPrivate,
     DatasetIO,
     Datasets,
     Eval,
     EvaluateResponse,
+    EvaluateRowsRequest,
     Inference,
     Job,
+    JobCancelRequest,
+    JobResultRequest,
     JobStatus,
+    JobStatusRequest,
+    RunEvalRequest,
     Scoring,
     ScoringResult,
 )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(

     async def run_eval(
         self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        request: RunEvalRequest,
     ) -> Job:
         """Run an evaluation job for a benchmark."""
         model = (
-            benchmark_config.eval_candidate.model
-            if benchmark_config.eval_candidate.type == "model"
-            else benchmark_config.eval_candidate.config.model
+            request.benchmark_config.eval_candidate.model
+            if request.benchmark_config.eval_candidate.type == "model"
+            else request.benchmark_config.eval_candidate.config.model
         )
         nvidia_model = self.get_provider_model_id(model) or model

         result = await self._evaluator_post(
             "/v1/evaluation/jobs",
             {
-                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                 "target": {"type": "model", "model": nvidia_model},
             },
         )
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(

     async def evaluate_rows(
         self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
+        request: EvaluateRowsRequest,
     ) -> EvaluateResponse:
         raise NotImplementedError()

-    async def job_status(self,
+    async def job_status(self, request: JobStatusRequest) -> Job:
         """Get the status of an evaluation job.

         EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
         JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
         """
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
         result_status = result["status"]

         job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
         elif result_status in ["cancelled"]:
             job_status = JobStatus.cancelled

-        return Job(job_id=job_id, status=job_status)
+        return Job(job_id=request.job_id, status=job_status)

-    async def job_cancel(self,
+    async def job_cancel(self, request: JobCancelRequest) -> None:
         """Cancel the evaluation job."""
-        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+        await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})

-    async def job_result(self,
+    async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
         """Returns the results of the evaluation job."""

-        job = await self.job_status(benchmark_id, job_id)
+        job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+        job = await self.job_status(job_status_request)
         status = job.status
         if not status or status != JobStatus.completed:
-            raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+            raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")

-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")

         return EvaluateResponse(
             # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
             generations=[],
             scores={
-                benchmark_id: ScoringResult(
+                request.benchmark_id: ScoringResult(
                     score_rows=[],
                     aggregated_results=result,
                 )