llama-stack 0.4.3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff shows the content of publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
Files changed (307)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. {llama_stack_api/internal → llama_stack/core/connectors}/__init__.py +2 -2
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/doc_template.md +209 -0
  30. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  31. llama_stack/distributions/nvidia/config.yaml +4 -1
  32. llama_stack/distributions/nvidia/doc_template.md +170 -0
  33. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  34. llama_stack/distributions/oci/config.yaml +4 -1
  35. llama_stack/distributions/oci/doc_template.md +140 -0
  36. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  37. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  38. llama_stack/distributions/starter/build.yaml +62 -0
  39. llama_stack/distributions/starter/config.yaml +22 -3
  40. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  41. llama_stack/distributions/starter/starter.py +13 -1
  42. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  43. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  44. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  45. llama_stack/distributions/template.py +10 -2
  46. llama_stack/distributions/watsonx/config.yaml +4 -1
  47. llama_stack/log.py +1 -0
  48. llama_stack/models/llama/resources/dog.jpg +0 -0
  49. llama_stack/models/llama/resources/pasta.jpeg +0 -0
  50. llama_stack/models/llama/resources/small_dog.jpg +0 -0
  51. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  52. llama_stack/providers/inline/agents/meta_reference/agents.py +57 -61
  53. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +183 -60
  54. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +94 -22
  55. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  56. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  57. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  58. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  59. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  60. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.h +9 -0
  61. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/LocalInference.swift +189 -0
  62. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/Parsing.swift +238 -0
  63. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/PromptTemplate.swift +12 -0
  64. llama_stack/providers/inline/ios/inference/LocalInferenceImpl/SystemPrompts.swift +89 -0
  65. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.pbxproj +550 -0
  66. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  67. llama_stack/providers/inline/ios/inference/LocalInferenceImpl.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist +8 -0
  68. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  69. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  70. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  71. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  72. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  73. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +15 -18
  74. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  75. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  76. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  77. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  78. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  79. llama_stack/providers/registry/agents.py +1 -0
  80. llama_stack/providers/registry/inference.py +1 -9
  81. llama_stack/providers/registry/vector_io.py +136 -16
  82. llama_stack/providers/remote/datasetio/nvidia/README.md +74 -0
  83. llama_stack/providers/remote/eval/nvidia/README.md +134 -0
  84. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  85. llama_stack/providers/remote/files/s3/README.md +266 -0
  86. llama_stack/providers/remote/files/s3/config.py +5 -3
  87. llama_stack/providers/remote/files/s3/files.py +2 -2
  88. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  89. llama_stack/providers/remote/inference/nvidia/NVIDIA.md +203 -0
  90. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  91. llama_stack/providers/remote/inference/together/together.py +4 -0
  92. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  93. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  94. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  95. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  96. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  97. llama_stack/providers/remote/post_training/nvidia/README.md +151 -0
  98. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  99. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  100. llama_stack/providers/remote/safety/nvidia/README.md +78 -0
  101. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  102. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  103. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  104. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  105. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  106. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  107. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  108. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  109. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  110. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  111. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  112. llama_stack/providers/utils/bedrock/client.py +3 -3
  113. llama_stack/providers/utils/bedrock/config.py +7 -7
  114. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  115. llama_stack/providers/utils/inference/http_client.py +239 -0
  116. llama_stack/providers/utils/inference/litellm_openai_mixin.py +5 -0
  117. llama_stack/providers/utils/inference/model_registry.py +148 -2
  118. llama_stack/providers/utils/inference/openai_compat.py +2 -1
  119. llama_stack/providers/utils/inference/openai_mixin.py +41 -2
  120. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  121. llama_stack/providers/utils/memory/vector_store.py +46 -19
  122. llama_stack/providers/utils/responses/responses_store.py +40 -6
  123. llama_stack/providers/utils/safety.py +114 -0
  124. llama_stack/providers/utils/tools/mcp.py +44 -3
  125. llama_stack/testing/api_recorder.py +9 -3
  126. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/METADATA +14 -2
  127. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/RECORD +131 -275
  128. llama_stack-0.5.0rc1.dist-info/top_level.txt +1 -0
  129. llama_stack/distributions/meta-reference-gpu/__init__.py +0 -7
  130. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  131. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  132. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  133. llama_stack/models/llama/hadamard_utils.py +0 -88
  134. llama_stack/models/llama/llama3/args.py +0 -74
  135. llama_stack/models/llama/llama3/generation.py +0 -378
  136. llama_stack/models/llama/llama3/model.py +0 -304
  137. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  138. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  139. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  140. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  141. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  142. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  143. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  144. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  145. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  146. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  147. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  148. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  149. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  150. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  151. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  152. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  153. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  154. llama_stack/models/llama/llama4/args.py +0 -107
  155. llama_stack/models/llama/llama4/ffn.py +0 -58
  156. llama_stack/models/llama/llama4/moe.py +0 -214
  157. llama_stack/models/llama/llama4/preprocess.py +0 -435
  158. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  159. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  160. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  161. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  162. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  163. llama_stack/models/llama/quantize_impls.py +0 -316
  164. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  165. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  166. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  167. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  168. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  169. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  170. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  171. llama_stack-0.4.3.dist-info/top_level.txt +0 -2
  172. llama_stack_api/__init__.py +0 -945
  173. llama_stack_api/admin/__init__.py +0 -45
  174. llama_stack_api/admin/api.py +0 -72
  175. llama_stack_api/admin/fastapi_routes.py +0 -117
  176. llama_stack_api/admin/models.py +0 -113
  177. llama_stack_api/agents.py +0 -173
  178. llama_stack_api/batches/__init__.py +0 -40
  179. llama_stack_api/batches/api.py +0 -53
  180. llama_stack_api/batches/fastapi_routes.py +0 -113
  181. llama_stack_api/batches/models.py +0 -78
  182. llama_stack_api/benchmarks/__init__.py +0 -43
  183. llama_stack_api/benchmarks/api.py +0 -39
  184. llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  185. llama_stack_api/benchmarks/models.py +0 -109
  186. llama_stack_api/common/__init__.py +0 -5
  187. llama_stack_api/common/content_types.py +0 -101
  188. llama_stack_api/common/errors.py +0 -95
  189. llama_stack_api/common/job_types.py +0 -38
  190. llama_stack_api/common/responses.py +0 -77
  191. llama_stack_api/common/training_types.py +0 -47
  192. llama_stack_api/common/type_system.py +0 -146
  193. llama_stack_api/connectors.py +0 -146
  194. llama_stack_api/conversations.py +0 -270
  195. llama_stack_api/datasetio.py +0 -55
  196. llama_stack_api/datasets/__init__.py +0 -61
  197. llama_stack_api/datasets/api.py +0 -35
  198. llama_stack_api/datasets/fastapi_routes.py +0 -104
  199. llama_stack_api/datasets/models.py +0 -152
  200. llama_stack_api/datatypes.py +0 -373
  201. llama_stack_api/eval.py +0 -137
  202. llama_stack_api/file_processors/__init__.py +0 -27
  203. llama_stack_api/file_processors/api.py +0 -64
  204. llama_stack_api/file_processors/fastapi_routes.py +0 -78
  205. llama_stack_api/file_processors/models.py +0 -42
  206. llama_stack_api/files/__init__.py +0 -35
  207. llama_stack_api/files/api.py +0 -51
  208. llama_stack_api/files/fastapi_routes.py +0 -124
  209. llama_stack_api/files/models.py +0 -107
  210. llama_stack_api/inference.py +0 -1169
  211. llama_stack_api/inspect_api/__init__.py +0 -37
  212. llama_stack_api/inspect_api/api.py +0 -25
  213. llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  214. llama_stack_api/inspect_api/models.py +0 -28
  215. llama_stack_api/internal/kvstore.py +0 -28
  216. llama_stack_api/internal/sqlstore.py +0 -81
  217. llama_stack_api/llama_stack_api/__init__.py +0 -945
  218. llama_stack_api/llama_stack_api/admin/__init__.py +0 -45
  219. llama_stack_api/llama_stack_api/admin/api.py +0 -72
  220. llama_stack_api/llama_stack_api/admin/fastapi_routes.py +0 -117
  221. llama_stack_api/llama_stack_api/admin/models.py +0 -113
  222. llama_stack_api/llama_stack_api/agents.py +0 -173
  223. llama_stack_api/llama_stack_api/batches/__init__.py +0 -40
  224. llama_stack_api/llama_stack_api/batches/api.py +0 -53
  225. llama_stack_api/llama_stack_api/batches/fastapi_routes.py +0 -113
  226. llama_stack_api/llama_stack_api/batches/models.py +0 -78
  227. llama_stack_api/llama_stack_api/benchmarks/__init__.py +0 -43
  228. llama_stack_api/llama_stack_api/benchmarks/api.py +0 -39
  229. llama_stack_api/llama_stack_api/benchmarks/fastapi_routes.py +0 -109
  230. llama_stack_api/llama_stack_api/benchmarks/models.py +0 -109
  231. llama_stack_api/llama_stack_api/common/__init__.py +0 -5
  232. llama_stack_api/llama_stack_api/common/content_types.py +0 -101
  233. llama_stack_api/llama_stack_api/common/errors.py +0 -95
  234. llama_stack_api/llama_stack_api/common/job_types.py +0 -38
  235. llama_stack_api/llama_stack_api/common/responses.py +0 -77
  236. llama_stack_api/llama_stack_api/common/training_types.py +0 -47
  237. llama_stack_api/llama_stack_api/common/type_system.py +0 -146
  238. llama_stack_api/llama_stack_api/connectors.py +0 -146
  239. llama_stack_api/llama_stack_api/conversations.py +0 -270
  240. llama_stack_api/llama_stack_api/datasetio.py +0 -55
  241. llama_stack_api/llama_stack_api/datasets/__init__.py +0 -61
  242. llama_stack_api/llama_stack_api/datasets/api.py +0 -35
  243. llama_stack_api/llama_stack_api/datasets/fastapi_routes.py +0 -104
  244. llama_stack_api/llama_stack_api/datasets/models.py +0 -152
  245. llama_stack_api/llama_stack_api/datatypes.py +0 -373
  246. llama_stack_api/llama_stack_api/eval.py +0 -137
  247. llama_stack_api/llama_stack_api/file_processors/__init__.py +0 -27
  248. llama_stack_api/llama_stack_api/file_processors/api.py +0 -64
  249. llama_stack_api/llama_stack_api/file_processors/fastapi_routes.py +0 -78
  250. llama_stack_api/llama_stack_api/file_processors/models.py +0 -42
  251. llama_stack_api/llama_stack_api/files/__init__.py +0 -35
  252. llama_stack_api/llama_stack_api/files/api.py +0 -51
  253. llama_stack_api/llama_stack_api/files/fastapi_routes.py +0 -124
  254. llama_stack_api/llama_stack_api/files/models.py +0 -107
  255. llama_stack_api/llama_stack_api/inference.py +0 -1169
  256. llama_stack_api/llama_stack_api/inspect_api/__init__.py +0 -37
  257. llama_stack_api/llama_stack_api/inspect_api/api.py +0 -25
  258. llama_stack_api/llama_stack_api/inspect_api/fastapi_routes.py +0 -76
  259. llama_stack_api/llama_stack_api/inspect_api/models.py +0 -28
  260. llama_stack_api/llama_stack_api/internal/__init__.py +0 -9
  261. llama_stack_api/llama_stack_api/internal/kvstore.py +0 -28
  262. llama_stack_api/llama_stack_api/internal/sqlstore.py +0 -81
  263. llama_stack_api/llama_stack_api/models.py +0 -171
  264. llama_stack_api/llama_stack_api/openai_responses.py +0 -1468
  265. llama_stack_api/llama_stack_api/post_training.py +0 -370
  266. llama_stack_api/llama_stack_api/prompts.py +0 -203
  267. llama_stack_api/llama_stack_api/providers/__init__.py +0 -33
  268. llama_stack_api/llama_stack_api/providers/api.py +0 -16
  269. llama_stack_api/llama_stack_api/providers/fastapi_routes.py +0 -57
  270. llama_stack_api/llama_stack_api/providers/models.py +0 -24
  271. llama_stack_api/llama_stack_api/py.typed +0 -0
  272. llama_stack_api/llama_stack_api/rag_tool.py +0 -168
  273. llama_stack_api/llama_stack_api/resource.py +0 -37
  274. llama_stack_api/llama_stack_api/router_utils.py +0 -160
  275. llama_stack_api/llama_stack_api/safety.py +0 -132
  276. llama_stack_api/llama_stack_api/schema_utils.py +0 -208
  277. llama_stack_api/llama_stack_api/scoring.py +0 -93
  278. llama_stack_api/llama_stack_api/scoring_functions.py +0 -211
  279. llama_stack_api/llama_stack_api/shields.py +0 -93
  280. llama_stack_api/llama_stack_api/tools.py +0 -226
  281. llama_stack_api/llama_stack_api/vector_io.py +0 -941
  282. llama_stack_api/llama_stack_api/vector_stores.py +0 -53
  283. llama_stack_api/llama_stack_api/version.py +0 -9
  284. llama_stack_api/models.py +0 -171
  285. llama_stack_api/openai_responses.py +0 -1468
  286. llama_stack_api/post_training.py +0 -370
  287. llama_stack_api/prompts.py +0 -203
  288. llama_stack_api/providers/__init__.py +0 -33
  289. llama_stack_api/providers/api.py +0 -16
  290. llama_stack_api/providers/fastapi_routes.py +0 -57
  291. llama_stack_api/providers/models.py +0 -24
  292. llama_stack_api/py.typed +0 -0
  293. llama_stack_api/rag_tool.py +0 -168
  294. llama_stack_api/resource.py +0 -37
  295. llama_stack_api/router_utils.py +0 -160
  296. llama_stack_api/safety.py +0 -132
  297. llama_stack_api/schema_utils.py +0 -208
  298. llama_stack_api/scoring.py +0 -93
  299. llama_stack_api/scoring_functions.py +0 -211
  300. llama_stack_api/shields.py +0 -93
  301. llama_stack_api/tools.py +0 -226
  302. llama_stack_api/vector_io.py +0 -941
  303. llama_stack_api/vector_stores.py +0 -53
  304. llama_stack_api/version.py +0 -9
  305. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/WHEEL +0 -0
  306. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/entry_points.txt +0 -0
  307. {llama_stack-0.4.3.dist-info → llama_stack-0.5.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -3,17 +3,18 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from typing import Any
 
 from llama_stack_api import (
     DatasetIO,
     Datasets,
     Inference,
+    IterRowsRequest,
+    ScoreBatchRequest,
     ScoreBatchResponse,
+    ScoreRequest,
     ScoreResponse,
     Scoring,
     ScoringFn,
-    ScoringFnParams,
     ScoringFunctionsProtocolPrivate,
     ScoringResult,
 )
@@ -64,19 +65,15 @@ class LlmAsJudgeScoringImpl(
 
     async def score_batch(
         self,
-        dataset_id: str,
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
-        save_results_dataset: bool = False,
+        request: ScoreBatchRequest,
     ) -> ScoreBatchResponse:
-        all_rows = await self.datasetio_api.iterrows(
-            dataset_id=dataset_id,
-            limit=-1,
-        )
-        res = await self.score(
+        all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
+        score_request = ScoreRequest(
             input_rows=all_rows.data,
-            scoring_functions=scoring_functions,
+            scoring_functions=request.scoring_functions,
         )
-        if save_results_dataset:
+        res = await self.score(score_request)
+        if request.save_results_dataset:
             # TODO: persist and register dataset on to server for reading
             # self.datasets_api.register_dataset()
             raise NotImplementedError("Save results dataset not implemented yet")
@@ -87,14 +84,13 @@ class LlmAsJudgeScoringImpl(
 
     async def score(
         self,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: dict[str, ScoringFnParams | None] = None,
+        request: ScoreRequest,
     ) -> ScoreResponse:
         res = {}
-        for scoring_fn_id in scoring_functions.keys():
+        for scoring_fn_id in request.scoring_functions.keys():
             scoring_fn = self.llm_as_judge_fn
-            scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
-            score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
+            scoring_fn_params = request.scoring_functions.get(scoring_fn_id, None)
+            score_results = await scoring_fn.score(request.input_rows, scoring_fn_id, scoring_fn_params)
             agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
             res[scoring_fn_id] = ScoringResult(
                 score_rows=score_results,
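The refactor above replaces loose keyword arguments with a single request object per call. A minimal sketch of the new calling convention, assuming `ScoreRequest` and `ScoreBatchRequest` expose exactly the fields referenced in the hunks (`input_rows`, `scoring_functions`, `dataset_id`, `save_results_dataset`) and that `ScoreResponse` keeps a per-function `results` dict; `impl` stands in for an already-wired `LlmAsJudgeScoringImpl` and the scoring-function ID is illustrative:

```python
from llama_stack_api import ScoreBatchRequest, ScoreRequest


async def demo(impl) -> None:
    # Score rows held in memory: one request object instead of two kwargs.
    response = await impl.score(
        ScoreRequest(
            input_rows=[{"input_query": "2+2?", "generated_answer": "4", "expected_answer": "4"}],
            scoring_functions={"llm-as-judge::base": None},  # None -> use the fn's default params
        )
    )
    print(response.results["llm-as-judge::base"].aggregated_results)

    # Score a whole registered dataset; row iteration now goes through IterRowsRequest internally.
    await impl.score_batch(
        ScoreBatchRequest(
            dataset_id="my-eval-dataset",
            scoring_functions={"llm-as-judge::base": None},
            save_results_dataset=False,  # True currently raises NotImplementedError (see TODO above)
        )
    )
```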
@@ -59,7 +59,7 @@ def serialize_vector(vector: list[float]) -> bytes:
     return struct.pack(f"{len(vector)}f", *vector)
 
 
-def _create_sqlite_connection(db_path):
+def _create_sqlite_connection(db_path: str):
     """Create a SQLite connection with sqlite_vec extension loaded."""
     connection = sqlite3.connect(db_path)
     connection.enable_load_extension(True)
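The hunk stops just after `enable_load_extension(True)`. For orientation, a sketch of the typical sqlite-vec loading pattern this helper presumably follows; the `sqlite_vec.load()` call is the documented API of the `sqlite-vec` package, not shown in the diff:

```python
import sqlite3

import sqlite_vec  # pip install sqlite-vec


def _create_sqlite_connection(db_path: str) -> sqlite3.Connection:
    """Create a SQLite connection with the sqlite_vec extension loaded."""
    connection = sqlite3.connect(db_path)
    connection.enable_load_extension(True)   # must be enabled before loading any extension
    sqlite_vec.load(connection)              # registers the vec0 virtual-table module
    connection.enable_load_extension(False)  # re-disable as a safety measure
    return connection
```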
@@ -37,6 +37,7 @@ def available_providers() -> list[ProviderSpec]:
                 Api.conversations,
                 Api.prompts,
                 Api.files,
+                Api.connectors,
             ],
             optional_api_dependencies=[
                 Api.safety,
@@ -28,14 +28,6 @@ META_REFERENCE_DEPS = [
 
 def available_providers() -> list[ProviderSpec]:
     return [
-        InlineProviderSpec(
-            api=Api.inference,
-            provider_type="inline::meta-reference",
-            pip_packages=META_REFERENCE_DEPS,
-            module="llama_stack.providers.inline.inference.meta_reference",
-            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
-            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
-        ),
         InlineProviderSpec(
             api=Api.inference,
             provider_type="inline::sentence-transformers",
@@ -223,7 +215,7 @@ def available_providers() -> list[ProviderSpec]:
 
 Configuration:
 - Set VERTEX_AI_PROJECT environment variable (required)
-- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+- Set VERTEX_AI_LOCATION environment variable (optional, defaults to global)
 - Use Google Cloud Application Default Credentials or service account key
 
 Authentication Setup:
@@ -419,6 +419,7 @@ There are three implementations of search for PGVectoIndex available:
 - Semantic understanding - finds documents similar in meaning even if they don't share keywords
 - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
 - Best for: Finding conceptually related content, handling synonyms, cross-language search
+- By default, Llama Stack creates an HNSW (Hierarchical Navigable Small World) index on the "embedding" column of each vector store table, enabling production-ready, performant, and scalable vector search for large datasets out of the box.
 
 2. Keyword Search
 - How it works:
@@ -448,6 +449,7 @@ There are three implementations of search for PGVectoIndex available:
 - Best for: General-purpose search where you want both precision and recall
 
 4. Database Schema
+
 The PGVector implementation stores data optimized for all three search types:
 CREATE TABLE vector_store_xxx (
     id TEXT PRIMARY KEY,
@@ -457,9 +459,6 @@ CREATE TABLE vector_store_xxx (
     tokenized_content TSVECTOR -- For keyword search
 );
 
--- Indexes for performance
-CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
--- Vector index created automatically by pgvector
 
 ## Usage
 
@@ -469,32 +468,55 @@ To use PGVector in your Llama Stack project, follow these steps:
 2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
 3. Start storing and querying vectors.
 
-## This is an example how you can set up your environment for using PGVector
+## This is an example of how you can set up your environment for using PGVector (you can use either Podman or Docker)
 
-1. Export env vars:
+1. Export PGVector environment variables:
 ```bash
-export ENABLE_PGVECTOR=true
+export PGVECTOR_DB=testvectordb
 export PGVECTOR_HOST=localhost
 export PGVECTOR_PORT=5432
-export PGVECTOR_DB=llamastack
-export PGVECTOR_USER=llamastack
-export PGVECTOR_PASSWORD=llamastack
+export PGVECTOR_USER=user
+export PGVECTOR_PASSWORD=password
 ```
 
-2. Create DB:
+2. Pull the pgvector image with the tag you want:
+
+Via Podman:
 ```bash
-psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
-psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
-psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+podman pull pgvector/pgvector:0.8.1-pg18-trixie
 ```
 
-## Installation
+Via Docker:
+```bash
+docker pull pgvector/pgvector:0.8.1-pg18-trixie
+```
+
+3. Run a container with PGVector:
 
-You can install PGVector using docker:
+Via Podman:
+```bash
+podman run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
+```
 
+Via Docker:
 ```bash
-docker pull pgvector/pgvector:pg17
+docker run -d \
+  --name pgvector \
+  -e POSTGRES_PASSWORD=password \
+  -e POSTGRES_USER=user \
+  -e POSTGRES_DB=testvectordb \
+  -p 5432:5432 \
+  -v pgvector_data:/var/lib/postgresql \
+  pgvector/pgvector:0.8.1-pg18-trixie
 ```
+
 ## Documentation
 See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
 """,
@@ -823,6 +845,104 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
             optional_api_dependencies=[Api.files, Api.models],
             description="""
 Please refer to the remote provider documentation.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="elasticsearch",
+            provider_type="remote::elasticsearch",
+            pip_packages=["elasticsearch>=8.16.0,<9.0.0"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.elasticsearch",
+            config_class="llama_stack.providers.remote.vector_io.elasticsearch.ElasticsearchVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Elasticsearch](https://www.elastic.co/) is a vector database provider for Llama Stack.
+It allows you to store and query vectors directly within an Elasticsearch database.
+That means you're not limited to storing vectors in memory or in a separate service.
+
+## Features
+Elasticsearch supports:
+- Storage of embeddings and their metadata
+- Vector search
+- Full-text search
+- Fuzzy search
+- Hybrid search
+- Document storage
+- Metadata filtering
+- Inference service
+- Machine Learning integrations
+
+## Usage
+
+To use Elasticsearch in your Llama Stack project, follow these steps:
+
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Elasticsearch.
+3. Start storing and querying vectors.
+
+## Installation
+
+You can test Elasticsearch locally by running this script in the terminal:
+
+```bash
+curl -fsSL https://elastic.co/start-local | sh
+```
+
+Or you can [start a free trial](https://www.elastic.co/cloud/cloud-trial-overview?utm_campaign=llama-stack-integration) on Elastic Cloud.
+For more information on how to deploy Elasticsearch, see the [official documentation](https://www.elastic.co/docs/deploy-manage/deploy).
+
+## Documentation
+See [Elasticsearch's documentation](https://www.elastic.co/docs/solutions/search) for more details about Elasticsearch in general.
+""",
+        ),
+        RemoteProviderSpec(
+            api=Api.vector_io,
+            adapter_type="oci",
+            provider_type="remote::oci",
+            pip_packages=["oracledb", "numpy"] + DEFAULT_VECTOR_IO_DEPS,
+            module="llama_stack.providers.remote.vector_io.oci",
+            config_class="llama_stack.providers.remote.vector_io.oci.OCI26aiVectorIOConfig",
+            api_dependencies=[Api.inference],
+            optional_api_dependencies=[Api.files, Api.models],
+            description="""
+[Oracle 26ai](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly
+in an Oracle 26ai database.
+## Features
+- Easy to use
+- Fully integrated with Llama Stack
+- Supports vector search, keyword search, and hybrid search
+## Usage
+To use Oracle 26ai in your Llama Stack project, follow these steps:
+1. Install the necessary dependencies.
+2. Configure your Llama Stack project to use Oracle 26ai.
+3. Start storing and querying vectors.
+## Installation
+You can install the Oracle 26ai client using pip:
+```bash
+pip install oracledb
+```
+## Configuration
+```yaml
+vector_io:
+  - provider_id: oci
+    provider_type: remote::oci
+    config:
+      conn_str: "${env.OCI26AI_CONNECTION_STRING}"
+      user: "${env.OCI26AI_USER}"
+      password: "${env.OCI26AI_PASSWORD}"
+      tnsnames_loc: "${env.OCI26AI_TNSNAMES_LOC}"
+      ewallet_pem_loc: "${env.OCI26AI_EWALLET_PEM_LOC}"
+      ewallet_password: "${env.OCI26AI_EWALLET_PWD}"
+      vector_datatype: "${env.OCI26AI_VECTOR_DATATYPE:=FLOAT32}"
+    persistence:
+      namespace: vector_id::oci26ai
+      backend: kv_default
+```
+## Documentation
+See the [Oracle 26ai documentation](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+for more details about Oracle 26ai in general.
 """,
         ),
     ]
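For orientation, the OCI config keys above map naturally onto a `python-oracledb` connection. A hypothetical sketch; the key-to-parameter mapping (`conn_str` -> `dsn`, `tnsnames_loc` -> `config_dir`, `ewallet_pem_loc` -> `wallet_location`) is an assumption for illustration, not taken from the provider code:

```python
import os

import oracledb  # pip install oracledb

# Assumed mapping of the provider's config keys to python-oracledb arguments.
conn = oracledb.connect(
    user=os.environ["OCI26AI_USER"],
    password=os.environ["OCI26AI_PASSWORD"],
    dsn=os.environ["OCI26AI_CONNECTION_STRING"],    # conn_str
    config_dir=os.environ["OCI26AI_TNSNAMES_LOC"],  # directory containing tnsnames.ora
    wallet_location=os.environ["OCI26AI_EWALLET_PEM_LOC"],
    wallet_password=os.environ["OCI26AI_EWALLET_PWD"],
)
print("connected, server version:", conn.version)
conn.close()
```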
@@ -0,0 +1,74 @@
+# NVIDIA DatasetIO Provider for LlamaStack
+
+This provider enables dataset management using NVIDIA's NeMo Customizer service.
+
+## Features
+
+- Register datasets for fine-tuning LLMs
+- Unregister datasets
+
+## Getting Started
+
+### Prerequisites
+
+- LlamaStack with NVIDIA configuration
+- Access to a hosted NVIDIA NeMo Microservice
+- API key for authentication with the NVIDIA service
+
+### Setup
+
+Build the NVIDIA environment:
+
+```bash
+uv pip install llama-stack-client
+uv run llama stack list-deps nvidia | xargs -L1 uv pip install
+```
+
+### Basic Usage with the LlamaStack Python Client
+
+#### Initialize the client
+
+```python
+import os
+
+os.environ["NVIDIA_API_KEY"] = "your-api-key"
+os.environ["NVIDIA_CUSTOMIZER_URL"] = "http://nemo.test"
+os.environ["NVIDIA_DATASET_NAMESPACE"] = "default"
+os.environ["NVIDIA_PROJECT_ID"] = "test-project"
+from llama_stack.core.library_client import LlamaStackAsLibraryClient
+
+client = LlamaStackAsLibraryClient("nvidia")
+client.initialize()
+```
+
+#### Register a dataset
+
+```python
+client.datasets.register(
+    purpose="post-training/messages",
+    dataset_id="my-training-dataset",
+    source={"type": "uri", "uri": "hf://datasets/default/sample-dataset"},
+    metadata={
+        "format": "json",
+        "description": "Dataset for LLM fine-tuning",
+        "provider": "nvidia",
+    },
+)
+```
+
+#### Get a list of all registered datasets
+
+```python
+datasets = client.datasets.list()
+for dataset in datasets:
+    print(f"Dataset ID: {dataset.identifier}")
+    print(f"Description: {dataset.metadata.get('description', '')}")
+    print(f"Source: {dataset.source.uri}")
+    print("---")
+```
+
+#### Unregister a dataset
+
+```python
+client.datasets.unregister(dataset_id="my-training-dataset")
+```
@@ -0,0 +1,134 @@
+# NVIDIA NeMo Evaluator Eval Provider
+
+## Overview
+
+For the first integration, benchmarks are mapped to evaluation configs in the NeMo Evaluator. The full evaluation config object is provided as part of the metadata. The `dataset_id` and `scoring_functions` are not used.
+
+Below are a few examples of how to register a benchmark (which in turn creates an evaluation config in NeMo Evaluator) and how to trigger an evaluation.
+
+### Example for registering an academic benchmark
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "mmlu",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "mmlu"
+  }
+}
+```
+
+### Example for registering a custom evaluation
+
+```
+POST /eval/benchmarks
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "dataset_id": "",
+  "scoring_functions": [],
+  "metadata": {
+    "type": "custom",
+    "params": {
+      "parallelism": 8
+    },
+    "tasks": {
+      "qa": {
+        "type": "completion",
+        "params": {
+          "template": {
+            "prompt": "{{prompt}}",
+            "max_tokens": 200
+          }
+        },
+        "dataset": {
+          "files_url": "hf://datasets/default/sample-basic-test/testing/testing.jsonl"
+        },
+        "metrics": {
+          "bleu": {
+            "type": "bleu",
+            "params": {
+              "references": [
+                "{{ideal_response}}"
+              ]
+            }
+          }
+        }
+      }
+    }
+  }
+}
+```
+
+### Example for triggering a benchmark/custom evaluation
+
+```
+POST /eval/benchmarks/{benchmark_id}/jobs
+```
+```json
+{
+  "benchmark_id": "my-custom-benchmark",
+  "benchmark_config": {
+    "eval_candidate": {
+      "type": "model",
+      "model": "meta-llama/Llama3.1-8B-Instruct",
+      "sampling_params": {
+        "max_tokens": 100,
+        "temperature": 0.7
+      }
+    },
+    "scoring_params": {}
+  }
+}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for getting the status of a job
+```
+GET /eval/benchmarks/{benchmark_id}/jobs/{job_id}
+```
+
+Response example:
+```json
+{
+  "job_id": "eval-1234",
+  "status": "in_progress"
+}
+```
+
+### Example for cancelling a job
+```
+POST /eval/benchmarks/{benchmark_id}/jobs/{job_id}/cancel
+```
+
+### Example for getting the results
+```
+GET /eval/benchmarks/{benchmark_id}/results
+```
+```json
+{
+  "generations": [],
+  "scores": {
+    "{benchmark_id}": {
+      "score_rows": [],
+      "aggregated_results": {
+        "tasks": {},
+        "groups": {}
+      }
+    }
+  }
+}
```
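The REST flow documented in this README can be driven end to end from Python. A minimal sketch using `requests` against the routes shown above; the server base URL and any route prefix are assumptions for illustration:

```python
import requests

BASE = "http://localhost:8321"  # assumed Llama Stack server address; adjust route prefix as needed

# Register the benchmark; this creates the evaluation config in NeMo Evaluator.
requests.post(
    f"{BASE}/eval/benchmarks",
    json={"benchmark_id": "mmlu", "dataset_id": "", "scoring_functions": [], "metadata": {"type": "mmlu"}},
).raise_for_status()

# Trigger an evaluation job for the registered benchmark.
job = requests.post(
    f"{BASE}/eval/benchmarks/mmlu/jobs",
    json={
        "benchmark_id": "mmlu",
        "benchmark_config": {
            "eval_candidate": {
                "type": "model",
                "model": "meta-llama/Llama3.1-8B-Instruct",
                "sampling_params": {"max_tokens": 100, "temperature": 0.7},
            },
            "scoring_params": {},
        },
    },
).json()

# Check the job status; fetch results once it reports "completed".
status = requests.get(f"{BASE}/eval/benchmarks/mmlu/jobs/{job['job_id']}").json()["status"]
if status == "completed":
    results = requests.get(f"{BASE}/eval/benchmarks/mmlu/results").json()
```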
@@ -11,15 +11,19 @@ from llama_stack.providers.utils.inference.model_registry import ModelRegistryHe
 from llama_stack_api import (
     Agents,
     Benchmark,
-    BenchmarkConfig,
     BenchmarksProtocolPrivate,
     DatasetIO,
     Datasets,
     Eval,
     EvaluateResponse,
+    EvaluateRowsRequest,
     Inference,
     Job,
+    JobCancelRequest,
+    JobResultRequest,
     JobStatus,
+    JobStatusRequest,
+    RunEvalRequest,
     Scoring,
     ScoringResult,
 )
@@ -91,21 +95,20 @@ class NVIDIAEvalImpl(
 
     async def run_eval(
         self,
-        benchmark_id: str,
-        benchmark_config: BenchmarkConfig,
+        request: RunEvalRequest,
     ) -> Job:
         """Run an evaluation job for a benchmark."""
         model = (
-            benchmark_config.eval_candidate.model
-            if benchmark_config.eval_candidate.type == "model"
-            else benchmark_config.eval_candidate.config.model
+            request.benchmark_config.eval_candidate.model
+            if request.benchmark_config.eval_candidate.type == "model"
+            else request.benchmark_config.eval_candidate.config.model
        )
        nvidia_model = self.get_provider_model_id(model) or model
 
        result = await self._evaluator_post(
            "/v1/evaluation/jobs",
            {
-                "config": f"{DEFAULT_NAMESPACE}/{benchmark_id}",
+                "config": f"{DEFAULT_NAMESPACE}/{request.benchmark_id}",
                "target": {"type": "model", "model": nvidia_model},
            },
        )
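The same request-object pattern as the scoring refactor earlier in this diff. A hedged sketch of how a caller now invokes `run_eval`; the field names come from the hunk, while `impl`, `benchmark_config`, and the benchmark ID are illustrative wiring:

```python
from llama_stack_api import RunEvalRequest


async def launch(impl, benchmark_config) -> None:
    # impl: an NVIDIAEvalImpl instance; benchmark_config: a BenchmarkConfig (illustrative)
    request = RunEvalRequest(benchmark_id="mmlu", benchmark_config=benchmark_config)
    job = await impl.run_eval(request)
    print(job.job_id, job.status)
```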
@@ -114,20 +117,17 @@ class NVIDIAEvalImpl(
 
     async def evaluate_rows(
         self,
-        benchmark_id: str,
-        input_rows: list[dict[str, Any]],
-        scoring_functions: list[str],
-        benchmark_config: BenchmarkConfig,
+        request: EvaluateRowsRequest,
     ) -> EvaluateResponse:
         raise NotImplementedError()
 
-    async def job_status(self, benchmark_id: str, job_id: str) -> Job:
+    async def job_status(self, request: JobStatusRequest) -> Job:
         """Get the status of an evaluation job.
 
         EvaluatorStatus: "created", "pending", "running", "cancelled", "cancelling", "failed", "completed".
         JobStatus: "scheduled", "in_progress", "completed", "cancelled", "failed"
         """
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}")
         result_status = result["status"]
 
         job_status = JobStatus.failed
@@ -140,27 +140,28 @@ class NVIDIAEvalImpl(
         elif result_status in ["cancelled"]:
             job_status = JobStatus.cancelled
 
-        return Job(job_id=job_id, status=job_status)
+        return Job(job_id=request.job_id, status=job_status)
 
-    async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
+    async def job_cancel(self, request: JobCancelRequest) -> None:
         """Cancel the evaluation job."""
-        await self._evaluator_post(f"/v1/evaluation/jobs/{job_id}/cancel", {})
+        await self._evaluator_post(f"/v1/evaluation/jobs/{request.job_id}/cancel", {})
 
-    async def job_result(self, benchmark_id: str, job_id: str) -> EvaluateResponse:
+    async def job_result(self, request: JobResultRequest) -> EvaluateResponse:
         """Returns the results of the evaluation job."""
 
-        job = await self.job_status(benchmark_id, job_id)
+        job_status_request = JobStatusRequest(benchmark_id=request.benchmark_id, job_id=request.job_id)
+        job = await self.job_status(job_status_request)
         status = job.status
         if not status or status != JobStatus.completed:
-            raise ValueError(f"Job {job_id} not completed. Status: {status.value}")
+            raise ValueError(f"Job {request.job_id} not completed. Status: {status.value}")
 
-        result = await self._evaluator_get(f"/v1/evaluation/jobs/{job_id}/results")
+        result = await self._evaluator_get(f"/v1/evaluation/jobs/{request.job_id}/results")
 
         return EvaluateResponse(
             # TODO: these are stored in detailed results on NeMo Evaluator side; can be added
             generations=[],
             scores={
-                benchmark_id: ScoringResult(
+                request.benchmark_id: ScoringResult(
                     score_rows=[],
                     aggregated_results=result,
                 )