llama-stack 0.4.4__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159)
  1. llama_stack/cli/stack/_list_deps.py +11 -7
  2. llama_stack/cli/stack/run.py +3 -25
  3. llama_stack/core/access_control/datatypes.py +78 -0
  4. llama_stack/core/configure.py +2 -2
  5. llama_stack/{distributions/meta-reference-gpu → core/connectors}/__init__.py +3 -1
  6. llama_stack/core/connectors/connectors.py +162 -0
  7. llama_stack/core/conversations/conversations.py +61 -58
  8. llama_stack/core/datatypes.py +54 -8
  9. llama_stack/core/library_client.py +60 -13
  10. llama_stack/core/prompts/prompts.py +43 -42
  11. llama_stack/core/routers/datasets.py +20 -17
  12. llama_stack/core/routers/eval_scoring.py +143 -53
  13. llama_stack/core/routers/inference.py +20 -9
  14. llama_stack/core/routers/safety.py +30 -42
  15. llama_stack/core/routers/vector_io.py +15 -7
  16. llama_stack/core/routing_tables/models.py +42 -3
  17. llama_stack/core/routing_tables/scoring_functions.py +19 -19
  18. llama_stack/core/routing_tables/shields.py +20 -17
  19. llama_stack/core/routing_tables/vector_stores.py +8 -5
  20. llama_stack/core/server/auth.py +192 -17
  21. llama_stack/core/server/fastapi_router_registry.py +40 -5
  22. llama_stack/core/server/server.py +24 -5
  23. llama_stack/core/stack.py +54 -10
  24. llama_stack/core/storage/datatypes.py +9 -0
  25. llama_stack/core/store/registry.py +1 -1
  26. llama_stack/core/utils/exec.py +2 -2
  27. llama_stack/core/utils/type_inspection.py +16 -2
  28. llama_stack/distributions/dell/config.yaml +4 -1
  29. llama_stack/distributions/dell/run-with-safety.yaml +4 -1
  30. llama_stack/distributions/nvidia/config.yaml +4 -1
  31. llama_stack/distributions/nvidia/run-with-safety.yaml +4 -1
  32. llama_stack/distributions/oci/config.yaml +4 -1
  33. llama_stack/distributions/open-benchmark/config.yaml +9 -1
  34. llama_stack/distributions/postgres-demo/config.yaml +1 -1
  35. llama_stack/distributions/starter/build.yaml +62 -0
  36. llama_stack/distributions/starter/config.yaml +22 -3
  37. llama_stack/distributions/starter/run-with-postgres-store.yaml +22 -3
  38. llama_stack/distributions/starter/starter.py +13 -1
  39. llama_stack/distributions/starter-gpu/build.yaml +62 -0
  40. llama_stack/distributions/starter-gpu/config.yaml +22 -3
  41. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +22 -3
  42. llama_stack/distributions/template.py +10 -2
  43. llama_stack/distributions/watsonx/config.yaml +4 -1
  44. llama_stack/log.py +1 -0
  45. llama_stack/providers/inline/agents/meta_reference/__init__.py +1 -0
  46. llama_stack/providers/inline/agents/meta_reference/agents.py +58 -61
  47. llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py +53 -51
  48. llama_stack/providers/inline/agents/meta_reference/responses/streaming.py +99 -22
  49. llama_stack/providers/inline/agents/meta_reference/responses/types.py +2 -1
  50. llama_stack/providers/inline/agents/meta_reference/responses/utils.py +4 -1
  51. llama_stack/providers/inline/agents/meta_reference/safety.py +2 -2
  52. llama_stack/providers/inline/batches/reference/batches.py +2 -1
  53. llama_stack/providers/inline/eval/meta_reference/eval.py +40 -32
  54. llama_stack/providers/inline/post_training/huggingface/post_training.py +33 -38
  55. llama_stack/providers/inline/post_training/huggingface/utils.py +2 -5
  56. llama_stack/providers/inline/post_training/torchtune/common/utils.py +5 -9
  57. llama_stack/providers/inline/post_training/torchtune/post_training.py +28 -33
  58. llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +2 -4
  59. llama_stack/providers/inline/safety/code_scanner/code_scanner.py +12 -15
  60. llama_stack/providers/inline/safety/llama_guard/llama_guard.py +20 -24
  61. llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py +11 -17
  62. llama_stack/providers/inline/scoring/basic/scoring.py +13 -17
  63. llama_stack/providers/inline/scoring/braintrust/braintrust.py +15 -15
  64. llama_stack/providers/inline/scoring/llm_as_judge/scoring.py +13 -17
  65. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +1 -1
  66. llama_stack/providers/registry/agents.py +1 -0
  67. llama_stack/providers/registry/inference.py +1 -9
  68. llama_stack/providers/registry/vector_io.py +136 -16
  69. llama_stack/providers/remote/eval/nvidia/eval.py +22 -21
  70. llama_stack/providers/remote/files/s3/config.py +5 -3
  71. llama_stack/providers/remote/files/s3/files.py +2 -2
  72. llama_stack/providers/remote/inference/gemini/gemini.py +4 -0
  73. llama_stack/providers/remote/inference/openai/openai.py +2 -0
  74. llama_stack/providers/remote/inference/together/together.py +4 -0
  75. llama_stack/providers/remote/inference/vertexai/config.py +3 -3
  76. llama_stack/providers/remote/inference/vertexai/vertexai.py +5 -2
  77. llama_stack/providers/remote/inference/vllm/config.py +37 -18
  78. llama_stack/providers/remote/inference/vllm/vllm.py +0 -3
  79. llama_stack/providers/remote/inference/watsonx/watsonx.py +4 -0
  80. llama_stack/providers/remote/post_training/nvidia/models.py +3 -11
  81. llama_stack/providers/remote/post_training/nvidia/post_training.py +31 -33
  82. llama_stack/providers/remote/safety/bedrock/bedrock.py +10 -27
  83. llama_stack/providers/remote/safety/nvidia/nvidia.py +9 -25
  84. llama_stack/providers/remote/safety/sambanova/sambanova.py +13 -11
  85. llama_stack/providers/remote/vector_io/elasticsearch/__init__.py +17 -0
  86. llama_stack/providers/remote/vector_io/elasticsearch/config.py +32 -0
  87. llama_stack/providers/remote/vector_io/elasticsearch/elasticsearch.py +463 -0
  88. llama_stack/providers/remote/vector_io/oci/__init__.py +22 -0
  89. llama_stack/providers/remote/vector_io/oci/config.py +41 -0
  90. llama_stack/providers/remote/vector_io/oci/oci26ai.py +595 -0
  91. llama_stack/providers/remote/vector_io/pgvector/config.py +69 -2
  92. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +255 -6
  93. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +62 -38
  94. llama_stack/providers/utils/bedrock/client.py +3 -3
  95. llama_stack/providers/utils/bedrock/config.py +7 -7
  96. llama_stack/providers/utils/inference/__init__.py +0 -25
  97. llama_stack/providers/utils/inference/embedding_mixin.py +4 -0
  98. llama_stack/providers/utils/inference/http_client.py +239 -0
  99. llama_stack/providers/utils/inference/litellm_openai_mixin.py +6 -0
  100. llama_stack/providers/utils/inference/model_registry.py +148 -2
  101. llama_stack/providers/utils/inference/openai_compat.py +1 -158
  102. llama_stack/providers/utils/inference/openai_mixin.py +42 -2
  103. llama_stack/providers/utils/inference/prompt_adapter.py +0 -209
  104. llama_stack/providers/utils/memory/openai_vector_store_mixin.py +92 -5
  105. llama_stack/providers/utils/memory/vector_store.py +46 -19
  106. llama_stack/providers/utils/responses/responses_store.py +7 -7
  107. llama_stack/providers/utils/safety.py +114 -0
  108. llama_stack/providers/utils/tools/mcp.py +44 -3
  109. llama_stack/testing/api_recorder.py +9 -3
  110. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/METADATA +14 -2
  111. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/RECORD +115 -148
  112. llama_stack/distributions/meta-reference-gpu/config.yaml +0 -140
  113. llama_stack/distributions/meta-reference-gpu/doc_template.md +0 -119
  114. llama_stack/distributions/meta-reference-gpu/meta_reference.py +0 -163
  115. llama_stack/distributions/meta-reference-gpu/run-with-safety.yaml +0 -155
  116. llama_stack/models/llama/hadamard_utils.py +0 -88
  117. llama_stack/models/llama/llama3/args.py +0 -74
  118. llama_stack/models/llama/llama3/dog.jpg +0 -0
  119. llama_stack/models/llama/llama3/generation.py +0 -378
  120. llama_stack/models/llama/llama3/model.py +0 -304
  121. llama_stack/models/llama/llama3/multimodal/__init__.py +0 -12
  122. llama_stack/models/llama/llama3/multimodal/encoder_utils.py +0 -180
  123. llama_stack/models/llama/llama3/multimodal/image_transform.py +0 -409
  124. llama_stack/models/llama/llama3/multimodal/model.py +0 -1430
  125. llama_stack/models/llama/llama3/multimodal/utils.py +0 -26
  126. llama_stack/models/llama/llama3/pasta.jpeg +0 -0
  127. llama_stack/models/llama/llama3/quantization/__init__.py +0 -5
  128. llama_stack/models/llama/llama3/quantization/loader.py +0 -316
  129. llama_stack/models/llama/llama3_1/__init__.py +0 -12
  130. llama_stack/models/llama/llama3_1/prompt_format.md +0 -358
  131. llama_stack/models/llama/llama3_1/prompts.py +0 -258
  132. llama_stack/models/llama/llama3_2/__init__.py +0 -5
  133. llama_stack/models/llama/llama3_2/prompts_text.py +0 -229
  134. llama_stack/models/llama/llama3_2/prompts_vision.py +0 -126
  135. llama_stack/models/llama/llama3_2/text_prompt_format.md +0 -286
  136. llama_stack/models/llama/llama3_2/vision_prompt_format.md +0 -141
  137. llama_stack/models/llama/llama3_3/__init__.py +0 -5
  138. llama_stack/models/llama/llama3_3/prompts.py +0 -259
  139. llama_stack/models/llama/llama4/args.py +0 -107
  140. llama_stack/models/llama/llama4/ffn.py +0 -58
  141. llama_stack/models/llama/llama4/moe.py +0 -214
  142. llama_stack/models/llama/llama4/preprocess.py +0 -435
  143. llama_stack/models/llama/llama4/quantization/__init__.py +0 -5
  144. llama_stack/models/llama/llama4/quantization/loader.py +0 -226
  145. llama_stack/models/llama/llama4/vision/__init__.py +0 -5
  146. llama_stack/models/llama/llama4/vision/embedding.py +0 -210
  147. llama_stack/models/llama/llama4/vision/encoder.py +0 -412
  148. llama_stack/models/llama/quantize_impls.py +0 -316
  149. llama_stack/providers/inline/inference/meta_reference/__init__.py +0 -20
  150. llama_stack/providers/inline/inference/meta_reference/common.py +0 -24
  151. llama_stack/providers/inline/inference/meta_reference/config.py +0 -68
  152. llama_stack/providers/inline/inference/meta_reference/generators.py +0 -201
  153. llama_stack/providers/inline/inference/meta_reference/inference.py +0 -542
  154. llama_stack/providers/inline/inference/meta_reference/model_parallel.py +0 -77
  155. llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +0 -353
  156. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/WHEEL +0 -0
  157. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/entry_points.txt +0 -0
  158. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/licenses/LICENSE +0 -0
  159. {llama_stack-0.4.4.dist-info → llama_stack-0.5.0.dist-info}/top_level.txt +0 -0
llama_stack/providers/inline/safety/llama_guard/llama_guard.py

@@ -7,16 +7,15 @@
  import re
  import uuid
  from string import Template
- from typing import Any

  from llama_stack.core.datatypes import Api
  from llama_stack.log import get_logger
  from llama_stack.models.llama.datatypes import Role
- from llama_stack.models.llama.sku_types import CoreModelId
  from llama_stack.providers.utils.inference.prompt_adapter import (
      interleaved_content_as_str,
  )
  from llama_stack_api import (
+     GetShieldRequest,
      ImageContentItem,
      Inference,
      ModerationObject,
@@ -24,6 +23,8 @@ from llama_stack_api import (
      OpenAIChatCompletionRequestWithExtraBody,
      OpenAIMessageParam,
      OpenAIUserMessageParam,
+     RunModerationRequest,
+     RunShieldRequest,
      RunShieldResponse,
      Safety,
      SafetyViolation,
@@ -91,13 +92,13 @@ DEFAULT_LG_V3_SAFETY_CATEGORIES = [

  # accept both CoreModelId and huggingface repo id
  LLAMA_GUARD_MODEL_IDS = {
-     CoreModelId.llama_guard_3_8b.value: "meta-llama/Llama-Guard-3-8B",
+     "Llama-Guard-3-8B": "meta-llama/Llama-Guard-3-8B",
      "meta-llama/Llama-Guard-3-8B": "meta-llama/Llama-Guard-3-8B",
-     CoreModelId.llama_guard_3_1b.value: "meta-llama/Llama-Guard-3-1B",
+     "Llama-Guard-3-1B": "meta-llama/Llama-Guard-3-1B",
      "meta-llama/Llama-Guard-3-1B": "meta-llama/Llama-Guard-3-1B",
-     CoreModelId.llama_guard_3_11b_vision.value: "meta-llama/Llama-Guard-3-11B-Vision",
+     "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision",
      "meta-llama/Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision",
-     CoreModelId.llama_guard_4_12b.value: "meta-llama/Llama-Guard-4-12B",
+     "Llama-Guard-4-12B": "meta-llama/Llama-Guard-4-12B",
      "meta-llama/Llama-Guard-4-12B": "meta-llama/Llama-Guard-4-12B",
  }

@@ -161,17 +162,12 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
          # The routing table handles the removal from the registry
          pass

-     async def run_shield(
-         self,
-         shield_id: str,
-         messages: list[OpenAIMessageParam],
-         params: dict[str, Any] = None,
-     ) -> RunShieldResponse:
-         shield = await self.shield_store.get_shield(shield_id)
+     async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+         shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
          if not shield:
-             raise ValueError(f"Unknown shield {shield_id}")
+             raise ValueError(f"Unknown shield {request.shield_id}")

-         messages = messages.copy()
+         messages = request.messages.copy()
          # some shields like llama-guard require the first message to be a user message
          # since this might be a tool call, first role might not be user
          if len(messages) > 0 and messages[0].role != "user":
@@ -200,30 +196,30 @@ class LlamaGuardSafetyImpl(Safety, ShieldsProtocolPrivate):

          return await impl.run(messages)

-     async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
-         if model is None:
+     async def run_moderation(self, request: RunModerationRequest) -> ModerationObject:
+         if request.model is None:
              raise ValueError("Llama Guard moderation requires a model identifier.")

-         if isinstance(input, list):
-             messages = input.copy()
+         if isinstance(request.input, list):
+             messages = request.input.copy()
          else:
-             messages = [input]
+             messages = [request.input]

          # convert to user messages format with role
          messages = [OpenAIUserMessageParam(content=m) for m in messages]

          # Determine safety categories based on the model type
          # For known Llama Guard models, use specific categories
-         if model in LLAMA_GUARD_MODEL_IDS:
+         if request.model in LLAMA_GUARD_MODEL_IDS:
              # Use the mapped model for categories but the original model_id for inference
-             mapped_model = LLAMA_GUARD_MODEL_IDS[model]
+             mapped_model = LLAMA_GUARD_MODEL_IDS[request.model]
              safety_categories = MODEL_TO_SAFETY_CATEGORIES_MAP.get(mapped_model, DEFAULT_LG_V3_SAFETY_CATEGORIES)
          else:
              # For unknown models, use default Llama Guard 3 8B categories
              safety_categories = DEFAULT_LG_V3_SAFETY_CATEGORIES + [CAT_CODE_INTERPRETER_ABUSE]

          impl = LlamaGuardShield(
-             model=model,
+             model=request.model,
              inference_api=self.inference_api,
              excluded_categories=self.config.excluded_categories,
              safety_categories=safety_categories,
@@ -293,7 +289,7 @@ class LlamaGuardShield:
      async def run(self, messages: list[OpenAIMessageParam]) -> RunShieldResponse:
          messages = self.validate_messages(messages)

-         if self.model == CoreModelId.llama_guard_3_11b_vision.value:
+         if self.model == "Llama-Guard-3-11B-Vision":
              shield_input_message = self.build_vision_shield_input(messages)
          else:
              shield_input_message = self.build_text_shield_input(messages)
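
The hunks above move the Safety provider from positional arguments (`run_shield(shield_id, messages, params)`, `run_moderation(input, model)`) to single request objects. A minimal caller-side sketch of the new style, assuming `RunShieldRequest` and `RunModerationRequest` accept exactly the fields the diff uses (`shield_id`, `messages`, `input`, `model`); the shield identifier and model name below are placeholders:

```python
from llama_stack_api import (
    OpenAIUserMessageParam,
    RunModerationRequest,
    RunShieldRequest,
)


async def moderate_text(safety, text: str):
    # Arguments that used to be passed positionally are now wrapped in request objects.
    shield_response = await safety.run_shield(
        RunShieldRequest(
            shield_id="llama-guard",  # placeholder shield identifier
            messages=[OpenAIUserMessageParam(content=text)],
        )
    )
    moderation = await safety.run_moderation(
        RunModerationRequest(input=text, model="meta-llama/Llama-Guard-3-8B")  # placeholder model
    )
    return shield_response, moderation
```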

llama_stack/providers/inline/safety/prompt_guard/prompt_guard.py

@@ -4,17 +4,19 @@
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.

- from typing import Any
-
  import torch
  from transformers import AutoModelForSequenceClassification, AutoTokenizer

  from llama_stack.core.utils.model_utils import model_local_dir
  from llama_stack.log import get_logger
- from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
+ from llama_stack.providers.utils.inference.prompt_adapter import (
+     interleaved_content_as_str,
+ )
+ from llama_stack.providers.utils.safety import ShieldToModerationMixin
  from llama_stack_api import (
-     ModerationObject,
+     GetShieldRequest,
      OpenAIMessageParam,
+     RunShieldRequest,
      RunShieldResponse,
      Safety,
      SafetyViolation,
@@ -31,7 +33,7 @@ log = get_logger(name=__name__, category="safety")
  PROMPT_GUARD_MODEL = "Prompt-Guard-86M"


- class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
+ class PromptGuardSafetyImpl(ShieldToModerationMixin, Safety, ShieldsProtocolPrivate):
      shield_store: ShieldStore

      def __init__(self, config: PromptGuardConfig, _deps) -> None:
@@ -51,20 +53,12 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
      async def unregister_shield(self, identifier: str) -> None:
          pass

-     async def run_shield(
-         self,
-         shield_id: str,
-         messages: list[OpenAIMessageParam],
-         params: dict[str, Any],
-     ) -> RunShieldResponse:
-         shield = await self.shield_store.get_shield(shield_id)
+     async def run_shield(self, request: RunShieldRequest) -> RunShieldResponse:
+         shield = await self.shield_store.get_shield(GetShieldRequest(identifier=request.shield_id))
          if not shield:
-             raise ValueError(f"Unknown shield {shield_id}")
-
-         return await self.shield.run(messages)
+             raise ValueError(f"Unknown shield {request.shield_id}")

-     async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
-         raise NotImplementedError("run_moderation is not implemented for Prompt Guard")
+         return await self.shield.run(request.messages)


  class PromptGuardShield:

llama_stack/providers/inline/scoring/basic/scoring.py

@@ -3,16 +3,17 @@
  #
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.
- from typing import Any

  from llama_stack_api import (
      DatasetIO,
      Datasets,
+     IterRowsRequest,
+     ScoreBatchRequest,
      ScoreBatchResponse,
+     ScoreRequest,
      ScoreResponse,
      Scoring,
      ScoringFn,
-     ScoringFnParams,
      ScoringFunctionsProtocolPrivate,
      ScoringResult,
  )
@@ -75,19 +76,15 @@ class BasicScoringImpl(

      async def score_batch(
          self,
-         dataset_id: str,
-         scoring_functions: dict[str, ScoringFnParams | None] = None,
-         save_results_dataset: bool = False,
+         request: ScoreBatchRequest,
      ) -> ScoreBatchResponse:
-         all_rows = await self.datasetio_api.iterrows(
-             dataset_id=dataset_id,
-             limit=-1,
-         )
-         res = await self.score(
+         all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
+         score_request = ScoreRequest(
              input_rows=all_rows.data,
-             scoring_functions=scoring_functions,
+             scoring_functions=request.scoring_functions,
          )
-         if save_results_dataset:
+         res = await self.score(score_request)
+         if request.save_results_dataset:
              # TODO: persist and register dataset on to server for reading
              # self.datasets_api.register_dataset()
              raise NotImplementedError("Save results dataset not implemented yet")
@@ -98,16 +95,15 @@ class BasicScoringImpl(

      async def score(
          self,
-         input_rows: list[dict[str, Any]],
-         scoring_functions: dict[str, ScoringFnParams | None] = None,
+         request: ScoreRequest,
      ) -> ScoreResponse:
          res = {}
-         for scoring_fn_id in scoring_functions.keys():
+         for scoring_fn_id in request.scoring_functions.keys():
              if scoring_fn_id not in self.scoring_fn_id_impls:
                  raise ValueError(f"Scoring function {scoring_fn_id} is not supported.")
              scoring_fn = self.scoring_fn_id_impls[scoring_fn_id]
-             scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
-             score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
+             scoring_fn_params = request.scoring_functions.get(scoring_fn_id, None)
+             score_results = await scoring_fn.score(request.input_rows, scoring_fn_id, scoring_fn_params)
              agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
              res[scoring_fn_id] = ScoringResult(
                  score_rows=score_results,

llama_stack/providers/inline/scoring/braintrust/braintrust.py

@@ -29,11 +29,13 @@ from llama_stack.providers.utils.scoring.aggregation_utils import aggregate_metr
  from llama_stack_api import (
      DatasetIO,
      Datasets,
+     IterRowsRequest,
+     ScoreBatchRequest,
      ScoreBatchResponse,
+     ScoreRequest,
      ScoreResponse,
      Scoring,
      ScoringFn,
-     ScoringFnParams,
      ScoringFunctionsProtocolPrivate,
      ScoringResult,
      ScoringResultRow,
@@ -158,18 +160,17 @@ class BraintrustScoringImpl(

      async def score_batch(
          self,
-         dataset_id: str,
-         scoring_functions: dict[str, ScoringFnParams | None],
-         save_results_dataset: bool = False,
+         request: ScoreBatchRequest,
      ) -> ScoreBatchResponse:
          await self.set_api_key()

-         all_rows = await self.datasetio_api.iterrows(
-             dataset_id=dataset_id,
-             limit=-1,
+         all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
+         score_request = ScoreRequest(
+             input_rows=all_rows.data,
+             scoring_functions=request.scoring_functions,
          )
-         res = await self.score(input_rows=all_rows.data, scoring_functions=scoring_functions)
-         if save_results_dataset:
+         res = await self.score(score_request)
+         if request.save_results_dataset:
              # TODO: persist and register dataset on to server for reading
              # self.datasets_api.register_dataset()
              raise NotImplementedError("Save results dataset not implemented yet")
@@ -198,21 +199,20 @@ class BraintrustScoringImpl(

      async def score(
          self,
-         input_rows: list[dict[str, Any]],
-         scoring_functions: dict[str, ScoringFnParams | None],
+         request: ScoreRequest,
      ) -> ScoreResponse:
          await self.set_api_key()
          res = {}
-         for scoring_fn_id in scoring_functions:
+         for scoring_fn_id in request.scoring_functions:
              if scoring_fn_id not in self.supported_fn_defs_registry:
                  raise ValueError(f"Scoring function {scoring_fn_id} is not supported.")

-             score_results = [await self.score_row(input_row, scoring_fn_id) for input_row in input_rows]
+             score_results = [await self.score_row(input_row, scoring_fn_id) for input_row in request.input_rows]
              aggregation_functions = self.supported_fn_defs_registry[scoring_fn_id].params.aggregation_functions

              # override scoring_fn params if provided
-             if scoring_functions[scoring_fn_id] is not None:
-                 override_params = scoring_functions[scoring_fn_id]
+             if request.scoring_functions[scoring_fn_id] is not None:
+                 override_params = request.scoring_functions[scoring_fn_id]
                  if override_params.aggregation_functions:
                      aggregation_functions = override_params.aggregation_functions

@@ -3,17 +3,18 @@
3
3
  #
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
- from typing import Any
7
6
 
8
7
  from llama_stack_api import (
9
8
  DatasetIO,
10
9
  Datasets,
11
10
  Inference,
11
+ IterRowsRequest,
12
+ ScoreBatchRequest,
12
13
  ScoreBatchResponse,
14
+ ScoreRequest,
13
15
  ScoreResponse,
14
16
  Scoring,
15
17
  ScoringFn,
16
- ScoringFnParams,
17
18
  ScoringFunctionsProtocolPrivate,
18
19
  ScoringResult,
19
20
  )
@@ -64,19 +65,15 @@ class LlmAsJudgeScoringImpl(
64
65
 
65
66
  async def score_batch(
66
67
  self,
67
- dataset_id: str,
68
- scoring_functions: dict[str, ScoringFnParams | None] = None,
69
- save_results_dataset: bool = False,
68
+ request: ScoreBatchRequest,
70
69
  ) -> ScoreBatchResponse:
71
- all_rows = await self.datasetio_api.iterrows(
72
- dataset_id=dataset_id,
73
- limit=-1,
74
- )
75
- res = await self.score(
70
+ all_rows = await self.datasetio_api.iterrows(IterRowsRequest(dataset_id=request.dataset_id, limit=-1))
71
+ score_request = ScoreRequest(
76
72
  input_rows=all_rows.data,
77
- scoring_functions=scoring_functions,
73
+ scoring_functions=request.scoring_functions,
78
74
  )
79
- if save_results_dataset:
75
+ res = await self.score(score_request)
76
+ if request.save_results_dataset:
80
77
  # TODO: persist and register dataset on to server for reading
81
78
  # self.datasets_api.register_dataset()
82
79
  raise NotImplementedError("Save results dataset not implemented yet")
@@ -87,14 +84,13 @@ class LlmAsJudgeScoringImpl(
87
84
 
88
85
  async def score(
89
86
  self,
90
- input_rows: list[dict[str, Any]],
91
- scoring_functions: dict[str, ScoringFnParams | None] = None,
87
+ request: ScoreRequest,
92
88
  ) -> ScoreResponse:
93
89
  res = {}
94
- for scoring_fn_id in scoring_functions.keys():
90
+ for scoring_fn_id in request.scoring_functions.keys():
95
91
  scoring_fn = self.llm_as_judge_fn
96
- scoring_fn_params = scoring_functions.get(scoring_fn_id, None)
97
- score_results = await scoring_fn.score(input_rows, scoring_fn_id, scoring_fn_params)
92
+ scoring_fn_params = request.scoring_functions.get(scoring_fn_id, None)
93
+ score_results = await scoring_fn.score(request.input_rows, scoring_fn_id, scoring_fn_params)
98
94
  agg_results = await scoring_fn.aggregate(score_results, scoring_fn_id, scoring_fn_params)
99
95
  res[scoring_fn_id] = ScoringResult(
100
96
  score_rows=score_results,
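
The scoring providers above (basic, braintrust, llm-as-judge) all switch to the same request-object signatures. A minimal caller-side sketch, assuming `ScoreRequest` and `ScoreBatchRequest` carry exactly the fields used in these hunks (`input_rows`, `scoring_functions`, `dataset_id`, `save_results_dataset`); the dataset and scoring-function identifiers below are placeholders:

```python
from llama_stack_api import ScoreBatchRequest, ScoreRequest


async def score_rows(scoring, rows: list[dict]):
    # Score in-memory rows; a value of None keeps the scoring function's default params.
    return await scoring.score(
        ScoreRequest(
            input_rows=rows,
            scoring_functions={"basic::equality": None},  # placeholder scoring function id
        )
    )


async def score_dataset(scoring):
    # Score every row of a registered dataset via score_batch.
    return await scoring.score_batch(
        ScoreBatchRequest(
            dataset_id="my-eval-dataset",  # placeholder dataset id
            scoring_functions={"basic::equality": None},
            save_results_dataset=False,
        )
    )
```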

llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py

@@ -59,7 +59,7 @@ def serialize_vector(vector: list[float]) -> bytes:
      return struct.pack(f"{len(vector)}f", *vector)


- def _create_sqlite_connection(db_path):
+ def _create_sqlite_connection(db_path: str):
      """Create a SQLite connection with sqlite_vec extension loaded."""
      connection = sqlite3.connect(db_path)
      connection.enable_load_extension(True)

llama_stack/providers/registry/agents.py

@@ -37,6 +37,7 @@ def available_providers() -> list[ProviderSpec]:
                  Api.conversations,
                  Api.prompts,
                  Api.files,
+                 Api.connectors,
              ],
              optional_api_dependencies=[
                  Api.safety,

llama_stack/providers/registry/inference.py

@@ -28,14 +28,6 @@ META_REFERENCE_DEPS = [

  def available_providers() -> list[ProviderSpec]:
      return [
-         InlineProviderSpec(
-             api=Api.inference,
-             provider_type="inline::meta-reference",
-             pip_packages=META_REFERENCE_DEPS,
-             module="llama_stack.providers.inline.inference.meta_reference",
-             config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
-             description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
-         ),
          InlineProviderSpec(
              api=Api.inference,
              provider_type="inline::sentence-transformers",
@@ -223,7 +215,7 @@ def available_providers() -> list[ProviderSpec]:

  Configuration:
  - Set VERTEX_AI_PROJECT environment variable (required)
- - Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
+ - Set VERTEX_AI_LOCATION environment variable (optional, defaults to global)
  - Use Google Cloud Application Default Credentials or service account key

  Authentication Setup:

llama_stack/providers/registry/vector_io.py

@@ -419,6 +419,7 @@ There are three implementations of search for PGVectoIndex available:
  - Semantic understanding - finds documents similar in meaning even if they don't share keywords
  - Works with high-dimensional vector embeddings (typically 768, 1024, or higher dimensions)
  - Best for: Finding conceptually related content, handling synonyms, cross-language search
+ - By default, Llama Stack creates a HNSW (Hierarchical Navigable Small Worlds) index on a column "embedding" in a vector store table enabling production-ready, performant and scalable vector search for large datasets out of the box.

  2. Keyword Search
  - How it works:
@@ -448,6 +449,7 @@ There are three implementations of search for PGVectoIndex available:
  - Best for: General-purpose search where you want both precision and recall

  4. Database Schema
+
  The PGVector implementation stores data optimized for all three search types:
  CREATE TABLE vector_store_xxx (
      id TEXT PRIMARY KEY,
@@ -457,9 +459,6 @@ CREATE TABLE vector_store_xxx (
      tokenized_content TSVECTOR -- For keyword search
  );

- -- Indexes for performance
- CREATE INDEX content_gin_idx ON table USING GIN(tokenized_content); -- Keyword search
- -- Vector index created automatically by pgvector

  ## Usage

@@ -469,32 +468,55 @@ To use PGVector in your Llama Stack project, follow these steps:
  2. Configure your Llama Stack project to use pgvector. (e.g. remote::pgvector).
  3. Start storing and querying vectors.

- ## This is an example how you can set up your environment for using PGVector
+ ## This is an example how you can set up your environment for using PGVector (you can use either Podman or Docker)

- 1. Export env vars:
+ 1. Export PGVector environment variables:
  ```bash
- export ENABLE_PGVECTOR=true
+ export PGVECTOR_DB=testvectordb
  export PGVECTOR_HOST=localhost
  export PGVECTOR_PORT=5432
- export PGVECTOR_DB=llamastack
- export PGVECTOR_USER=llamastack
- export PGVECTOR_PASSWORD=llamastack
+ export PGVECTOR_USER=user
+ export PGVECTOR_PASSWORD=password
  ```

- 2. Create DB:
+ 2. Pull pgvector image with that tag you want:
+
+ Via Podman:
  ```bash
- psql -h localhost -U postgres -c "CREATE ROLE llamastack LOGIN PASSWORD 'llamastack';"
- psql -h localhost -U postgres -c "CREATE DATABASE llamastack OWNER llamastack;"
- psql -h localhost -U llamastack -d llamastack -c "CREATE EXTENSION IF NOT EXISTS vector;"
+ podman pull pgvector/pgvector:0.8.1-pg18-trixie
  ```

- ## Installation
+ Via Docker:
+ ```bash
+ docker pull pgvector/pgvector:0.8.1-pg18-trixie
+ ```
+
+ 3. Run container with PGVector:

- You can install PGVector using docker:
+ Via Podman
+ ```bash
+ podman run -d \
+   --name pgvector \
+   -e POSTGRES_PASSWORD=password \
+   -e POSTGRES_USER=user \
+   -e POSTGRES_DB=testvectordb \
+   -p 5432:5432 \
+   -v pgvector_data:/var/lib/postgresql \
+   pgvector/pgvector:0.8.1-pg18-trixie
+ ```

+ Via Docker
  ```bash
- docker pull pgvector/pgvector:pg17
+ docker run -d \
+   --name pgvector \
+   -e POSTGRES_PASSWORD=password \
+   -e POSTGRES_USER=user \
+   -e POSTGRES_DB=testvectordb \
+   -p 5432:5432 \
+   -v pgvector_data:/var/lib/postgresql \
+   pgvector/pgvector:0.8.1-pg18-trixie
  ```
+
  ## Documentation
  See [PGVector's documentation](https://github.com/pgvector/pgvector) for more details about PGVector in general.
  """,
@@ -823,6 +845,104 @@ For more details on TLS configuration, refer to the [TLS setup guide](https://mi
              optional_api_dependencies=[Api.files, Api.models],
              description="""
  Please refer to the remote provider documentation.
+ """,
+         ),
+         RemoteProviderSpec(
+             api=Api.vector_io,
+             adapter_type="elasticsearch",
+             provider_type="remote::elasticsearch",
+             pip_packages=["elasticsearch>=8.16.0,<9.0.0"] + DEFAULT_VECTOR_IO_DEPS,
+             module="llama_stack.providers.remote.vector_io.elasticsearch",
+             config_class="llama_stack.providers.remote.vector_io.elasticsearch.ElasticsearchVectorIOConfig",
+             api_dependencies=[Api.inference],
+             optional_api_dependencies=[Api.files, Api.models],
+             description="""
+ [Elasticsearch](https://www.elastic.co/) is a vector database provider for Llama Stack.
+ It allows you to store and query vectors directly within an Elasticsearch database.
+ That means you're not limited to storing vectors in memory or in a separate service.
+
+ ## Features
+ Elasticsearch supports:
+ - Store embeddings and their metadata
+ - Vector search
+ - Full-text search
+ - Fuzzy search
+ - Hybrid search
+ - Document storage
+ - Metadata filtering
+ - Inference service
+ - Machine Learning integrations
+
+ ## Usage
+
+ To use Elasticsearch in your Llama Stack project, follow these steps:
+
+ 1. Install the necessary dependencies.
+ 2. Configure your Llama Stack project to use Elasticsearch.
+ 3. Start storing and querying vectors.
+
+ ## Installation
+
+ You can test Elasticsearch locally by running this script in the terminal:
+
+ ```bash
+ curl -fsSL https://elastic.co/start-local | sh
+ ```
+
+ Or you can [start a free trial](https://www.elastic.co/cloud/cloud-trial-overview?utm_campaign=llama-stack-integration) on Elastic Cloud.
+ For more information on how to deploy Elasticsearch, see the [official documentation](https://www.elastic.co/docs/deploy-manage/deploy).
+
+ ## Documentation
+ See [Elasticsearch's documentation](https://www.elastic.co/docs/solutions/search) for more details about Elasticsearch in general.
+ """,
+         ),
+         RemoteProviderSpec(
+             api=Api.vector_io,
+             adapter_type="oci",
+             provider_type="remote::oci",
+             pip_packages=["oracledb", "numpy"] + DEFAULT_VECTOR_IO_DEPS,
+             module="llama_stack.providers.remote.vector_io.oci",
+             config_class="llama_stack.providers.remote.vector_io.oci.OCI26aiVectorIOConfig",
+             api_dependencies=[Api.inference],
+             optional_api_dependencies=[Api.files, Api.models],
+             description="""
+ [Oracle 26ai](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+ is a remote vector database provider for Llama Stack. It allows you to store and query vectors directly
+ in an Oracle 26ai database.
+ ## Features
+ - Easy to use
+ - Fully integrated with Llama Stack
+ - Supports vector search, keyword search, and hybrid search
+ ## Usage
+ To use Oracle 26ai in your Llama Stack project, follow these steps:
+ 1. Install the necessary dependencies.
+ 2. Configure your Llama Stack project to use Oracle 26ai.
+ 3. Start storing and querying vectors.
+ ## Installation
+ You can install the Oracle 26ai client using pip:
+ ```bash
+ pip install oracledb
+ ```
+ ## Configuration
+ ```yaml
+ vector_io:
+   - provider_id: oci
+     provider_type: remote::oci
+     config:
+       conn_str: "${env.OCI26AI_CONNECTION_STRING}"
+       user: "${env.OCI26AI_USER}"
+       password: "${env.OCI26AI_PASSWORD}"
+       tnsnames_loc: "${env.OCI26AI_TNSNAMES_LOC}"
+       ewallet_pem_loc: "${env.OCI26AI_EWALLET_PEM_LOC}"
+       ewallet_password: "${env.OCI26AI_EWALLET_PWD}"
+       vector_datatype: "${env.OCI26AI_VECTOR_DATATYPE:=FLOAT32}"
+       persistence:
+         namespace: vector_id::oci26ai
+         backend: kv_default
+ ```
+ ## Documentation
+ See the [Oracle 26ai documentation](https://docs.oracle.com/en/database/oracle/oracle-database/26/index.html)
+ for more details about Oracle 26ai in general.
  """,
          ),
      ]