llama-stack 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff covers the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in those registries.
- llama_stack/cli/stack/list_deps.py +4 -0
- llama_stack/core/routers/inference.py +66 -40
- llama_stack/distributions/starter/build.yaml +1 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/starter.py +86 -68
- llama_stack/distributions/starter-gpu/build.yaml +1 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +25 -2
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +15 -4
- llama_stack/providers/remote/inference/vertexai/vertexai.py +10 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +9 -3
- llama_stack/providers/remote/vector_io/milvus/milvus.py +7 -4
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +32 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +11 -6
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +7 -4
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -2
- llama_stack/providers/utils/inference/inference_store.py +30 -10
- llama_stack/providers/utils/inference/model_registry.py +1 -1
- llama_stack/providers/utils/inference/openai_mixin.py +33 -10
- llama_stack/providers/utils/responses/responses_store.py +12 -58
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +25 -9
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +31 -1
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/METADATA +3 -3
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/RECORD +29 -26
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/top_level.txt +0 -0
llama_stack/cli/stack/list_deps.py

@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
     def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
+        if not args.config and not args.providers:
+            self.parser.print_help()
+            self.parser.exit()
+
         from ._list_deps import run_stack_list_deps_command

         return run_stack_list_deps_command(args)
llama_stack/core/routers/inference.py

@@ -105,7 +105,8 @@ class InferenceRouter(Inference):
         prompt_tokens: int,
         completion_tokens: int,
         total_tokens: int,
-
+        fully_qualified_model_id: str,
+        provider_id: str,
     ) -> list[MetricEvent]:
         """Constructs a list of MetricEvent objects containing token usage metrics.

@@ -113,7 +114,8 @@ class InferenceRouter(Inference):
             prompt_tokens: Number of tokens in the prompt
             completion_tokens: Number of tokens in the completion
             total_tokens: Total number of tokens used
-
+            fully_qualified_model_id:
+            provider_id: The provider identifier

         Returns:
             List of MetricEvent objects with token usage metrics
@@ -139,8 +141,8 @@ class InferenceRouter(Inference):
                 timestamp=datetime.now(UTC),
                 unit="tokens",
                 attributes={
-                    "model_id":
-                    "provider_id":
+                    "model_id": fully_qualified_model_id,
+                    "provider_id": provider_id,
                 },
             )
         )
@@ -153,7 +155,9 @@ class InferenceRouter(Inference):
         total_tokens: int,
         model: Model,
     ) -> list[MetricInResponse]:
-        metrics = self._construct_metrics(
+        metrics = self._construct_metrics(
+            prompt_tokens, completion_tokens, total_tokens, model.model_id, model.provider_id
+        )
         if self.telemetry:
             for metric in metrics:
                 enqueue_event(metric)
@@ -173,14 +177,25 @@ class InferenceRouter(Inference):
         encoded = self.formatter.encode_content(messages)
         return len(encoded.tokens) if encoded and encoded.tokens else 0

-    async def
-
-        model
-
+    async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
+        model = await self.routing_table.get_object_by_identifier("model", model_id)
+        if model:
+            if model.model_type != expected_model_type:
+                raise ModelTypeError(model_id, model.model_type, expected_model_type)
+
+            provider = await self.routing_table.get_provider_impl(model.identifier)
+            return provider, model.provider_resource_id
+
+        splits = model_id.split("/", maxsplit=1)
+        if len(splits) != 2:
+            raise ModelNotFoundError(model_id)
+
+        provider_id, provider_resource_id = splits
+        if provider_id not in self.routing_table.impls_by_provider_id:
+            logger.warning(f"Provider {provider_id} not found for model {model_id}")
             raise ModelNotFoundError(model_id)
-
-
-        return model
+
+        return self.routing_table.impls_by_provider_id[provider_id], provider_resource_id

     async def openai_completion(
         self,
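The new _get_model_provider helper adds a fallback for model ids that are not in the routing table: an id of the form provider_id/provider_resource_id is split and routed directly to that provider. A minimal standalone sketch of just that fallback parse, using an assumed example id (this is illustrative, not the router code itself):

# Illustrative only: the fallback parse for unregistered model ids.
def split_unregistered_model_id(model_id: str) -> tuple[str, str]:
    parts = model_id.split("/", maxsplit=1)
    if len(parts) != 2:
        raise ValueError(f"unknown model: {model_id}")
    provider_id, provider_resource_id = parts
    return provider_id, provider_resource_id

# e.g. split_unregistered_model_id("openai/gpt-4o-mini") == ("openai", "gpt-4o-mini")
# (assumed example id; the provider_id must still exist in the routing table)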
@@ -189,24 +204,24 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
         )
-
-
-
-        params.model = model_obj.identifier
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id

-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
         if params.stream:
             return await provider.openai_completion(params)
         # TODO: Metrics do NOT work with openai_completion stream=True due to the fact
         # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.

         response = await provider.openai_completion(params)
+        response.model = request_model_id
         if self.telemetry:
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
-
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
             )
             for metric in metrics:
                 enqueue_event(metric)
@@ -224,7 +239,9 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
         )
-
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id

         # Use the OpenAI client for a bit of extra input validation without
         # exposing the OpenAI client itself as part of our API surface
@@ -242,10 +259,6 @@ class InferenceRouter(Inference):
             params.tool_choice = None
             params.tools = None

-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
         if params.stream:
             response_stream = await provider.openai_chat_completion(params)

@@ -253,11 +266,13 @@ class InferenceRouter(Inference):
             # We need to add metrics to each chunk and store the final completion
             return self.stream_tokens_and_compute_metrics_openai_chat(
                 response=response_stream,
-
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
                 messages=params.messages,
             )

         response = await self._nonstream_openai_chat_completion(provider, params)
+        response.model = request_model_id

         # Store the response with the ID that will be returned to the client
         if self.store:
@@ -268,7 +283,8 @@ class InferenceRouter(Inference):
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
-
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
             )
             for metric in metrics:
                 enqueue_event(metric)
@@ -285,13 +301,13 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}",
         )
-
-
-
-        params.model = model_obj.identifier
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.embedding)
+        params.model = provider_resource_id

-
-
+        response = await provider.openai_embeddings(params)
+        response.model = request_model_id
+        return response

     async def list_chat_completions(
         self,
@@ -347,7 +363,8 @@ class InferenceRouter(Inference):
         self,
         response,
         prompt_tokens,
-
+        fully_qualified_model_id: str,
+        provider_id: str,
         tool_prompt_format: ToolPromptFormat | None = None,
     ) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None] | AsyncGenerator[CompletionResponseStreamChunk, None]:
         completion_text = ""
@@ -385,7 +402,8 @@ class InferenceRouter(Inference):
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=total_tokens,
-
+                fully_qualified_model_id=fully_qualified_model_id,
+                provider_id=provider_id,
             )
             for metric in completion_metrics:
                 if metric.metric in [
@@ -405,7 +423,8 @@ class InferenceRouter(Inference):
             prompt_tokens or 0,
             completion_tokens or 0,
             total_tokens,
-
+            fully_qualified_model_id=fully_qualified_model_id,
+            provider_id=provider_id,
         )
         async_metrics = [
             MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics
@@ -417,7 +436,8 @@ class InferenceRouter(Inference):
         self,
         response: ChatCompletionResponse | CompletionResponse,
         prompt_tokens,
-
+        fully_qualified_model_id: str,
+        provider_id: str,
         tool_prompt_format: ToolPromptFormat | None = None,
     ):
         if isinstance(response, ChatCompletionResponse):
@@ -434,7 +454,8 @@ class InferenceRouter(Inference):
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=total_tokens,
-
+                fully_qualified_model_id=fully_qualified_model_id,
+                provider_id=provider_id,
             )
             for metric in completion_metrics:
                 if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
@@ -448,14 +469,16 @@ class InferenceRouter(Inference):
             prompt_tokens or 0,
             completion_tokens or 0,
             total_tokens,
-
+            fully_qualified_model_id=fully_qualified_model_id,
+            provider_id=provider_id,
         )
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]

     async def stream_tokens_and_compute_metrics_openai_chat(
         self,
         response: AsyncIterator[OpenAIChatCompletionChunk],
-
+        fully_qualified_model_id: str,
+        provider_id: str,
         messages: list[OpenAIMessageParam] | None = None,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
@@ -475,6 +498,8 @@ class InferenceRouter(Inference):
             if created is None and chunk.created:
                 created = chunk.created

+            chunk.model = fully_qualified_model_id
+
             # Accumulate choice data for final assembly
             if chunk.choices:
                 for choice_delta in chunk.choices:
@@ -531,7 +556,8 @@ class InferenceRouter(Inference):
                         prompt_tokens=chunk.usage.prompt_tokens,
                         completion_tokens=chunk.usage.completion_tokens,
                         total_tokens=chunk.usage.total_tokens,
-
+                        model_id=fully_qualified_model_id,
+                        provider_id=provider_id,
                     )
                     for metric in metrics:
                         enqueue_event(metric)
@@ -579,7 +605,7 @@ class InferenceRouter(Inference):
                 id=id,
                 choices=assembled_choices,
                 created=created or int(time.time()),
-                model=
+                model=fully_qualified_model_id,
                 object="chat.completion",
             )
             logger.debug(f"InferenceRouter.completion_response: {final_response}")
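All three entry points (openai_completion, openai_chat_completion, openai_embeddings) now follow the same round trip: remember the id the client sent, rewrite params.model to the provider's resource id before forwarding, then restore the original id on the response (and on streamed chunks). A minimal sketch of that pattern, with a hypothetical call_provider helper standing in for the real router methods:

# Sketch only: preserve the client-facing model id across a provider call.
async def call_provider(provider, params, provider_resource_id: str):
    request_model_id = params.model       # e.g. "openai/gpt-4o-mini" (assumed example)
    params.model = provider_resource_id   # the id the backend provider expects
    response = await provider.openai_completion(params)
    response.model = request_model_id     # clients see the id they asked for
    return response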
llama_stack/distributions/starter/run-with-postgres-store.yaml

@@ -0,0 +1,285 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5