llama-stack 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry, and is provided for informational purposes only.
@@ -46,6 +46,10 @@ class StackListDeps(Subcommand):
     def _run_stack_list_deps_command(self, args: argparse.Namespace) -> None:
         # always keep implementation completely silo-ed away from CLI so CLI
         # can be fast to load and reduces dependencies
+        if not args.config and not args.providers:
+            self.parser.print_help()
+            self.parser.exit()
+
         from ._list_deps import run_stack_list_deps_command
 
         return run_stack_list_deps_command(args)
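The added guard means that invoking the subcommand with neither a config nor --providers now prints usage and exits cleanly instead of proceeding with nothing to resolve. A minimal sketch of the same argparse pattern (the prog string and flag shapes are illustrative, not the actual CLI wiring):

    import argparse

    parser = argparse.ArgumentParser(prog="stack-list-deps")  # hypothetical prog name
    parser.add_argument("config", nargs="?", default=None)    # assumed positional config, for illustration
    parser.add_argument("--providers", default=None)

    args = parser.parse_args()
    if not args.config and not args.providers:
        # Same behavior as the guard above: show help and stop early.
        parser.print_help()
        parser.exit()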
@@ -105,7 +105,8 @@ class InferenceRouter(Inference):
         prompt_tokens: int,
         completion_tokens: int,
         total_tokens: int,
-        model: Model,
+        fully_qualified_model_id: str,
+        provider_id: str,
     ) -> list[MetricEvent]:
         """Constructs a list of MetricEvent objects containing token usage metrics.
 
@@ -113,7 +114,8 @@ class InferenceRouter(Inference):
             prompt_tokens: Number of tokens in the prompt
             completion_tokens: Number of tokens in the completion
             total_tokens: Total number of tokens used
-            model: Model object containing model_id and provider_id
+            fully_qualified_model_id:
+            provider_id: The provider identifier
 
         Returns:
             List of MetricEvent objects with token usage metrics
@@ -139,8 +141,8 @@ class InferenceRouter(Inference):
                     timestamp=datetime.now(UTC),
                     unit="tokens",
                     attributes={
-                        "model_id": model.model_id,
-                        "provider_id": model.provider_id,
+                        "model_id": fully_qualified_model_id,
+                        "provider_id": provider_id,
                     },
                 )
             )
@@ -153,7 +155,9 @@ class InferenceRouter(Inference):
         total_tokens: int,
         model: Model,
     ) -> list[MetricInResponse]:
-        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
+        metrics = self._construct_metrics(
+            prompt_tokens, completion_tokens, total_tokens, model.model_id, model.provider_id
+        )
         if self.telemetry:
             for metric in metrics:
                 enqueue_event(metric)
@@ -173,14 +177,25 @@ class InferenceRouter(Inference):
         encoded = self.formatter.encode_content(messages)
         return len(encoded.tokens) if encoded and encoded.tokens else 0
 
-    async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
-        """takes a model id and gets model after ensuring that it is accessible and of the correct type"""
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
+    async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
+        model = await self.routing_table.get_object_by_identifier("model", model_id)
+        if model:
+            if model.model_type != expected_model_type:
+                raise ModelTypeError(model_id, model.model_type, expected_model_type)
+
+            provider = await self.routing_table.get_provider_impl(model.identifier)
+            return provider, model.provider_resource_id
+
+        splits = model_id.split("/", maxsplit=1)
+        if len(splits) != 2:
+            raise ModelNotFoundError(model_id)
+
+        provider_id, provider_resource_id = splits
+        if provider_id not in self.routing_table.impls_by_provider_id:
+            logger.warning(f"Provider {provider_id} not found for model {model_id}")
             raise ModelNotFoundError(model_id)
-        if model.model_type != expected_model_type:
-            raise ModelTypeError(model_id, model.model_type, expected_model_type)
-        return model
+
+        return self.routing_table.impls_by_provider_id[provider_id], provider_resource_id
 
     async def openai_completion(
         self,
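The replacement _get_model_provider first consults the routing table and, when the model is not registered, falls back to interpreting the id as provider_id/provider_resource_id. A small sketch of just that fallback parsing, with illustrative ids and a hypothetical helper name:

    def split_fully_qualified_model_id(model_id: str) -> tuple[str, str]:
        # Mirrors the fallback branch above: text before the first "/" is the
        # provider id, the remainder is the provider's own resource id.
        parts = model_id.split("/", maxsplit=1)
        if len(parts) != 2:
            raise ValueError(f"{model_id} is not a fully qualified model id")
        return parts[0], parts[1]

    print(split_fully_qualified_model_id("ollama/llama3.2:3b"))
    # ('ollama', 'llama3.2:3b')
    print(split_fully_qualified_model_id("vllm/meta-llama/Llama-3.1-8B-Instruct"))
    # ('vllm', 'meta-llama/Llama-3.1-8B-Instruct') -- later slashes stay in the resource id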
@@ -189,24 +204,24 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
         )
-        model_obj = await self._get_model(params.model, ModelType.llm)
-
-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id
 
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
         if params.stream:
             return await provider.openai_completion(params)
         # TODO: Metrics do NOT work with openai_completion stream=True due to the fact
         # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.
 
         response = await provider.openai_completion(params)
+        response.model = request_model_id
         if self.telemetry:
             metrics = self._construct_metrics(
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
             )
             for metric in metrics:
                 enqueue_event(metric)
@@ -224,7 +239,9 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
         )
-        model_obj = await self._get_model(params.model, ModelType.llm)
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id
 
         # Use the OpenAI client for a bit of extra input validation without
         # exposing the OpenAI client itself as part of our API surface
@@ -242,10 +259,6 @@ class InferenceRouter(Inference):
             params.tool_choice = None
             params.tools = None
 
-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
         if params.stream:
             response_stream = await provider.openai_chat_completion(params)
 
@@ -253,11 +266,13 @@ class InferenceRouter(Inference):
             # We need to add metrics to each chunk and store the final completion
             return self.stream_tokens_and_compute_metrics_openai_chat(
                 response=response_stream,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
                 messages=params.messages,
             )
 
         response = await self._nonstream_openai_chat_completion(provider, params)
+        response.model = request_model_id
 
         # Store the response with the ID that will be returned to the client
         if self.store:
@@ -268,7 +283,8 @@ class InferenceRouter(Inference):
                 prompt_tokens=response.usage.prompt_tokens,
                 completion_tokens=response.usage.completion_tokens,
                 total_tokens=response.usage.total_tokens,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
             )
             for metric in metrics:
                 enqueue_event(metric)
@@ -285,13 +301,13 @@ class InferenceRouter(Inference):
         logger.debug(
             f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}",
         )
-        model_obj = await self._get_model(params.model, ModelType.embedding)
-
-        # Update model to use resolved identifier
-        params.model = model_obj.identifier
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.embedding)
+        params.model = provider_resource_id
 
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        return await provider.openai_embeddings(params)
+        response = await provider.openai_embeddings(params)
+        response.model = request_model_id
+        return response
 
     async def list_chat_completions(
         self,
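Across the completion, chat completion, and embeddings paths the router now records the id the caller sent, routes with the provider-native resource id, and rewrites response.model back to the caller's fully qualified id before returning. A minimal sketch of that echo pattern with stand-in types (none of these names are the router's actual API):

    from dataclasses import dataclass

    @dataclass
    class EmbeddingsResponse:  # stand-in response type, for illustration only
        model: str

    def route_embeddings(provider_call, request_model_id: str) -> EmbeddingsResponse:
        # Call the provider with its own id, but report the caller's id back.
        provider_resource_id = request_model_id.split("/", maxsplit=1)[1]
        response = provider_call(provider_resource_id)
        response.model = request_model_id
        return response

    resp = route_embeddings(lambda m: EmbeddingsResponse(model=m), "openai/text-embedding-3-small")
    print(resp.model)  # "openai/text-embedding-3-small", not the provider-native id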
@@ -347,7 +363,8 @@ class InferenceRouter(Inference):
         self,
         response,
         prompt_tokens,
-        model,
+        fully_qualified_model_id: str,
+        provider_id: str,
         tool_prompt_format: ToolPromptFormat | None = None,
     ) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None] | AsyncGenerator[CompletionResponseStreamChunk, None]:
         completion_text = ""
@@ -385,7 +402,8 @@ class InferenceRouter(Inference):
                     prompt_tokens=prompt_tokens,
                     completion_tokens=completion_tokens,
                     total_tokens=total_tokens,
-                    model=model,
+                    fully_qualified_model_id=fully_qualified_model_id,
+                    provider_id=provider_id,
                 )
                 for metric in completion_metrics:
                     if metric.metric in [
@@ -405,7 +423,8 @@ class InferenceRouter(Inference):
                 prompt_tokens or 0,
                 completion_tokens or 0,
                 total_tokens,
-                model,
+                fully_qualified_model_id=fully_qualified_model_id,
+                provider_id=provider_id,
             )
             async_metrics = [
                 MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics
@@ -417,7 +436,8 @@ class InferenceRouter(Inference):
         self,
         response: ChatCompletionResponse | CompletionResponse,
         prompt_tokens,
-        model,
+        fully_qualified_model_id: str,
+        provider_id: str,
         tool_prompt_format: ToolPromptFormat | None = None,
     ):
         if isinstance(response, ChatCompletionResponse):
@@ -434,7 +454,8 @@ class InferenceRouter(Inference):
                 prompt_tokens=prompt_tokens,
                 completion_tokens=completion_tokens,
                 total_tokens=total_tokens,
-                model=model,
+                fully_qualified_model_id=fully_qualified_model_id,
+                provider_id=provider_id,
             )
             for metric in completion_metrics:
                 if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
@@ -448,14 +469,16 @@ class InferenceRouter(Inference):
             prompt_tokens or 0,
             completion_tokens or 0,
             total_tokens,
-            model,
+            fully_qualified_model_id=fully_qualified_model_id,
+            provider_id=provider_id,
         )
         return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
 
     async def stream_tokens_and_compute_metrics_openai_chat(
         self,
         response: AsyncIterator[OpenAIChatCompletionChunk],
-        model: Model,
+        fully_qualified_model_id: str,
+        provider_id: str,
         messages: list[OpenAIMessageParam] | None = None,
     ) -> AsyncIterator[OpenAIChatCompletionChunk]:
         """Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
@@ -475,6 +498,8 @@ class InferenceRouter(Inference):
             if created is None and chunk.created:
                 created = chunk.created
 
+            chunk.model = fully_qualified_model_id
+
             # Accumulate choice data for final assembly
             if chunk.choices:
                 for choice_delta in chunk.choices:
@@ -531,7 +556,8 @@ class InferenceRouter(Inference):
                         prompt_tokens=chunk.usage.prompt_tokens,
                         completion_tokens=chunk.usage.completion_tokens,
                         total_tokens=chunk.usage.total_tokens,
-                        model=model,
+                        model_id=fully_qualified_model_id,
+                        provider_id=provider_id,
                     )
                     for metric in metrics:
                         enqueue_event(metric)
@@ -579,7 +605,7 @@ class InferenceRouter(Inference):
                 id=id,
                 choices=assembled_choices,
                 created=created or int(time.time()),
-                model=model.identifier,
+                model=fully_qualified_model_id,
                 object="chat.completion",
             )
             logger.debug(f"InferenceRouter.completion_response: {final_response}")
@@ -57,4 +57,5 @@ image_type: venv
 additional_pip_packages:
 - aiosqlite
 - asyncpg
+- psycopg2-binary
 - sqlalchemy[asyncio]
@@ -0,0 +1,285 @@
+version: 2
+image_name: starter
+apis:
+- agents
+- batches
+- datasetio
+- eval
+- files
+- inference
+- post_training
+- safety
+- scoring
+- tool_runtime
+- vector_io
+providers:
+  inference:
+  - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+    provider_type: remote::cerebras
+    config:
+      base_url: https://api.cerebras.ai
+      api_key: ${env.CEREBRAS_API_KEY:=}
+  - provider_id: ${env.OLLAMA_URL:+ollama}
+    provider_type: remote::ollama
+    config:
+      url: ${env.OLLAMA_URL:=http://localhost:11434}
+  - provider_id: ${env.VLLM_URL:+vllm}
+    provider_type: remote::vllm
+    config:
+      url: ${env.VLLM_URL:=}
+      max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+      api_token: ${env.VLLM_API_TOKEN:=fake}
+      tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+  - provider_id: ${env.TGI_URL:+tgi}
+    provider_type: remote::tgi
+    config:
+      url: ${env.TGI_URL:=}
+  - provider_id: fireworks
+    provider_type: remote::fireworks
+    config:
+      url: https://api.fireworks.ai/inference/v1
+      api_key: ${env.FIREWORKS_API_KEY:=}
+  - provider_id: together
+    provider_type: remote::together
+    config:
+      url: https://api.together.xyz/v1
+      api_key: ${env.TOGETHER_API_KEY:=}
+  - provider_id: bedrock
+    provider_type: remote::bedrock
+  - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+    provider_type: remote::nvidia
+    config:
+      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      api_key: ${env.NVIDIA_API_KEY:=}
+      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+  - provider_id: openai
+    provider_type: remote::openai
+    config:
+      api_key: ${env.OPENAI_API_KEY:=}
+      base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+  - provider_id: anthropic
+    provider_type: remote::anthropic
+    config:
+      api_key: ${env.ANTHROPIC_API_KEY:=}
+  - provider_id: gemini
+    provider_type: remote::gemini
+    config:
+      api_key: ${env.GEMINI_API_KEY:=}
+  - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+    provider_type: remote::vertexai
+    config:
+      project: ${env.VERTEX_AI_PROJECT:=}
+      location: ${env.VERTEX_AI_LOCATION:=us-central1}
+  - provider_id: groq
+    provider_type: remote::groq
+    config:
+      url: https://api.groq.com
+      api_key: ${env.GROQ_API_KEY:=}
+  - provider_id: sambanova
+    provider_type: remote::sambanova
+    config:
+      url: https://api.sambanova.ai/v1
+      api_key: ${env.SAMBANOVA_API_KEY:=}
+  - provider_id: ${env.AZURE_API_KEY:+azure}
+    provider_type: remote::azure
+    config:
+      api_key: ${env.AZURE_API_KEY:=}
+      api_base: ${env.AZURE_API_BASE:=}
+      api_version: ${env.AZURE_API_VERSION:=}
+      api_type: ${env.AZURE_API_TYPE:=}
+  - provider_id: sentence-transformers
+    provider_type: inline::sentence-transformers
+  vector_io:
+  - provider_id: faiss
+    provider_type: inline::faiss
+    config:
+      persistence:
+        namespace: vector_io::faiss
+        backend: kv_default
+  - provider_id: sqlite-vec
+    provider_type: inline::sqlite-vec
+    config:
+      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
+      persistence:
+        namespace: vector_io::sqlite_vec
+        backend: kv_default
+  - provider_id: ${env.MILVUS_URL:+milvus}
+    provider_type: inline::milvus
+    config:
+      db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
+      persistence:
+        namespace: vector_io::milvus
+        backend: kv_default
+  - provider_id: ${env.CHROMADB_URL:+chromadb}
+    provider_type: remote::chromadb
+    config:
+      url: ${env.CHROMADB_URL:=}
+      persistence:
+        namespace: vector_io::chroma_remote
+        backend: kv_default
+  - provider_id: ${env.PGVECTOR_DB:+pgvector}
+    provider_type: remote::pgvector
+    config:
+      host: ${env.PGVECTOR_HOST:=localhost}
+      port: ${env.PGVECTOR_PORT:=5432}
+      db: ${env.PGVECTOR_DB:=}
+      user: ${env.PGVECTOR_USER:=}
+      password: ${env.PGVECTOR_PASSWORD:=}
+      persistence:
+        namespace: vector_io::pgvector
+        backend: kv_default
+  - provider_id: ${env.QDRANT_URL:+qdrant}
+    provider_type: remote::qdrant
+    config:
+      api_key: ${env.QDRANT_API_KEY:=}
+      persistence:
+        namespace: vector_io::qdrant_remote
+        backend: kv_default
+  - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+    provider_type: remote::weaviate
+    config:
+      weaviate_api_key: null
+      weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+      persistence:
+        namespace: vector_io::weaviate
+        backend: kv_default
+  files:
+  - provider_id: meta-reference-files
+    provider_type: inline::localfs
+    config:
+      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+      metadata_store:
+        table_name: files_metadata
+        backend: sql_default
+  safety:
+  - provider_id: llama-guard
+    provider_type: inline::llama-guard
+    config:
+      excluded_categories: []
+  - provider_id: code-scanner
+    provider_type: inline::code-scanner
+  agents:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      persistence:
+        agent_state:
+          namespace: agents
+          backend: kv_default
+        responses:
+          table_name: responses
+          backend: sql_default
+          max_write_queue_size: 10000
+          num_writers: 4
+  post_training:
+  - provider_id: torchtune-cpu
+    provider_type: inline::torchtune-cpu
+    config:
+      checkpoint_format: meta
+  eval:
+  - provider_id: meta-reference
+    provider_type: inline::meta-reference
+    config:
+      kvstore:
+        namespace: eval
+        backend: kv_default
+  datasetio:
+  - provider_id: huggingface
+    provider_type: remote::huggingface
+    config:
+      kvstore:
+        namespace: datasetio::huggingface
+        backend: kv_default
+  - provider_id: localfs
+    provider_type: inline::localfs
+    config:
+      kvstore:
+        namespace: datasetio::localfs
+        backend: kv_default
+  scoring:
+  - provider_id: basic
+    provider_type: inline::basic
+  - provider_id: llm-as-judge
+    provider_type: inline::llm-as-judge
+  - provider_id: braintrust
+    provider_type: inline::braintrust
+    config:
+      openai_api_key: ${env.OPENAI_API_KEY:=}
+  tool_runtime:
+  - provider_id: brave-search
+    provider_type: remote::brave-search
+    config:
+      api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: tavily-search
+    provider_type: remote::tavily-search
+    config:
+      api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+      max_results: 3
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
+  batches:
+  - provider_id: reference
+    provider_type: inline::reference
+    config:
+      kvstore:
+        namespace: batches
+        backend: kv_default
+storage:
+  backends:
+    kv_default:
+      type: kv_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+      table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+    sql_default:
+      type: sql_postgres
+      host: ${env.POSTGRES_HOST:=localhost}
+      port: ${env.POSTGRES_PORT:=5432}
+      db: ${env.POSTGRES_DB:=llamastack}
+      user: ${env.POSTGRES_USER:=llamastack}
+      password: ${env.POSTGRES_PASSWORD:=llamastack}
+  stores:
+    metadata:
+      namespace: registry
+      backend: kv_default
+    inference:
+      table_name: inference_store
+      backend: sql_default
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      table_name: openai_conversations
+      backend: sql_default
+registered_resources:
+  models: []
+  shields:
+  - shield_id: llama-guard
+    provider_id: ${env.SAFETY_MODEL:+llama-guard}
+    provider_shield_id: ${env.SAFETY_MODEL:=}
+  - shield_id: code-scanner
+    provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+    provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+  vector_dbs: []
+  datasets: []
+  scoring_fns: []
+  benchmarks: []
+  tool_groups:
+  - toolgroup_id: builtin::websearch
+    provider_id: tavily-search
+  - toolgroup_id: builtin::rag
+    provider_id: rag-runtime
+server:
+  port: 8321
+telemetry:
+  enabled: true
+vector_stores:
+  default_provider_id: faiss
+  default_embedding_model:
+    provider_id: sentence-transformers
+    model_id: nomic-ai/nomic-embed-text-v1.5
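The new Postgres-backed starter config leans heavily on the stack's environment substitution syntax: ${env.VAR:=default} resolves to the variable's value or the given default, while ${env.VAR:+value} resolves to value only when the variable is set, which is how optional providers such as ollama or vllm stay out of the resolved config until their URL or key is exported. A rough sketch of those two rules, as an illustration rather than the stack's actual resolver:

    import os
    import re

    def resolve(template: str, env: dict[str, str] | None = None) -> str:
        """Toy resolver for ${env.VAR:=default} and ${env.VAR:+value} (illustration only)."""
        env = dict(os.environ) if env is None else env

        def repl(match: re.Match) -> str:
            var, op, arg = match.group(1), match.group(2), match.group(3)
            value = env.get(var, "")
            if op == ":=":
                return value or arg      # default: variable if set, else the literal
            return arg if value else ""  # conditional: literal only when the variable is set

        return re.sub(r"\$\{env\.([A-Za-z0-9_]+)(:=|:\+)([^}]*)\}", repl, template)

    print(resolve("url: ${env.OLLAMA_URL:=http://localhost:11434}", {}))           # default kicks in
    print(resolve("provider_id: ${env.OLLAMA_URL:+ollama}", {}))                   # empty -> provider skipped
    print(resolve("provider_id: ${env.OLLAMA_URL:+ollama}", {"OLLAMA_URL": "x"}))  # -> ollama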