llama-stack 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. llama_stack/cli/stack/list_deps.py +4 -0
  2. llama_stack/core/routers/inference.py +66 -40
  3. llama_stack/distributions/starter/build.yaml +1 -0
  4. llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
  5. llama_stack/distributions/starter/starter.py +86 -68
  6. llama_stack/distributions/starter-gpu/build.yaml +1 -0
  7. llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
  8. llama_stack/providers/inline/vector_io/faiss/faiss.py +25 -2
  9. llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +15 -4
  10. llama_stack/providers/remote/inference/vertexai/vertexai.py +10 -0
  11. llama_stack/providers/remote/vector_io/chroma/chroma.py +9 -3
  12. llama_stack/providers/remote/vector_io/milvus/milvus.py +7 -4
  13. llama_stack/providers/remote/vector_io/pgvector/pgvector.py +32 -6
  14. llama_stack/providers/remote/vector_io/qdrant/qdrant.py +11 -6
  15. llama_stack/providers/remote/vector_io/weaviate/weaviate.py +7 -4
  16. llama_stack/providers/utils/inference/embedding_mixin.py +1 -2
  17. llama_stack/providers/utils/inference/inference_store.py +30 -10
  18. llama_stack/providers/utils/inference/model_registry.py +1 -1
  19. llama_stack/providers/utils/inference/openai_mixin.py +33 -10
  20. llama_stack/providers/utils/responses/responses_store.py +12 -58
  21. llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +25 -9
  22. llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +31 -1
  23. llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
  24. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/METADATA +3 -3
  25. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/RECORD +29 -26
  26. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/WHEEL +0 -0
  27. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/entry_points.txt +0 -0
  28. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/licenses/LICENSE +0 -0
  29. {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/top_level.txt +0 -0
@@ -35,6 +35,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
35
35
  )
36
36
  from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
37
37
  from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
38
+ from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
38
39
  from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
39
40
 
40
41
 
@@ -147,10 +148,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
147
148
  BuildProvider(provider_type="inline::reference"),
148
149
  ],
149
150
  }
151
+ files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
150
152
  files_provider = Provider(
151
153
  provider_id="meta-reference-files",
152
154
  provider_type="inline::localfs",
153
- config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
155
+ config=files_config,
154
156
  )
155
157
  embedding_provider = Provider(
156
158
  provider_id="sentence-transformers",
@@ -180,6 +182,87 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
180
182
  provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
181
183
  ),
182
184
  ]
185
+ postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
186
+ postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
187
+ default_overrides = {
188
+ "inference": remote_inference_providers + [embedding_provider],
189
+ "vector_io": [
190
+ Provider(
191
+ provider_id="faiss",
192
+ provider_type="inline::faiss",
193
+ config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
194
+ ),
195
+ Provider(
196
+ provider_id="sqlite-vec",
197
+ provider_type="inline::sqlite-vec",
198
+ config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
199
+ ),
200
+ Provider(
201
+ provider_id="${env.MILVUS_URL:+milvus}",
202
+ provider_type="inline::milvus",
203
+ config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
204
+ ),
205
+ Provider(
206
+ provider_id="${env.CHROMADB_URL:+chromadb}",
207
+ provider_type="remote::chromadb",
208
+ config=ChromaVectorIOConfig.sample_run_config(
209
+ f"~/.llama/distributions/{name}/",
210
+ url="${env.CHROMADB_URL:=}",
211
+ ),
212
+ ),
213
+ Provider(
214
+ provider_id="${env.PGVECTOR_DB:+pgvector}",
215
+ provider_type="remote::pgvector",
216
+ config=PGVectorVectorIOConfig.sample_run_config(
217
+ f"~/.llama/distributions/{name}",
218
+ db="${env.PGVECTOR_DB:=}",
219
+ user="${env.PGVECTOR_USER:=}",
220
+ password="${env.PGVECTOR_PASSWORD:=}",
221
+ ),
222
+ ),
223
+ Provider(
224
+ provider_id="${env.QDRANT_URL:+qdrant}",
225
+ provider_type="remote::qdrant",
226
+ config=QdrantVectorIOConfig.sample_run_config(
227
+ f"~/.llama/distributions/{name}",
228
+ url="${env.QDRANT_URL:=}",
229
+ ),
230
+ ),
231
+ Provider(
232
+ provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
233
+ provider_type="remote::weaviate",
234
+ config=WeaviateVectorIOConfig.sample_run_config(
235
+ f"~/.llama/distributions/{name}",
236
+ cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
237
+ ),
238
+ ),
239
+ ],
240
+ "files": [files_provider],
241
+ }
242
+
243
+ base_run_settings = RunConfigSettings(
244
+ provider_overrides=default_overrides,
245
+ default_models=[],
246
+ default_tool_groups=default_tool_groups,
247
+ default_shields=default_shields,
248
+ vector_stores_config=VectorStoresConfig(
249
+ default_provider_id="faiss",
250
+ default_embedding_model=QualifiedModel(
251
+ provider_id="sentence-transformers",
252
+ model_id="nomic-ai/nomic-embed-text-v1.5",
253
+ ),
254
+ ),
255
+ )
256
+
257
+ postgres_run_settings = base_run_settings.model_copy(
258
+ update={
259
+ "storage_backends": {
260
+ "kv_default": postgres_kv_config,
261
+ "sql_default": postgres_sql_config,
262
+ }
263
+ },
264
+ deep=True,
265
+ )
183
266
 
184
267
  return DistributionTemplate(
185
268
  name=name,
@@ -190,73 +273,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
190
273
  providers=providers,
191
274
  additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
192
275
  run_configs={
193
- "run.yaml": RunConfigSettings(
194
- provider_overrides={
195
- "inference": remote_inference_providers + [embedding_provider],
196
- "vector_io": [
197
- Provider(
198
- provider_id="faiss",
199
- provider_type="inline::faiss",
200
- config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
201
- ),
202
- Provider(
203
- provider_id="sqlite-vec",
204
- provider_type="inline::sqlite-vec",
205
- config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
206
- ),
207
- Provider(
208
- provider_id="${env.MILVUS_URL:+milvus}",
209
- provider_type="inline::milvus",
210
- config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
211
- ),
212
- Provider(
213
- provider_id="${env.CHROMADB_URL:+chromadb}",
214
- provider_type="remote::chromadb",
215
- config=ChromaVectorIOConfig.sample_run_config(
216
- f"~/.llama/distributions/{name}/",
217
- url="${env.CHROMADB_URL:=}",
218
- ),
219
- ),
220
- Provider(
221
- provider_id="${env.PGVECTOR_DB:+pgvector}",
222
- provider_type="remote::pgvector",
223
- config=PGVectorVectorIOConfig.sample_run_config(
224
- f"~/.llama/distributions/{name}",
225
- db="${env.PGVECTOR_DB:=}",
226
- user="${env.PGVECTOR_USER:=}",
227
- password="${env.PGVECTOR_PASSWORD:=}",
228
- ),
229
- ),
230
- Provider(
231
- provider_id="${env.QDRANT_URL:+qdrant}",
232
- provider_type="remote::qdrant",
233
- config=QdrantVectorIOConfig.sample_run_config(
234
- f"~/.llama/distributions/{name}",
235
- url="${env.QDRANT_URL:=}",
236
- ),
237
- ),
238
- Provider(
239
- provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
240
- provider_type="remote::weaviate",
241
- config=WeaviateVectorIOConfig.sample_run_config(
242
- f"~/.llama/distributions/{name}",
243
- cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
244
- ),
245
- ),
246
- ],
247
- "files": [files_provider],
248
- },
249
- default_models=[],
250
- default_tool_groups=default_tool_groups,
251
- default_shields=default_shields,
252
- vector_stores_config=VectorStoresConfig(
253
- default_provider_id="faiss",
254
- default_embedding_model=QualifiedModel(
255
- provider_id="sentence-transformers",
256
- model_id="nomic-ai/nomic-embed-text-v1.5",
257
- ),
258
- ),
259
- ),
276
+ "run.yaml": base_run_settings,
277
+ "run-with-postgres-store.yaml": postgres_run_settings,
260
278
  },
261
279
  run_config_env_vars={
262
280
  "LLAMA_STACK_PORT": (
@@ -57,4 +57,5 @@ image_type: venv
57
57
  additional_pip_packages:
58
58
  - aiosqlite
59
59
  - asyncpg
60
+ - psycopg2-binary
60
61
  - sqlalchemy[asyncio]
@@ -0,0 +1,288 @@
1
+ version: 2
2
+ image_name: starter-gpu
3
+ apis:
4
+ - agents
5
+ - batches
6
+ - datasetio
7
+ - eval
8
+ - files
9
+ - inference
10
+ - post_training
11
+ - safety
12
+ - scoring
13
+ - tool_runtime
14
+ - vector_io
15
+ providers:
16
+ inference:
17
+ - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
18
+ provider_type: remote::cerebras
19
+ config:
20
+ base_url: https://api.cerebras.ai
21
+ api_key: ${env.CEREBRAS_API_KEY:=}
22
+ - provider_id: ${env.OLLAMA_URL:+ollama}
23
+ provider_type: remote::ollama
24
+ config:
25
+ url: ${env.OLLAMA_URL:=http://localhost:11434}
26
+ - provider_id: ${env.VLLM_URL:+vllm}
27
+ provider_type: remote::vllm
28
+ config:
29
+ url: ${env.VLLM_URL:=}
30
+ max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
31
+ api_token: ${env.VLLM_API_TOKEN:=fake}
32
+ tls_verify: ${env.VLLM_TLS_VERIFY:=true}
33
+ - provider_id: ${env.TGI_URL:+tgi}
34
+ provider_type: remote::tgi
35
+ config:
36
+ url: ${env.TGI_URL:=}
37
+ - provider_id: fireworks
38
+ provider_type: remote::fireworks
39
+ config:
40
+ url: https://api.fireworks.ai/inference/v1
41
+ api_key: ${env.FIREWORKS_API_KEY:=}
42
+ - provider_id: together
43
+ provider_type: remote::together
44
+ config:
45
+ url: https://api.together.xyz/v1
46
+ api_key: ${env.TOGETHER_API_KEY:=}
47
+ - provider_id: bedrock
48
+ provider_type: remote::bedrock
49
+ - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
50
+ provider_type: remote::nvidia
51
+ config:
52
+ url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
53
+ api_key: ${env.NVIDIA_API_KEY:=}
54
+ append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
55
+ - provider_id: openai
56
+ provider_type: remote::openai
57
+ config:
58
+ api_key: ${env.OPENAI_API_KEY:=}
59
+ base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
60
+ - provider_id: anthropic
61
+ provider_type: remote::anthropic
62
+ config:
63
+ api_key: ${env.ANTHROPIC_API_KEY:=}
64
+ - provider_id: gemini
65
+ provider_type: remote::gemini
66
+ config:
67
+ api_key: ${env.GEMINI_API_KEY:=}
68
+ - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
69
+ provider_type: remote::vertexai
70
+ config:
71
+ project: ${env.VERTEX_AI_PROJECT:=}
72
+ location: ${env.VERTEX_AI_LOCATION:=us-central1}
73
+ - provider_id: groq
74
+ provider_type: remote::groq
75
+ config:
76
+ url: https://api.groq.com
77
+ api_key: ${env.GROQ_API_KEY:=}
78
+ - provider_id: sambanova
79
+ provider_type: remote::sambanova
80
+ config:
81
+ url: https://api.sambanova.ai/v1
82
+ api_key: ${env.SAMBANOVA_API_KEY:=}
83
+ - provider_id: ${env.AZURE_API_KEY:+azure}
84
+ provider_type: remote::azure
85
+ config:
86
+ api_key: ${env.AZURE_API_KEY:=}
87
+ api_base: ${env.AZURE_API_BASE:=}
88
+ api_version: ${env.AZURE_API_VERSION:=}
89
+ api_type: ${env.AZURE_API_TYPE:=}
90
+ - provider_id: sentence-transformers
91
+ provider_type: inline::sentence-transformers
92
+ vector_io:
93
+ - provider_id: faiss
94
+ provider_type: inline::faiss
95
+ config:
96
+ persistence:
97
+ namespace: vector_io::faiss
98
+ backend: kv_default
99
+ - provider_id: sqlite-vec
100
+ provider_type: inline::sqlite-vec
101
+ config:
102
+ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
103
+ persistence:
104
+ namespace: vector_io::sqlite_vec
105
+ backend: kv_default
106
+ - provider_id: ${env.MILVUS_URL:+milvus}
107
+ provider_type: inline::milvus
108
+ config:
109
+ db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
110
+ persistence:
111
+ namespace: vector_io::milvus
112
+ backend: kv_default
113
+ - provider_id: ${env.CHROMADB_URL:+chromadb}
114
+ provider_type: remote::chromadb
115
+ config:
116
+ url: ${env.CHROMADB_URL:=}
117
+ persistence:
118
+ namespace: vector_io::chroma_remote
119
+ backend: kv_default
120
+ - provider_id: ${env.PGVECTOR_DB:+pgvector}
121
+ provider_type: remote::pgvector
122
+ config:
123
+ host: ${env.PGVECTOR_HOST:=localhost}
124
+ port: ${env.PGVECTOR_PORT:=5432}
125
+ db: ${env.PGVECTOR_DB:=}
126
+ user: ${env.PGVECTOR_USER:=}
127
+ password: ${env.PGVECTOR_PASSWORD:=}
128
+ persistence:
129
+ namespace: vector_io::pgvector
130
+ backend: kv_default
131
+ - provider_id: ${env.QDRANT_URL:+qdrant}
132
+ provider_type: remote::qdrant
133
+ config:
134
+ api_key: ${env.QDRANT_API_KEY:=}
135
+ persistence:
136
+ namespace: vector_io::qdrant_remote
137
+ backend: kv_default
138
+ - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
139
+ provider_type: remote::weaviate
140
+ config:
141
+ weaviate_api_key: null
142
+ weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
143
+ persistence:
144
+ namespace: vector_io::weaviate
145
+ backend: kv_default
146
+ files:
147
+ - provider_id: meta-reference-files
148
+ provider_type: inline::localfs
149
+ config:
150
+ storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
151
+ metadata_store:
152
+ table_name: files_metadata
153
+ backend: sql_default
154
+ safety:
155
+ - provider_id: llama-guard
156
+ provider_type: inline::llama-guard
157
+ config:
158
+ excluded_categories: []
159
+ - provider_id: code-scanner
160
+ provider_type: inline::code-scanner
161
+ agents:
162
+ - provider_id: meta-reference
163
+ provider_type: inline::meta-reference
164
+ config:
165
+ persistence:
166
+ agent_state:
167
+ namespace: agents
168
+ backend: kv_default
169
+ responses:
170
+ table_name: responses
171
+ backend: sql_default
172
+ max_write_queue_size: 10000
173
+ num_writers: 4
174
+ post_training:
175
+ - provider_id: huggingface-gpu
176
+ provider_type: inline::huggingface-gpu
177
+ config:
178
+ checkpoint_format: huggingface
179
+ distributed_backend: null
180
+ device: cpu
181
+ dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
182
+ eval:
183
+ - provider_id: meta-reference
184
+ provider_type: inline::meta-reference
185
+ config:
186
+ kvstore:
187
+ namespace: eval
188
+ backend: kv_default
189
+ datasetio:
190
+ - provider_id: huggingface
191
+ provider_type: remote::huggingface
192
+ config:
193
+ kvstore:
194
+ namespace: datasetio::huggingface
195
+ backend: kv_default
196
+ - provider_id: localfs
197
+ provider_type: inline::localfs
198
+ config:
199
+ kvstore:
200
+ namespace: datasetio::localfs
201
+ backend: kv_default
202
+ scoring:
203
+ - provider_id: basic
204
+ provider_type: inline::basic
205
+ - provider_id: llm-as-judge
206
+ provider_type: inline::llm-as-judge
207
+ - provider_id: braintrust
208
+ provider_type: inline::braintrust
209
+ config:
210
+ openai_api_key: ${env.OPENAI_API_KEY:=}
211
+ tool_runtime:
212
+ - provider_id: brave-search
213
+ provider_type: remote::brave-search
214
+ config:
215
+ api_key: ${env.BRAVE_SEARCH_API_KEY:=}
216
+ max_results: 3
217
+ - provider_id: tavily-search
218
+ provider_type: remote::tavily-search
219
+ config:
220
+ api_key: ${env.TAVILY_SEARCH_API_KEY:=}
221
+ max_results: 3
222
+ - provider_id: rag-runtime
223
+ provider_type: inline::rag-runtime
224
+ - provider_id: model-context-protocol
225
+ provider_type: remote::model-context-protocol
226
+ batches:
227
+ - provider_id: reference
228
+ provider_type: inline::reference
229
+ config:
230
+ kvstore:
231
+ namespace: batches
232
+ backend: kv_default
233
+ storage:
234
+ backends:
235
+ kv_default:
236
+ type: kv_postgres
237
+ host: ${env.POSTGRES_HOST:=localhost}
238
+ port: ${env.POSTGRES_PORT:=5432}
239
+ db: ${env.POSTGRES_DB:=llamastack}
240
+ user: ${env.POSTGRES_USER:=llamastack}
241
+ password: ${env.POSTGRES_PASSWORD:=llamastack}
242
+ table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
243
+ sql_default:
244
+ type: sql_postgres
245
+ host: ${env.POSTGRES_HOST:=localhost}
246
+ port: ${env.POSTGRES_PORT:=5432}
247
+ db: ${env.POSTGRES_DB:=llamastack}
248
+ user: ${env.POSTGRES_USER:=llamastack}
249
+ password: ${env.POSTGRES_PASSWORD:=llamastack}
250
+ stores:
251
+ metadata:
252
+ namespace: registry
253
+ backend: kv_default
254
+ inference:
255
+ table_name: inference_store
256
+ backend: sql_default
257
+ max_write_queue_size: 10000
258
+ num_writers: 4
259
+ conversations:
260
+ table_name: openai_conversations
261
+ backend: sql_default
262
+ registered_resources:
263
+ models: []
264
+ shields:
265
+ - shield_id: llama-guard
266
+ provider_id: ${env.SAFETY_MODEL:+llama-guard}
267
+ provider_shield_id: ${env.SAFETY_MODEL:=}
268
+ - shield_id: code-scanner
269
+ provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
270
+ provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
271
+ vector_dbs: []
272
+ datasets: []
273
+ scoring_fns: []
274
+ benchmarks: []
275
+ tool_groups:
276
+ - toolgroup_id: builtin::websearch
277
+ provider_id: tavily-search
278
+ - toolgroup_id: builtin::rag
279
+ provider_id: rag-runtime
280
+ server:
281
+ port: 8321
282
+ telemetry:
283
+ enabled: true
284
+ vector_stores:
285
+ default_provider_id: faiss
286
+ default_embedding_model:
287
+ provider_id: sentence-transformers
288
+ model_id: nomic-ai/nomic-embed-text-v1.5
@@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
223
223
  return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
224
224
 
225
225
  async def register_vector_store(self, vector_store: VectorStore) -> None:
226
- assert self.kvstore is not None
226
+ if self.kvstore is None:
227
+ raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
227
228
 
228
229
  key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
229
230
  await self.kvstore.set(key=key, value=vector_store.model_dump_json())
@@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
239
240
  return [i.vector_store for i in self.cache.values()]
240
241
 
241
242
  async def unregister_vector_store(self, vector_store_id: str) -> None:
242
- assert self.kvstore is not None
243
+ if self.kvstore is None:
244
+ raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
243
245
 
244
246
  if vector_store_id not in self.cache:
245
247
  return
@@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
248
250
  del self.cache[vector_store_id]
249
251
  await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
250
252
 
253
+ async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
254
+ if vector_store_id in self.cache:
255
+ return self.cache[vector_store_id]
256
+
257
+ if self.kvstore is None:
258
+ raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
259
+
260
+ key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
261
+ vector_store_data = await self.kvstore.get(key)
262
+ if not vector_store_data:
263
+ raise VectorStoreNotFoundError(vector_store_id)
264
+
265
+ vector_store = VectorStore.model_validate_json(vector_store_data)
266
+ index = VectorStoreWithIndex(
267
+ vector_store=vector_store,
268
+ index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
269
+ inference_api=self.inference_api,
270
+ )
271
+ self.cache[vector_store_id] = index
272
+ return index
273
+
251
274
  async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
252
275
  index = self.cache.get(vector_db_id)
253
276
  if index is None:
@@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
412
412
  return [v.vector_store for v in self.cache.values()]
413
413
 
414
414
  async def register_vector_store(self, vector_store: VectorStore) -> None:
415
+ if self.kvstore is None:
416
+ raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
417
+
418
+ # Save to kvstore for persistence
419
+ key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
420
+ await self.kvstore.set(key=key, value=vector_store.model_dump_json())
421
+
422
+ # Create and cache the index
415
423
  index = await SQLiteVecIndex.create(
416
424
  vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
417
425
  )
@@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
421
429
  if vector_store_id in self.cache:
422
430
  return self.cache[vector_store_id]
423
431
 
424
- if self.vector_store_table is None:
425
- raise VectorStoreNotFoundError(vector_store_id)
432
+ # Try to load from kvstore
433
+ if self.kvstore is None:
434
+ raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
426
435
 
427
- vector_store = self.vector_store_table.get_vector_store(vector_store_id)
428
- if not vector_store:
436
+ key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
437
+ vector_store_data = await self.kvstore.get(key)
438
+ if not vector_store_data:
429
439
  raise VectorStoreNotFoundError(vector_store_id)
430
440
 
441
+ vector_store = VectorStore.model_validate_json(vector_store_data)
431
442
  index = VectorStoreWithIndex(
432
443
  vector_store=vector_store,
433
444
  index=SQLiteVecIndex(
@@ -4,6 +4,7 @@
4
4
  # This source code is licensed under the terms described in the LICENSE file in
5
5
  # the root directory of this source tree.
6
6
 
7
+ from collections.abc import Iterable
7
8
 
8
9
  import google.auth.transport.requests
9
10
  from google.auth import default
@@ -42,3 +43,12 @@ class VertexAIInferenceAdapter(OpenAIMixin):
42
43
  Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
43
44
  """
44
45
  return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
46
+
47
+ async def list_provider_model_ids(self) -> Iterable[str]:
48
+ """
49
+ VertexAI doesn't currently offer a way to query a list of available models from Google's Model Garden
50
+ For now we return a hardcoded version of the available models
51
+
52
+ :return: An iterable of model IDs
53
+ """
54
+ return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]
@@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
131
131
 
132
132
  async def initialize(self) -> None:
133
133
  self.kvstore = await kvstore_impl(self.config.persistence)
134
- self.vector_store_table = self.kvstore
135
134
 
136
135
  if isinstance(self.config, RemoteChromaVectorIOConfig):
137
136
  log.info(f"Connecting to Chroma server at: {self.config.url}")
@@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
190
189
  if vector_store_id in self.cache:
191
190
  return self.cache[vector_store_id]
192
191
 
193
- vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
194
- if not vector_store:
192
+ # Try to load from kvstore
193
+ if self.kvstore is None:
194
+ raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
195
+
196
+ key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
197
+ vector_store_data = await self.kvstore.get(key)
198
+ if not vector_store_data:
195
199
  raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
200
+
201
+ vector_store = VectorStore.model_validate_json(vector_store_data)
196
202
  collection = await maybe_await(self.client.get_collection(vector_store_id))
197
203
  if not collection:
198
204
  raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
@@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
328
328
  if vector_store_id in self.cache:
329
329
  return self.cache[vector_store_id]
330
330
 
331
- if self.vector_store_table is None:
332
- raise VectorStoreNotFoundError(vector_store_id)
331
+ # Try to load from kvstore
332
+ if self.kvstore is None:
333
+ raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
333
334
 
334
- vector_store = await self.vector_store_table.get_vector_store(vector_store_id)
335
- if not vector_store:
335
+ key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
336
+ vector_store_data = await self.kvstore.get(key)
337
+ if not vector_store_data:
336
338
  raise VectorStoreNotFoundError(vector_store_id)
337
339
 
340
+ vector_store = VectorStore.model_validate_json(vector_store_data)
338
341
  index = VectorStoreWithIndex(
339
342
  vector_store=vector_store,
340
343
  index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),