llama-stack 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/list_deps.py +4 -0
- llama_stack/core/routers/inference.py +66 -40
- llama_stack/distributions/starter/build.yaml +1 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/starter.py +86 -68
- llama_stack/distributions/starter-gpu/build.yaml +1 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/providers/inline/vector_io/faiss/faiss.py +25 -2
- llama_stack/providers/inline/vector_io/sqlite_vec/sqlite_vec.py +15 -4
- llama_stack/providers/remote/inference/vertexai/vertexai.py +10 -0
- llama_stack/providers/remote/vector_io/chroma/chroma.py +9 -3
- llama_stack/providers/remote/vector_io/milvus/milvus.py +7 -4
- llama_stack/providers/remote/vector_io/pgvector/pgvector.py +32 -6
- llama_stack/providers/remote/vector_io/qdrant/qdrant.py +11 -6
- llama_stack/providers/remote/vector_io/weaviate/weaviate.py +7 -4
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -2
- llama_stack/providers/utils/inference/inference_store.py +30 -10
- llama_stack/providers/utils/inference/model_registry.py +1 -1
- llama_stack/providers/utils/inference/openai_mixin.py +33 -10
- llama_stack/providers/utils/responses/responses_store.py +12 -58
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +25 -9
- llama_stack/providers/utils/sqlstore/sqlalchemy_sqlstore.py +31 -1
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/METADATA +3 -3
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/RECORD +29 -26
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.3.dist-info}/top_level.txt +0 -0
|
@@ -35,6 +35,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
|
|
|
35
35
|
)
|
|
36
36
|
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
|
37
37
|
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
|
|
38
|
+
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
|
38
39
|
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
|
39
40
|
|
|
40
41
|
|
|
@@ -147,10 +148,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
147
148
|
BuildProvider(provider_type="inline::reference"),
|
|
148
149
|
],
|
|
149
150
|
}
|
|
151
|
+
files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
|
|
150
152
|
files_provider = Provider(
|
|
151
153
|
provider_id="meta-reference-files",
|
|
152
154
|
provider_type="inline::localfs",
|
|
153
|
-
config=
|
|
155
|
+
config=files_config,
|
|
154
156
|
)
|
|
155
157
|
embedding_provider = Provider(
|
|
156
158
|
provider_id="sentence-transformers",
|
|
@@ -180,6 +182,87 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
180
182
|
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
|
|
181
183
|
),
|
|
182
184
|
]
|
|
185
|
+
postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
|
|
186
|
+
postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
|
|
187
|
+
default_overrides = {
|
|
188
|
+
"inference": remote_inference_providers + [embedding_provider],
|
|
189
|
+
"vector_io": [
|
|
190
|
+
Provider(
|
|
191
|
+
provider_id="faiss",
|
|
192
|
+
provider_type="inline::faiss",
|
|
193
|
+
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
194
|
+
),
|
|
195
|
+
Provider(
|
|
196
|
+
provider_id="sqlite-vec",
|
|
197
|
+
provider_type="inline::sqlite-vec",
|
|
198
|
+
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
199
|
+
),
|
|
200
|
+
Provider(
|
|
201
|
+
provider_id="${env.MILVUS_URL:+milvus}",
|
|
202
|
+
provider_type="inline::milvus",
|
|
203
|
+
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
204
|
+
),
|
|
205
|
+
Provider(
|
|
206
|
+
provider_id="${env.CHROMADB_URL:+chromadb}",
|
|
207
|
+
provider_type="remote::chromadb",
|
|
208
|
+
config=ChromaVectorIOConfig.sample_run_config(
|
|
209
|
+
f"~/.llama/distributions/{name}/",
|
|
210
|
+
url="${env.CHROMADB_URL:=}",
|
|
211
|
+
),
|
|
212
|
+
),
|
|
213
|
+
Provider(
|
|
214
|
+
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
|
215
|
+
provider_type="remote::pgvector",
|
|
216
|
+
config=PGVectorVectorIOConfig.sample_run_config(
|
|
217
|
+
f"~/.llama/distributions/{name}",
|
|
218
|
+
db="${env.PGVECTOR_DB:=}",
|
|
219
|
+
user="${env.PGVECTOR_USER:=}",
|
|
220
|
+
password="${env.PGVECTOR_PASSWORD:=}",
|
|
221
|
+
),
|
|
222
|
+
),
|
|
223
|
+
Provider(
|
|
224
|
+
provider_id="${env.QDRANT_URL:+qdrant}",
|
|
225
|
+
provider_type="remote::qdrant",
|
|
226
|
+
config=QdrantVectorIOConfig.sample_run_config(
|
|
227
|
+
f"~/.llama/distributions/{name}",
|
|
228
|
+
url="${env.QDRANT_URL:=}",
|
|
229
|
+
),
|
|
230
|
+
),
|
|
231
|
+
Provider(
|
|
232
|
+
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
|
|
233
|
+
provider_type="remote::weaviate",
|
|
234
|
+
config=WeaviateVectorIOConfig.sample_run_config(
|
|
235
|
+
f"~/.llama/distributions/{name}",
|
|
236
|
+
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
|
|
237
|
+
),
|
|
238
|
+
),
|
|
239
|
+
],
|
|
240
|
+
"files": [files_provider],
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
base_run_settings = RunConfigSettings(
|
|
244
|
+
provider_overrides=default_overrides,
|
|
245
|
+
default_models=[],
|
|
246
|
+
default_tool_groups=default_tool_groups,
|
|
247
|
+
default_shields=default_shields,
|
|
248
|
+
vector_stores_config=VectorStoresConfig(
|
|
249
|
+
default_provider_id="faiss",
|
|
250
|
+
default_embedding_model=QualifiedModel(
|
|
251
|
+
provider_id="sentence-transformers",
|
|
252
|
+
model_id="nomic-ai/nomic-embed-text-v1.5",
|
|
253
|
+
),
|
|
254
|
+
),
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
postgres_run_settings = base_run_settings.model_copy(
|
|
258
|
+
update={
|
|
259
|
+
"storage_backends": {
|
|
260
|
+
"kv_default": postgres_kv_config,
|
|
261
|
+
"sql_default": postgres_sql_config,
|
|
262
|
+
}
|
|
263
|
+
},
|
|
264
|
+
deep=True,
|
|
265
|
+
)
|
|
183
266
|
|
|
184
267
|
return DistributionTemplate(
|
|
185
268
|
name=name,
|
|
@@ -190,73 +273,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
190
273
|
providers=providers,
|
|
191
274
|
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
|
|
192
275
|
run_configs={
|
|
193
|
-
"run.yaml":
|
|
194
|
-
|
|
195
|
-
"inference": remote_inference_providers + [embedding_provider],
|
|
196
|
-
"vector_io": [
|
|
197
|
-
Provider(
|
|
198
|
-
provider_id="faiss",
|
|
199
|
-
provider_type="inline::faiss",
|
|
200
|
-
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
201
|
-
),
|
|
202
|
-
Provider(
|
|
203
|
-
provider_id="sqlite-vec",
|
|
204
|
-
provider_type="inline::sqlite-vec",
|
|
205
|
-
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
206
|
-
),
|
|
207
|
-
Provider(
|
|
208
|
-
provider_id="${env.MILVUS_URL:+milvus}",
|
|
209
|
-
provider_type="inline::milvus",
|
|
210
|
-
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
211
|
-
),
|
|
212
|
-
Provider(
|
|
213
|
-
provider_id="${env.CHROMADB_URL:+chromadb}",
|
|
214
|
-
provider_type="remote::chromadb",
|
|
215
|
-
config=ChromaVectorIOConfig.sample_run_config(
|
|
216
|
-
f"~/.llama/distributions/{name}/",
|
|
217
|
-
url="${env.CHROMADB_URL:=}",
|
|
218
|
-
),
|
|
219
|
-
),
|
|
220
|
-
Provider(
|
|
221
|
-
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
|
222
|
-
provider_type="remote::pgvector",
|
|
223
|
-
config=PGVectorVectorIOConfig.sample_run_config(
|
|
224
|
-
f"~/.llama/distributions/{name}",
|
|
225
|
-
db="${env.PGVECTOR_DB:=}",
|
|
226
|
-
user="${env.PGVECTOR_USER:=}",
|
|
227
|
-
password="${env.PGVECTOR_PASSWORD:=}",
|
|
228
|
-
),
|
|
229
|
-
),
|
|
230
|
-
Provider(
|
|
231
|
-
provider_id="${env.QDRANT_URL:+qdrant}",
|
|
232
|
-
provider_type="remote::qdrant",
|
|
233
|
-
config=QdrantVectorIOConfig.sample_run_config(
|
|
234
|
-
f"~/.llama/distributions/{name}",
|
|
235
|
-
url="${env.QDRANT_URL:=}",
|
|
236
|
-
),
|
|
237
|
-
),
|
|
238
|
-
Provider(
|
|
239
|
-
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
|
|
240
|
-
provider_type="remote::weaviate",
|
|
241
|
-
config=WeaviateVectorIOConfig.sample_run_config(
|
|
242
|
-
f"~/.llama/distributions/{name}",
|
|
243
|
-
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
|
|
244
|
-
),
|
|
245
|
-
),
|
|
246
|
-
],
|
|
247
|
-
"files": [files_provider],
|
|
248
|
-
},
|
|
249
|
-
default_models=[],
|
|
250
|
-
default_tool_groups=default_tool_groups,
|
|
251
|
-
default_shields=default_shields,
|
|
252
|
-
vector_stores_config=VectorStoresConfig(
|
|
253
|
-
default_provider_id="faiss",
|
|
254
|
-
default_embedding_model=QualifiedModel(
|
|
255
|
-
provider_id="sentence-transformers",
|
|
256
|
-
model_id="nomic-ai/nomic-embed-text-v1.5",
|
|
257
|
-
),
|
|
258
|
-
),
|
|
259
|
-
),
|
|
276
|
+
"run.yaml": base_run_settings,
|
|
277
|
+
"run-with-postgres-store.yaml": postgres_run_settings,
|
|
260
278
|
},
|
|
261
279
|
run_config_env_vars={
|
|
262
280
|
"LLAMA_STACK_PORT": (
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
image_name: starter-gpu
|
|
3
|
+
apis:
|
|
4
|
+
- agents
|
|
5
|
+
- batches
|
|
6
|
+
- datasetio
|
|
7
|
+
- eval
|
|
8
|
+
- files
|
|
9
|
+
- inference
|
|
10
|
+
- post_training
|
|
11
|
+
- safety
|
|
12
|
+
- scoring
|
|
13
|
+
- tool_runtime
|
|
14
|
+
- vector_io
|
|
15
|
+
providers:
|
|
16
|
+
inference:
|
|
17
|
+
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
|
18
|
+
provider_type: remote::cerebras
|
|
19
|
+
config:
|
|
20
|
+
base_url: https://api.cerebras.ai
|
|
21
|
+
api_key: ${env.CEREBRAS_API_KEY:=}
|
|
22
|
+
- provider_id: ${env.OLLAMA_URL:+ollama}
|
|
23
|
+
provider_type: remote::ollama
|
|
24
|
+
config:
|
|
25
|
+
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
|
26
|
+
- provider_id: ${env.VLLM_URL:+vllm}
|
|
27
|
+
provider_type: remote::vllm
|
|
28
|
+
config:
|
|
29
|
+
url: ${env.VLLM_URL:=}
|
|
30
|
+
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
31
|
+
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
32
|
+
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
33
|
+
- provider_id: ${env.TGI_URL:+tgi}
|
|
34
|
+
provider_type: remote::tgi
|
|
35
|
+
config:
|
|
36
|
+
url: ${env.TGI_URL:=}
|
|
37
|
+
- provider_id: fireworks
|
|
38
|
+
provider_type: remote::fireworks
|
|
39
|
+
config:
|
|
40
|
+
url: https://api.fireworks.ai/inference/v1
|
|
41
|
+
api_key: ${env.FIREWORKS_API_KEY:=}
|
|
42
|
+
- provider_id: together
|
|
43
|
+
provider_type: remote::together
|
|
44
|
+
config:
|
|
45
|
+
url: https://api.together.xyz/v1
|
|
46
|
+
api_key: ${env.TOGETHER_API_KEY:=}
|
|
47
|
+
- provider_id: bedrock
|
|
48
|
+
provider_type: remote::bedrock
|
|
49
|
+
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
|
50
|
+
provider_type: remote::nvidia
|
|
51
|
+
config:
|
|
52
|
+
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
|
53
|
+
api_key: ${env.NVIDIA_API_KEY:=}
|
|
54
|
+
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
|
55
|
+
- provider_id: openai
|
|
56
|
+
provider_type: remote::openai
|
|
57
|
+
config:
|
|
58
|
+
api_key: ${env.OPENAI_API_KEY:=}
|
|
59
|
+
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
|
|
60
|
+
- provider_id: anthropic
|
|
61
|
+
provider_type: remote::anthropic
|
|
62
|
+
config:
|
|
63
|
+
api_key: ${env.ANTHROPIC_API_KEY:=}
|
|
64
|
+
- provider_id: gemini
|
|
65
|
+
provider_type: remote::gemini
|
|
66
|
+
config:
|
|
67
|
+
api_key: ${env.GEMINI_API_KEY:=}
|
|
68
|
+
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
|
|
69
|
+
provider_type: remote::vertexai
|
|
70
|
+
config:
|
|
71
|
+
project: ${env.VERTEX_AI_PROJECT:=}
|
|
72
|
+
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
|
73
|
+
- provider_id: groq
|
|
74
|
+
provider_type: remote::groq
|
|
75
|
+
config:
|
|
76
|
+
url: https://api.groq.com
|
|
77
|
+
api_key: ${env.GROQ_API_KEY:=}
|
|
78
|
+
- provider_id: sambanova
|
|
79
|
+
provider_type: remote::sambanova
|
|
80
|
+
config:
|
|
81
|
+
url: https://api.sambanova.ai/v1
|
|
82
|
+
api_key: ${env.SAMBANOVA_API_KEY:=}
|
|
83
|
+
- provider_id: ${env.AZURE_API_KEY:+azure}
|
|
84
|
+
provider_type: remote::azure
|
|
85
|
+
config:
|
|
86
|
+
api_key: ${env.AZURE_API_KEY:=}
|
|
87
|
+
api_base: ${env.AZURE_API_BASE:=}
|
|
88
|
+
api_version: ${env.AZURE_API_VERSION:=}
|
|
89
|
+
api_type: ${env.AZURE_API_TYPE:=}
|
|
90
|
+
- provider_id: sentence-transformers
|
|
91
|
+
provider_type: inline::sentence-transformers
|
|
92
|
+
vector_io:
|
|
93
|
+
- provider_id: faiss
|
|
94
|
+
provider_type: inline::faiss
|
|
95
|
+
config:
|
|
96
|
+
persistence:
|
|
97
|
+
namespace: vector_io::faiss
|
|
98
|
+
backend: kv_default
|
|
99
|
+
- provider_id: sqlite-vec
|
|
100
|
+
provider_type: inline::sqlite-vec
|
|
101
|
+
config:
|
|
102
|
+
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
|
|
103
|
+
persistence:
|
|
104
|
+
namespace: vector_io::sqlite_vec
|
|
105
|
+
backend: kv_default
|
|
106
|
+
- provider_id: ${env.MILVUS_URL:+milvus}
|
|
107
|
+
provider_type: inline::milvus
|
|
108
|
+
config:
|
|
109
|
+
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
|
|
110
|
+
persistence:
|
|
111
|
+
namespace: vector_io::milvus
|
|
112
|
+
backend: kv_default
|
|
113
|
+
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
|
114
|
+
provider_type: remote::chromadb
|
|
115
|
+
config:
|
|
116
|
+
url: ${env.CHROMADB_URL:=}
|
|
117
|
+
persistence:
|
|
118
|
+
namespace: vector_io::chroma_remote
|
|
119
|
+
backend: kv_default
|
|
120
|
+
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
|
121
|
+
provider_type: remote::pgvector
|
|
122
|
+
config:
|
|
123
|
+
host: ${env.PGVECTOR_HOST:=localhost}
|
|
124
|
+
port: ${env.PGVECTOR_PORT:=5432}
|
|
125
|
+
db: ${env.PGVECTOR_DB:=}
|
|
126
|
+
user: ${env.PGVECTOR_USER:=}
|
|
127
|
+
password: ${env.PGVECTOR_PASSWORD:=}
|
|
128
|
+
persistence:
|
|
129
|
+
namespace: vector_io::pgvector
|
|
130
|
+
backend: kv_default
|
|
131
|
+
- provider_id: ${env.QDRANT_URL:+qdrant}
|
|
132
|
+
provider_type: remote::qdrant
|
|
133
|
+
config:
|
|
134
|
+
api_key: ${env.QDRANT_API_KEY:=}
|
|
135
|
+
persistence:
|
|
136
|
+
namespace: vector_io::qdrant_remote
|
|
137
|
+
backend: kv_default
|
|
138
|
+
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
|
|
139
|
+
provider_type: remote::weaviate
|
|
140
|
+
config:
|
|
141
|
+
weaviate_api_key: null
|
|
142
|
+
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
|
|
143
|
+
persistence:
|
|
144
|
+
namespace: vector_io::weaviate
|
|
145
|
+
backend: kv_default
|
|
146
|
+
files:
|
|
147
|
+
- provider_id: meta-reference-files
|
|
148
|
+
provider_type: inline::localfs
|
|
149
|
+
config:
|
|
150
|
+
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
|
|
151
|
+
metadata_store:
|
|
152
|
+
table_name: files_metadata
|
|
153
|
+
backend: sql_default
|
|
154
|
+
safety:
|
|
155
|
+
- provider_id: llama-guard
|
|
156
|
+
provider_type: inline::llama-guard
|
|
157
|
+
config:
|
|
158
|
+
excluded_categories: []
|
|
159
|
+
- provider_id: code-scanner
|
|
160
|
+
provider_type: inline::code-scanner
|
|
161
|
+
agents:
|
|
162
|
+
- provider_id: meta-reference
|
|
163
|
+
provider_type: inline::meta-reference
|
|
164
|
+
config:
|
|
165
|
+
persistence:
|
|
166
|
+
agent_state:
|
|
167
|
+
namespace: agents
|
|
168
|
+
backend: kv_default
|
|
169
|
+
responses:
|
|
170
|
+
table_name: responses
|
|
171
|
+
backend: sql_default
|
|
172
|
+
max_write_queue_size: 10000
|
|
173
|
+
num_writers: 4
|
|
174
|
+
post_training:
|
|
175
|
+
- provider_id: huggingface-gpu
|
|
176
|
+
provider_type: inline::huggingface-gpu
|
|
177
|
+
config:
|
|
178
|
+
checkpoint_format: huggingface
|
|
179
|
+
distributed_backend: null
|
|
180
|
+
device: cpu
|
|
181
|
+
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
|
|
182
|
+
eval:
|
|
183
|
+
- provider_id: meta-reference
|
|
184
|
+
provider_type: inline::meta-reference
|
|
185
|
+
config:
|
|
186
|
+
kvstore:
|
|
187
|
+
namespace: eval
|
|
188
|
+
backend: kv_default
|
|
189
|
+
datasetio:
|
|
190
|
+
- provider_id: huggingface
|
|
191
|
+
provider_type: remote::huggingface
|
|
192
|
+
config:
|
|
193
|
+
kvstore:
|
|
194
|
+
namespace: datasetio::huggingface
|
|
195
|
+
backend: kv_default
|
|
196
|
+
- provider_id: localfs
|
|
197
|
+
provider_type: inline::localfs
|
|
198
|
+
config:
|
|
199
|
+
kvstore:
|
|
200
|
+
namespace: datasetio::localfs
|
|
201
|
+
backend: kv_default
|
|
202
|
+
scoring:
|
|
203
|
+
- provider_id: basic
|
|
204
|
+
provider_type: inline::basic
|
|
205
|
+
- provider_id: llm-as-judge
|
|
206
|
+
provider_type: inline::llm-as-judge
|
|
207
|
+
- provider_id: braintrust
|
|
208
|
+
provider_type: inline::braintrust
|
|
209
|
+
config:
|
|
210
|
+
openai_api_key: ${env.OPENAI_API_KEY:=}
|
|
211
|
+
tool_runtime:
|
|
212
|
+
- provider_id: brave-search
|
|
213
|
+
provider_type: remote::brave-search
|
|
214
|
+
config:
|
|
215
|
+
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
|
|
216
|
+
max_results: 3
|
|
217
|
+
- provider_id: tavily-search
|
|
218
|
+
provider_type: remote::tavily-search
|
|
219
|
+
config:
|
|
220
|
+
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
|
221
|
+
max_results: 3
|
|
222
|
+
- provider_id: rag-runtime
|
|
223
|
+
provider_type: inline::rag-runtime
|
|
224
|
+
- provider_id: model-context-protocol
|
|
225
|
+
provider_type: remote::model-context-protocol
|
|
226
|
+
batches:
|
|
227
|
+
- provider_id: reference
|
|
228
|
+
provider_type: inline::reference
|
|
229
|
+
config:
|
|
230
|
+
kvstore:
|
|
231
|
+
namespace: batches
|
|
232
|
+
backend: kv_default
|
|
233
|
+
storage:
|
|
234
|
+
backends:
|
|
235
|
+
kv_default:
|
|
236
|
+
type: kv_postgres
|
|
237
|
+
host: ${env.POSTGRES_HOST:=localhost}
|
|
238
|
+
port: ${env.POSTGRES_PORT:=5432}
|
|
239
|
+
db: ${env.POSTGRES_DB:=llamastack}
|
|
240
|
+
user: ${env.POSTGRES_USER:=llamastack}
|
|
241
|
+
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
242
|
+
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
|
243
|
+
sql_default:
|
|
244
|
+
type: sql_postgres
|
|
245
|
+
host: ${env.POSTGRES_HOST:=localhost}
|
|
246
|
+
port: ${env.POSTGRES_PORT:=5432}
|
|
247
|
+
db: ${env.POSTGRES_DB:=llamastack}
|
|
248
|
+
user: ${env.POSTGRES_USER:=llamastack}
|
|
249
|
+
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
250
|
+
stores:
|
|
251
|
+
metadata:
|
|
252
|
+
namespace: registry
|
|
253
|
+
backend: kv_default
|
|
254
|
+
inference:
|
|
255
|
+
table_name: inference_store
|
|
256
|
+
backend: sql_default
|
|
257
|
+
max_write_queue_size: 10000
|
|
258
|
+
num_writers: 4
|
|
259
|
+
conversations:
|
|
260
|
+
table_name: openai_conversations
|
|
261
|
+
backend: sql_default
|
|
262
|
+
registered_resources:
|
|
263
|
+
models: []
|
|
264
|
+
shields:
|
|
265
|
+
- shield_id: llama-guard
|
|
266
|
+
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
|
267
|
+
provider_shield_id: ${env.SAFETY_MODEL:=}
|
|
268
|
+
- shield_id: code-scanner
|
|
269
|
+
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
|
270
|
+
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
|
271
|
+
vector_dbs: []
|
|
272
|
+
datasets: []
|
|
273
|
+
scoring_fns: []
|
|
274
|
+
benchmarks: []
|
|
275
|
+
tool_groups:
|
|
276
|
+
- toolgroup_id: builtin::websearch
|
|
277
|
+
provider_id: tavily-search
|
|
278
|
+
- toolgroup_id: builtin::rag
|
|
279
|
+
provider_id: rag-runtime
|
|
280
|
+
server:
|
|
281
|
+
port: 8321
|
|
282
|
+
telemetry:
|
|
283
|
+
enabled: true
|
|
284
|
+
vector_stores:
|
|
285
|
+
default_provider_id: faiss
|
|
286
|
+
default_embedding_model:
|
|
287
|
+
provider_id: sentence-transformers
|
|
288
|
+
model_id: nomic-ai/nomic-embed-text-v1.5
|
|
@@ -223,7 +223,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
|
|
223
223
|
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
|
224
224
|
|
|
225
225
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
|
226
|
-
|
|
226
|
+
if self.kvstore is None:
|
|
227
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
|
227
228
|
|
|
228
229
|
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
|
229
230
|
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
|
@@ -239,7 +240,8 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
|
|
239
240
|
return [i.vector_store for i in self.cache.values()]
|
|
240
241
|
|
|
241
242
|
async def unregister_vector_store(self, vector_store_id: str) -> None:
|
|
242
|
-
|
|
243
|
+
if self.kvstore is None:
|
|
244
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before unregistering vector stores.")
|
|
243
245
|
|
|
244
246
|
if vector_store_id not in self.cache:
|
|
245
247
|
return
|
|
@@ -248,6 +250,27 @@ class FaissVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoco
|
|
|
248
250
|
del self.cache[vector_store_id]
|
|
249
251
|
await self.kvstore.delete(f"{VECTOR_DBS_PREFIX}{vector_store_id}")
|
|
250
252
|
|
|
253
|
+
async def _get_and_cache_vector_store_index(self, vector_store_id: str) -> VectorStoreWithIndex | None:
|
|
254
|
+
if vector_store_id in self.cache:
|
|
255
|
+
return self.cache[vector_store_id]
|
|
256
|
+
|
|
257
|
+
if self.kvstore is None:
|
|
258
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
|
259
|
+
|
|
260
|
+
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
|
261
|
+
vector_store_data = await self.kvstore.get(key)
|
|
262
|
+
if not vector_store_data:
|
|
263
|
+
raise VectorStoreNotFoundError(vector_store_id)
|
|
264
|
+
|
|
265
|
+
vector_store = VectorStore.model_validate_json(vector_store_data)
|
|
266
|
+
index = VectorStoreWithIndex(
|
|
267
|
+
vector_store=vector_store,
|
|
268
|
+
index=await FaissIndex.create(vector_store.embedding_dimension, self.kvstore, vector_store.identifier),
|
|
269
|
+
inference_api=self.inference_api,
|
|
270
|
+
)
|
|
271
|
+
self.cache[vector_store_id] = index
|
|
272
|
+
return index
|
|
273
|
+
|
|
251
274
|
async def insert_chunks(self, vector_db_id: str, chunks: list[Chunk], ttl_seconds: int | None = None) -> None:
|
|
252
275
|
index = self.cache.get(vector_db_id)
|
|
253
276
|
if index is None:
|
|
@@ -412,6 +412,14 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
|
|
412
412
|
return [v.vector_store for v in self.cache.values()]
|
|
413
413
|
|
|
414
414
|
async def register_vector_store(self, vector_store: VectorStore) -> None:
|
|
415
|
+
if self.kvstore is None:
|
|
416
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before registering vector stores.")
|
|
417
|
+
|
|
418
|
+
# Save to kvstore for persistence
|
|
419
|
+
key = f"{VECTOR_DBS_PREFIX}{vector_store.identifier}"
|
|
420
|
+
await self.kvstore.set(key=key, value=vector_store.model_dump_json())
|
|
421
|
+
|
|
422
|
+
# Create and cache the index
|
|
415
423
|
index = await SQLiteVecIndex.create(
|
|
416
424
|
vector_store.embedding_dimension, self.config.db_path, vector_store.identifier
|
|
417
425
|
)
|
|
@@ -421,13 +429,16 @@ class SQLiteVecVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresPro
|
|
|
421
429
|
if vector_store_id in self.cache:
|
|
422
430
|
return self.cache[vector_store_id]
|
|
423
431
|
|
|
424
|
-
|
|
425
|
-
|
|
432
|
+
# Try to load from kvstore
|
|
433
|
+
if self.kvstore is None:
|
|
434
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
|
426
435
|
|
|
427
|
-
|
|
428
|
-
|
|
436
|
+
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
|
437
|
+
vector_store_data = await self.kvstore.get(key)
|
|
438
|
+
if not vector_store_data:
|
|
429
439
|
raise VectorStoreNotFoundError(vector_store_id)
|
|
430
440
|
|
|
441
|
+
vector_store = VectorStore.model_validate_json(vector_store_data)
|
|
431
442
|
index = VectorStoreWithIndex(
|
|
432
443
|
vector_store=vector_store,
|
|
433
444
|
index=SQLiteVecIndex(
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
6
|
|
|
7
|
+
from collections.abc import Iterable
|
|
7
8
|
|
|
8
9
|
import google.auth.transport.requests
|
|
9
10
|
from google.auth import default
|
|
@@ -42,3 +43,12 @@ class VertexAIInferenceAdapter(OpenAIMixin):
|
|
|
42
43
|
Source: https://cloud.google.com/vertex-ai/generative-ai/docs/start/openai
|
|
43
44
|
"""
|
|
44
45
|
return f"https://{self.config.location}-aiplatform.googleapis.com/v1/projects/{self.config.project}/locations/{self.config.location}/endpoints/openapi"
|
|
46
|
+
|
|
47
|
+
async def list_provider_model_ids(self) -> Iterable[str]:
|
|
48
|
+
"""
|
|
49
|
+
VertexAI doesn't currently offer a way to query a list of available models from Google's Model Garden
|
|
50
|
+
For now we return a hardcoded version of the available models
|
|
51
|
+
|
|
52
|
+
:return: An iterable of model IDs
|
|
53
|
+
"""
|
|
54
|
+
return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]
|
|
@@ -131,7 +131,6 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
131
131
|
|
|
132
132
|
async def initialize(self) -> None:
|
|
133
133
|
self.kvstore = await kvstore_impl(self.config.persistence)
|
|
134
|
-
self.vector_store_table = self.kvstore
|
|
135
134
|
|
|
136
135
|
if isinstance(self.config, RemoteChromaVectorIOConfig):
|
|
137
136
|
log.info(f"Connecting to Chroma server at: {self.config.url}")
|
|
@@ -190,9 +189,16 @@ class ChromaVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
190
189
|
if vector_store_id in self.cache:
|
|
191
190
|
return self.cache[vector_store_id]
|
|
192
191
|
|
|
193
|
-
|
|
194
|
-
if
|
|
192
|
+
# Try to load from kvstore
|
|
193
|
+
if self.kvstore is None:
|
|
194
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
|
195
|
+
|
|
196
|
+
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
|
197
|
+
vector_store_data = await self.kvstore.get(key)
|
|
198
|
+
if not vector_store_data:
|
|
195
199
|
raise ValueError(f"Vector DB {vector_store_id} not found in Llama Stack")
|
|
200
|
+
|
|
201
|
+
vector_store = VectorStore.model_validate_json(vector_store_data)
|
|
196
202
|
collection = await maybe_await(self.client.get_collection(vector_store_id))
|
|
197
203
|
if not collection:
|
|
198
204
|
raise ValueError(f"Vector DB {vector_store_id} not found in Chroma")
|
|
@@ -328,13 +328,16 @@ class MilvusVectorIOAdapter(OpenAIVectorStoreMixin, VectorIO, VectorStoresProtoc
|
|
|
328
328
|
if vector_store_id in self.cache:
|
|
329
329
|
return self.cache[vector_store_id]
|
|
330
330
|
|
|
331
|
-
|
|
332
|
-
|
|
331
|
+
# Try to load from kvstore
|
|
332
|
+
if self.kvstore is None:
|
|
333
|
+
raise RuntimeError("KVStore not initialized. Call initialize() before using vector stores.")
|
|
333
334
|
|
|
334
|
-
|
|
335
|
-
|
|
335
|
+
key = f"{VECTOR_DBS_PREFIX}{vector_store_id}"
|
|
336
|
+
vector_store_data = await self.kvstore.get(key)
|
|
337
|
+
if not vector_store_data:
|
|
336
338
|
raise VectorStoreNotFoundError(vector_store_id)
|
|
337
339
|
|
|
340
|
+
vector_store = VectorStore.model_validate_json(vector_store_data)
|
|
338
341
|
index = VectorStoreWithIndex(
|
|
339
342
|
vector_store=vector_store,
|
|
340
343
|
index=MilvusIndex(client=self.client, collection_name=vector_store.identifier, kvstore=self.kvstore),
|