llama-stack 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llama_stack/cli/stack/list_deps.py +4 -0
- llama_stack/core/routers/inference.py +66 -40
- llama_stack/distributions/starter/build.yaml +1 -0
- llama_stack/distributions/starter/run-with-postgres-store.yaml +285 -0
- llama_stack/distributions/starter/starter.py +86 -68
- llama_stack/distributions/starter-gpu/build.yaml +1 -0
- llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +288 -0
- llama_stack/providers/utils/inference/embedding_mixin.py +1 -2
- llama_stack/providers/utils/inference/inference_store.py +19 -7
- llama_stack/providers/utils/inference/openai_mixin.py +4 -1
- llama_stack/providers/utils/responses/responses_store.py +6 -68
- llama_stack/providers/utils/sqlstore/authorized_sqlstore.py +25 -9
- llama_stack/ui/node_modules/flatted/python/flatted.py +149 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/METADATA +3 -3
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/RECORD +19 -16
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/WHEEL +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/entry_points.txt +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/licenses/LICENSE +0 -0
- {llama_stack-0.3.1.dist-info → llama_stack-0.3.2.dist-info}/top_level.txt +0 -0
|
@@ -35,6 +35,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
|
|
|
35
35
|
)
|
|
36
36
|
from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
|
|
37
37
|
from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
|
|
38
|
+
from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
|
|
38
39
|
from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig
|
|
39
40
|
|
|
40
41
|
|
|
@@ -147,10 +148,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
147
148
|
BuildProvider(provider_type="inline::reference"),
|
|
148
149
|
],
|
|
149
150
|
}
|
|
151
|
+
files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
|
|
150
152
|
files_provider = Provider(
|
|
151
153
|
provider_id="meta-reference-files",
|
|
152
154
|
provider_type="inline::localfs",
|
|
153
|
-
config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
155
|
+
config=files_config,
|
|
154
156
|
)
|
|
155
157
|
embedding_provider = Provider(
|
|
156
158
|
provider_id="sentence-transformers",
|
|
@@ -180,6 +182,87 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
180
182
|
provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
|
|
181
183
|
),
|
|
182
184
|
]
|
|
185
|
+
postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
|
|
186
|
+
postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
|
|
187
|
+
default_overrides = {
|
|
188
|
+
"inference": remote_inference_providers + [embedding_provider],
|
|
189
|
+
"vector_io": [
|
|
190
|
+
Provider(
|
|
191
|
+
provider_id="faiss",
|
|
192
|
+
provider_type="inline::faiss",
|
|
193
|
+
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
194
|
+
),
|
|
195
|
+
Provider(
|
|
196
|
+
provider_id="sqlite-vec",
|
|
197
|
+
provider_type="inline::sqlite-vec",
|
|
198
|
+
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
199
|
+
),
|
|
200
|
+
Provider(
|
|
201
|
+
provider_id="${env.MILVUS_URL:+milvus}",
|
|
202
|
+
provider_type="inline::milvus",
|
|
203
|
+
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
204
|
+
),
|
|
205
|
+
Provider(
|
|
206
|
+
provider_id="${env.CHROMADB_URL:+chromadb}",
|
|
207
|
+
provider_type="remote::chromadb",
|
|
208
|
+
config=ChromaVectorIOConfig.sample_run_config(
|
|
209
|
+
f"~/.llama/distributions/{name}/",
|
|
210
|
+
url="${env.CHROMADB_URL:=}",
|
|
211
|
+
),
|
|
212
|
+
),
|
|
213
|
+
Provider(
|
|
214
|
+
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
|
215
|
+
provider_type="remote::pgvector",
|
|
216
|
+
config=PGVectorVectorIOConfig.sample_run_config(
|
|
217
|
+
f"~/.llama/distributions/{name}",
|
|
218
|
+
db="${env.PGVECTOR_DB:=}",
|
|
219
|
+
user="${env.PGVECTOR_USER:=}",
|
|
220
|
+
password="${env.PGVECTOR_PASSWORD:=}",
|
|
221
|
+
),
|
|
222
|
+
),
|
|
223
|
+
Provider(
|
|
224
|
+
provider_id="${env.QDRANT_URL:+qdrant}",
|
|
225
|
+
provider_type="remote::qdrant",
|
|
226
|
+
config=QdrantVectorIOConfig.sample_run_config(
|
|
227
|
+
f"~/.llama/distributions/{name}",
|
|
228
|
+
url="${env.QDRANT_URL:=}",
|
|
229
|
+
),
|
|
230
|
+
),
|
|
231
|
+
Provider(
|
|
232
|
+
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
|
|
233
|
+
provider_type="remote::weaviate",
|
|
234
|
+
config=WeaviateVectorIOConfig.sample_run_config(
|
|
235
|
+
f"~/.llama/distributions/{name}",
|
|
236
|
+
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
|
|
237
|
+
),
|
|
238
|
+
),
|
|
239
|
+
],
|
|
240
|
+
"files": [files_provider],
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
base_run_settings = RunConfigSettings(
|
|
244
|
+
provider_overrides=default_overrides,
|
|
245
|
+
default_models=[],
|
|
246
|
+
default_tool_groups=default_tool_groups,
|
|
247
|
+
default_shields=default_shields,
|
|
248
|
+
vector_stores_config=VectorStoresConfig(
|
|
249
|
+
default_provider_id="faiss",
|
|
250
|
+
default_embedding_model=QualifiedModel(
|
|
251
|
+
provider_id="sentence-transformers",
|
|
252
|
+
model_id="nomic-ai/nomic-embed-text-v1.5",
|
|
253
|
+
),
|
|
254
|
+
),
|
|
255
|
+
)
|
|
256
|
+
|
|
257
|
+
postgres_run_settings = base_run_settings.model_copy(
|
|
258
|
+
update={
|
|
259
|
+
"storage_backends": {
|
|
260
|
+
"kv_default": postgres_kv_config,
|
|
261
|
+
"sql_default": postgres_sql_config,
|
|
262
|
+
}
|
|
263
|
+
},
|
|
264
|
+
deep=True,
|
|
265
|
+
)
|
|
183
266
|
|
|
184
267
|
return DistributionTemplate(
|
|
185
268
|
name=name,
|
|
@@ -190,73 +273,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|
|
190
273
|
providers=providers,
|
|
191
274
|
additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
|
|
192
275
|
run_configs={
|
|
193
|
-
"run.yaml": RunConfigSettings(
|
|
194
|
-
provider_overrides={
|
195
|
-
"inference": remote_inference_providers + [embedding_provider],
|
|
196
|
-
"vector_io": [
|
|
197
|
-
Provider(
|
|
198
|
-
provider_id="faiss",
|
|
199
|
-
provider_type="inline::faiss",
|
|
200
|
-
config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
201
|
-
),
|
|
202
|
-
Provider(
|
|
203
|
-
provider_id="sqlite-vec",
|
|
204
|
-
provider_type="inline::sqlite-vec",
|
|
205
|
-
config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
206
|
-
),
|
|
207
|
-
Provider(
|
|
208
|
-
provider_id="${env.MILVUS_URL:+milvus}",
|
|
209
|
-
provider_type="inline::milvus",
|
|
210
|
-
config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
|
|
211
|
-
),
|
|
212
|
-
Provider(
|
|
213
|
-
provider_id="${env.CHROMADB_URL:+chromadb}",
|
|
214
|
-
provider_type="remote::chromadb",
|
|
215
|
-
config=ChromaVectorIOConfig.sample_run_config(
|
|
216
|
-
f"~/.llama/distributions/{name}/",
|
|
217
|
-
url="${env.CHROMADB_URL:=}",
|
|
218
|
-
),
|
|
219
|
-
),
|
|
220
|
-
Provider(
|
|
221
|
-
provider_id="${env.PGVECTOR_DB:+pgvector}",
|
|
222
|
-
provider_type="remote::pgvector",
|
|
223
|
-
config=PGVectorVectorIOConfig.sample_run_config(
|
|
224
|
-
f"~/.llama/distributions/{name}",
|
|
225
|
-
db="${env.PGVECTOR_DB:=}",
|
|
226
|
-
user="${env.PGVECTOR_USER:=}",
|
|
227
|
-
password="${env.PGVECTOR_PASSWORD:=}",
|
|
228
|
-
),
|
|
229
|
-
),
|
|
230
|
-
Provider(
|
|
231
|
-
provider_id="${env.QDRANT_URL:+qdrant}",
|
|
232
|
-
provider_type="remote::qdrant",
|
|
233
|
-
config=QdrantVectorIOConfig.sample_run_config(
|
|
234
|
-
f"~/.llama/distributions/{name}",
|
|
235
|
-
url="${env.QDRANT_URL:=}",
|
|
236
|
-
),
|
|
237
|
-
),
|
|
238
|
-
Provider(
|
|
239
|
-
provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
|
|
240
|
-
provider_type="remote::weaviate",
|
|
241
|
-
config=WeaviateVectorIOConfig.sample_run_config(
|
|
242
|
-
f"~/.llama/distributions/{name}",
|
|
243
|
-
cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
|
|
244
|
-
),
|
|
245
|
-
),
|
|
246
|
-
],
|
|
247
|
-
"files": [files_provider],
|
|
248
|
-
},
|
|
249
|
-
default_models=[],
|
|
250
|
-
default_tool_groups=default_tool_groups,
|
|
251
|
-
default_shields=default_shields,
|
|
252
|
-
vector_stores_config=VectorStoresConfig(
|
|
253
|
-
default_provider_id="faiss",
|
|
254
|
-
default_embedding_model=QualifiedModel(
|
|
255
|
-
provider_id="sentence-transformers",
|
|
256
|
-
model_id="nomic-ai/nomic-embed-text-v1.5",
|
|
257
|
-
),
|
|
258
|
-
),
|
|
259
|
-
),
|
|
276
|
+
"run.yaml": base_run_settings,
|
|
277
|
+
"run-with-postgres-store.yaml": postgres_run_settings,
|
|
260
278
|
},
|
|
261
279
|
run_config_env_vars={
|
|
262
280
|
"LLAMA_STACK_PORT": (
|
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
image_name: starter-gpu
|
|
3
|
+
apis:
|
|
4
|
+
- agents
|
|
5
|
+
- batches
|
|
6
|
+
- datasetio
|
|
7
|
+
- eval
|
|
8
|
+
- files
|
|
9
|
+
- inference
|
|
10
|
+
- post_training
|
|
11
|
+
- safety
|
|
12
|
+
- scoring
|
|
13
|
+
- tool_runtime
|
|
14
|
+
- vector_io
|
|
15
|
+
providers:
|
|
16
|
+
inference:
|
|
17
|
+
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
|
18
|
+
provider_type: remote::cerebras
|
|
19
|
+
config:
|
|
20
|
+
base_url: https://api.cerebras.ai
|
|
21
|
+
api_key: ${env.CEREBRAS_API_KEY:=}
|
|
22
|
+
- provider_id: ${env.OLLAMA_URL:+ollama}
|
|
23
|
+
provider_type: remote::ollama
|
|
24
|
+
config:
|
|
25
|
+
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
|
26
|
+
- provider_id: ${env.VLLM_URL:+vllm}
|
|
27
|
+
provider_type: remote::vllm
|
|
28
|
+
config:
|
|
29
|
+
url: ${env.VLLM_URL:=}
|
|
30
|
+
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
|
31
|
+
api_token: ${env.VLLM_API_TOKEN:=fake}
|
|
32
|
+
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
|
33
|
+
- provider_id: ${env.TGI_URL:+tgi}
|
|
34
|
+
provider_type: remote::tgi
|
|
35
|
+
config:
|
|
36
|
+
url: ${env.TGI_URL:=}
|
|
37
|
+
- provider_id: fireworks
|
|
38
|
+
provider_type: remote::fireworks
|
|
39
|
+
config:
|
|
40
|
+
url: https://api.fireworks.ai/inference/v1
|
|
41
|
+
api_key: ${env.FIREWORKS_API_KEY:=}
|
|
42
|
+
- provider_id: together
|
|
43
|
+
provider_type: remote::together
|
|
44
|
+
config:
|
|
45
|
+
url: https://api.together.xyz/v1
|
|
46
|
+
api_key: ${env.TOGETHER_API_KEY:=}
|
|
47
|
+
- provider_id: bedrock
|
|
48
|
+
provider_type: remote::bedrock
|
|
49
|
+
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
|
50
|
+
provider_type: remote::nvidia
|
|
51
|
+
config:
|
|
52
|
+
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
|
53
|
+
api_key: ${env.NVIDIA_API_KEY:=}
|
|
54
|
+
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
|
55
|
+
- provider_id: openai
|
|
56
|
+
provider_type: remote::openai
|
|
57
|
+
config:
|
|
58
|
+
api_key: ${env.OPENAI_API_KEY:=}
|
|
59
|
+
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
|
|
60
|
+
- provider_id: anthropic
|
|
61
|
+
provider_type: remote::anthropic
|
|
62
|
+
config:
|
|
63
|
+
api_key: ${env.ANTHROPIC_API_KEY:=}
|
|
64
|
+
- provider_id: gemini
|
|
65
|
+
provider_type: remote::gemini
|
|
66
|
+
config:
|
|
67
|
+
api_key: ${env.GEMINI_API_KEY:=}
|
|
68
|
+
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
|
|
69
|
+
provider_type: remote::vertexai
|
|
70
|
+
config:
|
|
71
|
+
project: ${env.VERTEX_AI_PROJECT:=}
|
|
72
|
+
location: ${env.VERTEX_AI_LOCATION:=us-central1}
|
|
73
|
+
- provider_id: groq
|
|
74
|
+
provider_type: remote::groq
|
|
75
|
+
config:
|
|
76
|
+
url: https://api.groq.com
|
|
77
|
+
api_key: ${env.GROQ_API_KEY:=}
|
|
78
|
+
- provider_id: sambanova
|
|
79
|
+
provider_type: remote::sambanova
|
|
80
|
+
config:
|
|
81
|
+
url: https://api.sambanova.ai/v1
|
|
82
|
+
api_key: ${env.SAMBANOVA_API_KEY:=}
|
|
83
|
+
- provider_id: ${env.AZURE_API_KEY:+azure}
|
|
84
|
+
provider_type: remote::azure
|
|
85
|
+
config:
|
|
86
|
+
api_key: ${env.AZURE_API_KEY:=}
|
|
87
|
+
api_base: ${env.AZURE_API_BASE:=}
|
|
88
|
+
api_version: ${env.AZURE_API_VERSION:=}
|
|
89
|
+
api_type: ${env.AZURE_API_TYPE:=}
|
|
90
|
+
- provider_id: sentence-transformers
|
|
91
|
+
provider_type: inline::sentence-transformers
|
|
92
|
+
vector_io:
|
|
93
|
+
- provider_id: faiss
|
|
94
|
+
provider_type: inline::faiss
|
|
95
|
+
config:
|
|
96
|
+
persistence:
|
|
97
|
+
namespace: vector_io::faiss
|
|
98
|
+
backend: kv_default
|
|
99
|
+
- provider_id: sqlite-vec
|
|
100
|
+
provider_type: inline::sqlite-vec
|
|
101
|
+
config:
|
|
102
|
+
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
|
|
103
|
+
persistence:
|
|
104
|
+
namespace: vector_io::sqlite_vec
|
|
105
|
+
backend: kv_default
|
|
106
|
+
- provider_id: ${env.MILVUS_URL:+milvus}
|
|
107
|
+
provider_type: inline::milvus
|
|
108
|
+
config:
|
|
109
|
+
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
|
|
110
|
+
persistence:
|
|
111
|
+
namespace: vector_io::milvus
|
|
112
|
+
backend: kv_default
|
|
113
|
+
- provider_id: ${env.CHROMADB_URL:+chromadb}
|
|
114
|
+
provider_type: remote::chromadb
|
|
115
|
+
config:
|
|
116
|
+
url: ${env.CHROMADB_URL:=}
|
|
117
|
+
persistence:
|
|
118
|
+
namespace: vector_io::chroma_remote
|
|
119
|
+
backend: kv_default
|
|
120
|
+
- provider_id: ${env.PGVECTOR_DB:+pgvector}
|
|
121
|
+
provider_type: remote::pgvector
|
|
122
|
+
config:
|
|
123
|
+
host: ${env.PGVECTOR_HOST:=localhost}
|
|
124
|
+
port: ${env.PGVECTOR_PORT:=5432}
|
|
125
|
+
db: ${env.PGVECTOR_DB:=}
|
|
126
|
+
user: ${env.PGVECTOR_USER:=}
|
|
127
|
+
password: ${env.PGVECTOR_PASSWORD:=}
|
|
128
|
+
persistence:
|
|
129
|
+
namespace: vector_io::pgvector
|
|
130
|
+
backend: kv_default
|
|
131
|
+
- provider_id: ${env.QDRANT_URL:+qdrant}
|
|
132
|
+
provider_type: remote::qdrant
|
|
133
|
+
config:
|
|
134
|
+
api_key: ${env.QDRANT_API_KEY:=}
|
|
135
|
+
persistence:
|
|
136
|
+
namespace: vector_io::qdrant_remote
|
|
137
|
+
backend: kv_default
|
|
138
|
+
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
|
|
139
|
+
provider_type: remote::weaviate
|
|
140
|
+
config:
|
|
141
|
+
weaviate_api_key: null
|
|
142
|
+
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
|
|
143
|
+
persistence:
|
|
144
|
+
namespace: vector_io::weaviate
|
|
145
|
+
backend: kv_default
|
|
146
|
+
files:
|
|
147
|
+
- provider_id: meta-reference-files
|
|
148
|
+
provider_type: inline::localfs
|
|
149
|
+
config:
|
|
150
|
+
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
|
|
151
|
+
metadata_store:
|
|
152
|
+
table_name: files_metadata
|
|
153
|
+
backend: sql_default
|
|
154
|
+
safety:
|
|
155
|
+
- provider_id: llama-guard
|
|
156
|
+
provider_type: inline::llama-guard
|
|
157
|
+
config:
|
|
158
|
+
excluded_categories: []
|
|
159
|
+
- provider_id: code-scanner
|
|
160
|
+
provider_type: inline::code-scanner
|
|
161
|
+
agents:
|
|
162
|
+
- provider_id: meta-reference
|
|
163
|
+
provider_type: inline::meta-reference
|
|
164
|
+
config:
|
|
165
|
+
persistence:
|
|
166
|
+
agent_state:
|
|
167
|
+
namespace: agents
|
|
168
|
+
backend: kv_default
|
|
169
|
+
responses:
|
|
170
|
+
table_name: responses
|
|
171
|
+
backend: sql_default
|
|
172
|
+
max_write_queue_size: 10000
|
|
173
|
+
num_writers: 4
|
|
174
|
+
post_training:
|
|
175
|
+
- provider_id: huggingface-gpu
|
|
176
|
+
provider_type: inline::huggingface-gpu
|
|
177
|
+
config:
|
|
178
|
+
checkpoint_format: huggingface
|
|
179
|
+
distributed_backend: null
|
|
180
|
+
device: cpu
|
|
181
|
+
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
|
|
182
|
+
eval:
|
|
183
|
+
- provider_id: meta-reference
|
|
184
|
+
provider_type: inline::meta-reference
|
|
185
|
+
config:
|
|
186
|
+
kvstore:
|
|
187
|
+
namespace: eval
|
|
188
|
+
backend: kv_default
|
|
189
|
+
datasetio:
|
|
190
|
+
- provider_id: huggingface
|
|
191
|
+
provider_type: remote::huggingface
|
|
192
|
+
config:
|
|
193
|
+
kvstore:
|
|
194
|
+
namespace: datasetio::huggingface
|
|
195
|
+
backend: kv_default
|
|
196
|
+
- provider_id: localfs
|
|
197
|
+
provider_type: inline::localfs
|
|
198
|
+
config:
|
|
199
|
+
kvstore:
|
|
200
|
+
namespace: datasetio::localfs
|
|
201
|
+
backend: kv_default
|
|
202
|
+
scoring:
|
|
203
|
+
- provider_id: basic
|
|
204
|
+
provider_type: inline::basic
|
|
205
|
+
- provider_id: llm-as-judge
|
|
206
|
+
provider_type: inline::llm-as-judge
|
|
207
|
+
- provider_id: braintrust
|
|
208
|
+
provider_type: inline::braintrust
|
|
209
|
+
config:
|
|
210
|
+
openai_api_key: ${env.OPENAI_API_KEY:=}
|
|
211
|
+
tool_runtime:
|
|
212
|
+
- provider_id: brave-search
|
|
213
|
+
provider_type: remote::brave-search
|
|
214
|
+
config:
|
|
215
|
+
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
|
|
216
|
+
max_results: 3
|
|
217
|
+
- provider_id: tavily-search
|
|
218
|
+
provider_type: remote::tavily-search
|
|
219
|
+
config:
|
|
220
|
+
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
|
|
221
|
+
max_results: 3
|
|
222
|
+
- provider_id: rag-runtime
|
|
223
|
+
provider_type: inline::rag-runtime
|
|
224
|
+
- provider_id: model-context-protocol
|
|
225
|
+
provider_type: remote::model-context-protocol
|
|
226
|
+
batches:
|
|
227
|
+
- provider_id: reference
|
|
228
|
+
provider_type: inline::reference
|
|
229
|
+
config:
|
|
230
|
+
kvstore:
|
|
231
|
+
namespace: batches
|
|
232
|
+
backend: kv_default
|
|
233
|
+
storage:
|
|
234
|
+
backends:
|
|
235
|
+
kv_default:
|
|
236
|
+
type: kv_postgres
|
|
237
|
+
host: ${env.POSTGRES_HOST:=localhost}
|
|
238
|
+
port: ${env.POSTGRES_PORT:=5432}
|
|
239
|
+
db: ${env.POSTGRES_DB:=llamastack}
|
|
240
|
+
user: ${env.POSTGRES_USER:=llamastack}
|
|
241
|
+
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
242
|
+
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
|
|
243
|
+
sql_default:
|
|
244
|
+
type: sql_postgres
|
|
245
|
+
host: ${env.POSTGRES_HOST:=localhost}
|
|
246
|
+
port: ${env.POSTGRES_PORT:=5432}
|
|
247
|
+
db: ${env.POSTGRES_DB:=llamastack}
|
|
248
|
+
user: ${env.POSTGRES_USER:=llamastack}
|
|
249
|
+
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
|
250
|
+
stores:
|
|
251
|
+
metadata:
|
|
252
|
+
namespace: registry
|
|
253
|
+
backend: kv_default
|
|
254
|
+
inference:
|
|
255
|
+
table_name: inference_store
|
|
256
|
+
backend: sql_default
|
|
257
|
+
max_write_queue_size: 10000
|
|
258
|
+
num_writers: 4
|
|
259
|
+
conversations:
|
|
260
|
+
table_name: openai_conversations
|
|
261
|
+
backend: sql_default
|
|
262
|
+
registered_resources:
|
|
263
|
+
models: []
|
|
264
|
+
shields:
|
|
265
|
+
- shield_id: llama-guard
|
|
266
|
+
provider_id: ${env.SAFETY_MODEL:+llama-guard}
|
|
267
|
+
provider_shield_id: ${env.SAFETY_MODEL:=}
|
|
268
|
+
- shield_id: code-scanner
|
|
269
|
+
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
|
|
270
|
+
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
|
|
271
|
+
vector_dbs: []
|
|
272
|
+
datasets: []
|
|
273
|
+
scoring_fns: []
|
|
274
|
+
benchmarks: []
|
|
275
|
+
tool_groups:
|
|
276
|
+
- toolgroup_id: builtin::websearch
|
|
277
|
+
provider_id: tavily-search
|
|
278
|
+
- toolgroup_id: builtin::rag
|
|
279
|
+
provider_id: rag-runtime
|
|
280
|
+
server:
|
|
281
|
+
port: 8321
|
|
282
|
+
telemetry:
|
|
283
|
+
enabled: true
|
|
284
|
+
vector_stores:
|
|
285
|
+
default_provider_id: faiss
|
|
286
|
+
default_embedding_model:
|
|
287
|
+
provider_id: sentence-transformers
|
|
288
|
+
model_id: nomic-ai/nomic-embed-text-v1.5
|
|
@@ -46,8 +46,7 @@ class SentenceTransformerEmbeddingMixin:
|
|
|
46
46
|
raise ValueError("Empty list not supported")
|
|
47
47
|
|
|
48
48
|
# Get the model and generate embeddings
|
|
49
|
-
model_obj = await self.model_store.get_model(params.model)
|
50
|
-
embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
|
|
49
|
+
embedding_model = await self._load_sentence_transformer_model(params.model)
|
|
51
50
|
embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)
|
|
52
51
|
|
|
53
52
|
# Convert embeddings to the requested format
|
|
@@ -66,13 +66,6 @@ class InferenceStore:
|
|
|
66
66
|
},
|
|
67
67
|
)
|
|
68
68
|
|
|
69
|
-
if self.enable_write_queue:
|
|
70
|
-
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
|
71
|
-
for _ in range(self._num_writers):
|
|
72
|
-
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
|
|
73
|
-
else:
|
|
74
|
-
logger.info("Write queue disabled for SQLite to avoid concurrency issues")
|
|
75
|
-
|
|
76
69
|
async def shutdown(self) -> None:
|
|
77
70
|
if not self._worker_tasks:
|
|
78
71
|
return
|
|
@@ -93,10 +86,29 @@ class InferenceStore:
|
|
|
93
86
|
if self.enable_write_queue and self._queue is not None:
|
|
94
87
|
await self._queue.join()
|
|
95
88
|
|
|
89
|
+
async def _ensure_workers_started(self) -> None:
|
|
90
|
+
"""Ensure the async write queue workers run on the current loop."""
|
|
91
|
+
if not self.enable_write_queue:
|
|
92
|
+
return
|
|
93
|
+
|
|
94
|
+
if self._queue is None:
|
|
95
|
+
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
|
96
|
+
logger.debug(
|
|
97
|
+
f"Inference store write queue created with max size {self._max_write_queue_size} "
|
|
98
|
+
f"and {self._num_writers} writers"
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
if not self._worker_tasks:
|
|
102
|
+
loop = asyncio.get_running_loop()
|
|
103
|
+
for _ in range(self._num_writers):
|
|
104
|
+
task = loop.create_task(self._worker_loop())
|
|
105
|
+
self._worker_tasks.append(task)
|
|
106
|
+
|
|
96
107
|
async def store_chat_completion(
|
|
97
108
|
self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
|
|
98
109
|
) -> None:
|
|
99
110
|
if self.enable_write_queue:
|
|
111
|
+
await self._ensure_workers_started()
|
|
100
112
|
if self._queue is None:
|
|
101
113
|
raise ValueError("Inference store is not initialized")
|
|
102
114
|
try:
|
|
@@ -201,8 +201,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
|
|
201
201
|
:param model: The registered model name/identifier
|
|
202
202
|
:return: The provider-specific model ID (e.g., "gpt-4")
|
|
203
203
|
"""
|
|
204
|
-
# Look up the registered model to get the provider-specific model ID
|
|
205
204
|
# self.model_store is injected by the distribution system at runtime
|
|
205
|
+
if not await self.model_store.has_model(model): # type: ignore[attr-defined]
|
|
206
|
+
return model
|
|
207
|
+
|
|
208
|
+
# Look up the registered model to get the provider-specific model ID
|
|
206
209
|
model_obj: Model = await self.model_store.get_model(model) # type: ignore[attr-defined]
|
|
207
210
|
# provider_resource_id is str | None, but we expect it to be str for OpenAI calls
|
|
208
211
|
if model_obj.provider_resource_id is None:
|
|
@@ -3,8 +3,6 @@
|
|
|
3
3
|
#
|
|
4
4
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
5
5
|
# the root directory of this source tree.
|
|
6
|
-
import asyncio
|
|
7
|
-
from typing import Any
|
|
8
6
|
|
|
9
7
|
from llama_stack.apis.agents import (
|
|
10
8
|
Order,
|
|
@@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import (
|
|
|
19
17
|
)
|
|
20
18
|
from llama_stack.apis.inference import OpenAIMessageParam
|
|
21
19
|
from llama_stack.core.datatypes import AccessRule
|
|
22
|
-
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
|
|
20
|
+
from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
|
|
23
21
|
from llama_stack.log import get_logger
|
|
24
22
|
|
|
25
23
|
from ..sqlstore.api import ColumnDefinition, ColumnType
|
|
26
24
|
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
|
27
|
-
from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
|
|
25
|
+
from ..sqlstore.sqlstore import sqlstore_impl
|
|
28
26
|
|
|
29
27
|
logger = get_logger(name=__name__, category="openai_responses")
|
|
30
28
|
|
|
@@ -55,28 +53,12 @@ class ResponsesStore:
|
|
|
55
53
|
|
|
56
54
|
self.policy = policy
|
|
57
55
|
self.sql_store = None
|
|
58
|
-
self.enable_write_queue = True
|
|
59
|
-
|
|
60
|
-
# Async write queue and worker control
|
|
61
|
-
self._queue: (
|
|
62
|
-
asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None
|
|
63
|
-
) = None
|
|
64
|
-
self._worker_tasks: list[asyncio.Task[Any]] = []
|
|
65
|
-
self._max_write_queue_size: int = self.reference.max_write_queue_size
|
|
66
|
-
self._num_writers: int = max(1, self.reference.num_writers)
|
|
67
56
|
|
|
68
57
|
async def initialize(self):
|
|
69
58
|
"""Create the necessary tables if they don't exist."""
|
|
70
59
|
base_store = sqlstore_impl(self.reference)
|
|
71
60
|
self.sql_store = AuthorizedSqlStore(base_store, self.policy)
|
|
72
61
|
|
|
73
|
-
backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
|
|
74
|
-
if backend_config is None:
|
|
75
|
-
raise ValueError(
|
|
76
|
-
f"Unregistered SQL backend '{self.reference.backend}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
|
|
77
|
-
)
|
|
78
|
-
if backend_config.type == StorageBackendType.SQL_SQLITE:
|
|
79
|
-
self.enable_write_queue = False
|
|
80
62
|
await self.sql_store.create_table(
|
|
81
63
|
"openai_responses",
|
|
82
64
|
{
|
|
@@ -95,32 +77,12 @@ class ResponsesStore:
|
|
|
95
77
|
},
|
|
96
78
|
)
|
|
97
79
|
|
|
98
|
-
if self.enable_write_queue:
|
|
99
|
-
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
|
100
|
-
for _ in range(self._num_writers):
|
|
101
|
-
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
|
|
102
|
-
else:
|
|
103
|
-
logger.debug("Write queue disabled for SQLite to avoid concurrency issues")
|
|
104
|
-
|
|
105
80
|
async def shutdown(self) -> None:
|
|
106
|
-
if not self._worker_tasks:
|
107
|
-
return
|
|
108
|
-
if self._queue is not None:
|
|
109
|
-
await self._queue.join()
|
|
110
|
-
for t in self._worker_tasks:
|
|
111
|
-
if not t.done():
|
|
112
|
-
t.cancel()
|
|
113
|
-
for t in self._worker_tasks:
|
|
114
|
-
try:
|
|
115
|
-
await t
|
|
116
|
-
except asyncio.CancelledError:
|
|
117
|
-
pass
|
|
118
|
-
self._worker_tasks.clear()
|
|
81
|
+
return
|
|
119
82
|
|
|
120
83
|
async def flush(self) -> None:
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
await self._queue.join()
|
|
84
|
+
"""Maintained for compatibility; no-op now that writes are synchronous."""
|
|
85
|
+
return
|
|
124
86
|
|
|
125
87
|
async def store_response_object(
|
|
126
88
|
self,
|
|
@@ -128,31 +90,7 @@ class ResponsesStore:
|
|
|
128
90
|
input: list[OpenAIResponseInput],
|
|
129
91
|
messages: list[OpenAIMessageParam],
|
|
130
92
|
) -> None:
|
|
131
|
-
if self.enable_write_queue:
|
132
|
-
if self._queue is None:
|
|
133
|
-
raise ValueError("Responses store is not initialized")
|
|
134
|
-
try:
|
|
135
|
-
self._queue.put_nowait((response_object, input, messages))
|
|
136
|
-
except asyncio.QueueFull:
|
|
137
|
-
logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '<unknown>')}")
|
|
138
|
-
await self._queue.put((response_object, input, messages))
|
|
139
|
-
else:
|
|
140
|
-
await self._write_response_object(response_object, input, messages)
|
|
141
|
-
|
|
142
|
-
async def _worker_loop(self) -> None:
|
|
143
|
-
assert self._queue is not None
|
|
144
|
-
while True:
|
|
145
|
-
try:
|
|
146
|
-
item = await self._queue.get()
|
|
147
|
-
except asyncio.CancelledError:
|
|
148
|
-
break
|
|
149
|
-
response_object, input, messages = item
|
|
150
|
-
try:
|
|
151
|
-
await self._write_response_object(response_object, input, messages)
|
|
152
|
-
except Exception as e: # noqa: BLE001
|
|
153
|
-
logger.error(f"Error writing response object: {e}")
|
|
154
|
-
finally:
|
|
155
|
-
self._queue.task_done()
|
|
93
|
+
await self._write_response_object(response_object, input, messages)
|
|
156
94
|
|
|
157
95
|
async def _write_response_object(
|
|
158
96
|
self,
|