llama-stack 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
@@ -35,6 +35,7 @@ from llama_stack.providers.remote.vector_io.pgvector.config import (
  )
  from llama_stack.providers.remote.vector_io.qdrant.config import QdrantVectorIOConfig
  from llama_stack.providers.remote.vector_io.weaviate.config import WeaviateVectorIOConfig
+ from llama_stack.providers.utils.kvstore.config import PostgresKVStoreConfig
  from llama_stack.providers.utils.sqlstore.sqlstore import PostgresSqlStoreConfig


@@ -147,10 +148,11 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
              BuildProvider(provider_type="inline::reference"),
          ],
      }
+     files_config = LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}")
      files_provider = Provider(
          provider_id="meta-reference-files",
          provider_type="inline::localfs",
-         config=LocalfsFilesImplConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+         config=files_config,
      )
      embedding_provider = Provider(
          provider_id="sentence-transformers",
@@ -180,6 +182,87 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
              provider_shield_id="${env.CODE_SCANNER_MODEL:=}",
          ),
      ]
+     postgres_sql_config = PostgresSqlStoreConfig.sample_run_config()
+     postgres_kv_config = PostgresKVStoreConfig.sample_run_config()
+     default_overrides = {
+         "inference": remote_inference_providers + [embedding_provider],
+         "vector_io": [
+             Provider(
+                 provider_id="faiss",
+                 provider_type="inline::faiss",
+                 config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+             ),
+             Provider(
+                 provider_id="sqlite-vec",
+                 provider_type="inline::sqlite-vec",
+                 config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+             ),
+             Provider(
+                 provider_id="${env.MILVUS_URL:+milvus}",
+                 provider_type="inline::milvus",
+                 config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
+             ),
+             Provider(
+                 provider_id="${env.CHROMADB_URL:+chromadb}",
+                 provider_type="remote::chromadb",
+                 config=ChromaVectorIOConfig.sample_run_config(
+                     f"~/.llama/distributions/{name}/",
+                     url="${env.CHROMADB_URL:=}",
+                 ),
+             ),
+             Provider(
+                 provider_id="${env.PGVECTOR_DB:+pgvector}",
+                 provider_type="remote::pgvector",
+                 config=PGVectorVectorIOConfig.sample_run_config(
+                     f"~/.llama/distributions/{name}",
+                     db="${env.PGVECTOR_DB:=}",
+                     user="${env.PGVECTOR_USER:=}",
+                     password="${env.PGVECTOR_PASSWORD:=}",
+                 ),
+             ),
+             Provider(
+                 provider_id="${env.QDRANT_URL:+qdrant}",
+                 provider_type="remote::qdrant",
+                 config=QdrantVectorIOConfig.sample_run_config(
+                     f"~/.llama/distributions/{name}",
+                     url="${env.QDRANT_URL:=}",
+                 ),
+             ),
+             Provider(
+                 provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
+                 provider_type="remote::weaviate",
+                 config=WeaviateVectorIOConfig.sample_run_config(
+                     f"~/.llama/distributions/{name}",
+                     cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
+                 ),
+             ),
+         ],
+         "files": [files_provider],
+     }
+
+     base_run_settings = RunConfigSettings(
+         provider_overrides=default_overrides,
+         default_models=[],
+         default_tool_groups=default_tool_groups,
+         default_shields=default_shields,
+         vector_stores_config=VectorStoresConfig(
+             default_provider_id="faiss",
+             default_embedding_model=QualifiedModel(
+                 provider_id="sentence-transformers",
+                 model_id="nomic-ai/nomic-embed-text-v1.5",
+             ),
+         ),
+     )
+
+     postgres_run_settings = base_run_settings.model_copy(
+         update={
+             "storage_backends": {
+                 "kv_default": postgres_kv_config,
+                 "sql_default": postgres_sql_config,
+             }
+         },
+         deep=True,
+     )

      return DistributionTemplate(
          name=name,
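
The refactor above avoids duplicating the large RunConfigSettings literal: the Postgres variant is derived from the shared baseline with Pydantic v2's model_copy(update=..., deep=True). A minimal sketch of that pattern, using stand-in models rather than the stack's actual classes:

    from pydantic import BaseModel


    class BackendConfig(BaseModel):
        # Stand-in for a storage backend config; field names are illustrative.
        type: str
        host: str = "localhost"


    class RunSettings(BaseModel):
        # Stand-in for RunConfigSettings.
        default_models: list[str] = []
        storage_backends: dict[str, BackendConfig] = {}


    base = RunSettings(storage_backends={"kv_default": BackendConfig(type="kv_sqlite")})

    # deep=True deep-copies nested containers before applying the update,
    # so the derived settings cannot mutate the baseline through shared state.
    postgres = base.model_copy(
        update={"storage_backends": {"kv_default": BackendConfig(type="kv_postgres")}},
        deep=True,
    )

    assert base.storage_backends["kv_default"].type == "kv_sqlite"
    assert postgres.storage_backends["kv_default"].type == "kv_postgres"
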
@@ -190,73 +273,8 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
          providers=providers,
          additional_pip_packages=PostgresSqlStoreConfig.pip_packages(),
          run_configs={
-             "run.yaml": RunConfigSettings(
-                 provider_overrides={
-                     "inference": remote_inference_providers + [embedding_provider],
-                     "vector_io": [
-                         Provider(
-                             provider_id="faiss",
-                             provider_type="inline::faiss",
-                             config=FaissVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                         ),
-                         Provider(
-                             provider_id="sqlite-vec",
-                             provider_type="inline::sqlite-vec",
-                             config=SQLiteVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                         ),
-                         Provider(
-                             provider_id="${env.MILVUS_URL:+milvus}",
-                             provider_type="inline::milvus",
-                             config=MilvusVectorIOConfig.sample_run_config(f"~/.llama/distributions/{name}"),
-                         ),
-                         Provider(
-                             provider_id="${env.CHROMADB_URL:+chromadb}",
-                             provider_type="remote::chromadb",
-                             config=ChromaVectorIOConfig.sample_run_config(
-                                 f"~/.llama/distributions/{name}/",
-                                 url="${env.CHROMADB_URL:=}",
-                             ),
-                         ),
-                         Provider(
-                             provider_id="${env.PGVECTOR_DB:+pgvector}",
-                             provider_type="remote::pgvector",
-                             config=PGVectorVectorIOConfig.sample_run_config(
-                                 f"~/.llama/distributions/{name}",
-                                 db="${env.PGVECTOR_DB:=}",
-                                 user="${env.PGVECTOR_USER:=}",
-                                 password="${env.PGVECTOR_PASSWORD:=}",
-                             ),
-                         ),
-                         Provider(
-                             provider_id="${env.QDRANT_URL:+qdrant}",
-                             provider_type="remote::qdrant",
-                             config=QdrantVectorIOConfig.sample_run_config(
-                                 f"~/.llama/distributions/{name}",
-                                 url="${env.QDRANT_URL:=}",
-                             ),
-                         ),
-                         Provider(
-                             provider_id="${env.WEAVIATE_CLUSTER_URL:+weaviate}",
-                             provider_type="remote::weaviate",
-                             config=WeaviateVectorIOConfig.sample_run_config(
-                                 f"~/.llama/distributions/{name}",
-                                 cluster_url="${env.WEAVIATE_CLUSTER_URL:=}",
-                             ),
-                         ),
-                     ],
-                     "files": [files_provider],
-                 },
-                 default_models=[],
-                 default_tool_groups=default_tool_groups,
-                 default_shields=default_shields,
-                 vector_stores_config=VectorStoresConfig(
-                     default_provider_id="faiss",
-                     default_embedding_model=QualifiedModel(
-                         provider_id="sentence-transformers",
-                         model_id="nomic-ai/nomic-embed-text-v1.5",
-                     ),
-                 ),
-             ),
+             "run.yaml": base_run_settings,
+             "run-with-postgres-store.yaml": postgres_run_settings,
          },
          run_config_env_vars={
              "LLAMA_STACK_PORT": (
@@ -57,4 +57,5 @@ image_type: venv
  additional_pip_packages:
  - aiosqlite
  - asyncpg
+ - psycopg2-binary
  - sqlalchemy[asyncio]
@@ -0,0 +1,288 @@
+ version: 2
+ image_name: starter-gpu
+ apis:
+ - agents
+ - batches
+ - datasetio
+ - eval
+ - files
+ - inference
+ - post_training
+ - safety
+ - scoring
+ - tool_runtime
+ - vector_io
+ providers:
+   inference:
+   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
+     provider_type: remote::cerebras
+     config:
+       base_url: https://api.cerebras.ai
+       api_key: ${env.CEREBRAS_API_KEY:=}
+   - provider_id: ${env.OLLAMA_URL:+ollama}
+     provider_type: remote::ollama
+     config:
+       url: ${env.OLLAMA_URL:=http://localhost:11434}
+   - provider_id: ${env.VLLM_URL:+vllm}
+     provider_type: remote::vllm
+     config:
+       url: ${env.VLLM_URL:=}
+       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
+       api_token: ${env.VLLM_API_TOKEN:=fake}
+       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
+   - provider_id: ${env.TGI_URL:+tgi}
+     provider_type: remote::tgi
+     config:
+       url: ${env.TGI_URL:=}
+   - provider_id: fireworks
+     provider_type: remote::fireworks
+     config:
+       url: https://api.fireworks.ai/inference/v1
+       api_key: ${env.FIREWORKS_API_KEY:=}
+   - provider_id: together
+     provider_type: remote::together
+     config:
+       url: https://api.together.xyz/v1
+       api_key: ${env.TOGETHER_API_KEY:=}
+   - provider_id: bedrock
+     provider_type: remote::bedrock
+   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
+     provider_type: remote::nvidia
+     config:
+       url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+       api_key: ${env.NVIDIA_API_KEY:=}
+       append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
+   - provider_id: openai
+     provider_type: remote::openai
+     config:
+       api_key: ${env.OPENAI_API_KEY:=}
+       base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
+   - provider_id: anthropic
+     provider_type: remote::anthropic
+     config:
+       api_key: ${env.ANTHROPIC_API_KEY:=}
+   - provider_id: gemini
+     provider_type: remote::gemini
+     config:
+       api_key: ${env.GEMINI_API_KEY:=}
+   - provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
+     provider_type: remote::vertexai
+     config:
+       project: ${env.VERTEX_AI_PROJECT:=}
+       location: ${env.VERTEX_AI_LOCATION:=us-central1}
+   - provider_id: groq
+     provider_type: remote::groq
+     config:
+       url: https://api.groq.com
+       api_key: ${env.GROQ_API_KEY:=}
+   - provider_id: sambanova
+     provider_type: remote::sambanova
+     config:
+       url: https://api.sambanova.ai/v1
+       api_key: ${env.SAMBANOVA_API_KEY:=}
+   - provider_id: ${env.AZURE_API_KEY:+azure}
+     provider_type: remote::azure
+     config:
+       api_key: ${env.AZURE_API_KEY:=}
+       api_base: ${env.AZURE_API_BASE:=}
+       api_version: ${env.AZURE_API_VERSION:=}
+       api_type: ${env.AZURE_API_TYPE:=}
+   - provider_id: sentence-transformers
+     provider_type: inline::sentence-transformers
+   vector_io:
+   - provider_id: faiss
+     provider_type: inline::faiss
+     config:
+       persistence:
+         namespace: vector_io::faiss
+         backend: kv_default
+   - provider_id: sqlite-vec
+     provider_type: inline::sqlite-vec
+     config:
+       db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
+       persistence:
+         namespace: vector_io::sqlite_vec
+         backend: kv_default
+   - provider_id: ${env.MILVUS_URL:+milvus}
+     provider_type: inline::milvus
+     config:
+       db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
+       persistence:
+         namespace: vector_io::milvus
+         backend: kv_default
+   - provider_id: ${env.CHROMADB_URL:+chromadb}
+     provider_type: remote::chromadb
+     config:
+       url: ${env.CHROMADB_URL:=}
+       persistence:
+         namespace: vector_io::chroma_remote
+         backend: kv_default
+   - provider_id: ${env.PGVECTOR_DB:+pgvector}
+     provider_type: remote::pgvector
+     config:
+       host: ${env.PGVECTOR_HOST:=localhost}
+       port: ${env.PGVECTOR_PORT:=5432}
+       db: ${env.PGVECTOR_DB:=}
+       user: ${env.PGVECTOR_USER:=}
+       password: ${env.PGVECTOR_PASSWORD:=}
+       persistence:
+         namespace: vector_io::pgvector
+         backend: kv_default
+   - provider_id: ${env.QDRANT_URL:+qdrant}
+     provider_type: remote::qdrant
+     config:
+       api_key: ${env.QDRANT_API_KEY:=}
+       persistence:
+         namespace: vector_io::qdrant_remote
+         backend: kv_default
+   - provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
+     provider_type: remote::weaviate
+     config:
+       weaviate_api_key: null
+       weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
+       persistence:
+         namespace: vector_io::weaviate
+         backend: kv_default
+   files:
+   - provider_id: meta-reference-files
+     provider_type: inline::localfs
+     config:
+       storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
+       metadata_store:
+         table_name: files_metadata
+         backend: sql_default
+   safety:
+   - provider_id: llama-guard
+     provider_type: inline::llama-guard
+     config:
+       excluded_categories: []
+   - provider_id: code-scanner
+     provider_type: inline::code-scanner
+   agents:
+   - provider_id: meta-reference
+     provider_type: inline::meta-reference
+     config:
+       persistence:
+         agent_state:
+           namespace: agents
+           backend: kv_default
+         responses:
+           table_name: responses
+           backend: sql_default
+           max_write_queue_size: 10000
+           num_writers: 4
+   post_training:
+   - provider_id: huggingface-gpu
+     provider_type: inline::huggingface-gpu
+     config:
+       checkpoint_format: huggingface
+       distributed_backend: null
+       device: cpu
+       dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
+   eval:
+   - provider_id: meta-reference
+     provider_type: inline::meta-reference
+     config:
+       kvstore:
+         namespace: eval
+         backend: kv_default
+   datasetio:
+   - provider_id: huggingface
+     provider_type: remote::huggingface
+     config:
+       kvstore:
+         namespace: datasetio::huggingface
+         backend: kv_default
+   - provider_id: localfs
+     provider_type: inline::localfs
+     config:
+       kvstore:
+         namespace: datasetio::localfs
+         backend: kv_default
+   scoring:
+   - provider_id: basic
+     provider_type: inline::basic
+   - provider_id: llm-as-judge
+     provider_type: inline::llm-as-judge
+   - provider_id: braintrust
+     provider_type: inline::braintrust
+     config:
+       openai_api_key: ${env.OPENAI_API_KEY:=}
+   tool_runtime:
+   - provider_id: brave-search
+     provider_type: remote::brave-search
+     config:
+       api_key: ${env.BRAVE_SEARCH_API_KEY:=}
+       max_results: 3
+   - provider_id: tavily-search
+     provider_type: remote::tavily-search
+     config:
+       api_key: ${env.TAVILY_SEARCH_API_KEY:=}
+       max_results: 3
+   - provider_id: rag-runtime
+     provider_type: inline::rag-runtime
+   - provider_id: model-context-protocol
+     provider_type: remote::model-context-protocol
+   batches:
+   - provider_id: reference
+     provider_type: inline::reference
+     config:
+       kvstore:
+         namespace: batches
+         backend: kv_default
+ storage:
+   backends:
+     kv_default:
+       type: kv_postgres
+       host: ${env.POSTGRES_HOST:=localhost}
+       port: ${env.POSTGRES_PORT:=5432}
+       db: ${env.POSTGRES_DB:=llamastack}
+       user: ${env.POSTGRES_USER:=llamastack}
+       password: ${env.POSTGRES_PASSWORD:=llamastack}
+       table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
+     sql_default:
+       type: sql_postgres
+       host: ${env.POSTGRES_HOST:=localhost}
+       port: ${env.POSTGRES_PORT:=5432}
+       db: ${env.POSTGRES_DB:=llamastack}
+       user: ${env.POSTGRES_USER:=llamastack}
+       password: ${env.POSTGRES_PASSWORD:=llamastack}
+   stores:
+     metadata:
+       namespace: registry
+       backend: kv_default
+     inference:
+       table_name: inference_store
+       backend: sql_default
+       max_write_queue_size: 10000
+       num_writers: 4
+     conversations:
+       table_name: openai_conversations
+       backend: sql_default
+ registered_resources:
+   models: []
+   shields:
+   - shield_id: llama-guard
+     provider_id: ${env.SAFETY_MODEL:+llama-guard}
+     provider_shield_id: ${env.SAFETY_MODEL:=}
+   - shield_id: code-scanner
+     provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
+     provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
+   vector_dbs: []
+   datasets: []
+   scoring_fns: []
+   benchmarks: []
+   tool_groups:
+   - toolgroup_id: builtin::websearch
+     provider_id: tavily-search
+   - toolgroup_id: builtin::rag
+     provider_id: rag-runtime
+ server:
+   port: 8321
+ telemetry:
+   enabled: true
+ vector_stores:
+   default_provider_id: faiss
+   default_embedding_model:
+     provider_id: sentence-transformers
+     model_id: nomic-ai/nomic-embed-text-v1.5
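
The generated YAML leans on two placeholder forms: ${env.VAR:=default} resolves to the variable's value or, failing that, the default, while ${env.VAR:+value} resolves to value only when the variable is set. That is how a provider_id like ${env.MILVUS_URL:+milvus} effectively disables the provider when MILVUS_URL is absent. A rough Python re-implementation of those apparent semantics, for illustration only (the real substitution logic lives in llama-stack's config loader and may differ in edge cases):

    import os
    import re

    # Approximation of the `${env.VAR:=default}` and `${env.VAR:+value}`
    # placeholders used throughout the config above; not the actual parser.
    _PATTERN = re.compile(r"\$\{env\.(\w+):([=+])([^}]*)\}")

    def substitute(text: str) -> str:
        def repl(m: re.Match) -> str:
            var, op, rest = m.group(1), m.group(2), m.group(3)
            value = os.environ.get(var)
            if op == "=":  # ${env.VAR:=default} -> VAR if set, else default
                return value if value else rest
            return rest if value else ""  # ${env.VAR:+val} -> val only if VAR is set

        return _PATTERN.sub(repl, text)

    # With MILVUS_URL unset, provider_id becomes empty (provider disabled);
    # with it set, provider_id becomes "milvus".
    print(substitute("provider_id: ${env.MILVUS_URL:+milvus}"))
    print(substitute("url: ${env.OLLAMA_URL:=http://localhost:11434}"))
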
@@ -46,8 +46,7 @@ class SentenceTransformerEmbeddingMixin:
              raise ValueError("Empty list not supported")

          # Get the model and generate embeddings
-         model_obj = await self.model_store.get_model(params.model)
-         embedding_model = await self._load_sentence_transformer_model(model_obj.provider_resource_id)
+         embedding_model = await self._load_sentence_transformer_model(params.model)
          embeddings = await asyncio.to_thread(embedding_model.encode, input_list, show_progress_bar=False)

          # Convert embeddings to the requested format
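
The retained asyncio.to_thread(...) call is what keeps the blocking, CPU-bound SentenceTransformers encode off the event loop. A self-contained sketch of the same offloading pattern, with slow_encode standing in for the real model:

    import asyncio
    import time

    def slow_encode(texts: list[str]) -> list[list[float]]:
        # Stand-in for SentenceTransformer.encode: blocking and CPU-bound.
        time.sleep(0.5)
        return [[float(len(t))] for t in texts]

    async def tick() -> None:
        # Keeps running while encode executes, showing the loop is not blocked.
        for _ in range(4):
            await asyncio.sleep(0.1)
            print("event loop still responsive")

    async def main() -> None:
        embeddings, _ = await asyncio.gather(
            asyncio.to_thread(slow_encode, ["hello", "world"]),  # worker thread
            tick(),
        )
        print(embeddings)

    asyncio.run(main())
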
@@ -66,13 +66,6 @@ class InferenceStore:
              },
          )

-         if self.enable_write_queue:
-             self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-             for _ in range(self._num_writers):
-                 self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-         else:
-             logger.info("Write queue disabled for SQLite to avoid concurrency issues")
-
      async def shutdown(self) -> None:
          if not self._worker_tasks:
              return
@@ -93,10 +86,29 @@ class InferenceStore:
          if self.enable_write_queue and self._queue is not None:
              await self._queue.join()

+     async def _ensure_workers_started(self) -> None:
+         """Ensure the async write queue workers run on the current loop."""
+         if not self.enable_write_queue:
+             return
+
+         if self._queue is None:
+             self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
+             logger.debug(
+                 f"Inference store write queue created with max size {self._max_write_queue_size} "
+                 f"and {self._num_writers} writers"
+             )
+
+         if not self._worker_tasks:
+             loop = asyncio.get_running_loop()
+             for _ in range(self._num_writers):
+                 task = loop.create_task(self._worker_loop())
+                 self._worker_tasks.append(task)
+
      async def store_chat_completion(
          self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
      ) -> None:
          if self.enable_write_queue:
+             await self._ensure_workers_started()
              if self._queue is None:
                  raise ValueError("Inference store is not initialized")
              try:
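
The new _ensure_workers_started defers queue and worker creation from initialize() to the first write, and binds the tasks to asyncio.get_running_loop(). That matters because an asyncio.Queue or Task created on one event loop and then awaited from another raises "attached to a different loop" errors, a common failure when initialization runs under a different loop than the server (e.g. in tests). A minimal sketch of the same lazy-start pattern, not the actual InferenceStore:

    import asyncio

    class LazyWriter:
        # Minimal sketch of the lazy-start pattern; not the real class.
        def __init__(self, num_writers: int = 2) -> None:
            self._queue: asyncio.Queue[str] | None = None
            self._workers: list[asyncio.Task[None]] = []
            self._num_writers = num_writers

        async def _worker(self) -> None:
            assert self._queue is not None
            while True:
                item = await self._queue.get()
                try:
                    print(f"wrote {item}")
                finally:
                    self._queue.task_done()

        async def write(self, item: str) -> None:
            # Queue and tasks are created on the loop running *this* coroutine,
            # so they can never be bound to a stale loop from init time.
            if self._queue is None:
                self._queue = asyncio.Queue()
            if not self._workers:
                loop = asyncio.get_running_loop()
                self._workers = [loop.create_task(self._worker()) for _ in range(self._num_writers)]
            await self._queue.put(item)

        async def flush(self) -> None:
            if self._queue is not None:
                await self._queue.join()

    async def main() -> None:
        writer = LazyWriter()
        await writer.write("chat-completion-1")
        await writer.flush()

    asyncio.run(main())
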
@@ -201,8 +201,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
          :param model: The registered model name/identifier
          :return: The provider-specific model ID (e.g., "gpt-4")
          """
-         # Look up the registered model to get the provider-specific model ID
          # self.model_store is injected by the distribution system at runtime
+         if not await self.model_store.has_model(model):  # type: ignore[attr-defined]
+             return model
+
+         # Look up the registered model to get the provider-specific model ID
          model_obj: Model = await self.model_store.get_model(model)  # type: ignore[attr-defined]
          # provider_resource_id is str | None, but we expect it to be str for OpenAI calls
          if model_obj.provider_resource_id is None:
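
With the has_model guard, an identifier that was never registered is now passed through to the provider unchanged instead of failing the registry lookup, while registered aliases still resolve to their provider_resource_id. A condensed sketch of the resulting behavior (FakeModelStore and get_provider_resource_id are illustrative stand-ins; the real store is injected by the distribution system):

    import asyncio

    class FakeModelStore:
        # Stand-in: maps registered aliases to provider-specific model IDs.
        def __init__(self, registry: dict[str, str]) -> None:
            self._registry = registry

        async def has_model(self, model: str) -> bool:
            return model in self._registry

        async def get_provider_resource_id(self, model: str) -> str:
            return self._registry[model]

    async def resolve(store: FakeModelStore, model: str) -> str:
        # Unregistered names pass through unchanged instead of raising,
        # so callers can use provider-native IDs directly.
        if not await store.has_model(model):
            return model
        return await store.get_provider_resource_id(model)

    async def main() -> None:
        store = FakeModelStore({"my-gpt4-alias": "gpt-4"})
        assert await resolve(store, "my-gpt4-alias") == "gpt-4"
        assert await resolve(store, "gpt-4o-mini") == "gpt-4o-mini"  # pass-through

    asyncio.run(main())
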
@@ -3,8 +3,6 @@
  #
  # This source code is licensed under the terms described in the LICENSE file in
  # the root directory of this source tree.
- import asyncio
- from typing import Any

  from llama_stack.apis.agents import (
      Order,
@@ -19,12 +17,12 @@ from llama_stack.apis.agents.openai_responses import (
  )
  from llama_stack.apis.inference import OpenAIMessageParam
  from llama_stack.core.datatypes import AccessRule
- from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference, StorageBackendType
+ from llama_stack.core.storage.datatypes import ResponsesStoreReference, SqlStoreReference
  from llama_stack.log import get_logger

  from ..sqlstore.api import ColumnDefinition, ColumnType
  from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
- from ..sqlstore.sqlstore import _SQLSTORE_BACKENDS, sqlstore_impl
+ from ..sqlstore.sqlstore import sqlstore_impl

  logger = get_logger(name=__name__, category="openai_responses")

@@ -55,28 +53,12 @@ class ResponsesStore:

          self.policy = policy
          self.sql_store = None
-         self.enable_write_queue = True
-
-         # Async write queue and worker control
-         self._queue: (
-             asyncio.Queue[tuple[OpenAIResponseObject, list[OpenAIResponseInput], list[OpenAIMessageParam]]] | None
-         ) = None
-         self._worker_tasks: list[asyncio.Task[Any]] = []
-         self._max_write_queue_size: int = self.reference.max_write_queue_size
-         self._num_writers: int = max(1, self.reference.num_writers)

      async def initialize(self):
          """Create the necessary tables if they don't exist."""
          base_store = sqlstore_impl(self.reference)
          self.sql_store = AuthorizedSqlStore(base_store, self.policy)

-         backend_config = _SQLSTORE_BACKENDS.get(self.reference.backend)
-         if backend_config is None:
-             raise ValueError(
-                 f"Unregistered SQL backend '{self.reference.backend}'. Registered backends: {sorted(_SQLSTORE_BACKENDS)}"
-             )
-         if backend_config.type == StorageBackendType.SQL_SQLITE:
-             self.enable_write_queue = False
-
          await self.sql_store.create_table(
              "openai_responses",
              {
@@ -95,32 +77,12 @@ class ResponsesStore:
              },
          )

-         if self.enable_write_queue:
-             self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
-             for _ in range(self._num_writers):
-                 self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
-         else:
-             logger.debug("Write queue disabled for SQLite to avoid concurrency issues")
-
      async def shutdown(self) -> None:
-         if not self._worker_tasks:
-             return
-         if self._queue is not None:
-             await self._queue.join()
-         for t in self._worker_tasks:
-             if not t.done():
-                 t.cancel()
-         for t in self._worker_tasks:
-             try:
-                 await t
-             except asyncio.CancelledError:
-                 pass
-         self._worker_tasks.clear()
+         return

      async def flush(self) -> None:
-         """Wait for all queued writes to complete. Useful for testing."""
-         if self.enable_write_queue and self._queue is not None:
-             await self._queue.join()
+         """Maintained for compatibility; no-op now that writes are synchronous."""
+         return

      async def store_response_object(
          self,
@@ -128,31 +90,7 @@ class ResponsesStore:
          input: list[OpenAIResponseInput],
          messages: list[OpenAIMessageParam],
      ) -> None:
-         if self.enable_write_queue:
-             if self._queue is None:
-                 raise ValueError("Responses store is not initialized")
-             try:
-                 self._queue.put_nowait((response_object, input, messages))
-             except asyncio.QueueFull:
-                 logger.warning(f"Write queue full; adding response id={getattr(response_object, 'id', '<unknown>')}")
-                 await self._queue.put((response_object, input, messages))
-         else:
-             await self._write_response_object(response_object, input, messages)
-
-     async def _worker_loop(self) -> None:
-         assert self._queue is not None
-         while True:
-             try:
-                 item = await self._queue.get()
-             except asyncio.CancelledError:
-                 break
-             response_object, input, messages = item
-             try:
-                 await self._write_response_object(response_object, input, messages)
-             except Exception as e:  # noqa: BLE001
-                 logger.error(f"Error writing response object: {e}")
-             finally:
-                 self._queue.task_done()
+         await self._write_response_object(response_object, input, messages)

      async def _write_response_object(
          self,