autobatcher 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ release:
5
+ types: [published]
6
+
7
+ permissions:
8
+ id-token: write # Required for trusted publishing
9
+
10
+ jobs:
11
+ publish:
12
+ runs-on: ubuntu-latest
13
+ environment:
14
+ name: pypi
15
+ url: https://pypi.org/p/autobatcher
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+
19
+ - name: Set up Python
20
+ uses: actions/setup-python@v5
21
+ with:
22
+ python-version: "3.12"
23
+
24
+ - name: Install build dependencies
25
+ run: pip install build
26
+
27
+ - name: Build package
28
+ run: python -m build
29
+
30
+ - name: Publish to PyPI
31
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,24 @@
1
+ name: Release Please
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+
8
+ permissions:
9
+ contents: write
10
+ pull-requests: write
11
+
12
+ jobs:
13
+ release-please:
14
+ runs-on: ubuntu-latest
15
+ outputs:
16
+ release_created: ${{ steps.release.outputs.release_created }}
17
+ tag_name: ${{ steps.release.outputs.tag_name }}
18
+ steps:
19
+ - uses: googleapis/release-please-action@v4
20
+ id: release
21
+ with:
22
+ token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
23
+ manifest-file: .release-please-manifest.json
24
+ config-file: release-please-config.json
@@ -0,0 +1,37 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ build/
8
+ dist/
9
+ *.egg-info/
10
+ *.egg
11
+
12
+ # Virtual environments
13
+ .venv/
14
+ venv/
15
+ ENV/
16
+
17
+ # IDE
18
+ .idea/
19
+ .vscode/
20
+ *.swp
21
+ *.swo
22
+
23
+ # Testing
24
+ .pytest_cache/
25
+ .coverage
26
+ htmlcov/
27
+
28
+ # mypy
29
+ .mypy_cache/
30
+
31
+ # Environment
32
+ .env
33
+ .env.local
34
+
35
+ # OS
36
+ .DS_Store
37
+ Thumbs.db
@@ -0,0 +1,3 @@
1
+ {
2
+ ".": "0.1.1"
3
+ }
@@ -0,0 +1,13 @@
1
+ # Changelog
2
+
3
+ ## [0.1.1](https://github.com/doublewordai/autobatcher/compare/autobatcher-v0.1.0...autobatcher-v0.1.1) (2026-01-05)
4
+
5
+
6
+ ### Features
7
+
8
+ * add release-please and PyPI trusted publishing workflows ([aea341d](https://github.com/doublewordai/autobatcher/commit/aea341d57d6fbee30f186547691bf6d1ff08f69d))
9
+
10
+
11
+ ### Documentation
12
+
13
+ * update readme ([64d8d24](https://github.com/doublewordai/autobatcher/commit/64d8d24640e18aa456d50a9a7b25d091bb7332a5))
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Doubleword AI
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,148 @@
1
+ Metadata-Version: 2.4
2
+ Name: autobatcher
3
+ Version: 0.1.1
4
+ Summary: Drop-in AsyncOpenAI replacement that transparently batches requests using the batch API
5
+ Project-URL: Homepage, https://github.com/doublewordai/autobatcher
6
+ Project-URL: Repository, https://github.com/doublewordai/autobatcher
7
+ Project-URL: Issues, https://github.com/doublewordai/autobatcher/issues
8
+ Author-email: Doubleword AI <hello@doubleword.ai>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: api,async,batch,inference,llm,openai
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Typing :: Typed
22
+ Requires-Python: >=3.10
23
+ Requires-Dist: httpx>=0.25.0
24
+ Requires-Dist: loguru>=0.7.0
25
+ Requires-Dist: openai>=1.0.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # autobatcher
29
+
30
+ Drop-in replacement for `AsyncOpenAI` that transparently batches requests using
31
+ OpenAI's (or compatible) [Batch
32
+ API](https://platform.openai.com/docs/guides/batch).
33
+
34
+ ## Why?
35
+
36
+ Batch LLM APIs (like OpenAI's) offer 50% cost savings, but require you to
37
+ restructure your code around file uploads and polling. **autobatcher** lets you
38
+ keep your existing async code while getting batch pricing automatically.
39
+
40
+ ```python
41
+ # Before: regular async calls (full price)
42
+ from openai import AsyncOpenAI
43
+ client = AsyncOpenAI()
44
+
45
+ # After: batched calls (50% off)
46
+ from autobatcher import BatchOpenAI
47
+ client = BatchOpenAI()
48
+
49
+ # Same interface, same code
50
+ response = await client.chat.completions.create(
51
+ model="gpt-4o",
52
+ messages=[{"role": "user", "content": "Hello!"}]
53
+ )
54
+ ```
55
+
56
+ ## How it works
57
+
58
+ 1. Requests are collected over a configurable time window (default: 1 second)
59
+ 2. When the window closes or batch size is reached, requests are submitted as a batch
60
+ 3. Results are polled and returned to waiting callers as they complete
61
+ 4. Your code sees normal `ChatCompletion` responses
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install autobatcher
67
+ ```
68
+
69
+ ## Usage
70
+
71
+ ```python
72
+ import asyncio
73
+ from autobatcher import BatchOpenAI
74
+
75
+ async def main():
76
+ client = BatchOpenAI(
77
+ api_key="sk-...", # or set OPENAI_API_KEY env var
78
+ batch_size=100, # submit batch when this many requests queued
79
+ batch_window_seconds=1.0, # or after this many seconds
80
+ poll_interval_seconds=5.0, # how often to check for results
81
+ )
82
+
83
+ # Use exactly like AsyncOpenAI
84
+ response = await client.chat.completions.create(
85
+ model="gpt-4o",
86
+ messages=[{"role": "user", "content": "What is 2+2?"}],
87
+ )
88
+ print(response.choices[0].message.content)
89
+
90
+ await client.close()
91
+
92
+ asyncio.run(main())
93
+ ```
94
+
95
+ ### Parallel requests
96
+
97
+ The real power comes when you have many requests:
98
+
99
+ ```python
100
+ async def process_many(prompts: list[str]) -> list[str]:
101
+ client = BatchOpenAI(batch_size=50, batch_window_seconds=2.0)
102
+
103
+ async def get_response(prompt: str) -> str:
104
+ response = await client.chat.completions.create(
105
+ model="gpt-4o-mini",
106
+ messages=[{"role": "user", "content": prompt}],
107
+ )
108
+ return response.choices[0].message.content
109
+
110
+ # All requests are batched together automatically
111
+ results = await asyncio.gather(*[get_response(p) for p in prompts])
112
+
113
+ await client.close()
114
+ return results
115
+ ```
116
+
117
+ ### Context manager
118
+
119
+ ```python
120
+ async with BatchOpenAI() as client:
121
+ response = await client.chat.completions.create(...)
122
+ ```
123
+
124
+ ## Configuration
125
+
126
+ | Parameter | Default | Description |
127
+ |-----------|---------|-------------|
128
+ | `api_key` | `None` | OpenAI API key (falls back to `OPENAI_API_KEY` env var) |
129
+ | `base_url` | `None` | API base URL (for proxies or compatible APIs) |
130
+ | `batch_size` | `100` | Submit batch when this many requests are queued |
131
+ | `batch_window_seconds` | `1.0` | Submit batch after this many seconds |
132
+ | `poll_interval_seconds` | `5.0` | How often to poll for batch completion |
133
+ | `completion_window` | `"24h"` | Batch completion window (`"24h"` or `"1h"`) |
134
+
135
+ ## Limitations
136
+
137
+ - Only `chat.completions.create` is supported for now
138
+ - Batch API has a 24-hour completion window by default
139
+ - No escalations when the completion window elapses
140
+ - Not suitable for real-time/interactive use cases
141
+ - This library is designed for use with the [Doubleword batched
142
+ API](https://docs.doubleword.ai/batches/getting-started-with-batched-api).
143
+ Support for OpenAI's batch API or other compatible APIs is best effort. If you
144
+ experience any issues, please open an issue.
145
+
146
+ ## License
147
+
148
+ MIT
@@ -0,0 +1,121 @@
1
+ # autobatcher
2
+
3
+ Drop-in replacement for `AsyncOpenAI` that transparently batches requests using
4
+ OpenAI's (or compatible) [Batch
5
+ API](https://platform.openai.com/docs/guides/batch).
6
+
7
+ ## Why?
8
+
9
+ Batch LLM APIs (like OpenAI's) offer 50% cost savings, but require you to
10
+ restructure your code around file uploads and polling. **autobatcher** lets you
11
+ keep your existing async code while getting batch pricing automatically.
12
+
13
+ ```python
14
+ # Before: regular async calls (full price)
15
+ from openai import AsyncOpenAI
16
+ client = AsyncOpenAI()
17
+
18
+ # After: batched calls (50% off)
19
+ from autobatcher import BatchOpenAI
20
+ client = BatchOpenAI()
21
+
22
+ # Same interface, same code
23
+ response = await client.chat.completions.create(
24
+ model="gpt-4o",
25
+ messages=[{"role": "user", "content": "Hello!"}]
26
+ )
27
+ ```
28
+
29
+ ## How it works
30
+
31
+ 1. Requests are collected over a configurable time window (default: 1 second)
32
+ 2. When the window closes or batch size is reached, requests are submitted as a batch
33
+ 3. Results are polled and returned to waiting callers as they complete
34
+ 4. Your code sees normal `ChatCompletion` responses
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install autobatcher
40
+ ```
41
+
42
+ ## Usage
43
+
44
+ ```python
45
+ import asyncio
46
+ from autobatcher import BatchOpenAI
47
+
48
+ async def main():
49
+ client = BatchOpenAI(
50
+ api_key="sk-...", # or set OPENAI_API_KEY env var
51
+ batch_size=100, # submit batch when this many requests queued
52
+ batch_window_seconds=1.0, # or after this many seconds
53
+ poll_interval_seconds=5.0, # how often to check for results
54
+ )
55
+
56
+ # Use exactly like AsyncOpenAI
57
+ response = await client.chat.completions.create(
58
+ model="gpt-4o",
59
+ messages=[{"role": "user", "content": "What is 2+2?"}],
60
+ )
61
+ print(response.choices[0].message.content)
62
+
63
+ await client.close()
64
+
65
+ asyncio.run(main())
66
+ ```
67
+
68
+ ### Parallel requests
69
+
70
+ The real power comes when you have many requests:
71
+
72
+ ```python
73
+ async def process_many(prompts: list[str]) -> list[str]:
74
+ client = BatchOpenAI(batch_size=50, batch_window_seconds=2.0)
75
+
76
+ async def get_response(prompt: str) -> str:
77
+ response = await client.chat.completions.create(
78
+ model="gpt-4o-mini",
79
+ messages=[{"role": "user", "content": prompt}],
80
+ )
81
+ return response.choices[0].message.content
82
+
83
+ # All requests are batched together automatically
84
+ results = await asyncio.gather(*[get_response(p) for p in prompts])
85
+
86
+ await client.close()
87
+ return results
88
+ ```
89
+
90
+ ### Context manager
91
+
92
+ ```python
93
+ async with BatchOpenAI() as client:
94
+ response = await client.chat.completions.create(...)
95
+ ```
96
+
97
+ ## Configuration
98
+
99
+ | Parameter | Default | Description |
100
+ |-----------|---------|-------------|
101
+ | `api_key` | `None` | OpenAI API key (falls back to `OPENAI_API_KEY` env var) |
102
+ | `base_url` | `None` | API base URL (for proxies or compatible APIs) |
103
+ | `batch_size` | `100` | Submit batch when this many requests are queued |
104
+ | `batch_window_seconds` | `1.0` | Submit batch after this many seconds |
105
+ | `poll_interval_seconds` | `5.0` | How often to poll for batch completion |
106
+ | `completion_window` | `"24h"` | Batch completion window (`"24h"` or `"1h"`) |
107
+
108
+ ## Limitations
109
+
110
+ - Only `chat.completions.create` is supported for now
111
+ - Batch API has a 24-hour completion window by default
112
+ - No escalations when the completion window elapses
113
+ - Not suitable for real-time/interactive use cases
114
+ - This library is designed for use with the [Doubleword batched
115
+ API](https://docs.doubleword.ai/batches/getting-started-with-batched-api).
116
+ Support for OpenAI's batch API or other compatible APIs is best effort. If you
117
+ experience any issues, please open an issue.
118
+
119
+ ## License
120
+
121
+ MIT
@@ -0,0 +1,40 @@
1
+ [project]
2
+ name = "autobatcher"
3
+ version = "0.1.1"
4
+ description = "Drop-in AsyncOpenAI replacement that transparently batches requests using the batch API"
5
+ readme = "README.md"
6
+ license = "MIT"
7
+ requires-python = ">=3.10"
8
+ authors = [
9
+ { name = "Doubleword AI", email = "hello@doubleword.ai" }
10
+ ]
11
+ keywords = ["openai", "batch", "llm", "async", "api", "inference"]
12
+ classifiers = [
13
+ "Development Status :: 4 - Beta",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.10",
18
+ "Programming Language :: Python :: 3.11",
19
+ "Programming Language :: Python :: 3.12",
20
+ "Programming Language :: Python :: 3.13",
21
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
22
+ "Typing :: Typed",
23
+ ]
24
+ dependencies = [
25
+ "openai>=1.0.0",
26
+ "httpx>=0.25.0",
27
+ "loguru>=0.7.0",
28
+ ]
29
+
30
+ [project.urls]
31
+ Homepage = "https://github.com/doublewordai/autobatcher"
32
+ Repository = "https://github.com/doublewordai/autobatcher"
33
+ Issues = "https://github.com/doublewordai/autobatcher/issues"
34
+
35
+ [build-system]
36
+ requires = ["hatchling"]
37
+ build-backend = "hatchling.build"
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["src/autobatcher"]
@@ -0,0 +1,13 @@
1
+ {
2
+ "packages": {
3
+ ".": {
4
+ "release-type": "python",
5
+ "package-name": "autobatcher",
6
+ "bump-minor-pre-major": true,
7
+ "bump-patch-for-minor-pre-major": true,
8
+ "extra-files": [
9
+ "src/autobatcher/__init__.py"
10
+ ]
11
+ }
12
+ }
13
+ }
@@ -0,0 +1,17 @@
1
+ """
2
+ Autobatcher: Drop-in AsyncOpenAI replacement that transparently batches requests.
3
+
4
+ Usage:
5
+ from autobatcher import BatchOpenAI
6
+
7
+ client = BatchOpenAI(api_key="...")
8
+ response = await client.chat.completions.create(
9
+ model="gpt-4o",
10
+ messages=[{"role": "user", "content": "Hello!"}]
11
+ )
12
+ """
13
+
14
+ from .client import BatchOpenAI
15
+
16
+ __version__ = "0.1.1"
17
+ __all__ = ["BatchOpenAI"]
@@ -0,0 +1,446 @@
1
+ """
2
+ BatchOpenAI: A drop-in replacement for AsyncOpenAI that uses the batch API.
3
+
4
+ Collects requests over a time window or until a size threshold, submits them
5
+ as a batch, polls for results, and returns them to waiting callers.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import json
12
+ import io
13
+ import uuid
14
+ import time
15
+ from dataclasses import dataclass
16
+ from typing import Any, Literal
17
+
18
+ import httpx
19
+ from loguru import logger
20
+ from openai import AsyncOpenAI
21
+ from openai.types.chat import ChatCompletion
22
+
23
+
24
@dataclass
class _PendingRequest:
    """A request waiting to be batched."""

    # Unique ID (a UUID4 string) used to match this request to its line in
    # the batch output JSONL file.
    custom_id: str
    # Keyword arguments for chat.completions.create: model, messages, and
    # any extra parameters passed through by the caller.
    params: dict[str, Any]
    # Resolved with the ChatCompletion (or an exception) once results for
    # this request arrive; the enqueuing caller awaits this future.
    future: asyncio.Future[ChatCompletion]
31
+
32
+
33
@dataclass
class _ActiveBatch:
    """A batch that has been submitted and is being polled."""

    # Server-side batch job ID returned by batches.create().
    batch_id: str
    # Output file ID; may be "" at submission time and is filled in later
    # by the poller once the server reports it.
    output_file_id: str
    # Error file ID; may be "" if the server has not produced one.
    error_file_id: str
    requests: dict[str, _PendingRequest]  # custom_id -> request
    # time.time() at submission; informational (not read elsewhere in this module).
    created_at: float
    last_offset: int = 0  # Track offset for partial result streaming
43
+
44
+
45
+ class _ChatCompletions:
46
+ """Proxy for chat.completions that batches requests."""
47
+
48
+ def __init__(self, client: BatchOpenAI):
49
+ self._client = client
50
+
51
+ async def create(
52
+ self,
53
+ *,
54
+ model: str,
55
+ messages: list[dict[str, Any]],
56
+ **kwargs: Any,
57
+ ) -> ChatCompletion:
58
+ """
59
+ Create a chat completion. The request is queued and batched.
60
+
61
+ Returns when the batch completes and results are available.
62
+ """
63
+ return await self._client._enqueue_request(
64
+ model=model,
65
+ messages=messages,
66
+ **kwargs,
67
+ )
68
+
69
+
70
class _Chat:
    """Proxy for chat namespace.

    Mirrors the ``AsyncOpenAI.chat`` attribute layout so that
    ``client.chat.completions.create(...)`` works unchanged.
    """

    def __init__(self, client: BatchOpenAI):
        # Expose .completions, the only sub-namespace this library supports.
        self.completions = _ChatCompletions(client)
75
+
76
+
77
class BatchOpenAI:
    """
    Drop-in replacement for AsyncOpenAI that uses the batch API.

    Requests are collected and submitted as batches based on size and time
    thresholds. Results are polled and returned to waiting callers.

    Usage:
        client = BatchOpenAI(
            api_key="...",
            base_url="https://api.doubleword.ai/v1",
            batch_size=100,
            batch_window_seconds=1.0,
        )

        # Use exactly like AsyncOpenAI
        response = await client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": "Hello!"}],
        )
    """

    def __init__(
        self,
        *,
        api_key: str | None = None,
        base_url: str | None = None,
        batch_size: int = 100,
        batch_window_seconds: float = 1.0,
        poll_interval_seconds: float = 5.0,
        completion_window: Literal["24h", "1h"] = "24h",
        **openai_kwargs: Any,
    ):
        """
        Initialize BatchOpenAI.

        Args:
            api_key: API key for the OpenAI-compatible endpoint
            base_url: Base URL for the API (e.g., "https://api.doubleword.ai/v1")
            batch_size: Submit batch when this many requests are queued
            batch_window_seconds: Submit batch after this many seconds, even if size not reached
            poll_interval_seconds: How often to poll for batch completion
            completion_window: Batch completion window ("24h" or "1h")
            **openai_kwargs: Additional arguments passed to AsyncOpenAI
        """
        # SDK client used for file upload, batch creation, and status polling.
        self._openai = AsyncOpenAI(
            api_key=api_key,
            base_url=base_url,
            **openai_kwargs,
        )
        # Normalized base URL for raw HTTP file-content fetches below.
        self._base_url = (base_url or "https://api.openai.com/v1").rstrip("/")
        self._api_key = api_key
        self._batch_size = batch_size
        self._batch_window_seconds = batch_window_seconds
        self._poll_interval_seconds = poll_interval_seconds
        self._completion_window = completion_window

        # HTTP client for raw requests (needed to access response headers for partial results)
        self._http_client = httpx.AsyncClient(
            headers={"Authorization": f"Bearer {api_key}"} if api_key else {},
            timeout=httpx.Timeout(60.0),
        )

        # Request collection
        self._pending: list[_PendingRequest] = []
        self._pending_lock = asyncio.Lock()
        self._window_task: asyncio.Task[None] | None = None

        # Active batches being polled
        self._active_batches: list[_ActiveBatch] = []
        self._poller_task: asyncio.Task[None] | None = None

        # Public interface matching AsyncOpenAI
        self.chat = _Chat(self)

        logger.debug("Initialized with batch_size={}, window={}s", batch_size, batch_window_seconds)

    async def _enqueue_request(
        self,
        model: str,
        messages: list[dict[str, Any]],
        **kwargs: Any,
    ) -> ChatCompletion:
        """Add a request to the pending queue and return when result is ready.

        Raises whatever exception the batch lifecycle sets on the future
        (submission failure, batch failed/expired/cancelled, missing result).
        """
        loop = asyncio.get_running_loop()
        future: asyncio.Future[ChatCompletion] = loop.create_future()

        request = _PendingRequest(
            custom_id=str(uuid.uuid4()),
            params={
                "model": model,
                "messages": messages,
                **kwargs,
            },
            future=future,
        )

        async with self._pending_lock:
            self._pending.append(request)
            pending_count = len(self._pending)

            # Start window timer if this is the first request
            if pending_count == 1:
                logger.debug("Starting {}s batch window timer", self._batch_window_seconds)
                self._window_task = asyncio.create_task(
                    self._window_timer(),
                    name="batch_window_timer"
                )

            # Check if we've hit the size threshold
            if pending_count >= self._batch_size:
                logger.debug("Batch size {} reached", self._batch_size)
                # NOTE(review): awaited while holding _pending_lock, so the
                # network round-trips inside _submit_batch block concurrent
                # enqueues until submission completes.
                await self._submit_batch()

        return await future

    async def _window_timer(self) -> None:
        """Timer that triggers batch submission after the window elapses."""
        try:
            await asyncio.sleep(self._batch_window_seconds)
            async with self._pending_lock:
                if self._pending:
                    await self._submit_batch()
        except asyncio.CancelledError:
            # Normal path when the size threshold submitted the batch first.
            logger.debug("Window timer cancelled")
            raise
        except Exception as e:
            logger.error("Window timer error: {}", e)
            # Fail all pending futures
            for req in self._pending:
                if not req.future.done():
                    req.future.set_exception(e)
            raise

    async def _submit_batch(self) -> None:
        """Submit all pending requests as a batch.

        Both call sites (_enqueue_request and _window_timer) hold
        ``self._pending_lock`` when invoking this.
        """
        if not self._pending:
            return

        # Cancel the window timer if running (but not if we ARE the window timer)
        current_task = asyncio.current_task()
        if self._window_task and not self._window_task.done() and self._window_task is not current_task:
            self._window_task.cancel()
            self._window_task = None

        # Take all pending requests
        requests = self._pending
        self._pending = []

        # Create JSONL content in the batch API's request format
        lines = []
        for req in requests:
            line = {
                "custom_id": req.custom_id,
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": req.params,
            }
            lines.append(json.dumps(line))
        content = "\n".join(lines)

        try:
            # Upload the batch file using BytesIO
            file_obj = io.BytesIO(content.encode("utf-8"))
            filename = f"batch-{uuid.uuid4()}.jsonl"

            file_response = await self._openai.files.create(
                file=(filename, file_obj, "application/jsonl"),
                purpose="batch",
            )
            logger.debug("Uploaded batch file: {}", file_response.id)

            # Create the batch
            batch_response = await self._openai.batches.create(
                input_file_id=file_response.id,
                endpoint="/v1/chat/completions",
                completion_window=self._completion_window,
            )
            logger.info("Submitted batch {} with {} requests", batch_response.id, len(requests))

            # Track the active batch
            active_batch = _ActiveBatch(
                batch_id=batch_response.id,
                output_file_id=batch_response.output_file_id or "",
                error_file_id=batch_response.error_file_id or "",
                requests={req.custom_id: req for req in requests},
                created_at=time.time(),
            )
            self._active_batches.append(active_batch)

            # Start the poller if not running
            if self._poller_task is None or self._poller_task.done():
                self._poller_task = asyncio.create_task(
                    self._poll_batches(),
                    name="batch_poller"
                )

        except Exception as e:
            logger.error("Batch submission failed: {}", e)
            # If batch submission fails, fail all waiting requests
            for req in requests:
                if not req.future.done():
                    req.future.set_exception(e)

    async def _poll_batches(self) -> None:
        """Poll active batches for completion and distribute results.

        Runs until ``self._active_batches`` empties; a fresh poller task is
        started by _submit_batch when a batch arrives after this exits.
        """
        logger.debug("Poller started with {} active batches", len(self._active_batches))

        while self._active_batches:
            await asyncio.sleep(self._poll_interval_seconds)

            completed_indices = []

            for i, batch in enumerate(self._active_batches):
                try:
                    status = await self._openai.batches.retrieve(batch.batch_id)
                    counts = status.request_counts
                    logger.debug(
                        "Batch {} status: {} (completed={}/{})",
                        batch.batch_id[:12], status.status,
                        counts.completed if counts else 0,
                        counts.total if counts else 0
                    )

                    # Update output_file_id if it becomes available
                    if status.output_file_id and not batch.output_file_id:
                        batch.output_file_id = status.output_file_id

                    if status.status == "completed":
                        await self._process_completed_batch(batch, status.output_file_id)
                        completed_indices.append(i)
                        logger.info("Batch {} completed", batch.batch_id)
                    elif status.status in ("failed", "expired", "cancelled"):
                        # Terminal failure: fail every still-waiting caller.
                        logger.error("Batch {} {}", batch.batch_id, status.status)
                        error = Exception(f"Batch {batch.batch_id} {status.status}")
                        for req in batch.requests.values():
                            if not req.future.done():
                                req.future.set_exception(error)
                        completed_indices.append(i)
                    elif status.status in ("in_progress", "validating", "finalizing"):
                        # Fetch partial results if output file is available
                        if batch.output_file_id:
                            await self._fetch_partial_results(batch, batch.output_file_id)

                except Exception as e:
                    # Transient poll errors are logged and retried next cycle.
                    logger.error("Error polling batch {}: {}", batch.batch_id, e)

            # Remove completed batches (in reverse order to preserve indices)
            for i in reversed(completed_indices):
                self._active_batches.pop(i)

        logger.debug("Poller finished")

    async def _fetch_partial_results(self, batch: _ActiveBatch, output_file_id: str) -> bool:
        """
        Fetch partial results from an in-progress batch and resolve available futures.

        Uses the Doubleword API's partial result streaming:
        - X-Incomplete header indicates if more results are coming
        - X-Last-Line header tracks progress for resumption
        - ?offset= query param fetches only new results

        Returns True if there are more results to fetch, False if complete.
        """
        url = f"{self._base_url}/files/{output_file_id}/content"
        if batch.last_offset > 0:
            url = f"{url}?offset={batch.last_offset}"

        try:
            response = await self._http_client.get(url)
            response.raise_for_status()

            # On a plain OpenAI backend these headers are absent, so
            # is_incomplete defaults to False — presumably the full file is
            # then returned in one response (TODO confirm against backend).
            is_incomplete = response.headers.get("X-Incomplete", "").lower() == "true"
            last_line = response.headers.get("X-Last-Line")

            text = response.text
            if not text.strip():
                return is_incomplete

            # Parse each line and resolve the corresponding future
            resolved = 0
            for line in text.strip().split("\n"):
                if not line:
                    continue

                result = json.loads(line)
                custom_id = result.get("custom_id")

                # Handle both success and error responses
                response_data = result.get("response", {})
                error_data = result.get("error")

                if custom_id in batch.requests:
                    req = batch.requests[custom_id]
                    if not req.future.done():
                        if error_data:
                            req.future.set_exception(
                                Exception(f"Request {custom_id} failed: {error_data}")
                            )
                        else:
                            response_body = response_data.get("body", {})
                            completion = ChatCompletion.model_validate(response_body)
                            req.future.set_result(completion)
                        resolved += 1

            # Update offset for next fetch
            if last_line:
                batch.last_offset = int(last_line)

            if resolved > 0:
                pending = sum(1 for req in batch.requests.values() if not req.future.done())
                logger.debug("Resolved {} partial results, {} pending", resolved, pending)

            return is_incomplete

        except httpx.HTTPStatusError as e:
            if e.response.status_code == 404:
                # File not ready yet, this is normal for early polling
                return True
            logger.debug("HTTP error fetching partial results: {}", e)
            return True
        except Exception as e:
            # Best-effort: any fetch/parse error just defers to the next poll.
            logger.debug("Error fetching partial results: {}", e)
            return True

    async def _process_completed_batch(
        self, batch: _ActiveBatch, output_file_id: str | None
    ) -> None:
        """Fetch any remaining results and ensure all futures are resolved.

        After this returns, every future in ``batch.requests`` is done —
        either with a result or with an exception.
        """
        if not output_file_id:
            logger.error("Batch {} completed but no output file", batch.batch_id)
            error = Exception(f"Batch {batch.batch_id} completed but no output file")
            for req in batch.requests.values():
                if not req.future.done():
                    req.future.set_exception(error)
            return

        try:
            # Fetch any remaining results using the partial results mechanism
            # This continues from where we left off (using batch.last_offset)
            await self._fetch_partial_results(batch, output_file_id)

            # Handle any requests that didn't get results
            for req in batch.requests.values():
                if not req.future.done():
                    logger.warning("No result for request {}", req.custom_id)
                    req.future.set_exception(
                        Exception(f"No result for request {req.custom_id}")
                    )

        except Exception as e:
            logger.error("Error processing batch results: {}", e)
            for req in batch.requests.values():
                if not req.future.done():
                    req.future.set_exception(e)

    async def close(self) -> None:
        """Close the client and cancel any pending operations.

        NOTE(review): the timer/poller tasks are cancelled but not awaited
        here; their cancellation completes later on the event loop.
        """
        if self._window_task and not self._window_task.done():
            self._window_task.cancel()
        if self._poller_task and not self._poller_task.done():
            self._poller_task.cancel()
        await self._http_client.aclose()
        await self._openai.close()

    async def __aenter__(self) -> BatchOpenAI:
        return self

    async def __aexit__(self, *args: Any) -> None:
        await self.close()
File without changes