autobatcher 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- autobatcher-0.1.1/.github/workflows/publish.yml +31 -0
- autobatcher-0.1.1/.github/workflows/release-please.yml +24 -0
- autobatcher-0.1.1/.gitignore +37 -0
- autobatcher-0.1.1/.release-please-manifest.json +3 -0
- autobatcher-0.1.1/CHANGELOG.md +13 -0
- autobatcher-0.1.1/LICENSE +21 -0
- autobatcher-0.1.1/PKG-INFO +148 -0
- autobatcher-0.1.1/README.md +121 -0
- autobatcher-0.1.1/pyproject.toml +40 -0
- autobatcher-0.1.1/release-please-config.json +13 -0
- autobatcher-0.1.1/src/autobatcher/__init__.py +17 -0
- autobatcher-0.1.1/src/autobatcher/client.py +446 -0
- autobatcher-0.1.1/src/autobatcher/py.typed +0 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
id-token: write # Required for trusted publishing
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
publish:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
environment:
|
|
14
|
+
name: pypi
|
|
15
|
+
url: https://pypi.org/p/autobatcher
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
uses: actions/setup-python@v5
|
|
21
|
+
with:
|
|
22
|
+
python-version: "3.12"
|
|
23
|
+
|
|
24
|
+
- name: Install build dependencies
|
|
25
|
+
run: pip install build
|
|
26
|
+
|
|
27
|
+
- name: Build package
|
|
28
|
+
run: python -m build
|
|
29
|
+
|
|
30
|
+
- name: Publish to PyPI
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
name: Release Please
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
pull-requests: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
release-please:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
outputs:
|
|
16
|
+
release_created: ${{ steps.release.outputs.release_created }}
|
|
17
|
+
tag_name: ${{ steps.release.outputs.tag_name }}
|
|
18
|
+
steps:
|
|
19
|
+
- uses: googleapis/release-please-action@v4
|
|
20
|
+
id: release
|
|
21
|
+
with:
|
|
22
|
+
token: ${{ secrets.RELEASE_PLEASE_TOKEN }}
|
|
23
|
+
manifest-file: .release-please-manifest.json
|
|
24
|
+
config-file: release-please-config.json
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# Distribution / packaging
|
|
7
|
+
build/
|
|
8
|
+
dist/
|
|
9
|
+
*.egg-info/
|
|
10
|
+
*.egg
|
|
11
|
+
|
|
12
|
+
# Virtual environments
|
|
13
|
+
.venv/
|
|
14
|
+
venv/
|
|
15
|
+
ENV/
|
|
16
|
+
|
|
17
|
+
# IDE
|
|
18
|
+
.idea/
|
|
19
|
+
.vscode/
|
|
20
|
+
*.swp
|
|
21
|
+
*.swo
|
|
22
|
+
|
|
23
|
+
# Testing
|
|
24
|
+
.pytest_cache/
|
|
25
|
+
.coverage
|
|
26
|
+
htmlcov/
|
|
27
|
+
|
|
28
|
+
# mypy
|
|
29
|
+
.mypy_cache/
|
|
30
|
+
|
|
31
|
+
# Environment
|
|
32
|
+
.env
|
|
33
|
+
.env.local
|
|
34
|
+
|
|
35
|
+
# OS
|
|
36
|
+
.DS_Store
|
|
37
|
+
Thumbs.db
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.1](https://github.com/doublewordai/autobatcher/compare/autobatcher-v0.1.0...autobatcher-v0.1.1) (2026-01-05)
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
### Features
|
|
7
|
+
|
|
8
|
+
* add release-please and PyPI trusted publishing workflows ([aea341d](https://github.com/doublewordai/autobatcher/commit/aea341d57d6fbee30f186547691bf6d1ff08f69d))
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
### Documentation
|
|
12
|
+
|
|
13
|
+
* update readme ([64d8d24](https://github.com/doublewordai/autobatcher/commit/64d8d24640e18aa456d50a9a7b25d091bb7332a5))
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Doubleword AI
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: autobatcher
|
|
3
|
+
Version: 0.1.1
|
|
4
|
+
Summary: Drop-in AsyncOpenAI replacement that transparently batches requests using the batch API
|
|
5
|
+
Project-URL: Homepage, https://github.com/doublewordai/autobatcher
|
|
6
|
+
Project-URL: Repository, https://github.com/doublewordai/autobatcher
|
|
7
|
+
Project-URL: Issues, https://github.com/doublewordai/autobatcher/issues
|
|
8
|
+
Author-email: Doubleword AI <hello@doubleword.ai>
|
|
9
|
+
License-Expression: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: api,async,batch,inference,llm,openai
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Classifier: Typing :: Typed
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Requires-Dist: httpx>=0.25.0
|
|
24
|
+
Requires-Dist: loguru>=0.7.0
|
|
25
|
+
Requires-Dist: openai>=1.0.0
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
|
|
28
|
+
# autobatcher
|
|
29
|
+
|
|
30
|
+
Drop-in replacement for `AsyncOpenAI` that transparently batches requests using
|
|
31
|
+
OpenAI's (or compatible) [Batch
|
|
32
|
+
API](https://platform.openai.com/docs/guides/batch).
|
|
33
|
+
|
|
34
|
+
## Why?
|
|
35
|
+
|
|
36
|
+
Batch LLM APIs (like OpenAI's) offers 50% cost savings, but requires you to
|
|
37
|
+
restructure your code around file uploads and polling. **autobatcher** lets you
|
|
38
|
+
keep your existing async code while getting batch pricing automatically.
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
# Before: regular async calls (full price)
|
|
42
|
+
from openai import AsyncOpenAI
|
|
43
|
+
client = AsyncOpenAI()
|
|
44
|
+
|
|
45
|
+
# After: batched calls (50% off)
|
|
46
|
+
from autobatcher import BatchOpenAI
|
|
47
|
+
client = BatchOpenAI()
|
|
48
|
+
|
|
49
|
+
# Same interface, same code
|
|
50
|
+
response = await client.chat.completions.create(
|
|
51
|
+
model="gpt-4o",
|
|
52
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
53
|
+
)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## How it works
|
|
57
|
+
|
|
58
|
+
1. Requests are collected over a configurable time window (default: 1 second)
|
|
59
|
+
2. When the window closes or batch size is reached, requests are submitted as a batch
|
|
60
|
+
3. Results are polled and returned to waiting callers as they complete
|
|
61
|
+
4. Your code sees normal `ChatCompletion` responses
|
|
62
|
+
|
|
63
|
+
## Installation
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install autobatcher
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Usage
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
import asyncio
|
|
73
|
+
from autobatcher import BatchOpenAI
|
|
74
|
+
|
|
75
|
+
async def main():
|
|
76
|
+
client = BatchOpenAI(
|
|
77
|
+
api_key="sk-...", # or set OPENAI_API_KEY env var
|
|
78
|
+
batch_size=100, # submit batch when this many requests queued
|
|
79
|
+
batch_window_seconds=1.0, # or after this many seconds
|
|
80
|
+
poll_interval_seconds=5.0, # how often to check for results
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# Use exactly like AsyncOpenAI
|
|
84
|
+
response = await client.chat.completions.create(
|
|
85
|
+
model="gpt-4o",
|
|
86
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
87
|
+
)
|
|
88
|
+
print(response.choices[0].message.content)
|
|
89
|
+
|
|
90
|
+
await client.close()
|
|
91
|
+
|
|
92
|
+
asyncio.run(main())
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### Parallel requests
|
|
96
|
+
|
|
97
|
+
The real power comes when you have many requests:
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
async def process_many(prompts: list[str]) -> list[str]:
|
|
101
|
+
client = BatchOpenAI(batch_size=50, batch_window_seconds=2.0)
|
|
102
|
+
|
|
103
|
+
async def get_response(prompt: str) -> str:
|
|
104
|
+
response = await client.chat.completions.create(
|
|
105
|
+
model="gpt-4o-mini",
|
|
106
|
+
messages=[{"role": "user", "content": prompt}],
|
|
107
|
+
)
|
|
108
|
+
return response.choices[0].message.content
|
|
109
|
+
|
|
110
|
+
# All requests are batched together automatically
|
|
111
|
+
results = await asyncio.gather(*[get_response(p) for p in prompts])
|
|
112
|
+
|
|
113
|
+
await client.close()
|
|
114
|
+
return results
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Context manager
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
async with BatchOpenAI() as client:
|
|
121
|
+
response = await client.chat.completions.create(...)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
| Parameter | Default | Description |
|
|
127
|
+
|-----------|---------|-------------|
|
|
128
|
+
| `api_key` | `None` | OpenAI API key (falls back to `OPENAI_API_KEY` env var) |
|
|
129
|
+
| `base_url` | `None` | API base URL (for proxies or compatible APIs) |
|
|
130
|
+
| `batch_size` | `100` | Submit batch when this many requests are queued |
|
|
131
|
+
| `batch_window_seconds` | `1.0` | Submit batch after this many seconds |
|
|
132
|
+
| `poll_interval_seconds` | `5.0` | How often to poll for batch completion |
|
|
133
|
+
| `completion_window` | `"24h"` | Batch completion window (`"24h"` or `"1h"`) |
|
|
134
|
+
|
|
135
|
+
## Limitations
|
|
136
|
+
|
|
137
|
+
- Only `chat.completions.create` is supported for now
|
|
138
|
+
- Batch API has a 24-hour completion window by default
|
|
139
|
+
- No escalations when the completion window elapses
|
|
140
|
+
- Not suitable for real-time/interactive use cases
|
|
141
|
+
- This library is designed or use with the [Doubleword batched
|
|
142
|
+
API](https://docs.doubleword.ai/batches/getting-started-with-batched-api).
|
|
143
|
+
Support for OpenAI's batch API or other compatible APIs is best effort. If you
|
|
144
|
+
experience any issues, please open an issue.
|
|
145
|
+
|
|
146
|
+
## License
|
|
147
|
+
|
|
148
|
+
MIT
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
# autobatcher
|
|
2
|
+
|
|
3
|
+
Drop-in replacement for `AsyncOpenAI` that transparently batches requests using
|
|
4
|
+
OpenAI's (or compatible) [Batch
|
|
5
|
+
API](https://platform.openai.com/docs/guides/batch).
|
|
6
|
+
|
|
7
|
+
## Why?
|
|
8
|
+
|
|
9
|
+
Batch LLM APIs (like OpenAI's) offers 50% cost savings, but requires you to
|
|
10
|
+
restructure your code around file uploads and polling. **autobatcher** lets you
|
|
11
|
+
keep your existing async code while getting batch pricing automatically.
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
# Before: regular async calls (full price)
|
|
15
|
+
from openai import AsyncOpenAI
|
|
16
|
+
client = AsyncOpenAI()
|
|
17
|
+
|
|
18
|
+
# After: batched calls (50% off)
|
|
19
|
+
from autobatcher import BatchOpenAI
|
|
20
|
+
client = BatchOpenAI()
|
|
21
|
+
|
|
22
|
+
# Same interface, same code
|
|
23
|
+
response = await client.chat.completions.create(
|
|
24
|
+
model="gpt-4o",
|
|
25
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
26
|
+
)
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## How it works
|
|
30
|
+
|
|
31
|
+
1. Requests are collected over a configurable time window (default: 1 second)
|
|
32
|
+
2. When the window closes or batch size is reached, requests are submitted as a batch
|
|
33
|
+
3. Results are polled and returned to waiting callers as they complete
|
|
34
|
+
4. Your code sees normal `ChatCompletion` responses
|
|
35
|
+
|
|
36
|
+
## Installation
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install autobatcher
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Usage
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
import asyncio
|
|
46
|
+
from autobatcher import BatchOpenAI
|
|
47
|
+
|
|
48
|
+
async def main():
|
|
49
|
+
client = BatchOpenAI(
|
|
50
|
+
api_key="sk-...", # or set OPENAI_API_KEY env var
|
|
51
|
+
batch_size=100, # submit batch when this many requests queued
|
|
52
|
+
batch_window_seconds=1.0, # or after this many seconds
|
|
53
|
+
poll_interval_seconds=5.0, # how often to check for results
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
# Use exactly like AsyncOpenAI
|
|
57
|
+
response = await client.chat.completions.create(
|
|
58
|
+
model="gpt-4o",
|
|
59
|
+
messages=[{"role": "user", "content": "What is 2+2?"}],
|
|
60
|
+
)
|
|
61
|
+
print(response.choices[0].message.content)
|
|
62
|
+
|
|
63
|
+
await client.close()
|
|
64
|
+
|
|
65
|
+
asyncio.run(main())
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Parallel requests
|
|
69
|
+
|
|
70
|
+
The real power comes when you have many requests:
|
|
71
|
+
|
|
72
|
+
```python
|
|
73
|
+
async def process_many(prompts: list[str]) -> list[str]:
|
|
74
|
+
client = BatchOpenAI(batch_size=50, batch_window_seconds=2.0)
|
|
75
|
+
|
|
76
|
+
async def get_response(prompt: str) -> str:
|
|
77
|
+
response = await client.chat.completions.create(
|
|
78
|
+
model="gpt-4o-mini",
|
|
79
|
+
messages=[{"role": "user", "content": prompt}],
|
|
80
|
+
)
|
|
81
|
+
return response.choices[0].message.content
|
|
82
|
+
|
|
83
|
+
# All requests are batched together automatically
|
|
84
|
+
results = await asyncio.gather(*[get_response(p) for p in prompts])
|
|
85
|
+
|
|
86
|
+
await client.close()
|
|
87
|
+
return results
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Context manager
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
async with BatchOpenAI() as client:
|
|
94
|
+
response = await client.chat.completions.create(...)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Configuration
|
|
98
|
+
|
|
99
|
+
| Parameter | Default | Description |
|
|
100
|
+
|-----------|---------|-------------|
|
|
101
|
+
| `api_key` | `None` | OpenAI API key (falls back to `OPENAI_API_KEY` env var) |
|
|
102
|
+
| `base_url` | `None` | API base URL (for proxies or compatible APIs) |
|
|
103
|
+
| `batch_size` | `100` | Submit batch when this many requests are queued |
|
|
104
|
+
| `batch_window_seconds` | `1.0` | Submit batch after this many seconds |
|
|
105
|
+
| `poll_interval_seconds` | `5.0` | How often to poll for batch completion |
|
|
106
|
+
| `completion_window` | `"24h"` | Batch completion window (`"24h"` or `"1h"`) |
|
|
107
|
+
|
|
108
|
+
## Limitations
|
|
109
|
+
|
|
110
|
+
- Only `chat.completions.create` is supported for now
|
|
111
|
+
- Batch API has a 24-hour completion window by default
|
|
112
|
+
- No escalations when the completion window elapses
|
|
113
|
+
- Not suitable for real-time/interactive use cases
|
|
114
|
+
- This library is designed or use with the [Doubleword batched
|
|
115
|
+
API](https://docs.doubleword.ai/batches/getting-started-with-batched-api).
|
|
116
|
+
Support for OpenAI's batch API or other compatible APIs is best effort. If you
|
|
117
|
+
experience any issues, please open an issue.
|
|
118
|
+
|
|
119
|
+
## License
|
|
120
|
+
|
|
121
|
+
MIT
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "autobatcher"
|
|
3
|
+
version = "0.1.1"
|
|
4
|
+
description = "Drop-in AsyncOpenAI replacement that transparently batches requests using the batch API"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = "MIT"
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Doubleword AI", email = "hello@doubleword.ai" }
|
|
10
|
+
]
|
|
11
|
+
keywords = ["openai", "batch", "llm", "async", "api", "inference"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"License :: OSI Approved :: MIT License",
|
|
16
|
+
"Programming Language :: Python :: 3",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Programming Language :: Python :: 3.13",
|
|
21
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
22
|
+
"Typing :: Typed",
|
|
23
|
+
]
|
|
24
|
+
dependencies = [
|
|
25
|
+
"openai>=1.0.0",
|
|
26
|
+
"httpx>=0.25.0",
|
|
27
|
+
"loguru>=0.7.0",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.urls]
|
|
31
|
+
Homepage = "https://github.com/doublewordai/autobatcher"
|
|
32
|
+
Repository = "https://github.com/doublewordai/autobatcher"
|
|
33
|
+
Issues = "https://github.com/doublewordai/autobatcher/issues"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["hatchling"]
|
|
37
|
+
build-backend = "hatchling.build"
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["src/autobatcher"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Autobatcher: Drop-in AsyncOpenAI replacement that transparently batches requests.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
from autobatcher import BatchOpenAI
|
|
6
|
+
|
|
7
|
+
client = BatchOpenAI(api_key="...")
|
|
8
|
+
response = await client.chat.completions.create(
|
|
9
|
+
model="gpt-4o",
|
|
10
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
11
|
+
)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .client import BatchOpenAI
|
|
15
|
+
|
|
16
|
+
__version__ = "0.1.1"
|
|
17
|
+
__all__ = ["BatchOpenAI"]
|
|
@@ -0,0 +1,446 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BatchOpenAI: A drop-in replacement for AsyncOpenAI that uses the batch API.
|
|
3
|
+
|
|
4
|
+
Collects requests over a time window or until a size threshold, submits them
|
|
5
|
+
as a batch, polls for results, and returns them to waiting callers.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import json
|
|
12
|
+
import io
|
|
13
|
+
import uuid
|
|
14
|
+
import time
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Any, Literal
|
|
17
|
+
|
|
18
|
+
import httpx
|
|
19
|
+
from loguru import logger
|
|
20
|
+
from openai import AsyncOpenAI
|
|
21
|
+
from openai.types.chat import ChatCompletion
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
|
|
25
|
+
class _PendingRequest:
|
|
26
|
+
"""A request waiting to be batched."""
|
|
27
|
+
|
|
28
|
+
custom_id: str
|
|
29
|
+
params: dict[str, Any]
|
|
30
|
+
future: asyncio.Future[ChatCompletion]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
|
|
34
|
+
class _ActiveBatch:
|
|
35
|
+
"""A batch that has been submitted and is being polled."""
|
|
36
|
+
|
|
37
|
+
batch_id: str
|
|
38
|
+
output_file_id: str
|
|
39
|
+
error_file_id: str
|
|
40
|
+
requests: dict[str, _PendingRequest] # custom_id -> request
|
|
41
|
+
created_at: float
|
|
42
|
+
last_offset: int = 0 # Track offset for partial result streaming
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class _ChatCompletions:
|
|
46
|
+
"""Proxy for chat.completions that batches requests."""
|
|
47
|
+
|
|
48
|
+
def __init__(self, client: BatchOpenAI):
|
|
49
|
+
self._client = client
|
|
50
|
+
|
|
51
|
+
async def create(
|
|
52
|
+
self,
|
|
53
|
+
*,
|
|
54
|
+
model: str,
|
|
55
|
+
messages: list[dict[str, Any]],
|
|
56
|
+
**kwargs: Any,
|
|
57
|
+
) -> ChatCompletion:
|
|
58
|
+
"""
|
|
59
|
+
Create a chat completion. The request is queued and batched.
|
|
60
|
+
|
|
61
|
+
Returns when the batch completes and results are available.
|
|
62
|
+
"""
|
|
63
|
+
return await self._client._enqueue_request(
|
|
64
|
+
model=model,
|
|
65
|
+
messages=messages,
|
|
66
|
+
**kwargs,
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class _Chat:
|
|
71
|
+
"""Proxy for chat namespace."""
|
|
72
|
+
|
|
73
|
+
def __init__(self, client: BatchOpenAI):
|
|
74
|
+
self.completions = _ChatCompletions(client)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class BatchOpenAI:
|
|
78
|
+
"""
|
|
79
|
+
Drop-in replacement for AsyncOpenAI that uses the batch API.
|
|
80
|
+
|
|
81
|
+
Requests are collected and submitted as batches based on size and time
|
|
82
|
+
thresholds. Results are polled and returned to waiting callers.
|
|
83
|
+
|
|
84
|
+
Usage:
|
|
85
|
+
client = BatchOpenAI(
|
|
86
|
+
api_key="...",
|
|
87
|
+
base_url="https://api.doubleword.ai/v1",
|
|
88
|
+
batch_size=100,
|
|
89
|
+
batch_window_seconds=1.0,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Use exactly like AsyncOpenAI
|
|
93
|
+
response = await client.chat.completions.create(
|
|
94
|
+
model="gpt-4o",
|
|
95
|
+
messages=[{"role": "user", "content": "Hello!"}],
|
|
96
|
+
)
|
|
97
|
+
"""
|
|
98
|
+
|
|
99
|
+
def __init__(
|
|
100
|
+
self,
|
|
101
|
+
*,
|
|
102
|
+
api_key: str | None = None,
|
|
103
|
+
base_url: str | None = None,
|
|
104
|
+
batch_size: int = 100,
|
|
105
|
+
batch_window_seconds: float = 1.0,
|
|
106
|
+
poll_interval_seconds: float = 5.0,
|
|
107
|
+
completion_window: Literal["24h", "1h"] = "24h",
|
|
108
|
+
**openai_kwargs: Any,
|
|
109
|
+
):
|
|
110
|
+
"""
|
|
111
|
+
Initialize BatchOpenAI.
|
|
112
|
+
|
|
113
|
+
Args:
|
|
114
|
+
api_key: API key for the OpenAI-compatible endpoint
|
|
115
|
+
base_url: Base URL for the API (e.g., "https://api.doubleword.ai/v1")
|
|
116
|
+
batch_size: Submit batch when this many requests are queued
|
|
117
|
+
batch_window_seconds: Submit batch after this many seconds, even if size not reached
|
|
118
|
+
poll_interval_seconds: How often to poll for batch completion
|
|
119
|
+
completion_window: Batch completion window ("24h" or "1h")
|
|
120
|
+
**openai_kwargs: Additional arguments passed to AsyncOpenAI
|
|
121
|
+
"""
|
|
122
|
+
self._openai = AsyncOpenAI(
|
|
123
|
+
api_key=api_key,
|
|
124
|
+
base_url=base_url,
|
|
125
|
+
**openai_kwargs,
|
|
126
|
+
)
|
|
127
|
+
self._base_url = (base_url or "https://api.openai.com/v1").rstrip("/")
|
|
128
|
+
self._api_key = api_key
|
|
129
|
+
self._batch_size = batch_size
|
|
130
|
+
self._batch_window_seconds = batch_window_seconds
|
|
131
|
+
self._poll_interval_seconds = poll_interval_seconds
|
|
132
|
+
self._completion_window = completion_window
|
|
133
|
+
|
|
134
|
+
# HTTP client for raw requests (needed to access response headers for partial results)
|
|
135
|
+
self._http_client = httpx.AsyncClient(
|
|
136
|
+
headers={"Authorization": f"Bearer {api_key}"} if api_key else {},
|
|
137
|
+
timeout=httpx.Timeout(60.0),
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
# Request collection
|
|
141
|
+
self._pending: list[_PendingRequest] = []
|
|
142
|
+
self._pending_lock = asyncio.Lock()
|
|
143
|
+
self._window_task: asyncio.Task[None] | None = None
|
|
144
|
+
|
|
145
|
+
# Active batches being polled
|
|
146
|
+
self._active_batches: list[_ActiveBatch] = []
|
|
147
|
+
self._poller_task: asyncio.Task[None] | None = None
|
|
148
|
+
|
|
149
|
+
# Public interface matching AsyncOpenAI
|
|
150
|
+
self.chat = _Chat(self)
|
|
151
|
+
|
|
152
|
+
logger.debug("Initialized with batch_size={}, window={}s", batch_size, batch_window_seconds)
|
|
153
|
+
|
|
154
|
+
async def _enqueue_request(
|
|
155
|
+
self,
|
|
156
|
+
model: str,
|
|
157
|
+
messages: list[dict[str, Any]],
|
|
158
|
+
**kwargs: Any,
|
|
159
|
+
) -> ChatCompletion:
|
|
160
|
+
"""Add a request to the pending queue and return when result is ready."""
|
|
161
|
+
loop = asyncio.get_running_loop()
|
|
162
|
+
future: asyncio.Future[ChatCompletion] = loop.create_future()
|
|
163
|
+
|
|
164
|
+
request = _PendingRequest(
|
|
165
|
+
custom_id=str(uuid.uuid4()),
|
|
166
|
+
params={
|
|
167
|
+
"model": model,
|
|
168
|
+
"messages": messages,
|
|
169
|
+
**kwargs,
|
|
170
|
+
},
|
|
171
|
+
future=future,
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
async with self._pending_lock:
|
|
175
|
+
self._pending.append(request)
|
|
176
|
+
pending_count = len(self._pending)
|
|
177
|
+
|
|
178
|
+
# Start window timer if this is the first request
|
|
179
|
+
if pending_count == 1:
|
|
180
|
+
logger.debug("Starting {}s batch window timer", self._batch_window_seconds)
|
|
181
|
+
self._window_task = asyncio.create_task(
|
|
182
|
+
self._window_timer(),
|
|
183
|
+
name="batch_window_timer"
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# Check if we've hit the size threshold
|
|
187
|
+
if pending_count >= self._batch_size:
|
|
188
|
+
logger.debug("Batch size {} reached", self._batch_size)
|
|
189
|
+
await self._submit_batch()
|
|
190
|
+
|
|
191
|
+
return await future
|
|
192
|
+
|
|
193
|
+
async def _window_timer(self) -> None:
|
|
194
|
+
"""Timer that triggers batch submission after the window elapses."""
|
|
195
|
+
try:
|
|
196
|
+
await asyncio.sleep(self._batch_window_seconds)
|
|
197
|
+
async with self._pending_lock:
|
|
198
|
+
if self._pending:
|
|
199
|
+
await self._submit_batch()
|
|
200
|
+
except asyncio.CancelledError:
|
|
201
|
+
logger.debug("Window timer cancelled")
|
|
202
|
+
raise
|
|
203
|
+
except Exception as e:
|
|
204
|
+
logger.error("Window timer error: {}", e)
|
|
205
|
+
# Fail all pending futures
|
|
206
|
+
for req in self._pending:
|
|
207
|
+
if not req.future.done():
|
|
208
|
+
req.future.set_exception(e)
|
|
209
|
+
raise
|
|
210
|
+
|
|
211
|
+
async def _submit_batch(self) -> None:
|
|
212
|
+
"""Submit all pending requests as a batch."""
|
|
213
|
+
if not self._pending:
|
|
214
|
+
return
|
|
215
|
+
|
|
216
|
+
# Cancel the window timer if running (but not if we ARE the window timer)
|
|
217
|
+
current_task = asyncio.current_task()
|
|
218
|
+
if self._window_task and not self._window_task.done() and self._window_task is not current_task:
|
|
219
|
+
self._window_task.cancel()
|
|
220
|
+
self._window_task = None
|
|
221
|
+
|
|
222
|
+
# Take all pending requests
|
|
223
|
+
requests = self._pending
|
|
224
|
+
self._pending = []
|
|
225
|
+
|
|
226
|
+
# Create JSONL content
|
|
227
|
+
lines = []
|
|
228
|
+
for req in requests:
|
|
229
|
+
line = {
|
|
230
|
+
"custom_id": req.custom_id,
|
|
231
|
+
"method": "POST",
|
|
232
|
+
"url": "/v1/chat/completions",
|
|
233
|
+
"body": req.params,
|
|
234
|
+
}
|
|
235
|
+
lines.append(json.dumps(line))
|
|
236
|
+
content = "\n".join(lines)
|
|
237
|
+
|
|
238
|
+
try:
|
|
239
|
+
# Upload the batch file using BytesIO
|
|
240
|
+
file_obj = io.BytesIO(content.encode("utf-8"))
|
|
241
|
+
filename = f"batch-{uuid.uuid4()}.jsonl"
|
|
242
|
+
|
|
243
|
+
file_response = await self._openai.files.create(
|
|
244
|
+
file=(filename, file_obj, "application/jsonl"),
|
|
245
|
+
purpose="batch",
|
|
246
|
+
)
|
|
247
|
+
logger.debug("Uploaded batch file: {}", file_response.id)
|
|
248
|
+
|
|
249
|
+
# Create the batch
|
|
250
|
+
batch_response = await self._openai.batches.create(
|
|
251
|
+
input_file_id=file_response.id,
|
|
252
|
+
endpoint="/v1/chat/completions",
|
|
253
|
+
completion_window=self._completion_window,
|
|
254
|
+
)
|
|
255
|
+
logger.info("Submitted batch {} with {} requests", batch_response.id, len(requests))
|
|
256
|
+
|
|
257
|
+
# Track the active batch
|
|
258
|
+
active_batch = _ActiveBatch(
|
|
259
|
+
batch_id=batch_response.id,
|
|
260
|
+
output_file_id=batch_response.output_file_id or "",
|
|
261
|
+
error_file_id=batch_response.error_file_id or "",
|
|
262
|
+
requests={req.custom_id: req for req in requests},
|
|
263
|
+
created_at=time.time(),
|
|
264
|
+
)
|
|
265
|
+
self._active_batches.append(active_batch)
|
|
266
|
+
|
|
267
|
+
# Start the poller if not running
|
|
268
|
+
if self._poller_task is None or self._poller_task.done():
|
|
269
|
+
self._poller_task = asyncio.create_task(
|
|
270
|
+
self._poll_batches(),
|
|
271
|
+
name="batch_poller"
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
except Exception as e:
|
|
275
|
+
logger.error("Batch submission failed: {}", e)
|
|
276
|
+
# If batch submission fails, fail all waiting requests
|
|
277
|
+
for req in requests:
|
|
278
|
+
if not req.future.done():
|
|
279
|
+
req.future.set_exception(e)
|
|
280
|
+
|
|
281
|
+
async def _poll_batches(self) -> None:
|
|
282
|
+
"""Poll active batches for completion and distribute results."""
|
|
283
|
+
logger.debug("Poller started with {} active batches", len(self._active_batches))
|
|
284
|
+
|
|
285
|
+
while self._active_batches:
|
|
286
|
+
await asyncio.sleep(self._poll_interval_seconds)
|
|
287
|
+
|
|
288
|
+
completed_indices = []
|
|
289
|
+
|
|
290
|
+
for i, batch in enumerate(self._active_batches):
|
|
291
|
+
try:
|
|
292
|
+
status = await self._openai.batches.retrieve(batch.batch_id)
|
|
293
|
+
counts = status.request_counts
|
|
294
|
+
logger.debug(
|
|
295
|
+
"Batch {} status: {} (completed={}/{})",
|
|
296
|
+
batch.batch_id[:12], status.status,
|
|
297
|
+
counts.completed if counts else 0,
|
|
298
|
+
counts.total if counts else 0
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
# Update output_file_id if it becomes available
|
|
302
|
+
if status.output_file_id and not batch.output_file_id:
|
|
303
|
+
batch.output_file_id = status.output_file_id
|
|
304
|
+
|
|
305
|
+
if status.status == "completed":
|
|
306
|
+
await self._process_completed_batch(batch, status.output_file_id)
|
|
307
|
+
completed_indices.append(i)
|
|
308
|
+
logger.info("Batch {} completed", batch.batch_id)
|
|
309
|
+
elif status.status in ("failed", "expired", "cancelled"):
|
|
310
|
+
logger.error("Batch {} {}", batch.batch_id, status.status)
|
|
311
|
+
error = Exception(f"Batch {batch.batch_id} {status.status}")
|
|
312
|
+
for req in batch.requests.values():
|
|
313
|
+
if not req.future.done():
|
|
314
|
+
req.future.set_exception(error)
|
|
315
|
+
completed_indices.append(i)
|
|
316
|
+
elif status.status in ("in_progress", "validating", "finalizing"):
|
|
317
|
+
# Fetch partial results if output file is available
|
|
318
|
+
if batch.output_file_id:
|
|
319
|
+
await self._fetch_partial_results(batch, batch.output_file_id)
|
|
320
|
+
|
|
321
|
+
except Exception as e:
|
|
322
|
+
logger.error("Error polling batch {}: {}", batch.batch_id, e)
|
|
323
|
+
|
|
324
|
+
# Remove completed batches (in reverse order to preserve indices)
|
|
325
|
+
for i in reversed(completed_indices):
|
|
326
|
+
self._active_batches.pop(i)
|
|
327
|
+
|
|
328
|
+
logger.debug("Poller finished")
|
|
329
|
+
|
|
330
|
+
async def _fetch_partial_results(self, batch: _ActiveBatch, output_file_id: str) -> bool:
    """
    Fetch partial results from an in-progress batch and resolve available futures.

    Uses the Doubleword API's partial result streaming:
    - X-Incomplete header indicates if more results are coming
    - X-Last-Line header tracks progress for resumption
    - ?offset= query param fetches only new results

    Args:
        batch: The in-flight batch whose pending request futures should be
            resolved as their results arrive.
        output_file_id: ID of the (possibly still growing) output file.

    Returns:
        True if there are more results to fetch, False if complete.
    """
    url = f"{self._base_url}/files/{output_file_id}/content"
    if batch.last_offset > 0:
        # Resume from where the previous poll stopped.
        url = f"{url}?offset={batch.last_offset}"

    try:
        response = await self._http_client.get(url)
        response.raise_for_status()

        is_incomplete = response.headers.get("X-Incomplete", "").lower() == "true"
        last_line = response.headers.get("X-Last-Line")

        text = response.text
        if not text.strip():
            return is_incomplete

        # Parse each line and resolve the corresponding future.
        resolved = 0
        for line in text.strip().split("\n"):
            if not line:
                continue

            # Guard each line individually: previously a single malformed
            # line raised out of the loop, skipping every later line AND the
            # offset update below, so the same bad window was refetched
            # forever. Skip just the bad line instead.
            try:
                result = json.loads(line)
            except json.JSONDecodeError as parse_err:
                logger.warning("Skipping malformed result line: {}", parse_err)
                continue

            custom_id = result.get("custom_id")

            # Handle both success and error responses.
            response_data = result.get("response", {})
            error_data = result.get("error")

            if custom_id in batch.requests:
                req = batch.requests[custom_id]
                if not req.future.done():
                    if error_data:
                        req.future.set_exception(
                            Exception(f"Request {custom_id} failed: {error_data}")
                        )
                    else:
                        response_body = response_data.get("body", {})
                        # An unparsable body will not become valid on a
                        # refetch; fail this future rather than aborting the
                        # whole loop and leaving it pending forever.
                        try:
                            completion = ChatCompletion.model_validate(response_body)
                        except Exception as val_err:
                            req.future.set_exception(
                                Exception(
                                    f"Request {custom_id} returned unparsable body: {val_err}"
                                )
                            )
                        else:
                            req.future.set_result(completion)
                    resolved += 1

        # Update offset for next fetch.
        if last_line:
            batch.last_offset = int(last_line)

        if resolved > 0:
            pending = sum(1 for req in batch.requests.values() if not req.future.done())
            logger.debug("Resolved {} partial results, {} pending", resolved, pending)

        return is_incomplete

    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            # File not ready yet, this is normal for early polling.
            return True
        logger.debug("HTTP error fetching partial results: {}", e)
        return True
    except Exception as e:
        logger.debug("Error fetching partial results: {}", e)
        return True
|
|
401
|
+
|
|
402
|
+
async def _process_completed_batch(
    self, batch: _ActiveBatch, output_file_id: str | None
) -> None:
    """Drain a finished batch's output file and settle every outstanding future.

    Any request that still has no result afterwards — or that is hit by an
    error while draining — gets its future failed so callers never hang.
    """
    if not output_file_id:
        # Nothing to download: fail every request that is still waiting.
        logger.error("Batch {} completed but no output file", batch.batch_id)
        missing = Exception(f"Batch {batch.batch_id} completed but no output file")
        for waiting in batch.requests.values():
            if not waiting.future.done():
                waiting.future.set_exception(missing)
        return

    try:
        # Continue from wherever partial polling stopped (batch.last_offset)
        # and pull down whatever results remain.
        await self._fetch_partial_results(batch, output_file_id)

        # Anything still unresolved never appeared in the output file.
        for leftover in batch.requests.values():
            if leftover.future.done():
                continue
            logger.warning("No result for request {}", leftover.custom_id)
            leftover.future.set_exception(
                Exception(f"No result for request {leftover.custom_id}")
            )

    except Exception as exc:
        logger.error("Error processing batch results: {}", exc)
        for leftover in batch.requests.values():
            if not leftover.future.done():
                leftover.future.set_exception(exc)
|
|
432
|
+
|
|
433
|
+
async def close(self) -> None:
    """Close the client and cancel any pending operations.

    Cancels the window and poller background tasks, waits for them to finish
    unwinding, then closes the HTTP clients. Awaiting the cancelled tasks
    avoids "Task was destroyed but it is pending" warnings and guarantees
    neither task is still using the HTTP client we are about to close.
    """
    cancelled = []
    if self._window_task and not self._window_task.done():
        self._window_task.cancel()
        cancelled.append(self._window_task)
    if self._poller_task and not self._poller_task.done():
        self._poller_task.cancel()
        cancelled.append(self._poller_task)
    if cancelled:
        # return_exceptions=True swallows the CancelledError each task
        # raises as it unwinds.
        await asyncio.gather(*cancelled, return_exceptions=True)
    await self._http_client.aclose()
    await self._openai.close()
|
|
441
|
+
|
|
442
|
+
async def __aenter__(self) -> BatchOpenAI:
|
|
443
|
+
return self
|
|
444
|
+
|
|
445
|
+
async def __aexit__(self, *args: Any) -> None:
|
|
446
|
+
await self.close()
|
|
File without changes
|