llm_batch_helper 0.2.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/PKG-INFO +147 -63
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/README.md +145 -61
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/__init__.py +3 -2
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/config.py +3 -3
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/providers.py +115 -4
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/pyproject.toml +2 -2
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/LICENSE +0 -0
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/cache.py +0 -0
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/exceptions.py +0 -0
- {llm_batch_helper-0.2.0 → llm_batch_helper-0.3.1}/llm_batch_helper/input_handlers.py +0 -0
--- llm_batch_helper-0.2.0/PKG-INFO
+++ llm_batch_helper-0.3.1/PKG-INFO
@@ -1,7 +1,7 @@
 Metadata-Version: 2.3
 Name: llm_batch_helper
-Version: 0.2.0
-Summary: A Python package that enables batch submission of prompts to LLM APIs, with built-in async capabilities
+Version: 0.3.1
+Summary: A Python package that enables batch submission of prompts to LLM APIs, with simplified interface and built-in async capabilities handled implicitly.
 License: MIT
 Keywords: llm,openai,together,openrouter,batch,async,ai,nlp,api
 Author: Tianyi Peng
@@ -56,10 +56,12 @@ This package is designed to solve these exact pain points with async processing,
 - **Async Processing**: Submit multiple prompts concurrently for faster processing
 - **Response Caching**: Automatically cache responses to avoid redundant API calls
 - **Multiple Input Formats**: Support for both file-based and list-based prompts
-- **Provider Support**: Works with OpenAI and Together.ai APIs
-- **Retry Logic**: Built-in retry mechanism with exponential backoff
-- **Verification Callbacks**: Custom verification for response quality
+- **Provider Support**: Works with OpenAI (all models including GPT-5), OpenRouter (100+ models), and Together.ai APIs
+- **Retry Logic**: Built-in retry mechanism with exponential backoff and detailed logging
+- **Verification Callbacks**: Custom verification for response quality
 - **Progress Tracking**: Real-time progress bars for batch operations
+- **Simplified API**: Async operations handled implicitly - no async/await needed (v0.3.0+)
+- **Detailed Error Logging**: See exactly what happens during retries with timestamps and error details
 
 ## Installation
 
@@ -90,9 +92,12 @@ poetry shell
 
 **Option A: Environment Variables**
 ```bash
-# For OpenAI
+# For OpenAI (all models including GPT-5)
 export OPENAI_API_KEY="your-openai-api-key"
 
+# For OpenRouter (100+ models - Recommended)
+export OPENROUTER_API_KEY="your-openrouter-api-key"
+
 # For Together.ai
 export TOGETHER_API_KEY="your-together-api-key"
 ```
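The new hunk only adds a second key variable; which one is read depends on the `provider` argument used later in the README. A small standalone sketch of checking that the right variable is exported before a run (the helper name and mapping are illustrative, not part of the package; the variable names are taken from the hunk above):

```python
import os

# Illustrative mapping; variable names come from the hunk above.
PROVIDER_ENV_VARS = {
    "openai": "OPENAI_API_KEY",
    "openrouter": "OPENROUTER_API_KEY",
    "together": "TOGETHER_API_KEY",
}

def check_api_key(provider: str) -> bool:
    """Return True if the API key for the chosen provider is exported."""
    var = PROVIDER_ENV_VARS.get(provider)
    if var is None:
        raise ValueError(f"Unknown provider: {provider}")
    if not os.environ.get(var):
        print(f"Missing {var}; export it before calling process_prompts_batch.")
        return False
    return True

check_api_key("openrouter")
```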
@@ -122,71 +127,111 @@ The tutorial covers all features with interactive examples!
 ### 3. Basic usage
 
 ```python
-import asyncio
 from dotenv import load_dotenv # Optional: for .env file support
 from llm_batch_helper import LLMConfig, process_prompts_batch
 
 # Optional: Load environment variables from .env file
 load_dotenv()
 
+# Create configuration
+config = LLMConfig(
+    model_name="gpt-4o-mini",
+    temperature=1.0,
+    max_completion_tokens=100,
+    max_concurrent_requests=30 # number of concurrent requests with asyncIO
+)
+
+# Process prompts - no async/await needed!
+prompts = [
+    "What is the capital of France?",
+    "What is 2+2?",
+    "Who wrote 'Hamlet'?"
+]
+
+results = process_prompts_batch(
+    config=config,
+    provider="openai",
+    prompts=prompts,
+    cache_dir="cache"
+)
+
+# Print results
+for prompt_id, response in results.items():
+    print(f"{prompt_id}: {response['response_text']}")
+```
+
+**🎉 New in v0.3.0**: `process_prompts_batch` now handles async operations **implicitly** - no more async/await syntax needed! Works seamlessly in Jupyter notebooks.
+
+### 🔄 Backward Compatibility
+
+For users who prefer the async version or have existing code, the async API is still available:
+
+```python
+import asyncio
+from llm_batch_helper import process_prompts_batch_async
+
 async def main():
-    # Create configuration
-    config = LLMConfig(
-        model_name="gpt-4o-mini",
-        temperature=0.7,
-        max_completion_tokens=100, # or use max_tokens for backward compatibility
-        max_concurrent_requests=30 # number of concurrent requests with asyncIO
-    )
-
-    # Process prompts
-    prompts = [
-        "What is the capital of France?",
-        "What is 2+2?",
-        "Who wrote 'Hamlet'?"
-    ]
-
-    results = await process_prompts_batch(
+    results = await process_prompts_batch_async(
+        prompts=["Hello world!"],
         config=config,
-        provider="openai",
-        prompts=prompts,
-        cache_dir="cache"
+        provider="openai"
     )
-
-    # Print results
-    for prompt_id, response in results.items():
-        print(f"{prompt_id}: {response['response_text']}")
+    return results
 
-
-asyncio.run(main())
+results = asyncio.run(main())
 ```
 
 ## Usage Examples
 
+### OpenRouter (Recommended - 100+ Models)
+
+```python
+from llm_batch_helper import LLMConfig, process_prompts_batch
+
+# Access 100+ models through OpenRouter
+config = LLMConfig(
+    model_name="deepseek/deepseek-v3.1-base", # or openai/gpt-4o, anthropic/claude-3-5-sonnet
+    temperature=1.0,
+    max_completion_tokens=500
+)
+
+prompts = [
+    "Explain quantum computing briefly.",
+    "What are the benefits of renewable energy?",
+    "How does machine learning work?"
+]
+
+results = process_prompts_batch(
+    prompts=prompts,
+    config=config,
+    provider="openrouter" # Access to 100+ models!
+)
+
+for prompt_id, result in results.items():
+    print(f"Response: {result['response_text']}")
+```
+
 ### File-based Prompts
 
 ```python
-import asyncio
 from llm_batch_helper import LLMConfig, process_prompts_batch
 
+config = LLMConfig(
+    model_name="gpt-4o-mini",
+    temperature=1.0,
+    max_completion_tokens=200
+)
+
+# Process all .txt files in a directory
+results = process_prompts_batch(
+    config=config,
+    provider="openai",
+    input_dir="prompts", # Directory containing .txt files
+    cache_dir="cache",
+    force=False # Use cached responses if available
+)
 
-
+print(f"Processed {len(results)} prompts from files")
 ```
 
 ### Custom Verification
@@ -210,7 +255,7 @@ def verify_response(prompt_id, llm_response_data, original_prompt_text, **kwargs
 
 config = LLMConfig(
     model_name="gpt-4o-mini",
-    temperature=0,
+    temperature=1.0,
     verification_callback=verify_response,
    verification_callback_args={"min_length": 20}
 )
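The hunk header shows the expected callback signature, but the body of `verify_response` lies outside the changed lines. A plausible sketch of a length-based verifier that matches that signature and the `min_length` argument wired in above (the `response_text` key is taken from the README examples; the actual implementation in the tutorial may differ):

```python
def verify_response(prompt_id, llm_response_data, original_prompt_text, min_length=20, **kwargs):
    """Reject responses that are empty or shorter than min_length characters."""
    text = (llm_response_data or {}).get("response_text", "")
    if len(text.strip()) < min_length:
        print(f"{prompt_id}: response too short ({len(text)} chars), will retry")
        return False
    return True
```

A `False` return makes the batch runner retry that prompt, up to `max_retries` attempts (see the config.py change further down).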
@@ -227,12 +272,12 @@ Configuration class for LLM requests.
 ```python
 LLMConfig(
     model_name: str,
-    temperature: float = 0,
+    temperature: float = 1.0,
     max_completion_tokens: Optional[int] = None, # Preferred parameter
     max_tokens: Optional[int] = None, # Deprecated, kept for backward compatibility
     system_instruction: Optional[str] = None,
-    max_retries: int = 10,
-    max_concurrent_requests: int = 5,
+    max_retries: int = 5,
+    max_concurrent_requests: int = 30,
     verification_callback: Optional[Callable] = None,
     verification_callback_args: Optional[Dict] = None
 )
@@ -240,12 +285,28 @@ LLMConfig(
 
 ### process_prompts_batch
 
-Main function for batch processing of prompts.
+Main function for batch processing of prompts (async operations handled implicitly).
 
 ```python
-async def process_prompts_batch(
+def process_prompts_batch(
     config: LLMConfig,
-    provider: str, # "openai", "together"
+    provider: str, # "openai", "openrouter" (recommended), or "together"
+    prompts: Optional[List[str]] = None,
+    input_dir: Optional[str] = None,
+    cache_dir: str = "llm_cache",
+    force: bool = False,
+    desc: str = "Processing prompts"
+) -> Dict[str, Dict[str, Any]]
+```
+
+### process_prompts_batch_async
+
+Async version for backward compatibility and advanced use cases.
+
+```python
+async def process_prompts_batch_async(
+    config: LLMConfig,
+    provider: str, # "openai", "openrouter" (recommended), or "together"
     prompts: Optional[List[str]] = None,
     input_dir: Optional[str] = None,
     cache_dir: str = "llm_cache",
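Both entry points return a dict keyed by prompt ID. A short consumption sketch: the `response_text` key appears in the README examples, while the `error` key mirrors the failure dicts visible in the providers.py hunks further down, so treat the exact failure shape as an assumption (an API key must be set for this to run):

```python
from llm_batch_helper import LLMConfig, process_prompts_batch

config = LLMConfig(model_name="gpt-4o-mini", max_completion_tokens=50)

results = process_prompts_batch(
    prompts=["What is 2+2?"],
    config=config,
    provider="openai",
)

for prompt_id, result in results.items():
    if "error" in result:  # assumed failure shape, per the providers.py changes below
        print(f"{prompt_id}: failed after retries -> {result['error']}")
    else:
        print(f"{prompt_id}: {result['response_text']}")
```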
@@ -297,10 +358,15 @@ llm_batch_helper/
 ## Supported Models
 
 ### OpenAI
+- **All OpenAI models**
+
+### OpenRouter (Recommended - 100+ Models)
+- **OpenAI models**: `openai/gpt-4o`, `openai/gpt-4o-mini`
+- **Anthropic models**: `anthropic/claude-3-5-sonnet`, `anthropic/claude-3-haiku`
+- **DeepSeek models**: `deepseek/deepseek-v3.1-base`, `deepseek/deepseek-chat`
+- **Meta models**: `meta-llama/llama-3.1-405b-instruct`
+- **Google models**: `google/gemini-pro-1.5`
+- **And 90+ more models** from all major providers
 
 ### Together.ai
 - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
@@ -317,7 +383,7 @@ llm_batch_helper/
 - [API Reference](https://llm-batch-helper.readthedocs.io/en/latest/api.html) - Complete API documentation
 - [Examples](https://llm-batch-helper.readthedocs.io/en/latest/examples.html) - Practical usage examples
 - [Tutorials](https://llm-batch-helper.readthedocs.io/en/latest/tutorials.html) - Step-by-step tutorials
-- [Provider Guide](https://llm-batch-helper.readthedocs.io/en/latest/providers.html) - OpenAI & Together.ai setup
+- [Provider Guide](https://llm-batch-helper.readthedocs.io/en/latest/providers.html) - OpenAI, OpenRouter & Together.ai setup
 
 ## Contributing
 
@@ -334,6 +400,24 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 
 ## Changelog
 
+### v0.3.1
+- **🔧 Configuration Updates**: Optimized default values for better performance
+  - Updated `max_retries` from 10 to 5 for faster failure detection
+  - Updated `max_concurrent_requests` from 5 to 30 for improved batch processing performance
+
+### v0.3.0
+- **🎉 Major Update**: Simplified API - async operations handled implicitly, no async/await required!
+- **📓 Jupyter Support**: Works seamlessly in notebooks without event loop issues
+- **🔍 Detailed Retry Logging**: See exactly what happens during retries with timestamps
+- **🔄 Backward Compatibility**: Original async API still available as `process_prompts_batch_async`
+- **📚 Updated Examples**: All documentation updated to show simplified usage
+- **⚡ Smart Event Loop Handling**: Automatically detects and handles different Python environments
+
+### v0.2.0
+- Enhanced API stability
+- Improved error handling
+- Better documentation
+
 ### v0.1.5
 - Added Together.ai provider support
 - Support for open-source models (Llama, Mixtral, etc.)
--- llm_batch_helper-0.2.0/README.md
+++ llm_batch_helper-0.3.1/README.md
@@ -29,10 +29,12 @@ This package is designed to solve these exact pain points with async processing,
 - **Async Processing**: Submit multiple prompts concurrently for faster processing
 - **Response Caching**: Automatically cache responses to avoid redundant API calls
 - **Multiple Input Formats**: Support for both file-based and list-based prompts
-- **Provider Support**: Works with OpenAI and Together.ai APIs
-- **Retry Logic**: Built-in retry mechanism with exponential backoff
-- **Verification Callbacks**: Custom verification for response quality
+- **Provider Support**: Works with OpenAI (all models including GPT-5), OpenRouter (100+ models), and Together.ai APIs
+- **Retry Logic**: Built-in retry mechanism with exponential backoff and detailed logging
+- **Verification Callbacks**: Custom verification for response quality
 - **Progress Tracking**: Real-time progress bars for batch operations
+- **Simplified API**: Async operations handled implicitly - no async/await needed (v0.3.0+)
+- **Detailed Error Logging**: See exactly what happens during retries with timestamps and error details
 
 ## Installation
 
@@ -63,9 +65,12 @@ poetry shell
 
 **Option A: Environment Variables**
 ```bash
-# For OpenAI
+# For OpenAI (all models including GPT-5)
 export OPENAI_API_KEY="your-openai-api-key"
 
+# For OpenRouter (100+ models - Recommended)
+export OPENROUTER_API_KEY="your-openrouter-api-key"
+
 # For Together.ai
 export TOGETHER_API_KEY="your-together-api-key"
 ```
@@ -95,71 +100,111 @@ The tutorial covers all features with interactive examples!
 ### 3. Basic usage
 
 ```python
-import asyncio
 from dotenv import load_dotenv # Optional: for .env file support
 from llm_batch_helper import LLMConfig, process_prompts_batch
 
 # Optional: Load environment variables from .env file
 load_dotenv()
 
+# Create configuration
+config = LLMConfig(
+    model_name="gpt-4o-mini",
+    temperature=1.0,
+    max_completion_tokens=100,
+    max_concurrent_requests=30 # number of concurrent requests with asyncIO
+)
+
+# Process prompts - no async/await needed!
+prompts = [
+    "What is the capital of France?",
+    "What is 2+2?",
+    "Who wrote 'Hamlet'?"
+]
+
+results = process_prompts_batch(
+    config=config,
+    provider="openai",
+    prompts=prompts,
+    cache_dir="cache"
+)
+
+# Print results
+for prompt_id, response in results.items():
+    print(f"{prompt_id}: {response['response_text']}")
+```
+
+**🎉 New in v0.3.0**: `process_prompts_batch` now handles async operations **implicitly** - no more async/await syntax needed! Works seamlessly in Jupyter notebooks.
+
+### 🔄 Backward Compatibility
+
+For users who prefer the async version or have existing code, the async API is still available:
+
+```python
+import asyncio
+from llm_batch_helper import process_prompts_batch_async
+
 async def main():
-    # Create configuration
-    config = LLMConfig(
-        model_name="gpt-4o-mini",
-        temperature=0.7,
-        max_completion_tokens=100, # or use max_tokens for backward compatibility
-        max_concurrent_requests=30 # number of concurrent requests with asyncIO
-    )
-
-    # Process prompts
-    prompts = [
-        "What is the capital of France?",
-        "What is 2+2?",
-        "Who wrote 'Hamlet'?"
-    ]
-
-    results = await process_prompts_batch(
+    results = await process_prompts_batch_async(
+        prompts=["Hello world!"],
         config=config,
-        provider="openai",
-        prompts=prompts,
-        cache_dir="cache"
+        provider="openai"
     )
-
-    # Print results
-    for prompt_id, response in results.items():
-        print(f"{prompt_id}: {response['response_text']}")
+    return results
 
-
-asyncio.run(main())
+results = asyncio.run(main())
 ```
 
 ## Usage Examples
 
+### OpenRouter (Recommended - 100+ Models)
+
+```python
+from llm_batch_helper import LLMConfig, process_prompts_batch
+
+# Access 100+ models through OpenRouter
+config = LLMConfig(
+    model_name="deepseek/deepseek-v3.1-base", # or openai/gpt-4o, anthropic/claude-3-5-sonnet
+    temperature=1.0,
+    max_completion_tokens=500
+)
+
+prompts = [
+    "Explain quantum computing briefly.",
+    "What are the benefits of renewable energy?",
+    "How does machine learning work?"
+]
+
+results = process_prompts_batch(
+    prompts=prompts,
+    config=config,
+    provider="openrouter" # Access to 100+ models!
+)
+
+for prompt_id, result in results.items():
+    print(f"Response: {result['response_text']}")
+```
+
 ### File-based Prompts
 
 ```python
-import asyncio
 from llm_batch_helper import LLMConfig, process_prompts_batch
 
+config = LLMConfig(
+    model_name="gpt-4o-mini",
+    temperature=1.0,
+    max_completion_tokens=200
+)
+
+# Process all .txt files in a directory
+results = process_prompts_batch(
+    config=config,
+    provider="openai",
+    input_dir="prompts", # Directory containing .txt files
+    cache_dir="cache",
+    force=False # Use cached responses if available
+)
 
-
+print(f"Processed {len(results)} prompts from files")
 ```
 
 ### Custom Verification
@@ -183,7 +228,7 @@ def verify_response(prompt_id, llm_response_data, original_prompt_text, **kwargs
 
 config = LLMConfig(
     model_name="gpt-4o-mini",
-    temperature=0,
+    temperature=1.0,
     verification_callback=verify_response,
    verification_callback_args={"min_length": 20}
 )
@@ -200,12 +245,12 @@ Configuration class for LLM requests.
 ```python
 LLMConfig(
     model_name: str,
-    temperature: float = 0,
+    temperature: float = 1.0,
     max_completion_tokens: Optional[int] = None, # Preferred parameter
     max_tokens: Optional[int] = None, # Deprecated, kept for backward compatibility
     system_instruction: Optional[str] = None,
-    max_retries: int = 10,
-    max_concurrent_requests: int = 5,
+    max_retries: int = 5,
+    max_concurrent_requests: int = 30,
     verification_callback: Optional[Callable] = None,
     verification_callback_args: Optional[Dict] = None
 )
@@ -213,12 +258,28 @@ LLMConfig(
 
 ### process_prompts_batch
 
-Main function for batch processing of prompts.
+Main function for batch processing of prompts (async operations handled implicitly).
 
 ```python
-async def process_prompts_batch(
+def process_prompts_batch(
     config: LLMConfig,
-    provider: str, # "openai", "together"
+    provider: str, # "openai", "openrouter" (recommended), or "together"
+    prompts: Optional[List[str]] = None,
+    input_dir: Optional[str] = None,
+    cache_dir: str = "llm_cache",
+    force: bool = False,
+    desc: str = "Processing prompts"
+) -> Dict[str, Dict[str, Any]]
+```
+
+### process_prompts_batch_async
+
+Async version for backward compatibility and advanced use cases.
+
+```python
+async def process_prompts_batch_async(
+    config: LLMConfig,
+    provider: str, # "openai", "openrouter" (recommended), or "together"
     prompts: Optional[List[str]] = None,
     input_dir: Optional[str] = None,
     cache_dir: str = "llm_cache",
@@ -270,10 +331,15 @@ llm_batch_helper/
 ## Supported Models
 
 ### OpenAI
+- **All OpenAI models**
+
+### OpenRouter (Recommended - 100+ Models)
+- **OpenAI models**: `openai/gpt-4o`, `openai/gpt-4o-mini`
+- **Anthropic models**: `anthropic/claude-3-5-sonnet`, `anthropic/claude-3-haiku`
+- **DeepSeek models**: `deepseek/deepseek-v3.1-base`, `deepseek/deepseek-chat`
+- **Meta models**: `meta-llama/llama-3.1-405b-instruct`
+- **Google models**: `google/gemini-pro-1.5`
+- **And 90+ more models** from all major providers
 
 ### Together.ai
 - meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo
@@ -290,7 +356,7 @@ llm_batch_helper/
 - [API Reference](https://llm-batch-helper.readthedocs.io/en/latest/api.html) - Complete API documentation
 - [Examples](https://llm-batch-helper.readthedocs.io/en/latest/examples.html) - Practical usage examples
 - [Tutorials](https://llm-batch-helper.readthedocs.io/en/latest/tutorials.html) - Step-by-step tutorials
-- [Provider Guide](https://llm-batch-helper.readthedocs.io/en/latest/providers.html) - OpenAI & Together.ai setup
+- [Provider Guide](https://llm-batch-helper.readthedocs.io/en/latest/providers.html) - OpenAI, OpenRouter & Together.ai setup
 
 ## Contributing
 
@@ -307,6 +373,24 @@ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file
 
 ## Changelog
 
+### v0.3.1
+- **🔧 Configuration Updates**: Optimized default values for better performance
+  - Updated `max_retries` from 10 to 5 for faster failure detection
+  - Updated `max_concurrent_requests` from 5 to 30 for improved batch processing performance
+
+### v0.3.0
+- **🎉 Major Update**: Simplified API - async operations handled implicitly, no async/await required!
+- **📓 Jupyter Support**: Works seamlessly in notebooks without event loop issues
+- **🔍 Detailed Retry Logging**: See exactly what happens during retries with timestamps
+- **🔄 Backward Compatibility**: Original async API still available as `process_prompts_batch_async`
+- **📚 Updated Examples**: All documentation updated to show simplified usage
+- **⚡ Smart Event Loop Handling**: Automatically detects and handles different Python environments
+
+### v0.2.0
+- Enhanced API stability
+- Improved error handling
+- Better documentation
+
 ### v0.1.5
 - Added Together.ai provider support
 - Support for open-source models (Llama, Mixtral, etc.)
--- llm_batch_helper-0.2.0/llm_batch_helper/__init__.py
+++ llm_batch_helper-0.3.1/llm_batch_helper/__init__.py
@@ -1,15 +1,16 @@
 from .cache import LLMCache
 from .config import LLMConfig
 from .input_handlers import get_prompts, read_prompt_files, read_prompt_list
-from .providers import process_prompts_batch
+from .providers import process_prompts_batch, process_prompts_batch_async
 
-__version__ = "0.2.0"
+__version__ = "0.3.1"
 
 __all__ = [
     "LLMCache",
     "LLMConfig",
     "get_prompts",
     "process_prompts_batch",
+    "process_prompts_batch_async", # For backward compatibility
     "read_prompt_files",
     "read_prompt_list",
 ]
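Because `__init__.py` now exports both names, existing async code can keep awaiting the batch call and compose it with other coroutines. A sketch under that assumption (the second coroutine is a stand-in for unrelated async work; an OpenAI key must be set for the call to succeed):

```python
import asyncio

from llm_batch_helper import LLMConfig, process_prompts_batch_async

config = LLMConfig(model_name="gpt-4o-mini", max_completion_tokens=50)

async def other_work():
    await asyncio.sleep(0.1)  # placeholder for unrelated async work
    return "done"

async def main():
    # Run the batch call concurrently with other coroutines.
    batch, status = await asyncio.gather(
        process_prompts_batch_async(prompts=["Hello!"], config=config, provider="openai"),
        other_work(),
    )
    return batch, status

batch_results, status = asyncio.run(main())
```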
--- llm_batch_helper-0.2.0/llm_batch_helper/config.py
+++ llm_batch_helper-0.3.1/llm_batch_helper/config.py
@@ -8,11 +8,11 @@ class LLMConfig:
     def __init__(
         self,
         model_name: str,
-        temperature: float = 0,
+        temperature: float = 1.0,
         max_tokens: Optional[int] = None,
         system_instruction: Optional[str] = None,
-        max_retries: int = 10,
-        max_concurrent_requests: int = 5,
+        max_retries: int = 5,  # Max retries for the combined LLM call + Verification
+        max_concurrent_requests: int = 30,
         verification_callback: Optional[Callable[..., bool]] = None,
         verification_callback_args: Optional[Dict] = None,
         max_completion_tokens: Optional[int] = None,
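The new defaults only apply when the arguments are omitted, so callers can pin the 0.2.0 behavior by passing the old values explicitly. A sketch; the old defaults are the ones named in the changelog above:

```python
from llm_batch_helper import LLMConfig

# 0.3.1 defaults: max_retries=5, max_concurrent_requests=30.
# Passing the pre-0.3.1 values keeps the old, more conservative behavior.
conservative_config = LLMConfig(
    model_name="gpt-4o-mini",
    max_retries=10,             # 0.2.0 default per the changelog
    max_concurrent_requests=5,  # 0.2.0 default per the changelog
)
```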
--- llm_batch_helper-0.2.0/llm_batch_helper/providers.py
+++ llm_batch_helper-0.3.1/llm_batch_helper/providers.py
@@ -1,10 +1,12 @@
 import asyncio
 import os
 from typing import Any, Dict, List, Optional, Tuple, Union
+from datetime import datetime
+import warnings
 
 import httpx
 import openai
-from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
+from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential, before_sleep_log
 from tqdm.asyncio import tqdm_asyncio
 
 from .cache import LLMCache
@@ -12,6 +14,55 @@ from .config import LLMConfig
 from .input_handlers import get_prompts
 
 
+def _run_async_function(async_func, *args, **kwargs):
+    """
+    Run an async function in a way that works in both regular Python and Jupyter notebooks.
+
+    This handles the event loop management properly for different environments.
+    """
+    try:
+        # Try to get the current event loop
+        loop = asyncio.get_running_loop()
+        # If we're in a running loop (like Jupyter), we need to use nest_asyncio
+        try:
+            import nest_asyncio
+            nest_asyncio.apply()
+            return asyncio.run(async_func(*args, **kwargs))
+        except ImportError:
+            # If nest_asyncio is not available, try to run in the current loop
+            # This is a fallback that might work in some cases
+            import concurrent.futures
+            with concurrent.futures.ThreadPoolExecutor() as executor:
+                future = executor.submit(asyncio.run, async_func(*args, **kwargs))
+                return future.result()
+    except RuntimeError:
+        # No event loop running, we can use asyncio.run directly
+        return asyncio.run(async_func(*args, **kwargs))
+
+
+def log_retry_attempt(retry_state):
+    """Custom logging function for retry attempts."""
+    attempt_number = retry_state.attempt_number
+    exception = retry_state.outcome.exception()
+    wait_time = retry_state.next_action.sleep if retry_state.next_action else 0
+
+    error_type = type(exception).__name__
+    error_msg = str(exception)
+
+    # Extract status code if available
+    status_code = "unknown"
+    if hasattr(exception, 'status_code'):
+        status_code = exception.status_code
+    elif hasattr(exception, 'response') and hasattr(exception.response, 'status_code'):
+        status_code = exception.response.status_code
+
+    print(f"🔄 [{datetime.now().strftime('%H:%M:%S')}] Retry attempt {attempt_number}/5:")
+    print(f"   Error: {error_type} (status: {status_code})")
+    print(f"   Message: {error_msg[:100]}{'...' if len(error_msg) > 100 else ''}")
+    print(f"   Waiting {wait_time:.1f}s before next attempt...")
+    print()
+
+
 @retry(
     stop=stop_after_attempt(5),
     wait=wait_exponential(multiplier=1, min=4, max=60),
@@ -25,6 +76,7 @@ from .input_handlers import get_prompts
             openai.APIError,
         )
     ),
+    before_sleep=log_retry_attempt,
     reraise=True,
 )
 async def _get_openai_response_direct(
@@ -130,6 +182,7 @@ async def _get_together_response_direct(
             httpx.RequestError,
         )
     ),
+    before_sleep=log_retry_attempt,
    reraise=True,
 )
 async def _get_openrouter_response_direct(
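The two hunks above only add `before_sleep=log_retry_attempt` to decorators that already existed. For readers unfamiliar with the hook: tenacity calls it with a `RetryCallState` after each failed attempt, just before sleeping. A standalone toy sketch of the same mechanism (not package code; the function and error here are invented for illustration):

```python
import random

from tenacity import retry, stop_after_attempt, wait_exponential

def announce_retry(retry_state):
    """before_sleep hook: runs after a failure, before tenacity sleeps."""
    exc = retry_state.outcome.exception()
    wait = retry_state.next_action.sleep if retry_state.next_action else 0
    print(f"attempt {retry_state.attempt_number} failed with {type(exc).__name__}; sleeping {wait:.1f}s")

@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=1, max=5),
    before_sleep=announce_retry,
    reraise=True,
)
def flaky():
    # Fails most of the time so the hook actually fires.
    if random.random() < 0.7:
        raise ConnectionError("simulated transient failure")
    return "ok"

try:
    print(flaky())
except ConnectionError:
    print("still failing after 3 attempts")
```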
@@ -214,7 +267,7 @@ async def get_llm_response_with_internal_retry(
     }
 
 
-async def process_prompts_batch(
+async def process_prompts_batch_async(
     prompts: Optional[List[Union[str, Tuple[str, str], Dict[str, Any]]]] = None,
     input_dir: Optional[str] = None,
     config: LLMConfig = None,
@@ -270,6 +323,57 @@ async def process_prompts_batch(
     return results
 
 
+def process_prompts_batch(
+    prompts: Optional[List[Union[str, Tuple[str, str], Dict[str, Any]]]] = None,
+    input_dir: Optional[str] = None,
+    config: LLMConfig = None,
+    provider: str = "openai",
+    desc: str = "Processing prompts",
+    cache_dir: Optional[str] = None,
+    force: bool = False,
+) -> Dict[str, Dict[str, Union[str, Dict]]]:
+    """
+    Process a batch of prompts through the LLM (synchronous version).
+
+    This is the main user-facing function that works in both regular Python scripts
+    and Jupyter notebooks without requiring async/await syntax.
+
+    Args:
+        prompts: Optional list of prompts in any supported format (string, tuple, or dict)
+        input_dir: Optional path to directory containing prompt files
+        config: LLM configuration
+        provider: LLM provider to use ("openai", "together", or "openrouter")
+        desc: Description for progress bar
+        cache_dir: Optional directory for caching responses
+        force: If True, force regeneration even if cached response exists
+
+    Returns:
+        Dict mapping prompt IDs to their responses
+
+    Note:
+        Either prompts or input_dir must be provided, but not both.
+
+    Example:
+        >>> from llm_batch_helper import LLMConfig, process_prompts_batch
+        >>> config = LLMConfig(model_name="gpt-4o-mini")
+        >>> results = process_prompts_batch(
+        ...     prompts=["What is 2+2?", "What is the capital of France?"],
+        ...     config=config,
+        ...     provider="openai"
+        ... )
+    """
+    return _run_async_function(
+        process_prompts_batch_async,
+        prompts=prompts,
+        input_dir=input_dir,
+        config=config,
+        provider=provider,
+        desc=desc,
+        cache_dir=cache_dir,
+        force=force,
+    )
+
+
 async def _process_single_prompt_attempt_with_verification(
     prompt_id: str,
     prompt_text: str,
@@ -302,6 +406,9 @@ async def _process_single_prompt_attempt_with_verification(
     # Process the prompt
     last_exception_details = None
     for attempt in range(config.max_retries):
+        if attempt > 0:
+            print(f"🔁 [{datetime.now().strftime('%H:%M:%S')}] Application-level retry {attempt+1}/{config.max_retries} for prompt: {prompt_id}")
+
         try:
             # Get LLM response
             llm_response_data = await get_llm_response_with_internal_retry(
@@ -309,7 +416,12 @@ async def _process_single_prompt_attempt_with_verification(
             )
 
             if "error" in llm_response_data:
+                print(f"❌ [{datetime.now().strftime('%H:%M:%S')}] API call failed on attempt {attempt+1}: {llm_response_data.get('error', 'Unknown error')}")
                 last_exception_details = llm_response_data
+                if attempt < config.max_retries - 1:
+                    wait_time = min(2 * 2**attempt, 30)
+                    print(f"   Waiting {wait_time}s before next application retry...")
+                    await asyncio.sleep(wait_time)
                 continue
 
             # Verify response if callback provided
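The added application-level wait is `min(2 * 2**attempt, 30)`, layered on top of the per-call tenacity backoff (`wait_exponential(multiplier=1, min=4, max=60)`) shown earlier. A quick check of the schedule it produces with the new `max_retries=5` default (no wait follows the final attempt):

```python
max_retries = 5  # LLMConfig default in 0.3.1

# Wait applied after each failed application-level attempt except the last one.
waits = [min(2 * 2**attempt, 30) for attempt in range(max_retries - 1)]
print(waits)  # [2, 4, 8, 16]
```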
@@ -329,7 +441,6 @@ async def _process_single_prompt_attempt_with_verification(
                 }
                 if attempt == config.max_retries - 1:
                     return prompt_id, last_exception_details
-                await asyncio.sleep(min(2 * 2**attempt, 30))
                 continue
 
             # Save to cache if cache_dir provided
@@ -346,7 +457,7 @@ async def _process_single_prompt_attempt_with_verification(
             }
             if attempt == config.max_retries - 1:
                 return prompt_id, last_exception_details
-
+            # Sleep is now handled above with logging
             continue
 
     return prompt_id, last_exception_details or {
--- llm_batch_helper-0.2.0/pyproject.toml
+++ llm_batch_helper-0.3.1/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "llm_batch_helper"
-version = "0.2.0"
-description = "A Python package that enables batch submission of prompts to LLM APIs, with built-in async capabilities"
+version = "0.3.1"
+description = "A Python package that enables batch submission of prompts to LLM APIs, with simplified interface and built-in async capabilities handled implicitly."
 authors = ["Tianyi Peng <tianyipeng95@gmail.com>"]
 readme = "README.md"
 license = "MIT"