flexllm-0.3.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. flexllm/__init__.py +224 -0
  2. flexllm/__main__.py +1096 -0
  3. flexllm/async_api/__init__.py +9 -0
  4. flexllm/async_api/concurrent_call.py +100 -0
  5. flexllm/async_api/concurrent_executor.py +1036 -0
  6. flexllm/async_api/core.py +373 -0
  7. flexllm/async_api/interface.py +12 -0
  8. flexllm/async_api/progress.py +277 -0
  9. flexllm/base_client.py +988 -0
  10. flexllm/batch_tools/__init__.py +16 -0
  11. flexllm/batch_tools/folder_processor.py +317 -0
  12. flexllm/batch_tools/table_processor.py +363 -0
  13. flexllm/cache/__init__.py +10 -0
  14. flexllm/cache/response_cache.py +293 -0
  15. flexllm/chain_of_thought_client.py +1120 -0
  16. flexllm/claudeclient.py +402 -0
  17. flexllm/client_pool.py +698 -0
  18. flexllm/geminiclient.py +563 -0
  19. flexllm/llm_client.py +523 -0
  20. flexllm/llm_parser.py +60 -0
  21. flexllm/mllm_client.py +559 -0
  22. flexllm/msg_processors/__init__.py +174 -0
  23. flexllm/msg_processors/image_processor.py +729 -0
  24. flexllm/msg_processors/image_processor_helper.py +485 -0
  25. flexllm/msg_processors/messages_processor.py +341 -0
  26. flexllm/msg_processors/unified_processor.py +1404 -0
  27. flexllm/openaiclient.py +256 -0
  28. flexllm/pricing/__init__.py +104 -0
  29. flexllm/pricing/data.json +1201 -0
  30. flexllm/pricing/updater.py +223 -0
  31. flexllm/provider_router.py +213 -0
  32. flexllm/token_counter.py +270 -0
  33. flexllm/utils/__init__.py +1 -0
  34. flexllm/utils/core.py +41 -0
  35. flexllm-0.3.3.dist-info/METADATA +573 -0
  36. flexllm-0.3.3.dist-info/RECORD +39 -0
  37. flexllm-0.3.3.dist-info/WHEEL +4 -0
  38. flexllm-0.3.3.dist-info/entry_points.txt +3 -0
  39. flexllm-0.3.3.dist-info/licenses/LICENSE +201 -0
flexllm/utils/core.py ADDED
@@ -0,0 +1,41 @@
+ """Core utilities for flexllm"""
+
+ from functools import wraps
+ import asyncio
+ import logging
+
+
+ def async_retry(
+     retry_times: int = 3,
+     retry_delay: float = 1.0,
+     exceptions: tuple = (Exception,),
+     logger=None,
+ ):
+     """
+     Async retry decorator
+
+     Args:
+         retry_times: Maximum retry count
+         retry_delay: Delay between retries (seconds)
+         exceptions: Exception types to retry on
+         logger: Logger instance
+     """
+     if logger is None:
+         logger = logging.getLogger(__name__)
+
+     def decorator(func):
+         @wraps(func)
+         async def wrapper(*args, **kwargs):
+             for attempt in range(retry_times):
+                 try:
+                     return await func(*args, **kwargs)
+                 except exceptions as e:
+                     if attempt == retry_times - 1:
+                         raise
+                     logger.warning(f"Attempt {attempt + 1} failed: {str(e)}")
+                     await asyncio.sleep(retry_delay)
+             return await func(*args, **kwargs)
+
+         return wrapper
+
+     return decorator
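For reference, a minimal usage sketch of the `async_retry` decorator added above. The flaky coroutine, its module-level counter, and the chosen arguments are illustrative assumptions, not part of the package; only the decorator itself and the `flexllm.utils.core` module path come from this diff.

```python
import asyncio

from flexllm.utils.core import async_retry

attempts = 0  # module-level counter, only to make the example deterministic


@async_retry(retry_times=3, retry_delay=0.5, exceptions=(ConnectionError,))
async def flaky_call() -> str:
    """Hypothetical coroutine that fails twice before succeeding."""
    global attempts
    attempts += 1
    if attempts < 3:
        # Caught by the decorator, logged, then retried after 0.5 s
        raise ConnectionError("transient failure")
    return "ok"


print(asyncio.run(flaky_call()))  # -> "ok" after two retried failures
```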
flexllm-0.3.3.dist-info/METADATA ADDED
@@ -0,0 +1,573 @@
+ Metadata-Version: 2.4
+ Name: flexllm
+ Version: 0.3.3
+ Summary: High-performance LLM client with batch processing, caching, and checkpoint recovery
+ Project-URL: Homepage, https://github.com/KenyonY/flexllm
+ Project-URL: Repository, https://github.com/KenyonY/flexllm
+ Project-URL: Documentation, https://github.com/KenyonY/flexllm#readme
+ Project-URL: Issues, https://github.com/KenyonY/flexllm/issues
+ Author-email: kunyuan <beidongjiedeguang@gmail.com>
+ License: MIT
+ License-File: LICENSE
+ Keywords: anthropic,async,batch,cache,claude,gemini,llm,multimodal,openai
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.10
+ Requires-Dist: aiohttp>=3.8.0
+ Requires-Dist: aiolimiter>=1.1.0
+ Requires-Dist: json5>=0.9.0
+ Requires-Dist: loguru>=0.6.0
+ Requires-Dist: numpy
+ Requires-Dist: orjson
+ Requires-Dist: pillow
+ Requires-Dist: requests>=2.28.0
+ Requires-Dist: rich>=12.0.0
+ Requires-Dist: tqdm>=4.60.0
+ Provides-Extra: all
+ Requires-Dist: flaxkv2>=0.1.5; extra == 'all'
+ Requires-Dist: google-auth>=2.0.0; extra == 'all'
+ Requires-Dist: opencv-python; extra == 'all'
+ Requires-Dist: pyyaml>=6.0; extra == 'all'
+ Requires-Dist: tiktoken>=0.5.0; extra == 'all'
+ Requires-Dist: typer>=0.9.0; extra == 'all'
+ Provides-Extra: cache
+ Requires-Dist: flaxkv2>=0.1.5; extra == 'cache'
+ Provides-Extra: cli
+ Requires-Dist: pyyaml>=6.0; extra == 'cli'
+ Requires-Dist: typer>=0.9.0; extra == 'cli'
+ Provides-Extra: dev
+ Requires-Dist: black>=23.0.0; extra == 'dev'
+ Requires-Dist: isort>=5.12.0; extra == 'dev'
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == 'dev'
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
+ Provides-Extra: image
+ Requires-Dist: opencv-python; extra == 'image'
+ Provides-Extra: test
+ Requires-Dist: flaxkv2>=0.1.5; extra == 'test'
+ Requires-Dist: opencv-python; extra == 'test'
+ Requires-Dist: pandas>=1.3.0; extra == 'test'
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == 'test'
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
+ Provides-Extra: token
+ Requires-Dist: tiktoken>=0.5.0; extra == 'token'
+ Provides-Extra: vertex
+ Requires-Dist: google-auth>=2.0.0; extra == 'vertex'
+ Description-Content-Type: text/markdown
+
+ <h1 align="center">flexllm</h1>
+
+ <p align="center">
+   <strong>Production-grade LLM client with checkpoint recovery, response caching, and multi-provider support</strong>
+ </p>
+
+ <p align="center">
+   <a href="https://pypi.org/project/flexllm/">
+     <img src="https://img.shields.io/pypi/v/flexllm?color=brightgreen&style=flat-square" alt="PyPI version">
+   </a>
+   <a href="https://github.com/KenyonY/flexllm/blob/main/LICENSE">
+     <img alt="License" src="https://img.shields.io/github/license/KenyonY/flexllm.svg?color=blue&style=flat-square">
+   </a>
+   <a href="https://pypistats.org/packages/flexllm">
+     <img alt="pypi downloads" src="https://img.shields.io/pypi/dm/flexllm?style=flat-square">
+   </a>
+ </p>
+
+ ---
+
+ ## Features
+
+ | Feature | Description |
+ |---------|-------------|
+ | **Checkpoint Recovery** | Batch jobs auto-resume from interruption - process millions of requests without losing progress |
+ | **Response Caching** | Built-in intelligent caching with TTL and IPC multi-process sharing - avoid duplicate API calls |
+ | **Multi-Provider** | One interface for OpenAI, Gemini, Claude, and any OpenAI-compatible API (vLLM, Ollama, etc.) |
+ | **High-Performance Async** | Fine-grained concurrency control, QPS limiting, and streaming batch results |
+ | **Load Balancing** | Multi-endpoint distribution with automatic failover (round_robin/weighted/random/fallback) |
+
+ ---
+
+ ## Core Strengths
+
+ ### 1. Checkpoint Recovery - Never Lose Progress
+
+ Process millions of requests without fear of interruption. When your batch job crashes at 3 AM, just restart it - flexllm picks up exactly where it left off.
+
+ ```python
+ # Process 100,000 requests - if interrupted, resume automatically
+ results = await client.chat_completions_batch(
+     messages_list,
+     output_jsonl="results.jsonl", # Progress saved here
+ )
+ # Ctrl+C at 50,000? No problem. Re-run and it continues from 50,001.
+ ```
+
+ ### 2. Response Caching - Save Money, Save Time
+
+ Built-in intelligent caching avoids duplicate API calls. Same question? Instant answer from cache.
+
+ ```python
+ client = LLMClient(
+     model="gpt-4",
+     cache=ResponseCacheConfig.with_ttl(3600), # 1 hour cache
+ )
+
+ # First call: API request (~2s, ~$0.01)
+ result1 = await client.chat_completions(messages)
+
+ # Second call: Cache hit (~0.001s, $0)
+ result2 = await client.chat_completions(messages)
+ ```
+
+ Supports multi-process cache sharing via IPC - perfect for distributed workloads.
+
+ ### 3. One Interface, All Providers
+
+ Write once, run everywhere. Switch between OpenAI, Gemini, Claude, or self-hosted models without changing your code.
+
+ ```python
+ # OpenAI
+ client = LLMClient(provider="openai", base_url="https://api.openai.com/v1", ...)
+
+ # Gemini
+ client = LLMClient(provider="gemini", api_key="...", model="gemini-2.0-flash")
+
+ # Claude
+ client = LLMClient(provider="claude", api_key="...", model="claude-sonnet-4-20250514")
+
+ # Self-hosted (vLLM, Ollama, etc.)
+ client = LLMClient(base_url="http://localhost:8000/v1", model="qwen2.5")
+
+ # Same API for all:
+ result = await client.chat_completions(messages)
+ ```
+
+ ### 4. High-Performance Async Engine
+
+ Maximize throughput with fine-grained concurrency control and QPS limiting.
+
+ ```python
+ client = LLMClient(
+     concurrency_limit=100, # 100 concurrent requests
+     max_qps=50, # Rate limit: 50 req/sec
+     retry_times=3, # Auto-retry on failure
+ )
+
+ # Process 10,000 requests with optimal parallelism
+ results = await client.chat_completions_batch(messages_list, show_progress=True)
+ ```
+
+ Streaming results - process results as they complete, don't wait for all:
+
+ ```python
+ async for result in client.iter_chat_completions_batch(messages_list):
+     process(result) # Handle each result immediately
+ ```
+
+ ### 5. Load Balancing & Failover
+
+ Distribute workloads across multiple endpoints with automatic failover.
+
+ ```python
+ pool = LLMClientPool(
+     endpoints=[
+         {"base_url": "http://gpu1:8000/v1", "model": "qwen"},
+         {"base_url": "http://gpu2:8000/v1", "model": "qwen"},
+         {"base_url": "http://gpu3:8000/v1", "model": "qwen"},
+     ],
+     load_balance="round_robin", # or "weighted", "random", "fallback"
+     fallback=True, # Auto-switch on failure
+ )
+
+ # Requests automatically distributed across healthy endpoints
+ results = await pool.chat_completions_batch(messages_list, distribute=True)
+ ```
+
+ ### 6. Thinking Mode Support
+
+ Unified interface for reasoning models - DeepSeek-R1, Qwen3, Claude extended thinking, Gemini thinking.
+
+ ```python
+ result = await client.chat_completions(
+     messages,
+     thinking=True, # Enable thinking
+     return_raw=True,
+ )
+
+ # Unified parsing across all providers
+ parsed = client.parse_thoughts(result.data)
+ print("Thinking:", parsed["thought"])
+ print("Answer:", parsed["answer"])
+ ```
+
+ ---
+
+ ## Installation
+
+ ```bash
+ pip install flexllm
+
+ # With caching support
+ pip install flexllm[cache]
+
+ # With CLI
+ pip install flexllm[cli]
+
+ # All features
+ pip install flexllm[all]
+ ```
+
+ ## Quick Start
+
+ ### Single Request
+
+ ```python
+ from flexllm import LLMClient
+
+ client = LLMClient(
+     model="gpt-4",
+     base_url="https://api.openai.com/v1",
+     api_key="your-api-key"
+ )
+
+ # Async
+ response = await client.chat_completions([
+     {"role": "user", "content": "Hello!"}
+ ])
+
+ # Sync
+ response = client.chat_completions_sync([
+     {"role": "user", "content": "Hello!"}
+ ])
+ ```
+
+ ### Batch Processing with Checkpoint Recovery
+
+ ```python
+ from flexllm import LLMClient
+
+ client = LLMClient(
+     model="gpt-4",
+     base_url="https://api.openai.com/v1",
+     api_key="your-api-key",
+     concurrency_limit=50,
+     max_qps=100,
+ )
+
+ messages_list = [
+     [{"role": "user", "content": f"Question {i}"}]
+     for i in range(10000)
+ ]
+
+ # If interrupted, re-running resumes from where it stopped
+ results = await client.chat_completions_batch(
+     messages_list,
+     output_jsonl="results.jsonl",
+     show_progress=True,
+ )
+ ```
+
+ ### Response Caching
+
+ ```python
+ from flexllm import LLMClient, ResponseCacheConfig
+
+ client = LLMClient(
+     model="gpt-4",
+     base_url="https://api.openai.com/v1",
+     api_key="your-api-key",
+     cache=ResponseCacheConfig.with_ttl(3600), # 1 hour TTL
+ )
+
+ # Duplicate requests hit cache automatically
+ result1 = await client.chat_completions(messages) # API call
+ result2 = await client.chat_completions(messages) # Cache hit (instant)
+
+ # Multi-process cache sharing (IPC mode - default)
+ cache = ResponseCacheConfig.ipc(ttl=86400) # 24h, shared across processes
+ ```
+
+ ### Streaming Response
+
+ ```python
+ async for chunk in client.chat_completions_stream(messages):
+     print(chunk, end="", flush=True)
+ ```
+
+ ### Multi-Modal (Vision)
+
+ ```python
+ from flexllm import MllmClient
+
+ client = MllmClient(
+     base_url="https://api.openai.com/v1",
+     api_key="your-api-key",
+     model="gpt-4o",
+ )
+
+ messages = [
+     {
+         "role": "user",
+         "content": [
+             {"type": "text", "text": "What's in this image?"},
+             {"type": "image_url", "image_url": {"url": "path/to/image.jpg"}}
+         ]
+     }
+ ]
+
+ response = await client.call_llm([messages])
+ ```
+
+ ### Load Balancing with Failover
+
+ ```python
+ from flexllm import LLMClientPool
+
+ pool = LLMClientPool(
+     endpoints=[
+         {"base_url": "http://host1:8000/v1", "api_key": "key1", "model": "qwen"},
+         {"base_url": "http://host2:8000/v1", "api_key": "key2", "model": "qwen"},
+     ],
+     load_balance="round_robin",
+     fallback=True,
+ )
+
+ # Single request with automatic failover
+ result = await pool.chat_completions(messages)
+
+ # Batch requests distributed across endpoints
+ results = await pool.chat_completions_batch(messages_list, distribute=True)
+ ```
+
+ ### Gemini Client
+
+ ```python
+ from flexllm import GeminiClient
+
+ # Gemini Developer API
+ client = GeminiClient(
+     model="gemini-2.0-flash",
+     api_key="your-gemini-api-key"
+ )
+
+ # With thinking mode
+ response = await client.chat_completions(
+     messages,
+     thinking="high", # False, True, "minimal", "low", "medium", "high"
+ )
+
+ # Vertex AI mode
+ client = GeminiClient(
+     model="gemini-2.0-flash",
+     project_id="your-project-id",
+     location="us-central1",
+     use_vertex_ai=True,
+ )
+ ```
+
+ ### Claude Client
+
+ ```python
+ from flexllm import LLMClient, ClaudeClient
+
+ # Using unified LLMClient (recommended)
+ client = LLMClient(
+     provider="claude",
+     api_key="your-anthropic-key",
+     model="claude-sonnet-4-20250514",
+ )
+
+ response = await client.chat_completions([
+     {"role": "user", "content": "Hello, Claude!"}
+ ])
+
+ # With extended thinking
+ result = await client.chat_completions(
+     messages,
+     thinking=True,
+     return_raw=True,
+ )
+ parsed = client.parse_thoughts(result.data)
+ ```
+
+ ### Function Calling (Tool Use)
+
+ ```python
+ from flexllm import LLMClient
+
+ client = LLMClient(
+     base_url="https://api.openai.com/v1",
+     api_key="your-api-key",
+     model="gpt-4",
+ )
+
+ tools = [
+     {
+         "type": "function",
+         "function": {
+             "name": "get_weather",
+             "description": "Get current weather for a location",
+             "parameters": {
+                 "type": "object",
+                 "properties": {
+                     "location": {"type": "string", "description": "City name"}
+                 },
+                 "required": ["location"]
+             }
+         }
+     }
+ ]
+
+ result = await client.chat_completions(
+     messages=[{"role": "user", "content": "What's the weather in Tokyo?"}],
+     tools=tools,
+     return_usage=True,
+ )
+
+ if result.tool_calls:
+     for tool_call in result.tool_calls:
+         print(f"Function: {tool_call.function['name']}")
+         print(f"Arguments: {tool_call.function['arguments']}")
+ ```
+
+ ## CLI Usage
+
+ ```bash
+ # Quick ask
+ flexllm ask "What is Python?"
+ flexllm ask "Explain this" -s "You are a code expert"
+ echo "long text" | flexllm ask "Summarize"
+
+ # Interactive chat
+ flexllm chat
+ flexllm chat --model=gpt-4 "Hello"
+
+ # Batch processing with checkpoint recovery
+ flexllm batch input.jsonl -o output.jsonl
+
+ # List models
+ flexllm models # Remote models
+ flexllm list_models # Configured models
+
+ # Test connection
+ flexllm test
+
+ # Initialize config
+ flexllm init
+ ```
+
+ ### CLI Configuration
+
+ Create `~/.flexllm/config.yaml`:
+
+ ```yaml
+ default: "gpt-4"
+
+ models:
+   - id: gpt-4
+     name: gpt-4
+     provider: openai
+     base_url: https://api.openai.com/v1
+     api_key: your-api-key
+
+   - id: local
+     name: local-ollama
+     provider: openai
+     base_url: http://localhost:11434/v1
+     api_key: EMPTY
+ ```
+
+ Or use environment variables:
+
+ ```bash
+ export FLEXLLM_BASE_URL="https://api.openai.com/v1"
+ export FLEXLLM_API_KEY="your-key"
+ export FLEXLLM_MODEL="gpt-4"
+ ```
+
+ ## API Reference
+
+ ### LLMClient
+
+ ```python
+ LLMClient(
+     provider: str = "auto", # "auto", "openai", "gemini", "claude"
+     model: str, # Model name
+     base_url: str, # API base URL
+     api_key: str = "EMPTY", # API key
+     cache: ResponseCacheConfig, # Cache config
+     concurrency_limit: int = 10, # Max concurrent requests
+     max_qps: float = None, # Max requests per second
+     retry_times: int = 3, # Retry count on failure
+     retry_delay: float = 1.0, # Delay between retries
+     timeout: int = 120, # Request timeout (seconds)
+ )
+ ```
+
+ ### Methods
+
+ | Method | Description |
+ |--------|-------------|
+ | `chat_completions(messages)` | Single async request |
+ | `chat_completions_sync(messages)` | Single sync request |
+ | `chat_completions_batch(messages_list)` | Batch async with checkpoint |
+ | `chat_completions_batch_sync(messages_list)` | Batch sync with checkpoint |
+ | `iter_chat_completions_batch(messages_list)` | Streaming batch results |
+ | `chat_completions_stream(messages)` | Token-by-token streaming |
+ | `parse_thoughts(response_data)` | Parse thinking content |
+
+ ### ResponseCacheConfig
+
+ ```python
+ # Shortcuts
+ ResponseCacheConfig.with_ttl(3600) # 1 hour TTL
+ ResponseCacheConfig.persistent() # Never expire
+ ResponseCacheConfig.ipc(ttl=86400) # Multi-process shared (default)
+ ResponseCacheConfig.local(ttl=86400) # Single process only
+
+ # Full config
+ ResponseCacheConfig(
+     enabled: bool = False,
+     ttl: int = 86400, # Time-to-live in seconds
+     cache_dir: str = "~/.cache/flexllm/llm_response",
+     use_ipc: bool = True, # Multi-process cache sharing
+ )
+ ```
+
+ ### Token Counting
+
+ ```python
+ from flexllm import count_tokens, estimate_cost, estimate_batch_cost
+
+ tokens = count_tokens("Hello world", model="gpt-4")
+ cost = estimate_cost(tokens, model="gpt-4", is_input=True)
+ total_cost = estimate_batch_cost(messages_list, model="gpt-4")
+ ```
+
+ ## Architecture
+
+ ```
+ LLMClient (Unified entry point)
+ ├── OpenAIClient (OpenAI-compatible APIs)
+ ├── GeminiClient (Google Gemini)
+ └── ClaudeClient (Anthropic Claude)
+
+ └── LLMClientBase (Abstract base - 4 methods to implement)
+
+ ├── ConcurrentRequester (Async engine with QPS control)
+ ├── ResponseCache (FlaxKV2-based caching with IPC)
+ └── ImageProcessor (Multi-modal support)
+
+ LLMClientPool (Multi-endpoint load balancing)
+ └── ProviderRouter (round_robin / weighted / random / fallback)
+ ```
+
+ ## License
+
+ Apache 2.0
flexllm-0.3.3.dist-info/RECORD ADDED
@@ -0,0 +1,39 @@
+ flexllm/__init__.py,sha256=GMd4DJMgpxUGaLZ55X5pdSJoa-79lZCIBtyPr63IHH4,7160
+ flexllm/__main__.py,sha256=n63FUI84kJ81A1PQW0Takd9s4yVExGCzyGDDw0gTMzY,40789
+ flexllm/base_client.py,sha256=_82yHc7QieIr0OMFU5hqjrshnLq1ROIyzdjVdFUTJTs,42204
+ flexllm/chain_of_thought_client.py,sha256=ohmUaEWtvfIFFrw_h2ADNSkiEVE_qryLr18KtWzU00Y,41351
+ flexllm/claudeclient.py,sha256=BTmkh5bc0Lgvo90v-XB_bc_vFPaaUhc8hol2nXc12ec,15060
+ flexllm/client_pool.py,sha256=yZlCO7aopAgKhpHn5tG6OwCzLzMcAFt4ovH1MEJ8-AY,24229
+ flexllm/geminiclient.py,sha256=2K03uGIpED7ryul7dmftpIq5OuY_ZIIVYHbnIUADu9s,20306
+ flexllm/llm_client.py,sha256=U0zoMsf1q7VX0CYlB6MGgjqynlTCs8QtqTkIzzStkV4,19002
+ flexllm/llm_parser.py,sha256=raQnxIuSUNbRvrBLeZnpIIkDCcO2UzThtplGy8Cb1Y8,1832
+ flexllm/mllm_client.py,sha256=a6C2JJPrA6JW3XN07ZPQ0xVqc2ketMEQWKCK0uupEvw,17251
+ flexllm/openaiclient.py,sha256=pb8ttzPYMC7pLVMCpRXknzs2-kC-CmM73nUHw5bo7gw,8377
+ flexllm/provider_router.py,sha256=PElAQ4v6Go21VxTm9LF5_w0Pkt6GcOYJDp-OKRIEFtU,5978
+ flexllm/token_counter.py,sha256=GHzZwniQo4uaDfXRgljzVvoTfcKzkZTpP4S3VJ2PGXE,8188
+ flexllm/async_api/__init__.py,sha256=LqybQzdw2JfpdOMMeif9qlTIm68oOg3UseFBC17lcDg,196
+ flexllm/async_api/concurrent_call.py,sha256=pymX43aQ4HdiGUk5oPheOsWssjnitDz0nLbQ0N7B5tQ,2904
+ flexllm/async_api/concurrent_executor.py,sha256=-BzI2QH_RkzrCO1_RY0EkOWyAS24HOWncED43SchTq0,33809
+ flexllm/async_api/core.py,sha256=UqFQ5A-ObJhmRUS8nheB6UNFCCdfhJk5TkRK3goZSzU,13350
+ flexllm/async_api/interface.py,sha256=s6pDxAjm0r0rfXWYvEnY9qmGl571thm7cKHqPlyxfGU,227
+ flexllm/async_api/progress.py,sha256=n8Mlf6rpNotEdZ6n6snkUN9JhSorcGCUR8ZGkue9yTs,10537
+ flexllm/batch_tools/__init__.py,sha256=2hK377iLyFxv5HXc2CK4b6ff_gC5eUKlyv11vhXEhaU,392
+ flexllm/batch_tools/folder_processor.py,sha256=FjkeGqowRxtvvCYpGYHWGx7esdJ5OOpPMo3cQ9oR2QE,10695
+ flexllm/batch_tools/table_processor.py,sha256=VRH5kZK3QPcVrc_ZSQSszMPYlw0lzMLk3Yata6Ub9FE,11490
+ flexllm/cache/__init__.py,sha256=yVRpoQNltmcupnOt5daiFrJ5zv7SV71oOsBkbK390tA,241
+ flexllm/cache/response_cache.py,sha256=lkOKESR3aAXkAde6s0zL5KsH2l8vmDI6QMiDKyTKv_U,8300
+ flexllm/msg_processors/__init__.py,sha256=je_TqbV_QeSFHJV-zUq9hhyml7rsnKMZw8gYwzDCqzs,5799
+ flexllm/msg_processors/image_processor.py,sha256=gg2eJYav1_AhzB1JYCBDP8wxfNHEarKAyhINZsnXC9o,28915
+ flexllm/msg_processors/image_processor_helper.py,sha256=QIc_MkD2EYPmc1SyfBDani9yfiQ62FhrWCzn-0k9wRI,16067
+ flexllm/msg_processors/messages_processor.py,sha256=Rzn1lt361gwNctyAhUmiEkZgTogsGXEYE1LSApqGJe4,13199
+ flexllm/msg_processors/unified_processor.py,sha256=IKRO1rR0gJxir_nPnahVYboqufRPsnSjvtnjkSwLjhc,49103
+ flexllm/pricing/__init__.py,sha256=4HjLPiBy5WHDaQ0Idp-sVPUt5W_UxZeeq38nJwIKga0,2586
+ flexllm/pricing/data.json,sha256=QKEmoa6WjC3CpH8nWwxYL0cy5VXS15LI8UDNe2QwqXI,22334
+ flexllm/pricing/updater.py,sha256=TvKOtBLgc8VWwBY4pwOQlTDZmJ2kEZe1GYxYh_GE8Rs,5837
+ flexllm/utils/__init__.py,sha256=-BAWBrc4Kc18yrPK_OTOa4JaDyx2OgFcRuIiQe-zYeI,30
+ flexllm/utils/core.py,sha256=eraE-FMnQ4vYmdjgs82ZdakcHTEn9dXbTHFZ76plnWw,1079
+ flexllm-0.3.3.dist-info/METADATA,sha256=blNSncn_AILRQwXgTrmAw46NIiv-IDjFfvhf6-DahpM,15697
+ flexllm-0.3.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ flexllm-0.3.3.dist-info/entry_points.txt,sha256=BWKjRwFpLLBW8GLo6M-B15VYWiefec4kuFFdVtyOSLY,79
+ flexllm-0.3.3.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ flexllm-0.3.3.dist-info/RECORD,,
flexllm-0.3.3.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
flexllm-0.3.3.dist-info/entry_points.txt ADDED
@@ -0,0 +1,3 @@
+ [console_scripts]
+ flexllm = flexllm.__main__:main
+ xllm = flexllm.__main__:main
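Both console scripts point at the same callable, so `flexllm` and `xllm` behave identically. A pip-generated wrapper for either script is roughly equivalent to the sketch below (simplified; the exact generated file varies by installer):

```python
import sys

from flexllm.__main__ import main

if __name__ == "__main__":
    # What the `flexllm` / `xllm` console scripts do: call main() and
    # use its return value as the process exit code.
    sys.exit(main())
```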