python-infrakit-dev 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infrakit/__init__.py +0 -0
- infrakit/cli/__init__.py +1 -0
- infrakit/cli/commands/__init__.py +1 -0
- infrakit/cli/commands/deps.py +530 -0
- infrakit/cli/commands/init.py +129 -0
- infrakit/cli/commands/llm.py +295 -0
- infrakit/cli/commands/logger.py +160 -0
- infrakit/cli/commands/module.py +342 -0
- infrakit/cli/commands/time.py +81 -0
- infrakit/cli/main.py +65 -0
- infrakit/core/__init__.py +0 -0
- infrakit/core/config/__init__.py +0 -0
- infrakit/core/config/converter.py +480 -0
- infrakit/core/config/exporter.py +304 -0
- infrakit/core/config/loader.py +713 -0
- infrakit/core/config/validator.py +389 -0
- infrakit/core/logger/__init__.py +21 -0
- infrakit/core/logger/formatters.py +143 -0
- infrakit/core/logger/handlers.py +322 -0
- infrakit/core/logger/retention.py +176 -0
- infrakit/core/logger/setup.py +314 -0
- infrakit/deps/__init__.py +239 -0
- infrakit/deps/clean.py +141 -0
- infrakit/deps/depfile.py +405 -0
- infrakit/deps/health.py +357 -0
- infrakit/deps/optimizer.py +642 -0
- infrakit/deps/scanner.py +550 -0
- infrakit/llm/__init__.py +35 -0
- infrakit/llm/batch.py +165 -0
- infrakit/llm/client.py +575 -0
- infrakit/llm/key_manager.py +728 -0
- infrakit/llm/llm_readme.md +306 -0
- infrakit/llm/models.py +148 -0
- infrakit/llm/providers/__init__.py +5 -0
- infrakit/llm/providers/base.py +112 -0
- infrakit/llm/providers/gemini.py +164 -0
- infrakit/llm/providers/openai.py +168 -0
- infrakit/llm/rate_limiter.py +54 -0
- infrakit/scaffolder/__init__.py +31 -0
- infrakit/scaffolder/ai.py +508 -0
- infrakit/scaffolder/backend.py +555 -0
- infrakit/scaffolder/cli_tool.py +386 -0
- infrakit/scaffolder/generator.py +338 -0
- infrakit/scaffolder/pipeline.py +562 -0
- infrakit/scaffolder/registry.py +121 -0
- infrakit/time/__init__.py +60 -0
- infrakit/time/profiler.py +511 -0
- python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
- python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
- python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
- python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# infrakit.llm
|
|
2
|
+
|
|
3
|
+
Unified LLM client for OpenAI and Gemini with key rotation, quota tracking,
|
|
4
|
+
rate limiting, and batch processing. Designed for free-tier API keys where
|
|
5
|
+
RPM/daily limits matter.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# core
|
|
13
|
+
pip install pydantic tqdm
|
|
14
|
+
|
|
15
|
+
# for OpenAI
|
|
16
|
+
pip install openai
|
|
17
|
+
|
|
18
|
+
# for Gemini
|
|
19
|
+
pip install google-generativeai
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## File structure
|
|
25
|
+
|
|
26
|
+
```
|
|
27
|
+
infrakit/llm/
|
|
28
|
+
├── __init__.py Public API surface
|
|
29
|
+
├── client.py LLMClient — main entry point
|
|
30
|
+
├── key_manager.py Key rotation, quota, persistence
|
|
31
|
+
├── rate_limiter.py RPM / TPM async and sync gates
|
|
32
|
+
├── batch.py Async + threaded batch engine
|
|
33
|
+
├── models.py Shared types (Prompt, LLMResponse, etc.)
|
|
34
|
+
└── providers/
|
|
35
|
+
├── __init__.py
|
|
36
|
+
├── base.py AbstractProvider + schema validation
|
|
37
|
+
├── openai.py OpenAI provider
|
|
38
|
+
└── gemini.py Gemini provider
|
|
39
|
+
|
|
40
|
+
infrakit/cli/
|
|
41
|
+
└── llm_cmd.py CLI commands (ik llm status, ik llm quota set)
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## Quick start
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from infrakit.llm import LLMClient, Prompt
|
|
50
|
+
|
|
51
|
+
client = LLMClient(
|
|
52
|
+
keys={
|
|
53
|
+
"openai_keys": ["sk-key1", "sk-key2"],
|
|
54
|
+
"gemini_keys": ["AIza-key1"],
|
|
55
|
+
},
|
|
56
|
+
storage_dir="./logs", # key state persisted here
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
# simple prompt
|
|
60
|
+
response = client.generate(Prompt(user="What is 2 + 2?"), provider="openai")
|
|
61
|
+
print(response.content)
|
|
62
|
+
|
|
63
|
+
# system + user split
|
|
64
|
+
response = client.generate(
|
|
65
|
+
Prompt(system="You are a maths tutor.", user="Explain derivatives."),
|
|
66
|
+
provider="gemini",
|
|
67
|
+
)
|
|
68
|
+
print(response.content)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
73
|
+
## Structured output (Pydantic)
|
|
74
|
+
|
|
75
|
+
Pass a Pydantic model as `response_model`. The system will try to parse the
|
|
76
|
+
model's JSON response and validate it against your schema. If validation fails
|
|
77
|
+
after retries, you still get the raw `content` back with `schema_matched=False`.
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from pydantic import BaseModel
|
|
81
|
+
|
|
82
|
+
class Sentiment(BaseModel):
|
|
83
|
+
label: str # "positive" | "negative" | "neutral"
|
|
84
|
+
confidence: float
|
|
85
|
+
|
|
86
|
+
# Your prompt must instruct the model to return JSON — infrakit does not
|
|
87
|
+
# inject instructions automatically.
|
|
88
|
+
response = client.generate(
|
|
89
|
+
Prompt(
|
|
90
|
+
system='Respond ONLY with valid JSON matching: {"label": str, "confidence": float}',
|
|
91
|
+
user="I love this product!",
|
|
92
|
+
),
|
|
93
|
+
provider="openai",
|
|
94
|
+
response_model=Sentiment,
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
if response.schema_matched:
|
|
98
|
+
print(response.parsed.label) # "positive"
|
|
99
|
+
print(response.parsed.confidence) # 0.97
|
|
100
|
+
else:
|
|
101
|
+
print("Schema mismatch — raw response:", response.content)
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Batch processing
|
|
107
|
+
|
|
108
|
+
```python
|
|
109
|
+
from infrakit.llm import Prompt
|
|
110
|
+
|
|
111
|
+
words = ["cat", "dog", "bird", "fish"]
|
|
112
|
+
prompts = [
|
|
113
|
+
Prompt(
|
|
114
|
+
system="Translate to French. Reply with only the translation.",
|
|
115
|
+
user=word,
|
|
116
|
+
)
|
|
117
|
+
for word in words
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
batch = client.batch_generate(prompts, provider="gemini")
|
|
121
|
+
|
|
122
|
+
# results are in the same order as prompts
|
|
123
|
+
for word, result in zip(words, batch.results):
|
|
124
|
+
if result.error:
|
|
125
|
+
print(f"{word}: ERROR — {result.error}")
|
|
126
|
+
else:
|
|
127
|
+
print(f"{word}: {result.content}")
|
|
128
|
+
|
|
129
|
+
print(f"\nTotal tokens: {batch.total_tokens}")
|
|
130
|
+
print(f"Success: {batch.success_count} / Failure: {batch.failure_count}")
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
---
|
|
134
|
+
|
|
135
|
+
## LLMClient parameters
|
|
136
|
+
|
|
137
|
+
| Parameter | Default | Description |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| `keys` | required | `{"openai_keys": [...], "gemini_keys": [...]}` |
|
|
140
|
+
| `storage_dir` | required | Folder for persistent key state |
|
|
141
|
+
| `mode` | `"async"` | `"async"` or `"threaded"` |
|
|
142
|
+
| `max_concurrent` | `3` | Max simultaneous batch requests |
|
|
143
|
+
| `key_retries` | `2` | Retries on same key before rotating |
|
|
144
|
+
| `schema_retries` | `2` | JSON parse/validate retries |
|
|
145
|
+
| `meta_window` | `50` | Recent request metadata records per key |
|
|
146
|
+
| `openai_model` | `"gpt-4o-mini"` | Default OpenAI model |
|
|
147
|
+
| `gemini_model` | `"gemini-1.5-flash"` | Default Gemini model |
|
|
148
|
+
| `show_progress` | `True` | tqdm progress bar for batch calls |
|
|
149
|
+
|
|
150
|
+
---
|
|
151
|
+
|
|
152
|
+
## Quota management
|
|
153
|
+
|
|
154
|
+
### Set quota for a key
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from infrakit.llm import QuotaConfig
|
|
158
|
+
|
|
159
|
+
client.set_quota(
|
|
160
|
+
provider="openai",
|
|
161
|
+
key_id="sk-abc123", # first 8 chars of the key
|
|
162
|
+
quota=QuotaConfig(
|
|
163
|
+
rpm_limit=60, # 60 requests per minute
|
|
164
|
+
tpm_limit=90_000, # 90k tokens per minute
|
|
165
|
+
daily_token_limit=1_000_000,
|
|
166
|
+
reset_hour_utc=0, # resets at midnight UTC
|
|
167
|
+
),
|
|
168
|
+
)
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Only the fields you set are enforced. Unset fields are unconstrained.
|
|
172
|
+
|
|
173
|
+
### Key lifecycle
|
|
174
|
+
|
|
175
|
+
- **active** — key is in normal use.
|
|
176
|
+
- **inactive** — key hit a quota limit (daily tokens, or a hard 401/429 from the API).
|
|
177
|
+
Automatically reactivates at `reset_hour_utc` the following day.
|
|
178
|
+
- Keys are rotated round-robin among all active keys. When a key is deactivated,
|
|
179
|
+
remaining active keys absorb the load.
|
|
180
|
+
|
|
181
|
+
---
|
|
182
|
+
|
|
183
|
+
## Check status (Python)
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
# all keys
|
|
187
|
+
client.print_status()
|
|
188
|
+
|
|
189
|
+
# one provider
|
|
190
|
+
client.print_status(provider="openai")
|
|
191
|
+
|
|
192
|
+
# one key
|
|
193
|
+
client.print_status(provider="openai", key_id="sk-abc123")
|
|
194
|
+
|
|
195
|
+
# raw dict (for programmatic use)
|
|
196
|
+
rows = client.status(provider="openai")
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## CLI
|
|
202
|
+
|
|
203
|
+
Register the LLM commands in your main CLI (add to `infrakit/cli/__init__.py`):
|
|
204
|
+
|
|
205
|
+
```python
|
|
206
|
+
from infrakit.cli.commands.llm import register as register_llm
|
|
207
|
+
register_llm(cli)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
### Commands
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
# show all keys
|
|
214
|
+
ik llm status --storage-dir ./logs
|
|
215
|
+
|
|
216
|
+
# filter by provider
|
|
217
|
+
ik llm status --provider openai --storage-dir ./logs
|
|
218
|
+
|
|
219
|
+
# filter by key (first 8 chars)
|
|
220
|
+
ik llm status --key sk-abc123 --storage-dir ./logs
|
|
221
|
+
|
|
222
|
+
# JSON output
|
|
223
|
+
ik llm status --json --storage-dir ./logs
|
|
224
|
+
|
|
225
|
+
# set quota for a key
|
|
226
|
+
ik llm quota set \
|
|
227
|
+
--provider openai \
|
|
228
|
+
--key sk-abc123 \
|
|
229
|
+
--rpm 60 \
|
|
230
|
+
--tpm 90000 \
|
|
231
|
+
--daily 1000000 \
|
|
232
|
+
--reset-hour 0 \
|
|
233
|
+
--storage-dir ./logs
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
If your keys are stored in a JSON file:
|
|
237
|
+
```bash
|
|
238
|
+
ik llm status --keys-file keys.json --storage-dir ./logs
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
`keys.json` format:
|
|
242
|
+
```json
|
|
243
|
+
{
|
|
244
|
+
"openai_keys": ["sk-key1", "sk-key2"],
|
|
245
|
+
"gemini_keys": ["AIza-key1"]
|
|
246
|
+
}
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
---
|
|
250
|
+
|
|
251
|
+
## Error handling
|
|
252
|
+
|
|
253
|
+
Every `generate()` call returns an `LLMResponse` — it never raises. Check `.error`:
|
|
254
|
+
|
|
255
|
+
```python
|
|
256
|
+
result = client.generate(Prompt(user="Hello"), provider="openai")
|
|
257
|
+
if result.error:
|
|
258
|
+
print(f"Request failed: {result.error}")
|
|
259
|
+
else:
|
|
260
|
+
print(result.content)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
### What infrakit handles automatically
|
|
264
|
+
|
|
265
|
+
| Situation | Behaviour |
|
|
266
|
+
|---|---|
|
|
267
|
+
| Transient error (network, 5xx) | Retry same key up to `key_retries` times with backoff |
|
|
268
|
+
| Quota / auth error (401, 402, 429+quota) | Deactivate key immediately, rotate to next |
|
|
269
|
+
| All keys exhausted | Return `LLMResponse(error="All keys exhausted.")` |
|
|
270
|
+
| RPM limit reached | Async/sync sleep until slot opens |
|
|
271
|
+
| Daily token limit reached | Deactivate key, auto-reactivate at reset hour |
|
|
272
|
+
| Schema validation fails | Return raw content + `schema_matched=False` |
|
|
273
|
+
|
|
274
|
+
---
|
|
275
|
+
|
|
276
|
+
## LLMResponse fields
|
|
277
|
+
|
|
278
|
+
| Field | Type | Description |
|
|
279
|
+
|---|---|---|
|
|
280
|
+
| `content` | `str` | Raw text from the model |
|
|
281
|
+
| `parsed` | `BaseModel \| None` | Validated Pydantic instance (if `response_model` given and matched) |
|
|
282
|
+
| `schema_matched` | `bool` | True if structured output validated successfully |
|
|
283
|
+
| `provider` | `str` | `"openai"` or `"gemini"` |
|
|
284
|
+
| `model` | `str` | Model string used |
|
|
285
|
+
| `key_id` | `str` | First 8 chars of the key used |
|
|
286
|
+
| `input_tokens` | `int` | Prompt token count |
|
|
287
|
+
| `output_tokens` | `int` | Completion token count |
|
|
288
|
+
| `total_tokens` | `int` | input + output |
|
|
289
|
+
| `latency_ms` | `float` | Wall-clock API call time |
|
|
290
|
+
| `error` | `str \| None` | Set on failure |
|
|
291
|
+
|
|
292
|
+
---
|
|
293
|
+
|
|
294
|
+
## What is and isn't stored
|
|
295
|
+
|
|
296
|
+
infrakit stores **no prompt or response content**. Per key, it persists:
|
|
297
|
+
|
|
298
|
+
- Status (active/inactive), deactivation timestamp
|
|
299
|
+
- Quota config (RPM, TPM, daily limit, reset hour)
|
|
300
|
+
- Lifetime totals (requests, tokens in/out, errors)
|
|
301
|
+
- Daily token total + day start epoch
|
|
302
|
+
- RPM window (timestamps of last 60 s of requests)
|
|
303
|
+
- TPM window (timestamps + token counts for last 60 s)
|
|
304
|
+
- Rolling metadata for last N requests: timestamp, model, token counts, latency, success/error code
|
|
305
|
+
|
|
306
|
+
All stored in `{storage_dir}/llm_key_state.json`.
|
infrakit/llm/models.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm.models
|
|
3
|
+
-------------------
|
|
4
|
+
Shared data structures for the LLM subsystem.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import time
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from enum import Enum
|
|
12
|
+
from typing import Any, Optional, Type
|
|
13
|
+
|
|
14
|
+
from pydantic import BaseModel
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# ── enums ──────────────────────────────────────────────────────────────────
|
|
18
|
+
|
|
19
|
+
class Provider(str, Enum):
    """Supported LLM backends."""

    OPENAI = "openai"
    GEMINI = "gemini"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class KeyStatus(str, Enum):
    """Lifecycle state of an API key."""

    ACTIVE = "active"
    # All models exhausted on this key — auto-reactivates after the reset hour.
    INACTIVE = "inactive"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class ModelStatus(str, Enum):
    """Per-model quota state on a single key."""

    ACTIVE = "active"
    # This model's quota is exhausted on this key.
    INACTIVE = "inactive"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# ── prompt input ───────────────────────────────────────────────────────────
|
|
35
|
+
|
|
36
|
+
@dataclass
class Prompt:
    """
    A single LLM prompt, optionally split into system and user parts.

    Examples::

        # combined prompt
        Prompt(user="Tell me about Python.")

        # system + user split
        Prompt(system="You are a helpful assistant.", user="Tell me about Python.")
    """
    user: str                     # user-turn text (required)
    system: Optional[str] = None  # optional system instruction
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# ── response ───────────────────────────────────────────────────────────────
|
|
54
|
+
|
|
55
|
+
@dataclass
class LLMResponse:
    """
    Result of a single generate() call.

    Attributes:
        content: Raw text returned by the model.
        parsed: Validated Pydantic instance when a response_model was
            supplied and validation succeeded; None otherwise.
        schema_matched: True when `parsed` is populated. False means
            validation failed after all retries — `content` still holds
            the raw reply.
        provider: Provider that handled this request.
        model: Model string used (e.g. "gpt-4o-mini").
        key_id: Truncated key identifier (first 8 chars).
        input_tokens: Prompt token count.
        output_tokens: Completion token count.
        total_tokens: input + output.
        latency_ms: Wall-clock duration of the API call in milliseconds.
        error: Set when the request ultimately failed.
    """
    content: str
    parsed: Optional[Any]
    schema_matched: bool
    provider: str
    model: str
    key_id: str
    input_tokens: int
    output_tokens: int
    total_tokens: int
    latency_ms: float
    error: Optional[str] = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# ── request metadata (stored for transparency) ────────────────────────────
|
|
90
|
+
|
|
91
|
+
@dataclass
class RequestMeta:
    """
    Lightweight record of one API call kept in the rolling window.

    NO prompt or response content is stored here — only identifiers,
    token accounting and timing, for transparency.
    """
    timestamp: float = field(default_factory=time.time)  # epoch seconds at record time
    provider: str = ""
    key_id: str = ""
    model: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    latency_ms: float = 0.0
    success: bool = True
    error: Optional[str] = None  # short error description on failure
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# ── quota config ───────────────────────────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
@dataclass
class QuotaConfig:
    """
    Quota limits for a key, optionally scoped to one model.

    Attributes:
        model: Model this config applies to. None makes it a default
            used for any model without an explicit entry; when both a
            model-specific config and a default exist, the
            model-specific one wins.
        rpm_limit: Max requests per minute (key-level, shared across models).
        tpm_limit: Max tokens per minute for this model.
        daily_token_limit: Max tokens per calendar day for this model.
        reset_hour_utc: UTC hour (0-23) at which the daily quota resets.
    """
    model: Optional[str] = None              # None = default / applies to all
    rpm_limit: Optional[int] = None          # key-level
    tpm_limit: Optional[int] = None          # model-level
    daily_token_limit: Optional[int] = None  # model-level
    reset_hour_utc: int = 0
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
# ── batch result ───────────────────────────────────────────────────────────
|
|
135
|
+
|
|
136
|
+
@dataclass
class BatchResult:
    """
    Aggregate outcome of one batch generate() call.

    `results` preserves the order of the input prompts; the counters
    are totals over all individual responses.
    """
    results: list[Optional[LLMResponse]]
    total_input_tokens: int
    total_output_tokens: int
    total_tokens: int
    total_latency_ms: float
    success_count: int
    failure_count: int
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm.providers.base
|
|
3
|
+
---------------------------
|
|
4
|
+
Abstract base class that every provider must implement.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from abc import ABC, abstractmethod
|
|
10
|
+
from typing import Any, Optional, Type
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
|
|
14
|
+
from ..models import LLMResponse, Prompt
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class BaseProvider(ABC):
    """
    Abstract interface implemented by each LLM provider.

    All provider-specific logic (API call, token counting, error
    classification) lives in subclasses. The client only talks to
    this interface.
    """

    # ── configuration ──────────────────────────────────────────────────────

    #: Override in subclass with a sensible default (e.g. "gpt-4o-mini").
    DEFAULT_MODEL: str = ""

    def __init__(self, model: Optional[str] = None) -> None:
        # Fall back to the subclass default when no explicit model is given.
        self.model = model or self.DEFAULT_MODEL

    # ── abstract interface ─────────────────────────────────────────────────

    @abstractmethod
    async def async_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Async generate. Must return an LLMResponse even on soft failures
        (schema mismatch). Hard failures (network, auth) should raise.
        """

    @abstractmethod
    def sync_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """Sync wrapper around async_generate (or a native sync call)."""

    # ── helpers shared by subclasses ───────────────────────────────────────

    @staticmethod
    def _strip_markdown_fences(text: str) -> str:
        """
        Remove a leading ``` / ```json fence line and — only when actually
        present — a trailing ``` line. Unlike a blind "drop first and last
        line", this never discards a final line of real JSON when the model
        omitted the closing fence.
        """
        text = text.strip()
        if text.startswith("```"):
            lines = text.splitlines()
            lines = lines[1:]  # drop opening fence (may carry a language tag)
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]  # drop closing fence only if it is one
            text = "\n".join(lines)
        return text

    @staticmethod
    def _validate_schema(
        content: str,
        response_model: Type[BaseModel],
        retries: int,
    ) -> tuple[Optional[BaseModel], bool]:
        """
        Try to parse *content* as JSON and validate against *response_model*.

        Parameters
        ----------
        content         Raw model output, possibly wrapped in markdown fences.
        response_model  Pydantic model class to validate against.
        retries         Kept for interface compatibility. Parsing the same
                        string is deterministic, so repeating the attempt can
                        never change the outcome; no re-parse loop is run.
                        (Retries with *new* model output happen upstream.)

        Returns
        -------
        (parsed_instance, matched)
            matched is True on success; on failure (None, False) is returned
            and the caller keeps the raw content.
        """
        import json

        try:
            text = BaseProvider._strip_markdown_fences(content)
            data = json.loads(text)
            return response_model.model_validate(data), True
        except Exception:
            # Any JSON decode or validation error means "schema not matched";
            # the caller falls back to the raw text with schema_matched=False.
            return None, False

    @staticmethod
    def _is_quota_error(exc: Exception) -> bool:
        """
        Return True if *exc* indicates a hard quota / auth failure
        (should deactivate the key, not retry).
        Subclasses may override for provider-specific error codes.
        """
        msg = str(exc).lower()
        keywords = (
            "quota",
            "rate_limit_exceeded",
            "billing",
            "insufficient_quota",
            "resource_exhausted",
            "invalid_api_key",
            "permission_denied",
        )
        return any(kw in msg for kw in keywords)
|