python-infrakit-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. infrakit/__init__.py +0 -0
  2. infrakit/cli/__init__.py +1 -0
  3. infrakit/cli/commands/__init__.py +1 -0
  4. infrakit/cli/commands/deps.py +530 -0
  5. infrakit/cli/commands/init.py +129 -0
  6. infrakit/cli/commands/llm.py +295 -0
  7. infrakit/cli/commands/logger.py +160 -0
  8. infrakit/cli/commands/module.py +342 -0
  9. infrakit/cli/commands/time.py +81 -0
  10. infrakit/cli/main.py +65 -0
  11. infrakit/core/__init__.py +0 -0
  12. infrakit/core/config/__init__.py +0 -0
  13. infrakit/core/config/converter.py +480 -0
  14. infrakit/core/config/exporter.py +304 -0
  15. infrakit/core/config/loader.py +713 -0
  16. infrakit/core/config/validator.py +389 -0
  17. infrakit/core/logger/__init__.py +21 -0
  18. infrakit/core/logger/formatters.py +143 -0
  19. infrakit/core/logger/handlers.py +322 -0
  20. infrakit/core/logger/retention.py +176 -0
  21. infrakit/core/logger/setup.py +314 -0
  22. infrakit/deps/__init__.py +239 -0
  23. infrakit/deps/clean.py +141 -0
  24. infrakit/deps/depfile.py +405 -0
  25. infrakit/deps/health.py +357 -0
  26. infrakit/deps/optimizer.py +642 -0
  27. infrakit/deps/scanner.py +550 -0
  28. infrakit/llm/__init__.py +35 -0
  29. infrakit/llm/batch.py +165 -0
  30. infrakit/llm/client.py +575 -0
  31. infrakit/llm/key_manager.py +728 -0
  32. infrakit/llm/llm_readme.md +306 -0
  33. infrakit/llm/models.py +148 -0
  34. infrakit/llm/providers/__init__.py +5 -0
  35. infrakit/llm/providers/base.py +112 -0
  36. infrakit/llm/providers/gemini.py +164 -0
  37. infrakit/llm/providers/openai.py +168 -0
  38. infrakit/llm/rate_limiter.py +54 -0
  39. infrakit/scaffolder/__init__.py +31 -0
  40. infrakit/scaffolder/ai.py +508 -0
  41. infrakit/scaffolder/backend.py +555 -0
  42. infrakit/scaffolder/cli_tool.py +386 -0
  43. infrakit/scaffolder/generator.py +338 -0
  44. infrakit/scaffolder/pipeline.py +562 -0
  45. infrakit/scaffolder/registry.py +121 -0
  46. infrakit/time/__init__.py +60 -0
  47. infrakit/time/profiler.py +511 -0
  48. python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
  49. python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
  50. python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
  51. python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,728 @@
1
+ """
2
+ infrakit.llm.key_manager
3
+ ------------------------
4
+ Manages API keys across providers with model-level quota tracking.
5
+
6
+ Key design
7
+ ----------
8
+ - Deactivation is at the (key, model) level, not the key level.
9
+ A key is only fully deactivated when ALL its models are inactive.
10
+ Exhausting gemini-2.5-pro leaves gemini-2.5-flash available on the
11
+ same key.
12
+
13
+ - RPM is tracked at the key level (the API counts all calls regardless
14
+ of model). TPM and daily tokens are tracked per model.
15
+
16
+ - Quota can be loaded from a JSON file so users don't have to call
17
+ set_quota() in code every session.
18
+
19
+ - Default storage paths live under ~/.infrakit/llm/ so state persists
20
+ across projects using the same keys.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import datetime
26
+ import json
27
+ import threading
28
+ import time
29
+ from dataclasses import dataclass, field
30
+ from pathlib import Path
31
+ from typing import Optional
32
+
33
+ from .models import KeyStatus, ModelStatus, Provider, QuotaConfig, RequestMeta
34
+
35
+
36
# ── default paths ──────────────────────────────────────────────────────────

# internal constants
_META_WINDOW = 50  # rolling metadata records per key
_STATE_FILE = "key_state.json"  # file name of the persisted key state
_QUOTA_FILE = "quotas.json"  # file name of the quota definition file

#: Default directory for all infrakit LLM state files.
DEFAULT_LLM_DIR: Path = Path.home().joinpath(".infrakit", "llm")

#: Default path for the key state persistence file.
DEFAULT_STATE_FILE: Path = DEFAULT_LLM_DIR.joinpath(_STATE_FILE)

#: Default path for the quota definition file.
DEFAULT_QUOTA_FILE: Path = DEFAULT_LLM_DIR.joinpath(_QUOTA_FILE)
51
+
52
+
53
+ # ── model-level state ──────────────────────────────────────────────────────
54
+
55
@dataclass
class ModelState:
    """
    Quota and usage bookkeeping for a single model on a single API key.

    Each model carries its own status, limits and counters, so running
    one model out of quota leaves the other models on the key untouched.
    """

    model: str
    status: str = ModelStatus.ACTIVE
    deactivated_at: Optional[float] = None  # epoch seconds of deactivation

    # limits for this model (filled from the quota file and set_quota calls)
    tpm_limit: Optional[int] = None
    daily_token_limit: Optional[int] = None
    reset_hour_utc: int = 0

    # tokens consumed in the current daily window, and when it started
    day_token_total: int = 0
    day_start_epoch: float = field(default_factory=time.time)

    # sliding window for TPM: (epoch, tokens) pairs
    tpm_window: list[tuple[float, int]] = field(default_factory=list)

    # lifetime counters for this model
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_tokens: int = 0
    total_requests: int = 0
    total_errors: int = 0

    def to_dict(self) -> dict:
        """Return every field as a plain dict, in declaration order."""
        names = (
            "model",
            "status",
            "deactivated_at",
            "tpm_limit",
            "daily_token_limit",
            "reset_hour_utc",
            "day_token_total",
            "day_start_epoch",
            "tpm_window",
            "total_input_tokens",
            "total_output_tokens",
            "total_tokens",
            "total_requests",
            "total_errors",
        )
        return {name: getattr(self, name) for name in names}

    @classmethod
    def from_dict(cls, d: dict) -> "ModelState":
        """
        Rebuild a ModelState from a dict shaped like ``to_dict()`` output.

        Only ``"model"`` is required (``KeyError`` if absent); every other
        key falls back to the same default a fresh instance would have.
        """
        return cls(
            model=d["model"],
            status=d.get("status", ModelStatus.ACTIVE),
            deactivated_at=d.get("deactivated_at"),
            tpm_limit=d.get("tpm_limit"),
            daily_token_limit=d.get("daily_token_limit"),
            reset_hour_utc=d.get("reset_hour_utc", 0),
            day_token_total=d.get("day_token_total", 0),
            day_start_epoch=d.get("day_start_epoch", time.time()),
            # window entries are converted back to tuples on load
            tpm_window=[tuple(entry) for entry in d.get("tpm_window", [])],
            total_input_tokens=d.get("total_input_tokens", 0),
            total_output_tokens=d.get("total_output_tokens", 0),
            total_tokens=d.get("total_tokens", 0),
            total_requests=d.get("total_requests", 0),
            total_errors=d.get("total_errors", 0),
        )
121
+
122
+
123
+ # ── per-key state ──────────────────────────────────────────────────────────
124
+
125
@dataclass
class KeyState:
    """
    Full runtime + persisted state for one API key.

    Key-level fields
    ----------------
    rpm_limit / rpm_window   RPM is shared across all models on this key.
    model_states             Per-model quota and usage state.
    recent_meta              Rolling metadata window (no prompt/response content).

    Placeholder entries
    -------------------
    ``model_states`` may hold two entries that are not real models:
    ``"__default__"`` (quota defaults, written by ``KeyManager.set_quota``
    and the quota-file loader) and ``"__all__"`` (a blanket marker written
    by ``KeyManager.deactivate_key`` when the key has no model states yet).
    Neither counts as a model when deriving ``status``; an inactive
    ``"__all__"`` marker makes every model on the key unavailable.
    """

    # identity
    provider: str
    key_id: str    # first 8 chars of raw key — safe for logs
    key_hash: str  # sha256 — used to re-match on reload (raw key never stored)

    # key-level RPM (shared across models)
    rpm_limit: Optional[int] = None
    rpm_window: list[float] = field(default_factory=list)

    # per-model state — keyed by model string
    model_states: dict[str, ModelState] = field(default_factory=dict)

    # rolling metadata (no prompt/response)
    recent_meta: list[dict] = field(default_factory=list)

    # ── derived status ────────────────────────────────────────────────────

    def _blanket_deactivated(self) -> bool:
        """True if the ``"__all__"`` marker exists and is not active."""
        marker = self.model_states.get("__all__")
        return marker is not None and marker.status != ModelStatus.ACTIVE

    @property
    def status(self) -> str:
        """
        Overall key status derived from the per-model states.

        INACTIVE when the blanket ``"__all__"`` marker is inactive, or when
        every real model is inactive. ACTIVE when at least one real model
        is active or no real model is tracked yet.
        """
        if self._blanket_deactivated():
            return KeyStatus.INACTIVE
        # Placeholders are bookkeeping entries, not models: counting the
        # always-active "__default__" entry would keep the key ACTIVE forever.
        real_models = [
            ms
            for name, ms in self.model_states.items()
            if name not in ("__default__", "__all__")
        ]
        if not real_models:
            return KeyStatus.ACTIVE  # no model tracking yet — assume active
        if any(ms.status == ModelStatus.ACTIVE for ms in real_models):
            return KeyStatus.ACTIVE
        return KeyStatus.INACTIVE

    def is_model_active(self, model: str) -> bool:
        """
        Return True if *model* may be used with this key.

        A model that has never been seen is considered active, unless the
        whole key carries an inactive ``"__all__"`` marker.
        """
        if self._blanket_deactivated():
            return False
        ms = self.model_states.get(model)
        return ms is None or ms.status == ModelStatus.ACTIVE

    def get_or_create_model_state(self, model: str) -> ModelState:
        """Return the state for *model*, registering a fresh one if needed."""
        if model not in self.model_states:
            self.model_states[model] = ModelState(model=model)
        return self.model_states[model]

    # ── serialisation ─────────────────────────────────────────────────────

    def to_dict(self) -> dict:
        """Return a plain-dict snapshot; nested model states are serialised too."""
        return {
            "provider": self.provider,
            "key_id": self.key_id,
            "key_hash": self.key_hash,
            "rpm_limit": self.rpm_limit,
            "rpm_window": self.rpm_window,
            "model_states": {
                m: ms.to_dict() for m, ms in self.model_states.items()
            },
            "recent_meta": self.recent_meta,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "KeyState":
        """
        Rebuild a KeyState from a dict shaped like ``to_dict()`` output.

        ``provider``, ``key_id`` and ``key_hash`` are required (``KeyError``
        if absent); the remaining keys are optional.
        """
        ks = cls(
            provider=d["provider"],
            key_id=d["key_id"],
            key_hash=d["key_hash"],
        )
        ks.rpm_limit = d.get("rpm_limit")
        ks.rpm_window = d.get("rpm_window", [])
        ks.recent_meta = d.get("recent_meta", [])
        for model_str, ms_dict in d.get("model_states", {}).items():
            ks.model_states[model_str] = ModelState.from_dict(ms_dict)
        return ks
203
+
204
+
205
+ # ── key manager ────────────────────────────────────────────────────────────
206
+
207
+ class KeyManager:
208
+ """
209
+ Thread-safe manager for all provider API keys.
210
+
211
+ Parameters
212
+ ----------
213
+ keys ``{"openai_keys": [...], "gemini_keys": [...]}``.
214
+ storage_dir Folder where ``key_state.json`` is written.
215
+ Defaults to ``~/.infrakit/llm/``.
216
+ quota_file Path to a JSON quota definition file.
217
+ Defaults to ``~/.infrakit/llm/quotas.json``.
218
+ Pass ``None`` to skip file-based quota loading.
219
+ meta_window Rolling metadata records kept per key.
220
+ """
221
+
222
def __init__(
    self,
    keys: dict[str, list[str]],
    storage_dir: Optional[str | Path] = None,
    quota_file: Optional[str | Path] = None,
    meta_window: int = _META_WINDOW,
) -> None:
    """
    Build the manager from raw keys, previously persisted state and the
    optional quota file, then write the merged state back to disk.

    keys          Mapping with optional ``"openai_keys"`` / ``"gemini_keys"``
                  lists of raw key strings; other entries are ignored.
    storage_dir   Directory for the state file (created if missing);
                  falls back to ``DEFAULT_LLM_DIR``.
    quota_file    Explicit quota file. When ``None``, the default quota
                  file is used if it exists, otherwise quota loading is
                  skipped.
    meta_window   Number of rolling metadata records kept per key.
    """
    import hashlib

    self._lock = threading.Lock()
    self._meta_window = meta_window

    # ── resolve paths ─────────────────────────────────────────────────
    base_dir = DEFAULT_LLM_DIR if not storage_dir else Path(storage_dir)
    base_dir.mkdir(parents=True, exist_ok=True)
    self._storage_path = base_dir / _STATE_FILE

    # quota file precedence: explicit argument, then default location
    self._quota_file: Optional[Path]
    if quota_file is not None:
        self._quota_file = Path(quota_file)
    elif (DEFAULT_LLM_DIR / _QUOTA_FILE).exists():
        self._quota_file = DEFAULT_LLM_DIR / _QUOTA_FILE
    else:
        self._quota_file = None

    # ── previously saved state and file-defined quotas ────────────────
    saved_states = self._load_persisted()
    quotas_by_provider = self._load_quota_file()

    # ── in-memory structures ──────────────────────────────────────────
    known_providers = (Provider.OPENAI, Provider.GEMINI)
    self._states: dict[str, list[KeyState]] = {p: [] for p in known_providers}
    # round-robin cursor per provider, per model
    self._rr_index: dict[str, dict[str, int]] = {p: {} for p in known_providers}

    field_to_provider = (
        ("openai_keys", Provider.OPENAI),
        ("gemini_keys", Provider.GEMINI),
    )
    for field_name, provider in field_to_provider:
        for raw_key in keys.get(field_name, []):
            digest = hashlib.sha256(raw_key.encode()).hexdigest()

            ks = saved_states.get((provider, digest))
            if ks:
                # a saved key may have models whose reset time has passed
                for ms in ks.model_states.values():
                    self._maybe_reactivate_model(ms)
            else:
                ks = KeyState(
                    provider=provider, key_id=raw_key[:8], key_hash=digest
                )

            # file quotas only fill in what the state does not configure yet
            self._apply_file_quotas(ks, quotas_by_provider.get(provider, {}))

            # the raw key lives on the in-memory object only
            ks._raw_key = raw_key  # type: ignore[attr-defined]
            self._states[provider].append(ks)

    self._persist()
289
+
290
+ # ── public: key acquisition ────────────────────────────────────────────
291
+
292
+ def get_key(self, provider: str, model: str) -> tuple[str, KeyState]:
293
+ """
294
+ Return (raw_key, KeyState) for the next key that has *model* active.
295
+
296
+ Round-robins separately per (provider, model) so different models
297
+ can be load-balanced independently.
298
+
299
+ Raises RuntimeError if no key has this model available.
300
+ """
301
+ with self._lock:
302
+ self._reactivate_all_due(provider)
303
+ candidates = self._states.get(provider, [])
304
+
305
+ # keys where this specific model is not deactivated
306
+ eligible = [ks for ks in candidates if ks.is_model_active(model)]
307
+ if not eligible:
308
+ raise RuntimeError(
309
+ f"No active {provider} keys available for model '{model}'. "
310
+ "All keys may have hit their quota for this model."
311
+ )
312
+
313
+ rr = self._rr_index[provider]
314
+ idx = rr.get(model, 0) % len(eligible)
315
+ ks = eligible[idx]
316
+ rr[model] = (idx + 1) % len(eligible)
317
+
318
+ return ks._raw_key, ks # type: ignore[attr-defined]
319
+
320
+ # ── public: rate-limit checks ──────────────────────────────────────────
321
+
322
def check_rpm(self, ks: KeyState) -> bool:
    """
    Tell whether the key may issue one more request this minute.

    Always True when the key has no RPM limit. Otherwise timestamps
    older than 60 seconds are dropped from ``ks.rpm_window`` and the
    remaining count is compared with the limit.
    """
    limit = ks.rpm_limit
    if limit is None:
        return True
    reference = time.time()
    recent = [stamp for stamp in ks.rpm_window if reference - stamp < 60.0]
    ks.rpm_window = recent
    return len(recent) < limit
329
+
330
def check_tpm(self, ks: KeyState, model: str, tokens_needed: int = 0) -> bool:
    """
    Tell whether *tokens_needed* more tokens fit in the model's TPM budget.

    Always True when the model is not tracked on this key or has no TPM
    limit. Otherwise entries older than 60 seconds are dropped from the
    model's ``tpm_window`` before the remaining tokens are summed.
    """
    state = ks.model_states.get(model)
    if state is None or state.tpm_limit is None:
        return True
    reference = time.time()
    state.tpm_window = [
        (stamp, count)
        for stamp, count in state.tpm_window
        if reference - stamp < 60.0
    ]
    consumed = sum(count for _, count in state.tpm_window)
    return consumed + tokens_needed <= state.tpm_limit
339
+
340
def seconds_until_rpm_slot(self, ks: KeyState) -> float:
    """
    Return how many seconds until the key-level RPM window frees a slot.

    Returns 0.0 when the key has no RPM limit, or when fewer requests
    than the limit fall inside the last 60 seconds. Otherwise returns
    the time until the oldest in-window request turns 60 seconds old.

    Only timestamps from the last 60 seconds are considered, so stale
    entries that have not been pruned yet cannot hide a saturated
    window. ``ks.rpm_window`` itself is left unmodified.
    """
    if ks.rpm_limit is None:
        return 0.0
    now = time.time()
    live = [t for t in ks.rpm_window if now - t < 60.0]
    # An empty window has nothing to wait for (also avoids min() on []).
    if not live or len(live) < ks.rpm_limit:
        return 0.0
    return max(0.0, 60.0 - (now - min(live)))
345
+
346
+ # ── public: record a completed request ────────────────────────────────
347
+
348
+ def record_request(self, ks: KeyState, meta: RequestMeta) -> None:
349
+ """
350
+ Update all counters after an API call (success or failure).
351
+ - RPM window updated at key level.
352
+ - TPM window, daily tokens, totals updated at model level.
353
+ - Rolling metadata appended (no prompt/response content).
354
+ """
355
+ with self._lock:
356
+ now = time.time()
357
+ model = meta.model
358
+
359
+ # ── key-level RPM ─────────────────────────────────────────────
360
+ ks.rpm_window.append(now)
361
+ ks.rpm_window = [t for t in ks.rpm_window if now - t < 60.0]
362
+
363
+ # ── model-level state ─────────────────────────────────────────
364
+ ms = ks.get_or_create_model_state(model)
365
+
366
+ ms.tpm_window.append((now, meta.total_tokens))
367
+ ms.tpm_window = [(t, tok) for t, tok in ms.tpm_window if now - t < 60.0]
368
+
369
+ self._maybe_reset_day(ms)
370
+ ms.day_token_total += meta.total_tokens
371
+
372
+ ms.total_requests += 1
373
+ ms.total_input_tokens += meta.input_tokens
374
+ ms.total_output_tokens += meta.output_tokens
375
+ ms.total_tokens += meta.total_tokens
376
+ if not meta.success:
377
+ ms.total_errors += 1
378
+
379
+ # check model-level daily quota
380
+ if (
381
+ ms.daily_token_limit is not None
382
+ and ms.day_token_total >= ms.daily_token_limit
383
+ ):
384
+ self._deactivate_model(ms, reason="daily token limit reached")
385
+
386
+ # ── rolling metadata ──────────────────────────────────────────
387
+ ks.recent_meta.append({
388
+ "timestamp": meta.timestamp,
389
+ "provider": meta.provider,
390
+ "key_id": meta.key_id,
391
+ "model": model,
392
+ "input_tokens": meta.input_tokens,
393
+ "output_tokens": meta.output_tokens,
394
+ "total_tokens": meta.total_tokens,
395
+ "latency_ms": meta.latency_ms,
396
+ "success": meta.success,
397
+ "error": meta.error,
398
+ })
399
+ if len(ks.recent_meta) > self._meta_window:
400
+ ks.recent_meta = ks.recent_meta[-self._meta_window:]
401
+
402
+ self._persist()
403
+
404
def deactivate_model(
    self, ks: KeyState, model: str, reason: str = "quota exceeded"
) -> None:
    """
    Take one (key, model) pair out of rotation and persist the change.

    A state entry is created for the model if the key has none yet.
    Other models on the same key are not touched.
    """
    with self._lock:
        target = ks.get_or_create_model_state(model)
        self._deactivate_model(target, reason=reason)
        self._persist()
417
+
418
+ # kept for backwards-compat — deactivates ALL models on the key
419
def deactivate_key(self, ks: KeyState, reason: str = "quota exceeded") -> None:
    """
    Mark every model state currently stored on the key as inactive.

    When the key has no model states at all, an inactive ``"__all__"``
    placeholder state is stored instead. The result is persisted.
    """
    with self._lock:
        if ks.model_states:
            for state in ks.model_states.values():
                self._deactivate_model(state, reason=reason)
        else:
            # nothing tracked yet — record the deactivation on a sentinel
            placeholder = ModelState(model="__all__")
            self._deactivate_model(placeholder, reason=reason)
            ks.model_states["__all__"] = placeholder
        self._persist()
430
+
431
+ # ── public: quota config ───────────────────────────────────────────────
432
+
433
+ def set_quota(
434
+ self,
435
+ provider: str,
436
+ key_id: str,
437
+ quota: QuotaConfig,
438
+ ) -> None:
439
+ """
440
+ Set quota for a key, optionally for a specific model.
441
+
442
+ If ``quota.model`` is None the config is treated as the default
443
+ for all models on this key that don't have their own entry.
444
+ If ``quota.model`` is set, it applies only to that model.
445
+ """
446
+ with self._lock:
447
+ for ks in self._states.get(provider, []):
448
+ if ks.key_id != key_id:
449
+ continue
450
+
451
+ # key-level RPM (always updated regardless of model scope)
452
+ if quota.rpm_limit is not None:
453
+ ks.rpm_limit = quota.rpm_limit
454
+
455
+ target_model = quota.model # None = default
456
+
457
+ if target_model is None:
458
+ # apply as default to all existing model states
459
+ # and store as a special "__default__" entry for new models
460
+ self._apply_quota_to_model_state(
461
+ ks.get_or_create_model_state("__default__"), quota
462
+ )
463
+ # also propagate to already-known models that have no override
464
+ for ms in ks.model_states.values():
465
+ if ms.model == "__default__":
466
+ continue
467
+ if not self._has_explicit_quota(ms):
468
+ self._apply_quota_to_model_state(ms, quota)
469
+ else:
470
+ ms = ks.get_or_create_model_state(target_model)
471
+ self._apply_quota_to_model_state(ms, quota)
472
+
473
+ self._persist()
474
+ return
475
+
476
+ raise KeyError(f"Key '{key_id}' not found for provider '{provider}'.")
477
+
478
+ # ── public: status report ──────────────────────────────────────────────
479
+
480
+ def status_report(
481
+ self,
482
+ provider: Optional[str] = None,
483
+ key_id: Optional[str] = None,
484
+ ) -> list[dict]:
485
+ """
486
+ Return status dicts for CLI / programmatic display.
487
+ Filters by provider and/or key_id.
488
+ """
489
+ now = time.time()
490
+ results = []
491
+
492
+ with self._lock:
493
+ for prov, key_list in self._states.items():
494
+ if provider and prov != provider:
495
+ continue
496
+
497
+ for ks in key_list:
498
+ if key_id and ks.key_id != key_id:
499
+ continue
500
+
501
+ # refresh reactivation state before reporting
502
+ for ms in ks.model_states.values():
503
+ self._maybe_reactivate_model(ms)
504
+
505
+ current_rpm = len([t for t in ks.rpm_window if now - t < 60.0])
506
+
507
+ model_rows = []
508
+ for model_name, ms in ks.model_states.items():
509
+ if model_name == "__default__":
510
+ continue
511
+ self._maybe_reset_day(ms)
512
+ current_tpm = sum(
513
+ tok for t, tok in ms.tpm_window if now - t < 60.0
514
+ )
515
+ daily_remaining = (
516
+ max(0, ms.daily_token_limit - ms.day_token_total)
517
+ if ms.daily_token_limit is not None else None
518
+ )
519
+ model_rows.append({
520
+ "model": model_name,
521
+ "status": ms.status,
522
+ "deactivated_at": ms.deactivated_at,
523
+ "tpm_limit": ms.tpm_limit,
524
+ "daily_token_limit": ms.daily_token_limit,
525
+ "reset_hour_utc": ms.reset_hour_utc,
526
+ "current_tpm": current_tpm,
527
+ "day_token_total": ms.day_token_total,
528
+ "daily_remaining": daily_remaining,
529
+ "total_tokens": ms.total_tokens,
530
+ "total_requests": ms.total_requests,
531
+ "total_errors": ms.total_errors,
532
+ })
533
+
534
+ results.append({
535
+ "provider": ks.provider,
536
+ "key_id": ks.key_id,
537
+ "status": ks.status,
538
+ "rpm_limit": ks.rpm_limit,
539
+ "current_rpm": current_rpm,
540
+ "models": model_rows,
541
+ "recent_meta": ks.recent_meta[-5:],
542
+ })
543
+
544
+ return results
545
+
546
+ # ── internal: deactivation & reactivation ─────────────────────────────
547
+
548
def _deactivate_model(self, ms: ModelState, reason: str = "") -> None:
    """
    Flag *ms* as inactive and stamp the moment it happened.

    *reason* is accepted for the callers' benefit but is not stored.
    """
    ms.deactivated_at = time.time()
    ms.status = ModelStatus.INACTIVE
551
+
552
def _maybe_reactivate_model(self, ms: ModelState) -> None:
    """
    Auto-reactivate a model once a daily reset has passed since it was
    deactivated.

    Does nothing unless *ms* is INACTIVE and has a ``deactivated_at``
    timestamp. On reactivation the status returns to ACTIVE, the
    deactivation stamp is cleared and the daily token window restarts.

    The comparison uses the most recent reset instant that is not in the
    future. Comparing against *today's* reset hour alone would leave a
    model deactivated before yesterday's reset stuck until today's reset
    hour arrives.
    """
    if ms.status != ModelStatus.INACTIVE or ms.deactivated_at is None:
        return

    utc = datetime.timezone.utc
    now_utc = datetime.datetime.now(utc)
    last_reset = now_utc.replace(
        hour=ms.reset_hour_utc, minute=0, second=0, microsecond=0
    )
    if last_reset > now_utc:
        # today's reset has not happened yet — the latest one was yesterday
        last_reset -= datetime.timedelta(days=1)

    deactivated_dt = datetime.datetime.fromtimestamp(ms.deactivated_at, tz=utc)
    if deactivated_dt < last_reset:
        ms.status = ModelStatus.ACTIVE
        ms.deactivated_at = None
        ms.day_token_total = 0
        ms.day_start_epoch = time.time()
566
+
567
+ def _reactivate_all_due(self, provider: str) -> None:
568
+ for ks in self._states.get(provider, []):
569
+ for ms in ks.model_states.values():
570
+ self._maybe_reactivate_model(ms)
571
+
572
+ def _maybe_reset_day(self, ms: ModelState) -> None:
573
+ """Reset daily token counter if the reset hour has passed today."""
574
+ now_utc = datetime.datetime.utcnow()
575
+ reset_today = now_utc.replace(
576
+ hour=ms.reset_hour_utc, minute=0, second=0, microsecond=0
577
+ )
578
+ day_start_dt = datetime.datetime.utcfromtimestamp(ms.day_start_epoch)
579
+ if day_start_dt < reset_today <= now_utc:
580
+ ms.day_token_total = 0
581
+ ms.day_start_epoch = time.time()
582
+
583
+ # ── internal: quota helpers ────────────────────────────────────────────
584
+
585
+ @staticmethod
586
+ def _apply_quota_to_model_state(ms: ModelState, quota: QuotaConfig) -> None:
587
+ if quota.tpm_limit is not None:
588
+ ms.tpm_limit = quota.tpm_limit
589
+ if quota.daily_token_limit is not None:
590
+ ms.daily_token_limit = quota.daily_token_limit
591
+ if quota.reset_hour_utc is not None:
592
+ ms.reset_hour_utc = quota.reset_hour_utc
593
+
594
+ @staticmethod
595
+ def _has_explicit_quota(ms: ModelState) -> bool:
596
+ """True if any quota field was explicitly set on this ModelState."""
597
+ return any([
598
+ ms.tpm_limit is not None,
599
+ ms.daily_token_limit is not None,
600
+ ])
601
+
602
+ def _apply_file_quotas(
603
+ self,
604
+ ks: KeyState,
605
+ provider_quotas: dict[str, QuotaConfig],
606
+ ) -> None:
607
+ """
608
+ Apply quotas loaded from the quota file onto a KeyState.
609
+
610
+ ``provider_quotas`` is keyed by model name or ``"default"``.
611
+ Only sets fields that aren't already configured (persisted state wins).
612
+ """
613
+ default_q = provider_quotas.get("default")
614
+
615
+ # key-level RPM from default or explicit key config
616
+ if ks.rpm_limit is None and default_q and default_q.rpm_limit:
617
+ ks.rpm_limit = default_q.rpm_limit
618
+
619
+ for model_name, quota in provider_quotas.items():
620
+ if model_name == "default":
621
+ # store as __default__ so new model states inherit it
622
+ dflt_ms = ks.get_or_create_model_state("__default__")
623
+ if not self._has_explicit_quota(dflt_ms):
624
+ self._apply_quota_to_model_state(dflt_ms, quota)
625
+ else:
626
+ ms = ks.get_or_create_model_state(model_name)
627
+ if not self._has_explicit_quota(ms):
628
+ self._apply_quota_to_model_state(ms, quota)
629
+ # model-specific rpm_limit overrides key-level if set
630
+ if quota.rpm_limit is not None and ks.rpm_limit is None:
631
+ ks.rpm_limit = quota.rpm_limit
632
+
633
+ def _get_effective_model_quota(self, ks: KeyState, model: str) -> ModelState:
634
+ """
635
+ Return the ModelState for *model*, inheriting from __default__ if
636
+ the model has no explicit quota set.
637
+ """
638
+ ms = ks.get_or_create_model_state(model)
639
+ default_ms = ks.model_states.get("__default__")
640
+ if default_ms and not self._has_explicit_quota(ms):
641
+ if ms.tpm_limit is None and default_ms.tpm_limit is not None:
642
+ ms.tpm_limit = default_ms.tpm_limit
643
+ if ms.daily_token_limit is None and default_ms.daily_token_limit is not None:
644
+ ms.daily_token_limit = default_ms.daily_token_limit
645
+ if ms.reset_hour_utc == 0 and default_ms.reset_hour_utc != 0:
646
+ ms.reset_hour_utc = default_ms.reset_hour_utc
647
+ return ms
648
+
649
+ # ── internal: quota file loading ───────────────────────────────────────
650
+
651
+ def _load_quota_file(self) -> dict[str, dict[str, QuotaConfig]]:
652
+ """
653
+ Load quotas.json and return a nested dict:
654
+ { provider: { model_or_"default": QuotaConfig } }
655
+
656
+ File format::
657
+
658
+ {
659
+ "openai": {
660
+ "default": { "rpm": 60, "tpm": 90000, "daily_tokens": 1000000, "reset_hour_utc": 0 },
661
+ "gpt-4o": { "rpm": 10, "daily_tokens": 100000 }
662
+ },
663
+ "gemini": {
664
+ "default": { "rpm": 15 },
665
+ "gemini-2.0-flash": { "daily_tokens": 1500000 },
666
+ "gemini-2.5-pro": { "daily_tokens": 250000 }
667
+ }
668
+ }
669
+
670
+ All fields are optional. Unknown fields are ignored.
671
+ """
672
+ if self._quota_file is None or not self._quota_file.exists():
673
+ return {}
674
+
675
+ try:
676
+ with open(self._quota_file, encoding="utf-8") as f:
677
+ raw = json.load(f)
678
+ except (OSError, json.JSONDecodeError) as exc:
679
+ print(f"[infrakit.llm] Warning: could not load quota file "
680
+ f"'{self._quota_file}': {exc}")
681
+ return {}
682
+
683
+ result: dict[str, dict[str, QuotaConfig]] = {}
684
+ for provider, models in raw.items():
685
+ if not isinstance(models, dict):
686
+ continue
687
+ result[provider] = {}
688
+ for model_key, cfg in models.items():
689
+ if not isinstance(cfg, dict):
690
+ continue
691
+ result[provider][model_key] = QuotaConfig(
692
+ model=None if model_key == "default" else model_key,
693
+ rpm_limit=cfg.get("rpm"),
694
+ tpm_limit=cfg.get("tpm"),
695
+ daily_token_limit=cfg.get("daily_tokens"),
696
+ reset_hour_utc=cfg.get("reset_hour_utc", 0),
697
+ )
698
+
699
+ return result
700
+
701
+ # ── internal: persistence ──────────────────────────────────────────────
702
+
703
+ def _persist(self) -> None:
704
+ data = {
705
+ prov: [ks.to_dict() for ks in key_list]
706
+ for prov, key_list in self._states.items()
707
+ }
708
+ try:
709
+ with open(self._storage_path, "w", encoding="utf-8") as f:
710
+ json.dump(data, f, indent=2)
711
+ except OSError:
712
+ pass # non-fatal
713
+
714
+ def _load_persisted(self) -> dict[tuple[str, str], KeyState]:
715
+ if not self._storage_path.exists():
716
+ return {}
717
+ try:
718
+ with open(self._storage_path, encoding="utf-8") as f:
719
+ data = json.load(f)
720
+ except (OSError, json.JSONDecodeError):
721
+ return {}
722
+
723
+ result: dict[tuple[str, str], KeyState] = {}
724
+ for provider, key_list in data.items():
725
+ for d in key_list:
726
+ ks = KeyState.from_dict(d)
727
+ result[(provider, ks.key_hash)] = ks
728
+ return result