python-infrakit-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. infrakit/__init__.py +0 -0
  2. infrakit/cli/__init__.py +1 -0
  3. infrakit/cli/commands/__init__.py +1 -0
  4. infrakit/cli/commands/deps.py +530 -0
  5. infrakit/cli/commands/init.py +129 -0
  6. infrakit/cli/commands/llm.py +295 -0
  7. infrakit/cli/commands/logger.py +160 -0
  8. infrakit/cli/commands/module.py +342 -0
  9. infrakit/cli/commands/time.py +81 -0
  10. infrakit/cli/main.py +65 -0
  11. infrakit/core/__init__.py +0 -0
  12. infrakit/core/config/__init__.py +0 -0
  13. infrakit/core/config/converter.py +480 -0
  14. infrakit/core/config/exporter.py +304 -0
  15. infrakit/core/config/loader.py +713 -0
  16. infrakit/core/config/validator.py +389 -0
  17. infrakit/core/logger/__init__.py +21 -0
  18. infrakit/core/logger/formatters.py +143 -0
  19. infrakit/core/logger/handlers.py +322 -0
  20. infrakit/core/logger/retention.py +176 -0
  21. infrakit/core/logger/setup.py +314 -0
  22. infrakit/deps/__init__.py +239 -0
  23. infrakit/deps/clean.py +141 -0
  24. infrakit/deps/depfile.py +405 -0
  25. infrakit/deps/health.py +357 -0
  26. infrakit/deps/optimizer.py +642 -0
  27. infrakit/deps/scanner.py +550 -0
  28. infrakit/llm/__init__.py +35 -0
  29. infrakit/llm/batch.py +165 -0
  30. infrakit/llm/client.py +575 -0
  31. infrakit/llm/key_manager.py +728 -0
  32. infrakit/llm/llm_readme.md +306 -0
  33. infrakit/llm/models.py +148 -0
  34. infrakit/llm/providers/__init__.py +5 -0
  35. infrakit/llm/providers/base.py +112 -0
  36. infrakit/llm/providers/gemini.py +164 -0
  37. infrakit/llm/providers/openai.py +168 -0
  38. infrakit/llm/rate_limiter.py +54 -0
  39. infrakit/scaffolder/__init__.py +31 -0
  40. infrakit/scaffolder/ai.py +508 -0
  41. infrakit/scaffolder/backend.py +555 -0
  42. infrakit/scaffolder/cli_tool.py +386 -0
  43. infrakit/scaffolder/generator.py +338 -0
  44. infrakit/scaffolder/pipeline.py +562 -0
  45. infrakit/scaffolder/registry.py +121 -0
  46. infrakit/time/__init__.py +60 -0
  47. infrakit/time/profiler.py +511 -0
  48. python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
  49. python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
  50. python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
  51. python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
infrakit/llm/client.py ADDED
@@ -0,0 +1,575 @@
1
+ """
2
+ infrakit.llm.client
3
+ --------------------
4
+ LLMClient — the single entry point for all LLM interactions.
5
+
6
+ Quick start::
7
+
8
+ from infrakit.llm import LLMClient, Prompt
9
+ from pydantic import BaseModel
10
+
11
+ client = LLMClient(
12
+ keys={"openai_keys": ["sk-..."], "gemini_keys": ["AIza..."]},
13
+ storage_dir="./logs",
14
+ )
15
+
16
+ # simple generate
17
+ response = client.generate(Prompt(user="What is 2+2?"), provider="openai")
18
+ print(response.content)
19
+
20
+ # structured output
21
+ class Answer(BaseModel):
22
+ value: int
23
+ explanation: str
24
+
25
+ response = client.generate(
26
+ Prompt(system="Be concise.", user="What is 2+2?"),
27
+ provider="openai",
28
+ response_model=Answer,
29
+ )
30
+ if response.schema_matched:
31
+ print(response.parsed.value)
32
+
33
+ # batch
34
+ prompts = [Prompt(user=f"Translate '{w}' to French") for w in ["cat", "dog", "bird"]]
35
+ batch = client.batch_generate(prompts, provider="gemini")
36
+ for r in batch.results:
37
+ print(r.content)
38
+ """
39
+
40
+ from __future__ import annotations
41
+
42
+ import asyncio
43
+ import time
44
+ from pathlib import Path
45
+ from typing import Any, Literal, Optional, Type
46
+
47
+ from pydantic import BaseModel
48
+
49
+ from .batch import async_batch, threaded_batch
50
+
51
+
52
+ def _run_async(coro):
53
+ """
54
+ Run an async coroutine from sync code, safely.
55
+
56
+ - If there is no running event loop (normal script / CLI):
57
+ use asyncio.run() directly.
58
+ - If there IS a running loop (Jupyter, FastAPI, nested asyncio):
59
+ spin up a new loop in a background thread and block until done.
60
+ This avoids the "asyncio.run() cannot be called from a running
61
+ event loop" error without requiring the caller to be async.
62
+ """
63
+ import concurrent.futures
64
+
65
+ try:
66
+ loop = asyncio.get_running_loop()
67
+ except RuntimeError:
68
+ loop = None
69
+
70
+ if loop is None:
71
+ return asyncio.run(coro)
72
+
73
+ # Running inside an existing loop — use a thread with its own loop
74
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
75
+ future = pool.submit(asyncio.run, coro)
76
+ return future.result()
77
+
78
+
79
+ from .key_manager import KeyManager
80
+ from .models import BatchResult, LLMResponse, Prompt, Provider, QuotaConfig, RequestMeta
81
+ from .providers.base import BaseProvider
82
+ from .providers.gemini import GeminiProvider
83
+ from .providers.openai import OpenAIProvider
84
+ from .rate_limiter import RateLimiter
85
+
86
+
87
# ── defaults ───────────────────────────────────────────────────────────────
# Tunable knobs for LLMClient; each has a matching __init__ keyword
# argument that overrides it per-instance.

_DEFAULT_MAX_CONCURRENT = 3   # max simultaneous in-flight requests in batch calls
_DEFAULT_KEY_RETRIES = 2      # retry same key N times before rotating
_DEFAULT_SCHEMA_RETRIES = 2   # attempts to parse structured output
_DEFAULT_META_WINDOW = 50     # recent request metadata records per key
93
+
94
+
95
class LLMClient:
    """
    Unified client for OpenAI and Gemini.

    Parameters
    ----------
    keys            API keys dict::

                        {
                            "openai_keys": ["sk-key1", "sk-key2"],
                            "gemini_keys": ["AIza-key1"],
                        }

    storage_dir     Path to a folder where key state is persisted.
                    Defaults to ``~/.infrakit/llm/`` if not given.
    mode            ``"async"`` — asyncio + semaphore concurrency.
                    ``"threaded"`` — ThreadPoolExecutor concurrency.
                    Default: ``"async"``.
    max_concurrent  Max simultaneous in-flight requests for batch calls.
                    Default: 3.
    key_retries     How many times to retry the *same* key on a transient
                    error before rotating to the next one. Default: 2.
    schema_retries  How many times to retry JSON parsing/validation before
                    giving up and returning schema_matched=False. Default: 2.
    meta_window     How many recent request metadata records to keep per key.
                    Default: 50.
    openai_model    Default OpenAI model. Default: ``"gpt-4o-mini"``.
    gemini_model    Default Gemini model. Default: ``"gemini-1.5-flash"``.
    show_progress   Show tqdm progress bar during batch calls. Default: True.
    """

    def __init__(
        self,
        keys: dict[str, list[str]],
        storage_dir: Optional[str | Path] = None,
        quota_file: Optional[str | Path] = None,
        mode: Literal["async", "threaded"] = "async",
        max_concurrent: int = _DEFAULT_MAX_CONCURRENT,
        key_retries: int = _DEFAULT_KEY_RETRIES,
        schema_retries: int = _DEFAULT_SCHEMA_RETRIES,
        meta_window: int = _DEFAULT_META_WINDOW,
        openai_model: Optional[str] = None,
        gemini_model: Optional[str] = None,
        show_progress: bool = True,
    ) -> None:
        self._mode = mode
        self._max_concurrent = max_concurrent
        self._key_retries = key_retries
        self._schema_retries = schema_retries
        self._show_progress = show_progress

        # Key manager (persistence, rotation, quota)
        self._km = KeyManager(
            keys=keys,
            storage_dir=storage_dir,
            quota_file=quota_file,
            meta_window=meta_window,
        )

        # Rate limiter (RPM/TPM gating)
        self._rl = RateLimiter(self._km)

        # Providers
        self._providers: dict[str, BaseProvider] = {
            Provider.OPENAI: OpenAIProvider(model=openai_model),
            Provider.GEMINI: GeminiProvider(model=gemini_model),
        }

    # ── public: single generate ────────────────────────────────────────────

    def generate(
        self,
        prompt: Prompt,
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate a response for a single prompt (blocking).

        Always uses the sync code path so it is safe to call from any context:
        scripts, threads, Jupyter, FastAPI handlers, Windows, etc.

        If you are inside an async function use ``await async_generate()``
        instead; that path uses the async SDK clients end-to-end.

        Handles key rotation, RPM waiting, retries, and metadata recording.
        Always returns an LLMResponse — check ``.error`` for failures.
        """
        return self._sync_single_generate(
            prompt=prompt,
            response_model=response_model,
            provider=provider,
            **kwargs,
        )

    async def async_generate(
        self,
        prompt: Prompt,
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """Async version of generate() — await this inside an async context."""
        return await self._async_single_generate(
            prompt=prompt,
            response_model=response_model,
            provider=provider,
            **kwargs,
        )

    # ── public: batch generate ─────────────────────────────────────────────

    def batch_generate(
        self,
        prompts: list[Prompt],
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        max_concurrent: Optional[int] = None,
        show_progress: Optional[bool] = None,
        **kwargs: Any,
    ) -> BatchResult:
        """
        Generate responses for a list of prompts.

        Results are in the same order as *prompts*.
        Uses async or threaded mode based on client ``mode`` setting.

        Parameters
        ----------
        prompts         List of Prompt objects.
        provider        ``"openai"`` or ``"gemini"``.
        response_model  Optional Pydantic model for structured output.
        max_concurrent  Override per-call concurrency limit.
        show_progress   Override per-call progress bar setting.
        """
        concurrency = max_concurrent or self._max_concurrent
        progress = show_progress if show_progress is not None else self._show_progress

        if self._mode == "async":
            # _run_async makes this safe even when an event loop is running.
            return _run_async(
                async_batch(
                    generate_fn=self._async_single_generate,
                    prompts=prompts,
                    response_model=response_model,
                    schema_retries=self._schema_retries,
                    provider=provider,
                    max_concurrent=concurrency,
                    show_progress=progress,
                    extra_kwargs=kwargs,
                )
            )
        else:
            return threaded_batch(
                generate_fn=self._sync_single_generate,
                prompts=prompts,
                response_model=response_model,
                schema_retries=self._schema_retries,
                provider=provider,
                max_concurrent=concurrency,
                show_progress=progress,
                extra_kwargs=kwargs,
            )

    async def async_batch_generate(
        self,
        prompts: list[Prompt],
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        max_concurrent: Optional[int] = None,
        show_progress: Optional[bool] = None,
        **kwargs: Any,
    ) -> BatchResult:
        """Async version of batch_generate() for use inside async contexts."""
        concurrency = max_concurrent or self._max_concurrent
        progress = show_progress if show_progress is not None else self._show_progress

        return await async_batch(
            generate_fn=self._async_single_generate,
            prompts=prompts,
            response_model=response_model,
            schema_retries=self._schema_retries,
            provider=provider,
            max_concurrent=concurrency,
            show_progress=progress,
            extra_kwargs=kwargs,
        )

    # ── public: quota management ───────────────────────────────────────────

    def set_quota(self, provider: str, key_id: str, quota: QuotaConfig) -> None:
        """
        Set or update quota limits for a specific key.

        ``quota.model`` controls scope:
        - ``None`` (default) — applies to all models on this key that
          don't have their own explicit entry.
        - A model string — applies only to that model.

        Examples::

            # key-level RPM + default daily limit for all models
            client.set_quota(
                provider="gemini",
                key_id="AIza-abc1",
                quota=QuotaConfig(rpm_limit=15, daily_token_limit=1_500_000),
            )

            # tighter limit for one expensive model only
            client.set_quota(
                provider="gemini",
                key_id="AIza-abc1",
                quota=QuotaConfig(model="gemini-2.5-pro", daily_token_limit=250_000),
            )
        """
        self._km.set_quota(provider, key_id, quota)

    def status(
        self,
        provider: Optional[str] = None,
        key_id: Optional[str] = None,
    ) -> list[dict]:
        """
        Return key status dicts.

        Parameters
        ----------
        provider  Filter to a specific provider (``"openai"`` / ``"gemini"``).
                  None returns all providers.
        key_id    Filter to a specific key (first 8 chars of the key).
                  None returns all keys for the provider.
        """
        return self._km.status_report(provider=provider, key_id=key_id)

    def print_status(
        self,
        provider: Optional[str] = None,
        key_id: Optional[str] = None,
    ) -> None:
        """Pretty-print key status to stdout (model-aware)."""
        import datetime
        rows = self._km.status_report(provider=provider, key_id=key_id)
        if not rows:
            print("No keys found.")
            return

        sep = "-" * 72
        for r in rows:
            print(sep)
            print(f" Provider : {r['provider']}")
            print(f" Key ID : {r['key_id']}...")
            print(f" Status : {r['status']}")
            print(f" RPM limit: {r['rpm_limit'] or 'not set'} | "
                  f"Current RPM: {r['current_rpm']}")
            print()

            models = r.get("models", [])
            if models:
                print(f" Models ({len(models)} tracked)")
                for mr in models:
                    status_flag = "\u2713" if mr["status"] == "active" else "\u2717"
                    print(f" [{status_flag}] {mr['model']}")
                    if mr["deactivated_at"]:
                        # utcfromtimestamp() is deprecated (3.12+); use the
                        # tz-aware form — identical strftime output for this format.
                        dt = datetime.datetime.fromtimestamp(
                            mr["deactivated_at"], tz=datetime.timezone.utc
                        )
                        print(f" Deactivated : {dt.strftime('%Y-%m-%d %H:%M:%S')} UTC")
                    print(f" TPM limit : {mr['tpm_limit'] or 'not set'} "
                          f"Current TPM: {mr['current_tpm']}")
                    daily_rem = mr["daily_remaining"]
                    print(f" Daily limit : {mr['daily_token_limit'] or 'not set'} "
                          f"Used: {mr['day_token_total']} "
                          f"Remaining: {daily_rem if daily_rem is not None else 'unlimited'}")
                    print(f" Reset hour : {mr['reset_hour_utc']:02d}:00 UTC")
                    print(f" Totals : {mr['total_requests']} req "
                          f"{mr['total_tokens']} tok {mr['total_errors']} err")
            else:
                print(" No model usage recorded yet.")

            if r["recent_meta"]:
                print()
                print(f" Last {len(r['recent_meta'])} requests")
                for m in r["recent_meta"]:
                    # Tz-aware replacement for deprecated utcfromtimestamp().
                    ts = datetime.datetime.fromtimestamp(
                        m["timestamp"], tz=datetime.timezone.utc
                    )
                    status_str = "ok" if m["success"] else f"ERR: {m.get('error', '')[:60]}"
                    print(
                        f" {ts.strftime('%H:%M:%S')} UTC | "
                        f"{m['model']:<28} | "
                        f"in={m['input_tokens']} out={m['output_tokens']} "
                        f"total={m['total_tokens']} | "
                        f"{m['latency_ms']:.0f}ms | {status_str}"
                    )
        print(sep)

    # ── internal: async single generate ───────────────────────────────────

    async def _async_single_generate(
        self,
        prompt: Prompt,
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: Optional[int] = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """Single async request with key rotation, rate limiting and retries."""
        schema_retries = schema_retries if schema_retries is not None else self._schema_retries
        prov_impl = self._get_provider(provider)

        last_error: Optional[str] = None
        keys_tried: set[tuple] = set()

        while True:
            # acquire a key that has this model active
            try:
                raw_key, ks = self._km.get_key(provider, model=prov_impl.model)
            except RuntimeError as exc:
                return self._error_response(provider, str(exc))

            # avoid re-trying the same (key, model) combination
            if (ks.key_hash, prov_impl.model) in keys_tried:
                break

            # wait for RPM slot (key-level) and TPM slot (model-level)
            await self._rl.async_wait_for_slot(ks, prov_impl.model)

            # attempt with retries on same key
            for attempt in range(self._key_retries + 1):
                t0 = time.perf_counter()
                try:
                    response = await prov_impl.async_generate(
                        prompt=prompt,
                        api_key=raw_key,
                        response_model=response_model,
                        schema_retries=schema_retries,
                        **kwargs,
                    )
                    # success — record metadata
                    meta = RequestMeta(
                        provider=provider,
                        key_id=ks.key_id,
                        model=prov_impl.model,
                        input_tokens=response.input_tokens,
                        output_tokens=response.output_tokens,
                        total_tokens=response.total_tokens,
                        latency_ms=response.latency_ms,
                        success=True,
                    )
                    self._km.record_request(ks, meta)
                    return response

                except Exception as exc:
                    latency_ms = (time.perf_counter() - t0) * 1000
                    last_error = str(exc)
                    is_quota = prov_impl._is_quota_error(exc)

                    # record failed request
                    meta = RequestMeta(
                        provider=provider,
                        key_id=ks.key_id,
                        model=prov_impl.model,
                        latency_ms=latency_ms,
                        success=False,
                        error=last_error[:200],
                    )
                    self._km.record_request(ks, meta)

                    if is_quota:
                        # deactivate only this model on this key, not the whole key
                        self._km.deactivate_model(
                            ks, model=prov_impl.model, reason=last_error[:100]
                        )
                        break  # rotate to next key/model
                    if attempt < self._key_retries:
                        # small backoff before same-key retry
                        await asyncio.sleep(1.0 * (attempt + 1))
                    # else: fall through and rotate key

            keys_tried.add((ks.key_hash, prov_impl.model))

        return self._error_response(provider, last_error or "All keys exhausted.")

    # ── internal: sync single generate ────────────────────────────────────

    def _sync_single_generate(
        self,
        prompt: Prompt,
        provider: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: Optional[int] = None,
        **kwargs: Any,
    ) -> LLMResponse:
        """Sync twin of _async_single_generate — same rotation/retry logic."""
        schema_retries = schema_retries if schema_retries is not None else self._schema_retries
        prov_impl = self._get_provider(provider)

        last_error: Optional[str] = None
        keys_tried: set[tuple] = set()

        while True:
            try:
                raw_key, ks = self._km.get_key(provider, model=prov_impl.model)
            except RuntimeError as exc:
                return self._error_response(provider, str(exc))

            if (ks.key_hash, prov_impl.model) in keys_tried:
                break

            self._rl.sync_wait_for_slot(ks, prov_impl.model)

            for attempt in range(self._key_retries + 1):
                t0 = time.perf_counter()
                try:
                    response = prov_impl.sync_generate(
                        prompt=prompt,
                        api_key=raw_key,
                        response_model=response_model,
                        schema_retries=schema_retries,
                        **kwargs,
                    )
                    meta = RequestMeta(
                        provider=provider,
                        key_id=ks.key_id,
                        model=prov_impl.model,
                        input_tokens=response.input_tokens,
                        output_tokens=response.output_tokens,
                        total_tokens=response.total_tokens,
                        latency_ms=response.latency_ms,
                        success=True,
                    )
                    self._km.record_request(ks, meta)
                    return response

                except Exception as exc:
                    latency_ms = (time.perf_counter() - t0) * 1000
                    last_error = str(exc)
                    is_quota = prov_impl._is_quota_error(exc)

                    meta = RequestMeta(
                        provider=provider,
                        key_id=ks.key_id,
                        model=prov_impl.model,
                        latency_ms=latency_ms,
                        success=False,
                        error=last_error[:200],
                    )
                    self._km.record_request(ks, meta)

                    if is_quota:
                        self._km.deactivate_model(
                            ks, model=prov_impl.model, reason=last_error[:100]
                        )
                        break
                    if attempt < self._key_retries:
                        time.sleep(1.0 * (attempt + 1))

            keys_tried.add((ks.key_hash, prov_impl.model))

        return self._error_response(provider, last_error or "All keys exhausted.")

    # ── internal helpers ───────────────────────────────────────────────────

    def _get_provider(self, provider: str) -> BaseProvider:
        """Look up a provider implementation; raise ValueError for unknown names."""
        impl = self._providers.get(provider)
        if impl is None:
            raise ValueError(
                f"Unknown provider '{provider}'. "
                f"Valid options: {list(self._providers.keys())}"
            )
        return impl

    @staticmethod
    def _error_response(provider: str, error: str) -> LLMResponse:
        """Build an empty LLMResponse carrying only an error message."""
        return LLMResponse(
            content="",
            parsed=None,
            schema_matched=False,
            provider=provider,
            model="",
            key_id="",
            input_tokens=0,
            output_tokens=0,
            total_tokens=0,
            latency_ms=0.0,
            error=error,
        )