python-infrakit-dev 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. infrakit/__init__.py +0 -0
  2. infrakit/cli/__init__.py +1 -0
  3. infrakit/cli/commands/__init__.py +1 -0
  4. infrakit/cli/commands/deps.py +530 -0
  5. infrakit/cli/commands/init.py +129 -0
  6. infrakit/cli/commands/llm.py +295 -0
  7. infrakit/cli/commands/logger.py +160 -0
  8. infrakit/cli/commands/module.py +342 -0
  9. infrakit/cli/commands/time.py +81 -0
  10. infrakit/cli/main.py +65 -0
  11. infrakit/core/__init__.py +0 -0
  12. infrakit/core/config/__init__.py +0 -0
  13. infrakit/core/config/converter.py +480 -0
  14. infrakit/core/config/exporter.py +304 -0
  15. infrakit/core/config/loader.py +713 -0
  16. infrakit/core/config/validator.py +389 -0
  17. infrakit/core/logger/__init__.py +21 -0
  18. infrakit/core/logger/formatters.py +143 -0
  19. infrakit/core/logger/handlers.py +322 -0
  20. infrakit/core/logger/retention.py +176 -0
  21. infrakit/core/logger/setup.py +314 -0
  22. infrakit/deps/__init__.py +239 -0
  23. infrakit/deps/clean.py +141 -0
  24. infrakit/deps/depfile.py +405 -0
  25. infrakit/deps/health.py +357 -0
  26. infrakit/deps/optimizer.py +642 -0
  27. infrakit/deps/scanner.py +550 -0
  28. infrakit/llm/__init__.py +35 -0
  29. infrakit/llm/batch.py +165 -0
  30. infrakit/llm/client.py +575 -0
  31. infrakit/llm/key_manager.py +728 -0
  32. infrakit/llm/llm_readme.md +306 -0
  33. infrakit/llm/models.py +148 -0
  34. infrakit/llm/providers/__init__.py +5 -0
  35. infrakit/llm/providers/base.py +112 -0
  36. infrakit/llm/providers/gemini.py +164 -0
  37. infrakit/llm/providers/openai.py +168 -0
  38. infrakit/llm/rate_limiter.py +54 -0
  39. infrakit/scaffolder/__init__.py +31 -0
  40. infrakit/scaffolder/ai.py +508 -0
  41. infrakit/scaffolder/backend.py +555 -0
  42. infrakit/scaffolder/cli_tool.py +386 -0
  43. infrakit/scaffolder/generator.py +338 -0
  44. infrakit/scaffolder/pipeline.py +562 -0
  45. infrakit/scaffolder/registry.py +121 -0
  46. infrakit/time/__init__.py +60 -0
  47. infrakit/time/profiler.py +511 -0
  48. python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
  49. python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
  50. python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
  51. python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,306 @@
1
+ # infrakit.llm
2
+
3
+ Unified LLM client for OpenAI and Gemini with key rotation, quota tracking,
4
+ rate limiting, and batch processing. Designed for free-tier API keys where
5
+ RPM/daily limits matter.
6
+
7
+ ---
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ # core
13
+ pip install pydantic tqdm
14
+
15
+ # for OpenAI
16
+ pip install openai
17
+
18
+ # for Gemini
19
+ pip install google-generativeai
20
+ ```
21
+
22
+ ---
23
+
24
+ ## File structure
25
+
26
+ ```
27
+ infrakit/llm/
28
+ ├── __init__.py Public API surface
29
+ ├── client.py LLMClient — main entry point
30
+ ├── key_manager.py Key rotation, quota, persistence
31
+ ├── rate_limiter.py RPM / TPM async and sync gates
32
+ ├── batch.py Async + threaded batch engine
33
+ ├── models.py Shared types (Prompt, LLMResponse, etc.)
34
+ └── providers/
35
+ ├── __init__.py
36
+ ├── base.py AbstractProvider + schema validation
37
+ ├── openai.py OpenAI provider
38
+ └── gemini.py Gemini provider
39
+
40
+ infrakit/cli/
41
+ └── commands/llm.py  CLI commands (ik llm status, ik llm quota set)
42
+ ```
43
+
44
+ ---
45
+
46
+ ## Quick start
47
+
48
+ ```python
49
+ from infrakit.llm import LLMClient, Prompt
50
+
51
+ client = LLMClient(
52
+ keys={
53
+ "openai_keys": ["sk-key1", "sk-key2"],
54
+ "gemini_keys": ["AIza-key1"],
55
+ },
56
+ storage_dir="./logs", # key state persisted here
57
+ )
58
+
59
+ # simple prompt
60
+ response = client.generate(Prompt(user="What is 2 + 2?"), provider="openai")
61
+ print(response.content)
62
+
63
+ # system + user split
64
+ response = client.generate(
65
+ Prompt(system="You are a maths tutor.", user="Explain derivatives."),
66
+ provider="gemini",
67
+ )
68
+ print(response.content)
69
+ ```
70
+
71
+ ---
72
+
73
+ ## Structured output (Pydantic)
74
+
75
+ Pass a Pydantic model as `response_model`. The system will try to parse the
76
+ model's JSON response and validate it against your schema. If validation fails
77
+ after retries, you still get the raw `content` back with `schema_matched=False`.
78
+
79
+ ```python
80
+ from pydantic import BaseModel
81
+
82
+ class Sentiment(BaseModel):
83
+ label: str # "positive" | "negative" | "neutral"
84
+ confidence: float
85
+
86
+ # Your prompt must instruct the model to return JSON — infrakit does not
87
+ # inject instructions automatically.
88
+ response = client.generate(
89
+ Prompt(
90
+ system='Respond ONLY with valid JSON matching: {"label": str, "confidence": float}',
91
+ user="I love this product!",
92
+ ),
93
+ provider="openai",
94
+ response_model=Sentiment,
95
+ )
96
+
97
+ if response.schema_matched:
98
+ print(response.parsed.label) # "positive"
99
+ print(response.parsed.confidence) # 0.97
100
+ else:
101
+ print("Schema mismatch — raw response:", response.content)
102
+ ```
103
+
104
+ ---
105
+
106
+ ## Batch processing
107
+
108
+ ```python
109
+ from infrakit.llm import Prompt
110
+
111
+ words = ["cat", "dog", "bird", "fish"]
112
+ prompts = [
113
+ Prompt(
114
+ system="Translate to French. Reply with only the translation.",
115
+ user=word,
116
+ )
117
+ for word in words
118
+ ]
119
+
120
+ batch = client.batch_generate(prompts, provider="gemini")
121
+
122
+ # results are in the same order as prompts
123
+ for word, result in zip(words, batch.results):
124
+ if result.error:
125
+ print(f"{word}: ERROR — {result.error}")
126
+ else:
127
+ print(f"{word}: {result.content}")
128
+
129
+ print(f"\nTotal tokens: {batch.total_tokens}")
130
+ print(f"Success: {batch.success_count} / Failure: {batch.failure_count}")
131
+ ```
132
+
133
+ ---
134
+
135
+ ## LLMClient parameters
136
+
137
+ | Parameter | Default | Description |
138
+ |---|---|---|
139
+ | `keys` | required | `{"openai_keys": [...], "gemini_keys": [...]}` |
140
+ | `storage_dir` | required | Folder for persistent key state |
141
+ | `mode` | `"async"` | `"async"` or `"threaded"` |
142
+ | `max_concurrent` | `3` | Max simultaneous batch requests |
143
+ | `key_retries` | `2` | Retries on same key before rotating |
144
+ | `schema_retries` | `2` | JSON parse/validate retries |
145
+ | `meta_window` | `50` | Recent request metadata records per key |
146
+ | `openai_model` | `"gpt-4o-mini"` | Default OpenAI model |
147
+ | `gemini_model` | `"gemini-1.5-flash"` | Default Gemini model |
148
+ | `show_progress` | `True` | tqdm progress bar for batch calls |
149
+
150
+ ---
151
+
152
+ ## Quota management
153
+
154
+ ### Set quota for a key
155
+
156
+ ```python
157
+ from infrakit.llm import QuotaConfig
158
+
159
+ client.set_quota(
160
+ provider="openai",
161
+ key_id="sk-abc123", # first 8 chars of the key
162
+ quota=QuotaConfig(
163
+ rpm_limit=60, # 60 requests per minute
164
+ tpm_limit=90_000, # 90k tokens per minute
165
+ daily_token_limit=1_000_000,
166
+ reset_hour_utc=0, # resets at midnight UTC
167
+ ),
168
+ )
169
+ ```
170
+
171
+ Only the fields you set are enforced. Unset fields are unconstrained.
172
+
173
+ ### Key lifecycle
174
+
175
+ - **active** — key is in normal use.
176
+ - **inactive** — key hit a quota limit (daily tokens, or a hard 401/429 from the API).
177
+ Automatically reactivates at `reset_hour_utc` the following day.
178
+ - Keys are rotated round-robin among all active keys. When a key is deactivated,
179
+ remaining active keys absorb the load.
180
+
181
+ ---
182
+
183
+ ## Check status (Python)
184
+
185
+ ```python
186
+ # all keys
187
+ client.print_status()
188
+
189
+ # one provider
190
+ client.print_status(provider="openai")
191
+
192
+ # one key
193
+ client.print_status(provider="openai", key_id="sk-abc123")
194
+
195
+ # raw dict (for programmatic use)
196
+ rows = client.status(provider="openai")
197
+ ```
198
+
199
+ ---
200
+
201
+ ## CLI
202
+
203
+ Register the LLM command module in your main CLI (add to `infrakit/cli/__init__.py`):
204
+
205
+ ```python
206
+ from infrakit.cli.commands.llm import register as register_llm
207
+ register_llm(cli)
208
+ ```
209
+
210
+ ### Commands
211
+
212
+ ```bash
213
+ # show all keys
214
+ ik llm status --storage-dir ./logs
215
+
216
+ # filter by provider
217
+ ik llm status --provider openai --storage-dir ./logs
218
+
219
+ # filter by key (first 8 chars)
220
+ ik llm status --key sk-abc123 --storage-dir ./logs
221
+
222
+ # JSON output
223
+ ik llm status --json --storage-dir ./logs
224
+
225
+ # set quota for a key
226
+ ik llm quota set \
227
+ --provider openai \
228
+ --key sk-abc123 \
229
+ --rpm 60 \
230
+ --tpm 90000 \
231
+ --daily 1000000 \
232
+ --reset-hour 0 \
233
+ --storage-dir ./logs
234
+ ```
235
+
236
+ If your keys are stored in a JSON file:
237
+ ```bash
238
+ ik llm status --keys-file keys.json --storage-dir ./logs
239
+ ```
240
+
241
+ `keys.json` format:
242
+ ```json
243
+ {
244
+ "openai_keys": ["sk-key1", "sk-key2"],
245
+ "gemini_keys": ["AIza-key1"]
246
+ }
247
+ ```
248
+
249
+ ---
250
+
251
+ ## Error handling
252
+
253
+ Every `generate()` call returns an `LLMResponse` — it never raises. Check `.error`:
254
+
255
+ ```python
256
+ result = client.generate(Prompt(user="Hello"), provider="openai")
257
+ if result.error:
258
+ print(f"Request failed: {result.error}")
259
+ else:
260
+ print(result.content)
261
+ ```
262
+
263
+ ### What infrakit handles automatically
264
+
265
+ | Situation | Behaviour |
266
+ |---|---|
267
+ | Transient error (network, 5xx) | Retry same key up to `key_retries` times with backoff |
268
+ | Quota / auth error (401, 402, 429+quota) | Deactivate key immediately, rotate to next |
269
+ | All keys exhausted | Return `LLMResponse(error="All keys exhausted.")` |
270
+ | RPM limit reached | Async/sync sleep until slot opens |
271
+ | Daily token limit reached | Deactivate key, auto-reactivate at reset hour |
272
+ | Schema validation fails | Return raw content + `schema_matched=False` |
273
+
274
+ ---
275
+
276
+ ## LLMResponse fields
277
+
278
+ | Field | Type | Description |
279
+ |---|---|---|
280
+ | `content` | `str` | Raw text from the model |
281
+ | `parsed` | `BaseModel \| None` | Validated Pydantic instance (if `response_model` given and matched) |
282
+ | `schema_matched` | `bool` | True if structured output validated successfully |
283
+ | `provider` | `str` | `"openai"` or `"gemini"` |
284
+ | `model` | `str` | Model string used |
285
+ | `key_id` | `str` | First 8 chars of the key used |
286
+ | `input_tokens` | `int` | Prompt token count |
287
+ | `output_tokens` | `int` | Completion token count |
288
+ | `total_tokens` | `int` | input + output |
289
+ | `latency_ms` | `float` | Wall-clock API call time |
290
+ | `error` | `str \| None` | Set on failure |
291
+
292
+ ---
293
+
294
+ ## What is and isn't stored
295
+
296
+ infrakit stores **no prompt or response content**. Per key, it persists:
297
+
298
+ - Status (active/inactive), deactivation timestamp
299
+ - Quota config (RPM, TPM, daily limit, reset hour)
300
+ - Lifetime totals (requests, tokens in/out, errors)
301
+ - Daily token total + day start epoch
302
+ - RPM window (timestamps of last 60 s of requests)
303
+ - TPM window (timestamps + token counts for last 60 s)
304
+ - Rolling metadata for last N requests: timestamp, model, token counts, latency, success/error code
305
+
306
+ All stored in `{storage_dir}/llm_key_state.json`.
infrakit/llm/models.py ADDED
@@ -0,0 +1,148 @@
1
+ """
2
+ infrakit.llm.models
3
+ -------------------
4
+ Shared data structures for the LLM subsystem.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import time
10
+ from dataclasses import dataclass, field
11
+ from enum import Enum
12
+ from typing import Any, Optional, Type
13
+
14
+ from pydantic import BaseModel
15
+
16
+
17
+ # ── enums ──────────────────────────────────────────────────────────────────
18
+
19
class Provider(str, Enum):
    """Supported LLM backends."""

    OPENAI = "openai"
    GEMINI = "gemini"
22
+
23
+
24
class KeyStatus(str, Enum):
    """Lifecycle state of an API key.

    An INACTIVE key (all models exhausted) is auto-reactivated after the
    daily reset.
    """

    ACTIVE = "active"
    INACTIVE = "inactive"
27
+
28
+
29
class ModelStatus(str, Enum):
    """Per-model state on a single key.

    INACTIVE marks a model whose quota is exhausted on that key.
    """

    ACTIVE = "active"
    INACTIVE = "inactive"
32
+
33
+
34
+ # ── prompt input ───────────────────────────────────────────────────────────
35
+
36
@dataclass
class Prompt:
    """One LLM request: a user message, optionally preceded by a system message.

    Example:
        Prompt(user="Tell me about Python.")
        Prompt(system="You are a helpful assistant.", user="Tell me about Python.")
    """

    user: str                     # the user-turn text (required)
    system: Optional[str] = None  # optional system instruction
51
+
52
+
53
+ # ── response ───────────────────────────────────────────────────────────────
54
+
55
@dataclass
class LLMResponse:
    """Result of a single generate() call.

    generate() never raises; inspect ``error`` to detect failure. When a
    response_model was supplied and validation succeeded, ``parsed`` holds
    the validated instance and ``schema_matched`` is True; on a schema
    mismatch after all retries the raw reply is still in ``content``.
    """

    content: str                 # raw text from the model
    parsed: Optional[Any]        # validated Pydantic instance, or None
    schema_matched: bool         # True iff parsed is populated
    provider: str                # provider that handled this request
    model: str                   # model string used (e.g. "gpt-4o-mini")
    key_id: str                  # truncated key identifier (first 8 chars)
    input_tokens: int            # prompt token count
    output_tokens: int           # completion token count
    total_tokens: int            # input + output
    latency_ms: float            # wall-clock API call time in milliseconds
    error: Optional[str] = None  # set when the request ultimately failed
87
+
88
+
89
+ # ── request metadata (stored for transparency) ────────────────────────────
90
+
91
@dataclass
class RequestMeta:
    """Lightweight record of one API call, kept in the rolling window.

    Intentionally stores NO prompt or response content — only timing,
    token counts, and the success/error outcome.
    """

    timestamp: float = field(default_factory=time.time)  # call time (epoch seconds)
    provider: str = ""
    key_id: str = ""
    model: str = ""
    input_tokens: int = 0
    output_tokens: int = 0
    total_tokens: int = 0
    latency_ms: float = 0.0
    success: bool = True
    error: Optional[str] = None
107
+
108
+
109
+ # ── quota config ───────────────────────────────────────────────────────────
110
+
111
@dataclass
class QuotaConfig:
    """Quota limits for a key, optionally scoped to one model.

    A config with ``model=None`` is the default for any model without an
    explicit entry; when both a model-specific config and a default exist,
    the model-specific one wins. Unset limit fields are unconstrained.
    """

    model: Optional[str] = None              # None = default for all models
    rpm_limit: Optional[int] = None          # max requests/min (key-level, shared)
    tpm_limit: Optional[int] = None          # max tokens/min for this model
    daily_token_limit: Optional[int] = None  # max tokens/day for this model
    reset_hour_utc: int = 0                  # UTC hour (0-23) of the daily reset
132
+
133
+
134
+ # ── batch result ───────────────────────────────────────────────────────────
135
+
136
@dataclass
class BatchResult:
    """Aggregate outcome of one batch generate() call.

    ``results`` preserves the order of the input prompts; the element type
    allows None for prompts that produced no response object.
    """

    results: list[Optional[LLMResponse]]  # one entry per input prompt, same order
    total_input_tokens: int
    total_output_tokens: int
    total_tokens: int
    total_latency_ms: float
    success_count: int
    failure_count: int
@@ -0,0 +1,5 @@
1
+ from .base import BaseProvider
2
+ from .openai import OpenAIProvider
3
+ from .gemini import GeminiProvider
4
+
5
+ __all__ = ["BaseProvider", "OpenAIProvider", "GeminiProvider"]
@@ -0,0 +1,112 @@
1
+ """
2
+ infrakit.llm.providers.base
3
+ ---------------------------
4
+ Abstract base class that every provider must implement.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from abc import ABC, abstractmethod
10
+ from typing import Any, Optional, Type
11
+
12
+ from pydantic import BaseModel
13
+
14
+ from ..models import LLMResponse, Prompt
15
+
16
+
17
class BaseProvider(ABC):
    """
    Abstract interface implemented by every LLM provider.

    All provider-specific logic (API call, token counting, error
    classification) lives in subclasses; the client only talks to this
    interface.
    """

    # ── configuration ──────────────────────────────────────────────────────

    #: Override in subclass with a sensible default (e.g. "gpt-4o-mini").
    DEFAULT_MODEL: str = ""

    def __init__(self, model: Optional[str] = None) -> None:
        # Fall back to the subclass default when no explicit model is given.
        self.model = model or self.DEFAULT_MODEL

    # ── abstract interface ─────────────────────────────────────────────────

    @abstractmethod
    async def async_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Async generate. Must return an LLMResponse even on soft failures
        (schema mismatch). Hard failures (network, auth) should raise.
        """

    @abstractmethod
    def sync_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """Sync wrapper around async_generate (or a native sync call)."""

    # ── helpers shared by subclasses ───────────────────────────────────────

    @staticmethod
    def _validate_schema(
        content: str,
        response_model: Type[BaseModel],
        retries: int,
    ) -> tuple[Optional[BaseModel], bool]:
        """
        Parse *content* as JSON and validate it against *response_model*.

        Parameters
        ----------
        content         Raw model output, possibly wrapped in markdown fences.
        response_model  Pydantic model class to validate against.
        retries         Kept for interface compatibility. Parsing a fixed
                        string is deterministic, so re-attempting the same
                        input can never change the outcome; this helper
                        fails fast instead of looping.

        Returns
        -------
        (parsed_instance, matched)
            matched is True on success, False when parsing or validation
            fails.
        """
        import json

        text = content.strip()
        # Strip common markdown fences (```json ... ```). Drop the final
        # line only when it really is a closing fence, so un-fenced trailing
        # content is preserved; this also handles a fence with no closing
        # line, which the naive "drop first and last" approach corrupts.
        if text.startswith("```"):
            lines = text.splitlines()
            body = lines[1:]
            if body and body[-1].strip() == "```":
                body = body[:-1]
            text = "\n".join(body)
        try:
            data = json.loads(text)
            instance = response_model.model_validate(data)
        except Exception:
            return None, False
        return instance, True

    @staticmethod
    def _is_quota_error(exc: Exception) -> bool:
        """
        Return True if *exc* indicates a hard quota / auth failure that
        should deactivate the key rather than be retried.

        Classification is substring-based on the exception message;
        subclasses may override for provider-specific error codes.
        """
        msg = str(exc).lower()
        return any(
            kw in msg
            for kw in (
                "quota",
                "rate_limit_exceeded",
                "billing",
                "insufficient_quota",
                "resource_exhausted",
                "invalid_api_key",
                "permission_denied",
            )
        )