python-infrakit-dev 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infrakit/__init__.py +0 -0
- infrakit/cli/__init__.py +1 -0
- infrakit/cli/commands/__init__.py +1 -0
- infrakit/cli/commands/deps.py +530 -0
- infrakit/cli/commands/init.py +129 -0
- infrakit/cli/commands/llm.py +295 -0
- infrakit/cli/commands/logger.py +160 -0
- infrakit/cli/commands/module.py +342 -0
- infrakit/cli/commands/time.py +81 -0
- infrakit/cli/main.py +65 -0
- infrakit/core/__init__.py +0 -0
- infrakit/core/config/__init__.py +0 -0
- infrakit/core/config/converter.py +480 -0
- infrakit/core/config/exporter.py +304 -0
- infrakit/core/config/loader.py +713 -0
- infrakit/core/config/validator.py +389 -0
- infrakit/core/logger/__init__.py +21 -0
- infrakit/core/logger/formatters.py +143 -0
- infrakit/core/logger/handlers.py +322 -0
- infrakit/core/logger/retention.py +176 -0
- infrakit/core/logger/setup.py +314 -0
- infrakit/deps/__init__.py +239 -0
- infrakit/deps/clean.py +141 -0
- infrakit/deps/depfile.py +405 -0
- infrakit/deps/health.py +357 -0
- infrakit/deps/optimizer.py +642 -0
- infrakit/deps/scanner.py +550 -0
- infrakit/llm/__init__.py +35 -0
- infrakit/llm/batch.py +165 -0
- infrakit/llm/client.py +575 -0
- infrakit/llm/key_manager.py +728 -0
- infrakit/llm/llm_readme.md +306 -0
- infrakit/llm/models.py +148 -0
- infrakit/llm/providers/__init__.py +5 -0
- infrakit/llm/providers/base.py +112 -0
- infrakit/llm/providers/gemini.py +164 -0
- infrakit/llm/providers/openai.py +168 -0
- infrakit/llm/rate_limiter.py +54 -0
- infrakit/scaffolder/__init__.py +31 -0
- infrakit/scaffolder/ai.py +508 -0
- infrakit/scaffolder/backend.py +555 -0
- infrakit/scaffolder/cli_tool.py +386 -0
- infrakit/scaffolder/generator.py +338 -0
- infrakit/scaffolder/pipeline.py +562 -0
- infrakit/scaffolder/registry.py +121 -0
- infrakit/time/__init__.py +60 -0
- infrakit/time/profiler.py +511 -0
- python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
- python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
- python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
- python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm.providers.gemini
|
|
3
|
+
------------------------------
|
|
4
|
+
Gemini provider — wraps the google-genai Python SDK.
|
|
5
|
+
|
|
6
|
+
Install dependency::
|
|
7
|
+
|
|
8
|
+
pip install google-genai
|
|
9
|
+
|
|
10
|
+
Supported models (default): gemini-2.5-flash
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import time
|
|
17
|
+
from typing import Any, Optional, Type
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel
|
|
20
|
+
|
|
21
|
+
from ..models import LLMResponse, Prompt
|
|
22
|
+
from .base import BaseProvider
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GeminiProvider(BaseProvider):
    """
    Provider for Google's Gemini generative models (google-genai SDK).

    Parameters
    ----------
    model Model string to use. Defaults to ``gemini-2.5-flash``.
    """

    DEFAULT_MODEL = "gemini-2.5-flash"
    PROVIDER_NAME = "gemini"

    def __init__(self, model: Optional[str] = None) -> None:
        super().__init__(model)
        # Fail fast at construction time if the SDK is not installed.
        self._check_sdk()

    # ── public interface ───────────────────────────────────────────────────

    async def async_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Async generate using the google-genai SDK.

        The SDK's generate_content is synchronous; we run it in a worker
        thread so the event loop is never blocked.
        """
        t0 = time.perf_counter()
        # asyncio.to_thread (3.9+) is the supported replacement for the
        # deprecated get_event_loop()/run_in_executor pattern inside a
        # coroutine (get_event_loop() emits a DeprecationWarning and can
        # misbehave when no loop policy is set).
        result = await asyncio.to_thread(self._sync_call, prompt, api_key, **kwargs)
        latency_ms = (time.perf_counter() - t0) * 1000
        return self._build_response(result, latency_ms, response_model, schema_retries, api_key)

    def sync_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """Sync generate — direct SDK call."""
        t0 = time.perf_counter()
        result = self._sync_call(prompt, api_key, **kwargs)
        latency_ms = (time.perf_counter() - t0) * 1000
        return self._build_response(result, latency_ms, response_model, schema_retries, api_key)

    # ── internal helpers ───────────────────────────────────────────────────

    def _sync_call(self, prompt: Prompt, api_key: str, **kwargs: Any) -> Any:
        """Blocking call into the google-genai SDK; returns the raw response."""
        # Imported lazily so the module can be imported without the SDK;
        # _check_sdk() in __init__ gives an actionable error first.
        from google import genai
        from google.genai import types

        client = genai.Client(api_key=api_key)

        # The new SDK uses a config object for arguments like system instructions,
        # temperature, etc. We map caller kwargs directly into it.
        config_args = kwargs.copy()
        if prompt.system:
            config_args["system_instruction"] = prompt.system

        config = types.GenerateContentConfig(**config_args) if config_args else None

        response = client.models.generate_content(
            model=self.model,
            contents=prompt.user,
            config=config,
        )
        return response

    def _build_response(
        self,
        response: Any,
        latency_ms: float,
        response_model: Optional[Type[BaseModel]],
        schema_retries: int,
        api_key: str,
    ) -> LLMResponse:
        """Convert a raw SDK response into the provider-agnostic LLMResponse."""
        # Extract text. response.text can raise (e.g. when the candidate was
        # blocked) or be None; treat both as empty output.
        try:
            content = response.text or ""
        except Exception:
            content = ""

        # Token counts — available in usage_metadata (may be None on some models)
        usage = getattr(response, "usage_metadata", None)
        input_tokens = getattr(usage, "prompt_token_count", 0) or 0
        output_tokens = getattr(usage, "candidates_token_count", 0) or 0
        total_tokens = getattr(usage, "total_token_count", 0) or (input_tokens + output_tokens)

        parsed = None
        schema_matched = False

        if response_model is not None:
            parsed, schema_matched = self._validate_schema(
                content, response_model, schema_retries
            )

        return LLMResponse(
            content=content,
            parsed=parsed,
            schema_matched=schema_matched,
            provider=self.PROVIDER_NAME,
            model=self.model,
            # Key prefix identifies the key in logs without leaking it.
            key_id=api_key[:8],
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            total_tokens=total_tokens,
            latency_ms=latency_ms,
        )

    @staticmethod
    def _check_sdk() -> None:
        """Raise a helpful ImportError if the google-genai SDK is missing."""
        try:
            from google import genai  # noqa: F401
        except ImportError as exc:
            raise ImportError(
                "google-genai package is required for GeminiProvider. "
                "Install it with: pip install google-genai"
            ) from exc

    @staticmethod
    def _is_quota_error(exc: Exception) -> bool:
        """Gemini-specific quota/auth error detection."""
        # Duck-typed check by class name so we don't have to import the SDK
        # here just to classify an exception.
        if exc.__class__.__name__ == "APIError":
            code = getattr(exc, "code", None)
            # 429: Resource Exhausted, 403: Permission Denied, 401: Unauthenticated
            if code in (429, 403, 401):
                return True
        return BaseProvider._is_quota_error(exc)
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm.providers.openai
|
|
3
|
+
------------------------------
|
|
4
|
+
OpenAI provider — wraps the openai Python SDK.
|
|
5
|
+
|
|
6
|
+
Install dependency::
|
|
7
|
+
|
|
8
|
+
pip install openai
|
|
9
|
+
|
|
10
|
+
Supported models (default): gpt-4o-mini
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import time
|
|
16
|
+
from typing import Any, Optional, Type
|
|
17
|
+
|
|
18
|
+
from pydantic import BaseModel
|
|
19
|
+
|
|
20
|
+
from ..models import LLMResponse, Prompt
|
|
21
|
+
from .base import BaseProvider
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class OpenAIProvider(BaseProvider):
    """
    Provider for OpenAI's Chat Completions API.

    Parameters
    ----------
    model Model string to use. Defaults to ``gpt-4o-mini``.
    """

    DEFAULT_MODEL = "gpt-4o-mini"
    PROVIDER_NAME = "openai"

    def __init__(self, model: Optional[str] = None) -> None:
        super().__init__(model)
        self._check_sdk()

    # ── public interface ───────────────────────────────────────────────────

    async def async_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """Generate asynchronously via ``openai.AsyncOpenAI``."""
        from openai import AsyncOpenAI

        async_client = AsyncOpenAI(api_key=api_key)
        chat_messages = self._build_messages(prompt)
        started = time.perf_counter()

        raw = await async_client.chat.completions.create(
            model=self.model,
            messages=chat_messages,
            **kwargs,
        )

        elapsed_ms = (time.perf_counter() - started) * 1000
        return self._build_response(raw, elapsed_ms, response_model, schema_retries, api_key)

    def sync_generate(
        self,
        prompt: Prompt,
        api_key: str,
        response_model: Optional[Type[BaseModel]] = None,
        schema_retries: int = 2,
        **kwargs: Any,
    ) -> LLMResponse:
        """
        Generate synchronously via ``openai.OpenAI`` (the blocking client).

        Deliberately avoids asyncio.run() so that it remains safe inside:
        - threads (threaded batch mode)
        - environments that already run an event loop (Jupyter, FastAPI, ...)
        - Windows, where asyncio loop/thread interactions are more restrictive
        """
        from openai import OpenAI

        blocking_client = OpenAI(api_key=api_key)
        chat_messages = self._build_messages(prompt)
        started = time.perf_counter()

        raw = blocking_client.chat.completions.create(
            model=self.model,
            messages=chat_messages,
            **kwargs,
        )

        elapsed_ms = (time.perf_counter() - started) * 1000
        return self._build_response(raw, elapsed_ms, response_model, schema_retries, api_key)

    # ── internal helpers ───────────────────────────────────────────────────

    @staticmethod
    def _build_messages(prompt: Prompt) -> list[dict]:
        """Assemble the chat message list; system turn (if any) comes first."""
        system_part = (
            [{"role": "system", "content": prompt.system}] if prompt.system else []
        )
        return system_part + [{"role": "user", "content": prompt.user}]

    def _build_response(
        self,
        response: Any,
        latency_ms: float,
        response_model: Optional[Type[BaseModel]],
        schema_retries: int,
        api_key: str,
    ) -> LLMResponse:
        """Map a raw chat-completions response onto LLMResponse."""
        first_choice = response.choices[0]
        text = first_choice.message.content or ""

        usage = response.usage
        prompt_toks = usage.prompt_tokens if usage else 0
        completion_toks = usage.completion_tokens if usage else 0
        all_toks = usage.total_tokens if usage else 0

        parsed_obj = None
        matched = False
        if response_model is not None:
            parsed_obj, matched = self._validate_schema(
                text, response_model, schema_retries
            )

        return LLMResponse(
            content=text,
            parsed=parsed_obj,
            schema_matched=matched,
            provider=self.PROVIDER_NAME,
            model=self.model,
            key_id=api_key[:8],
            input_tokens=prompt_toks,
            output_tokens=completion_toks,
            total_tokens=all_toks,
            latency_ms=latency_ms,
        )

    @staticmethod
    def _check_sdk() -> None:
        """Raise a helpful ImportError when the openai package is absent."""
        try:
            import openai  # noqa: F401
        except ImportError as exc:
            raise ImportError(
                "openai package is required for OpenAIProvider. "
                "Install it with: pip install openai"
            ) from exc

    @staticmethod
    def _is_quota_error(exc: Exception) -> bool:
        """OpenAI-specific quota/auth HTTP status codes."""
        try:
            from openai import APIStatusError
        except ImportError:
            return BaseProvider._is_quota_error(exc)
        if isinstance(exc, APIStatusError):
            # 401 invalid key, 402 billing, 429 with quota message
            status = exc.status_code
            if status in (401, 402):
                return True
            if status == 429:
                text = str(exc).lower()
                return "quota" in text or "billing" in text
        return BaseProvider._is_quota_error(exc)
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm.rate_limiter
|
|
3
|
+
-------------------------
|
|
4
|
+
RPM-aware gate (key-level) and TPM-aware gate (model-level).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
import time
|
|
11
|
+
|
|
12
|
+
from .key_manager import KeyManager, KeyState
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class RateLimiter:
    """
    RPM/TPM gate usable from both async and sync code paths.

    Request-per-minute budgets are enforced per key (every model shares the
    key's request budget); token-per-minute budgets are enforced per model.

    Parameters
    ----------
    key_manager Shared KeyManager instance (owns all rate state).
    poll_interval How often (seconds) to re-check the window while waiting.
    """

    def __init__(self, key_manager: KeyManager, poll_interval: float = 0.25) -> None:
        self._km = key_manager
        self._poll = poll_interval

    # ── async ──────────────────────────────────────────────────────────────

    async def async_wait_for_slot(self, ks: KeyState, model: str) -> None:
        """Async-suspend until both RPM and TPM slots are open."""
        # First gate: the key's request budget.
        while True:
            if self._km.check_rpm(ks):
                break
            delay = self._km.seconds_until_rpm_slot(ks)
            await asyncio.sleep(max(delay, self._poll))
        # Second gate: the model's token budget.
        while not self._km.check_tpm(ks, model):
            await asyncio.sleep(self._poll)

    # ── sync ───────────────────────────────────────────────────────────────

    def sync_wait_for_slot(self, ks: KeyState, model: str) -> None:
        """Blocking RPM+TPM wait. Used in sync / threaded batch mode."""
        while True:
            if self._km.check_rpm(ks):
                break
            delay = self._km.seconds_until_rpm_slot(ks)
            time.sleep(max(delay, self._poll))
        while not self._km.check_tpm(ks, model):
            time.sleep(self._poll)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.scaffolder.templates
|
|
3
|
+
------------------------------
|
|
4
|
+
Project template scaffolders.
|
|
5
|
+
|
|
6
|
+
Quick reference
|
|
7
|
+
---------------
|
|
8
|
+
from infrakit.scaffolder.templates.registry import get_template, list_templates
|
|
9
|
+
from infrakit.scaffolder.templates.ai import scaffold_ai
|
|
10
|
+
from infrakit.scaffolder.templates.backend import scaffold_backend
|
|
11
|
+
from infrakit.scaffolder.templates.cli_tool import scaffold_cli_tool
|
|
12
|
+
from infrakit.scaffolder.templates.pipeline import scaffold_pipeline
|
|
13
|
+
"""
|
|
14
|
+
from infrakit.scaffolder.generator import scaffold_basic, ScaffoldEntry, ScaffoldResult
|
|
15
|
+
from infrakit.scaffolder.ai import scaffold_ai
|
|
16
|
+
from infrakit.scaffolder.backend import scaffold_backend
|
|
17
|
+
from infrakit.scaffolder.cli_tool import scaffold_cli_tool
|
|
18
|
+
from infrakit.scaffolder.pipeline import scaffold_pipeline
|
|
19
|
+
from infrakit.scaffolder.registry import get_template, list_templates
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
'scaffold_basic',
|
|
23
|
+
'ScaffoldEntry',
|
|
24
|
+
'ScaffoldResult',
|
|
25
|
+
"scaffold_ai",
|
|
26
|
+
"scaffold_backend",
|
|
27
|
+
"scaffold_cli_tool",
|
|
28
|
+
"scaffold_pipeline",
|
|
29
|
+
"get_template",
|
|
30
|
+
"list_templates",
|
|
31
|
+
]
|