power-loop 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. llm_client/__init__.py +0 -0
  2. llm_client/capabilities.py +162 -0
  3. llm_client/interface.py +470 -0
  4. llm_client/llm_factory.py +981 -0
  5. llm_client/llm_tooling.py +645 -0
  6. llm_client/llm_utils.py +205 -0
  7. llm_client/multimodal.py +237 -0
  8. llm_client/qwen_image.py +576 -0
  9. llm_client/web_search.py +149 -0
  10. power_loop/__init__.py +326 -0
  11. power_loop/agent/__init__.py +6 -0
  12. power_loop/agent/sink.py +247 -0
  13. power_loop/agent/stateful_loop.py +363 -0
  14. power_loop/agent/system_prompt.py +396 -0
  15. power_loop/agent/types.py +41 -0
  16. power_loop/contracts/__init__.py +132 -0
  17. power_loop/contracts/errors.py +140 -0
  18. power_loop/contracts/event_payloads.py +278 -0
  19. power_loop/contracts/events.py +86 -0
  20. power_loop/contracts/handlers.py +45 -0
  21. power_loop/contracts/hook_contexts.py +265 -0
  22. power_loop/contracts/hooks.py +64 -0
  23. power_loop/contracts/messages.py +90 -0
  24. power_loop/contracts/protocols.py +48 -0
  25. power_loop/contracts/tools.py +56 -0
  26. power_loop/core/agent_context.py +94 -0
  27. power_loop/core/events.py +124 -0
  28. power_loop/core/hooks.py +122 -0
  29. power_loop/core/phase.py +217 -0
  30. power_loop/core/pipeline.py +880 -0
  31. power_loop/core/runner.py +60 -0
  32. power_loop/core/state.py +208 -0
  33. power_loop/runtime/budget.py +179 -0
  34. power_loop/runtime/cancellation.py +127 -0
  35. power_loop/runtime/compact.py +300 -0
  36. power_loop/runtime/env.py +103 -0
  37. power_loop/runtime/memory.py +107 -0
  38. power_loop/runtime/provider.py +176 -0
  39. power_loop/runtime/retry.py +182 -0
  40. power_loop/runtime/session_store.py +636 -0
  41. power_loop/runtime/skills.py +201 -0
  42. power_loop/runtime/spec.py +233 -0
  43. power_loop/runtime/structured.py +225 -0
  44. power_loop/tools/__init__.py +51 -0
  45. power_loop/tools/default_manifest.py +244 -0
  46. power_loop/tools/default_tools.py +766 -0
  47. power_loop/tools/registry.py +162 -0
  48. power_loop/tools/spawn_agent.py +173 -0
  49. power_loop-0.2.0.dist-info/METADATA +632 -0
  50. power_loop-0.2.0.dist-info/RECORD +53 -0
  51. power_loop-0.2.0.dist-info/WHEEL +5 -0
  52. power_loop-0.2.0.dist-info/licenses/LICENSE +21 -0
  53. power_loop-0.2.0.dist-info/top_level.txt +2 -0
@@ -0,0 +1,300 @@
1
+ """Context compaction — protocol + default implementation (M1.7a).
2
+
3
+ Design contract (from ROADMAP §M1.7a / README §1):
4
+
5
+ * Triggered every ``round.start`` when estimated tokens >=
6
+ ``max_tokens × trigger_ratio`` (or absolute ``CONTEXT_COMPACT_THRESHOLD``
7
+ env override). Idempotent within a round.
8
+ * **Preserve** the first ``role=system`` message (the original system_prompt)
9
+ and ``memory_*`` messages. Old ``compact_note`` messages are foldable —
10
+ the new summary merges them so at most one compact_note ever exists.
11
+ * **Preserve** the last ``keep_last_n`` exchanges. An exchange is a
12
+ ``user / assistant(+optional tool_calls) / tool*`` triple — never split
13
+ the atomic ``assistant(tool_calls)`` ↔ matching ``tool(tool_call_id=…)``
14
+ pair.
15
+ * Summarize the cuttable middle via a separate LLM call (default = main
16
+ LLM; injectable ``summary_llm`` for cheaper models).
17
+ * Insert one ``system / name=compact_note`` message in place of the cut
18
+ range.
19
+ * Fail-soft: on summary error, return ``None`` plan → caller continues with
20
+ uncompacted history; the pipeline then escalates to ``loop.degraded``
21
+ only if the main LLM rejects on context-overflow.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import os
27
+ from dataclasses import dataclass
28
+ from typing import Any, Protocol, runtime_checkable
29
+
30
+ from llm_client.interface import LLMRequest
31
+ from power_loop.runtime.budget import estimate_tokens
32
+
33
+
@dataclass(frozen=True)
class CompactionPlan:
    """Immutable description of one completed compaction.

    The fold range is given as two **inclusive** positions in the
    pipeline's in-memory ``history`` list. Applying the plan swaps every
    message from ``fold_start_idx`` through ``fold_end_idx`` for a single
    ``compact_note`` message placed at ``fold_start_idx``.
    """

    # First folded message (inclusive); the compact_note lands here.
    fold_start_idx: int
    # Last folded message (inclusive).
    fold_end_idx: int
    # Summary text that becomes the compact_note's content.
    summary_text: str
    # Estimated token count of the history before folding.
    before_tokens: int
    # Estimated token count of the history once the note replaces the range.
    after_tokens: int
48
+
49
+
@runtime_checkable
class Compactor(Protocol):
    """Structural interface for compaction strategies.

    Any object exposing a compatible ``maybe_compact`` coroutine satisfies
    this protocol; it decides both *whether* to compact and *how*.
    """

    async def maybe_compact(
        self,
        messages: list[dict[str, Any]],
        *,
        llm: Any,
        max_tokens: int,
        round_index: int,
    ) -> CompactionPlan | None:
        """Return a :class:`CompactionPlan`, or ``None`` to leave history as is."""
        ...
62
+
63
+
64
+ # ── default implementation ──────────────────────────────────────────────
65
+
66
+
class DefaultCompactor:
    """Vendor-neutral compactor matching the M1.7a contract.

    Strategy: once the estimated history size crosses the trigger
    threshold, pick the "middle" of the conversation (everything after the
    protected system/memory prefix and before the last ``keep_last_n``
    user-bounded exchanges), summarize it with one LLM call, and describe
    the replacement as a :class:`CompactionPlan`. Any failure along the way
    yields ``None`` so the caller simply continues uncompacted.
    """

    def __init__(
        self,
        *,
        trigger_ratio: float = 0.75,
        keep_last_n: int = 4,
        summary_max_tokens: int = 5000,
        summary_llm: Any | None = None,
        absolute_threshold: int | None = None,
    ) -> None:
        """
        Parameters
        ----------
        trigger_ratio
            Fire when ``estimate_tokens(history) ≥ max_tokens × trigger_ratio``.
            Default 0.75 leaves headroom for the next round's prompt + reply.
        keep_last_n
            Preserve the last N **user-bounded exchanges** verbatim (not
            summarized). This is the freshest context the model needs to
            answer the next turn well; folding it hurts reply quality
            dramatically. ``keep_last_n=1`` ≈ "aggressive — summarize
            everything but the last user turn"; the default 4 follows
            Anthropic's compaction guide. Values below 1 behave like 1.
        summary_max_tokens
            Token cap for the summary LLM call. Since at most one compact_note
            exists at any time (each compaction merges the old note into the
            new one), this can be generous — 5000 is a good default for
            preserving detailed context across many compaction rounds.
        summary_llm
            Optional cheaper LLM dedicated to the summary call. Defaults
            to the main loop's LLM.
        absolute_threshold
            Absolute token count that overrides ``trigger_ratio`` when
            non-None. Env ``CONTEXT_COMPACT_THRESHOLD`` always wins over
            this if set to a valid integer.
        """
        self.trigger_ratio = float(trigger_ratio)
        self.keep_last_n = int(keep_last_n)
        self.summary_max_tokens = int(summary_max_tokens)
        self.summary_llm = summary_llm
        self.absolute_threshold = absolute_threshold

    # ── public ──────────────────────────────────────────────────────────

    async def maybe_compact(
        self,
        messages: list[dict[str, Any]],
        *,
        llm: Any,
        max_tokens: int,
        round_index: int,
    ) -> CompactionPlan | None:
        """Decide whether to compact ``messages`` and, if so, build the plan.

        Returns ``None`` when the trigger threshold is not reached, when no
        safely foldable span exists, or when the summary call fails
        (soft-fail). ``messages`` is never mutated; applying the plan is the
        caller's job. ``round_index`` is accepted for protocol compatibility
        and is not consulted here.
        """
        before = estimate_tokens(messages)
        if not self._should_trigger(before, max_tokens):
            return None
        span = self._compactable_span(messages)
        if span is None:
            return None
        start, end = span
        summary = await self._summarize_async(messages[start : end + 1], llm=llm)
        if summary is None:
            return None  # soft-fail; caller continues uncompacted
        # Estimate the post-fold size on a throwaway copy of the history.
        after = estimate_tokens(
            [*messages[:start], _note(summary), *messages[end + 1 :]]
        )
        return CompactionPlan(
            fold_start_idx=start,
            fold_end_idx=end,
            summary_text=summary,
            before_tokens=before,
            after_tokens=after,
        )

    # ── trigger ─────────────────────────────────────────────────────────

    def _should_trigger(self, before_tokens: int, max_tokens: int) -> bool:
        """Return True when ``before_tokens`` crosses the active threshold.

        Precedence: a valid integer in env ``CONTEXT_COMPACT_THRESHOLD``,
        then ``self.absolute_threshold``, then ``max_tokens × trigger_ratio``.
        A malformed env value is ignored (with a ``RuntimeWarning``) instead
        of raising, so a typo in the environment cannot crash every round.
        """
        # Env override (read lazily so monkeypatch in tests works).
        raw = (os.environ.get("CONTEXT_COMPACT_THRESHOLD") or "").strip()
        absolute = self.absolute_threshold
        if raw:
            try:
                absolute = int(raw)
            except ValueError:
                import warnings

                warnings.warn(
                    "Ignoring non-integer CONTEXT_COMPACT_THRESHOLD="
                    f"{raw!r}; falling back to configured thresholds.",
                    RuntimeWarning,
                    stacklevel=2,
                )
        if absolute is not None:
            return before_tokens >= absolute
        if max_tokens <= 0:
            return False
        return before_tokens >= int(max_tokens * self.trigger_ratio)

    # ── span selection ──────────────────────────────────────────────────

    def _compactable_span(
        self, messages: list[dict[str, Any]]
    ) -> tuple[int, int] | None:
        """Return the inclusive index range we can safely fold, or None.

        Preserves the first ``system`` message (the original system_prompt)
        and the ``memory_*`` messages that directly follow it. Old
        ``compact_note`` messages are foldable so at most one compact_note
        ever exists. The range never ends in a way that leaves ``tool``
        results in the kept tail without their ``assistant(tool_calls)``.
        """
        n = len(messages)
        if n == 0:
            return None
        # Protected prefix: the first system message (original system_prompt)…
        sys_end = 1 if messages[0].get("role") == "system" else 0
        # …plus the memory_* system messages immediately after it.
        # NOTE(review): a memory_* message that sits *after* a compact_note is
        # not reached by this scan and would be folded — confirm against the
        # pipeline's injection order.
        while sys_end < n and messages[sys_end].get("role") == "system":
            name = messages[sys_end].get("name") or ""
            if not name.startswith("memory_"):
                break
            sys_end += 1
        # Decide the tail boundary by counting exchanges from the end.
        tail_start = self._tail_start(messages, sys_end)
        if tail_start <= sys_end:
            return None
        # Don't split a pending pair.
        tail_start = self._expand_back_to_atomic(messages, tail_start)
        if tail_start <= sys_end:
            return None
        end = tail_start - 1
        # Leave trailing tool results out of the fold…
        while end > sys_end and messages[end].get("role") == "tool":
            end -= 1
        # …and, when any were skipped, also leave out the assistant message
        # that issued those calls. Folding it while keeping its tool results
        # would orphan them and split the atomic pair.
        skipped_tools = end < tail_start - 1
        if (
            skipped_tools
            and messages[end].get("role") == "assistant"
            and messages[end].get("tool_calls")
        ):
            end -= 1
        if end < sys_end:
            return None
        return (sys_end, end)

    def _tail_start(self, messages: list[dict[str, Any]], sys_end: int) -> int:
        """Count exchanges from the tail; return the index where the kept
        tail starts. An "exchange" begins at a ``user`` message.

        Returns ``len(messages)`` when nothing follows the protected prefix,
        and ``sys_end`` when fewer than ``keep_last_n`` exchanges exist
        (i.e. keep everything, fold nothing).
        """
        n = len(messages)
        if n == sys_end:
            return n
        kept = 0
        for i in range(n - 1, sys_end - 1, -1):
            if messages[i].get("role") == "user":
                kept += 1
                if kept >= self.keep_last_n:
                    return i
        return sys_end  # not enough exchanges → keep everything after sys

    @staticmethod
    def _expand_back_to_atomic(
        messages: list[dict[str, Any]], tail_start: int
    ) -> int:
        """If ``messages[tail_start]`` is a ``tool`` and the corresponding
        ``assistant(tool_calls)`` is below the boundary, pull the boundary
        back so the pair stays together.

        Returns ``tail_start`` unchanged when it is out of range, does not
        point at a ``tool`` message, or no earlier ``assistant`` message with
        ``tool_calls`` exists.
        """
        if tail_start >= len(messages):
            return tail_start
        if messages[tail_start].get("role") != "tool":
            return tail_start
        # Walk back to the nearest assistant message that issued tool calls.
        for j in range(tail_start - 1, -1, -1):
            candidate = messages[j]
            if candidate.get("role") == "assistant" and candidate.get("tool_calls"):
                return j
        return tail_start

    # ── summarization ───────────────────────────────────────────────────

    async def _summarize_async(
        self, slice_msgs: list[dict[str, Any]], *, llm: Any
    ) -> str | None:
        """Summarize ``slice_msgs`` with ``summary_llm`` (or ``llm``).

        Returns the summary text, or ``None`` when the slice is empty, the
        LLM call raises, or the reply contains no usable text.
        """
        if not slice_msgs:
            return None
        summary_llm = self.summary_llm or llm
        prompt = (
            "You are a conversation summarizer. Below is a slice of an "
            "agent's working transcript that needs to be compressed for "
            "context-window economy. The slice may include prior compact_notes — "
            "merge their content into your summary so there is at most ONE "
            "compact note at any time. Preserve: (1) decisions made, "
            "(2) facts established, (3) errors and how they were handled, "
            "(4) any pending intent the assistant was about to act on. "
            "Do NOT call tools. Wrap your summary in <summary>…</summary>.\n\n"
            "--- transcript slice ---\n"
            + _stringify_slice(slice_msgs)
        )
        try:
            response = await summary_llm.complete(
                LLMRequest(
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=self.summary_max_tokens,
                    temperature=0.0,
                )
            )
        except Exception:
            # Deliberate fail-soft boundary: any summary failure means
            # "no plan", never a crashed round.
            return None
        raw_text = (
            getattr(response, "raw_text", "")
            or getattr(response, "content_text", "")
            or ""
        )
        return self._extract_summary(raw_text)

    @staticmethod
    def _extract_summary(raw_text: str) -> str | None:
        """Return the text inside the first ``<summary>…</summary>`` pair.

        Falls back to the whole stripped text when the tags are absent or
        unbalanced; returns ``None`` when nothing non-blank remains. Unlike
        a prefix check, this also handles replies where the model writes a
        preamble before the opening tag.
        """
        text = (raw_text or "").strip()
        if not text:
            return None
        open_tag, close_tag = "<summary>", "</summary>"
        start = text.find(open_tag)
        if start != -1:
            body_start = start + len(open_tag)
            stop = text.find(close_tag, body_start)
            if stop != -1:
                text = text[body_start:stop].strip()
        return text or None
277
+
278
+
279
+ # ── helpers ─────────────────────────────────────────────────────────────
280
+
281
+
def _stringify_slice(slice_msgs: list[dict[str, Any]]) -> str:
    """Flatten a run of messages into plain text for the summary prompt.

    Each message becomes a ``[role]`` header line (with a
    ``tool_calls=<count>`` suffix when the message carries tool calls)
    followed by its content; messages are separated by a blank line.
    Non-string content is passed through ``str()``; falsy content renders
    as an empty string.
    """

    def _render(msg: dict[str, Any]) -> str:
        body = msg.get("content")
        if not isinstance(body, str):
            body = str(body or "")
        header = f"[{msg.get('role', '?')}]"
        calls = msg.get("tool_calls")
        if calls:
            header = f"{header} tool_calls={len(calls)}"
        return f"{header}\n{body}"

    return "\n\n".join(_render(msg) for msg in slice_msgs)
294
+
295
+
def _note(text: str) -> dict[str, Any]:
    """Wrap ``text`` as a ``system`` message named ``compact_note``."""
    note: dict[str, Any] = {"role": "system", "name": "compact_note"}
    note["content"] = text
    return note
298
+
299
+
300
+ __all__ = ["Compactor", "CompactionPlan", "DefaultCompactor"]
@@ -0,0 +1,103 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from pathlib import Path
5
+
# Directory three levels above this module file; treated as the agent's
# home directory by the path helpers in this module.
AGENT_DIR = Path(__file__).resolve().parent.parent.parent

# Environment variables consulted, in priority order, to locate the
# workspace directory. The first one holding a non-blank value wins.
WORKSPACE_ENV_KEYS = (
    "POWER_LOOP_WORKSPACE",
    "ZERO_CODE_WORKSPACE",
    "ZERO_CODE_WORKDIR",
    "VSCODE_WORKSPACE_FOLDER",
)
13
+
14
+
def _resolve_workspace_dir() -> Path:
    """Pick the workspace directory from the environment.

    Scans ``WORKSPACE_ENV_KEYS`` in order and uses the first variable whose
    value is non-blank after stripping (``~`` expanded, then resolved).
    Falls back to the resolved current working directory.
    """
    stripped_values = (
        (os.environ.get(key) or "").strip() for key in WORKSPACE_ENV_KEYS
    )
    chosen = next((value for value in stripped_values if value), None)
    if chosen is None:
        return Path.cwd().resolve()
    return Path(chosen).expanduser().resolve()
21
+
22
+
# Workspace root, resolved once at import time from the environment
# (or the current working directory when no variable is set).
WORKSPACE_DIR = _resolve_workspace_dir()
# Alias bound to the same Path object as WORKSPACE_DIR.
WORKDIR = WORKSPACE_DIR
# Skills directory used when no valid override is configured.
DEFAULT_SKILLS_DIR = AGENT_DIR / ".skills"
26
+
27
+
def _resolve_skills_dir() -> Path:
    """Locate the skills directory, honoring an optional env override.

    Reads ``POWER_LOOP_SKILLS_DIR`` and, if that is unset or empty,
    ``ZERO_CODE_SKILLS_DIR``. A relative override is anchored at
    ``AGENT_DIR``. The override is used only when it resolves to an
    existing directory; in every other case ``DEFAULT_SKILLS_DIR`` is
    returned.
    """
    configured = ""
    for key in ("POWER_LOOP_SKILLS_DIR", "ZERO_CODE_SKILLS_DIR"):
        configured = os.environ.get(key) or ""
        if configured:
            break
    configured = configured.strip()
    if not configured:
        return DEFAULT_SKILLS_DIR

    target = Path(configured).expanduser()
    anchored = target if target.is_absolute() else AGENT_DIR / target
    resolved = anchored.resolve()

    usable = resolved.exists() and resolved.is_dir()
    return resolved if usable else DEFAULT_SKILLS_DIR
42
+
43
+
# Effective skills directory, resolved once at import time.
SKILLS_DIR = _resolve_skills_dir()

# Directory roots that _is_in_agent_rw_allowlist() accepts; safe_path() uses
# that check to allow selected locations under AGENT_DIR.
AGENT_RW_ALLOWLIST = (
    AGENT_DIR / ".cache",
    AGENT_DIR / "logs",
    SKILLS_DIR,
)
50
+
51
+
def _is_in_agent_rw_allowlist(path: Path) -> bool:
    """Return True when ``path`` resolves inside any allowlisted root.

    A root whose resolution or comparison raises is treated as a
    non-match rather than an error.
    """
    target = path.resolve()

    def _covers(root: Path) -> bool:
        try:
            return target.is_relative_to(root.resolve())
        except Exception:
            return False

    return any(_covers(root) for root in AGENT_RW_ALLOWLIST)
61
+
62
+
def safe_path(p: str, purpose: str = "rw") -> Path:
    """Resolve ``p`` to an absolute path and enforce the directory sandbox.

    Accepted forms:

    * ``@workspace/<rel>`` — relative to ``WORKSPACE_DIR``;
    * ``@agent/<rel>`` — relative to ``AGENT_DIR``;
    * an absolute path (after ``~`` expansion);
    * any other value — treated as relative to ``WORKSPACE_DIR``.

    The resolved path is returned when it lies inside ``WORKSPACE_DIR`` or
    inside an allowlisted location under ``AGENT_DIR``.

    ``purpose`` is accepted but not consulted by this function.

    Raises
    ------
    ValueError
        If ``p`` is blank, points into a non-allowlisted part of the agent
        home, or escapes both allowed directories.
    """
    text = (p or "").strip()
    if not text:
        raise ValueError("Path is required")

    # Explicit root markers first; anything else is absolute or
    # workspace-relative.
    for marker, root in (("@workspace/", WORKSPACE_DIR), ("@agent/", AGENT_DIR)):
        if text.startswith(marker):
            candidate = (root / text[len(marker) :]).resolve()
            break
    else:
        plain = Path(text).expanduser()
        anchored = plain if plain.is_absolute() else WORKSPACE_DIR / plain
        candidate = anchored.resolve()

    if candidate.is_relative_to(WORKSPACE_DIR):
        return candidate

    if not candidate.is_relative_to(AGENT_DIR):
        raise ValueError(
            f"Path escapes allowed directories: {p}. "
            f"Workspace: {WORKSPACE_DIR}. Use relative paths or @workspace/<path>."
        )

    if _is_in_agent_rw_allowlist(candidate):
        return candidate

    # Inside the agent home but not allowlisted: suggest the same relative
    # path under the workspace, since that is the likely intent.
    try:
        rel_to_agent = candidate.relative_to(AGENT_DIR)
        workspace_alt = WORKSPACE_DIR / rel_to_agent
        hint = (
            f" Did you mean the workspace file instead? "
            f"Try: {rel_to_agent} (resolves to {workspace_alt})"
        )
    except ValueError:
        hint = ""
    raise ValueError(
        f"Access to agent home is restricted.{hint} "
        f"Workspace is at {WORKSPACE_DIR}, not {AGENT_DIR}. "
        "Use relative paths (they default to workspace) or @workspace/<path>."
    )
@@ -0,0 +1,107 @@
1
+ """MemoryProvider — pluggable long-term / cross-session memory.
2
+
3
+ Library scope
4
+ -------------
5
+ power-loop **does not implement a memory backend**. It defines:
6
+
7
+ * the ``MemoryProvider`` Protocol callers implement,
8
+ * a ``MemorySnapshot`` shape passed to ``remember``,
9
+ * the pipeline integration points (``MEMORY_RECALLED`` hook +
10
+ ``MEMORY_RECALLED`` / ``MEMORY_FAILED`` events),
11
+ * the **inject position** invariant (after existing system messages,
12
+ after compact_note, before the conversation history).
13
+
14
+ Concrete backends live in callers' code or in ``examples/`` — SQLite
15
+ fact store, HTTP API diary, vector DB RAG, etc. — none of them belong
16
+ in the library.
17
+
18
+ Failure model
19
+ -------------
20
+ * ``recall`` raises → treated as **no memory** (returns ``[]``) and emit
21
+ ``MEMORY_FAILED``. Loop continues.
22
+ * ``remember`` raises → emit ``MEMORY_FAILED``. ``StatefulResult`` is
23
+ still returned unchanged. Persisting memory must never block the user
24
+ from getting a reply.
25
+ * Hook ``MEMORY_RECALLED`` returning ``HookDirective.SKIP`` → drop the
26
+ recalled messages (do not inject).
27
+ """
28
+
29
+ from __future__ import annotations
30
+
31
+ from dataclasses import dataclass, field
32
+ from typing import Any, Protocol, runtime_checkable
33
+
# One chat message as handled by the loop: a plain string-keyed dict
# (keys such as "role", "name", "content" are used elsewhere in this module).
LoopMessage = dict[str, Any]
35
+
36
+
@dataclass
class MemorySnapshot:
    """Payload handed to ``MemoryProvider.remember`` at session end.

    Carries the **full final history** — the message list as the pipeline
    sees it at SESSION_END, i.e. after any compaction. Providers usually
    persist only a digest or a few extracted facts; the complete snapshot
    is passed so that choice stays with the provider.
    """

    # Identifier of the session this snapshot belongs to (required).
    session_id: str
    # Final message history; a fresh list per instance.
    messages: list[LoopMessage] = field(default_factory=list)
    # Final reply text; defaults to an empty string.
    final_text: str = ""
    # Round counter; defaults to 0.
    rounds: int = 0
    # Session status string; defaults to an empty string.
    status: str = ""
    # Free-form extra data; a fresh dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
53
+
54
+
@runtime_checkable
class MemoryProvider(Protocol):
    """Memory backend contract implemented by the caller.

    * ``recall`` runs **once per send** (at SESSION_START, ahead of the
      first round). Whatever it returns is injected as ``role=system``
      messages whose ``name`` starts with ``memory_`` — the library adds
      the tag itself when the provider has not. An empty list means
      "no memory this session".
    * ``remember`` runs at SESSION_END for every status, ``cancelled`` and
      ``degraded`` included. Providers that only want successful sessions
      must inspect ``snapshot.status`` on their own.
    """

    async def recall(
        self,
        *,
        messages: list[LoopMessage],
        session_id: str | None,
        budget_tokens: int = 1500,
    ) -> list[LoopMessage]:
        """Return the memory messages to inject for this send."""
        ...

    async def remember(
        self,
        *,
        snapshot: MemorySnapshot,
        session_id: str | None,
    ) -> None:
        """Persist whatever the provider wants to keep from ``snapshot``."""
        ...
86
+
87
+
def tag_as_memory(messages: list[LoopMessage], *, prefix: str = "memory_") -> list[LoopMessage]:
    """Return copies of ``messages`` normalized as tagged memory rows.

    Every copy gets ``role="system"`` and a ``name`` beginning with
    ``prefix``. A name that already carries the prefix is kept; otherwise
    the prefix is put in front of the existing name, or in front of the
    message's list position when no name is present. The input dicts are
    left untouched (shallow copies are returned) and re-tagging the result
    is a no-op.

    The library runs this over the provider's output before injection so
    hooks, the compactor and audit code can recognize memory rows via
    ``msg.get("name", "").startswith("memory_")``.
    """

    def _tagged(position: int, original: LoopMessage) -> LoopMessage:
        row = {**original, "role": "system"}
        current = str(row.get("name") or "")
        if not current.startswith(prefix):
            row["name"] = f"{prefix}{current or position}"
        return row

    return [_tagged(position, msg) for position, msg in enumerate(messages)]
105
+
106
+
107
+ __all__ = ["LoopMessage", "MemorySnapshot", "MemoryProvider", "tag_as_memory"]
@@ -0,0 +1,176 @@
1
+ """Unified LLM provider configuration (M1.4).
2
+
3
+ Why
4
+ ---
5
+ The library wraps a **single** transport today —
6
+ ``OpenAICompatibleChatLLMService`` — but speaks to many actual providers
7
+ through it (OpenAI, DashScope/Qwen, DeepSeek, OpenRouter, Together,
8
+ Groq, local OpenAI-compatible servers). Each caller used to assemble an
9
+ ``OpenAICompatibleChatConfig`` by hand and read env vars in its own way,
10
+ which made provider-swapping a per-call code change.
11
+
12
+ ``LLMProviderConfig`` is the single config shape callers should target.
13
+ Two factories build an ``LLMService`` from it:
14
+
15
+ * :func:`create_llm_service_from_config` — given an explicit config.
16
+ * :func:`create_llm_service_from_env` — assembles from environment
17
+ variables (``POWER_LOOP_*``), falling back to legacy
18
+ ``OPENAI_COMPAT_*`` names so existing ``.env`` files keep working.
19
+
20
+ The ``provider`` field is currently informational (a string tag) — when
21
+ we add Anthropic-native transport in M3 it becomes the router key.
22
+ Callers that want to pin to a specific provider can set it; today it
23
+ does not affect the transport.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import os
29
+ from dataclasses import dataclass, field
30
+ from typing import Any
31
+
32
+ from llm_client.interface import LLMService, OpenAICompatibleChatConfig
33
+ from llm_client.llm_factory import OpenAICompatibleChatLLMService
34
+
# Primary environment-variable prefix, e.g. ``POWER_LOOP_BASE_URL``.
DEFAULT_PREFIX = "POWER_LOOP"
# Legacy prefix used as the default fallback, e.g. ``OPENAI_COMPAT_BASE_URL``.
LEGACY_PREFIX = "OPENAI_COMPAT"
37
+
38
+
@dataclass
class LLMProviderConfig:
    """Unified, provider-agnostic LLM config.

    Required: ``base_url`` / ``api_key`` / ``model``. Everything else
    has sensible defaults that match :class:`OpenAICompatibleChatConfig`.

    ``provider`` is a free-form tag (``"openai"`` / ``"dashscope"`` /
    ``"deepseek"`` / …) used today only for telemetry and human
    readability; it becomes the routing key when multi-transport lands.
    """

    base_url: str
    api_key: str
    model: str
    provider: str = "openai"
    timeout_s: float = 180.0
    max_tokens: int = 8000
    temperature: float = 0.0
    max_retries: int = 3
    # NOTE(review): ``extra`` is stored but neither populated by from_env()
    # nor forwarded by to_openai_compatible() — confirm that is intended.
    extra: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Reject configs whose required fields are empty.

        Raises
        ------
        ValueError
            Listing every required field that is missing.
        """
        # Fail fast at config build time, not on the first complete() call.
        required = (
            ("base_url", self.base_url),
            ("api_key", self.api_key),
            ("model", self.model),
        )
        missing = [name for name, val in required if not val]
        if missing:
            raise ValueError(
                f"LLMProviderConfig missing required field(s): {', '.join(missing)}"
            )

    # ── Factories ───────────────────────────────────────────────────────

    @classmethod
    def from_env(
        cls,
        *,
        prefix: str = DEFAULT_PREFIX,
        fallback_prefix: str | None = LEGACY_PREFIX,
        env: dict[str, str] | None = None,
    ) -> LLMProviderConfig:
        """Build a config from ``{PREFIX}_*`` environment variables.

        Reads (in order of preference):

        * ``{prefix}_BASE_URL`` / ``{prefix}_API_KEY`` / ``{prefix}_MODEL``
          (required)
        * ``{prefix}_PROVIDER`` / ``{prefix}_TIMEOUT_S`` /
          ``{prefix}_MAX_TOKENS`` / ``{prefix}_TEMPERATURE`` /
          ``{prefix}_MAX_RETRIES`` (optional)

        If ``fallback_prefix`` is set and a primary var is missing, falls
        back to the same suffix under the fallback prefix. Default
        fallback is ``OPENAI_COMPAT`` so existing ``.env`` files
        (``OPENAI_COMPAT_BASE_URL`` etc.) keep working without edits.

        Values are stripped of surrounding whitespace; a blank or
        whitespace-only value counts as missing, so it neither shadows the
        fallback variable nor passes validation as a bogus key or URL.

        ``env`` argument is for tests; defaults to ``os.environ``.

        Raises
        ------
        ValueError
            If a required variable is missing, or a numeric variable holds
            a value that cannot be parsed (the message names the variable).
        """
        src = os.environ if env is None else env

        def _lookup(suffix: str) -> tuple[str, str] | None:
            """Return ``(variable_name, stripped_value)`` for the first hit."""
            keys = [f"{prefix}_{suffix}"]
            if fallback_prefix:
                keys.append(f"{fallback_prefix}_{suffix}")
            for key in keys:
                value = (src.get(key) or "").strip()
                if value:
                    return key, value
            return None

        def _text(suffix: str, default: str) -> str:
            found = _lookup(suffix)
            return default if found is None else found[1]

        def _number(suffix: str, cast: Any, default: Any) -> Any:
            found = _lookup(suffix)
            if found is None:
                return default
            key, raw = found
            try:
                return cast(raw)
            except ValueError as exc:
                # Name the offending variable; a bare "invalid literal for
                # int()" gives no hint where the bad value came from.
                raise ValueError(
                    f"Invalid value for {key}: {raw!r} "
                    f"(expected {cast.__name__})"
                ) from exc

        return cls(
            base_url=_text("BASE_URL", ""),
            api_key=_text("API_KEY", ""),
            model=_text("MODEL", ""),
            provider=_text("PROVIDER", "openai"),
            timeout_s=_number("TIMEOUT_S", float, 180.0),
            max_tokens=_number("MAX_TOKENS", int, 8000),
            temperature=_number("TEMPERATURE", float, 0.0),
            max_retries=_number("MAX_RETRIES", int, 3),
        )

    # ── Adaptation ──────────────────────────────────────────────────────

    def to_openai_compatible(self) -> OpenAICompatibleChatConfig:
        """Render into the transport-specific config the current backend
        expects. New transports (Anthropic-native in M3) will add their
        own adapter alongside this one.

        ``provider`` and ``extra`` are not part of the rendered config.
        """
        return OpenAICompatibleChatConfig(
            base_url=self.base_url,
            api_key=self.api_key,
            model=self.model,
            timeout_s=self.timeout_s,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            max_retries=self.max_retries,
        )
149
+
150
+
def create_llm_service_from_config(cfg: LLMProviderConfig) -> LLMService:
    """Turn an :class:`LLMProviderConfig` into a ready ``LLMService``.

    The result is currently always an ``OpenAICompatibleChatLLMService``;
    dispatch on ``cfg.provider`` is planned for when a second transport
    is added.
    """
    transport_config = cfg.to_openai_compatible()
    return OpenAICompatibleChatLLMService(transport_config)
158
+
159
+
def create_llm_service_from_env(
    *,
    prefix: str = DEFAULT_PREFIX,
    fallback_prefix: str | None = LEGACY_PREFIX,
) -> LLMService:
    """Convenience wrapper: load config from the environment and build the service.

    ``prefix`` and ``fallback_prefix`` are forwarded unchanged to
    :meth:`LLMProviderConfig.from_env`.
    """
    return create_llm_service_from_config(
        LLMProviderConfig.from_env(prefix=prefix, fallback_prefix=fallback_prefix)
    )
168
+
169
+
170
+ __all__ = [
171
+ "DEFAULT_PREFIX",
172
+ "LEGACY_PREFIX",
173
+ "LLMProviderConfig",
174
+ "create_llm_service_from_config",
175
+ "create_llm_service_from_env",
176
+ ]