power-loop 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- llm_client/__init__.py +0 -0
- llm_client/capabilities.py +162 -0
- llm_client/interface.py +470 -0
- llm_client/llm_factory.py +981 -0
- llm_client/llm_tooling.py +645 -0
- llm_client/llm_utils.py +205 -0
- llm_client/multimodal.py +237 -0
- llm_client/qwen_image.py +576 -0
- llm_client/web_search.py +149 -0
- power_loop/__init__.py +326 -0
- power_loop/agent/__init__.py +6 -0
- power_loop/agent/sink.py +247 -0
- power_loop/agent/stateful_loop.py +363 -0
- power_loop/agent/system_prompt.py +396 -0
- power_loop/agent/types.py +41 -0
- power_loop/contracts/__init__.py +132 -0
- power_loop/contracts/errors.py +140 -0
- power_loop/contracts/event_payloads.py +278 -0
- power_loop/contracts/events.py +86 -0
- power_loop/contracts/handlers.py +45 -0
- power_loop/contracts/hook_contexts.py +265 -0
- power_loop/contracts/hooks.py +64 -0
- power_loop/contracts/messages.py +90 -0
- power_loop/contracts/protocols.py +48 -0
- power_loop/contracts/tools.py +56 -0
- power_loop/core/agent_context.py +94 -0
- power_loop/core/events.py +124 -0
- power_loop/core/hooks.py +122 -0
- power_loop/core/phase.py +217 -0
- power_loop/core/pipeline.py +880 -0
- power_loop/core/runner.py +60 -0
- power_loop/core/state.py +208 -0
- power_loop/runtime/budget.py +179 -0
- power_loop/runtime/cancellation.py +127 -0
- power_loop/runtime/compact.py +300 -0
- power_loop/runtime/env.py +103 -0
- power_loop/runtime/memory.py +107 -0
- power_loop/runtime/provider.py +176 -0
- power_loop/runtime/retry.py +182 -0
- power_loop/runtime/session_store.py +636 -0
- power_loop/runtime/skills.py +201 -0
- power_loop/runtime/spec.py +233 -0
- power_loop/runtime/structured.py +225 -0
- power_loop/tools/__init__.py +51 -0
- power_loop/tools/default_manifest.py +244 -0
- power_loop/tools/default_tools.py +766 -0
- power_loop/tools/registry.py +162 -0
- power_loop/tools/spawn_agent.py +173 -0
- power_loop-0.2.0.dist-info/METADATA +632 -0
- power_loop-0.2.0.dist-info/RECORD +53 -0
- power_loop-0.2.0.dist-info/WHEEL +5 -0
- power_loop-0.2.0.dist-info/licenses/LICENSE +21 -0
- power_loop-0.2.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
"""Context compaction — protocol + default implementation (M1.7a).
|
|
2
|
+
|
|
3
|
+
Design contract (from ROADMAP §M1.7a / README §1):
|
|
4
|
+
|
|
5
|
+
* Triggered every ``round.start`` when estimated tokens >=
|
|
6
|
+
``max_tokens × trigger_ratio`` (or absolute ``CONTEXT_COMPACT_THRESHOLD``
|
|
7
|
+
env override). Idempotent within a round.
|
|
8
|
+
* **Preserve** the first ``role=system`` message (the original system_prompt)
|
|
9
|
+
and ``memory_*`` messages. Old ``compact_note`` messages are foldable —
|
|
10
|
+
the new summary merges them so at most one compact_note ever exists.
|
|
11
|
+
* **Preserve** the last ``keep_last_n`` exchanges. An exchange is a
|
|
12
|
+
``user / assistant(+optional tool_calls) / tool*`` triple — never split
|
|
13
|
+
the atomic ``assistant(tool_calls)`` ↔ matching ``tool(tool_call_id=…)``
|
|
14
|
+
pair.
|
|
15
|
+
* Summarize the cuttable middle via a separate LLM call (default = main
|
|
16
|
+
LLM; injectable ``summary_llm`` for cheaper models).
|
|
17
|
+
* Insert one ``system / name=compact_note`` message in place of the cut
|
|
18
|
+
range.
|
|
19
|
+
* Fail-soft: on summary error, return ``None`` plan → caller continues with
|
|
20
|
+
uncompacted history; the pipeline then escalates to ``loop.degraded``
|
|
21
|
+
only if the main LLM rejects on context-overflow.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import os
|
|
27
|
+
from dataclasses import dataclass
|
|
28
|
+
from typing import Any, Protocol, runtime_checkable
|
|
29
|
+
|
|
30
|
+
from llm_client.interface import LLMRequest
|
|
31
|
+
from power_loop.runtime.budget import estimate_tokens
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dataclass(frozen=True)
class CompactionPlan:
    """Immutable result of one successful compaction pass.

    The plan names an **inclusive** index range inside the pipeline's
    in-memory ``history`` list. Every message in that range is meant to
    be swapped for a single ``compact_note`` message placed at
    ``fold_start_idx``.
    """

    # First index (inclusive) of the folded range; also where the note goes.
    fold_start_idx: int
    # Last index (inclusive) of the folded range.
    fold_end_idx: int
    # Summary text that becomes the content of the compact_note message.
    summary_text: str
    # Estimated token count of the history before folding.
    before_tokens: int
    # Estimated token count of the history with the fold applied.
    after_tokens: int
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@runtime_checkable
class Compactor(Protocol):
    """Structural interface for a pluggable compaction strategy.

    An implementation decides whether the history should be compacted
    and, if so, how.
    """

    async def maybe_compact(
        self,
        messages: list[dict[str, Any]],
        *,
        llm: Any,
        max_tokens: int,
        round_index: int,
    ) -> CompactionPlan | None:
        """Return a ``CompactionPlan`` to apply, or ``None`` for no compaction."""
        ...
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ── default implementation ──────────────────────────────────────────────
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class DefaultCompactor:
    """Vendor-neutral compactor matching the M1.7a contract.

    When the estimated token count of the history reaches the trigger
    threshold, the oldest foldable messages are summarized through an LLM
    call and described by a :class:`CompactionPlan`. The leading system
    prompt, any ``memory_*`` system messages directly after it, and the
    last ``keep_last_n`` user-bounded exchanges are never folded.
    """

    def __init__(
        self,
        *,
        trigger_ratio: float = 0.75,
        keep_last_n: int = 4,
        summary_max_tokens: int = 5000,
        summary_llm: Any | None = None,
        absolute_threshold: int | None = None,
    ) -> None:
        """
        Parameters
        ----------
        trigger_ratio
            Fire when ``estimate_tokens(history) ≥ max_tokens × trigger_ratio``.
            Default 0.75 leaves headroom for the next round's prompt + reply.
        keep_last_n
            Preserve the last N **user-bounded exchanges** verbatim (not
            summarized). This is the freshest context the model needs to
            answer the next turn well; folding it hurts reply quality
            dramatically. ``keep_last_n=1`` ≈ "aggressive — summarize
            everything but the last user turn"; the default 4 follows
            Anthropic's compaction guide.
        summary_max_tokens
            Token cap for the summary LLM call. Since at most one compact_note
            exists at any time (each compaction merges the old note into the
            new one), this can be generous — 5000 is a good default for
            preserving detailed context across many compaction rounds.
        summary_llm
            Optional cheaper LLM dedicated to the summary call. Defaults
            to the main loop's LLM.
        absolute_threshold
            Absolute token count that overrides ``trigger_ratio`` when
            non-None. Env ``CONTEXT_COMPACT_THRESHOLD`` wins over this
            when it is set to a valid integer.
        """
        self.trigger_ratio = float(trigger_ratio)
        self.keep_last_n = int(keep_last_n)
        self.summary_max_tokens = int(summary_max_tokens)
        self.summary_llm = summary_llm
        self.absolute_threshold = absolute_threshold

    # ── public ──────────────────────────────────────────────────────────

    async def maybe_compact(
        self,
        messages: list[dict[str, Any]],
        *,
        llm: Any,
        max_tokens: int,
        round_index: int,
    ) -> CompactionPlan | None:
        """Build a plan for folding old history, or return ``None``.

        ``None`` is returned when the trigger threshold is not reached,
        when there is no foldable span, or when the summary call yields
        nothing (soft-fail: the caller continues uncompacted).
        ``messages`` is not modified. ``round_index`` is part of the
        ``Compactor`` signature and is not used by this implementation.
        """
        before = estimate_tokens(messages)
        if not self._should_trigger(before, max_tokens):
            return None
        span = self._compactable_span(messages)
        if span is None:
            return None
        start, end = span
        summary = await self._summarize_async(messages[start : end + 1], llm=llm)
        if summary is None:
            return None  # soft-fail; caller continues uncompacted
        # Estimate the size of the history as it would look with the fold applied.
        after = estimate_tokens(
            [*messages[:start], _note(summary), *messages[end + 1 :]]
        )
        return CompactionPlan(
            fold_start_idx=start,
            fold_end_idx=end,
            summary_text=summary,
            before_tokens=before,
            after_tokens=after,
        )

    # ── trigger ─────────────────────────────────────────────────────────

    def _should_trigger(self, before_tokens: int, max_tokens: int) -> bool:
        """Return True when ``before_tokens`` has reached the active threshold.

        Precedence: a valid integer in ``CONTEXT_COMPACT_THRESHOLD``, then
        ``absolute_threshold``, then ``max_tokens × trigger_ratio``. With no
        absolute threshold and ``max_tokens <= 0`` the answer is False.
        """
        absolute = self.absolute_threshold
        # Env override (read lazily so monkeypatch in tests works).
        raw = (os.environ.get("CONTEXT_COMPACT_THRESHOLD") or "").strip()
        if raw:
            try:
                absolute = int(raw)
            except ValueError:
                # A malformed override must not crash the round; fall back
                # to the configured threshold instead.
                pass
        if absolute is not None:
            return before_tokens >= absolute
        if max_tokens <= 0:
            return False
        return before_tokens >= int(max_tokens * self.trigger_ratio)

    # ── span selection ──────────────────────────────────────────────────

    def _compactable_span(
        self, messages: list[dict[str, Any]]
    ) -> tuple[int, int] | None:
        """Return the inclusive index range we can safely fold, or None.

        Preserves the first ``system`` message (the original system_prompt)
        and the ``memory_*`` system messages directly after it. Old
        ``compact_note`` messages are foldable so at most one compact_note
        ever exists. The range runs up to the message just before the
        kept tail, so ``tool`` results at the end of the folded region
        are folded together with the ``assistant(tool_calls)`` message
        that produced them.
        """
        n = len(messages)
        if n == 0:
            return None
        # Always preserve the first system message (the original system_prompt).
        sys_end = 1 if messages[0].get("role") == "system" else 0
        # Preserve subsequent memory_* messages (they share system-region protection).
        while sys_end < n and messages[sys_end].get("role") == "system":
            name = messages[sys_end].get("name") or ""
            if not name.startswith("memory_"):
                break
            sys_end += 1
        # Decide the tail boundary by counting exchanges from the end.
        tail_start = self._tail_start(messages, sys_end)
        if tail_start <= sys_end:
            return None
        # Don't split a pending pair.
        tail_start = self._expand_back_to_atomic(messages, tail_start)
        if tail_start <= sys_end:
            return None
        # Fold everything between the protected head and the kept tail.
        # Trimming trailing tool messages off this range would leave them
        # in the history without their assistant(tool_calls) message.
        return (sys_end, tail_start - 1)

    def _tail_start(self, messages: list[dict[str, Any]], sys_end: int) -> int:
        """Count exchanges from the tail; return the index of the start of
        the kept tail. An "exchange" begins at a ``user`` message.

        Returns ``sys_end`` when fewer than ``keep_last_n`` user messages
        exist after the protected head (nothing can be folded).
        """
        seen = 0
        for idx in range(len(messages) - 1, sys_end - 1, -1):
            if messages[idx].get("role") == "user":
                seen += 1
                if seen >= self.keep_last_n:
                    return idx
        return sys_end  # not enough exchanges → keep everything after sys

    @staticmethod
    def _expand_back_to_atomic(
        messages: list[dict[str, Any]], tail_start: int
    ) -> int:
        """If ``messages[tail_start]`` is a ``tool`` message, pull the
        boundary back to the nearest earlier ``assistant`` message that
        carries ``tool_calls`` so the pair stays together.

        Returns ``tail_start`` unchanged when it is out of range, when the
        message there is not a ``tool`` message, or when no such assistant
        message precedes it.
        """
        if tail_start >= len(messages):
            return tail_start
        if messages[tail_start].get("role") != "tool":
            return tail_start
        # walk back to the nearest assistant message with tool_calls
        for j in range(tail_start - 1, -1, -1):
            candidate = messages[j]
            if candidate.get("role") == "assistant" and candidate.get("tool_calls"):
                return j
        return tail_start

    # ── summarization ───────────────────────────────────────────────────

    @staticmethod
    def _extract_summary(text: str) -> str | None:
        """Return the usable summary from raw model output, or ``None``.

        When a ``<summary>…</summary>`` pair appears anywhere in ``text``
        only the content of the first pair is kept; otherwise the stripped
        text is used as-is. Empty results become ``None``.
        """
        text = (text or "").strip()
        _, opened, rest = text.partition("<summary>")
        if opened and "</summary>" in rest:
            text = rest.split("</summary>")[0].strip()
        return text or None

    async def _summarize_async(
        self, slice_msgs: list[dict[str, Any]], *, llm: Any
    ) -> str | None:
        """Summarize ``slice_msgs`` with ``summary_llm`` (or ``llm``).

        Returns ``None`` for an empty slice, when the LLM call raises, or
        when the response contains no usable text.
        """
        if not slice_msgs:
            return None
        summary_llm = self.summary_llm or llm
        prompt = (
            "You are a conversation summarizer. Below is a slice of an "
            "agent's working transcript that needs to be compressed for "
            "context-window economy. The slice may include prior compact_notes — "
            "merge their content into your summary so there is at most ONE "
            "compact note at any time. Preserve: (1) decisions made, "
            "(2) facts established, (3) errors and how they were handled, "
            "(4) any pending intent the assistant was about to act on. "
            "Do NOT call tools. Wrap your summary in <summary>…</summary>.\n\n"
            "--- transcript slice ---\n"
            + _stringify_slice(slice_msgs)
        )
        try:
            response = await summary_llm.complete(
                LLMRequest(
                    messages=[{"role": "user", "content": prompt}],
                    max_tokens=self.summary_max_tokens,
                    temperature=0.0,
                )
            )
        except Exception:
            # Deliberate fail-soft: any summary failure means "no compaction".
            return None
        raw = (
            getattr(response, "raw_text", "")
            or getattr(response, "content_text", "")
            or ""
        )
        return self._extract_summary(raw)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
# ── helpers ─────────────────────────────────────────────────────────────
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def _stringify_slice(slice_msgs: list[dict[str, Any]]) -> str:
|
|
283
|
+
lines: list[str] = []
|
|
284
|
+
for m in slice_msgs:
|
|
285
|
+
role = m.get("role", "?")
|
|
286
|
+
content = m.get("content")
|
|
287
|
+
text = content if isinstance(content, str) else str(content or "")
|
|
288
|
+
head = f"[{role}]"
|
|
289
|
+
tool_calls = m.get("tool_calls")
|
|
290
|
+
if tool_calls:
|
|
291
|
+
head += f" tool_calls={len(tool_calls)}"
|
|
292
|
+
lines.append(f"{head}\n{text}")
|
|
293
|
+
return "\n\n".join(lines)
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def _note(text: str) -> dict[str, Any]:
|
|
297
|
+
return {"role": "system", "name": "compact_note", "content": text}
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
__all__ = ["Compactor", "CompactionPlan", "DefaultCompactor"]
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Directory three levels above this file's resolved location.
AGENT_DIR = Path(__file__).resolve().parent.parent.parent

# Environment variables consulted, in this order, to locate the workspace.
WORKSPACE_ENV_KEYS = (
    "POWER_LOOP_WORKSPACE",
    "ZERO_CODE_WORKSPACE",
    "ZERO_CODE_WORKDIR",
    "VSCODE_WORKSPACE_FOLDER",
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _resolve_workspace_dir() -> Path:
    """Pick the workspace directory from the environment.

    The first variable in ``WORKSPACE_ENV_KEYS`` holding a non-blank
    value wins; ``~`` is expanded and the path resolved. When none is
    set, the resolved current working directory is returned.
    """
    values = ((os.environ.get(key) or "").strip() for key in WORKSPACE_ENV_KEYS)
    chosen = next((value for value in values if value), None)
    if chosen is None:
        return Path.cwd().resolve()
    return Path(chosen).expanduser().resolve()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Resolved once, when this module is imported.
WORKSPACE_DIR = _resolve_workspace_dir()
# Second name bound to the same path object.
WORKDIR = WORKSPACE_DIR
# Skills location used when no valid override is configured.
DEFAULT_SKILLS_DIR = AGENT_DIR / ".skills"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _resolve_skills_dir() -> Path:
    """Return the skills directory, honouring an environment override.

    ``POWER_LOOP_SKILLS_DIR`` is read first, then ``ZERO_CODE_SKILLS_DIR``.
    A relative override is taken relative to ``AGENT_DIR``. The override
    is used only if it resolves to an existing directory; in every other
    case ``DEFAULT_SKILLS_DIR`` is returned.
    """
    override = (
        os.environ.get("POWER_LOOP_SKILLS_DIR")
        or os.environ.get("ZERO_CODE_SKILLS_DIR")
        or ""
    ).strip()
    if not override:
        return DEFAULT_SKILLS_DIR

    expanded = Path(override).expanduser()
    anchored = expanded if expanded.is_absolute() else AGENT_DIR / expanded
    resolved = anchored.resolve()

    usable = resolved.exists() and resolved.is_dir()
    return resolved if usable else DEFAULT_SKILLS_DIR
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Resolved once, when this module is imported.
SKILLS_DIR = _resolve_skills_dir()

# Roots that _is_in_agent_rw_allowlist() accepts paths under.
AGENT_RW_ALLOWLIST = (
    AGENT_DIR / ".cache",
    AGENT_DIR / "logs",
    SKILLS_DIR,
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _is_in_agent_rw_allowlist(path: Path) -> bool:
    """Return True if ``path`` resolves to a location under an allowlisted root.

    Each entry of ``AGENT_RW_ALLOWLIST`` is resolved before comparison.
    A root whose check raises is skipped instead of aborting the lookup.
    """
    target = path.resolve()
    for root in AGENT_RW_ALLOWLIST:
        try:
            inside = target.is_relative_to(root.resolve())
        except Exception:
            continue
        if inside:
            return True
    return False
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def safe_path(p: str, purpose: str = "rw") -> Path:
    """Resolve a caller-supplied path and confine it to permitted roots.

    Accepted forms:

    * ``@workspace/<rest>`` — relative to ``WORKSPACE_DIR``
    * ``@agent/<rest>`` — relative to ``AGENT_DIR``
    * an absolute path (``~`` expanded)
    * any other relative path — relative to ``WORKSPACE_DIR``

    The resolved path is returned when it lies inside ``WORKSPACE_DIR``,
    or inside ``AGENT_DIR`` under one of the allowlisted roots.
    ``purpose`` is accepted but not consulted by this function.

    Raises
    ------
    ValueError
        If the path is blank, points into a non-allowlisted part of the
        agent home, or lies outside both roots.
    """
    requested = (p or "").strip()
    if not requested:
        raise ValueError("Path is required")

    workspace_tag = "@workspace/"
    agent_tag = "@agent/"
    if requested.startswith(workspace_tag):
        candidate = (WORKSPACE_DIR / requested[len(workspace_tag):]).resolve()
    elif requested.startswith(agent_tag):
        candidate = (AGENT_DIR / requested[len(agent_tag):]).resolve()
    else:
        expanded = Path(requested).expanduser()
        anchored = expanded if expanded.is_absolute() else WORKSPACE_DIR / expanded
        candidate = anchored.resolve()

    # The workspace is checked first, so it wins whenever both roots match.
    if candidate.is_relative_to(WORKSPACE_DIR):
        return candidate

    if not candidate.is_relative_to(AGENT_DIR):
        raise ValueError(
            f"Path escapes allowed directories: {p}. "
            f"Workspace: {WORKSPACE_DIR}. Use relative paths or @workspace/<path>."
        )

    if _is_in_agent_rw_allowlist(candidate):
        return candidate

    # Inside the agent home but not allowlisted: suggest the workspace twin.
    try:
        rel_to_agent = candidate.relative_to(AGENT_DIR)
    except ValueError:
        hint = ""
    else:
        workspace_alt = WORKSPACE_DIR / rel_to_agent
        hint = (
            f" Did you mean the workspace file instead? "
            f"Try: {rel_to_agent} (resolves to {workspace_alt})"
        )
    raise ValueError(
        f"Access to agent home is restricted.{hint} "
        f"Workspace is at {WORKSPACE_DIR}, not {AGENT_DIR}. "
        "Use relative paths (they default to workspace) or @workspace/<path>."
    )
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
"""MemoryProvider — pluggable long-term / cross-session memory.
|
|
2
|
+
|
|
3
|
+
Library scope
|
|
4
|
+
-------------
|
|
5
|
+
power-loop **does not implement a memory backend**. It defines:
|
|
6
|
+
|
|
7
|
+
* the ``MemoryProvider`` Protocol callers implement,
|
|
8
|
+
* a ``MemorySnapshot`` shape passed to ``remember``,
|
|
9
|
+
* the pipeline integration points (``MEMORY_RECALLED`` hook +
|
|
10
|
+
``MEMORY_RECALLED`` / ``MEMORY_FAILED`` events),
|
|
11
|
+
* the **inject position** invariant (after existing system messages,
|
|
12
|
+
after compact_note, before the conversation history).
|
|
13
|
+
|
|
14
|
+
Concrete backends live in callers' code or in ``examples/`` — SQLite
|
|
15
|
+
fact store, HTTP API diary, vector DB RAG, etc. — none of them belong
|
|
16
|
+
in the library.
|
|
17
|
+
|
|
18
|
+
Failure model
|
|
19
|
+
-------------
|
|
20
|
+
* ``recall`` raises → treated as **no memory** (returns ``[]``) and emit
|
|
21
|
+
``MEMORY_FAILED``. Loop continues.
|
|
22
|
+
* ``remember`` raises → emit ``MEMORY_FAILED``. ``StatefulResult`` is
|
|
23
|
+
still returned unchanged. Persisting memory must never block the user
|
|
24
|
+
from getting a reply.
|
|
25
|
+
* Hook ``MEMORY_RECALLED`` returning ``HookDirective.SKIP`` → drop the
|
|
26
|
+
recalled messages (do not inject).
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
from dataclasses import dataclass, field
|
|
32
|
+
from typing import Any, Protocol, runtime_checkable
|
|
33
|
+
|
|
34
|
+
LoopMessage = dict[str, Any]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class MemorySnapshot:
    """Payload handed to ``remember`` when a session ends.

    It carries the **full final history** — the message list as the
    pipeline sees it at SESSION_END time, after any compaction. The
    provider decides what to persist; typically that is a summary or a
    few selected facts, not the whole snapshot.
    """

    # Identifier of the session that produced this snapshot.
    session_id: str
    # Final message history; a fresh empty list per instance by default.
    messages: list[LoopMessage] = field(default_factory=list)
    final_text: str = ""
    rounds: int = 0
    status: str = ""
    # Free-form extra data; a fresh empty dict per instance by default.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@runtime_checkable
class MemoryProvider(Protocol):
    """Memory backend contract implemented by callers.

    ``recall`` runs **once per send**, at SESSION_START and before the
    first round. Its return value is injected as ``role=system``
    messages whose ``name`` is prefixed ``memory_*``; the library adds
    the tag when the provider has not. An empty list means "no memory
    this session".

    ``remember`` runs at SESSION_END whatever the status, ``cancelled``
    and ``degraded`` included. A provider that wants to persist only
    successful sessions must inspect ``snapshot.status`` itself.
    """

    async def recall(
        self,
        *,
        messages: list[LoopMessage],
        session_id: str | None,
        budget_tokens: int = 1500,
    ) -> list[LoopMessage]:
        """Return the memory messages to inject for this send."""
        ...

    async def remember(
        self,
        *,
        snapshot: MemorySnapshot,
        session_id: str | None,
    ) -> None:
        """Persist whatever the provider chooses from ``snapshot``."""
        ...
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def tag_as_memory(messages: list[LoopMessage], *, prefix: str = "memory_") -> list[LoopMessage]:
    """Return copies of ``messages`` marked as memory system messages.

    Every copy gets ``role="system"``. A ``name`` that already starts
    with ``prefix`` is kept; otherwise the name becomes ``prefix``
    followed by the existing name, or by the message's position when it
    has no name. The input dicts are not modified, and running the
    function on its own output changes nothing.

    The library calls this on the provider's output before injection so
    downstream code (hooks, compactor, audit) can identify memory rows
    by ``msg.get("name", "").startswith("memory_")``.
    """

    def _tagged(position: int, message: LoopMessage) -> LoopMessage:
        clone = {**message, "role": "system"}
        current = str(clone.get("name") or "")
        if not current.startswith(prefix):
            clone["name"] = f"{prefix}{current or position}"
        return clone

    return [_tagged(position, message) for position, message in enumerate(messages)]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
__all__ = ["LoopMessage", "MemorySnapshot", "MemoryProvider", "tag_as_memory"]
|
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""Unified LLM provider configuration (M1.4).
|
|
2
|
+
|
|
3
|
+
Why
|
|
4
|
+
---
|
|
5
|
+
The library wraps a **single** transport today —
|
|
6
|
+
``OpenAICompatibleChatLLMService`` — but speaks to many actual providers
|
|
7
|
+
through it (OpenAI, DashScope/Qwen, DeepSeek, OpenRouter, Together,
|
|
8
|
+
Groq, local OpenAI-compatible servers). Each caller used to assemble an
|
|
9
|
+
``OpenAICompatibleChatConfig`` by hand and read env vars in its own way,
|
|
10
|
+
which made provider-swapping a per-call code change.
|
|
11
|
+
|
|
12
|
+
``LLMProviderConfig`` is the single config shape callers should target.
|
|
13
|
+
Two factories build an ``LLMService`` from it:
|
|
14
|
+
|
|
15
|
+
* :func:`create_llm_service_from_config` — given an explicit config.
|
|
16
|
+
* :func:`create_llm_service_from_env` — assembles from environment
|
|
17
|
+
variables (``POWER_LOOP_*``), falling back to legacy
|
|
18
|
+
``OPENAI_COMPAT_*`` names so existing ``.env`` files keep working.
|
|
19
|
+
|
|
20
|
+
The ``provider`` field is currently informational (a string tag) — when
|
|
21
|
+
we add Anthropic-native transport in M3 it becomes the router key.
|
|
22
|
+
Callers that want to pin to a specific provider can set it; today it
|
|
23
|
+
does not affect the transport.
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import os
|
|
29
|
+
from dataclasses import dataclass, field
|
|
30
|
+
from typing import Any
|
|
31
|
+
|
|
32
|
+
from llm_client.interface import LLMService, OpenAICompatibleChatConfig
|
|
33
|
+
from llm_client.llm_factory import OpenAICompatibleChatLLMService
|
|
34
|
+
|
|
35
|
+
# Primary environment-variable prefix read by ``LLMProviderConfig.from_env``.
DEFAULT_PREFIX = "POWER_LOOP"
# Legacy prefix consulted when a primary variable is missing.
LEGACY_PREFIX = "OPENAI_COMPAT"


@dataclass
class LLMProviderConfig:
    """Unified, provider-agnostic LLM config.

    Required: ``base_url`` / ``api_key`` / ``model``. Everything else
    has sensible defaults that match :class:`OpenAICompatibleChatConfig`.

    ``provider`` is a free-form tag (``"openai"`` / ``"dashscope"`` /
    ``"deepseek"`` / …) used today only for telemetry and human
    readability; it becomes the routing key when multi-transport lands.
    """

    base_url: str
    api_key: str
    model: str
    provider: str = "openai"
    timeout_s: float = 180.0
    max_tokens: int = 8000
    temperature: float = 0.0
    max_retries: int = 3
    extra: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Reject a config whose required fields are empty.

        Raises
        ------
        ValueError
            Naming every required field that is falsy.
        """
        # Fail fast at config build time, not on the first complete() call.
        required = (
            ("base_url", self.base_url),
            ("api_key", self.api_key),
            ("model", self.model),
        )
        missing = [name for name, val in required if not val]
        if missing:
            raise ValueError(
                f"LLMProviderConfig missing required field(s): {', '.join(missing)}"
            )

    # ── Factories ───────────────────────────────────────────────────────

    @classmethod
    def from_env(
        cls,
        *,
        prefix: str = DEFAULT_PREFIX,
        fallback_prefix: str | None = LEGACY_PREFIX,
        env: dict[str, str] | None = None,
    ) -> LLMProviderConfig:
        """Build a config from ``{PREFIX}_*`` environment variables.

        Reads (in order of preference):

        * ``{prefix}_BASE_URL`` / ``{prefix}_API_KEY`` / ``{prefix}_MODEL``
          (required)
        * ``{prefix}_PROVIDER`` / ``{prefix}_TIMEOUT_S`` /
          ``{prefix}_MAX_TOKENS`` / ``{prefix}_TEMPERATURE`` /
          ``{prefix}_MAX_RETRIES`` (optional)

        If ``fallback_prefix`` is set and a primary var is missing, falls
        back to the same suffix under the fallback prefix. Default
        fallback is ``OPENAI_COMPAT`` so existing ``.env`` files
        (``OPENAI_COMPAT_BASE_URL`` etc.) keep working without edits.

        Values are stripped of surrounding whitespace; a blank value
        counts as missing.

        ``env`` argument is for tests; defaults to ``os.environ``.

        Raises
        ------
        ValueError
            If a required variable is missing, or a numeric variable does
            not parse (the message names the offending variable).
        """
        source = os.environ if env is None else env

        def _lookup(suffix: str) -> tuple[str, str]:
            """Return ``(variable_name, value)`` for the first non-blank hit."""
            keys = [f"{prefix}_{suffix}"]
            if fallback_prefix:
                keys.append(f"{fallback_prefix}_{suffix}")
            for key in keys:
                value = (source.get(key) or "").strip()
                if value:
                    return key, value
            return keys[0], ""

        def _number(suffix: str, cast: Any, default: Any) -> Any:
            """Parse an optional numeric variable, naming it on failure."""
            key, raw = _lookup(suffix)
            if not raw:
                return default
            try:
                return cast(raw)
            except ValueError as exc:
                raise ValueError(
                    f"Invalid {cast.__name__} value for {key}: {raw!r}"
                ) from exc

        return cls(
            base_url=_lookup("BASE_URL")[1],
            api_key=_lookup("API_KEY")[1],
            model=_lookup("MODEL")[1],
            provider=_lookup("PROVIDER")[1] or "openai",
            timeout_s=_number("TIMEOUT_S", float, 180.0),
            max_tokens=_number("MAX_TOKENS", int, 8000),
            temperature=_number("TEMPERATURE", float, 0.0),
            max_retries=_number("MAX_RETRIES", int, 3),
        )

    # ── Adaptation ──────────────────────────────────────────────────────

    def to_openai_compatible(self) -> OpenAICompatibleChatConfig:
        """Render into the transport-specific config the current backend
        expects. New transports (Anthropic-native in M3) will add their
        own adapter alongside this one.

        ``provider`` and ``extra`` are not forwarded.
        """
        return OpenAICompatibleChatConfig(
            base_url=self.base_url,
            api_key=self.api_key,
            model=self.model,
            timeout_s=self.timeout_s,
            max_tokens=self.max_tokens,
            temperature=self.temperature,
            max_retries=self.max_retries,
        )
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def create_llm_service_from_config(cfg: LLMProviderConfig) -> LLMService:
    """Build an ``LLMService`` from an :class:`LLMProviderConfig`.

    The config is first rendered with ``cfg.to_openai_compatible()`` and
    then wrapped in an ``OpenAICompatibleChatLLMService``. That is the
    only transport today; once a second one lands this function will
    dispatch on ``cfg.provider``.
    """
    transport_cfg = cfg.to_openai_compatible()
    return OpenAICompatibleChatLLMService(transport_cfg)
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def create_llm_service_from_env(
    *,
    prefix: str = DEFAULT_PREFIX,
    fallback_prefix: str | None = LEGACY_PREFIX,
) -> LLMService:
    """One-liner for the common case: read env, build, return service.

    Equivalent to ``LLMProviderConfig.from_env(...)`` followed by
    :func:`create_llm_service_from_config`.
    """
    return create_llm_service_from_config(
        LLMProviderConfig.from_env(prefix=prefix, fallback_prefix=fallback_prefix)
    )
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
__all__ = [
|
|
171
|
+
"DEFAULT_PREFIX",
|
|
172
|
+
"LEGACY_PREFIX",
|
|
173
|
+
"LLMProviderConfig",
|
|
174
|
+
"create_llm_service_from_config",
|
|
175
|
+
"create_llm_service_from_env",
|
|
176
|
+
]
|