power-loop 3.0.0__tar.gz → 3.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. {power_loop-3.0.0 → power_loop-3.0.2}/PKG-INFO +32 -9
  2. {power_loop-3.0.0 → power_loop-3.0.2}/README.md +31 -8
  3. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/__init__.py +1 -1
  4. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/stateful_loop.py +38 -7
  5. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/types.py +16 -1
  6. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/history_projector.py +9 -0
  7. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/representation.py +20 -3
  8. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/dialect.py +1 -1
  9. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/schema.py +10 -1
  10. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/default_tools.py +8 -1
  11. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/runner.py +23 -8
  12. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop.egg-info/PKG-INFO +32 -9
  13. {power_loop-3.0.0 → power_loop-3.0.2}/LICENSE +0 -0
  14. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/__init__.py +0 -0
  15. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/__init__.py +0 -0
  16. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/anthropic_factory.py +0 -0
  17. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/capabilities.py +0 -0
  18. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/interface.py +0 -0
  19. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/llm_factory.py +0 -0
  20. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/llm_tooling.py +0 -0
  21. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/llm_utils.py +0 -0
  22. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/_vendor/llm_client/multimodal.py +0 -0
  23. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/__init__.py +0 -0
  24. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/follow_up.py +0 -0
  25. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/sink.py +0 -0
  26. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/agent/system_prompt.py +0 -0
  27. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/__init__.py +0 -0
  28. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/errors.py +0 -0
  29. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/event_payloads.py +0 -0
  30. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/events.py +0 -0
  31. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/handlers.py +0 -0
  32. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/hook_contexts.py +0 -0
  33. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/hooks.py +0 -0
  34. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/messages.py +0 -0
  35. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/protocols.py +0 -0
  36. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contracts/tools.py +0 -0
  37. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/__init__.py +0 -0
  38. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/_redact.py +0 -0
  39. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/jsonl_sink.py +0 -0
  40. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/logging_sink.py +0 -0
  41. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/mcp.py +0 -0
  42. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/metrics_sink.py +0 -0
  43. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/contrib/otel_sink.py +0 -0
  44. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/agent_context.py +0 -0
  45. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/events.py +0 -0
  46. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/hooks.py +0 -0
  47. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/phase.py +0 -0
  48. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/pipeline.py +0 -0
  49. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/runner.py +0 -0
  50. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/core/state.py +0 -0
  51. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/py.typed +0 -0
  52. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/blackboard.py +0 -0
  53. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/budget.py +0 -0
  54. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/cancellation.py +0 -0
  55. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/compact.py +0 -0
  56. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/env.py +0 -0
  57. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/exec_backend.py +0 -0
  58. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/fold.py +0 -0
  59. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/fold_adapter.py +0 -0
  60. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/history_sanitize.py +0 -0
  61. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/human_input.py +0 -0
  62. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/memory.py +0 -0
  63. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/notes.py +0 -0
  64. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/provider.py +0 -0
  65. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/retry.py +0 -0
  66. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/runtime_state.py +0 -0
  67. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/session_store.py +0 -0
  68. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/skills.py +0 -0
  69. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/spec.py +0 -0
  70. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/__init__.py +0 -0
  71. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/backends/__init__.py +0 -0
  72. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/backends/mysql.py +0 -0
  73. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/backends/postgres.py +0 -0
  74. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/backends/sqlite.py +0 -0
  75. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/capabilities.py +0 -0
  76. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/db.py +0 -0
  77. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/factory.py +0 -0
  78. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/store.py +0 -0
  79. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/store/types.py +0 -0
  80. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/structured.py +0 -0
  81. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/stub_provider.py +0 -0
  82. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/runtime/timers.py +0 -0
  83. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/__init__.py +0 -0
  84. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/blackboard.py +0 -0
  85. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/default_manifest.py +0 -0
  86. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/registry.py +0 -0
  87. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/tools/spawn_agent.py +0 -0
  88. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/__init__.py +0 -0
  89. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/api.py +0 -0
  90. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/engine.py +0 -0
  91. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/introspect.py +0 -0
  92. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/journal.py +0 -0
  93. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/result.py +0 -0
  94. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/resume.py +0 -0
  95. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/spec.py +0 -0
  96. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/subprocess_executor.py +0 -0
  97. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/tool.py +0 -0
  98. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop/workflow/worker.py +0 -0
  99. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop.egg-info/SOURCES.txt +0 -0
  100. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop.egg-info/dependency_links.txt +0 -0
  101. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop.egg-info/requires.txt +0 -0
  102. {power_loop-3.0.0 → power_loop-3.0.2}/power_loop.egg-info/top_level.txt +0 -0
  103. {power_loop-3.0.0 → power_loop-3.0.2}/pyproject.toml +0 -0
  104. {power_loop-3.0.0 → power_loop-3.0.2}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: power-loop
3
- Version: 3.0.0
3
+ Version: 3.0.2
4
4
  Summary: Embeddable agent execution kernel — LLM loop, hooks, events, tools, dynamic sub-agents.
5
5
  Author-email: zhangran <zhangran24@126.com>
6
6
  License: MIT
@@ -106,7 +106,7 @@ print((await loop.send("And my second-favorite?", session_id=sid)).final_text)
106
106
  pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysql] for those backends
107
107
  ```
108
108
 
109
- > **1.0 stable.** The public API is frozen under SemVer (a breaking change requires a major bump), machine-enforced by a baseline guard in CI. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
109
+ > **Stable since 1.0; now 3.x.** The public API is frozen under SemVer and machine-enforced by a baseline guard in CI — and the two major bumps since prove the discipline rather than undercut it: **2.0** moved storage to a pluggable async backend, **3.0** made context handling two orthogonal axes. Both were real breaking changes, so both got a major bump. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. Backed by **900+ unit tests**, a **live-LLM** suite, and a **3-backend conformance suite** (SQLite/PostgreSQL/MySQL). See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
110
110
 
111
111
  ---
112
112
 
@@ -116,7 +116,7 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
116
116
  |---|---|
117
117
  | 🚀 **New** — show me the 5-minute version | [Getting Started](docs/en/getting-started.md) |
118
118
  | 🛠️ **Learning by building** | [Tutorials](docs/en/tutorials/index.md) — chatbot · tools · human-in-the-loop · multi-agent |
119
- | 🧩 **Browsing runnable code** | [40 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
119
+ | 🧩 **Browsing runnable code** | [43 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
120
120
  | 📚 **Looking something up** | [User Guide](docs/en/user-guide/index.md) · [API reference](docs/en/api/index.md) |
121
121
  | 🤔 **Deciding if it fits** | [How it compares](#how-it-compares) · [Honest scope](#honest-scope) |
122
122
 
@@ -128,10 +128,11 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
128
128
 
129
129
  Most "agent frameworks" ask you to build your app *inside* them. power-loop is the opposite: a **library you embed**. You keep your HTTP layer, your auth, your queues, your RAG, your UI, your deploy. It runs the agent loop — and lets you *engineer* it.
130
130
 
131
- - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~20k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
131
+ - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~24k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
132
132
  - 🗄️ **Pluggable storage, zero-infra default.** Sessions, timers, sub-agent trees, workflow journals, the shared blackboard — one backend-neutral store written once against a tiny `Database`/`Dialect` port. The default is **one SQLite file** (copy the file, you've copied the state); point a DSN at **PostgreSQL or MySQL** when you want a real multi-writer server — same code, same conformance suite. Tables are auto-created, or **provisioned out-of-band** with a printed DDL script (see [Storage backends](docs/en/user-guide/storage-backends.md)).
133
133
  - ♻️ **Stateless, resumable loops.** A `StatefulAgentLoop` carries no authoritative state — all of it lives in the store. So a loop is cheap to create and trivially **restored from a DSN + a session id** (ideal for web handlers, workers, cold starts). It self-caches each session's active window (a rebuildable accelerator that never changes what the model sees) to skip re-reads on hot paths.
134
134
  - ⏱️ **Durable by default.** Crash mid-run and `resume()`. Agents schedule their own **durable timers** that survive restarts. Workflows **replay finished steps and re-run only the unfinished tail** after a process death. The store survives version upgrades (a portable, backend-neutral migration-version table) and can be **pruned, VACUUMed, and exported**.
135
+ - 🧠 **Context engineering, not one fixed strategy.** How each finished send is *recorded/rendered* (**representation**: full **verbatim** or a terse per-send **projection**) and how older history is *compacted* once over budget (**fold strategy**: a single **LLM summary**, or an **agentic** pass that also writes durable notes) are two **orthogonal, config-driven axes** — any representation composes with any fold strategy, and both take your own `Representation` / `FoldStrategy` implementation. Folds always keep whole sends (never split a tool-call/result pair); `recall_send` / `recall_compacted` pull the original detail back from the immutable audit log.
135
136
  - 🧩 **Composable from one loop to a fleet.** Start with `send()`. Add tools. Spawn sub-agents. Fan out a deterministic **workflow** (`sequence`/`parallel`/`foreach`/`branch`). Run each leaf in its **own process and DB** behind a sandbox. Same primitives all the way up.
136
137
  - 🛡️ **Isolation seams where it counts.** Tool-level sandboxing via a `ShellBackend` (drop in gVisor/Docker for `bash`); process-level via a `WorkerLauncher` (wrap a whole sub-agent worker per leaf). power-loop stays sandbox-agnostic; you choose the policy.
137
138
  - 🔬 **Built to be observed.** Typed events for every stream chunk, tool call, round, and **individual LLM call** — each `seq`-ordered + monotonic-clock stamped. Pluggable sinks behind extras: durable **JSONL** (with `replay`), **Prometheus/StatsD** metrics, an **OpenTelemetry** span tree. Per-run + per-session token accounting and hard per-run budgets.
@@ -153,8 +154,8 @@ Most "agent frameworks" ask you to build your app *inside* them. power-loop is t
153
154
  | **Workflow resume** | Journals each step; after a crash, replays completed steps and re-runs only the tail | [Workflows](docs/en/user-guide/workflows.md) |
154
155
  | **Process sandboxing** | Each workflow leaf in its own OS process + own DB; wrap each in gVisor/Docker per leaf | [Sandboxing](docs/en/user-guide/sandboxing.md) |
155
156
  | **Durable timers** | Agents schedule their own wake-ups; survive restarts; one-shot or recurring | [Timers](docs/en/user-guide/timers.md) |
156
- | **Context compaction** | Auto-summarize old turns (never splits a tool-call pair); `recall_compacted` to pull originals back | [Compaction](docs/en/user-guide/compaction.md) |
157
- | **Send-context projection** | Opt-in: feed a per-send plain-text projection of finished sends (derived `pl_project_messages`) instead of verbatim history; `pl_messages` stays immutable; `recall_send` to re-expand | [Projection](docs/en/user-guide/send-context-projection.md) |
157
+ | **Context — representation** | Record/render each finished send **verbatim** or as a terse per-send **projection** (derived `pl_project_messages`); `pl_messages` stays immutable; `recall_send` re-expands | [Projection](docs/en/user-guide/send-context-projection.md) |
158
+ | **Context — fold strategy** | Compact older history once over budget: **LLM summary** or **agentic** (also writes notes); pluggable `FoldStrategy`; never splits a tool pair; `recall_compacted` re-expands | [Compaction](docs/en/user-guide/compaction.md) |
158
159
  | **Durability ops** | Portable migration-version table, retention/prune, VACUUM, `export_session`/`import_session`, graceful `aclose()` | [Sessions](docs/en/user-guide/sessions.md) |
159
160
  | **Observability** | Typed `seq`-ordered events → durable JSONL + `replay`, Prometheus/StatsD metrics, OpenTelemetry spans | [Observability](docs/en/user-guide/observability.md) |
160
161
  | **MCP tools** | Surface a Model Context Protocol server's tools as power-loop tools | [Extending](docs/en/user-guide/extending-tools.md) |
@@ -200,6 +201,28 @@ result = await loop.send(user_text, session_id=session_id)
200
201
 
201
202
  Under the hood the loop keeps a per-session **active-window cache** — but it caches only the *durable* projection, validated by a monotonic `next_seq` token, so it's a pure accelerator: a cold loop with an empty cache produces byte-for-byte the same prompts (proven by a warm-vs-cold conformance test, including the recall/compaction/prompt-edit edge cases).
202
203
 
204
+ ### Context engineering — two orthogonal axes you choose (and can implement yourself)
205
+
206
+ Long conversations outgrow the window. Most libraries give you *one* fixed compaction behavior; power-loop (3.0) splits it into two independent, config-driven axes:
207
+
208
+ - **Representation** — how each *finished send* is recorded & rendered: `VerbatimRepresentation` (full, byte-identical history) or `ProjectedRepresentation` (a terse per-send plain-text projection). The original detail always stays in the immutable `pl_messages` audit log.
209
+ - **Fold strategy** — how *older* history is compacted once the rendered prefix crosses the budget: `LLMSummaryFold` (one summary call) or `AgenticFold` (a bounded tool loop that also persists durable facts as notes).
210
+
211
+ ```python
212
+ from power_loop import (
213
+ StatefulAgentLoop, AgentLoopConfig,
214
+ ProjectedRepresentation, AgenticFold, # mix & match either axis — or pass your own impl
215
+ )
216
+
217
+ cfg = AgentLoopConfig(
218
+ representation=ProjectedRepresentation(max_chars=300), # terse projection (or VerbatimRepresentation)
219
+ fold_strategy=AgenticFold(keep_last_sends=4), # summarize older sends + write notes
220
+ )
221
+ loop = StatefulAgentLoop(llm=llm, dsn="app.db", config=cfg)
222
+ ```
223
+
224
+ Any representation composes with any fold strategy, and each axis is a small `Protocol` you can implement yourself. A fold always keeps **whole sends** (it never splits an atomic tool-call/result pair), and the model can call `recall_send(send_index=N)` / `recall_compacted()` to pull the full original detail back from the audit log. (The two classes used in the snippet above are public but **provisional** — added in 3.0, not yet frozen into `STABLE_API`; `AgentLoopConfig` itself is Stable.)
225
+
203
226
  ### Deterministic multi-agent workflows — that the model can author, and that survive a crash
204
227
 
205
228
  Sub-agent delegation is *model-driven* ("go do this"). When you want **code-driven, deterministic** orchestration — fan out over a list, branch on a result, run a pipeline — describe it as a `WorkflowSpec` and let the engine interpret it. The only LLM calls are the leaves; `sequence`/`parallel`/`foreach`/`branch` are plain code.
@@ -281,7 +304,7 @@ await register_mcp_tools(registry, client, prefix="fs.") # MCP tools → power
281
304
 
282
305
  The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and any client works.
283
306
 
284
- > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (40 runnable programs) and the [docs](docs/en/index.md).
307
+ > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (43 runnable programs) and the [docs](docs/en/index.md).
285
308
 
286
309
  ---
287
310
 
@@ -289,7 +312,7 @@ The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and an
289
312
 
290
313
  power-loop is a **kernel**, not a platform — that's the whole trade-off.
291
314
 
292
- - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~20k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
315
+ - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~24k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
293
316
  - **Choose power-loop** when you want to *embed* an agent in an existing app, keep your dependency surface tiny, pick your own database, and care about durability + isolation + a stable contract.
294
317
  - **Choose a framework** when you want batteries included, a big integration catalog, and don't mind the weight.
295
318
 
@@ -321,7 +344,7 @@ Python 3.10+. See [Getting Started](docs/en/getting-started.md). Optional extras
321
344
 
322
345
  ## Stability & SemVer
323
346
 
324
- As of **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) is under SemVer: a breaking change requires a major bump (`2.0.0`), enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too.
347
+ Since **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) has been under SemVer: a breaking change requires a major bump, enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too. The two majors since (2.0 pluggable async storage, 3.0 orthogonal context axes) were exactly that policy in action — breaking changes earned a major bump, each documented in the [Changelog](CHANGELOG.md).
325
348
 
326
349
  | Tier | Meaning |
327
350
  |---|---|
@@ -33,7 +33,7 @@ print((await loop.send("And my second-favorite?", session_id=sid)).final_text)
33
33
  pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysql] for those backends
34
34
  ```
35
35
 
36
- > **1.0 stable.** The public API is frozen under SemVer (a breaking change requires a major bump), machine-enforced by a baseline guard in CI. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
36
+ > **Stable since 1.0; now 3.x.** The public API is frozen under SemVer and machine-enforced by a baseline guard in CI — and the two major bumps since prove the discipline rather than undercut it: **2.0** moved storage to a pluggable async backend, **3.0** made context handling two orthogonal axes. Both were real breaking changes, so both got a major bump. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. Backed by **900+ unit tests**, a **live-LLM** suite, and a **3-backend conformance suite** (SQLite/PostgreSQL/MySQL). See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
37
37
 
38
38
  ---
39
39
 
@@ -43,7 +43,7 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
43
43
  |---|---|
44
44
  | 🚀 **New** — show me the 5-minute version | [Getting Started](docs/en/getting-started.md) |
45
45
  | 🛠️ **Learning by building** | [Tutorials](docs/en/tutorials/index.md) — chatbot · tools · human-in-the-loop · multi-agent |
46
- | 🧩 **Browsing runnable code** | [40 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
46
+ | 🧩 **Browsing runnable code** | [43 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
47
47
  | 📚 **Looking something up** | [User Guide](docs/en/user-guide/index.md) · [API reference](docs/en/api/index.md) |
48
48
  | 🤔 **Deciding if it fits** | [How it compares](#how-it-compares) · [Honest scope](#honest-scope) |
49
49
 
@@ -55,10 +55,11 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
55
55
 
56
56
  Most "agent frameworks" ask you to build your app *inside* them. power-loop is the opposite: a **library you embed**. You keep your HTTP layer, your auth, your queues, your RAG, your UI, your deploy. It runs the agent loop — and lets you *engineer* it.
57
57
 
58
- - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~20k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
58
+ - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~24k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
59
59
  - 🗄️ **Pluggable storage, zero-infra default.** Sessions, timers, sub-agent trees, workflow journals, the shared blackboard — one backend-neutral store written once against a tiny `Database`/`Dialect` port. The default is **one SQLite file** (copy the file, you've copied the state); point a DSN at **PostgreSQL or MySQL** when you want a real multi-writer server — same code, same conformance suite. Tables are auto-created, or **provisioned out-of-band** with a printed DDL script (see [Storage backends](docs/en/user-guide/storage-backends.md)).
60
60
  - ♻️ **Stateless, resumable loops.** A `StatefulAgentLoop` carries no authoritative state — all of it lives in the store. So a loop is cheap to create and trivially **restored from a DSN + a session id** (ideal for web handlers, workers, cold starts). It self-caches each session's active window (a rebuildable accelerator that never changes what the model sees) to skip re-reads on hot paths.
61
61
  - ⏱️ **Durable by default.** Crash mid-run and `resume()`. Agents schedule their own **durable timers** that survive restarts. Workflows **replay finished steps and re-run only the unfinished tail** after a process death. The store survives version upgrades (a portable, backend-neutral migration-version table) and can be **pruned, VACUUMed, and exported**.
62
+ - 🧠 **Context engineering, not one fixed strategy.** How each finished send is *recorded/rendered* (**representation**: full **verbatim** or a terse per-send **projection**) and how older history is *compacted* once over budget (**fold strategy**: a single **LLM summary**, or an **agentic** pass that also writes durable notes) are two **orthogonal, config-driven axes** — any representation composes with any fold strategy, and both take your own `Representation` / `FoldStrategy` implementation. Folds always keep whole sends (never split a tool-call/result pair); `recall_send` / `recall_compacted` pull the original detail back from the immutable audit log.
62
63
  - 🧩 **Composable from one loop to a fleet.** Start with `send()`. Add tools. Spawn sub-agents. Fan out a deterministic **workflow** (`sequence`/`parallel`/`foreach`/`branch`). Run each leaf in its **own process and DB** behind a sandbox. Same primitives all the way up.
63
64
  - 🛡️ **Isolation seams where it counts.** Tool-level sandboxing via a `ShellBackend` (drop in gVisor/Docker for `bash`); process-level via a `WorkerLauncher` (wrap a whole sub-agent worker per leaf). power-loop stays sandbox-agnostic; you choose the policy.
64
65
  - 🔬 **Built to be observed.** Typed events for every stream chunk, tool call, round, and **individual LLM call** — each `seq`-ordered + monotonic-clock stamped. Pluggable sinks behind extras: durable **JSONL** (with `replay`), **Prometheus/StatsD** metrics, an **OpenTelemetry** span tree. Per-run + per-session token accounting and hard per-run budgets.
@@ -80,8 +81,8 @@ Most "agent frameworks" ask you to build your app *inside* them. power-loop is t
80
81
  | **Workflow resume** | Journals each step; after a crash, replays completed steps and re-runs only the tail | [Workflows](docs/en/user-guide/workflows.md) |
81
82
  | **Process sandboxing** | Each workflow leaf in its own OS process + own DB; wrap each in gVisor/Docker per leaf | [Sandboxing](docs/en/user-guide/sandboxing.md) |
82
83
  | **Durable timers** | Agents schedule their own wake-ups; survive restarts; one-shot or recurring | [Timers](docs/en/user-guide/timers.md) |
83
- | **Context compaction** | Auto-summarize old turns (never splits a tool-call pair); `recall_compacted` to pull originals back | [Compaction](docs/en/user-guide/compaction.md) |
84
- | **Send-context projection** | Opt-in: feed a per-send plain-text projection of finished sends (derived `pl_project_messages`) instead of verbatim history; `pl_messages` stays immutable; `recall_send` to re-expand | [Projection](docs/en/user-guide/send-context-projection.md) |
84
+ | **Context — representation** | Record/render each finished send **verbatim** or as a terse per-send **projection** (derived `pl_project_messages`); `pl_messages` stays immutable; `recall_send` re-expands | [Projection](docs/en/user-guide/send-context-projection.md) |
85
+ | **Context — fold strategy** | Compact older history once over budget: **LLM summary** or **agentic** (also writes notes); pluggable `FoldStrategy`; never splits a tool pair; `recall_compacted` re-expands | [Compaction](docs/en/user-guide/compaction.md) |
85
86
  | **Durability ops** | Portable migration-version table, retention/prune, VACUUM, `export_session`/`import_session`, graceful `aclose()` | [Sessions](docs/en/user-guide/sessions.md) |
86
87
  | **Observability** | Typed `seq`-ordered events → durable JSONL + `replay`, Prometheus/StatsD metrics, OpenTelemetry spans | [Observability](docs/en/user-guide/observability.md) |
87
88
  | **MCP tools** | Surface a Model Context Protocol server's tools as power-loop tools | [Extending](docs/en/user-guide/extending-tools.md) |
@@ -127,6 +128,28 @@ result = await loop.send(user_text, session_id=session_id)
127
128
 
128
129
  Under the hood the loop keeps a per-session **active-window cache** — but it caches only the *durable* projection, validated by a monotonic `next_seq` token, so it's a pure accelerator: a cold loop with an empty cache produces byte-for-byte the same prompts (proven by a warm-vs-cold conformance test, including the recall/compaction/prompt-edit edge cases).
129
130
 
131
+ ### Context engineering — two orthogonal axes you choose (and can implement yourself)
132
+
133
+ Long conversations outgrow the window. Most libraries give you *one* fixed compaction behavior; power-loop (3.0) splits it into two independent, config-driven axes:
134
+
135
+ - **Representation** — how each *finished send* is recorded & rendered: `VerbatimRepresentation` (full, byte-identical history) or `ProjectedRepresentation` (a terse per-send plain-text projection). The original detail always stays in the immutable `pl_messages` audit log.
136
+ - **Fold strategy** — how *older* history is compacted once the rendered prefix crosses the budget: `LLMSummaryFold` (one summary call) or `AgenticFold` (a bounded tool loop that also persists durable facts as notes).
137
+
138
+ ```python
139
+ from power_loop import (
140
+ StatefulAgentLoop, AgentLoopConfig,
141
+ ProjectedRepresentation, AgenticFold, # mix & match either axis — or pass your own impl
142
+ )
143
+
144
+ cfg = AgentLoopConfig(
145
+ representation=ProjectedRepresentation(max_chars=300), # terse projection (or VerbatimRepresentation)
146
+ fold_strategy=AgenticFold(keep_last_sends=4), # summarize older sends + write notes
147
+ )
148
+ loop = StatefulAgentLoop(llm=llm, dsn="app.db", config=cfg)
149
+ ```
150
+
151
+ Any representation composes with any fold strategy, and each axis is a small `Protocol` you can implement yourself. A fold always keeps **whole sends** (it never splits an atomic tool-call/result pair), and the model can call `recall_send(send_index=N)` / `recall_compacted()` to pull the full original detail back from the audit log. (The two classes above are public but **provisional** — added in 3.0, not yet frozen into `STABLE_API`; `AgentLoopConfig` itself is Stable.)
152
+
130
153
  ### Deterministic multi-agent workflows — that the model can author, and that survive a crash
131
154
 
132
155
  Sub-agent delegation is *model-driven* ("go do this"). When you want **code-driven, deterministic** orchestration — fan out over a list, branch on a result, run a pipeline — describe it as a `WorkflowSpec` and let the engine interpret it. The only LLM calls are the leaves; `sequence`/`parallel`/`foreach`/`branch` are plain code.
@@ -208,7 +231,7 @@ await register_mcp_tools(registry, client, prefix="fs.") # MCP tools → power
208
231
 
209
232
  The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and any client works.
210
233
 
211
- > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (40 runnable programs) and the [docs](docs/en/index.md).
234
+ > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (43 runnable programs) and the [docs](docs/en/index.md).
212
235
 
213
236
  ---
214
237
 
@@ -216,7 +239,7 @@ The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and an
216
239
 
217
240
  power-loop is a **kernel**, not a platform — that's the whole trade-off.
218
241
 
219
- - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~20k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
242
+ - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~24k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
220
243
  - **Choose power-loop** when you want to *embed* an agent in an existing app, keep your dependency surface tiny, pick your own database, and care about durability + isolation + a stable contract.
221
244
  - **Choose a framework** when you want batteries included, a big integration catalog, and don't mind the weight.
222
245
 
@@ -248,7 +271,7 @@ Python 3.10+. See [Getting Started](docs/en/getting-started.md). Optional extras
248
271
 
249
272
  ## Stability & SemVer
250
273
 
251
- As of **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) is under SemVer: a breaking change requires a major bump (`2.0.0`), enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too.
274
+ Since **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) is under SemVer: a breaking change requires a major bump, enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too. The two majors since (2.0 pluggable async storage, 3.0 orthogonal context axes) were exactly that policy in action — breaking changes earned a major bump, each documented in the [Changelog](CHANGELOG.md).
252
275
 
253
276
  | Tier | Meaning |
254
277
  |---|---|
@@ -15,7 +15,7 @@ Stability tiers
15
15
  无版本承诺,可随时变更或删除。
16
16
  """
17
17
 
18
- __version__ = "3.0.0"
18
+ __version__ = "3.0.2"
19
19
 
20
20
  # Public LLM contract (SDK-free) re-exported so callers (e.g. writing llm.* hooks or
21
21
  # a custom LLMService) don't reach into the internal vendored transport package (H3.4).
@@ -774,6 +774,11 @@ class StatefulAgentLoop:
774
774
  sink = SQLiteSink(store, session_id)
775
775
  sink._unresolved = {str(tc.get("id") or "") for tc in tool_calls}
776
776
  sink._assistant_seq = pending.get("assistant_seq")
777
+ # Prime _tool_calls so a crash mid-abort (after some but not all <aborted> rows land)
778
+ # persists a CONSISTENT intermediate pending — on_message_appended rebuilds the still-pending
779
+ # tool_calls from self._tool_calls (sink.py:171-174); left empty it would write
780
+ # {tool_call_ids:[…], tool_calls:[]}, a self-inconsistent pending.
781
+ sink._tool_calls = list(tool_calls)
777
782
  for tc in tool_calls:
778
783
  cid = str(tc.get("id") or "")
779
784
  name = _tool_call_name(tc) if "function" in tc or "name" in tc else None
@@ -1056,7 +1061,13 @@ class StatefulAgentLoop:
1056
1061
  if pending.get("pending_interactions"):
1057
1062
  return
1058
1063
  round_index = int(pending.get("round_index") or 0)
1059
- tool_calls = pending.get("tool_calls") or []
1064
+ # Fall back to tool_call_ids (as abort_pending / _prime_sink_from_pending do): a pending that
1065
+ # carries only ids (e.g. a crash mid-abort left {tool_call_ids:[…], tool_calls:[]}) must
1066
+ # still be resolved here, else resume() returns "completed" while the pending stays set and
1067
+ # the session is permanently stranded.
1068
+ tool_calls = pending.get("tool_calls") or [
1069
+ {"id": cid} for cid in (pending.get("tool_call_ids") or [])
1070
+ ]
1060
1071
  if not tool_calls:
1061
1072
  return
1062
1073
  # Initialize sink's in-memory unresolved set so auto-resolve works.
@@ -1064,6 +1075,19 @@ class StatefulAgentLoop:
1064
1075
  for tc in tool_calls:
1065
1076
  cid = str(tc.get("id") or "")
1066
1077
  name = _tool_call_name(tc)
1078
+ if name is None:
1079
+ # Reconstructed from ids only — no name/args to replay. Resolve the protocol with an
1080
+ # aborted marker (clears unresolved → pending cleared) instead of stranding.
1081
+ await sink.on_message_appended(
1082
+ {
1083
+ "role": "tool",
1084
+ "tool_call_id": cid,
1085
+ "name": None,
1086
+ "content": "<aborted: unrecoverable tool_call on resume>",
1087
+ },
1088
+ round_index=round_index,
1089
+ )
1090
+ continue
1067
1091
  args = _tool_call_args(tc)
1068
1092
  if self.tool_registry is None:
1069
1093
  output, failed = (
@@ -1511,19 +1535,26 @@ class StatefulAgentLoop:
1511
1535
  llm=self.llm, max_tokens=self.config.max_tokens,
1512
1536
  ),
1513
1537
  )
1538
+ fold_as_project: list[int] = []
1514
1539
  if folded is not None:
1515
1540
  from_send = 0 if note is not None else (min(fold) if fold else 0)
1516
1541
  compact_tuple = (folded.content, folded.rendered_text, from_send, folded.folded_to_send)
1517
1542
  migration_note_ops = list(folded.note_ops)
1518
- elif note is not None:
1519
- # Only the note, nothing foldable beyond keep — preserve the note as a standalone
1520
- # compact sitting just before the kept tail (or the current send).
1521
- to_send = (min(recent) - 1) if recent else (current_send_index - 1)
1522
- compact_tuple = ({"summary": note.content or ""}, None, 0, max(0, to_send))
1543
+ else:
1544
+ # The fold soft-failed (LLM error/timeout/empty) OR nothing was foldable. Do NOT write a
1545
+ # compact that claims to COVER sends it never merged — the reader uses compact_to_send as
1546
+ # the exclusion cutoff, so an over-claiming range silently drops real history (B4), and a
1547
+ # marker-set no-op drops compression forever (B13). Instead preserve everything: keep the
1548
+ # note as a standalone compact that covers NO real send (to_send=0), and write the
1549
+ # would-be-folded sends as individual project rows. A later end-of-send fold compresses
1550
+ # them (rolling this note compact forward) once over budget.
1551
+ if note is not None:
1552
+ compact_tuple = ({"summary": note.content or ""}, None, 0, 0)
1553
+ fold_as_project = fold
1523
1554
 
1524
1555
  project_rows = [
1525
1556
  (si, pr.kind, pr.content, pr.rendered_text)
1526
- for si in recent
1557
+ for si in (fold_as_project + recent)
1527
1558
  for pr in projected[si].rows
1528
1559
  ]
1529
1560
  # Mark migrated in the SAME transaction as the rows (atomic): a crash can't leave the
@@ -35,7 +35,11 @@ def _fold_from_legacy_projector(proj: Any) -> FoldStrategy:
35
35
  admin-configured projection settings). Without this the mapped fold would silently use
36
36
  ``LLMSummaryFold`` defaults (4 / 0.75) and ignore the operator's config."""
37
37
  from power_loop.runtime.fold import LLMSummaryFold
38
- keep = max(1, int(getattr(proj, "keep_last_sends", 4) or 4))
38
+ # Only a MISSING/None keep falls back to 4; an explicit 0 means "keep ~none" (fold aggressively)
39
+ # → clamp to the validator's floor of 1, NOT silently to 4 (B10). (A verbatim keep==0 projector is
40
+ # routed to never-fold in _map_legacy_axes and never reaches here.)
41
+ keep_raw = getattr(proj, "keep_last_sends", None)
42
+ keep = 4 if keep_raw is None else max(1, int(keep_raw))
39
43
  trigger = float(getattr(proj, "trigger_ratio", 0.75) or 0.75)
40
44
  return LLMSummaryFold(keep_last_sends=keep, trigger_ratio=trigger)
41
45
 
@@ -172,6 +176,17 @@ class AgentLoopConfig:
172
176
  # stray legacy compactor= must NOT silently disable it.
173
177
  if legacy_comp is not _UNSET and legacy_proj in (_UNSET, None) and fold_was_unset:
174
178
  object.__setattr__(self, "_legacy_verbatim_compactor", legacy_comp)
179
+ elif (
180
+ legacy_proj not in (_UNSET, None)
181
+ and fold_was_unset
182
+ and getattr(legacy_proj, "kind", None) == "verbatim"
183
+ and getattr(legacy_proj, "keep_last_sends", 1) == 0 # exact 0 (NOT `or 1`, which 0 defeats)
184
+ ):
185
+ # A legacy NEVER-FOLD projector (IdentityProjector: kind='verbatim', keep_last_sends==0)
186
+ # maps to never-fold (compactor=None) — NOT a folding fold_strategy. Else it would fold
187
+ # (the seeder coerces keep 0→positive) and, on the old projection path, drop the compact
188
+ # (B7 data loss). Routes via resolve_compactor's verbatim branch (kind=='verbatim').
189
+ object.__setattr__(self, "_legacy_verbatim_compactor", None)
175
190
  else:
176
191
  object.__setattr__(self, "_legacy_verbatim_compactor", _UNSET)
177
192
  if self.migrate_history_on_projection_switch is not _UNSET:
@@ -173,6 +173,7 @@ class IdentityProjector:
173
173
  with this projector sees byte-identical history to the no-projector default. Useful to
174
174
  prove the projection seam itself introduces no behavior change."""
175
175
 
176
+ kind: str = "verbatim" # routes to the safe in-place (verbatim) path, never the projection fold
176
177
  version: int = 1
177
178
  keep_last_sends: int = 0 # verbatim mode never folds
178
179
  trigger_ratio: float = 0.75 # unused (keep_last_sends==0 short-circuits folding); for Protocol parity
@@ -197,6 +198,14 @@ class IdentityProjector:
197
198
  def render(self, rows: list[ProjectMessageRow]) -> list[LoopMessage]:
198
199
  out: list[LoopMessage] = []
199
200
  for r in rows:
201
+ # Defensive: even though this projector never folds, a compact row could reach render
202
+ # via a mode switch / legacy mapping — render its summary instead of silently dropping it
203
+ # (the 3.0 invariant: every representation's render MUST handle kind=='compact').
204
+ if getattr(r, "kind", None) == "compact":
205
+ summary = (r.content or {}).get("summary")
206
+ if summary:
207
+ out.append({"role": "user", "content": str(summary)})
208
+ continue
200
209
  out.extend((r.content or {}).get("messages") or [])
201
210
  return out
202
211
 
@@ -260,15 +260,32 @@ class ProjectedRepresentation:
260
260
 
261
261
  # rendering ----------------------------------------------------------------
262
262
  def render(self, rows: list[ProjectMessageRow]) -> list[LoopMessage]:
263
+ # Each rendered send is tagged with its ``#N`` send_index so the model can call
264
+ # recall_send(send_index=N) on a folded/compacted earlier turn — the tool docstring and the
265
+ # host's RECALL_SEND_NOTE both tell it to use "the #N the summary shows", so render MUST
266
+ # actually emit them (else recall_send is undiscoverable). The folded compact carries its
267
+ # covered range.
263
268
  out: list[LoopMessage] = []
264
269
  for r in rows:
270
+ si = r.send_index
265
271
  if r.kind == "user":
266
272
  humans = (r.content or {}).get("human") or []
267
- out.append({"role": "user", "content": "\n".join(str(h) for h in humans)})
273
+ tag = f"[#{si}] " if si is not None else ""
274
+ out.append({"role": "user", "content": tag + "\n".join(str(h) for h in humans)})
268
275
  elif r.kind == "project":
269
- out.append({"role": "assistant", "content": self._render_project(r.content)})
276
+ tag = f"#{si} " if si is not None else ""
277
+ out.append({"role": "assistant", "content": tag + self._render_project(r.content)})
270
278
  elif r.kind == "compact":
271
- out.append(_render_compact_row(r))
279
+ msg = _render_compact_row(r)
280
+ lo, hi = r.compact_from_send, r.compact_to_send
281
+ if lo is not None and hi is not None and hi >= lo > 0:
282
+ rng = f"#{lo}" if lo == hi else f"#{lo}–#{hi}"
283
+ msg = {
284
+ "role": "user",
285
+ "content": f"[older sends {rng} folded — recall_send(send_index=N) to "
286
+ f"expand]\n{msg['content']}",
287
+ }
288
+ out.append(msg)
272
289
  return out
273
290
 
274
291
  def _render_tool(self, t: dict[str, Any]) -> str:
@@ -356,7 +356,7 @@ class MySQLDialect:
356
356
  status VARCHAR(32) NOT NULL, return_code BIGINT, output_tail TEXT, output_path TEXT,
357
357
  last_seen_at BIGINT NOT NULL DEFAULT 0, created_at BIGINT NOT NULL,
358
358
  updated_at BIGINT NOT NULL, PRIMARY KEY (session_id, task_id),
359
- KEY {p}idx_bgtasks_session_status (session_id, status, updated_at)) {opts}""",
359
+ KEY {p}idx_background_tasks_session_status (session_id, status, updated_at)) {opts}""",
360
360
  f"""CREATE TABLE IF NOT EXISTS {p}session_stats (
361
361
  session_id VARCHAR(255) NOT NULL, sends BIGINT NOT NULL DEFAULT 0,
362
362
  rounds BIGINT NOT NULL DEFAULT 0, llm_calls BIGINT NOT NULL DEFAULT 0,
@@ -243,7 +243,16 @@ async def _column_exists(tx: Transaction, dialect_name: str, table: str, column:
243
243
  rows = await tx.fetchall(f"PRAGMA table_info({table})")
244
244
  return any(r["name"] == column for r in rows)
245
245
  if dialect_name in ("postgres", "mysql"):
246
- scope = "AND table_schema=DATABASE() " if dialect_name == "mysql" else ""
246
+ # Scope to the CURRENT schema — otherwise a same-named table in ANOTHER schema (PG
247
+ # search_path / multi-schema deployments) makes the probe return True for a column the
248
+ # current-schema table lacks, so the ALTER … ADD COLUMN is skipped but the version is still
249
+ # stamped → every subsequent append referencing that column crashes. Mirrors _table_exists
250
+ # (PG to_regclass honors search_path; MySQL DATABASE()).
251
+ scope = (
252
+ "AND table_schema=current_schema() "
253
+ if dialect_name == "postgres"
254
+ else "AND table_schema=DATABASE() "
255
+ )
247
256
  row = await tx.fetchone(
248
257
  "SELECT 1 AS present FROM information_schema.columns "
249
258
  f"WHERE table_name=? {scope}AND column_name=?",
@@ -310,7 +310,14 @@ def _validate_bash_command_scope(command: str) -> str | None:
310
310
  "Error: Reading agent-home internals is blocked outside allowlisted paths (.cache/logs/skills). "
311
311
  "Use load_skill(name) for skill content instead of direct file reads."
312
312
  )
313
- return None
313
+ # DEFAULT-DENY: the command references POWER_LOOP_HOME, is NOT under an allowlisted path, and
314
+ # matched none of the verb hints above — but the hint lists are not exhaustive (awk / base64 / od
315
+ # / python -c / dd of= / truncate / ln -s all reach agent-home undetected). Refuse rather than
316
+ # fall through to "allow", since the resolved target is provably an un-allowlisted home path.
317
+ return (
318
+ "Error: Accessing POWER_LOOP_HOME is blocked outside allowlisted paths (.cache/logs/skills). "
319
+ "Use workspace files or allowlisted agent paths only."
320
+ )
314
321
 
315
322
 
316
323
  def _dangerous_command_reason(command: str) -> str | None:
@@ -249,18 +249,33 @@ def make_wake_guard(store: Any):
249
249
  once (timers are at-least-once). Ignores non-workflow timers. Async because the
250
250
  store is async; ``run_typed_async`` awaits it."""
251
251
 
252
+ from power_loop.runtime.store.store import MUTATE_SKIP
253
+
252
254
  async def guard(ctx: TimerFireCtx) -> None:
253
255
  run_id = _parse_run_id(ctx.note)
254
256
  if run_id is None:
255
257
  return # not a workflow timer → CONTINUE
256
- j = await store.get_runtime_state(ctx.session_id, journal.run_key(run_id), default=None)
257
- if j is None:
258
- return
259
- if j.get("woke"):
260
- ctx.directive = HookDirective.SKIP # already delivered once
261
- return
262
- j["woke"] = True
263
- await store.set_runtime_state(ctx.session_id, journal.run_key(run_id), j)
258
+ # Claim the wake ATOMICALLY: a bare get→set RMW races a concurrent journal write
259
+ # (journal.update / record_step funnel through mutate_runtime_state on the SAME run key) —
260
+ # the guard's set would clobber that write, and two concurrent fires could both observe
261
+ # woke=False → double-wake. mutate_runtime_state is row-locked, so the claim is exclusive.
262
+ seen = {"woke": False}
263
+
264
+ def _claim(cur: Any) -> Any:
265
+ if cur is None:
266
+ return MUTATE_SKIP # no journal → CONTINUE (nothing to dedupe)
267
+ if cur.get("woke"):
268
+ seen["woke"] = True
269
+ return MUTATE_SKIP # already delivered once
270
+ return {**cur, "woke": True} # first delivery — set woke, preserve every other key
271
+
272
+ try:
273
+ await store.mutate_runtime_state(ctx.session_id, journal.run_key(run_id), _claim, default=None)
274
+ except ValueError:
275
+ return # session/state row gone (a stale timer firing on a deleted session) → CONTINUE,
276
+ # matching the old get_runtime_state(default=None) tolerance; nothing to dedupe.
277
+ if seen["woke"]:
278
+ ctx.directive = HookDirective.SKIP
264
279
 
265
280
  return guard
266
281
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: power-loop
3
- Version: 3.0.0
3
+ Version: 3.0.2
4
4
  Summary: Embeddable agent execution kernel — LLM loop, hooks, events, tools, dynamic sub-agents.
5
5
  Author-email: zhangran <zhangran24@126.com>
6
6
  License: MIT
@@ -106,7 +106,7 @@ print((await loop.send("And my second-favorite?", session_id=sid)).final_text)
106
106
  pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysql] for those backends
107
107
  ```
108
108
 
109
- > **1.0 stable.** The public API is frozen under SemVer (a breaking change requires a major bump), machine-enforced by a baseline guard in CI. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
109
+ > **Stable since 1.0; now 3.x.** The public API is frozen under SemVer and machine-enforced by a baseline guard in CI — and the two major bumps since prove the discipline rather than undercut it: **2.0** moved storage to a pluggable async backend, **3.0** made context handling two orthogonal axes. Both were real breaking changes, so both got a major bump. The **core has zero runtime dependencies** (pure stdlib; verified by a CI job that imports it with nothing else installed) — LLM transports *and database drivers* are optional extras. Backed by **900+ unit tests**, a **live-LLM** suite, and a **3-backend conformance suite** (SQLite/PostgreSQL/MySQL). See [Stability](#stability--semver) and the [honest caveats](#honest-scope) — a young, single-maintainer project says so plainly.
110
110
 
111
111
  ---
112
112
 
@@ -116,7 +116,7 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
116
116
  |---|---|
117
117
  | 🚀 **New** — show me the 5-minute version | [Getting Started](docs/en/getting-started.md) |
118
118
  | 🛠️ **Learning by building** | [Tutorials](docs/en/tutorials/index.md) — chatbot · tools · human-in-the-loop · multi-agent |
119
- | 🧩 **Browsing runnable code** | [40 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
119
+ | 🧩 **Browsing runnable code** | [43 examples](examples/README.md) — `00_hello_world.py` → full chatbot |
120
120
  | 📚 **Looking something up** | [User Guide](docs/en/user-guide/index.md) · [API reference](docs/en/api/index.md) |
121
121
  | 🤔 **Deciding if it fits** | [How it compares](#how-it-compares) · [Honest scope](#honest-scope) |
122
122
 
@@ -128,10 +128,11 @@ pip install 'power-loop[openai]' # or [anthropic] · add [postgres] / [mysq
128
128
 
129
129
  Most "agent frameworks" ask you to build your app *inside* them. power-loop is the opposite: a **library you embed**. You keep your HTTP layer, your auth, your queues, your RAG, your UI, your deploy. It runs the agent loop — and lets you *engineer* it.
130
130
 
131
- - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~20k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
131
+ - 🪶 **Featherweight & zero-dependency.** No `pydantic`, no LangChain, no graph DSL. A compact, pure-stdlib core (~24k lines) whose public surface is essentially one class — and **zero runtime dependencies**. LLM transports *and* the Postgres/MySQL drivers are pulled in only by the extra you install.
132
132
  - 🗄️ **Pluggable storage, zero-infra default.** Sessions, timers, sub-agent trees, workflow journals, the shared blackboard — one backend-neutral store written once against a tiny `Database`/`Dialect` port. The default is **one SQLite file** (copy the file, you've copied the state); point a DSN at **PostgreSQL or MySQL** when you want a real multi-writer server — same code, same conformance suite. Tables are auto-created, or **provisioned out-of-band** with a printed DDL script (see [Storage backends](docs/en/user-guide/storage-backends.md)).
133
133
  - ♻️ **Stateless, resumable loops.** A `StatefulAgentLoop` carries no authoritative state — all of it lives in the store. So a loop is cheap to create and trivially **restored from a DSN + a session id** (ideal for web handlers, workers, cold starts). It self-caches each session's active window (a rebuildable accelerator that never changes what the model sees) to skip re-reads on hot paths.
134
134
  - ⏱️ **Durable by default.** Crash mid-run and `resume()`. Agents schedule their own **durable timers** that survive restarts. Workflows **replay finished steps and re-run only the unfinished tail** after a process death. The store survives version upgrades (a portable, backend-neutral migration-version table) and can be **pruned, VACUUMed, and exported**.
135
+ - 🧠 **Context engineering, not one fixed strategy.** How each finished send is *recorded/rendered* (**representation**: full **verbatim** or a terse per-send **projection**) and how older history is *compacted* once over budget (**fold strategy**: a single **LLM summary**, or an **agentic** pass that also writes durable notes) are two **orthogonal, config-driven axes** — any representation composes with any fold strategy, and both take your own `Representation` / `FoldStrategy` implementation. Folds always keep whole sends (never split a tool-call/result pair); `recall_send` / `recall_compacted` pull the original detail back from the immutable audit log.
135
136
  - 🧩 **Composable from one loop to a fleet.** Start with `send()`. Add tools. Spawn sub-agents. Fan out a deterministic **workflow** (`sequence`/`parallel`/`foreach`/`branch`). Run each leaf in its **own process and DB** behind a sandbox. Same primitives all the way up.
136
137
  - 🛡️ **Isolation seams where it counts.** Tool-level sandboxing via a `ShellBackend` (drop in gVisor/Docker for `bash`); process-level via a `WorkerLauncher` (wrap a whole sub-agent worker per leaf). power-loop stays sandbox-agnostic; you choose the policy.
137
138
  - 🔬 **Built to be observed.** Typed events for every stream chunk, tool call, round, and **individual LLM call** — each `seq`-ordered + monotonic-clock stamped. Pluggable sinks behind extras: durable **JSONL** (with `replay`), **Prometheus/StatsD** metrics, an **OpenTelemetry** span tree. Per-run + per-session token accounting and hard per-run budgets.
@@ -153,8 +154,8 @@ Most "agent frameworks" ask you to build your app *inside* them. power-loop is t
153
154
  | **Workflow resume** | Journals each step; after a crash, replays completed steps and re-runs only the tail | [Workflows](docs/en/user-guide/workflows.md) |
154
155
  | **Process sandboxing** | Each workflow leaf in its own OS process + own DB; wrap each in gVisor/Docker per leaf | [Sandboxing](docs/en/user-guide/sandboxing.md) |
155
156
  | **Durable timers** | Agents schedule their own wake-ups; survive restarts; one-shot or recurring | [Timers](docs/en/user-guide/timers.md) |
156
- | **Context compaction** | Auto-summarize old turns (never splits a tool-call pair); `recall_compacted` to pull originals back | [Compaction](docs/en/user-guide/compaction.md) |
157
- | **Send-context projection** | Opt-in: feed a per-send plain-text projection of finished sends (derived `pl_project_messages`) instead of verbatim history; `pl_messages` stays immutable; `recall_send` to re-expand | [Projection](docs/en/user-guide/send-context-projection.md) |
157
+ | **Context — representation** | Record/render each finished send **verbatim** or as a terse per-send **projection** (derived `pl_project_messages`); `pl_messages` stays immutable; `recall_send` re-expands | [Projection](docs/en/user-guide/send-context-projection.md) |
158
+ | **Context — fold strategy** | Compact older history once over budget: **LLM summary** or **agentic** (also writes notes); pluggable `FoldStrategy`; never splits a tool pair; `recall_compacted` re-expands | [Compaction](docs/en/user-guide/compaction.md) |
158
159
  | **Durability ops** | Portable migration-version table, retention/prune, VACUUM, `export_session`/`import_session`, graceful `aclose()` | [Sessions](docs/en/user-guide/sessions.md) |
159
160
  | **Observability** | Typed `seq`-ordered events → durable JSONL + `replay`, Prometheus/StatsD metrics, OpenTelemetry spans | [Observability](docs/en/user-guide/observability.md) |
160
161
  | **MCP tools** | Surface a Model Context Protocol server's tools as power-loop tools | [Extending](docs/en/user-guide/extending-tools.md) |
@@ -200,6 +201,28 @@ result = await loop.send(user_text, session_id=session_id)
200
201
 
201
202
  Under the hood the loop keeps a per-session **active-window cache** — but it caches only the *durable* projection, validated by a monotonic `next_seq` token, so it's a pure accelerator: a cold loop with an empty cache produces byte-for-byte the same prompts (proven by a warm-vs-cold conformance test, including the recall/compaction/prompt-edit edge cases).
202
203
 
204
+ ### Context engineering — two orthogonal axes you choose (and can implement yourself)
205
+
206
+ Long conversations outgrow the window. Most libraries give you *one* fixed compaction behavior; power-loop (3.0) splits it into two independent, config-driven axes:
207
+
208
+ - **Representation** — how each *finished send* is recorded & rendered: `VerbatimRepresentation` (full, byte-identical history) or `ProjectedRepresentation` (a terse per-send plain-text projection). The original detail always stays in the immutable `pl_messages` audit log.
209
+ - **Fold strategy** — how *older* history is compacted once the rendered prefix crosses the budget: `LLMSummaryFold` (one summary call) or `AgenticFold` (a bounded tool loop that also persists durable facts as notes).
210
+
211
+ ```python
212
+ from power_loop import (
213
+ StatefulAgentLoop, AgentLoopConfig,
214
+ ProjectedRepresentation, AgenticFold, # mix & match either axis — or pass your own impl
215
+ )
216
+
217
+ cfg = AgentLoopConfig(
218
+ representation=ProjectedRepresentation(max_chars=300), # terse projection (or VerbatimRepresentation)
219
+ fold_strategy=AgenticFold(keep_last_sends=4), # summarize older sends + write notes
220
+ )
221
+ loop = StatefulAgentLoop(llm=llm, dsn="app.db", config=cfg)
222
+ ```
223
+
224
+ Any representation composes with any fold strategy, and each axis is a small `Protocol` you can implement yourself. A fold always keeps **whole sends** (it never splits an atomic tool-call/result pair), and the model can call `recall_send(send_index=N)` / `recall_compacted()` to pull the full original detail back from the audit log. (The two classes above are public but **provisional** — added in 3.0, not yet frozen into `STABLE_API`; `AgentLoopConfig` itself is Stable.)
225
+
203
226
  ### Deterministic multi-agent workflows — that the model can author, and that survive a crash
204
227
 
205
228
  Sub-agent delegation is *model-driven* ("go do this"). When you want **code-driven, deterministic** orchestration — fan out over a list, branch on a result, run a pipeline — describe it as a `WorkflowSpec` and let the engine interpret it. The only LLM calls are the leaves; `sequence`/`parallel`/`foreach`/`branch` are plain code.
@@ -281,7 +304,7 @@ await register_mcp_tools(registry, client, prefix="fs.") # MCP tools → power
281
304
 
282
305
  The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and any client works.
283
306
 
284
- > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (40 runnable programs) and the [docs](docs/en/index.md).
307
+ > More: hard token budgets, structured output, crash recovery, memory, the blackboard — see [`examples/`](examples/README.md) (43 runnable programs) and the [docs](docs/en/index.md).
285
308
 
286
309
  ---
287
310
 
@@ -289,7 +312,7 @@ The seam is a tiny `MCPToolSource` Protocol, so the `mcp` SDK is optional and an
289
312
 
290
313
  power-loop is a **kernel**, not a platform — that's the whole trade-off.
291
314
 
292
- - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~20k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
315
+ - **vs. LangChain / LangGraph / LlamaIndex / CrewAI / AutoGen** — those are batteries-included frameworks with large ecosystems (connectors, vector stores, integrations) and heavy dependency trees. power-loop deliberately ships **none of that**: a compact (~24k-line) pure-stdlib core with zero runtime dependencies, and you bring your own tools (or an MCP server). You get durable sessions across SQLite/PG/MySQL, crash-resumable workflows, and real sandbox seams out of the box; you do **not** get a bundled RAG stack or 100 connectors.
293
316
  - **Choose power-loop** when you want to *embed* an agent in an existing app, keep your dependency surface tiny, pick your own database, and care about durability + isolation + a stable contract.
294
317
  - **Choose a framework** when you want batteries included, a big integration catalog, and don't mind the weight.
295
318
 
@@ -321,7 +344,7 @@ Python 3.10+. See [Getting Started](docs/en/getting-started.md). Optional extras
321
344
 
322
345
  ## Stability & SemVer
323
346
 
324
- As of **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) is under SemVer: a breaking change requires a major bump (`2.0.0`), enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too.
347
+ Since **1.0**, the **STABLE** API (listed in `power_loop.STABLE_API`) is under SemVer: a breaking change requires a major bump, enforced by a frozen-baseline test in CI — including the flagship `StatefulAgentLoop` *and the LLM contract needed to construct it*. Error `.code` strings are frozen too. The two majors since (2.0 pluggable async storage, 3.0 orthogonal context axes) were exactly that policy in action — breaking changes earned a major bump, each documented in the [Changelog](CHANGELOG.md).
325
348
 
326
349
  | Tier | Meaning |
327
350
  |---|---|
File without changes
File without changes
File without changes