power-loop 2.0.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. {power_loop-2.0.0 → power_loop-2.1.0}/PKG-INFO +1 -1
  2. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/__init__.py +1 -1
  3. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/stateful_loop.py +207 -53
  4. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/backends/mysql.py +8 -5
  5. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/backends/sqlite.py +54 -8
  6. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/dialect.py +4 -2
  7. power_loop-2.1.0/power_loop/runtime/store/factory.py +103 -0
  8. power_loop-2.1.0/power_loop/runtime/store/schema.py +335 -0
  9. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/store.py +121 -24
  10. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/default_tools.py +102 -18
  11. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/journal.py +46 -33
  12. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/resume.py +1 -1
  13. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/runner.py +6 -2
  14. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop.egg-info/PKG-INFO +1 -1
  15. power_loop-2.0.0/power_loop/runtime/store/factory.py +0 -59
  16. power_loop-2.0.0/power_loop/runtime/store/schema.py +0 -169
  17. {power_loop-2.0.0 → power_loop-2.1.0}/LICENSE +0 -0
  18. {power_loop-2.0.0 → power_loop-2.1.0}/README.md +0 -0
  19. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/__init__.py +0 -0
  20. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/__init__.py +0 -0
  21. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/anthropic_factory.py +0 -0
  22. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/capabilities.py +0 -0
  23. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/interface.py +0 -0
  24. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/llm_factory.py +0 -0
  25. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/llm_tooling.py +0 -0
  26. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/llm_utils.py +0 -0
  27. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/_vendor/llm_client/multimodal.py +0 -0
  28. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/__init__.py +0 -0
  29. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/follow_up.py +0 -0
  30. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/sink.py +0 -0
  31. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/system_prompt.py +0 -0
  32. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/agent/types.py +0 -0
  33. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/__init__.py +0 -0
  34. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/errors.py +0 -0
  35. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/event_payloads.py +0 -0
  36. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/events.py +0 -0
  37. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/handlers.py +0 -0
  38. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/hook_contexts.py +0 -0
  39. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/hooks.py +0 -0
  40. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/messages.py +0 -0
  41. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/protocols.py +0 -0
  42. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contracts/tools.py +0 -0
  43. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/__init__.py +0 -0
  44. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/_redact.py +0 -0
  45. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/jsonl_sink.py +0 -0
  46. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/logging_sink.py +0 -0
  47. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/mcp.py +0 -0
  48. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/metrics_sink.py +0 -0
  49. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/contrib/otel_sink.py +0 -0
  50. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/agent_context.py +0 -0
  51. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/events.py +0 -0
  52. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/hooks.py +0 -0
  53. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/phase.py +0 -0
  54. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/pipeline.py +0 -0
  55. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/runner.py +0 -0
  56. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/core/state.py +0 -0
  57. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/py.typed +0 -0
  58. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/blackboard.py +0 -0
  59. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/budget.py +0 -0
  60. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/cancellation.py +0 -0
  61. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/compact.py +0 -0
  62. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/env.py +0 -0
  63. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/exec_backend.py +0 -0
  64. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/human_input.py +0 -0
  65. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/memory.py +0 -0
  66. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/notes.py +0 -0
  67. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/provider.py +0 -0
  68. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/retry.py +0 -0
  69. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/runtime_state.py +0 -0
  70. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/session_store.py +0 -0
  71. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/skills.py +0 -0
  72. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/spec.py +0 -0
  73. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/__init__.py +0 -0
  74. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/backends/__init__.py +0 -0
  75. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/backends/postgres.py +0 -0
  76. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/capabilities.py +0 -0
  77. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/db.py +0 -0
  78. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/store/types.py +0 -0
  79. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/structured.py +0 -0
  80. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/stub_provider.py +0 -0
  81. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/runtime/timers.py +0 -0
  82. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/__init__.py +0 -0
  83. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/blackboard.py +0 -0
  84. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/default_manifest.py +0 -0
  85. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/registry.py +0 -0
  86. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/tools/spawn_agent.py +0 -0
  87. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/__init__.py +0 -0
  88. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/api.py +0 -0
  89. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/engine.py +0 -0
  90. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/introspect.py +0 -0
  91. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/result.py +0 -0
  92. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/spec.py +0 -0
  93. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/subprocess_executor.py +0 -0
  94. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/tool.py +0 -0
  95. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop/workflow/worker.py +0 -0
  96. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop.egg-info/SOURCES.txt +0 -0
  97. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop.egg-info/dependency_links.txt +0 -0
  98. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop.egg-info/requires.txt +0 -0
  99. {power_loop-2.0.0 → power_loop-2.1.0}/power_loop.egg-info/top_level.txt +0 -0
  100. {power_loop-2.0.0 → power_loop-2.1.0}/pyproject.toml +0 -0
  101. {power_loop-2.0.0 → power_loop-2.1.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: power-loop
3
- Version: 2.0.0
3
+ Version: 2.1.0
4
4
  Summary: Embeddable agent execution kernel — LLM loop, hooks, events, tools, dynamic sub-agents.
5
5
  Author-email: zhangran <zhangran24@126.com>
6
6
  License: MIT
@@ -15,7 +15,7 @@ Stability tiers
15
15
  无版本承诺,可随时变更或删除。
16
16
  """
17
17
 
18
- __version__ = "2.0.0"
18
+ __version__ = "2.1.0"
19
19
 
20
20
  # Public LLM contract (SDK-free) re-exported so callers (e.g. writing llm.* hooks or
21
21
  # a custom LLMService) don't reach into the internal vendored transport package (H3.4).
@@ -21,8 +21,9 @@ from __future__ import annotations
21
21
  import asyncio
22
22
  import json
23
23
  import logging
24
+ import threading
24
25
  from collections import OrderedDict
25
- from collections.abc import Sequence
26
+ from collections.abc import Coroutine, Sequence
26
27
  from dataclasses import dataclass, field, replace
27
28
  from typing import Any
28
29
 
@@ -73,11 +74,16 @@ class _SessionCache:
73
74
  ``rows`` is EXACTLY what ``store.load_active_messages(session_id)`` returns at the moment
74
75
  ``next_seq`` was observed — the DURABLE projection, never the pipeline's mutated working
75
76
  copy (recall placeholders / microcompacted content are re-applied fresh each send, never
76
- cached). ``next_seq`` is the validity token: a send reuses ``rows`` iff the live
77
- ``session_state.next_seq`` still matches; any mismatch (another writer, or a fold) forces a
78
- reload. So a cold loop with an empty cache reproduces identical behavior."""
77
+ cached). The validity token is the PAIR ``(next_seq, last_compact_seq)``: a send reuses
78
+ ``rows`` iff BOTH still match the live ``session_state``. ``next_seq`` alone is insufficient —
79
+ a fold (compaction) reshuffles the OLDER active set into ``compacted_out`` while only
80
+ bumping ``next_seq`` by the note, so an out-of-band fold during a send could leave a stale
81
+ delta-extended window whose ``next_seq`` happens to match. ``last_compact_seq`` advances on
82
+ every fold, so pairing it in makes any fold — this loop's or another writer/process's —
83
+ invalidate the window. A cold loop with an empty cache reproduces identical behavior."""
79
84
 
80
85
  next_seq: int
86
+ last_compact_seq: int
81
87
  rows: list[MessageRow]
82
88
 
83
89
 
@@ -99,6 +105,38 @@ class StatefulResult:
99
105
  tool_calls: int = 0
100
106
 
101
107
 
108
+ class _SyncLoopRunner:
109
+ """A persistent event loop on a daemon thread that drives the blocking sync API.
110
+
111
+ ``send_sync`` / ``follow_up_sync`` / ``close`` must NOT spin a fresh ``asyncio.run`` per
112
+ call: an asyncpg/aiomysql connection pool binds to the event loop it was created on, so
113
+ a second ``asyncio.run`` (a new loop) finds the loop's cached store pool bound to the
114
+ now-closed first loop and raises ``InterfaceError`` / ``Event loop is closed``. One
115
+ long-lived loop keeps the pool valid for the whole lifetime of the StatefulAgentLoop —
116
+ matching the legacy synchronous store's "call it as often as you like" contract. (SQLite
117
+ is loop-agnostic but shares this path for uniformity.)
118
+ """
119
+
120
+ def __init__(self) -> None:
121
+ self._loop = asyncio.new_event_loop()
122
+ self._thread = threading.Thread(target=self._serve, name="power-loop-sync", daemon=True)
123
+ self._thread.start()
124
+
125
+ def _serve(self) -> None:
126
+ asyncio.set_event_loop(self._loop)
127
+ self._loop.run_forever()
128
+
129
+ def run(self, coro: Coroutine[Any, Any, Any]) -> Any:
130
+ """Submit a coroutine to the dedicated loop and block until it completes."""
131
+ return asyncio.run_coroutine_threadsafe(coro, self._loop).result()
132
+
133
+ def close(self) -> None:
134
+ self._loop.call_soon_threadsafe(self._loop.stop)
135
+ self._thread.join(timeout=5.0)
136
+ if not self._thread.is_alive():
137
+ self._loop.close()
138
+
139
+
102
140
  class StatefulAgentLoop:
103
141
  """The only public entry point for running an agent loop.
104
142
 
@@ -156,6 +194,13 @@ class StatefulAgentLoop:
156
194
  store.max_spawn_depth = max_spawn_depth
157
195
  self._owns_store = store is None
158
196
  self._store_open_lock = asyncio.Lock()
197
+ # Dedicated event loop (daemon thread) for the blocking sync API; opened lazily so
198
+ # the store pool stays bound to ONE loop across send_sync/follow_up_sync/close calls.
199
+ self._sync_runner: _SyncLoopRunner | None = None
200
+ self._sync_runner_lock = threading.Lock()
201
+ # Strong ref to a best-effort store.close() scheduled when sync close() is called
202
+ # from inside a running loop (keeps the task from being GC'd before it runs).
203
+ self._orphaned_close_task: asyncio.Future[None] | None = None
159
204
  self.config = config if config is not None else AgentLoopConfig()
160
205
  self.tool_registry = tool_registry
161
206
  self._runner = AgentRunner(event_bus=event_bus, hooks=hooks)
@@ -170,6 +215,17 @@ class StatefulAgentLoop:
170
215
  self._cache_misses = 0
171
216
  self._cache_evictions = 0
172
217
 
218
+ async def ensure_store(self) -> SessionStore:
219
+ """Public accessor: return this loop's store, opening an owned one on first use.
220
+
221
+ Construction is sync but the store opens lazily on first async use, so ``loop.store``
222
+ is ``None`` until then. Host integrations that need the store up front — e.g. building
223
+ a :class:`~power_loop.runtime.blackboard.SqliteBlackboard` to share with the loop —
224
+ must ``await loop.ensure_store()`` rather than reading ``loop.store`` directly (which
225
+ would capture ``None``).
226
+ """
227
+ return await self._ensure_store()
228
+
173
229
  async def _ensure_store(self) -> SessionStore:
174
230
  """Return the loop's store, opening an owned one on first use.
175
231
 
@@ -197,22 +253,29 @@ class StatefulAgentLoop:
197
253
 
198
254
  # ── per-session active-window cache helpers ─────────────────────────────
199
255
 
200
- def _cache_get(self, sid: str, next_seq: int) -> list[MessageRow] | None:
201
- """Return the cached active rows iff the validity token still matches; else None."""
256
+ def _cache_get(self, sid: str, next_seq: int, last_compact_seq: int) -> list[MessageRow] | None:
257
+ """Return the cached active rows iff the ``(next_seq, last_compact_seq)`` token still
258
+ matches; else None. The fold counter must match too — a fold reshuffles the older
259
+ active set, so a matching ``next_seq`` alone can still front a stale window."""
202
260
  if self._session_cache_size <= 0:
203
261
  return None
204
262
  entry = self._session_cache.get(sid)
205
- if entry is not None and entry.next_seq == next_seq:
263
+ if entry is not None and entry.next_seq == next_seq \
264
+ and entry.last_compact_seq == last_compact_seq:
206
265
  self._session_cache.move_to_end(sid) # LRU touch
207
266
  self._cache_hits += 1
208
267
  return entry.rows
209
268
  self._cache_misses += 1
210
269
  return None
211
270
 
212
- def _cache_put(self, sid: str, next_seq: int, rows: list[MessageRow]) -> None:
271
+ def _cache_put(
272
+ self, sid: str, next_seq: int, rows: list[MessageRow], last_compact_seq: int
273
+ ) -> None:
213
274
  if self._session_cache_size <= 0:
214
275
  return
215
- self._session_cache[sid] = _SessionCache(next_seq=next_seq, rows=list(rows))
276
+ self._session_cache[sid] = _SessionCache(
277
+ next_seq=next_seq, last_compact_seq=last_compact_seq, rows=list(rows)
278
+ )
216
279
  self._session_cache.move_to_end(sid)
217
280
  while len(self._session_cache) > self._session_cache_size:
218
281
  self._session_cache.popitem(last=False) # evict LRU
@@ -242,6 +305,31 @@ class StatefulAgentLoop:
242
305
  def _cache_invalidate(self, sid: str) -> None:
243
306
  self._session_cache.pop(sid, None)
244
307
 
308
+ async def _refresh_window_cache_after_send(self, sid: str, store: SessionStore) -> None:
309
+ """Fold this send's appended tail into the live window entry — UNLESS a fold
310
+ reshuffled the older active set, in which case drop the entry so the next send
311
+ full-reloads.
312
+
313
+ The fold check compares the durable ``last_compact_seq`` against the entry's, so it
314
+ fires for ANY fold since the entry was built — this send's own compaction OR an
315
+ out-of-band one by another writer/process. A bare ``next_seq`` delta-extend would
316
+ otherwise keep the now-``compacted_out`` rows in the window and, because a fold also
317
+ advances ``next_seq`` (the note), leave the entry's token matching the live state —
318
+ so the next send would HIT a corrupt window mixing folded-out rows with the note."""
319
+ entry = self._session_cache.get(sid)
320
+ if entry is None:
321
+ return
322
+ post_state = await store.get_state(sid)
323
+ if post_state is None:
324
+ return
325
+ if post_state.last_compact_seq != entry.last_compact_seq:
326
+ self._cache_invalidate(sid)
327
+ elif post_state.next_seq != entry.next_seq:
328
+ # Pure append tail (incl. follow-ups drained mid-run): cheap O(delta) extend.
329
+ delta = await store.load_active_messages(sid, after_seq=entry.next_seq)
330
+ entry.rows.extend(delta)
331
+ entry.next_seq = post_state.next_seq
332
+
245
333
  @property
246
334
  def cache_stats(self) -> dict[str, int]:
247
335
  """Observability for the per-session window cache: hits / misses / evictions /
@@ -257,27 +345,52 @@ class StatefulAgentLoop:
257
345
  # ── lifecycle ─────────────────────────────────────────────────────────
258
346
 
259
347
  def close(self) -> None:
260
- """Close the underlying store (if owned). Does NOT delete sessions.
348
+ """Close the underlying store (if owned) and the dedicated sync event loop.
261
349
 
262
350
  Synchronous and abrupt: it does NOT wait for in-flight sends or pending async
263
- event-bus tasks. The store is async, so this can only close cleanly when no
264
- event loop is running (it drives ``store.close()`` via ``asyncio.run``); when
265
- called from inside a running loop it schedules the close and warns. Prefer
266
- :meth:`aclose` (or ``async with loop:``) for graceful shutdown.
351
+ event-bus tasks. Prefer :meth:`aclose` (or ``async with loop:``) for graceful
352
+ shutdown. When the sync API was used, the store/pool live on the dedicated sync
353
+ loop and are torn down on it (a fresh ``asyncio.run`` could not close a pool bound
354
+ to another loop — the bug this avoids); otherwise the close is driven via
355
+ ``asyncio.run``. Called from inside a running loop it only schedules + warns.
267
356
  """
268
- if not self._owns_store or self.store is None:
269
- return
270
- store = self.store
271
- try:
272
- asyncio.get_running_loop()
273
- except RuntimeError:
274
- asyncio.run(store.close())
275
- else:
276
- logger.warning(
277
- "StatefulAgentLoop.close() called inside a running event loop; "
278
- "use 'await loop.aclose()' for graceful async shutdown"
279
- )
280
- asyncio.ensure_future(store.close())
357
+ runner = self._sync_runner
358
+ # Let in-flight background tasks finish + persist their terminal status before the
359
+ # store/loop is torn down (a finishing task's write-back targets the runner loop,
360
+ # which is still alive here); then recover any already-deferred ones.
361
+ if self.store is not None and runner is not None:
362
+ from power_loop.tools.default_tools import BG
363
+
364
+ try:
365
+ BG.join_pending(timeout=5.0)
366
+ runner.run(BG.flush_orphaned(self.store))
367
+ except Exception: # pragma: no cover - drain must never block teardown
368
+ logger.warning("close: background-task drain failed; continuing", exc_info=True)
369
+ store = self.store if self._owns_store else None
370
+ if store is not None:
371
+ if runner is not None:
372
+ # Store/pool were opened on the dedicated loop; close them there.
373
+ runner.run(store.close())
374
+ self.store = None
375
+ else:
376
+ try:
377
+ asyncio.get_running_loop()
378
+ except RuntimeError:
379
+ asyncio.run(store.close())
380
+ self.store = None
381
+ else:
382
+ logger.warning(
383
+ "StatefulAgentLoop.close() called inside a running event loop; "
384
+ "use 'await loop.aclose()' for graceful async shutdown"
385
+ )
386
+ # Keep a strong reference: a bare ensure_future() returns a task nothing
387
+ # holds, which the GC can collect mid-flight ('Task was destroyed but it
388
+ # is pending') so store.close() never runs and the connection/pool leaks.
389
+ self._orphaned_close_task = asyncio.ensure_future(store.close())
390
+ self.store = None
391
+ if runner is not None:
392
+ runner.close()
393
+ self._sync_runner = None
281
394
 
282
395
  async def aclose(self, *, drain_timeout_s: float = 30.0) -> None:
283
396
  """Graceful, async shutdown: quiesce, then stop.
@@ -318,7 +431,18 @@ class StatefulAgentLoop:
318
431
  await self.event_bus.drain(timeout=drain_timeout_s)
319
432
  except Exception: # pragma: no cover - drain must never block teardown
320
433
  logger.warning("aclose: event-bus drain raised; continuing", exc_info=True)
321
- # (4) checkpoint + close the owned store (only if it was ever opened).
434
+ # (4) let in-flight background tasks finish so their terminal status write-back
435
+ # lands on the still-open store/loop, then recover any that were already deferred —
436
+ # otherwise closing the store here would strand them at 'running' forever.
437
+ if self.store is not None:
438
+ from power_loop.tools.default_tools import BG
439
+
440
+ try:
441
+ await asyncio.to_thread(BG.join_pending, drain_timeout_s)
442
+ await BG.flush_orphaned(self.store)
443
+ except Exception: # pragma: no cover - drain must never block teardown
444
+ logger.warning("aclose: background-task drain failed; continuing", exc_info=True)
445
+ # (5) checkpoint + close the owned store (only if it was ever opened).
322
446
  if self._owns_store and self.store is not None:
323
447
  try:
324
448
  await self.store.checkpoint(mode="TRUNCATE")
@@ -339,14 +463,16 @@ class StatefulAgentLoop:
339
463
  async def close_session(self, session_id: str, *, cascade: bool = True) -> int:
340
464
  """Physically delete the session and (by default) its LINKED subtree."""
341
465
  store = await self._ensure_store()
342
- n = await store.close_session(session_id, cascade=cascade)
343
- # Drop the per-session in-memory bookkeeping so a long-lived loop that
344
- # cycles through many sessions doesn't leak a Lock per session id (C12).
345
- self._locks.pop(session_id, None)
346
- self._follow_up_queue_locks.pop(session_id, None)
347
- self._follow_up_queues.pop(session_id, None)
348
- self._cache_invalidate(session_id)
349
- return n
466
+ deleted_ids = await store.close_session_tree(session_id, cascade=cascade)
467
+ # Drop the per-session in-memory bookkeeping for EVERY removed session so a long-lived
468
+ # loop that cycles through many sessions doesn't leak a Lock/queue/cache entry per id
469
+ # for the directly-closed session (C12) AND each cascade-deleted descendant (C4).
470
+ for sid in {session_id, *deleted_ids}:
471
+ self._locks.pop(sid, None)
472
+ self._follow_up_queue_locks.pop(sid, None)
473
+ self._follow_up_queues.pop(sid, None)
474
+ self._cache_invalidate(sid)
475
+ return len(deleted_ids)
350
476
 
351
477
  # ── primary API ───────────────────────────────────────────────────────
352
478
 
@@ -379,7 +505,7 @@ class StatefulAgentLoop:
379
505
  state = await store.get_state(session_id)
380
506
  rows = await store.load_active_messages(session_id)
381
507
  if state is not None:
382
- self._cache_put(session_id, state.next_seq, rows)
508
+ self._cache_put(session_id, state.next_seq, rows, state.last_compact_seq)
383
509
  return True
384
510
 
385
511
  async def send(
@@ -462,6 +588,29 @@ class StatefulAgentLoop:
462
588
  user_input, sid, stop_event=stop_event, tools=tools, system_prompt=system_prompt
463
589
  )
464
590
 
591
+ def _run_sync(self, coro: Coroutine[Any, Any, Any]) -> Any:
592
+ """Drive ``coro`` to completion on the loop's dedicated sync event loop.
593
+
594
+ All blocking sync entry points funnel through here so an owned PG/MySQL pool stays
595
+ bound to a single, long-lived loop (see :class:`_SyncLoopRunner`). Raises if called
596
+ from within a running event loop — use the async methods (``await loop.send(...)``)
597
+ in that case.
598
+ """
599
+ try:
600
+ asyncio.get_running_loop()
601
+ except RuntimeError:
602
+ pass
603
+ else:
604
+ coro.close() # avoid "coroutine was never awaited"
605
+ raise RuntimeError(
606
+ "sync API (send_sync/follow_up_sync) called from within a running event "
607
+ "loop; await the async method (loop.send / loop.follow_up) instead"
608
+ )
609
+ with self._sync_runner_lock:
610
+ if self._sync_runner is None:
611
+ self._sync_runner = _SyncLoopRunner()
612
+ return self._sync_runner.run(coro)
613
+
465
614
  def follow_up_sync(
466
615
  self,
467
616
  user_input: str | LoopMessage,
@@ -471,7 +620,7 @@ class StatefulAgentLoop:
471
620
  tools: Sequence[str] | ToolRegistry | None = None,
472
621
  system_prompt: str | None = None,
473
622
  ) -> StatefulResult | FollowUpQueued:
474
- return asyncio.run(
623
+ return self._run_sync(
475
624
  self.follow_up(
476
625
  user_input,
477
626
  session_id,
@@ -481,6 +630,20 @@ class StatefulAgentLoop:
481
630
  )
482
631
  )
483
632
 
633
+ def new_session_sync(
634
+ self,
635
+ *,
636
+ metadata: dict[str, Any] | None = None,
637
+ system_prompt: str | None = None,
638
+ ) -> str:
639
+ """Synchronous :meth:`new_session`. Use this (not ``asyncio.run(loop.new_session())``)
640
+ to bootstrap a session for the sync API: it runs on the loop's dedicated sync event
641
+ loop, so an owned PG/MySQL pool opens on the SAME loop that ``send_sync`` later uses
642
+ (a throwaway ``asyncio.run`` would bind the pool to a loop that is then closed)."""
643
+ return self._run_sync(
644
+ self.new_session(metadata=metadata, system_prompt=system_prompt)
645
+ )
646
+
484
647
  def send_sync(
485
648
  self,
486
649
  user_input: str | LoopMessage,
@@ -491,7 +654,7 @@ class StatefulAgentLoop:
491
654
  system_prompt: str | None = None,
492
655
  heal_pending: bool = False,
493
656
  ) -> StatefulResult:
494
- return asyncio.run(
657
+ return self._run_sync(
495
658
  self.send(
496
659
  user_input,
497
660
  session_id,
@@ -944,16 +1107,16 @@ class StatefulAgentLoop:
944
1107
  # already advanced by the just-persisted user input), reuse it and skip the O(active-
945
1108
  # history) load entirely; otherwise load in full and (re)populate the cache.
946
1109
  active_rows: list[MessageRow] | None = None
947
- cache_token: int | None = None
1110
+ cache_token: tuple[int, int] | None = None
948
1111
  if cache_eligible:
949
1112
  state = await store.get_state(sid)
950
1113
  if state is not None:
951
- cache_token = state.next_seq
952
- active_rows = self._cache_get(sid, state.next_seq)
1114
+ cache_token = (state.next_seq, state.last_compact_seq)
1115
+ active_rows = self._cache_get(sid, state.next_seq, state.last_compact_seq)
953
1116
  if active_rows is None:
954
1117
  active_rows = await store.load_active_messages(sid)
955
1118
  if cache_eligible and cache_token is not None:
956
- self._cache_put(sid, cache_token, active_rows)
1119
+ self._cache_put(sid, cache_token[0], active_rows, cache_token[1])
957
1120
  history = [_row_to_loop_message(r) for r in active_rows]
958
1121
  # Mirror loaded seqs into the sink so the compactor can translate
959
1122
  # in-memory indices back to store rows when it folds messages. Pass the
@@ -1014,16 +1177,7 @@ class StatefulAgentLoop:
1014
1177
  # extend with the active tail this send appended (a cheap O(delta) read, incl. any
1015
1178
  # follow-up rows drained mid-run) so back-to-back sends stay on the fast path. ──
1016
1179
  if cache_eligible:
1017
- if sink.compactions_applied > 0:
1018
- self._cache_invalidate(sid)
1019
- else:
1020
- entry = self._session_cache.get(sid)
1021
- post_state = await store.get_state(sid)
1022
- if entry is not None and post_state is not None:
1023
- if post_state.next_seq != entry.next_seq:
1024
- delta = await store.load_active_messages(sid, after_seq=entry.next_seq)
1025
- entry.rows.extend(delta)
1026
- entry.next_seq = post_state.next_seq
1180
+ await self._refresh_window_cache_after_send(sid, store)
1027
1181
  else:
1028
1182
  # A pre-primed sink (resume()/submit_input()) durably appended rows out-of-band of
1029
1183
  # the cache; drop any live entry now so it can't be served stale. (The contiguity
@@ -30,11 +30,14 @@ from power_loop.runtime.store.db import Params, Row
30
30
  from power_loop.runtime.store.dialect import Dialect, MySQLDialect
31
31
 
32
32
 
33
- def _args(params: Params) -> tuple[Any, ...] | None:
34
- # Pass None for a parameterless statement so the driver skips its ``query % args``
35
- # %-formatting pass entirely (our DDL has no ``%`` literals, and dialect.translate
36
- # already doubled any that a parameterized statement might carry).
37
- return tuple(params) if params else None
33
+ def _args(params: Params) -> tuple[Any, ...]:
34
+ # Always hand the driver a tuple (empty when there are no binds) so PyMySQL/aiomysql
35
+ ALWAYS run their ``query % escaped_args`` pass — that pass is what collapses the
36
+ # ``%%`` that ``dialect.translate`` doubles back down to a single literal ``%``. If we
37
+ # returned None for a parameterless statement, the driver would SKIP that pass and a
38
+ # ``%`` literal would reach MySQL as a stray ``%%`` (e.g. ``LIKE 'pl\_%'`` → ``'pl\_%%'``).
39
+ # ``query % ()`` is a no-op for SQL with no ``%`` and collapses ``%%`` otherwise.
40
+ return tuple(params)
38
41
 
39
42
 
40
43
  class _MyTransaction:
@@ -3,15 +3,18 @@
3
3
  Wraps a single stdlib ``sqlite3`` connection (``check_same_thread=False``, autocommit
4
4
  so transaction boundaries are explicit) and runs every statement in a worker thread via
5
5
  ``asyncio.to_thread`` so the async store never blocks the event loop. An ``asyncio.Lock``
6
- serializes writers — preserving SQLite's one-writer model — so a transaction's
7
- ``SELECT next_seq → … → UPDATE`` is atomic without DB row locks. WAL keeps readers
8
- non-blocking. (A dedicated read pool can be layered later; for now reads share the write
9
- connection under the lock.)
6
+ serializes writers IN-PROCESS — preserving SQLite's one-writer model — so a transaction's
7
+ ``SELECT next_seq → … → UPDATE`` is atomic without DB row locks. ACROSS processes (two
8
+ handles to one file) ``transaction()`` uses ``BEGIN IMMEDIATE`` so the RESERVED write lock
9
+ is taken up front and ``busy_timeout`` serializes contenders instead of deadlocking on a
10
+ lock upgrade. WAL keeps readers non-blocking. (A dedicated read pool can be layered later;
11
+ for now reads share the write connection under the lock.)
10
12
  """
11
13
 
12
14
  from __future__ import annotations
13
15
 
14
16
  import asyncio
17
+ import logging
15
18
  import sqlite3
16
19
  from collections.abc import AsyncIterator
17
20
  from contextlib import asynccontextmanager
@@ -19,6 +22,8 @@ from contextlib import asynccontextmanager
19
22
  from power_loop.runtime.store.db import Params, Row
20
23
  from power_loop.runtime.store.dialect import Dialect, SqliteDialect
21
24
 
25
+ logger = logging.getLogger(__name__)
26
+
22
27
 
23
28
  class _SqliteTransaction:
24
29
  """Lock-free statement runner; the owning Database holds its write lock for the
@@ -58,9 +63,14 @@ class SqliteDatabase:
58
63
  conn = sqlite3.connect(path, check_same_thread=False, isolation_level=None)
59
64
  conn.row_factory = sqlite3.Row
60
65
  if path != ":memory:":
66
+ # auto_vacuum MUST be chosen before the db header is initialized — i.e. before
67
+ # WAL touches it / before the first table is created — otherwise the PRAGMA is a
68
+ # silent no-op (mode stays NONE) and incremental_vacuum reclaims nothing. So it
69
+ # has to run BEFORE journal_mode=WAL. (DBs created by an older build where the
70
+ # order was reversed need a one-time full VACUUM to switch the mode.)
71
+ conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
61
72
  conn.execute("PRAGMA journal_mode=WAL")
62
73
  conn.execute("PRAGMA synchronous=NORMAL")
63
- conn.execute("PRAGMA auto_vacuum=INCREMENTAL")
64
74
  conn.execute("PRAGMA foreign_keys=ON")
65
75
  conn.execute("PRAGMA busy_timeout=5000")
66
76
  return cls(conn, path=path)
@@ -86,17 +96,44 @@ class SqliteDatabase:
86
96
  return await asyncio.to_thread(self._b_fetchall, sql, params)
87
97
 
88
98
  # ── transaction ────────────────────────────────────────────────────────────
99
+ async def _safe_rollback(self) -> None:
100
+ """Return the shared connection to a no-open-transaction state without letting a
101
+ failing ROLLBACK mask the original error. Leaving an open transaction here would
102
+ wedge the one shared writer permanently ('cannot start a transaction within a
103
+ transaction') for the rest of the process."""
104
+ try:
105
+ await self._exec("ROLLBACK")
106
+ except Exception:
107
+ logger.warning("sqlite: ROLLBACK during transaction recovery failed", exc_info=True)
108
+
89
109
  @asynccontextmanager
90
110
  async def transaction(self) -> AsyncIterator[_SqliteTransaction]:
91
111
  async with self._lock:
92
- await self._exec("BEGIN")
112
+ # BEGIN IMMEDIATE (not plain/DEFERRED BEGIN): every store transaction is a
113
+ # read-modify-write (e.g. append_message's SELECT next_seq → INSERT), so take the
114
+ # RESERVED write lock up front. A DEFERRED BEGIN takes only a SHARED lock at the
115
+ # leading SELECT and upgrades at the first write — and SQLite returns SQLITE_BUSY
116
+ # *immediately* on a lock-UPGRADE conflict (busy_timeout does NOT retry upgrades),
117
+ # so one of two processes sharing the file fails at once ('database is locked'). IMMEDIATE makes
118
+ # busy_timeout WAIT and serialize them instead. In-process the asyncio.Lock already
119
+ # serializes, so this only adds the cross-process guarantee the store advertises.
120
+ await self._exec("BEGIN IMMEDIATE")
93
121
  try:
94
122
  yield _SqliteTransaction(self)
95
123
  except BaseException:
96
- await self._exec("ROLLBACK")
124
+ # Roll back, but never let a failed ROLLBACK replace the caller's
125
+ # exception (callers pattern-match on IntegrityError / domain ValueError).
126
+ await self._safe_rollback()
97
127
  raise
98
128
  else:
99
- await self._exec("COMMIT")
129
+ try:
130
+ await self._exec("COMMIT")
131
+ except BaseException:
132
+ # A failed COMMIT (disk full / I/O error / SQLITE_BUSY on a write
133
+ # upgrade) can leave the transaction open; roll it back so the connection
134
+ # isn't wedged for every subsequent transaction, then surface the error.
135
+ await self._safe_rollback()
136
+ raise
100
137
 
101
138
  # ── autocommit reads / single writes (lock-guarded) ────────────────────────
102
139
  async def fetchone(self, sql: str, params: Params = ()) -> Row | None:
@@ -118,15 +155,23 @@ class SqliteDatabase:
118
155
  self._closed = True
119
156
  await asyncio.to_thread(self._conn.close)
120
157
 
158
+ def _check_open(self) -> None:
159
+ # Guard maintenance ops the way close()/the read path are guarded: a statement on a
160
+ # closed sqlite3 connection raises an opaque ProgrammingError; surface a clear one.
161
+ if self._closed:
162
+ raise RuntimeError("operation on a closed SQLite store")
163
+
121
164
  # ── Maintenance capability (SQLite-only; see store/capabilities.py) ─────────
122
165
  async def checkpoint(self, *, mode: str = "TRUNCATE") -> None:
123
166
  if mode not in ("PASSIVE", "FULL", "RESTART", "TRUNCATE"):
124
167
  raise ValueError(f"invalid checkpoint mode: {mode!r}")
125
168
  async with self._lock:
169
+ self._check_open()
126
170
  await asyncio.to_thread(self._conn.execute, f"PRAGMA wal_checkpoint({mode})")
127
171
 
128
172
  async def vacuum(self, *, incremental: bool = True) -> None:
129
173
  async with self._lock:
174
+ self._check_open()
130
175
  # VACUUM cannot run inside a transaction; this conn is in autocommit mode.
131
176
  sql = "PRAGMA incremental_vacuum" if incremental else "VACUUM"
132
177
  await asyncio.to_thread(self._conn.execute, sql)
@@ -140,6 +185,7 @@ class SqliteDatabase:
140
185
  dest.close()
141
186
 
142
187
  async with self._lock:
188
+ self._check_open()
143
189
  await asyncio.to_thread(_backup)
144
190
 
145
191
 
@@ -267,8 +267,10 @@ class MySQLDialect:
267
267
 
268
268
  def translate(self, sql: str) -> str:
269
269
  # The driver (aiomysql/PyMySQL) uses ``%s`` and runs ``query % args``, so a literal
270
- # ``%`` must be doubled. Our SQL contains no ``%`` literals today, but escape first
271
- # to stay correct if one is ever added, then map qmark → ``%s``.
270
+ # ``%`` must be doubled here and is collapsed back by that pass. This is only correct
271
+ # because backends/mysql.py:_args ALWAYS passes a tuple (never None), so the
272
+ # ``query % args`` collapse runs even for parameterless statements — otherwise a
273
+ # doubled ``%%`` would leak to MySQL verbatim. Escape ``%`` first, then qmark → ``%s``.
272
274
  return sql.replace("%", "%%").replace("?", "%s")
273
275
 
274
276
  def ddl(self, prefix: str) -> list[str]: