npm - @vellumai/assistant - Versions diffs - 0.6.5 → 0.6.6 - Mend

@vellumai/assistant 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (443)

package/AGENTS.md +9 -1
package/ARCHITECTURE.md +15 -17
package/Dockerfile +6 -4
package/__tests__/permissions/gateway-threshold-reader.test.ts +283 -0
package/docs/architecture/integrations.md +32 -39
package/docs/architecture/memory.md +25 -30
package/docs/architecture/security.md +7 -6
package/docs/browser-use-architecture-phase2.md +63 -20
package/docs/plugins.md +761 -0
package/examples/plugins/echo/README.md +132 -0
package/examples/plugins/echo/package.json +17 -0
package/examples/plugins/echo/register.ts +187 -0
package/node_modules/@vellumai/egress-proxy/src/types.ts +19 -0
package/openapi.yaml +212 -68
package/package.json +1 -1
package/src/__tests__/app-compiler.test.ts +57 -0
package/src/__tests__/approval-cascade.test.ts +7 -2
package/src/__tests__/auto-analysis-end-to-end.test.ts +1 -0
package/src/__tests__/avatar-generator.test.ts +4 -2
package/src/__tests__/bundled-asset.test.ts +6 -6
package/src/__tests__/catalog-cache.test.ts +69 -0
package/src/__tests__/checker.test.ts +459 -171
package/src/__tests__/circuit-breaker-pipeline.test.ts +406 -0
package/src/__tests__/compaction-events.test.ts +501 -0
package/src/__tests__/compaction-pipeline.test.ts +210 -0
package/src/__tests__/compaction-strip-metadata-clear.test.ts +181 -0
package/src/__tests__/compaction-timeout-recovery.test.ts +262 -0
package/src/__tests__/config-model-image-provider.test.ts +110 -0
package/src/__tests__/config-schema.test.ts +22 -9
package/src/__tests__/config-watcher-cleanup-throttle.test.ts +0 -4
package/src/__tests__/contacts-tools.test.ts +26 -0
package/src/__tests__/context-overflow-policy.test.ts +7 -7
package/src/__tests__/context-window-manager.test.ts +355 -4
package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
package/src/__tests__/conversation-agent-loop-overflow.test.ts +26 -30
package/src/__tests__/conversation-agent-loop.test.ts +30 -141
package/src/__tests__/conversation-confirmation-signals.test.ts +6 -1
package/src/__tests__/conversation-history-web-search.test.ts +1 -0
package/src/__tests__/conversation-init.benchmark.test.ts +2 -16
package/src/__tests__/conversation-pairing.test.ts +174 -10
package/src/__tests__/conversation-pre-run-repair.test.ts +4 -1
package/src/__tests__/conversation-process-callsite.test.ts +3 -0
package/src/__tests__/conversation-provider-retry-repair.test.ts +16 -7
package/src/__tests__/conversation-queue.test.ts +29 -14
package/src/__tests__/conversation-routes-disk-view.test.ts +7 -6
package/src/__tests__/conversation-runtime-assembly.test.ts +155 -110
package/src/__tests__/conversation-runtime-workspace.test.ts +23 -38
package/src/__tests__/conversation-seed-composer.test.ts +2 -2
package/src/__tests__/conversation-slash-queue.test.ts +7 -2
package/src/__tests__/conversation-slash-unknown.test.ts +25 -2
package/src/__tests__/conversation-speed-override.test.ts +6 -1
package/src/__tests__/conversation-title-service.test.ts +116 -0
package/src/__tests__/conversation-tool-setup-app-refresh.test.ts +41 -2
package/src/__tests__/conversation-usage.test.ts +1 -1
package/src/__tests__/conversation-workspace-cache-state.test.ts +4 -1
package/src/__tests__/conversation-workspace-injection.test.ts +3 -0
package/src/__tests__/conversation-workspace-tool-tracking.test.ts +4 -1
package/src/__tests__/credential-health-service.test.ts +78 -9
package/src/__tests__/credential-security-invariants.test.ts +2 -2
package/src/__tests__/db-schedule-syntax-migration.test.ts +1 -0
package/src/__tests__/empty-response-pipeline.test.ts +305 -0
package/src/__tests__/extension-id-sync-guard.test.ts +3 -3
package/src/__tests__/first-greeting.test.ts +247 -5
package/src/__tests__/headless-browser-mode.test.ts +57 -0
package/src/__tests__/history-repair-pipeline.test.ts +399 -0
package/src/__tests__/host-browser-e2e-cloud.test.ts +307 -0
package/src/__tests__/host-browser-e2e-self-hosted.test.ts +3 -3
package/src/__tests__/host-proxy-interface.test.ts +36 -2
package/src/__tests__/image-credentials.test.ts +137 -0
package/src/__tests__/image-service-dispatcher.test.ts +186 -0
package/src/__tests__/injector-chain.test.ts +526 -0
package/src/__tests__/intent-routing.test.ts +0 -26
package/src/__tests__/llm-call-pipeline.test.ts +285 -0
package/src/__tests__/llm-schema.test.ts +1 -1
package/src/__tests__/media-generate-image.test.ts +119 -13
package/src/__tests__/memory-retrieval-pipeline.test.ts +401 -0
package/src/__tests__/memory-upsert-concurrency.test.ts +1 -0
package/src/__tests__/migration-import-from-url.test.ts +5 -68
package/src/__tests__/model-intents.test.ts +4 -2
package/src/__tests__/notification-broadcaster.test.ts +3 -3
package/src/__tests__/notification-decision-strategy.test.ts +0 -11
package/src/__tests__/notification-schedule-notify-dedup.test.ts +108 -0
package/src/__tests__/oauth-apps-routes.test.ts +1 -1
package/src/__tests__/oauth-cli.test.ts +14 -12
package/src/__tests__/oauth-connect-orchestrator.test.ts +4 -13
package/src/__tests__/oauth-provider-serializer.test.ts +6 -4
package/src/__tests__/oauth-provider-visibility.test.ts +3 -5
package/src/__tests__/oauth-providers-routes.test.ts +3 -2
package/src/__tests__/oauth-store.test.ts +41 -76
package/src/__tests__/onboarding-template-contract.test.ts +16 -64
package/src/__tests__/openai-image-service.test.ts +368 -0
package/src/__tests__/overflow-reduce-pipeline.test.ts +676 -0
package/src/__tests__/permission-checker-host-gate.test.ts +0 -24
package/src/__tests__/persist-onboarding-artifacts.test.ts +266 -0
package/src/__tests__/persistence-pipeline.test.ts +377 -0
package/src/__tests__/pipeline-runner.test.ts +565 -0
package/src/__tests__/platform.test.ts +5 -2
package/src/__tests__/plugin-bootstrap.test.ts +483 -0
package/src/__tests__/plugin-registry.test.ts +273 -0
package/src/__tests__/plugin-route-contribution.test.ts +288 -0
package/src/__tests__/plugin-skill-contribution.test.ts +367 -0
package/src/__tests__/plugin-tool-contribution.test.ts +286 -0
package/src/__tests__/plugin-types.test.ts +320 -0
package/src/__tests__/pricing.test.ts +44 -12
package/src/__tests__/proxy-approval-callback.test.ts +69 -8
package/src/__tests__/reaction-persistence.test.ts +1 -0
package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +1 -0
package/src/__tests__/registry.test.ts +0 -2
package/src/__tests__/schedule-routes.test.ts +131 -1
package/src/__tests__/scheduler-recurrence.test.ts +14 -70
package/src/__tests__/scheduler-reuse-conversation.test.ts +10 -50
package/src/__tests__/secret-detection-handler.test.ts +0 -10
package/src/__tests__/shell-identity.test.ts +0 -134
package/src/__tests__/suggestion-routes.test.ts +103 -4
package/src/__tests__/task-memory-cleanup.test.ts +1 -0
package/src/__tests__/task-scheduler.test.ts +3 -15
package/src/__tests__/test-preload.ts +11 -0
package/src/__tests__/title-generate-pipeline.test.ts +224 -0
package/src/__tests__/token-estimate-pipeline.test.ts +431 -0
package/src/__tests__/tool-error-pipeline.test.ts +244 -0
package/src/__tests__/tool-execute-pipeline.test.ts +431 -0
package/src/__tests__/tool-execution-pipeline.benchmark.test.ts +0 -6
package/src/__tests__/tool-executor-shell-integration.test.ts +7 -10
package/src/__tests__/tool-executor.test.ts +141 -0
package/src/__tests__/tool-result-truncate-pipeline.test.ts +356 -0
package/src/__tests__/tool-result-truncation.test.ts +0 -110
package/src/__tests__/user-plugin-loader.test.ts +191 -0
package/src/__tests__/workspace-migration-046-seed-conversation-starters-callsite.test.ts +185 -0
package/src/__tests__/workspace-migration-049-release-notes-default-sonnet.test.ts +100 -0
package/src/__tests__/workspace-migration-050-seed-main-agent-opus-callsite.test.ts +171 -0
package/src/__tests__/workspace-migration-051-seed-conversation-summarization-callsite.test.ts +252 -0
package/src/__tests__/workspace-migration-remove-hooks.test.ts +99 -0
package/src/__tests__/workspace-policy.test.ts +21 -3
package/src/agent/loop.ts +340 -102
package/src/approvals/__tests__/guardian-feed-event.test.ts +304 -0
package/src/approvals/guardian-request-resolvers.ts +80 -0
package/src/backup/__tests__/backup-worker.test.ts +2 -13
package/src/backup/backup-worker.ts +3 -15
package/src/bundler/app-compiler.ts +84 -1
package/src/calls/call-state.ts +2 -2
package/src/channels/__tests__/types.test.ts +3 -3
package/src/channels/types.ts +6 -4
package/src/cli/__tests__/notifications.test.ts +87 -211
package/src/cli/commands/__tests__/backup.test.ts +1 -1
package/src/cli/commands/__tests__/image-generation.test.ts +255 -35
package/src/cli/commands/__tests__/inference-send.test.ts +12 -0
package/src/cli/commands/__tests__/tts-synthesize.test.ts +12 -0
package/src/cli/commands/backup.ts +2 -2
package/src/cli/commands/clients.ts +138 -0
package/src/cli/commands/completions.ts +2 -9
package/src/cli/commands/conversations.ts +55 -7
package/src/cli/commands/image-generation.ts +33 -34
package/src/cli/commands/notifications.ts +68 -103
package/src/cli/commands/oauth/__tests__/providers-register.test.ts +1 -1
package/src/cli/commands/oauth/__tests__/providers-update.test.ts +1 -1
package/src/cli/commands/oauth/connect.ts +2 -2
package/src/cli/commands/oauth/providers.ts +176 -8
package/src/cli/commands/oauth/status.ts +46 -36
package/src/cli/commands/skills.ts +3 -4
package/src/cli/program.ts +25 -29
package/src/config/__tests__/backup-schema.test.ts +7 -2
package/src/config/bundled-skills/app-builder/SKILL.md +2 -2
package/src/config/bundled-skills/app-builder/references/WIDGETS.md +10 -10
package/src/config/bundled-skills/contacts/tools/contact-merge.ts +66 -87
package/src/config/bundled-skills/contacts/tools/contact-search.ts +28 -51
package/src/config/bundled-skills/contacts/tools/contact-upsert.ts +22 -40
package/src/config/bundled-skills/image-studio/SKILL.md +2 -1
package/src/config/bundled-skills/image-studio/TOOLS.json +2 -1
package/src/config/bundled-skills/image-studio/tools/media-generate-image.ts +23 -39
package/src/config/bundled-skills/messaging/SKILL.md +3 -3
package/src/config/bundled-skills/messaging/tools/__tests__/messaging-feed-events.test.ts +207 -0
package/src/config/bundled-skills/messaging/tools/messaging-archive-by-sender.ts +12 -0
package/src/config/bundled-skills/messaging/tools/messaging-send.ts +58 -0
package/src/config/bundled-skills/schedule/SKILL.md +8 -3
package/src/config/bundled-skills/schedule/TOOLS.json +15 -7
package/src/config/bundled-skills/schedule/references/SCRIPT_MODE_PATTERNS.md +59 -0
package/src/config/bundled-tool-registry.ts +0 -15
package/src/config/feature-flag-registry.json +17 -1
package/src/config/schema.ts +19 -0
package/src/config/schemas/backup.ts +1 -1
package/src/config/schemas/conversations.ts +16 -0
package/src/config/schemas/llm.ts +2 -3
package/src/config/schemas/security.ts +6 -6
package/src/config/schemas/tts.ts +11 -0
package/src/config/skill-state.ts +6 -2
package/src/config/skills.ts +94 -5
package/src/context/__tests__/compact-prompt.test.ts +27 -9
package/src/context/prompts/compact.md +26 -12
package/src/context/tool-result-truncation.ts +3 -63
package/src/context/window-manager.ts +190 -16
package/src/credential-health/credential-health-service.ts +19 -6
package/src/daemon/__tests__/conversation-feed-event.test.ts +317 -0
package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +4 -12
package/src/daemon/__tests__/conversation-tool-setup.test.ts +14 -15
package/src/daemon/config-watcher.ts +0 -2
package/src/daemon/context-overflow-policy.ts +4 -13
package/src/daemon/conversation-agent-loop-handlers.ts +83 -22
package/src/daemon/conversation-agent-loop.ts +984 -683
package/src/daemon/conversation-history.ts +10 -19
package/src/daemon/conversation-lifecycle.ts +37 -19
package/src/daemon/conversation-notifiers.ts +2 -110
package/src/daemon/conversation-process.ts +14 -7
package/src/daemon/conversation-runtime-assembly.ts +532 -411
package/src/daemon/conversation-tool-setup.ts +41 -4
package/src/daemon/conversation.ts +80 -35
package/src/daemon/external-plugins-bootstrap.ts +478 -0
package/src/daemon/first-greeting.ts +191 -14
package/src/daemon/handlers/config-model.ts +11 -0
package/src/daemon/handlers/skills.ts +5 -1
package/src/daemon/lifecycle.ts +33 -68
package/src/daemon/message-types/computer-use.ts +2 -34
package/src/daemon/message-types/conversations.ts +49 -0
package/src/daemon/message-types/messages.ts +12 -0
package/src/daemon/server.ts +5 -3
package/src/daemon/shutdown-handlers.ts +2 -12
package/src/daemon/tool-side-effects.ts +14 -56
package/src/heartbeat/__tests__/heartbeat-feed-event.test.ts +160 -0
package/src/heartbeat/heartbeat-service.ts +24 -1
package/src/home/__tests__/feed-population-integration.test.ts +312 -0
package/src/home/emit-feed-event.ts +7 -0
package/src/home/feed-types.ts +41 -2
package/src/home/rewrite-command-preview.ts +66 -0
package/src/ipc/__tests__/socket-path.test.ts +11 -50
package/src/ipc/cli-client.ts +1 -1
package/src/ipc/cli-server.ts +3 -3
package/src/ipc/gateway-client.ts +4 -1
package/src/ipc/routes/browser-context.ts +2 -0
package/src/ipc/routes/browser.ts +1 -0
package/src/ipc/routes/get-contact.ts +16 -0
package/src/ipc/routes/index.ts +14 -0
package/src/ipc/routes/list-clients.ts +31 -0
package/src/ipc/routes/merge-contacts.ts +17 -0
package/src/ipc/routes/notification.ts +133 -0
package/src/ipc/routes/rename-conversation.ts +59 -0
package/src/ipc/routes/search-contacts.ts +19 -0
package/src/ipc/routes/upsert-contact.ts +25 -0
package/src/ipc/socket-path.ts +14 -38
package/src/media/app-icon-generator.ts +23 -46
package/src/media/avatar-router.ts +26 -41
package/src/media/gemini-image-service.ts +8 -41
package/src/media/image-credentials.ts +73 -0
package/src/media/image-service.ts +85 -0
package/src/media/openai-image-service.ts +131 -0
package/src/media/types.ts +46 -0
package/src/memory/conversation-crud.ts +48 -18
package/src/memory/conversation-queries.ts +57 -4
package/src/memory/conversation-title-service.ts +25 -0
package/src/memory/db-init.ts +8 -0
package/src/memory/embedding-gemini.test.ts +41 -2
package/src/memory/embedding-gemini.ts +6 -1
package/src/memory/graph/bootstrap.test.ts +282 -0
package/src/memory/graph/bootstrap.ts +8 -5
package/src/memory/graph/extraction.ts +10 -2
package/src/memory/graph/graph-search.test.ts +1 -0
package/src/memory/graph/inspect.ts +2 -2
package/src/memory/graph/retriever.ts +10 -3
package/src/memory/migrations/041-approval-prompt-ts-tracker.ts +26 -0
package/src/memory/migrations/149-oauth-tables.ts +1 -0
package/src/memory/migrations/223-schedule-script-column.ts +11 -0
package/src/memory/migrations/224-oauth-providers-managed-service-is-paid.ts +24 -0
package/src/memory/migrations/225-oauth-providers-available-scopes.ts +13 -0
package/src/memory/migrations/index.ts +4 -0
package/src/memory/pkb/pkb-index.test.ts +1 -0
package/src/memory/pkb/pkb-reconcile.test.ts +1 -0
package/src/memory/pkb/pkb-search.test.ts +65 -4
package/src/memory/pkb/pkb-search.ts +40 -18
package/src/memory/qdrant-client.test.ts +60 -0
package/src/memory/qdrant-client.ts +25 -0
package/src/memory/schema/infrastructure.ts +1 -0
package/src/memory/schema/oauth.ts +4 -1
package/src/messaging/providers/slack/render-transcript.test.ts +77 -29
package/src/messaging/providers/slack/render-transcript.ts +58 -0
package/src/notifications/conversation-pairing.ts +78 -19
package/src/notifications/copy-composer.ts +0 -5
package/src/notifications/emit-signal.ts +1 -1
package/src/notifications/signal.ts +1 -2
package/src/oauth/AGENTS.md +1 -1
package/src/oauth/__tests__/identity-verifier.test.ts +2 -1
package/src/oauth/connect-orchestrator.ts +8 -34
package/src/oauth/connect-types.ts +6 -10
package/src/oauth/manual-token-connection.ts +23 -0
package/src/oauth/oauth-store.ts +30 -14
package/src/oauth/provider-serializer.ts +6 -1
package/src/oauth/seed-providers.ts +56 -108
package/src/outbound-proxy/http-forwarder.ts +9 -0
package/src/permissions/approval-policy.test.ts +293 -18
package/src/permissions/approval-policy.ts +110 -58
package/src/permissions/arg-parser.test.ts +161 -0
package/src/permissions/arg-parser.ts +141 -0
package/src/permissions/bash-risk-classifier.test.ts +414 -2
package/src/permissions/bash-risk-classifier.ts +303 -60
package/src/permissions/checker.ts +157 -29
package/src/permissions/command-registry.test.ts +239 -0
package/src/permissions/command-registry.ts +234 -54
package/src/permissions/defaults.ts +5 -4
package/src/permissions/gateway-threshold-reader.ts +196 -0
package/src/permissions/prompter.ts +4 -0
package/src/permissions/risk-types.ts +61 -4
package/src/permissions/schedule-risk-classifier.test.ts +129 -0
package/src/permissions/schedule-risk-classifier.ts +85 -0
package/src/permissions/shell-identity.ts +2 -42
package/src/permissions/types.ts +2 -0
package/src/permissions/workspace-policy.ts +8 -3
package/src/plugins/defaults/circuit-breaker.ts +146 -0
package/src/plugins/defaults/compaction.ts +145 -0
package/src/plugins/defaults/empty-response.ts +126 -0
package/src/plugins/defaults/history-repair.ts +85 -0
package/src/plugins/defaults/index.ts +116 -0
package/src/plugins/defaults/injectors.ts +491 -0
package/src/plugins/defaults/llm-call.ts +82 -0
package/src/plugins/defaults/memory-retrieval.ts +226 -0
package/src/plugins/defaults/overflow-reduce.ts +181 -0
package/src/plugins/defaults/persistence.ts +129 -0
package/src/plugins/defaults/title-generate.ts +95 -0
package/src/plugins/defaults/token-estimate.ts +104 -0
package/src/plugins/defaults/tool-error.ts +126 -0
package/src/plugins/defaults/tool-execute.ts +89 -0
package/src/plugins/defaults/tool-result-truncate.ts +88 -0
package/src/plugins/pipeline.ts +316 -0
package/src/plugins/plugin-skill-contributions.ts +292 -0
package/src/plugins/registry.ts +241 -0
package/src/plugins/types.ts +1134 -0
package/src/plugins/user-loader.ts +177 -0
package/src/prompts/templates/BOOTSTRAP.md +27 -77
package/src/providers/model-catalog.ts +52 -29
package/src/providers/model-intents.ts +1 -1
package/src/providers/openrouter/client.ts +5 -1
package/src/providers/speech-to-text/deepgram-realtime.test.ts +61 -0
package/src/providers/speech-to-text/deepgram-realtime.ts +57 -0
package/src/providers/speech-to-text/xai-realtime.test.ts +72 -4
package/src/providers/speech-to-text/xai-realtime.ts +39 -14
package/src/runtime/AGENTS.md +25 -16
package/src/runtime/__tests__/browser-extension-pair-routes.test.ts +3 -3
package/src/runtime/__tests__/client-registry.test.ts +293 -0
package/src/runtime/client-registry.ts +261 -0
package/src/runtime/http-server.ts +77 -8
package/src/runtime/http-types.ts +0 -2
package/src/runtime/migrations/vbundle-builder.ts +1 -22
package/src/runtime/routes/approval-prompt-ts-tracker.ts +51 -31
package/src/runtime/routes/approval-routes.ts +17 -0
package/src/runtime/routes/browser-extension-pair-routes.ts +27 -8
package/src/runtime/routes/conversation-routes.ts +223 -116
package/src/runtime/routes/inbound-message-handler.ts +88 -13
package/src/runtime/routes/memory-item-routes.test.ts +1 -0
package/src/runtime/routes/migration-routes.ts +0 -3
package/src/runtime/routes/playground/__tests__/force-compact.test.ts +284 -0
package/src/runtime/routes/playground/__tests__/guard.test.ts +80 -0
package/src/runtime/routes/playground/__tests__/inject-failures.test.ts +294 -0
package/src/runtime/routes/playground/__tests__/reset-circuit.test.ts +271 -0
package/src/runtime/routes/playground/__tests__/seed-conversation.test.ts +202 -0
package/src/runtime/routes/playground/__tests__/seeded-conversations.test.ts +309 -0
package/src/runtime/routes/playground/__tests__/state.test.ts +224 -0
package/src/runtime/routes/playground/conversation-not-found.ts +29 -0
package/src/runtime/routes/playground/deps.ts +56 -0
package/src/runtime/routes/playground/force-compact.ts +73 -0
package/src/runtime/routes/playground/guard.ts +37 -0
package/src/runtime/routes/playground/index.ts +28 -0
package/src/runtime/routes/playground/inject-failures.ts +159 -0
package/src/runtime/routes/playground/reset-circuit.ts +115 -0
package/src/runtime/routes/playground/seed-conversation.ts +139 -0
package/src/runtime/routes/playground/seeded-conversations.ts +78 -0
package/src/runtime/routes/playground/state.ts +78 -0
package/src/runtime/routes/schedule-routes.ts +89 -8
package/src/runtime/skill-route-registry.ts +75 -15
package/src/schedule/run-script.ts +68 -0
package/src/schedule/schedule-store.ts +7 -1
package/src/schedule/scheduler.ts +48 -8
package/src/skills/catalog-cache.ts +12 -5
package/src/tools/browser/__tests__/browser-status.test.ts +189 -0
package/src/tools/browser/browser-execution.ts +88 -19
package/src/tools/browser/cdp-client/__tests__/extension-cdp-client.test.ts +230 -0
package/src/tools/browser/cdp-client/__tests__/factory.test.ts +146 -3
package/src/tools/browser/cdp-client/extension-cdp-client.ts +54 -3
package/src/tools/browser/cdp-client/factory.ts +15 -4
package/src/tools/executor.ts +126 -74
package/src/tools/network/script-proxy/session-manager.ts +37 -1
package/src/tools/permission-checker.ts +98 -49
package/src/tools/policy-context.ts +4 -0
package/src/tools/registry.ts +140 -3
package/src/tools/schedule/create.ts +23 -8
package/src/tools/schedule/update.ts +3 -1
package/src/tools/secret-detection-handler.ts +0 -51
package/src/tools/system/avatar-generator.ts +6 -2
package/src/tools/types.ts +28 -2
package/src/util/platform.ts +7 -2
package/src/util/pricing.ts +26 -3
package/src/workspace/migrations/006-services-config.ts +2 -4
package/src/workspace/migrations/022-move-hooks-to-workspace.ts +2 -3
package/src/workspace/migrations/041-backfill-google-gmail-settings-scope.ts +3 -4
package/src/workspace/migrations/046-seed-conversation-starters-callsite.ts +108 -0
package/src/workspace/migrations/047-remove-watch-callsites.ts +54 -0
package/src/workspace/migrations/048-remove-workspace-hooks.ts +81 -0
package/src/workspace/migrations/049-release-notes-default-sonnet.ts +80 -0
package/src/workspace/migrations/050-seed-main-agent-opus-callsite.ts +86 -0
package/src/workspace/migrations/051-seed-conversation-summarization-callsite.ts +128 -0
package/src/workspace/migrations/registry.ts +12 -0
package/tsconfig.json +1 -1
package/hook-templates/debug-prompt-logger/hook.json +0 -7
package/hook-templates/debug-prompt-logger/run.sh +0 -66
package/src/__tests__/compaction-circuit-breaker.test.ts +0 -336
package/src/__tests__/context-overflow-approval.test.ts +0 -156
package/src/__tests__/hooks-blocking.test.ts +0 -178
package/src/__tests__/hooks-cli.test.ts +0 -182
package/src/__tests__/hooks-config.test.ts +0 -108
package/src/__tests__/hooks-discovery.test.ts +0 -211
package/src/__tests__/hooks-integration.test.ts +0 -196
package/src/__tests__/hooks-manager.test.ts +0 -226
package/src/__tests__/hooks-runner.test.ts +0 -175
package/src/__tests__/hooks-settings.test.ts +0 -160
package/src/__tests__/hooks-templates.test.ts +0 -169
package/src/__tests__/hooks-ts-runner.test.ts +0 -170
package/src/__tests__/hooks-watch.test.ts +0 -112
package/src/__tests__/notification-schedule-dedup.test.ts +0 -213
package/src/__tests__/oauth-scope-policy.test.ts +0 -180
package/src/__tests__/send-notification-tool.test.ts +0 -83
package/src/cli/commands/shotgun.ts +0 -266
package/src/config/bundled-skills/conversations/SKILL.md +0 -20
package/src/config/bundled-skills/conversations/TOOLS.json +0 -23
package/src/config/bundled-skills/conversations/tools/rename-conversation.ts +0 -88
package/src/config/bundled-skills/heartbeat/SKILL.md +0 -43
package/src/config/bundled-skills/notifications/SKILL.md +0 -40
package/src/config/bundled-skills/notifications/TOOLS.json +0 -80
package/src/config/bundled-skills/notifications/tools/send-notification.ts +0 -152
package/src/config/bundled-skills/notifications/tools/shared.ts +0 -13
package/src/config/bundled-skills/screen-watch/SKILL.md +0 -27
package/src/config/bundled-skills/screen-watch/TOOLS.json +0 -35
package/src/config/bundled-skills/screen-watch/tools/start-screen-watch.ts +0 -12
package/src/config/bundled-skills/skills-catalog/SKILL.md +0 -84
package/src/daemon/context-overflow-approval.ts +0 -52
package/src/daemon/watch-handler.ts +0 -399
package/src/hooks/cli.ts +0 -253
package/src/hooks/config.ts +0 -100
package/src/hooks/discovery.ts +0 -135
package/src/hooks/manager.ts +0 -179
package/src/hooks/runner.ts +0 -117
package/src/hooks/templates.ts +0 -77
package/src/hooks/types.ts +0 -75
package/src/oauth/scope-policy.ts +0 -89
package/src/runtime/gateway-internal-client.ts +0 -94
package/src/runtime/routes/watch-routes.ts +0 -156
package/src/signals/shotgun.ts +0 -203
package/src/tools/watch/screen-watch.ts +0 -144
package/src/tools/watch/watch-state.ts +0 -142

package/src/daemon/conversation-agent-loop.ts CHANGED

@@ -37,8 +37,8 @@ import {
 } from "../context/token-estimator.js";
 import type { ContextWindowManager } from "../context/window-manager.js";
 import type { ToolProfiler } from "../events/tool-profiling-listener.js";
+import { emitFeedEvent } from "../home/emit-feed-event.js";
 import { writeRelationshipState } from "../home/relationship-state-writer.js";
-import { getHookManager } from "../hooks/manager.js";
 import {
   clearSentryConversationContext,
   setSentryConversationContext,
@@ -47,9 +47,7 @@ import { commitAppTurnChanges } from "../memory/app-git-service.js";
 import { getApp, listAppFiles, resolveAppDir } from "../memory/app-store.js";
 import { enqueueAutoAnalysisOnCompaction } from "../memory/auto-analysis-enqueue.js";
 import {
-  addMessage,
-  clearPkbSystemReminderMetadataForConversation,
-  deleteMessageById,
+  clearStrippedInjectionMetadataForConversation,
   getConversation,
   getConversationOriginChannel,
   getConversationOriginInterface,
@@ -57,22 +55,53 @@ import {
   getMessageById,
   provenanceFromTrustContext,
   updateConversationContextWindow,
-  updateConversationTitle,
-  updateMessageMetadata,
 } from "../memory/conversation-crud.js";
 import { getResolvedConversationDirPath } from "../memory/conversation-directories.js";
 import { syncMessageToDisk } from "../memory/conversation-disk-view.js";
 import {
   isReplaceableTitle,
-  queueGenerateConversationTitle,
   queueRegenerateConversationTitle,
-  UNTITLED_FALLBACK,
 } from "../memory/conversation-title-service.js";
 import type { ConversationGraphMemory } from "../memory/graph/conversation-graph-memory.js";
 import { recordMemoryRecallLog } from "../memory/memory-recall-log-store.js";
 import { PKB_WORKSPACE_SCOPE } from "../memory/pkb/types.js";
 import type { PermissionPrompter } from "../permissions/prompter.js";
-import type { ContentBlock, Message } from "../providers/types.js";
+import { defaultCompactionTerminal } from "../plugins/defaults/compaction.js";
+import { defaultHistoryRepairTerminal } from "../plugins/defaults/history-repair.js";
+import {
+  asDefaultGraphPayload,
+  type DefaultMemoryRetrievalDeps,
+  type GraphMemoryPayload,
+  runDefaultMemoryRetrieval,
+} from "../plugins/defaults/memory-retrieval.js";
+import { defaultPersistenceTerminal } from "../plugins/defaults/persistence.js";
+import { defaultTitleGenerateTerminal } from "../plugins/defaults/title-generate.js";
+import { defaultTokenEstimateTerminal } from "../plugins/defaults/token-estimate.js";
+import { DEFAULT_TIMEOUTS, runPipeline } from "../plugins/pipeline.js";
+import { getMiddlewaresFor } from "../plugins/registry.js";
+import type {
+  CircuitBreakerArgs,
+  CircuitBreakerResult,
+  CompactionArgs,
+  CompactionResult,
+  EstimateArgs,
+  EstimateResult,
+  HistoryRepairArgs,
+  HistoryRepairResult,
+  MemoryArgs,
+  MemoryResult,
+  OverflowReduceArgs,
+  OverflowReduceResult,
+  PersistArgs,
+  PersistResult,
+  TurnContext as PluginTurnContext,
+} from "../plugins/types.js";
+import { PluginExecutionError, PluginTimeoutError } from "../plugins/types.js";
+import type {
+  ContentBlock,
+  Message,
+  ToolDefinition,
+} from "../providers/types.js";
 import type { Provider } from "../providers/types.js";
 import { resolveActorTrust } from "../runtime/actor-trust-resolver.js";
 import { DAEMON_INTERNAL_ASSISTANT_ID } from "../runtime/assistant-scope.js";
@@ -88,7 +117,6 @@ import {
   type AssistantAttachmentDraft,
   cleanAssistantContent,
 } from "./assistant-attachments.js";
-import { requestCompressionApproval } from "./context-overflow-approval.js";
 import { resolveOverflowAction } from "./context-overflow-policy.js";
 import {
   createInitialReducerState,
@@ -129,8 +157,6 @@ import {
   inboundActorContextFromTrustContext,
   loadSlackActiveThreadFocusBlock,
   loadSlackChronologicalMessages,
-  readNowScratchpad,
-  readPkbContext,
   stripInjectionsForCompaction,
 } from "./conversation-runtime-assembly.js";
 import type { SkillProjectionCache } from "./conversation-skill-tools.js";
@@ -138,7 +164,7 @@ import { markSurfaceCompleted } from "./conversation-surfaces.js";
 import { resolveTrustClass } from "./conversation-tool-setup.js";
 import { recordUsage } from "./conversation-usage.js";
 import { formatTurnTimestamp } from "./date-context.js";
-import { deepRepairHistory, repairHistory } from "./history-repair.js";
+import { deepRepairHistory } from "./history-repair.js";
 import type {
   DynamicPageSurfaceData,
   ServerMessage,
@@ -171,77 +197,210 @@ type GitServiceInitializer = {
   ensureInitialized(): Promise<void>;
 };
-// ── Compaction circuit-breaker constants ────────────────────────────
+// ── Compaction circuit-breaker pipeline helpers ─────────────────────
+//
+// The circuit-breaker behavior (3 consecutive summary-LLM failures trips a
+// 1-hour cooldown) is now implemented by the `circuitBreaker` plugin
+// pipeline. The default plugin (`plugins/defaults/circuit-breaker.ts`)
+// replicates the legacy threshold/cooldown constants and event-emission
+// semantics exactly — it operates on the `consecutiveCompactionFailures` /
+// `compactionCircuitOpenUntil` fields the conversation still owns so the
+// dev-only playground routes (`POST /playground/reset-compaction-circuit`,
+// `POST /playground/inject-compaction-failures`) continue to read and
+// mutate those fields directly.
 //
-// The circuit opens after `COMPACTION_CIRCUIT_FAILURE_THRESHOLD` consecutive
-// summary-LLM failures and stays open for `COMPACTION_CIRCUIT_COOLDOWN_MS`
-// before auto-compaction is allowed to retry. User-initiated compaction
-// (`force: true`) bypasses the breaker regardless of its state.
-const COMPACTION_CIRCUIT_FAILURE_THRESHOLD = 3;
-const COMPACTION_CIRCUIT_COOLDOWN_MS = 60 * 60 * 1000; // 1 hour
+// The helpers below build the pipeline inputs and invoke the runner. They
+// are the sole entry points the rest of the daemon uses to query or update
+// the compaction circuit.
+/**
+ * Derive the circuit-breaker key that scopes compaction-circuit state to a
+ * single conversation.
+ *
+ * @param conversationId - Identifier of the conversation being tracked.
+ * @returns The conversation id prefixed with `compaction:`.
+ */
+function compactionCircuitKey(conversationId: string): string {
+  return "compaction:" + conversationId;
+}
 /**
- * Check whether the compaction circuit breaker is currently open for the
- * given context. The breaker auto-closes once `compactionCircuitOpenUntil`
- * has elapsed.
+ * Build the minimal {@link TurnContext} the pipeline runner requires. Called
+ * both from inside the agent loop (where turn identifiers are available) and
+ * from non-turn invocations like `Conversation.forceCompact` (which falls
+ * back to stable placeholders so the runner's log records still carry the
+ * conversation identifier).
  */
-export function isCompactionCircuitOpen(ctx: {
-  compactionCircuitOpenUntil: number | null;
-}): boolean {
-  return (
-    ctx.compactionCircuitOpenUntil !== null &&
-    Date.now() < ctx.compactionCircuitOpenUntil
-  );
+function buildCircuitTurnContext(ctx: {
+  readonly conversationId: string;
+  currentRequestId?: string;
+  currentTurnTrustContext?: TrustContext;
+  trustContext?: TrustContext;
+  turnCount: number;
+}): PluginTurnContext {
+  const { conversationId, currentRequestId, turnCount } = ctx;
+  return {
+    // Callers without a request id get a stable placeholder.
+    requestId: currentRequestId ?? "circuit-breaker",
+    conversationId,
+    turnIndex: turnCount,
+    // Per-turn snapshot first, then the conversation-level context, then the
+    // synthetic fallback.
+    trust:
+      ctx.currentTurnTrustContext ?? ctx.trustContext ?? FALLBACK_TURN_TRUST,
+  };
 }
 /**
- * Track the outcome of a `maybeCompact()` call against the circuit breaker.
+ * Run the `circuitBreaker` pipeline for the compaction circuit on this
+ * conversation. When `outcome` is provided, state is updated (and transition
+ * events emit via `onEvent`); when omitted the call is query-only.
  *
- * - When the summary LLM call failed (local fallback covered the result),
- *   increment the consecutive-failure counter. If the counter reaches the
- *   threshold, open the circuit for the cooldown window and emit
- *   `compaction_circuit_open` so clients can surface a notice.
- * - When the call did not fail, reset the counter and clear any open circuit.
+ * Returns the post-call decision from the pipeline. Callers gate auto-paths
+ * on `!result.open` and admit forced paths regardless of the decision.
+ */
+async function runCompactionCircuitPipeline(
+  ctx: {
+    readonly conversationId: string;
+    consecutiveCompactionFailures: number;
+    compactionCircuitOpenUntil: number | null;
+    currentRequestId?: string;
+    currentTurnTrustContext?: TrustContext;
+    trustContext?: TrustContext;
+    turnCount: number;
+  },
+  args: {
+    outcome?: "success" | "failure";
+    onEvent?: (msg: ServerMessage) => void;
+  },
+): Promise<CircuitBreakerResult> {
+  const { outcome, onEvent } = args;
+  const turnContext = buildCircuitTurnContext(ctx);
+
+  // Terminal handler, reached only when no middleware in the chain produced
+  // a decision. The default plugin is expected to always answer, so this is
+  // a defensive fallback (e.g. for test setups that omit the default plugin)
+  // that derives the decision from the stored open-until timestamp.
+  const deriveDecisionFromState = async (
+    terminalArgs: CircuitBreakerArgs,
+  ): Promise<CircuitBreakerResult> => {
+    const openUntil = terminalArgs.state.compactionCircuitOpenUntil;
+    const now = Date.now();
+    if (openUntil === null || !(now < openUntil)) {
+      return { open: false };
+    }
+    return { open: true, cooldownRemainingMs: openUntil - now };
+  };
+
+  const pipelineArgs: CircuitBreakerArgs = {
+    key: compactionCircuitKey(ctx.conversationId),
+    // `ctx` itself is the mutable state container: the state shape matches
+    // the breaker fields the conversation owns, so plugins mutate the same
+    // object the playground routes read and write.
+    state: ctx,
+    // Optional fields are only attached when the caller supplied them.
+    ...(outcome === undefined ? {} : { outcome }),
+    ...(onEvent ? { onEvent } : {}),
+  };
+
+  return runPipeline<CircuitBreakerArgs, CircuitBreakerResult>(
+    "circuitBreaker",
+    getMiddlewaresFor("circuitBreaker"),
+    deriveDecisionFromState,
+    pipelineArgs,
+    turnContext,
+    DEFAULT_TIMEOUTS.circuitBreaker,
+  );
+}
+/**
+ * Report whether the compaction circuit breaker is currently open for this
+ * conversation.
+ *
+ * Query-only: delegates to {@link runCompactionCircuitPipeline} without an
+ * outcome or an event sink. The function is async because the pipeline
+ * runner is async.
+ *
+ * @param ctx - Conversation state carrying the breaker fields plus the
+ *   identifiers used to build the pipeline's turn context.
+ * @returns `true` when the pipeline's decision reports the circuit as open.
+ */
+export async function isCompactionCircuitOpen(ctx: {
+  readonly conversationId: string;
+  consecutiveCompactionFailures: number;
+  compactionCircuitOpenUntil: number | null;
+  currentRequestId?: string;
+  currentTurnTrustContext?: TrustContext;
+  trustContext?: TrustContext;
+  turnCount: number;
+}): Promise<boolean> {
+  const { open } = await runCompactionCircuitPipeline(ctx, {});
+  return open;
+}
+/**
+ * Update the compaction circuit breaker with the outcome of a `maybeCompact`
+ * call and emit any transition event. A `summaryFailed` value of `undefined`
+ * means the summary LLM never ran (early return) — callers must guard with
+ * `summaryFailed !== undefined` before invoking this helper so early-return
+ * paths don't silently reset the 3-strike counter.
  *
- * This is called by every `maybeCompact()` site (including forced ones),
- * because a run of three failures is a provider-health signal regardless of
- * whether the caller bypassed the breaker.
+ * The default plugin handles threshold-based tripping and cooldown reset;
+ * see `plugins/defaults/circuit-breaker.ts` for the canonical semantics.
+ *
+ * @param ctx - Conversation state holding the breaker fields; it is handed
+ *   to the pipeline as the mutable state container.
+ * @param summaryFailed - `true` is reported to the pipeline as a `"failure"`
+ *   outcome, `false` as `"success"`.
+ * @param onEvent - Event sink forwarded to the pipeline for transition
+ *   events.
+ * @returns A promise that settles once the pipeline call completes; the
+ *   pipeline's decision is discarded.
  */
-export function trackCompactionOutcome(
+export async function trackCompactionOutcome(
   ctx: {
+    readonly conversationId: string;
     consecutiveCompactionFailures: number;
     compactionCircuitOpenUntil: number | null;
+    currentRequestId?: string;
+    currentTurnTrustContext?: TrustContext;
+    trustContext?: TrustContext;
+    turnCount: number;
   },
-  summaryFailed: boolean | undefined,
+  summaryFailed: boolean,
   onEvent: (msg: ServerMessage) => void,
-): void {
-  if (summaryFailed) {
-    ctx.consecutiveCompactionFailures += 1;
-    // Treat a stale/expired open-until timestamp the same as null so a new
-    // 3-strike window can re-open the circuit after the prior cooldown
-    // elapses. Without this the second trip would no-op because
-    // `compactionCircuitOpenUntil` remains set to a past timestamp even
-    // though `isCompactionCircuitOpen()` correctly reports closed.
-    const circuitDormant =
-      ctx.compactionCircuitOpenUntil === null ||
-      Date.now() >= ctx.compactionCircuitOpenUntil;
-    if (
-      ctx.consecutiveCompactionFailures >=
-        COMPACTION_CIRCUIT_FAILURE_THRESHOLD &&
-      circuitDormant
-    ) {
-      const openUntil = Date.now() + COMPACTION_CIRCUIT_COOLDOWN_MS;
-      ctx.compactionCircuitOpenUntil = openUntil;
-      onEvent({
-        type: "compaction_circuit_open",
-        reason: "3_consecutive_failures",
-        openUntil,
-      });
-    }
-  } else {
-    ctx.consecutiveCompactionFailures = 0;
-    ctx.compactionCircuitOpenUntil = null;
-  }
+): Promise<void> {
+  await runCompactionCircuitPipeline(ctx, {
+    outcome: summaryFailed ? "failure" : "success",
+    onEvent,
+  });
+}
+// ── Plugin pipeline helpers ──────────────────────────────────────────
+//
+// Canonical {@link PluginTurnContext} builder threaded into every
+// `runPipeline` call inside `runAgentLoopImpl`. The orchestrator composes
+// the context on demand at each call site from ambient state rather than
+// carrying a persistent `TurnContext` instance across the turn.
+/**
+ * Synthetic fallback trust context used when the orchestrator fires a pipeline
+ * before the per-turn trust snapshot has been captured (e.g. invocations that
+ * bypass `processMessage` / `drainQueue`). It is the last step of the
+ * `currentTurnTrustContext ?? trustContext ?? FALLBACK_TURN_TRUST` cascade
+ * used by `buildCircuitTurnContext` and `buildPluginTurnContext`.
+ *
+ * The trust class is `unknown` rather than `guardian` so that a missing
+ * snapshot cannot accidentally grant elevated trust to a custom plugin
+ * reading `ctx.trust`.
+ */
+export const FALLBACK_TURN_TRUST: TrustContext = {
+  sourceChannel: "vellum",
+  trustClass: "unknown",
+};
+/**
+ * Assemble the {@link PluginTurnContext} handed to {@link runPipeline}.
+ *
+ * This is the single source of truth for pipeline call sites inside the
+ * agent loop: every `runPipeline` invocation in `runAgentLoopImpl` (and in
+ * handlers sharing its ambient state) must go through this helper instead of
+ * writing a context literal inline. Doing so keeps `turnIndex`, trust
+ * resolution, and the `contextWindowManager` attachment uniform across
+ * pipeline slots, so structured logs filtered by
+ * `conversationId`/`turnIndex` stay coherent.
+ *
+ * Field derivation:
+ * - `turnIndex` is always `ctx.turnCount`, the orchestrator-owned 0-based
+ *   turn counter. Using one source avoids the earlier inconsistency where
+ *   `ctx.turnCount`, `ctx.messages.length - 1`, `ctx.messages.length`, and
+ *   `0` were all used for the same turn.
+ * - `trust` resolves from the per-turn snapshot, then the conversation-level
+ *   context, then {@link FALLBACK_TURN_TRUST}. The cascade matches the one
+ *   in the orchestrator's inline injection assembly, so middleware reads the
+ *   same trust class the runtime sees.
+ * - `contextWindowManager` is always attached. Pipelines that do not need it
+ *   can ignore it; the default compaction plugin reads it through the typed
+ *   optional field on the turn context.
+ *
+ * @param ctx - Agent-loop conversation context supplying the ambient state.
+ * @param requestId - Identifier of the request driving this pipeline call.
+ * @returns A freshly built turn context for a single `runPipeline` call.
+ */
+export function buildPluginTurnContext(
+  ctx: AgentLoopConversationContext,
+  requestId: string,
+): PluginTurnContext {
+  const { conversationId, turnCount, contextWindowManager } = ctx;
+  return {
+    requestId,
+    conversationId,
+    turnIndex: turnCount,
+    trust:
+      ctx.currentTurnTrustContext ?? ctx.trustContext ?? FALLBACK_TURN_TRUST,
+    contextWindowManager,
+  };
 }
 // ── Context Interface ────────────────────────────────────────────────
@@ -404,7 +563,6 @@ export async function runAgentLoopImpl(
   userMessageId: string,
   onEvent: (msg: ServerMessage) => void,
   options?: {
-    skipPreMessageRollback?: boolean;
     isInteractive?: boolean;
     isUserMessage?: boolean;
     titleText?: string;
@@ -528,40 +686,10 @@ export async function runAgentLoopImpl(
       }
     }
-    const preMessageResult = await getHookManager().trigger("pre-message", {
-      conversationId: ctx.conversationId,
-      messagePreview: truncate(content, 200, ""),
-    });
-    if (preMessageResult.blocked) {
-      if (!options?.skipPreMessageRollback) {
-        ctx.messages.pop();
-        deleteMessageById(userMessageId);
-      }
-      // Replace loading placeholder so the conversation isn't stuck as "Generating title..."
-      const currentConv = getConversation(ctx.conversationId);
-      if (
-        isReplaceableTitle(currentConv?.title ?? null) &&
-        currentConv?.title !== UNTITLED_FALLBACK
-      ) {
-        updateConversationTitle(ctx.conversationId, UNTITLED_FALLBACK);
-        onEvent({
-          type: "conversation_title_updated",
-          conversationId: ctx.conversationId,
-          title: UNTITLED_FALLBACK,
-        });
-      }
-      onEvent({
-        type: "error",
-        message: `Message blocked by hook "${preMessageResult.blockedBy}"`,
-      });
-      return;
-    }
     // Generate title early — the user message alone is sufficient context.
-    // Firing after hook gating but before the main LLM call removes the
-    // delay of waiting for the full assistant response. The second-pass
-    // regeneration at turn 3 will refine the title with more context.
+    // Firing before the main LLM call removes the delay of waiting for the
+    // full assistant response. The second-pass regeneration at turn 3 will
+    // refine the title with more context.
     // No abort signal — title generation should complete even if the user
     // cancels the response, since the user message is already persisted.
     // Deferred via setTimeout so the main agent loop LLM call enqueues
@@ -569,18 +697,38 @@ export async function runAgentLoopImpl(
     if (
       isReplaceableTitle(getConversation(ctx.conversationId)?.title ?? null)
     ) {
+      // TurnContext routed through the canonical builder so the pipeline's
+      // log record reports the same `conversationId`/`turnIndex` shape as
+      // every other slot in this turn. Title generation does not depend on
+      // the context-window manager attached by the builder, but sharing the
+      // builder keeps the invariant enforced in one place.
+      const titlePipelineCtx = buildPluginTurnContext(ctx, reqId);
+      const titleArgs = {
+        conversationId: ctx.conversationId,
+        provider: ctx.provider,
+        userMessage: options?.titleText ?? content,
+        onTitleUpdated: (title: string) => {
+          onEvent({
+            type: "conversation_title_updated",
+            conversationId: ctx.conversationId,
+            title,
+          });
+        },
+      };
       setTimeout(() => {
-        queueGenerateConversationTitle({
-          conversationId: ctx.conversationId,
-          provider: ctx.provider,
-          userMessage: options?.titleText ?? content,
-          onTitleUpdated: (title) => {
-            onEvent({
-              type: "conversation_title_updated",
-              conversationId: ctx.conversationId,
-              title,
-            });
-          },
+        runPipeline(
+          "titleGenerate",
+          getMiddlewaresFor("titleGenerate"),
+          defaultTitleGenerateTerminal,
+          titleArgs,
+          titlePipelineCtx,
+          DEFAULT_TIMEOUTS.titleGenerate,
+        ).catch((err) => {
+          // Fire-and-forget — keep previous non-propagating semantics.
+          // queueGenerateConversationTitle already swallows internal
+          // errors; this catch covers pipeline-layer errors (timeouts,
+          // middleware throws) without surfacing them to the agent loop.
+          rlog.warn({ err }, "titleGenerate pipeline failed (non-fatal)");
         });
       }, 0);
     }
@@ -592,7 +740,7 @@ export async function runAgentLoopImpl(
     const compactCheck = ctx.contextWindowManager.shouldCompact(ctx.messages);
     // Skip auto-compaction while the circuit breaker is open. Force paths
     // and user-initiated /compact bypass this check.
-    const autoCompactAllowed = !isCompactionCircuitOpen(ctx);
+    const autoCompactAllowed = !(await isCompactionCircuitOpen(ctx));
     if (compactCheck.needed && autoCompactAllowed) {
       ctx.emitActivityState(
         "thinking",
@@ -601,69 +749,59 @@ export async function runAgentLoopImpl(
         reqId,
       );
     }
-    const compacted = autoCompactAllowed
-      ? await ctx.contextWindowManager.maybeCompact(
-          ctx.messages,
-          abortController.signal,
+    let compacted: Awaited<
+      ReturnType<typeof ctx.contextWindowManager.maybeCompact>
+    > | null = null;
+    if (autoCompactAllowed) {
+      try {
+        compacted = (await runPipeline<CompactionArgs, CompactionResult>(
+          "compaction",
+          getMiddlewaresFor("compaction"),
+          (args) =>
+            defaultCompactionTerminal(args, buildPluginTurnContext(ctx, reqId)),
           {
-            lastCompactedAt: ctx.contextCompactedAt ?? undefined,
-            precomputedEstimate: compactCheck.estimatedTokens,
-            conversationOriginChannel:
-              getConversationOriginChannel(ctx.conversationId) ?? undefined,
+            messages: ctx.messages,
+            signal: abortController.signal,
+            options: {
+              lastCompactedAt: ctx.contextCompactedAt ?? undefined,
+              precomputedEstimate: compactCheck.estimatedTokens,
+              conversationOriginChannel:
+                getConversationOriginChannel(ctx.conversationId) ?? undefined,
+            },
           },
-        )
-      : null;
+          buildPluginTurnContext(ctx, reqId),
+          DEFAULT_TIMEOUTS.compaction,
+        )) as Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>;
+      } catch (err) {
+        if (err instanceof PluginTimeoutError) {
+          // Pipeline exceeded its budget. Record the failure so the circuit
+          // breaker tracks consecutive timeouts (it trips after three),
+          // then degrade gracefully by skipping compaction this turn —
+          // the turn proceeds with the un-compacted history rather than
+          // hard-failing. The inner summary call has been aborted by the
+          // runner's signal-linking, so updateSummary's local fallback
+          // also ran before this catch block is reached.
+          rlog.warn(
+            { err, phase: "start-of-turn-compaction" },
+            "Compaction pipeline timed out — skipping compaction this turn",
+          );
+          await trackCompactionOutcome(ctx, true, onEvent);
+          compacted = null;
+        } else {
+          throw err;
+        }
+      }
+    }
     // Only track circuit-breaker state when a summary LLM call actually ran.
     // `summaryFailed` is `undefined` on early returns (compaction disabled,
     // below threshold, cooldown active, no eligible messages, truncation-only
     // path) — treating those as "successful" compactions would silently reset
     // the 3-strike counter and break the invariant.
     if (compacted && compacted.summaryFailed !== undefined) {
-      trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent);
+      await trackCompactionOutcome(ctx, compacted.summaryFailed, onEvent);
     }
     if (compacted?.compacted) {
-      ctx.messages = compacted.messages;
-      ctx.contextCompactedMessageCount += compacted.compactedPersistedMessages;
-      ctx.contextCompactedAt = Date.now();
-      // Notify memory graph that compaction happened — triggers full context
-      // reload on the next turn to replenish lost memory context.
-      ctx.graphMemory.onCompacted(compacted.compactedPersistedMessages);
-      updateConversationContextWindow(
-        ctx.conversationId,
-        compacted.summaryText,
-        ctx.contextCompactedMessageCount,
-      );
-      // Fire auto-analysis on compaction so the reflective agent can
-      // crystallize anything worth remembering before the context window
-      // narrows further.
-      enqueueAutoAnalysisOnCompaction(
-        ctx.conversationId,
-        ctx.trustContext?.trustClass,
-      );
-      onEvent({
-        type: "context_compacted",
-        previousEstimatedInputTokens: compacted.previousEstimatedInputTokens,
-        estimatedInputTokens: compacted.estimatedInputTokens,
-        maxInputTokens: compacted.maxInputTokens,
-        thresholdTokens: compacted.thresholdTokens,
-        compactedMessages: compacted.compactedMessages,
-        summaryCalls: compacted.summaryCalls,
-        summaryInputTokens: compacted.summaryInputTokens,
-        summaryOutputTokens: compacted.summaryOutputTokens,
-        summaryModel: compacted.summaryModel,
-      });
-      emitUsage(
-        ctx,
-        compacted.summaryInputTokens,
-        compacted.summaryOutputTokens,
-        compacted.summaryModel,
-        onEvent,
-        "context_compactor",
-        reqId,
-        compacted.summaryCacheCreationInputTokens ?? 0,
-        compacted.summaryCacheReadInputTokens ?? 0,
-        collapseRawResponses(compacted.summaryRawResponses),
-      );
+      applyCompactionResult(ctx, compacted, onEvent, reqId);
       shouldInjectWorkspace = true;
       if (compacted.compactedPersistedMessages > 0) {
         compactedThisTurn = true;
@@ -711,21 +849,60 @@ export async function runAgentLoopImpl(
     let runMessages = ctx.messages;
-    // Memory graph retrieval — dispatches to context-load / per-turn based on
-    // conversation state. Keep the query vector around so the PKB reminder
-    // can reuse it for relevance-hint search (see `applyRuntimeInjections`).
+    // Memory retrieval pipeline — fetches PKB, NOW.md, and memory-graph
+    // outputs through a single `memoryRetrieval` pipeline. Plugins may
+    // replace the terminal behavior by registering a middleware that
+    // short-circuits with its own `MemoryResult`; the default terminal
+    // below runs `runDefaultMemoryRetrieval` which reproduces the prior
+    // in-lined behavior (PKB/NOW reads + gated graph call).
+    const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
+    // Canonical builder — pulls trust from per-turn snapshot, then
+    // conversation-level, then the synthetic fallback. Memory retrieval
+    // does not need the context-window handle the builder attaches, but
+    // keeping every call site on one helper is load-bearing for log
+    // coherence across pipeline slots.
+    const memoryPluginTurnCtx = buildPluginTurnContext(ctx, reqId);
+    const memoryArgs: MemoryArgs = {
+      conversationId: ctx.conversationId,
+      trustContext: ctx.trustContext,
+      turnIndex: ctx.turnCount,
+      // Pass the abort signal via `args` (not `deps`) so the pipeline
+      // runner's `linkAbortSignal` can swap it for a signal linked to the
+      // pipeline's internal controller — on a plugin-set timeout or
+      // external cancel, the linked signal aborts and `prepareMemory`
+      // stops mutating graph state / emitting events after the pipeline
+      // has already errored.
+      signal: abortController.signal,
+    };
+    const memoryDeps: DefaultMemoryRetrievalDeps = {
+      messages: ctx.messages,
+      graphMemory: ctx.graphMemory,
+      config: getConfig(),
+      onEvent,
+      isTrustedActor,
+    };
+    const memoryResult: MemoryResult = await runPipeline(
+      "memoryRetrieval",
+      getMiddlewaresFor("memoryRetrieval"),
+      (args) => runDefaultMemoryRetrieval(args, memoryDeps),
+      memoryArgs,
+      memoryPluginTurnCtx,
+      DEFAULT_TIMEOUTS.memoryRetrieval,
+    );
+    // Consume the memory-graph block when the default retriever emitted
+    // one. Custom plugins that substitute their own blocks without the
+    // default discriminator are expected to handle their own side effects
+    // (event emission, metric persistence) inside their middleware; this
+    // block short-circuits to the original no-op behavior in that case.
+    const defaultGraphPayload: GraphMemoryPayload | null =
+      asDefaultGraphPayload(memoryResult.memoryGraphBlocks);
     let pkbQueryVector: number[] | undefined;
     let pkbSparseVector:
       | import("../memory/qdrant-client.js").QdrantSparseVector
       | undefined;
-    const isTrustedActor = resolveTrustClass(ctx.trustContext) === "guardian";
-    if (isTrustedActor) {
-      const graphResult = await ctx.graphMemory.prepareMemory(
-        ctx.messages,
-        getConfig(),
-        abortController.signal,
-        onEvent,
-      );
+    if (defaultGraphPayload) {
+      const graphResult = defaultGraphPayload.result;
       runMessages = graphResult.runMessages;
       // Select dense+sparse as a matched pair so RRF fusion combines two
       // signals aligned to the same query text:
@@ -746,12 +923,24 @@ export async function runAgentLoopImpl(
       // Persist the injected block text in message metadata so it survives
       // conversation reloads (eviction, restart, fork). loadFromDb re-injects
-      // from metadata.
+      // from metadata. Routed through the `persistence` pipeline so plugins
+      // can observe or override metadata updates alongside add/delete.
       if (graphResult.injectedBlockText) {
         try {
-          updateMessageMetadata(userMessageId, {
-            memoryInjectedBlock: graphResult.injectedBlockText,
-          });
+          await runPipeline<PersistArgs, PersistResult>(
+            "persistence",
+            getMiddlewaresFor("persistence"),
+            defaultPersistenceTerminal,
+            {
+              op: "update",
+              messageId: userMessageId,
+              updates: {
+                memoryInjectedBlock: graphResult.injectedBlockText,
+              },
+            },
+            buildPluginTurnContext(ctx, reqId),
+            DEFAULT_TIMEOUTS.persistence,
+          );
         } catch (err) {
           rlog.warn(
             { err },
@@ -933,11 +1122,13 @@ export async function runAgentLoopImpl(
     // Inject NOW.md and PKB content only on the first turn (or after
     // compaction re-strips them).  Old injections persist in history and
     // are never stripped on normal turns — this preserves the cached prefix.
-    const currentNowContent = readNowScratchpad();
+    // PKB/NOW content is sourced from the `memoryRetrieval` pipeline above
+    // so plugins can override either source without touching the agent loop.
+    const currentNowContent = memoryResult.nowContent;
     const shouldInjectNowAndPkb = isFirstMessage || compactedThisTurn;
     const nowScratchpad = shouldInjectNowAndPkb ? currentNowContent : null;
-    const currentPkbContent = readPkbContext();
+    const currentPkbContent = memoryResult.pkbContent;
     const pkbContext = shouldInjectNowAndPkb ? currentPkbContent : null;
     const pkbActive = currentPkbContent !== null;
@@ -1030,12 +1221,19 @@ export async function runAgentLoopImpl(
     let currentInjectionMode: InjectionMode = "full";
+    // Canonical per-turn TurnContext forwarded to the injector chain. The
+    // per-turn injection inputs are built inside `applyRuntimeInjections`
+    // from the `injectionOpts` bag; we only need to hand in identity +
+    // trust here so third-party injectors see the real turn metadata.
+    const injectionTurnCtx = buildPluginTurnContext(ctx, reqId);
     const injection = await applyRuntimeInjections(runMessages, {
       ...injectionOpts,
       slackChronologicalMessages: reducerCompacted
         ? null
         : injectionOpts.slackChronologicalMessages,
       mode: currentInjectionMode,
+      turnContext: injectionTurnCtx,
     });
     runMessages = injection.messages;
@@ -1043,11 +1241,14 @@ export async function runAgentLoopImpl(
     // reloads (eviction, restart, fork). loadFromDb re-injects from metadata.
     // Only the first call site persists — the overflow-recovery re-entry sites
     // send identical bytes and the tail row may not correspond to
-    // `userMessageId`. Both blocks are written in a single call to avoid
+    // `userMessageId`. All blocks are written in a single call to avoid
     // doubling SQLite SELECT+UPDATE work on every turn.
     if (
       injection.blocks.unifiedTurnContext ||
-      injection.blocks.pkbSystemReminder
+      injection.blocks.pkbSystemReminder ||
+      injection.blocks.workspaceBlock ||
+      injection.blocks.nowScratchpadBlock ||
+      injection.blocks.pkbContextBlock
     ) {
       try {
         const metadataUpdates: Record<string, unknown> = {};
@@ -1059,7 +1260,28 @@ export async function runAgentLoopImpl(
           metadataUpdates.pkbSystemReminderBlock =
             injection.blocks.pkbSystemReminder;
         }
-        updateMessageMetadata(userMessageId, metadataUpdates);
+        if (injection.blocks.workspaceBlock) {
+          metadataUpdates.workspaceBlock = injection.blocks.workspaceBlock;
+        }
+        if (injection.blocks.nowScratchpadBlock) {
+          metadataUpdates.nowScratchpadBlock =
+            injection.blocks.nowScratchpadBlock;
+        }
+        if (injection.blocks.pkbContextBlock) {
+          metadataUpdates.pkbContextBlock = injection.blocks.pkbContextBlock;
+        }
+        await runPipeline<PersistArgs, PersistResult>(
+          "persistence",
+          getMiddlewaresFor("persistence"),
+          defaultPersistenceTerminal,
+          {
+            op: "update",
+            messageId: userMessageId,
+            updates: metadataUpdates,
+          },
+          buildPluginTurnContext(ctx, reqId),
+          DEFAULT_TIMEOUTS.persistence,
+        );
       } catch (err) {
         rlog.warn({ err }, "Failed to persist injection metadata (non-fatal)");
       }
@@ -1082,18 +1304,51 @@ export async function runAgentLoopImpl(
     let reducerState: ReducerState | undefined;
     const toolTokenBudget = ctx.agentLoop.getToolTokenBudget(runMessages);
-    // Canonical calibration key used at every `estimatePromptTokens` site in
-    // this function. Matches the key recorded by `handleUsage` for wrapper
-    // providers (OpenRouter routing to Anthropic → key is `"anthropic"`).
+    // Canonical calibration key — passed to the `tokenEstimate` pipeline for
+    // every preflight/mid-loop estimate, the overflow reducer config, and the
+    // convergence-path `estimatePromptTokens` call. Matches the key recorded
+    // by `handleUsage` for wrapper providers (OpenRouter routing to
+    // Anthropic → key is `"anthropic"`).
     const estimationProviderName = getCalibrationProviderKey(ctx.provider);
-    const preflightTokens = estimatePromptTokens(
-      runMessages,
-      ctx.systemPrompt,
-      {
-        providerName: estimationProviderName,
-        toolTokenBudget,
-      },
-    );
+    // Shared `TurnContext` for every `tokenEstimate` pipeline invocation in
+    // this turn. The pipeline is the extension point for plugins that want
+    // to substitute an alternate estimator (e.g. provider-native tokenization)
+    // without touching orchestrator code.
+    //
+    // Routed through the canonical builder — `turnIndex` is `ctx.turnCount`,
+    // trust cascades through per-turn/conversation-level/fallback, and the
+    // context-window handle rides along so any middleware that wants to
+    // reuse the manager (e.g. to compute compaction-aware estimates) can.
+    const pipelineTurnCtx = buildPluginTurnContext(ctx, reqId);
+    const runTokenEstimatePipeline = (
+      history: Message[],
+    ): Promise<EstimateResult> =>
+      runPipeline<EstimateArgs, EstimateResult>(
+        "tokenEstimate",
+        getMiddlewaresFor("tokenEstimate"),
+        defaultTokenEstimateTerminal,
+        {
+          // Shallow-frozen copies so a misbehaving middleware that mutates
+          // `args.history` or `args.tools` in place (e.g. trims the array
+          // before calling next) can't silently strip prompt context from
+          // the orchestrator's live `runMessages` / resolved-tools arrays.
+          // TypeScript `readonly` on `EstimateArgs` does not prevent
+          // `push`/`splice` at runtime; the frozen wrapper throws in strict
+          // mode and isolates any mutation attempts from the call-site state.
+          history: Object.freeze([...history]) as Message[],
+          systemPrompt: ctx.systemPrompt,
+          tools: Object.freeze([
+            ...ctx.agentLoop.getResolvedTools(history),
+          ]) as ToolDefinition[],
+          providerName: estimationProviderName,
+        },
+        pipelineTurnCtx,
+        DEFAULT_TIMEOUTS.tokenEstimate,
+      );
+    const preflightTokens = await runTokenEstimatePipeline(runMessages);
     if (overflowRecovery.enabled && preflightTokens > preflightBudget) {
       rlog.warn(
@@ -1105,157 +1360,198 @@ export async function runAgentLoopImpl(
         "Preflight budget exceeded — running overflow reducer before provider call",
       );
-      reducerState = createInitialReducerState();
-      let preflightAttempts = 0;
-      while (
-        preflightAttempts < overflowRecovery.maxAttempts &&
-        !reducerState.exhausted
-      ) {
-        preflightAttempts++;
-        ctx.emitActivityState(
-          "thinking",
-          "context_compacting",
-          "assistant_turn",
-          reqId,
-        );
-        const step = await reduceContextOverflow(
-          ctx.messages,
-          {
-            providerName: estimationProviderName,
-            systemPrompt: ctx.systemPrompt,
-            contextWindow: config.llm.default.contextWindow,
-            targetTokens: preflightBudget,
-            toolTokenBudget,
-          },
-          reducerState,
-          (msgs, signal, opts) =>
-            ctx.contextWindowManager.maybeCompact(msgs, signal!, opts),
-          abortController.signal,
-        );
-        reducerState = step.state;
-        ctx.messages = step.messages;
-        currentInjectionMode = step.state.injectionMode;
-        // Track circuit-breaker state whenever the reducer invoked compaction.
-        // The reducer's forced_compaction tier uses force:true, so it bypasses
-        // the open-circuit check, but we still want failure tracking to detect
-        // a run of broken summaries and clear the counter on success. Only
-        // track when the summary LLM actually ran — `summaryFailed === undefined`
-        // indicates an early return (no eligible messages, truncation-only
-        // path, etc.) that shouldn't influence the breaker.
-        if (
-          step.compactionResult &&
-          step.compactionResult.summaryFailed !== undefined
-        ) {
-          trackCompactionOutcome(
-            ctx,
-            step.compactionResult.summaryFailed,
-            onEvent,
-          );
-        }
-        if (step.compactionResult?.compacted) {
-          ctx.contextCompactedMessageCount +=
-            step.compactionResult.compactedPersistedMessages;
-          ctx.contextCompactedAt = Date.now();
-          updateConversationContextWindow(
-            ctx.conversationId,
-            step.compactionResult.summaryText,
-            ctx.contextCompactedMessageCount,
-          );
-          // Fire auto-analysis on compaction — see forceCompact() for rationale.
-          enqueueAutoAnalysisOnCompaction(
-            ctx.conversationId,
-            ctx.trustContext?.trustClass,
-          );
-          onEvent({
-            type: "context_compacted",
-            previousEstimatedInputTokens:
-              step.compactionResult.previousEstimatedInputTokens,
-            estimatedInputTokens: step.compactionResult.estimatedInputTokens,
-            maxInputTokens: step.compactionResult.maxInputTokens,
-            thresholdTokens: step.compactionResult.thresholdTokens,
-            compactedMessages: step.compactionResult.compactedMessages,
-            summaryCalls: step.compactionResult.summaryCalls,
-            summaryInputTokens: step.compactionResult.summaryInputTokens,
-            summaryOutputTokens: step.compactionResult.summaryOutputTokens,
-            summaryModel: step.compactionResult.summaryModel,
-          });
-          emitUsage(
-            ctx,
-            step.compactionResult.summaryInputTokens,
-            step.compactionResult.summaryOutputTokens,
-            step.compactionResult.summaryModel,
-            onEvent,
-            "context_compactor",
+      // Overflow reduction runs through the plugin pipeline. The default
+      // middleware (`default-overflow-reduce`, registered at bootstrap)
+      // contains the historical tier loop — forced compaction → tool-result
+      // truncation → media stubbing → injection downgrade — plus the
+      // re-inject/re-estimate convergence check. The callbacks below are
+      // the orchestrator-specific side effects that the plugin coordinates
+      // per iteration (activity emission, compaction application, runtime
+      // injection reassembly, token re-estimation). Registered plugins that
+      // wrap the `overflowReduce` slot see each iteration through their own
+      // middleware `next` callback.
+      const overflowArgs: OverflowReduceArgs = {
+        messages: ctx.messages,
+        runMessages,
+        systemPrompt: ctx.systemPrompt,
+        providerName: estimationProviderName,
+        contextWindow: config.llm.default.contextWindow,
+        preflightBudget,
+        toolTokenBudget,
+        maxAttempts: overflowRecovery.maxAttempts,
+        abortSignal: abortController.signal,
+        compactFn: async (msgs, signal, opts) =>
+          // Route the reducer's forced-compaction tier through the
+          // `compaction` pipeline so registered plugins observe these
+          // invocations. Without this, custom compaction middleware only
+          // sees the three orchestrator-owned call sites and misses the
+          // reducer-initiated forced compactions entirely.
+          (await runPipeline<CompactionArgs, CompactionResult>(
+            "compaction",
+            getMiddlewaresFor("compaction"),
+            (args) =>
+              defaultCompactionTerminal(
+                args,
+                buildPluginTurnContext(ctx, reqId),
+              ),
+            {
+              messages: msgs,
+              signal,
+              options: opts,
+            },
+            buildPluginTurnContext(ctx, reqId),
+            DEFAULT_TIMEOUTS.compaction,
+          )) as Awaited<
+            ReturnType<typeof ctx.contextWindowManager.maybeCompact>
+          >,
+        emitActivityState: () => {
+          ctx.emitActivityState(
+            "thinking",
+            "context_compacting",
+            "assistant_turn",
             reqId,
-            step.compactionResult.summaryCacheCreationInputTokens ?? 0,
-            step.compactionResult.summaryCacheReadInputTokens ?? 0,
-            collapseRawResponses(step.compactionResult.summaryRawResponses),
-          );
-          ctx.graphMemory.onCompacted(
-            step.compactionResult.compactedPersistedMessages,
           );
-          shouldInjectWorkspace = true;
-          reducerCompacted = true;
-        }
-        // Re-inject with potentially downgraded injection mode.
-        // When compaction ran it strips existing NOW.md / PKB blocks, so we
-        // must re-inject the current content. Otherwise rely on the deduplicated
-        // value from injectionOpts to avoid duplicate injection.
-        const injection = await applyRuntimeInjections(ctx.messages, {
-          ...injectionOpts,
-          ...(step.compactionResult?.compacted && {
-            pkbContext: currentPkbContent,
-          }),
-          ...(step.compactionResult?.compacted && {
-            nowScratchpad: currentNowContent,
-          }),
-          workspaceTopLevelContext: shouldInjectWorkspace
-            ? ctx.workspaceTopLevelContext
-            : null,
-          // Once the reducer has compacted `ctx.messages`, the captured
-          // `slackChronologicalMessages` snapshot (built from the full
-          // persisted transcript) would overwrite the compacted history
-          // and undo compaction. Suppress the override from here on.
-          slackChronologicalMessages: reducerCompacted
-            ? null
-            : injectionOpts.slackChronologicalMessages,
-          mode: currentInjectionMode,
-        });
-        runMessages = injection.messages;
-        if (isTrustedActor && currentInjectionMode !== "minimal") {
-          const memResult = ctx.graphMemory.reinjectCachedMemory(runMessages);
-          runMessages = memResult.runMessages;
-        }
-        // Re-estimate with injections included — step.estimatedTokens was
-        // computed on bare history (ctx.messages) and doesn't account for
-        // tokens added by runtime injections.
-        const postInjectionTokens = estimatePromptTokens(
-          runMessages,
-          ctx.systemPrompt,
-          {
+        },
+        onCompactionResult: async (result) => {
+          // Track circuit-breaker state whenever the reducer invoked
+          // compaction. The reducer's forced_compaction tier uses
+          // force:true, so it bypasses the open-circuit check, but we
+          // still want failure tracking to detect a run of broken
+          // summaries and clear the counter on success. Only track when
+          // the summary LLM actually ran — `summaryFailed === undefined`
+          // indicates an early return (no eligible messages,
+          // truncation-only path, etc.) that shouldn't influence the
+          // breaker.
+          if (result.summaryFailed !== undefined) {
+            await trackCompactionOutcome(ctx, result.summaryFailed, onEvent);
+          }
+          if (result.compacted) {
+            applyCompactionResult(ctx, result, onEvent, reqId);
+            shouldInjectWorkspace = true;
+          }
+        },
+        reinjectForMode: async (
+          reducedMessages,
+          mode,
+          stepCompacted,
+          accumulatedCompacted,
+        ) => {
+          // Mirror the pre-PR-23 behavior: `ctx.messages` must track the
+          // reducer's latest output before re-injection runs, because other
+          // sites consulted through `injectionOpts` (`workspaceTopLevelContext`,
+          // slack history, etc.) depend on it and `applyCompactionResult`
+          // only updates `ctx.messages` on a compaction tier. Assigning here
+          // keeps non-compaction tiers (tool-result truncation, media
+          // stubbing, injection downgrade) observable to downstream
+          // injection assembly on the same turn.
+          ctx.messages = reducedMessages;
+          // When THIS iteration compacted, it stripped existing NOW.md /
+          // PKB blocks — so we re-inject current content. A later iteration
+          // that only truncates or downgrades must NOT re-force PKB/NOW,
+          // or each round would grow the token count. Matches the
+          // pre-PR-23 per-iteration `step.compactionResult?.compacted` gate.
+          const injection = await applyRuntimeInjections(reducedMessages, {
+            ...injectionOpts,
+            ...(stepCompacted && { pkbContext: currentPkbContent }),
+            ...(stepCompacted && { nowScratchpad: currentNowContent }),
+            workspaceTopLevelContext: shouldInjectWorkspace
+              ? ctx.workspaceTopLevelContext
+              : null,
+            // Once ANY iteration has compacted `ctx.messages`, the captured
+            // `slackChronologicalMessages` snapshot (built from the full
+            // persisted transcript) would overwrite the compacted history
+            // and undo compaction. Suppress the override from here on —
+            // sticky across subsequent non-compacting iterations.
+            slackChronologicalMessages: accumulatedCompacted
+              ? null
+              : injectionOpts.slackChronologicalMessages,
+            mode,
+            turnContext: buildPluginTurnContext(ctx, reqId),
+          });
+          let next = injection.messages;
+          if (isTrustedActor && mode !== "minimal") {
+            const memResult = ctx.graphMemory.reinjectCachedMemory(next);
+            next = memResult.runMessages;
+          }
+          return next;
+        },
+        estimatePostInjection: (runMsgs) =>
+          estimatePromptTokens(runMsgs, ctx.systemPrompt, {
             providerName: estimationProviderName,
             toolTokenBudget,
-          },
-        );
+          }),
+      };
+      const overflowResult = await runPipeline<
+        OverflowReduceArgs,
+        OverflowReduceResult
+      >(
+        "overflowReduce",
+        getMiddlewaresFor("overflowReduce"),
+        // Terminal — only reached when every registered middleware calls
+        // `next` and delegates past the innermost layer. The default plugin
+        // is a terminal itself (it doesn't call `next`), so in practice
+        // this fallback fires only when the default has been explicitly
+        // deregistered (tests) and no user plugin replaces it. Strict-fail
+        // semantics: throw so the missing terminal surfaces as a visible
+        // error instead of silently returning the history untouched.
+        async () => {
+          throw new PluginExecutionError(
+            "overflowReduce pipeline has no terminal handler — every reducer middleware called next() without providing a replacement",
+            "overflowReduce",
+          );
+        },
+        overflowArgs,
+        buildPluginTurnContext(ctx, reqId),
+        DEFAULT_TIMEOUTS.overflowReduce,
+      );
-        if (postInjectionTokens <= preflightBudget) break;
+      ctx.messages = overflowResult.messages;
+      runMessages = overflowResult.runMessages;
+      currentInjectionMode = overflowResult.injectionMode;
+      reducerState = overflowResult.reducerState;
+      if (overflowResult.reducerCompacted) {
+        reducerCompacted = true;
       }
     }
-    // Pre-run repair
+    // Pre-run repair — routed through the `historyRepair` plugin pipeline so
+    // plugins can observe or override repair behavior. The default plugin's
+    // middleware is a passthrough; the actual repair runs in the terminal
+    // (`defaultHistoryRepairTerminal`).
     let preRepairMessages = runMessages;
-    const preRunRepair = repairHistory(runMessages);
+    let preRunRepair: HistoryRepairResult | null = null;
+    try {
+      preRunRepair = await runPipeline<HistoryRepairArgs, HistoryRepairResult>(
+        "historyRepair",
+        getMiddlewaresFor("historyRepair"),
+        async (args) => defaultHistoryRepairTerminal(args),
+        { history: runMessages, provider: ctx.provider.name },
+        buildPluginTurnContext(ctx, reqId),
+        DEFAULT_TIMEOUTS.historyRepair,
+      );
+    } catch (err) {
+      if (err instanceof PluginTimeoutError) {
+        // Pipeline exceeded its budget — likely a misbehaving third-party
+        // middleware. Degrade gracefully by proceeding with the un-repaired
+        // history rather than turn-fatal-erroring; un-repaired history is
+        // strictly better than no turn at all, and the provider call itself
+        // will still error visibly if the drift is unrecoverable.
+        rlog.warn(
+          { err, phase: "pre_run" },
+          "historyRepair pipeline timed out — proceeding with un-repaired history",
+        );
+      } else {
+        throw err;
+      }
+    }
     if (
-      preRunRepair.stats.assistantToolResultsMigrated > 0 ||
-      preRunRepair.stats.missingToolResultsInserted > 0 ||
-      preRunRepair.stats.orphanToolResultsDowngraded > 0 ||
-      preRunRepair.stats.consecutiveSameRoleMerged > 0
+      preRunRepair !== null &&
+      (preRunRepair.stats.assistantToolResultsMigrated > 0 ||
+        preRunRepair.stats.missingToolResultsInserted > 0 ||
+        preRunRepair.stats.orphanToolResultsDowngraded > 0 ||
+        preRunRepair.stats.consecutiveSameRoleMerged > 0)
     ) {
       rlog.warn(
         { phase: "pre_run", ...preRunRepair.stats },
@@ -1299,7 +1595,9 @@ export async function runAgentLoopImpl(
     let yieldedForBudget = false;
-    const onCheckpoint = (checkpoint: CheckpointInfo): CheckpointDecision => {
+    const onCheckpoint = async (
+      checkpoint: CheckpointInfo,
+    ): Promise<CheckpointDecision> => {
       state.currentTurnToolNames = [];
       if (ctx.canHandoffAtCheckpoint()) {
@@ -1312,14 +1610,7 @@ export async function runAgentLoopImpl(
       // conversation-agent-loop run compaction before the provider rejects.
       if (overflowRecovery.enabled) {
         const midLoopThreshold = preflightBudget * 0.85;
-        const estimated = estimatePromptTokens(
-          checkpoint.history,
-          ctx.systemPrompt,
-          {
-            providerName: estimationProviderName,
-            toolTokenBudget,
-          },
-        );
+        const estimated = await runTokenEstimatePipeline(checkpoint.history);
         if (estimated > midLoopThreshold) {
           rlog.warn(
             { phase: "mid-loop", estimated, threshold: midLoopThreshold },
@@ -1335,10 +1626,16 @@ export async function runAgentLoopImpl(
     turnStarted = true;
-    let denyCompressionMessage: Message | null = null;
     rlog.info({ callSite: turnCallSite }, "Starting agent loop run");
+    // Thread the orchestrator's canonical per-turn context into the agent
+    // loop so its internal pipeline invocations (llmCall, emptyResponse,
+    // toolError, toolResultTruncate, toolExecute) see the real
+    // conversation identity / trust / contextWindowManager instead of the
+    // synthesized `"agent-loop"` placeholder. The loop clones this value
+    // and overwrites `turnIndex` with its own tool-use iteration counter.
+    const loopTurnCtx = buildPluginTurnContext(ctx, reqId);
     let updatedHistory = await ctx.agentLoop.run(
       runMessages,
       eventHandler,
@@ -1346,6 +1643,7 @@ export async function runAgentLoopImpl(
       reqId,
       onCheckpoint,
       turnCallSite,
+      loopTurnCtx,
     );
     rlog.info(
@@ -1379,11 +1677,11 @@ export async function runAgentLoopImpl(
       const rawHistory = stripInjectionsForCompaction(updatedHistory);
       ctx.messages = rawHistory;
       try {
-        clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
+        clearStrippedInjectionMetadataForConversation(ctx.conversationId);
       } catch (err) {
         rlog.warn(
           { err },
-          "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
+          "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
         );
       }
@@ -1394,65 +1692,61 @@ export async function runAgentLoopImpl(
         reqId,
         "Compacting context",
       );
-      const midLoopCompact = await ctx.contextWindowManager.maybeCompact(
-        ctx.messages,
-        abortController.signal,
-        {
-          lastCompactedAt: ctx.contextCompactedAt ?? undefined,
-          force: true,
-          targetInputTokensOverride: preflightBudget,
-          conversationOriginChannel:
-            getConversationOriginChannel(ctx.conversationId) ?? undefined,
-        },
-      );
+      let midLoopCompact: Awaited<
+        ReturnType<typeof ctx.contextWindowManager.maybeCompact>
+      >;
+      try {
+        midLoopCompact = (await runPipeline<CompactionArgs, CompactionResult>(
+          "compaction",
+          getMiddlewaresFor("compaction"),
+          (args) =>
+            defaultCompactionTerminal(args, buildPluginTurnContext(ctx, reqId)),
+          {
+            messages: ctx.messages,
+            signal: abortController.signal,
+            options: {
+              lastCompactedAt: ctx.contextCompactedAt ?? undefined,
+              force: true,
+              targetInputTokensOverride: preflightBudget,
+              conversationOriginChannel:
+                getConversationOriginChannel(ctx.conversationId) ?? undefined,
+            },
+          },
+          buildPluginTurnContext(ctx, reqId),
+          DEFAULT_TIMEOUTS.compaction,
+        )) as Awaited<ReturnType<typeof ctx.contextWindowManager.maybeCompact>>;
+      } catch (err) {
+        if (err instanceof PluginTimeoutError) {
+          // Mid-loop compaction timed out. Record the failure for the
+          // circuit breaker and escalate to the convergence loop's more
+          // aggressive reducer tiers (tool-result truncation, media
+          // stubbing, injection downgrade) by flipping the overflow flag
+          // and breaking out of the mid-loop retry. The existing
+          // "exhausted all attempts" block further down handles the
+          // escalation.
+          rlog.warn(
+            { err, phase: "mid-loop-compact" },
+            "Compaction pipeline timed out — escalating to convergence loop",
+          );
+          await trackCompactionOutcome(ctx, true, onEvent);
+          state.contextTooLargeDetected = true;
+          break;
+        }
+        throw err;
+      }
       // `force: true` bypasses the cooldown/threshold gates but early returns
       // for "no eligible messages" / "insufficient messages" still leave
       // `summaryFailed` undefined. Only track when the summary LLM actually ran.
       if (midLoopCompact.summaryFailed !== undefined) {
-        trackCompactionOutcome(ctx, midLoopCompact.summaryFailed, onEvent);
-      }
-      if (midLoopCompact.compacted) {
-        ctx.messages = midLoopCompact.messages;
-        reducerCompacted = true;
-        ctx.contextCompactedMessageCount +=
-          midLoopCompact.compactedPersistedMessages;
-        ctx.contextCompactedAt = Date.now();
-        updateConversationContextWindow(
-          ctx.conversationId,
-          midLoopCompact.summaryText,
-          ctx.contextCompactedMessageCount,
-        );
-        // Fire auto-analysis on compaction — see forceCompact() for rationale.
-        enqueueAutoAnalysisOnCompaction(
-          ctx.conversationId,
-          ctx.trustContext?.trustClass,
-        );
-        onEvent({
-          type: "context_compacted",
-          previousEstimatedInputTokens:
-            midLoopCompact.previousEstimatedInputTokens,
-          estimatedInputTokens: midLoopCompact.estimatedInputTokens,
-          maxInputTokens: midLoopCompact.maxInputTokens,
-          thresholdTokens: midLoopCompact.thresholdTokens,
-          compactedMessages: midLoopCompact.compactedMessages,
-          summaryCalls: midLoopCompact.summaryCalls,
-          summaryInputTokens: midLoopCompact.summaryInputTokens,
-          summaryOutputTokens: midLoopCompact.summaryOutputTokens,
-          summaryModel: midLoopCompact.summaryModel,
-        });
-        emitUsage(
+        await trackCompactionOutcome(
           ctx,
-          midLoopCompact.summaryInputTokens,
-          midLoopCompact.summaryOutputTokens,
-          midLoopCompact.summaryModel,
+          midLoopCompact.summaryFailed,
           onEvent,
-          "context_compactor",
-          reqId,
-          midLoopCompact.summaryCacheCreationInputTokens ?? 0,
-          midLoopCompact.summaryCacheReadInputTokens ?? 0,
-          collapseRawResponses(midLoopCompact.summaryRawResponses),
         );
-        ctx.graphMemory.onCompacted(midLoopCompact.compactedPersistedMessages);
+      }
+      if (midLoopCompact.compacted) {
+        applyCompactionResult(ctx, midLoopCompact, onEvent, reqId);
+        reducerCompacted = true;
         shouldInjectWorkspace = true;
       }
@@ -1474,6 +1768,7 @@ export async function runAgentLoopImpl(
           ? null
           : injectionOpts.slackChronologicalMessages,
         mode: currentInjectionMode,
+        turnContext: buildPluginTurnContext(ctx, reqId),
       });
       runMessages = injection.messages;
       if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -1497,6 +1792,7 @@ export async function runAgentLoopImpl(
         reqId,
         onCheckpoint,
         turnCallSite,
+        loopTurnCtx,
       );
     }
@@ -1526,6 +1822,15 @@ export async function runAgentLoopImpl(
         { phase: "retry" },
         "Provider ordering error detected, attempting one-shot deep-repair retry",
       );
+      // Design note: deep-repair intentionally bypasses the `historyRepair`
+      // plugin pipeline. Deep-repair is a recovery-only path triggered by a
+      // provider ordering error — it must be deterministic and unaffected by
+      // user middleware that might have caused (or be unable to recover from)
+      // the original drift. Plugins can already observe / override the
+      // pre-run repair via the `historyRepair` pipeline above; widening that
+      // surface to deep-repair is intentionally deferred until there's a
+      // concrete plugin-level use case. Do not route this call through
+      // `runPipeline` without first revisiting that contract.
       const retryRepair = deepRepairHistory(runMessages);
       runMessages = retryRepair.messages;
       const retryStrip = stripHistoricalWebSearchResults(runMessages);
@@ -1542,6 +1847,7 @@ export async function runAgentLoopImpl(
         reqId,
         onCheckpoint,
         turnCallSite,
+        loopTurnCtx,
       );
       if (state.orderingErrorDetected) {
@@ -1555,8 +1861,7 @@ export async function runAgentLoopImpl(
     // ── Bounded context overflow convergence loop ──────────────────
     // When the provider rejects with context-too-large, iterate through
     // reducer tiers (forced compaction, tool-result truncation, media
-    // stubbing, injection downgrade) with optional approval gating for
-    // interactive latest-turn compression.
+    // stubbing, injection downgrade).
     //
     // When progress was made (agent added messages before hitting the
     // limit), incorporate those new messages into ctx.messages so the
@@ -1572,11 +1877,11 @@ export async function runAgentLoopImpl(
       if (updatedHistory.length > preRunHistoryLength) {
         ctx.messages = stripInjectionsForCompaction(updatedHistory);
         try {
-          clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
+          clearStrippedInjectionMetadataForConversation(ctx.conversationId);
         } catch (err) {
           rlog.warn(
             { err },
-            "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
+            "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
           );
         }
         convergenceStripped = true;
@@ -1675,7 +1980,7 @@ export async function runAgentLoopImpl(
           step.compactionResult &&
           step.compactionResult.summaryFailed !== undefined
         ) {
-          trackCompactionOutcome(
+          await trackCompactionOutcome(
             ctx,
             step.compactionResult.summaryFailed,
             onEvent,
@@ -1683,47 +1988,7 @@ export async function runAgentLoopImpl(
         }
         if (step.compactionResult?.compacted) {
-          ctx.contextCompactedMessageCount +=
-            step.compactionResult.compactedPersistedMessages;
-          ctx.contextCompactedAt = Date.now();
-          updateConversationContextWindow(
-            ctx.conversationId,
-            step.compactionResult.summaryText,
-            ctx.contextCompactedMessageCount,
-          );
-          // Fire auto-analysis on compaction — see forceCompact() for rationale.
-          enqueueAutoAnalysisOnCompaction(
-            ctx.conversationId,
-            ctx.trustContext?.trustClass,
-          );
-          onEvent({
-            type: "context_compacted",
-            previousEstimatedInputTokens:
-              step.compactionResult.previousEstimatedInputTokens,
-            estimatedInputTokens: step.compactionResult.estimatedInputTokens,
-            maxInputTokens: step.compactionResult.maxInputTokens,
-            thresholdTokens: step.compactionResult.thresholdTokens,
-            compactedMessages: step.compactionResult.compactedMessages,
-            summaryCalls: step.compactionResult.summaryCalls,
-            summaryInputTokens: step.compactionResult.summaryInputTokens,
-            summaryOutputTokens: step.compactionResult.summaryOutputTokens,
-            summaryModel: step.compactionResult.summaryModel,
-          });
-          emitUsage(
-            ctx,
-            step.compactionResult.summaryInputTokens,
-            step.compactionResult.summaryOutputTokens,
-            step.compactionResult.summaryModel,
-            onEvent,
-            "context_compactor",
-            reqId,
-            step.compactionResult.summaryCacheCreationInputTokens ?? 0,
-            step.compactionResult.summaryCacheReadInputTokens ?? 0,
-            collapseRawResponses(step.compactionResult.summaryRawResponses),
-          );
-          ctx.graphMemory.onCompacted(
-            step.compactionResult.compactedPersistedMessages,
-          );
+          applyCompactionResult(ctx, step.compactionResult, onEvent, reqId);
           shouldInjectWorkspace = true;
           reducerCompacted = true;
         }
@@ -1742,6 +2007,7 @@ export async function runAgentLoopImpl(
             ? null
             : injectionOpts.slackChronologicalMessages,
           mode: currentInjectionMode,
+          turnContext: buildPluginTurnContext(ctx, reqId),
         });
         runMessages = injection.messages;
         if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -1767,6 +2033,7 @@ export async function runAgentLoopImpl(
           reqId,
           onCheckpoint,
           turnCallSite,
+          loopTurnCtx,
         );
         // If the rerun still yields at checkpoint, the turn is still
@@ -1789,11 +2056,11 @@ export async function runAgentLoopImpl(
           if (updatedHistory.length > preRunHistoryLength) {
             ctx.messages = stripInjectionsForCompaction(updatedHistory);
             try {
-              clearPkbSystemReminderMetadataForConversation(ctx.conversationId);
+              clearStrippedInjectionMetadataForConversation(ctx.conversationId);
             } catch (err) {
               rlog.warn(
                 { err },
-                "Failed to clear pkbSystemReminderBlock metadata after compaction strip (non-fatal)",
+                "Failed to clear stripped-injection metadata after compaction strip (non-fatal)",
               );
             }
             convergenceStripped = true;
@@ -1805,231 +2072,83 @@ export async function runAgentLoopImpl(
       // All reducer tiers exhausted but provider still rejects —
       // consult the overflow policy for latest-turn compression.
-      // Emergency compaction is deferred to the policy-gated paths below
-      // so that `request_user_approval` sessions collect consent first.
+      // The policy either auto-compresses the latest turn or falls
+      // through to the final graceful-error fallback below.
       if (state.contextTooLargeDetected) {
         const action = resolveOverflowAction({
           overflowRecovery,
           isInteractive: isInteractiveResolved,
         });
-        if (action === "request_user_approval") {
-          const approval = await requestCompressionApproval(ctx.prompter, {
-            signal: abortController.signal,
-          });
-          if (approval.approved) {
-            // User approved — force emergency compaction with aggressive settings
-            const emergencyCompact =
-              await ctx.contextWindowManager.maybeCompact(
-                ctx.messages,
-                abortController.signal,
-                {
+        if (action === "auto_compress_latest_turn") {
+          // Auto-compress without asking — users opt out via the "drop" policy.
+          ctx.emitActivityState(
+            "thinking",
+            "context_compacting",
+            "assistant_turn",
+            reqId,
+          );
+          let emergencyCompact: Awaited<
+            ReturnType<typeof ctx.contextWindowManager.maybeCompact>
+          > | null = null;
+          try {
+            emergencyCompact = (await runPipeline<
+              CompactionArgs,
+              CompactionResult
+            >(
+              "compaction",
+              getMiddlewaresFor("compaction"),
+              (args) =>
+                defaultCompactionTerminal(
+                  args,
+                  buildPluginTurnContext(ctx, reqId),
+                ),
+              {
+                messages: ctx.messages,
+                signal: abortController.signal,
+                options: {
                   lastCompactedAt: ctx.contextCompactedAt ?? undefined,
                   force: true,
                   minKeepRecentUserTurns: 0,
                   targetInputTokensOverride: correctedTarget,
                 },
+              },
+              buildPluginTurnContext(ctx, reqId),
+              DEFAULT_TIMEOUTS.compaction,
+            )) as Awaited<
+              ReturnType<typeof ctx.contextWindowManager.maybeCompact>
+            >;
+          } catch (err) {
+            if (err instanceof PluginTimeoutError) {
+              // Emergency compaction timed out. Record the circuit-breaker
+              // failure and fall through to the graceful-error path below
+              // (the unsuccessful-compaction fallback) rather than hard-
+              // failing the turn.
+              rlog.warn(
+                { err, phase: "emergency-compaction" },
+                "Emergency compaction pipeline timed out — continuing with overflow fallback",
               );
-            // Only track when the summary LLM actually ran; `force: true`
-            // bypasses the cooldown but not the early-return paths.
-            if (emergencyCompact.summaryFailed !== undefined) {
-              trackCompactionOutcome(
-                ctx,
-                emergencyCompact.summaryFailed,
-                onEvent,
-              );
-            }
-            if (emergencyCompact.compacted) {
-              ctx.messages = emergencyCompact.messages;
-              reducerCompacted = true;
-              ctx.contextCompactedMessageCount +=
-                emergencyCompact.compactedPersistedMessages;
-              ctx.contextCompactedAt = Date.now();
-              updateConversationContextWindow(
-                ctx.conversationId,
-                emergencyCompact.summaryText,
-                ctx.contextCompactedMessageCount,
-              );
-              // Fire auto-analysis on compaction — see forceCompact() for rationale.
-              enqueueAutoAnalysisOnCompaction(
-                ctx.conversationId,
-                ctx.trustContext?.trustClass,
-              );
-              onEvent({
-                type: "context_compacted",
-                previousEstimatedInputTokens:
-                  emergencyCompact.previousEstimatedInputTokens,
-                estimatedInputTokens: emergencyCompact.estimatedInputTokens,
-                maxInputTokens: emergencyCompact.maxInputTokens,
-                thresholdTokens: emergencyCompact.thresholdTokens,
-                compactedMessages: emergencyCompact.compactedMessages,
-                summaryCalls: emergencyCompact.summaryCalls,
-                summaryInputTokens: emergencyCompact.summaryInputTokens,
-                summaryOutputTokens: emergencyCompact.summaryOutputTokens,
-                summaryModel: emergencyCompact.summaryModel,
-              });
-              emitUsage(
-                ctx,
-                emergencyCompact.summaryInputTokens,
-                emergencyCompact.summaryOutputTokens,
-                emergencyCompact.summaryModel,
-                onEvent,
-                "context_compactor",
-                reqId,
-                emergencyCompact.summaryCacheCreationInputTokens ?? 0,
-                emergencyCompact.summaryCacheReadInputTokens ?? 0,
-                collapseRawResponses(emergencyCompact.summaryRawResponses),
-              );
-              ctx.graphMemory.onCompacted(
-                emergencyCompact.compactedPersistedMessages,
-              );
-              shouldInjectWorkspace = true;
-            }
-            // Only re-inject NOW.md when ctx.messages was actually stripped;
-            // otherwise the existing block is still present.
-            const injection = await applyRuntimeInjections(ctx.messages, {
-              ...injectionOpts,
-              pkbContext: currentPkbContent,
-              nowScratchpad: convergenceStripped ? currentNowContent : null,
-              workspaceTopLevelContext: shouldInjectWorkspace
-                ? ctx.workspaceTopLevelContext
-                : null,
-              slackChronologicalMessages: reducerCompacted
-                ? null
-                : injectionOpts.slackChronologicalMessages,
-              mode: currentInjectionMode,
-            });
-            runMessages = injection.messages;
-            if (isTrustedActor && currentInjectionMode !== "minimal") {
-              ctx.graphMemory.retrackCachedNodes();
-            }
-            const emergencyStrip = stripHistoricalWebSearchResults(runMessages);
-            if (emergencyStrip.stats.blocksStripped > 0) {
-              rlog.info(
-                { phase: "emergency_compact", ...emergencyStrip.stats },
-                "Converted historical web_search_tool_result blocks to text summaries",
-              );
-              runMessages = emergencyStrip.messages;
+              await trackCompactionOutcome(ctx, true, onEvent);
+              emergencyCompact = null;
+            } else {
+              throw err;
             }
-            preRepairMessages = runMessages;
-            preRunHistoryLength = runMessages.length;
-            state.contextTooLargeDetected = false;
-            updatedHistory = await ctx.agentLoop.run(
-              runMessages,
-              eventHandler,
-              abortController.signal,
-              reqId,
-              onCheckpoint,
-              turnCallSite,
-            );
-          } else {
-            // User denied compression — emit a graceful assistant explanation
-            // instead of a conversation_error, and end the turn cleanly.
-            state.contextTooLargeDetected = false;
-            const denyText =
-              "The conversation has grown too long for the model to process, " +
-              "and compression was declined. Please start a new conversation " +
-              "or manually shorten the conversation to continue.";
-            const loopChannelMeta = {
-              ...provenanceFromTrustContext(ctx.trustContext),
-              userMessageChannel: capturedTurnChannelContext.userMessageChannel,
-              assistantMessageChannel:
-                capturedTurnChannelContext.assistantMessageChannel,
-              userMessageInterface:
-                capturedTurnInterfaceContext.userMessageInterface,
-              assistantMessageInterface:
-                capturedTurnInterfaceContext.assistantMessageInterface,
-            };
-            const denyMessage = createAssistantMessage(denyText);
-            await addMessage(
-              ctx.conversationId,
-              "assistant",
-              JSON.stringify(denyMessage.content),
-              loopChannelMeta,
-            );
-            denyCompressionMessage = denyMessage;
-            onEvent({
-              type: "assistant_text_delta",
-              text: denyText,
-              conversationId: ctx.conversationId,
-            });
-            // Prevent the final error fallback from firing
-            state.providerErrorUserMessage = null;
           }
-        } else if (action === "auto_compress_latest_turn") {
-          // Non-interactive — auto-compress without asking
-          ctx.emitActivityState(
-            "thinking",
-            "context_compacting",
-            "assistant_turn",
-            reqId,
-          );
-          const emergencyCompact = await ctx.contextWindowManager.maybeCompact(
-            ctx.messages,
-            abortController.signal,
-            {
-              lastCompactedAt: ctx.contextCompactedAt ?? undefined,
-              force: true,
-              minKeepRecentUserTurns: 0,
-              targetInputTokensOverride: correctedTarget,
-            },
-          );
           // Only track when the summary LLM actually ran; `force: true`
           // bypasses the cooldown but not the early-return paths.
-          if (emergencyCompact.summaryFailed !== undefined) {
-            trackCompactionOutcome(
+          if (
+            emergencyCompact &&
+            emergencyCompact.summaryFailed !== undefined
+          ) {
+            await trackCompactionOutcome(
               ctx,
               emergencyCompact.summaryFailed,
               onEvent,
             );
           }
-          if (emergencyCompact.compacted) {
-            ctx.messages = emergencyCompact.messages;
+          if (emergencyCompact?.compacted) {
+            applyCompactionResult(ctx, emergencyCompact, onEvent, reqId);
             reducerCompacted = true;
-            ctx.contextCompactedMessageCount +=
-              emergencyCompact.compactedPersistedMessages;
-            ctx.contextCompactedAt = Date.now();
-            updateConversationContextWindow(
-              ctx.conversationId,
-              emergencyCompact.summaryText,
-              ctx.contextCompactedMessageCount,
-            );
-            // Fire auto-analysis on compaction — see forceCompact() for rationale.
-            enqueueAutoAnalysisOnCompaction(
-              ctx.conversationId,
-              ctx.trustContext?.trustClass,
-            );
-            onEvent({
-              type: "context_compacted",
-              previousEstimatedInputTokens:
-                emergencyCompact.previousEstimatedInputTokens,
-              estimatedInputTokens: emergencyCompact.estimatedInputTokens,
-              maxInputTokens: emergencyCompact.maxInputTokens,
-              thresholdTokens: emergencyCompact.thresholdTokens,
-              compactedMessages: emergencyCompact.compactedMessages,
-              summaryCalls: emergencyCompact.summaryCalls,
-              summaryInputTokens: emergencyCompact.summaryInputTokens,
-              summaryOutputTokens: emergencyCompact.summaryOutputTokens,
-              summaryModel: emergencyCompact.summaryModel,
-            });
-            emitUsage(
-              ctx,
-              emergencyCompact.summaryInputTokens,
-              emergencyCompact.summaryOutputTokens,
-              emergencyCompact.summaryModel,
-              onEvent,
-              "context_compactor",
-              reqId,
-              emergencyCompact.summaryCacheCreationInputTokens ?? 0,
-              emergencyCompact.summaryCacheReadInputTokens ?? 0,
-              collapseRawResponses(emergencyCompact.summaryRawResponses),
-            );
-            ctx.graphMemory.onCompacted(
-              emergencyCompact.compactedPersistedMessages,
-            );
             shouldInjectWorkspace = true;
           }
@@ -2046,6 +2165,7 @@ export async function runAgentLoopImpl(
               ? null
               : injectionOpts.slackChronologicalMessages,
             mode: currentInjectionMode,
+            turnContext: buildPluginTurnContext(ctx, reqId),
           });
           runMessages = injection.messages;
           if (isTrustedActor && currentInjectionMode !== "minimal") {
@@ -2070,6 +2190,7 @@ export async function runAgentLoopImpl(
             reqId,
             onCheckpoint,
             turnCallSite,
+            loopTurnCtx,
           );
         }
         // action === "fail_gracefully" falls through to the final error below
@@ -2134,11 +2255,19 @@ export async function runAgentLoopImpl(
         assistantMessageInterface:
           capturedTurnInterfaceContext.assistantMessageInterface,
       };
-      await addMessage(
-        ctx.conversationId,
-        "user",
-        JSON.stringify(toolResultBlocks),
-        toolResultMetadata,
+      await runPipeline<PersistArgs, PersistResult>(
+        "persistence",
+        getMiddlewaresFor("persistence"),
+        defaultPersistenceTerminal,
+        {
+          op: "add",
+          conversationId: ctx.conversationId,
+          role: "user",
+          content: JSON.stringify(toolResultBlocks),
+          metadata: toolResultMetadata,
+        },
+        buildPluginTurnContext(ctx, reqId),
+        DEFAULT_TIMEOUTS.persistence,
       );
       state.pendingToolResults.clear();
     }
@@ -2151,10 +2280,6 @@ export async function runAgentLoopImpl(
       return { ...msg, content: cleanedBlocks };
     });
-    if (denyCompressionMessage) {
-      newMessages.push(denyCompressionMessage);
-    }
     const hasAssistantResponse = newMessages.some(
       (msg) => msg.role === "assistant",
     );
@@ -2176,11 +2301,19 @@ export async function runAgentLoopImpl(
       const errorAssistantMessage = createAssistantMessage(
         state.providerErrorUserMessage,
       );
-      await addMessage(
-        ctx.conversationId,
-        "assistant",
-        JSON.stringify(errorAssistantMessage.content),
-        errChannelMeta,
+      await runPipeline<PersistArgs, PersistResult>(
+        "persistence",
+        getMiddlewaresFor("persistence"),
+        defaultPersistenceTerminal,
+        {
+          op: "add",
+          conversationId: ctx.conversationId,
+          role: "assistant",
+          content: JSON.stringify(errorAssistantMessage.content),
+          metadata: errChannelMeta,
+        },
+        buildPluginTurnContext(ctx, reqId),
+        DEFAULT_TIMEOUTS.persistence,
       );
       newMessages.push(errorAssistantMessage);
       // Do NOT send assistant_text_delta here — handleProviderError already
@@ -2248,10 +2381,6 @@ export async function runAgentLoopImpl(
       },
     );
-    void getHookManager().trigger("post-message", {
-      conversationId: ctx.conversationId,
-    });
     const syncLastAssistantMessageToDisk = (): void => {
       if (!state.lastAssistantMessageId) return;
       const convForDisk = getConversation(ctx.conversationId);
@@ -2368,13 +2497,65 @@ export async function runAgentLoopImpl(
             ? { messageId: state.lastAssistantMessageId }
             : {}),
         });
+        // Emit a home-feed event for background/scheduled conversation completions.
+        // Scoped to message_complete only (not cancelled/handoff), wrapped in
+        // try-catch so malformed message content can never propagate errors.
+        try {
+          const conv = getConversation(ctx.conversationId);
+          if (
+            conv &&
+            (conv.conversationType === "background" ||
+              conv.conversationType === "scheduled")
+          ) {
+            const lastMsg = state.lastAssistantMessageId
+              ? getMessageById(state.lastAssistantMessageId, ctx.conversationId)
+              : undefined;
+            let summary: string;
+            if (lastMsg) {
+              const parsed: unknown = JSON.parse(lastMsg.content);
+              if (typeof parsed === "string") {
+                summary = parsed.slice(0, 200);
+              } else if (Array.isArray(parsed)) {
+                const textBlock = parsed.find(
+                  (b: { type?: string }) => b.type === "text",
+                );
+                summary =
+                  typeof textBlock?.text === "string"
+                    ? textBlock.text.slice(0, 200)
+                    : (conv.title ?? "Background task completed.");
+              } else {
+                summary = conv.title ?? "Background task completed.";
+              }
+            } else {
+              summary = conv.title ?? "Background task completed.";
+            }
+            void emitFeedEvent({
+              source: "assistant",
+              title: conv.title ?? "Background Task",
+              summary,
+              dedupKey: `bg-conv:${ctx.conversationId}`,
+            }).catch((err) => {
+              log.warn(
+                { err, conversationId: ctx.conversationId },
+                "Failed to emit background conversation feed event",
+              );
+            });
+          }
+        } catch (feedErr) {
+          log.warn(
+            { err: feedErr, conversationId: ctx.conversationId },
+            "Failed to build home-feed event for background conversation",
+          );
+        }
       }
     }
     // Second title pass: after 3 completed turns, re-generate the title
     // using the last 3 messages for better context. Only fires when the
-    // current title was auto-generated (isAutoTitle = 1).
-    if (ctx.turnCount === 2) {
+    // current title was auto-generated (isAutoTitle = 1) and the user
+    // has not opted out via `conversations.skipAutoRetitling`.
+    if (ctx.turnCount === 2 && !getConfig().conversations.skipAutoRetitling) {
       // turnCount is 0-indexed, incremented in finally; 2 = about to become 3rd turn
       queueRegenerateConversationTitle({
         conversationId: ctx.conversationId,
@@ -2427,12 +2608,6 @@ export async function runAgentLoopImpl(
       });
       onEvent({ type: "error", message: classified.userMessage });
       onEvent(buildConversationErrorMessage(ctx.conversationId, classified));
-      void getHookManager().trigger("on-error", {
-        error: err instanceof Error ? err.name : "Error",
-        message,
-        stack: err instanceof Error ? err.stack : undefined,
-        conversationId: ctx.conversationId,
-      });
     }
   } finally {
     if (turnStarted) {
@@ -2542,7 +2717,133 @@ function emitUsage(
   );
 }
-function collapseRawResponses(rawResponses?: unknown[]): unknown | undefined {
+/**
+ * Minimal context shape consumed by `applyCompactionResult`. Both
+ * `AgentLoopConversationContext` and `Conversation` satisfy this via structural
+ * typing, so the helper can back both the 5 agent-loop auto-compaction sites
+ * and the single `forceCompact` user-initiated site.
+ *
+ * `applyCompactionResult` writes `messages`, `contextCompactedMessageCount`
+ * and `contextCompactedAt`; it only reads the other members it touches.
+ */
+export interface CompactionApplyContext {
+  /** Conversation the compaction belongs to; echoed on the `context_compacted` event. */
+  readonly conversationId: string;
+  /** In-memory message buffer; replaced wholesale with the compacted messages. */
+  messages: Message[];
+  /** Running total, incremented by `compactedPersistedMessages` on each applied compaction. */
+  contextCompactedMessageCount: number;
+  /** Set to `Date.now()` when a compaction is applied; `null` presumably means "never compacted" — confirm. */
+  contextCompactedAt: number | null;
+  /** Receives `onCompacted(compactedPersistedMessages)` after the buffer is swapped. */
+  readonly graphMemory: ConversationGraphMemory;
+  /** Not read directly by `applyCompactionResult`; presumably consumed by `emitUsage` — confirm. */
+  readonly provider: Provider;
+  /** Not read directly by `applyCompactionResult`; presumably updated by `emitUsage` — confirm. */
+  usageStats: UsageStats;
+  /** Optional; its `trustClass` is forwarded to `enqueueAutoAnalysisOnCompaction`. */
+  trustContext?: TrustContext;
+}
+/**
+ * Applies a successful `ContextWindowResult` to a conversation: updates the
+ * in-memory message buffer and compaction counters, notifies the graph memory
+ * and conversation-summary store, enqueues auto-analysis, emits the
+ * `context_compacted` event, and records a `context_compactor` usage event.
+ *
+ * The emitted `usage_update` intentionally omits `contextWindow` — the
+ * `context_compacted` event already carries the fresh
+ * `estimatedInputTokens` / `maxInputTokens` and is the single source of
+ * truth for the UI indicator after compaction. Emitting both caused a
+ * redundant SwiftUI invalidation on every compaction.
+ *
+ * This function applies `result` unconditionally; it does not itself check
+ * whether a compaction actually took place.
+ *
+ * @param ctx - Conversation state. `messages`, `contextCompactedMessageCount`
+ *   and `contextCompactedAt` are mutated in place.
+ * @param result - Compaction outcome to apply. The optional cache-token
+ *   fields are reported as `0` when absent.
+ * @param onEvent - Receives the `context_compacted` event and is also handed
+ *   to `emitUsage`.
+ * @param reqId - Request id forwarded to `emitUsage`; may be `null`.
+ */
+export function applyCompactionResult(
+  ctx: CompactionApplyContext,
+  result: {
+    messages: Message[];
+    compactedPersistedMessages: number;
+    previousEstimatedInputTokens: number;
+    estimatedInputTokens: number;
+    maxInputTokens: number;
+    thresholdTokens: number;
+    compactedMessages: number;
+    summaryCalls: number;
+    summaryInputTokens: number;
+    summaryOutputTokens: number;
+    summaryModel: string;
+    summaryText: string;
+    summaryCacheCreationInputTokens?: number;
+    summaryCacheReadInputTokens?: number;
+    summaryRawResponses?: unknown[];
+  },
+  onEvent: (msg: ServerMessage) => void,
+  reqId: string | null,
+): void {
+  // Swap in the compacted buffer and advance the compaction bookkeeping.
+  ctx.messages = result.messages;
+  ctx.contextCompactedMessageCount += result.compactedPersistedMessages;
+  ctx.contextCompactedAt = Date.now();
+  // Tell graph memory how many persisted messages were compacted.
+  ctx.graphMemory.onCompacted(result.compactedPersistedMessages);
+  // The count passed here is the running total *after* the increment above.
+  updateConversationContextWindow(
+    ctx.conversationId,
+    result.summaryText,
+    ctx.contextCompactedMessageCount,
+  );
+  enqueueAutoAnalysisOnCompaction(
+    ctx.conversationId,
+    ctx.trustContext?.trustClass,
+  );
+  // Quality signals are derived from the summary text alone and are attached
+  // to the event below.
+  const summarySignals = computeSummaryQualitySignals(result.summaryText);
+  onEvent({
+    type: "context_compacted",
+    conversationId: ctx.conversationId,
+    previousEstimatedInputTokens: result.previousEstimatedInputTokens,
+    estimatedInputTokens: result.estimatedInputTokens,
+    maxInputTokens: result.maxInputTokens,
+    thresholdTokens: result.thresholdTokens,
+    compactedMessages: result.compactedMessages,
+    summaryCalls: result.summaryCalls,
+    summaryInputTokens: result.summaryInputTokens,
+    summaryOutputTokens: result.summaryOutputTokens,
+    summaryModel: result.summaryModel,
+    summaryCharCount: summarySignals.charCount,
+    summaryHeaderCount: summarySignals.headerCount,
+    summaryHadMemoryEcho: summarySignals.hadMemoryEcho,
+  });
+  // Record the summarizer's token usage; absent cache-token counts are
+  // reported as 0.
+  // NOTE(review): llmCallCount is fixed at 1 even though `result.summaryCalls`
+  // is available — confirm this is intentional.
+  emitUsage(
+    ctx,
+    result.summaryInputTokens,
+    result.summaryOutputTokens,
+    result.summaryModel,
+    onEvent,
+    "context_compactor",
+    reqId,
+    result.summaryCacheCreationInputTokens ?? 0,
+    result.summaryCacheReadInputTokens ?? 0,
+    collapseRawResponses(result.summaryRawResponses),
+    undefined /* providerName */,
+    1 /* llmCallCount */,
+  );
+}
+/**
+ * Collapses a list of raw responses into a single value.
+ *
+ * @param rawResponses - Optional list of raw response payloads.
+ * @returns `undefined` when the list is missing or empty, the sole element
+ *   when it holds exactly one entry, and the array itself otherwise.
+ */
+export function collapseRawResponses(
+  rawResponses?: unknown[],
+): unknown | undefined {
   if (!rawResponses || rawResponses.length === 0) return undefined;
   return rawResponses.length === 1 ? rawResponses[0] : rawResponses;
 }
+/**
+ * Matches any runtime-injection tag that should never appear inside a
+ * generated summary. If the regex hits, either the compaction strip logic
+ * failed to drop an injected block from the summarizer input, or the
+ * summarizer invented tag-like text on its own — both are quality bugs
+ * worth surfacing via telemetry.
+ *
+ * Matching is case-insensitive and anchors on an opening `<name` followed by
+ * a word boundary, so a closing tag (`</name`) on its own does not match.
+ * The pattern has no `g`/`y` flag, so `test()` keeps no `lastIndex` state
+ * between calls.
+ */
+const SUMMARY_MEMORY_ECHO_PATTERN =
+  /<(?:memory|memory_context|memory_image|turn_context|workspace|workspace_top_level|knowledge_base|pkb|system_reminder|now_scratchpad|NOW\.md|active_thread|active_subagents|active_workspace|active_dynamic_page|channel_capabilities|transport_hints|system_notice|non_interactive_context|temporal_context|guardian_context|inbound_actor_context|channel_turn_context|interface_turn_context|channel_command_context|voice_call_control)\b/i;
+/**
+ * Compute light-weight quality signals for a compaction summary. Emitted
+ * on every `context_compacted` event so regressions (short outputs,
+ * header collapse, memory-injection leakage) are visible without having
+ * to read the summary text from the DB.
+ *
+ * @param summaryText - The generated summary to inspect.
+ * @returns `charCount` — the string's `length` (UTF-16 code units);
+ *   `headerCount` — the number of lines that begin with `## ` (deeper
+ *   headings such as `### ` are not counted); `hadMemoryEcho` — whether
+ *   `SUMMARY_MEMORY_ECHO_PATTERN` matches anywhere in the text.
+ */
+export function computeSummaryQualitySignals(summaryText: string): {
+  charCount: number;
+  headerCount: number;
+  hadMemoryEcho: boolean;
+} {
+  const charCount = summaryText.length;
+  // `match` yields null when there are no hits, hence the `?? []` fallback.
+  const headerCount = (summaryText.match(/^## /gm) ?? []).length;
+  const hadMemoryEcho = SUMMARY_MEMORY_ECHO_PATTERN.test(summaryText);
+  return { charCount, headerCount, hadMemoryEcho };
+}