@vellumai/assistant 0.8.1 → 0.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +2 -7
- package/Dockerfile +75 -1
- package/bun.lock +11 -1
- package/docker-entrypoint.sh +5 -0
- package/docker-init-apt-root.sh +94 -0
- package/docker-kata-apt-env.sh +39 -0
- package/docs/plugins.md +88 -47
- package/docs/skills.md +9 -7
- package/examples/plugins/echo/README.md +27 -27
- package/examples/plugins/echo/package.json +3 -0
- package/examples/plugins/echo/register.ts +31 -31
- package/node_modules/@vellumai/slack-text/src/index.test.ts +114 -14
- package/node_modules/@vellumai/slack-text/src/index.ts +82 -18
- package/openapi.yaml +325 -3
- package/package.json +3 -1
- package/scripts/generate-openapi.ts +83 -10
- package/scripts/sync-llm-catalog.ts +2 -2
- package/scripts/sync-web-search-catalog.ts +47 -25
- package/src/__tests__/agent-image-optimize.test.ts +11 -3
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +131 -0
- package/src/__tests__/anthropic-provider.test.ts +45 -0
- package/src/__tests__/app-builder-tool-scripts.test.ts +9 -3
- package/src/__tests__/app-executors.test.ts +220 -4
- package/src/__tests__/auto-analysis-end-to-end.test.ts +35 -0
- package/src/__tests__/bundled-asset.test.ts +6 -6
- package/src/__tests__/channel-availability-routes.test.ts +206 -0
- package/src/__tests__/channel-delivery-store.test.ts +289 -1
- package/src/__tests__/circuit-breaker-pipeline.test.ts +0 -1
- package/src/__tests__/clawhub.test.ts +75 -16
- package/src/__tests__/compactor-tail-resolution.test.ts +41 -0
- package/src/__tests__/config-schema.test.ts +21 -0
- package/src/__tests__/config-set-route.test.ts +80 -0
- package/src/__tests__/config-sounds-sync.test.ts +97 -0
- package/src/__tests__/config-watcher-skill-reseed.test.ts +453 -0
- package/src/__tests__/context-search-conversations-source.test.ts +117 -2
- package/src/__tests__/context-search-memory-v2-source.test.ts +0 -1
- package/src/__tests__/context-search-workspace-source.test.ts +7 -0
- package/src/__tests__/context-token-estimator.test.ts +1 -0
- package/src/__tests__/conversation-abort-tool-results.test.ts +4 -1
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +1 -0
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +92 -92
- package/src/__tests__/conversation-agent-loop.test.ts +2 -0
- package/src/__tests__/conversation-error.test.ts +42 -3
- package/src/__tests__/conversation-fork-crud.test.ts +82 -0
- package/src/__tests__/conversation-inference-profile-route.test.ts +40 -4
- package/src/__tests__/conversation-lifecycle.test.ts +173 -0
- package/src/__tests__/conversation-message-sync-tags.test.ts +97 -0
- package/src/__tests__/conversation-pairing.test.ts +54 -0
- package/src/__tests__/conversation-process-callsite.test.ts +4 -1
- package/src/__tests__/conversation-provider-retry-repair.test.ts +5 -1
- package/src/__tests__/conversation-queue.test.ts +4 -1
- package/src/__tests__/conversation-runtime-assembly.test.ts +76 -9
- package/src/__tests__/conversation-slash-queue.test.ts +59 -1
- package/src/__tests__/conversation-slash-unknown.test.ts +4 -1
- package/src/__tests__/conversation-surfaces-table-action.test.ts +360 -0
- package/src/__tests__/conversation-sync-tags.test.ts +235 -0
- package/src/__tests__/conversation-workspace-injection.test.ts +5 -1
- package/src/__tests__/conversation-workspace-tool-tracking.test.ts +5 -1
- package/src/__tests__/credential-security-invariants.test.ts +3 -2
- package/src/__tests__/db-slack-external-content-normalization.test.ts +301 -0
- package/src/__tests__/delete-managed-skill-tool.test.ts +55 -13
- package/src/__tests__/disk-pressure-tools.test.ts +1 -0
- package/src/__tests__/dm-backfill.test.ts +121 -10
- package/src/__tests__/document-tool-security.test.ts +258 -0
- package/src/__tests__/dynamic-skill-workflow-prompt.test.ts +0 -1
- package/src/__tests__/edit-propagation.test.ts +33 -0
- package/src/__tests__/empty-response-pipeline.test.ts +0 -4
- package/src/__tests__/external-plugin-loader.test.ts +60 -36
- package/src/__tests__/filing-service.test.ts +140 -0
- package/src/__tests__/get-skill-detail-audit.test.ts +0 -4
- package/src/__tests__/handlers-skills-memory-v2-reseed.test.ts +43 -62
- package/src/__tests__/helpers/tar-fixtures.ts +39 -0
- package/src/__tests__/helpers/wait-for.ts +21 -0
- package/src/__tests__/history-repair-pipeline.test.ts +0 -3
- package/src/__tests__/history-repair.test.ts +73 -0
- package/src/__tests__/host-app-control-proxy.test.ts +266 -10
- package/src/__tests__/image-credentials.test.ts +1 -1
- package/src/__tests__/inbound-slack-persistence.test.ts +2 -0
- package/src/__tests__/inference-no-mode-boot-e2e.test.ts +1 -1
- package/src/__tests__/inference-profile-reaper.test.ts +4 -2
- package/src/__tests__/inference-profile-session-handler.test.ts +18 -6
- package/src/__tests__/inference-profile-session-ipc.test.ts +17 -5
- package/src/__tests__/injector-chain.test.ts +10 -8
- package/src/__tests__/install-skill-routing.test.ts +155 -37
- package/src/__tests__/lifecycle-memory-v2-seed.test.ts +92 -3
- package/src/__tests__/list-messages-page-latest.test.ts +55 -0
- package/src/__tests__/llm-call-pipeline.test.ts +0 -3
- package/src/__tests__/llm-catalog-parity.test.ts +55 -13
- package/src/__tests__/llm-request-log-source-clickhouse.test.ts +34 -0
- package/src/__tests__/llm-request-log-source-factory.test.ts +29 -53
- package/src/__tests__/llm-usage-store.test.ts +114 -0
- package/src/__tests__/managed-profile-guard.test.ts +31 -29
- package/src/__tests__/managed-skill-lifecycle.test.ts +109 -18
- package/src/__tests__/managed-store.test.ts +84 -192
- package/src/__tests__/media-generate-image.test.ts +1 -1
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -2
- package/src/__tests__/messages-after-tiebreaker.test.ts +122 -0
- package/src/__tests__/oauth-commands-routes.test.ts +168 -16
- package/src/__tests__/oauth-provider-profiles.test.ts +9 -0
- package/src/__tests__/openai-provider.test.ts +24 -0
- package/src/__tests__/openai-responses-cutover-guard.test.ts +17 -9
- package/src/__tests__/overflow-reduce-pipeline.test.ts +0 -2
- package/src/__tests__/persistence-pipeline.test.ts +0 -2
- package/src/__tests__/{managed-proxy-context.test.ts → platform-proxy-context.test.ts} +1 -1
- package/src/__tests__/platform.test.ts +2 -0
- package/src/__tests__/plugin-api-shim.test.ts +125 -0
- package/src/__tests__/plugin-bootstrap.test.ts +10 -36
- package/src/__tests__/plugin-external-api.test.ts +68 -0
- package/src/__tests__/plugin-registry.test.ts +0 -77
- package/src/__tests__/plugin-route-contribution.test.ts +0 -1
- package/src/__tests__/plugin-skill-contribution.test.ts +0 -2
- package/src/__tests__/plugin-tool-contribution.test.ts +16 -15
- package/src/__tests__/plugin-types.test.ts +3 -13
- package/src/__tests__/process-message-background-slack.test.ts +8 -1
- package/src/__tests__/process-message-display-content.test.ts +421 -0
- package/src/__tests__/provider-catalog-visibility.test.ts +142 -0
- package/src/__tests__/provider-error-scenarios.test.ts +111 -0
- package/src/__tests__/{provider-managed-proxy-integration.test.ts → provider-platform-proxy-integration.test.ts} +8 -8
- package/src/__tests__/scaffold-managed-skill-tool.test.ts +65 -13
- package/src/__tests__/schedule-routes.test.ts +50 -3
- package/src/__tests__/schedule-store.test.ts +94 -0
- package/src/__tests__/scheduler-reuse-conversation.test.ts +54 -7
- package/src/__tests__/schema-transforms.test.ts +20 -0
- package/src/__tests__/search-skills-unified.test.ts +0 -5
- package/src/__tests__/server-history-render.test.ts +43 -0
- package/src/__tests__/skill-load-feature-flag.test.ts +0 -12
- package/src/__tests__/skill-load-tool.test.ts +27 -89
- package/src/__tests__/skill-memory.test.ts +23 -3
- package/src/__tests__/skills-file-content-endpoint.test.ts +9 -38
- package/src/__tests__/skills-files-catalog-fallback.test.ts +0 -3
- package/src/__tests__/skills-install-extract.test.ts +49 -38
- package/src/__tests__/skills-install-staging.test.ts +159 -0
- package/src/__tests__/skills-uninstall.test.ts +9 -41
- package/src/__tests__/skills.test.ts +51 -58
- package/src/__tests__/slack-channel-config.test.ts +9 -0
- package/src/__tests__/subagent-tool-filtering.test.ts +50 -0
- package/src/__tests__/system-prompt.test.ts +737 -63
- package/src/__tests__/terminal-tools.test.ts +28 -1
- package/src/__tests__/thread-backfill.test.ts +557 -27
- package/src/__tests__/title-generate-pipeline.test.ts +0 -13
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -3
- package/src/__tests__/tool-error-pipeline.test.ts +0 -3
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -5
- package/src/__tests__/tool-executor-lifecycle-events.test.ts +1 -1
- package/src/__tests__/tool-executor.test.ts +16 -4
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -12
- package/src/__tests__/turn-events-store.test.ts +256 -0
- package/src/__tests__/twilio-routes.test.ts +4 -0
- package/src/__tests__/user-plugin-loader.test.ts +0 -7
- package/src/__tests__/voice-session-bridge.test.ts +198 -0
- package/src/__tests__/web-search-catalog-parity.test.ts +32 -10
- package/src/__tests__/workspace-migration-057-repair-stale-gemini-model-ids.test.ts +115 -3
- package/src/__tests__/workspace-migration-072-seed-reply-suggestion-callsite.test.ts +50 -0
- package/src/__tests__/workspace-migration-073-repair-recall-callsite-empty-profile.test.ts +153 -0
- package/src/__tests__/workspace-migration-085-memory-v2-bm25-b-reembed-disabled-v2-pages.test.ts +220 -0
- package/src/__tests__/workspace-migration-086-revert-stale-gemini-mis-rewrites.test.ts +269 -0
- package/src/__tests__/workspace-migration-remove-legacy-skills-index.test.ts +309 -0
- package/src/__tests__/workspace-migrations-runner.test.ts +111 -3
- package/src/acp/resolve-agent.ts +1 -1
- package/src/agent/image-optimize.ts +13 -5
- package/src/calls/voice-session-bridge.ts +61 -42
- package/src/channels/types.ts +108 -0
- package/src/cli/__tests__/unknown-command.test.ts +24 -0
- package/src/cli/commands/__tests__/changelog.test.ts +304 -319
- package/src/cli/commands/__tests__/schedules.test.ts +491 -0
- package/src/cli/commands/changelog.ts +106 -42
- package/src/cli/commands/conversations.ts +102 -17
- package/src/cli/commands/default-action.ts +10 -53
- package/src/cli/commands/notifications.ts +329 -317
- package/src/cli/commands/plugins.ts +185 -0
- package/src/cli/commands/schedules.ts +391 -0
- package/src/cli/commands/telemetry.ts +40 -0
- package/src/cli/lib/__tests__/cli-colors.test.ts +48 -0
- package/src/cli/lib/__tests__/confirm-prompt.test.ts +159 -0
- package/src/cli/lib/__tests__/install-from-github.test.ts +355 -0
- package/src/cli/lib/__tests__/list-installed-plugins.test.ts +154 -0
- package/src/cli/lib/__tests__/uninstall-plugin.test.ts +124 -0
- package/src/cli/lib/__tests__/unknown-command.test.ts +106 -0
- package/src/cli/lib/cli-colors.ts +12 -0
- package/src/cli/lib/confirm-prompt.ts +79 -0
- package/src/cli/lib/install-from-github.ts +304 -0
- package/src/cli/lib/list-installed-plugins.ts +137 -0
- package/src/cli/lib/uninstall-plugin.ts +82 -0
- package/src/cli/lib/unknown-command.ts +111 -0
- package/src/cli/program.ts +38 -2
- package/src/config/bundled-skills/app-builder/SKILL.md +23 -21
- package/src/config/bundled-skills/app-builder/TOOLS.json +7 -0
- package/src/config/bundled-skills/computer-use/TOOLS.json +15 -52
- package/src/config/bundled-skills/document/SKILL.md +23 -3
- package/src/config/bundled-skills/document/TOOLS.json +53 -0
- package/src/config/bundled-skills/document/tools/document-delete.ts +12 -0
- package/src/config/bundled-skills/document/tools/document-list.ts +12 -0
- package/src/config/bundled-skills/document/tools/document-read.ts +12 -0
- package/src/config/bundled-skills/skill-management/SKILL.md +2 -2
- package/src/config/bundled-skills/skill-management/TOOLS.json +7 -7
- package/src/config/bundled-tool-registry.ts +6 -0
- package/src/config/feature-flag-registry.json +41 -1
- package/src/config/loader.ts +64 -38
- package/src/config/schema.ts +7 -10
- package/src/config/schemas/__tests__/llm-request-logs.test.ts +36 -0
- package/src/config/schemas/channels.ts +8 -0
- package/src/config/schemas/compaction.ts +28 -0
- package/src/config/schemas/heartbeat.ts +9 -0
- package/src/config/schemas/llm-request-logs.ts +31 -7
- package/src/config/schemas/llm.ts +3 -0
- package/src/config/schemas/memory-retrieval.ts +18 -0
- package/src/config/schemas/tools.ts +14 -0
- package/src/config/skills.ts +3 -96
- package/src/context/compactor.ts +1047 -0
- package/src/context/token-estimator.ts +2 -2
- package/src/context/window-manager.ts +197 -1520
- package/src/credential-execution/managed-catalog.ts +37 -0
- package/src/credential-health/credential-health-service.ts +280 -19
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +34 -0
- package/src/daemon/__tests__/conversation-tool-setup-exclude.test.ts +138 -0
- package/src/daemon/__tests__/conversation-tool-setup.test.ts +74 -0
- package/src/daemon/approval-generators.ts +8 -6
- package/src/daemon/config-watcher.ts +94 -31
- package/src/daemon/conversation-agent-loop.ts +169 -9
- package/src/daemon/conversation-error.ts +171 -37
- package/src/daemon/conversation-lifecycle.ts +53 -40
- package/src/daemon/conversation-messaging.ts +25 -6
- package/src/daemon/conversation-process.ts +49 -12
- package/src/daemon/conversation-runtime-assembly.ts +16 -1
- package/src/daemon/conversation-slash.ts +12 -5
- package/src/daemon/conversation-store.ts +11 -4
- package/src/daemon/conversation-tool-setup.ts +39 -7
- package/src/daemon/conversation.ts +33 -1
- package/src/daemon/external-plugins-bootstrap.ts +217 -181
- package/src/daemon/first-greeting.ts +22 -2
- package/src/daemon/handlers/config-model.ts +6 -5
- package/src/daemon/handlers/config-slack-channel.ts +15 -3
- package/src/daemon/handlers/shared.ts +14 -5
- package/src/daemon/handlers/skills.ts +111 -108
- package/src/daemon/history-repair.ts +28 -1
- package/src/daemon/host-app-control-proxy.ts +98 -23
- package/src/daemon/lifecycle.ts +45 -35
- package/src/daemon/meet-host-supervisor.ts +5 -4
- package/src/daemon/memory-v2-startup.ts +49 -0
- package/src/daemon/message-protocol.ts +1 -0
- package/src/daemon/message-types/conversations.ts +25 -0
- package/src/daemon/message-types/messages.ts +61 -0
- package/src/daemon/message-types/subagents.ts +1 -0
- package/src/daemon/message-types/sync.ts +1 -0
- package/src/daemon/pkb-reminder-builder.test.ts +1 -1
- package/src/daemon/pkb-reminder-builder.ts +1 -1
- package/src/daemon/plugin-source-watcher.ts +146 -0
- package/src/daemon/process-message.ts +21 -3
- package/src/daemon/server.ts +11 -2
- package/src/daemon/skill-memory-refresh.ts +29 -0
- package/src/documents/document-store.ts +221 -3
- package/src/embedded/plugin-api.ts +40 -0
- package/src/filing/filing-service.ts +39 -0
- package/src/heartbeat/__tests__/heartbeat-service.test.ts +91 -6
- package/src/heartbeat/heartbeat-run-store.ts +2 -1
- package/src/heartbeat/heartbeat-service.ts +41 -0
- package/src/home/__tests__/feed-types.test.ts +40 -0
- package/src/home/feed-types.ts +22 -0
- package/src/home/post-connect-feed.ts +1 -0
- package/src/index.ts +18 -1
- package/src/live-voice/__tests__/live-voice-stt.test.ts +57 -0
- package/src/mcp/client.ts +20 -4
- package/src/media/image-credentials.ts +3 -3
- package/src/memory/__tests__/bookmark-crud.test.ts +33 -27
- package/src/memory/__tests__/conversation-queries.test.ts +263 -0
- package/src/memory/__tests__/jobs-worker-v2-graph-trigger-embed.test.ts +113 -0
- package/src/memory/__tests__/memory-retrospective-startup-cleanup.test.ts +119 -14
- package/src/memory/__tests__/message-content.test.ts +35 -0
- package/src/memory/bookmark-crud.ts +42 -10
- package/src/memory/context-search/sources/conversations.ts +62 -2
- package/src/memory/context-search/sources/workspace.ts +4 -0
- package/src/memory/conversation-crud.ts +63 -19
- package/src/memory/conversation-queries.ts +110 -10
- package/src/memory/db-init.ts +6 -0
- package/src/memory/delivery-crud.ts +152 -5
- package/src/memory/embedding-backend.ts +4 -4
- package/src/memory/external-conversation-store.ts +66 -5
- package/src/memory/graph/__tests__/conversation-graph-memory-v2-routing.test.ts +66 -9
- package/src/memory/graph/conversation-graph-memory.ts +31 -15
- package/src/memory/graph/tools.ts +3 -3
- package/src/memory/indexer.ts +34 -29
- package/src/memory/jobs/__tests__/embed-concept-page.test.ts +73 -0
- package/src/memory/jobs/embed-concept-page.ts +20 -11
- package/src/memory/jobs-worker.ts +6 -1
- package/src/memory/llm-request-log-source-clickhouse.ts +17 -10
- package/src/memory/llm-request-log-source.ts +19 -52
- package/src/memory/llm-usage-store.ts +125 -5
- package/src/memory/memory-retrospective-startup-cleanup.ts +72 -5
- package/src/memory/message-content.ts +1 -1
- package/src/memory/migrations/109-external-conversation-bindings.ts +15 -4
- package/src/memory/migrations/229-delete-private-conversations.test.ts +38 -1
- package/src/memory/migrations/229-delete-private-conversations.ts +7 -0
- package/src/memory/migrations/247-external-conversation-binding-thread-id.ts +78 -0
- package/src/memory/migrations/248-create-onboarding-events.ts +21 -0
- package/src/memory/migrations/249-normalize-slack-external-content.ts +240 -0
- package/src/memory/migrations/index.ts +6 -0
- package/src/memory/migrations/registry.ts +8 -0
- package/src/memory/onboarding-events-store.ts +106 -0
- package/src/memory/schema/bookmarks.ts +0 -2
- package/src/memory/schema/calls.ts +1 -0
- package/src/memory/schema/inference.ts +1 -3
- package/src/memory/schema/infrastructure.ts +12 -0
- package/src/memory/turn-events-store.ts +127 -2
- package/src/memory/v2/__tests__/activation.test.ts +0 -8
- package/src/memory/v2/__tests__/injection.test.ts +98 -8
- package/src/memory/v2/__tests__/migration.test.ts +87 -0
- package/src/memory/v2/__tests__/page-index.test.ts +83 -0
- package/src/memory/v2/__tests__/prompts-router.test.ts +58 -6
- package/src/memory/v2/__tests__/qdrant.test.ts +66 -3
- package/src/memory/v2/__tests__/router.test.ts +15 -0
- package/src/memory/v2/__tests__/skill-store.test.ts +387 -8
- package/src/memory/v2/injection.ts +32 -6
- package/src/memory/v2/migration.ts +49 -19
- package/src/memory/v2/page-index.ts +35 -5
- package/src/memory/v2/prompts/router.ts +11 -8
- package/src/memory/v2/prompts/sweep.ts +2 -2
- package/src/memory/v2/qdrant.ts +135 -7
- package/src/memory/v2/router.ts +9 -8
- package/src/memory/v2/skill-store.ts +120 -35
- package/src/messaging/providers/slack/__tests__/adapter-token-routing.test.ts +45 -5
- package/src/messaging/providers/slack/__tests__/download.test.ts +231 -0
- package/src/messaging/providers/slack/adapter.ts +43 -5
- package/src/messaging/providers/slack/client.ts +27 -0
- package/src/messaging/providers/slack/deep-link.ts +65 -0
- package/src/messaging/providers/slack/download.ts +104 -0
- package/src/messaging/providers/slack/message-metadata.test.ts +32 -0
- package/src/messaging/providers/slack/message-metadata.ts +27 -0
- package/src/messaging/providers/slack/render-transcript.test.ts +134 -0
- package/src/messaging/providers/slack/render-transcript.ts +69 -5
- package/src/messaging/providers/slack/types.ts +20 -1
- package/src/notifications/conversation-pairing.ts +2 -1
- package/src/notifications/decision-engine.ts +2 -1
- package/src/notifications/emit-signal.ts +20 -1
- package/src/notifications/home-feed-side-effect.ts +54 -0
- package/src/notifications/signal.ts +3 -1
- package/src/oauth/connection-resolver.ts +8 -4
- package/src/oauth/platform-connection.ts +6 -2
- package/src/oauth/seed-providers.ts +10 -1
- package/src/permissions/checker.ts +2 -0
- package/src/permissions/ipc-risk-types.ts +1 -0
- package/src/permissions/question-prompter.test.ts +416 -0
- package/src/permissions/question-prompter.ts +294 -0
- package/src/platform/client.test.ts +1 -1
- package/src/platform/client.ts +1 -1
- package/src/plugin-api/constants.ts +26 -0
- package/src/plugin-api/index.ts +34 -1
- package/src/plugin-api/types.ts +104 -22
- package/src/plugins/defaults/circuit-breaker.ts +0 -5
- package/src/plugins/defaults/compaction.ts +0 -4
- package/src/plugins/defaults/empty-response.ts +0 -2
- package/src/plugins/defaults/history-repair.ts +0 -2
- package/src/plugins/defaults/injectors.ts +36 -3
- package/src/plugins/defaults/llm-call.ts +0 -2
- package/src/plugins/defaults/memory-retrieval.ts +0 -1
- package/src/plugins/defaults/overflow-reduce.ts +0 -1
- package/src/plugins/defaults/persistence.ts +0 -2
- package/src/plugins/defaults/title-generate.ts +0 -5
- package/src/plugins/defaults/token-estimate.ts +0 -2
- package/src/plugins/defaults/tool-error.ts +0 -7
- package/src/plugins/defaults/tool-execute.ts +0 -2
- package/src/plugins/defaults/tool-result-truncate.ts +0 -4
- package/src/plugins/ensure-plugin-api-shim.ts +96 -0
- package/src/plugins/external-api.ts +104 -0
- package/src/plugins/external-plugin-loader.ts +105 -32
- package/src/plugins/feature-gate.ts +22 -0
- package/src/plugins/pipeline.ts +37 -0
- package/src/plugins/registry.ts +48 -80
- package/src/plugins/types.ts +31 -26
- package/src/plugins/user-loader.ts +21 -2
- package/src/proactive-artifact/aux-message-injector.ts +11 -0
- package/src/proactive-artifact/job.test.ts +37 -5
- package/src/prompts/__tests__/system-prompt.test.ts +12 -0
- package/src/prompts/__tests__/task-progress-hint-section.test.ts +99 -0
- package/src/prompts/normalize-onboarding.ts +27 -0
- package/src/prompts/sections.ts +302 -0
- package/src/prompts/system-prompt.ts +63 -166
- package/src/prompts/templates/BOOTSTRAP.md +17 -1
- package/src/prompts/templates/system-sections.ts +173 -0
- package/src/providers/__tests__/inference.test.ts +22 -7
- package/src/providers/anthropic/client.ts +28 -28
- package/src/providers/connection-resolution.ts +7 -0
- package/src/providers/inference/adapter-factory.ts +41 -4
- package/src/providers/inference/connections.ts +74 -29
- package/src/providers/inference/resolve-auth.ts +12 -4
- package/src/providers/model-catalog.ts +294 -12
- package/src/providers/openai/chat-completions-provider.ts +10 -2
- package/src/providers/openrouter/client.ts +7 -0
- package/src/providers/{managed-proxy → platform-proxy}/constants.ts +4 -1
- package/src/providers/{managed-proxy → platform-proxy}/context.ts +3 -3
- package/src/providers/provider-availability.ts +17 -2
- package/src/providers/provider-catalog-visibility.ts +36 -0
- package/src/providers/registry.ts +22 -14
- package/src/providers/retry.ts +47 -1
- package/src/runtime/__tests__/agent-wake.test.ts +152 -0
- package/src/runtime/agent-wake.ts +42 -14
- package/src/runtime/auth/route-policy.ts +8 -1
- package/src/runtime/btw-sidechain.ts +2 -0
- package/src/runtime/http-types.ts +19 -0
- package/src/runtime/migrations/origin-mode.ts +1 -1
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/bookmark-routes.test.ts +17 -0
- package/src/runtime/routes/__tests__/conversation-management-routes.test.ts +5 -1
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +107 -20
- package/src/runtime/routes/__tests__/question-routes.test.ts +395 -0
- package/src/runtime/routes/__tests__/tts-routes.test.ts +64 -1
- package/src/runtime/routes/acp-routes-list.test.ts +143 -0
- package/src/runtime/routes/acp-routes.ts +5 -3
- package/src/runtime/routes/auth-routes.ts +1 -1
- package/src/runtime/routes/bookmark-routes.ts +5 -3
- package/src/runtime/routes/btw-routes.ts +5 -1
- package/src/runtime/routes/channel-availability-routes.ts +121 -0
- package/src/runtime/routes/conversation-cli-routes.ts +44 -3
- package/src/runtime/routes/conversation-list-routes.ts +3 -20
- package/src/runtime/routes/conversation-management-routes.ts +17 -42
- package/src/runtime/routes/conversation-query-routes.ts +40 -35
- package/src/runtime/routes/conversation-routes.ts +90 -11
- package/src/runtime/routes/documents-routes.ts +25 -86
- package/src/runtime/routes/group-routes.ts +5 -0
- package/src/runtime/routes/inbound-conversation.ts +28 -8
- package/src/runtime/routes/inbound-message-handler.ts +236 -41
- package/src/runtime/routes/inbound-stages/background-dispatch.test.ts +111 -0
- package/src/runtime/routes/inbound-stages/background-dispatch.ts +32 -1
- package/src/runtime/routes/inbound-stages/edit-intercept.ts +17 -4
- package/src/runtime/routes/index.ts +6 -0
- package/src/runtime/routes/inference-profile-session-handler.ts +17 -44
- package/src/runtime/routes/inference-profile-session-reaper.ts +7 -21
- package/src/runtime/routes/inference-provider-connection-routes.ts +65 -21
- package/src/runtime/routes/integrations/slack/share.ts +4 -52
- package/src/runtime/routes/integrations/slack/token.ts +43 -0
- package/src/runtime/routes/integrations/twilio.ts +6 -13
- package/src/runtime/routes/notification-routes.ts +1 -1
- package/src/runtime/routes/oauth-commands-routes.ts +105 -15
- package/src/runtime/routes/oauth-lifecycle-routes.ts +43 -0
- package/src/runtime/routes/question-routes.ts +259 -0
- package/src/runtime/routes/rename-conversation-routes.ts +2 -33
- package/src/runtime/routes/schedule-routes.ts +4 -7
- package/src/runtime/routes/subagents-routes.ts +57 -18
- package/src/runtime/routes/telemetry-routes.ts +27 -0
- package/src/runtime/routes/tts-routes.ts +27 -2
- package/src/runtime/routes/workspace-routes.test.ts +43 -0
- package/src/runtime/routes/workspace-routes.ts +28 -0
- package/src/runtime/services/conversation-serializer.ts +39 -7
- package/src/runtime/sync/resource-sync-events.ts +93 -1
- package/src/schedule/schedule-store.ts +27 -2
- package/src/schedule/scheduler.ts +9 -1
- package/src/security/__tests__/untrusted-content.test.ts +86 -0
- package/src/security/untrusted-content.ts +93 -8
- package/src/skills/catalog-files.ts +1 -1
- package/src/skills/catalog-install.ts +233 -116
- package/src/skills/clawhub.ts +70 -13
- package/src/skills/managed-store.ts +4 -119
- package/src/skills/skillssh-registry.ts +27 -48
- package/src/subagent/manager.ts +15 -7
- package/src/telemetry/types.ts +113 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +312 -5
- package/src/telemetry/usage-telemetry-reporter.ts +113 -7
- package/src/tools/apps/executors.ts +58 -7
- package/src/tools/ask-question/ask-question-tool.test.ts +509 -0
- package/src/tools/ask-question/ask-question-tool.ts +304 -0
- package/src/tools/browser/browser-execution.ts +15 -11
- package/src/tools/computer-use/definitions.ts +3 -3
- package/src/tools/credentials/vault.ts +1 -1
- package/src/tools/document/document-tool.ts +124 -1
- package/src/tools/filesystem/edit.ts +1 -1
- package/src/tools/filesystem/list.ts +1 -1
- package/src/tools/filesystem/read.ts +1 -1
- package/src/tools/filesystem/write.ts +5 -2
- package/src/tools/host-filesystem/transfer.ts +1 -1
- package/src/tools/host-terminal/host-shell.ts +1 -1
- package/src/tools/permission-checker.ts +1 -1
- package/src/tools/registry.ts +17 -7
- package/src/tools/schedule/create.ts +2 -2
- package/src/tools/schema-transforms.ts +7 -2
- package/src/tools/side-effects.ts +1 -0
- package/src/tools/skills/delete-managed.ts +4 -4
- package/src/tools/skills/execute.ts +1 -1
- package/src/tools/skills/scaffold-managed.ts +3 -2
- package/src/tools/subagent/notify-parent.ts +1 -1
- package/src/tools/system/request-permission.ts +2 -2
- package/src/tools/terminal/safe-env.ts +60 -1
- package/src/tools/tool-manifest.ts +2 -0
- package/src/tools/types.ts +72 -21
- package/src/tools/ui-surface/definitions.ts +6 -5
- package/src/tts/__tests__/provider-adapters.test.ts +76 -2
- package/src/tts/providers/elevenlabs-provider.ts +75 -1
- package/src/types/onboarding-context.ts +2 -0
- package/src/util/errors.ts +17 -0
- package/src/util/platform.ts +10 -0
- package/src/watcher/__tests__/engine.test.ts +22 -0
- package/src/watcher/engine.ts +6 -2
- package/src/workspace/migrations/057-repair-stale-gemini-model-ids.ts +80 -15
- package/src/workspace/migrations/072-seed-reply-suggestion-callsite.ts +35 -22
- package/src/workspace/migrations/073-repair-recall-callsite-empty-profile.ts +3 -1
- package/src/workspace/migrations/083-system-prompt-prefix-to-file.ts +191 -0
- package/src/workspace/migrations/084-remove-legacy-skills-index.ts +276 -0
- package/src/workspace/migrations/085-memory-v2-bm25-b-reembed-disabled-v2-pages.ts +137 -0
- package/src/workspace/migrations/086-revert-stale-gemini-mis-rewrites.ts +198 -0
- package/src/workspace/migrations/registry.ts +8 -0
- package/src/workspace/migrations/runner.ts +39 -9
- package/src/workspace/migrations/types.ts +4 -0
- package/examples/plugins/echo/bun.lock +0 -25
- package/src/__tests__/context-window-manager.test.ts +0 -2481
- package/src/context/__tests__/compact-prompt.test.ts +0 -63
- package/src/context/prompts/compact.md +0 -26
- package/src/prompts/__tests__/build-cli-reference-section.test.ts +0 -37
- package/src/__tests__/{secret-routes-managed-proxy.test.ts → secret-routes-platform-proxy.test.ts} +0 -0
|
@@ -1,247 +1,46 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Context window manager — the surface the rest of the daemon talks to
|
|
3
|
+
* when it needs to know whether and how to compact a conversation.
|
|
4
|
+
*
|
|
5
|
+
* The actual compaction work is delegated to {@link runAssistantDrivenCompaction}
|
|
6
|
+
* in `./compactor.js`, which hands the model the full conversation plus a
|
|
7
|
+
* user-role instruction message and lets the assistant write its own
|
|
8
|
+
* summary and choose its own cut point.
|
|
9
|
+
*
|
|
10
|
+
* This module retains a small set of legacy exports — `CONTEXT_SUMMARY_MARKER`,
|
|
11
|
+
* `createContextSummaryMessage`, `getSummaryFromContextMessage` — because
|
|
12
|
+
* conversation reload, fork inheritance, and Slack chronological-context
|
|
13
|
+
* assembly all detect a previously-produced summary via the marker. The
|
|
14
|
+
* marker is wrapped around the assistant-role memory message we emit on
|
|
15
|
+
* successful compaction so those code paths keep working unchanged.
|
|
16
|
+
*/
|
|
17
|
+
import { getConfig } from "../config/loader.js";
|
|
18
|
+
import type { CompactionConfig } from "../config/schemas/compaction.js";
|
|
4
19
|
import type { LLMCallSite } from "../config/schemas/llm.js";
|
|
5
20
|
import type { ContextWindowConfig } from "../config/types.js";
|
|
6
21
|
import type {
|
|
7
22
|
ContentBlock,
|
|
8
|
-
ImageContent,
|
|
9
23
|
Message,
|
|
10
24
|
Provider,
|
|
25
|
+
ToolDefinition,
|
|
11
26
|
} from "../providers/types.js";
|
|
12
|
-
import { resolveBundledDir } from "../util/bundled-asset.js";
|
|
13
27
|
import { getLogger } from "../util/logger.js";
|
|
14
|
-
import { safeStringSlice } from "../util/unicode.js";
|
|
15
28
|
import {
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
} from "./token-estimator.js";
|
|
20
|
-
import { truncateToolResultsAcrossHistory } from "./tool-result-truncation.js";
|
|
29
|
+
type CompactionRunArgs,
|
|
30
|
+
runAssistantDrivenCompaction,
|
|
31
|
+
} from "./compactor.js";
|
|
32
|
+
import { estimatePromptTokens } from "./token-estimator.js";
|
|
21
33
|
|
|
22
34
|
const log = getLogger("context-window");
|
|
23
35
|
|
|
24
36
|
export const CONTEXT_SUMMARY_MARKER = "<context_summary>";
|
|
25
|
-
const
|
|
26
|
-
const MAX_BLOCK_PREVIEW_CHARS = 3000;
|
|
27
|
-
const MAX_FALLBACK_SUMMARY_CHARS = 12000;
|
|
28
|
-
const COMPACTION_COOLDOWN_MS = 2 * 60 * 1000;
|
|
29
|
-
const MIN_GAIN_TOKENS_DURING_COOLDOWN = 1200;
|
|
30
|
-
const SEVERE_PRESSURE_RATIO = 0.95;
|
|
31
|
-
const COMPACTION_TOOL_RESULT_MAX_CHARS = 6_000;
|
|
32
|
-
const MIN_COMPACTABLE_PERSISTED_MESSAGES = 2;
|
|
37
|
+
const CONTEXT_SUMMARY_CLOSE = "</context_summary>";
|
|
33
38
|
const INTERNAL_CONTEXT_SUMMARY_MESSAGES = new WeakSet<Message>();
|
|
34
39
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
* narration without bloating the summary. When the tail exceeds this
|
|
40
|
-
* size we keep the END (most recent text), since "next step" / "now I'll
|
|
41
|
-
* …" statements typically live at the end of the assistant's last text
|
|
42
|
-
* block and that's the part the post-compaction model needs most.
|
|
43
|
-
*/
|
|
44
|
-
const TAIL_ANCHOR_MAX_CHARS = 1500;
|
|
45
|
-
const TAIL_ANCHOR_OPEN_TAG = "<verbatim_tail>";
|
|
46
|
-
const TAIL_ANCHOR_CLOSE_TAG = "</verbatim_tail>";
|
|
47
|
-
|
|
48
|
-
/**
|
|
49
|
-
* When the existing summary is this fraction or more of the per-summary
|
|
50
|
-
* token budget, inject a "compress older content aggressively" instruction
|
|
51
|
-
* so incremental-update passes don't let the summary grow unboundedly.
|
|
52
|
-
*/
|
|
53
|
-
const SUMMARY_COMPRESSION_PRESSURE_RATIO = 0.6;
|
|
54
|
-
|
|
55
|
-
/**
|
|
56
|
-
* Text-block prefixes that persist in live history (for prefix-caching
|
|
57
|
-
* stability and model grounding) but pollute the summarizer's view of the
|
|
58
|
-
* actual conversation. These blocks are system-metadata attached to user
|
|
59
|
-
* turns — memory injections, turn context, workspace hints, etc. They are
|
|
60
|
-
* stripped ONLY from the messages fed to the summarization LLM call. Live
|
|
61
|
-
* history is never mutated, so prefix caching is preserved.
|
|
62
|
-
*
|
|
63
|
-
* This list intentionally overlaps with `RUNTIME_INJECTION_PREFIXES` in
|
|
64
|
-
* `conversation-runtime-assembly.ts`. That list governs in-flight turn
|
|
65
|
-
* assembly via pure prefix matching; this one governs compaction input.
|
|
66
|
-
* Keep the two lists in sync when a new injection type is added.
|
|
67
|
-
*
|
|
68
|
-
* Compaction strip coverage is two-tier: this prefix list catches
|
|
69
|
-
* internal-vocabulary tags and any tag carrying the `__injected`
|
|
70
|
-
* attribute, while `COMPACTION_ONLY_WRAPPED_STRIP_TAGS` below matches
|
|
71
|
-
* ambiguous bare-tag blocks that are shaped like a runtime-emitted
|
|
72
|
-
* open/close wrap. A new ambiguous tag added upstream needs to be
|
|
73
|
-
* evaluated against both tiers — internal-vocabulary names go here,
|
|
74
|
-
* and names whose bare form collides with ordinary English
|
|
75
|
-
* (`<memory>`, `<workspace>`, `<knowledge_base>`, `<pkb>`,
|
|
76
|
-
* `<system_reminder>`) go in the wrapped-strip list so user prose
|
|
77
|
-
* mentioning the tag is preserved.
|
|
78
|
-
*/
|
|
79
|
-
const COMPACTION_ONLY_STRIP_PREFIXES = [
|
|
80
|
-
"<memory __injected>",
|
|
81
|
-
"<memory_image __injected>",
|
|
82
|
-
"</memory_image>",
|
|
83
|
-
"<memory_context __injected>",
|
|
84
|
-
"<turn_context>",
|
|
85
|
-
"<channel_turn_context>",
|
|
86
|
-
"<guardian_context>",
|
|
87
|
-
"<inbound_actor_context>",
|
|
88
|
-
"<interface_turn_context>",
|
|
89
|
-
"<workspace_top_level>",
|
|
90
|
-
"<now_scratchpad>",
|
|
91
|
-
"<NOW.md Always keep this up to date",
|
|
92
|
-
"<active_thread>",
|
|
93
|
-
"<active_subagents>",
|
|
94
|
-
"<active_workspace>",
|
|
95
|
-
"<active_dynamic_page>",
|
|
96
|
-
"<channel_capabilities>",
|
|
97
|
-
"<channel_command_context>",
|
|
98
|
-
"<voice_call_control>",
|
|
99
|
-
"<transport_hints>",
|
|
100
|
-
"<system_notice>",
|
|
101
|
-
"<non_interactive_context>",
|
|
102
|
-
"<temporal_context>",
|
|
103
|
-
];
|
|
104
|
-
|
|
105
|
-
/**
|
|
106
|
-
* Tags whose bare form (`<tag>`) is common English vocabulary or markup a
|
|
107
|
-
* user might legitimately type in prose. For these we only strip a text
|
|
108
|
-
* block if it is shaped exactly like a runtime injection: starts with
|
|
109
|
-
* `<tag>\n` and ends with `</tag>`. This bare-tag wrapped shape
|
|
110
|
-
* (e.g. `<memory>\n...\n</memory>`) appears in persisted history
|
|
111
|
-
* alongside the `__injected`-attributed variants, which the prefix list
|
|
112
|
-
* above already catches via `<memory __injected>`. A user who mentions
|
|
113
|
-
* `<memory>` in a sentence or inlines `<workspace>...</workspace>` within
|
|
114
|
-
* other prose will not match this shape.
|
|
115
|
-
*/
|
|
116
|
-
const COMPACTION_ONLY_WRAPPED_STRIP_TAGS = [
|
|
117
|
-
"memory",
|
|
118
|
-
"memory_context",
|
|
119
|
-
"workspace",
|
|
120
|
-
"knowledge_base",
|
|
121
|
-
"pkb",
|
|
122
|
-
"system_reminder",
|
|
123
|
-
];
|
|
124
|
-
|
|
125
|
-
/**
 * Report whether a text block looks like a runtime injection.
 *
 * A block matches when it starts with any entry of
 * `COMPACTION_ONLY_STRIP_PREFIXES`, or when, for some tag in
 * `COMPACTION_ONLY_WRAPPED_STRIP_TAGS`, it starts with `<tag>` followed by a
 * newline and ends with `</tag>`.
 *
 * @param text - Raw text of a single text content block.
 * @returns `true` when the block matches either rule, `false` otherwise.
 */
function isCompactionInjectedBlock(text: string): boolean {
  // Tier 1: plain prefix match.
  for (const prefix of COMPACTION_ONLY_STRIP_PREFIXES) {
    if (text.startsWith(prefix)) return true;
  }
  // Tier 2: bare tags only count when the whole block is wrapped by them.
  for (const tag of COMPACTION_ONLY_WRAPPED_STRIP_TAGS) {
    const opensWithTag = text.startsWith(`<${tag}>\n`);
    if (opensWithTag && text.endsWith(`</${tag}>`)) return true;
  }
  return false;
}
|
|
133
|
-
|
|
134
|
-
/**
 * Drop injection-shaped text blocks from user messages.
 *
 * Only `user`-role messages are inspected, and only their `text` blocks can
 * be removed; every other block type and every non-user message passes
 * through as-is. A user message that loses all of its blocks is omitted from
 * the result. A user message that loses only some blocks is replaced by a
 * shallow copy carrying the surviving blocks. The input array and its
 * messages are never mutated.
 *
 * @param messages - Messages to sanitize.
 * @returns A new array; untouched messages are returned by reference.
 */
export function stripCompactionOnlyInjections(messages: Message[]): Message[] {
  const cleaned: Message[] = [];
  for (const original of messages) {
    if (original.role !== "user") {
      cleaned.push(original);
      continue;
    }
    const kept = original.content.filter(
      (block) =>
        block.type !== "text" || !isCompactionInjectedBlock(block.text),
    );
    if (kept.length === original.content.length) {
      // Nothing was filtered: reuse the original object.
      cleaned.push(original);
    } else if (kept.length > 0) {
      cleaned.push({ ...original, content: kept });
    }
    // Otherwise every block was an injection, so the message is dropped.
  }
  return cleaned;
}
|
|
159
|
-
|
|
160
|
-
/**
 * Read the compaction summary system prompt (`compact.md`) from disk.
 *
 * The containing directory is obtained from `resolveBundledDir`, starting at
 * this module's directory and asking for the `prompts` / `compact-prompts`
 * location. NOTE(review): the earlier comment on this function says that
 * helper covers both the compiled-binary layout and the source-mode sibling
 * `prompts/` directory — confirm against `../util/bundled-asset.js`.
 *
 * @returns The prompt file contents as UTF-8 text.
 * @throws Error when the file exists but is empty. Errors raised by
 *   `readFileSync` propagate unchanged.
 */
export function loadCompactPrompt(): string {
  const promptFile = join(
    resolveBundledDir(
      import.meta.dirname ?? __dirname,
      "prompts",
      "compact-prompts",
    ),
    "compact.md",
  );
  const promptText = readFileSync(promptFile, "utf-8");
  if (promptText.length > 0) return promptText;
  throw new Error(
    `compact.md at ${promptFile} is empty — compaction summary prompt missing`,
  );
}
|
|
180
|
-
|
|
181
|
-
/**
|
|
182
|
-
* Hardcoded fallback prompt used when the bundled `compact.md` asset is
|
|
183
|
-
* missing or unreadable, so the daemon can still compact conversations
|
|
184
|
-
* rather than failing module import at startup.
|
|
185
|
-
*/
|
|
186
|
-
const SUMMARY_PROMPT_FALLBACK = [
|
|
187
|
-
"You are summarizing a long conversation so that the assistant can keep working with it after older messages are dropped. Your summary will REPLACE those messages — the assistant's only access to what was said earlier will be what you write here.",
|
|
188
|
-
"",
|
|
189
|
-
"Be thorough. Capture what happened, why it mattered, what's unresolved, and what was felt. Do not compress away emotional tone, relationship context, or nuance. Keep specific details (names, numbers, file paths, commands, URLs, exact phrasings) when they might matter later.",
|
|
190
|
-
"",
|
|
191
|
-
"Target length: aim for 1500–4000 tokens. Use the upper end when the conversation is rich in decisions, relationships, emotional content, or threads that are still open. Use the lower end for short or simple task execution.",
|
|
192
|
-
"",
|
|
193
|
-
"Open with a 1–2 paragraph narrative describing what the conversation is about and where it currently stands. Then use `## ` section headers. Use these when they apply; skip sections that have nothing to say; add your own headers when something doesn't fit:",
|
|
194
|
-
"- `## What We're Working On`",
|
|
195
|
-
"- `## Decisions & Commitments`",
|
|
196
|
-
"- `## Facts Worth Remembering`",
|
|
197
|
-
"- `## Open Threads`",
|
|
198
|
-
"- `## Emotional Arc / Relationship Notes` (include when relevant)",
|
|
199
|
-
"- `## Artifacts & References`",
|
|
200
|
-
"",
|
|
201
|
-
"If an existing summary is provided, update it: merge new information in, prefer the most recent and explicit detail on conflicts, and preserve anything still unresolved or still true. Do not restart from scratch.",
|
|
202
|
-
"",
|
|
203
|
-
"Never include in the summary: content inside `<memory __injected>`, `<memory>`, `<turn_context>`, `<workspace>`, `<knowledge_base>`, `<system_reminder>`, `<now_scratchpad>`, `<NOW.md …>`, `<active_thread>`, `<channel_capabilities>`, `<transport_hints>`, `<system_notice>`, or any other angle-bracket-tagged system blocks. Tool-call boilerplate (retries, failed attempts the assistant recovered from, routine status updates) — summarize the outcome instead. Repetitive chit-chat that adds nothing.",
|
|
204
|
-
"",
|
|
205
|
-
'Thread anchors (Slack only): if the input includes a "Retained Thread References" section, each listed reply cites its parent via `→ Mxxxxxx`. If that parent appears in the Transcript, preserve its text verbatim. Omit when absent.',
|
|
206
|
-
"",
|
|
207
|
-
"Return only the summary itself in markdown — no preamble, no meta-commentary.",
|
|
208
|
-
].join("\n");
|
|
209
|
-
|
|
210
|
-
/**
 * Resolve the compaction prompt without ever throwing.
 *
 * Calls `loader`; if it throws for any reason, a warning is logged and the
 * inline `SUMMARY_PROMPT_FALLBACK` text is returned instead.
 *
 * @param loader - Produces the prompt text. Defaults to
 *   {@link loadCompactPrompt}; injectable so tests can simulate failures.
 * @returns The loaded prompt, or the hardcoded fallback on failure.
 */
export function loadCompactPromptOrFallback(
  loader: () => string = loadCompactPrompt,
): string {
  let prompt: string;
  try {
    prompt = loader();
  } catch (loadError) {
    log.warn(
      { err: loadError },
      "Failed to load compact.md from bundle; using inline fallback prompt. The bundled asset may be missing or unreadable.",
    );
    prompt = SUMMARY_PROMPT_FALLBACK;
  }
  return prompt;
}
|
|
229
|
-
|
|
230
|
-
const SUMMARY_SYSTEM_PROMPT = loadCompactPromptOrFallback();
|
|
231
|
-
|
|
232
|
-
/**
|
|
233
|
-
* Pattern matching a Slack-style reply tag-line's parent-alias reference.
|
|
234
|
-
* The chronological renderer emits reply lines as
|
|
235
|
-
* `[MM/DD/YY HH:MM @sender → Mxxxxxx]: body`, or, for edited replies,
|
|
236
|
-
* `[MM/DD/YY HH:MM @sender → Mxxxxxx, edited MM/DD/YY HH:MM]: body`. The
|
|
237
|
-
* character after the 6-hex parent alias is therefore `]` for a plain reply
|
|
238
|
-
* or `,` for an edited one — the regex accepts either. `Mxxxxxx` is the
|
|
239
|
-
* first 6 hex chars of sha256(threadTs). A retained-tail text block that
|
|
240
|
-
* contains this pattern is carrying a live reference to a parent that may
|
|
241
|
-
* still live in the compactable region — the summarizer needs to know about
|
|
242
|
-
* it to act on the Thread-anchors clause of SUMMARY_SYSTEM_PROMPT.
|
|
243
|
-
*/
|
|
244
|
-
const THREAD_REPLY_REFERENCE_PATTERN = /→ M[0-9a-f]{6}[,\]]/;
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Public types — preserved for downstream consumers (agent loop, conversation,
|
|
42
|
+
// plugin pipeline, applyCompactionResult, routes/playground/force-compact).
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
245
44
|
|
|
246
45
|
export interface ContextWindowResult {
|
|
247
46
|
messages: Message[];
|
|
@@ -263,12 +62,6 @@ export interface ContextWindowResult {
|
|
|
263
62
|
summaryRawResponses?: unknown[];
|
|
264
63
|
summaryText: string;
|
|
265
64
|
reason?: string;
|
|
266
|
-
/**
|
|
267
|
-
* True when the summary LLM call threw and the local fallback produced the
|
|
268
|
-
* summary. Callers use this to distinguish provider-side summary failures
|
|
269
|
-
* from successful compactions so they can apply circuit-breaker logic
|
|
270
|
-
* without losing the fallback-compacted messages.
|
|
271
|
-
*/
|
|
272
65
|
summaryFailed?: boolean;
|
|
273
66
|
}
|
|
274
67
|
|
|
@@ -279,41 +72,27 @@ export interface ShouldCompactResult {
|
|
|
279
72
|
|
|
280
73
|
export interface ContextWindowCompactOptions {
|
|
281
74
|
lastCompactedAt?: number;
|
|
282
|
-
/**
|
|
75
|
+
/** Skip the auto-threshold check (used for /compact and recovery). */
|
|
283
76
|
force?: boolean;
|
|
284
77
|
/**
|
|
285
|
-
*
|
|
286
|
-
*
|
|
287
|
-
* (except the summary message itself). When omitted, the default floor
|
|
288
|
-
* is `1` (or `8` when `conversationOriginChannel === "slack"`).
|
|
289
|
-
*/
|
|
290
|
-
minKeepRecentUserTurns?: number;
|
|
291
|
-
/**
|
|
292
|
-
* Origin channel hint used when `minKeepRecentUserTurns` is omitted.
|
|
293
|
-
* Slack-originated conversations bump the default keep floor so multi-turn
|
|
294
|
-
* thread context (replies, quoted messages) is not summarized away too
|
|
295
|
-
* aggressively. Explicit `minKeepRecentUserTurns` overrides this hint.
|
|
296
|
-
*/
|
|
297
|
-
conversationOriginChannel?: string;
|
|
298
|
-
/**
|
|
299
|
-
* Per-conversation inference-profile override forwarded to the summary LLM
|
|
300
|
-
* call and usage attribution.
|
|
78
|
+
* Per-conversation inference-profile override forwarded to the compaction
|
|
79
|
+
* LLM call.
|
|
301
80
|
*/
|
|
302
81
|
overrideProfile?: string | null;
|
|
303
82
|
/**
|
|
304
|
-
*
|
|
305
|
-
*
|
|
306
|
-
* — i.e. the override may only demand a *stricter* fit. Passing a looser
|
|
307
|
-
* value has no effect. Intended for forced recovery paths that need a
|
|
308
|
-
* tighter target than the default.
|
|
83
|
+
* Pre-computed token estimate from a prior {@link shouldCompact} call.
|
|
84
|
+
* Avoids a redundant tokenization pass when the caller already has one.
|
|
309
85
|
*/
|
|
310
|
-
|
|
86
|
+
precomputedEstimate?: number;
|
|
311
87
|
/**
|
|
312
|
-
*
|
|
313
|
-
*
|
|
314
|
-
*
|
|
88
|
+
* Legacy fields retained for backwards compatibility with existing
|
|
89
|
+
* callers. The new assistant-driven compactor does not consume them —
|
|
90
|
+
* the model decides where to cut and what to keep — but accepting them
|
|
91
|
+
* here lets callers keep their existing call sites unchanged.
|
|
315
92
|
*/
|
|
316
|
-
|
|
93
|
+
minKeepRecentUserTurns?: number;
|
|
94
|
+
conversationOriginChannel?: string;
|
|
95
|
+
targetInputTokensOverride?: number;
|
|
317
96
|
}
|
|
318
97
|
|
|
319
98
|
export interface ContextWindowManagerOptions {
|
|
@@ -322,36 +101,83 @@ export interface ContextWindowManagerOptions {
|
|
|
322
101
|
config: ContextWindowConfig;
|
|
323
102
|
/** Pre-computed tool token budget to include in all estimations. */
|
|
324
103
|
toolTokenBudget?: number;
|
|
104
|
+
/** Conversation ID — required for image-manifest and timestamp lookups. */
|
|
105
|
+
conversationId?: string;
|
|
106
|
+
/**
|
|
107
|
+
* Optional tools resolver. The compactor passes tools to the provider on
|
|
108
|
+
* the compaction call so the cached prefix (system prompt + tools +
|
|
109
|
+
* conversation messages) matches the agent's main-turn cache key.
|
|
110
|
+
*/
|
|
111
|
+
resolveTools?: () => ToolDefinition[] | undefined;
|
|
112
|
+
}
|
|
113
|
+
|
|
114
|
+
// ---------------------------------------------------------------------------
|
|
115
|
+
// Summary-message helpers (used by lifecycle rehydrate + fork inheritance)
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
|
|
118
|
+
/**
 * Create the synthetic `assistant`-role message that carries a compaction
 * summary.
 *
 * The summary is placed on its own line(s) between the opening
 * `CONTEXT_SUMMARY_MARKER` and the closing `CONTEXT_SUMMARY_CLOSE` tag in a
 * single text block. The new message object is also recorded in
 * `INTERNAL_CONTEXT_SUMMARY_MESSAGES`, which
 * {@link getSummaryFromContextMessage} consults before accepting a message
 * as a summary.
 *
 * @param summary - Summary text to wrap.
 * @returns The newly created summary message.
 */
export function createContextSummaryMessage(summary: string): Message {
  const wrappedSummary = [
    CONTEXT_SUMMARY_MARKER,
    summary,
    CONTEXT_SUMMARY_CLOSE,
  ].join("\n");
  const summaryMessage: Message = {
    role: "assistant",
    content: [{ type: "text", text: wrappedSummary }],
  };
  INTERNAL_CONTEXT_SUMMARY_MESSAGES.add(summaryMessage);
  return summaryMessage;
}
|
|
137
|
+
|
|
138
|
+
/**
 * Recover the summary text from a context-summary message.
 *
 * A message qualifies only when its trimmed text starts with
 * `CONTEXT_SUMMARY_MARKER` and the message object itself is registered in
 * `INTERNAL_CONTEXT_SUMMARY_MESSAGES`. The opening marker is stripped, and
 * everything from the last `CONTEXT_SUMMARY_CLOSE` onward is discarded when
 * a closing tag is present.
 *
 * @param message - Candidate message; may be `undefined`.
 * @returns The trimmed summary text, or `null` when the message is missing
 *   or does not qualify.
 */
export function getSummaryFromContextMessage(
  message: Message | undefined,
): string | null {
  if (message == null) return null;

  const body = extractText(message.content).trim();
  const hasMarker = body.startsWith(CONTEXT_SUMMARY_MARKER);
  if (!hasMarker || !INTERNAL_CONTEXT_SUMMARY_MESSAGES.has(message)) {
    return null;
  }

  const afterOpen = body.slice(CONTEXT_SUMMARY_MARKER.length);
  const closeAt = afterOpen.lastIndexOf(CONTEXT_SUMMARY_CLOSE);
  // A missing close tag is tolerated: keep everything after the marker.
  const summary = closeAt === -1 ? afterOpen : afterOpen.slice(0, closeAt);
  return summary.trim();
}
|
|
326
150
|
|
|
151
|
+
/**
 * Concatenate the text of every `text` block in `content`.
 *
 * Blocks of any other type are skipped. The collected texts are joined with
 * a single newline, in their original order.
 *
 * @param content - Content blocks of one message.
 * @returns The joined text, or an empty string when there are no text blocks.
 */
function extractText(content: ContentBlock[]): string {
  const parts: string[] = [];
  for (const block of content) {
    if (block.type === "text") parts.push(block.text);
  }
  return parts.join("\n");
}
|
|
159
|
+
|
|
160
|
+
// ---------------------------------------------------------------------------
|
|
161
|
+
// ContextWindowManager
|
|
162
|
+
// ---------------------------------------------------------------------------
|
|
163
|
+
|
|
327
164
|
export class ContextWindowManager {
|
|
328
165
|
private readonly provider: Provider;
|
|
329
166
|
private readonly _systemPrompt: string | (() => string);
|
|
330
167
|
private config: ContextWindowConfig;
|
|
331
168
|
private readonly toolTokenBudget: number;
|
|
169
|
+
private readonly conversationId: string | undefined;
|
|
170
|
+
private readonly resolveTools:
|
|
171
|
+
| (() => ToolDefinition[] | undefined)
|
|
172
|
+
| undefined;
|
|
332
173
|
/**
|
|
333
174
|
* Number of leading messages that are non-persisted (injected inherited
|
|
334
|
-
* context from a parent conversation).
|
|
335
|
-
*
|
|
336
|
-
*
|
|
337
|
-
* after a successful compaction pass.
|
|
175
|
+
* context from a parent conversation). The compactor subtracts this from
|
|
176
|
+
* `compactedMessages` so `compactedPersistedMessages` only reflects DB
|
|
177
|
+
* rows. Decremented after a successful compaction.
|
|
338
178
|
*/
|
|
339
179
|
nonPersistedPrefixCount = 0;
|
|
340
|
-
/**
|
|
341
|
-
* True when the message at index 0 is a context summary that was inherited
|
|
342
|
-
* from a parent fork (i.e. injected as part of the non-persisted prefix),
|
|
343
|
-
* rather than produced by this conversation's own compaction. The parent
|
|
344
|
-
* summary sits at index 0 but is excluded from `compactableMessages` by
|
|
345
|
-
* `summaryOffset`, so its slot in `nonPersistedPrefixCount` must be
|
|
346
|
-
* accounted for separately. Cleared after the first compaction replaces
|
|
347
|
-
* the parent summary with a child-owned one.
|
|
348
|
-
*/
|
|
349
180
|
summaryIsInjected = false;
|
|
350
|
-
/**
|
|
351
|
-
* Cached resolved system prompt. Lazily populated on first access via the
|
|
352
|
-
* `systemPrompt` getter and cleared after each compaction pass so the next
|
|
353
|
-
* pass picks up any prompt changes.
|
|
354
|
-
*/
|
|
355
181
|
private _resolvedSystemPrompt: string | undefined;
|
|
356
182
|
|
|
357
183
|
constructor(options: ContextWindowManagerOptions) {
|
|
@@ -359,27 +185,21 @@ export class ContextWindowManager {
|
|
|
359
185
|
this._systemPrompt = options.systemPrompt;
|
|
360
186
|
this.config = options.config;
|
|
361
187
|
this.toolTokenBudget = options.toolTokenBudget ?? 0;
|
|
188
|
+
this.conversationId = options.conversationId;
|
|
189
|
+
this.resolveTools = options.resolveTools;
|
|
362
190
|
}
|
|
363
191
|
|
|
364
192
|
updateConfig(config: ContextWindowConfig): void {
|
|
365
193
|
this.config = config;
|
|
366
194
|
}
|
|
367
195
|
|
|
368
|
-
/**
|
|
369
|
-
* Provider key for the local token estimator. Wrapper providers (e.g.
|
|
370
|
-
* OpenRouter routing to `anthropic/*`) override `tokenEstimationProvider`
|
|
371
|
-
* so image/PDF sizing uses the same rules as the upstream API instead of
|
|
372
|
-
* the generic `base64/4` fallback.
|
|
373
|
-
*/
|
|
374
196
|
private get estimationProviderName(): string {
|
|
375
197
|
return this.provider.tokenEstimationProvider ?? this.provider.name;
|
|
376
198
|
}
|
|
377
199
|
|
|
378
|
-
/** Lazily resolve and cache the system prompt for the duration of a compaction pass. */
|
|
379
200
|
private get systemPrompt(): string {
|
|
380
|
-
if (this._resolvedSystemPrompt !== undefined)
|
|
201
|
+
if (this._resolvedSystemPrompt !== undefined)
|
|
381
202
|
return this._resolvedSystemPrompt;
|
|
382
|
-
}
|
|
383
203
|
const resolved =
|
|
384
204
|
typeof this._systemPrompt === "function"
|
|
385
205
|
? this._systemPrompt()
|
|
@@ -392,21 +212,26 @@ export class ContextWindowManager {
|
|
|
392
212
|
this._resolvedSystemPrompt = undefined;
|
|
393
213
|
}
|
|
394
214
|
|
|
215
|
+
private resolveCompactionConfig(): CompactionConfig {
|
|
216
|
+
return getConfig().compaction;
|
|
217
|
+
}
|
|
218
|
+
|
|
395
219
|
/**
|
|
396
|
-
* Cheap pre-check
|
|
397
|
-
*
|
|
398
|
-
*
|
|
399
|
-
*
|
|
220
|
+
* Cheap pre-check — estimate the current token count and compare against
|
|
221
|
+
* `compaction.autoThreshold`. Callers pass the estimate back through
|
|
222
|
+
* `precomputedEstimate` on the {@link maybeCompact} call to avoid
|
|
223
|
+
* re-tokenizing the same history twice.
|
|
400
224
|
*/
|
|
401
225
|
shouldCompact(messages: Message[]): ShouldCompactResult {
|
|
402
|
-
|
|
226
|
+
const compaction = this.resolveCompactionConfig();
|
|
227
|
+
if (!compaction.enabled) return { needed: false, estimatedTokens: 0 };
|
|
403
228
|
try {
|
|
404
229
|
const estimated = estimatePromptTokens(messages, this.systemPrompt, {
|
|
405
230
|
providerName: this.estimationProviderName,
|
|
406
231
|
toolTokenBudget: this.toolTokenBudget,
|
|
407
232
|
});
|
|
408
233
|
const threshold = Math.floor(
|
|
409
|
-
this.config.maxInputTokens *
|
|
234
|
+
this.config.maxInputTokens * compaction.autoThreshold,
|
|
410
235
|
);
|
|
411
236
|
return { needed: estimated >= threshold, estimatedTokens: estimated };
|
|
412
237
|
} finally {
|
|
@@ -431,6 +256,7 @@ export class ContextWindowManager {
|
|
|
431
256
|
signal?: AbortSignal,
|
|
432
257
|
options?: ContextWindowCompactOptions,
|
|
433
258
|
): Promise<ContextWindowResult> {
|
|
259
|
+
const compaction = this.resolveCompactionConfig();
|
|
434
260
|
const previousEstimatedInputTokens =
|
|
435
261
|
options?.precomputedEstimate ??
|
|
436
262
|
estimatePromptTokens(messages, this.systemPrompt, {
|
|
@@ -438,1264 +264,115 @@ export class ContextWindowManager {
|
|
|
438
264
|
toolTokenBudget: this.toolTokenBudget,
|
|
439
265
|
});
|
|
440
266
|
const thresholdTokens = Math.floor(
|
|
441
|
-
this.config.maxInputTokens *
|
|
267
|
+
this.config.maxInputTokens * compaction.autoThreshold,
|
|
442
268
|
);
|
|
443
|
-
const existingSummary = getSummaryFromContextMessage(messages[0]);
|
|
444
269
|
|
|
445
|
-
if (!
|
|
446
|
-
return {
|
|
447
|
-
messages,
|
|
448
|
-
compacted: false,
|
|
449
|
-
previousEstimatedInputTokens,
|
|
450
|
-
estimatedInputTokens: previousEstimatedInputTokens,
|
|
270
|
+
if (!compaction.enabled) {
|
|
271
|
+
return noopResult(messages, previousEstimatedInputTokens, {
|
|
451
272
|
maxInputTokens: this.config.maxInputTokens,
|
|
452
273
|
thresholdTokens,
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
summaryCalls: 0,
|
|
456
|
-
summaryInputTokens: 0,
|
|
457
|
-
summaryOutputTokens: 0,
|
|
458
|
-
summaryModel: "",
|
|
459
|
-
summaryText: existingSummary ?? "",
|
|
460
|
-
reason: "context window compaction disabled",
|
|
461
|
-
};
|
|
274
|
+
reason: "compaction disabled",
|
|
275
|
+
});
|
|
462
276
|
}
|
|
463
277
|
|
|
464
|
-
if (
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
278
|
+
if (this.conversationId == null) {
|
|
279
|
+
// The compactor needs the conversation id to look up image
|
|
280
|
+
// attachments and DB timestamps. If we don't have one (legacy test
|
|
281
|
+
// path, ad-hoc instantiation), skip — never fabricate one.
|
|
282
|
+
log.warn(
|
|
283
|
+
"ContextWindowManager has no conversationId — skipping compaction",
|
|
284
|
+
);
|
|
285
|
+
return noopResult(messages, previousEstimatedInputTokens, {
|
|
470
286
|
maxInputTokens: this.config.maxInputTokens,
|
|
471
287
|
thresholdTokens,
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
summaryCalls: 0,
|
|
475
|
-
summaryInputTokens: 0,
|
|
476
|
-
summaryOutputTokens: 0,
|
|
477
|
-
summaryModel: "",
|
|
478
|
-
summaryText: existingSummary ?? "",
|
|
479
|
-
reason: "below compaction threshold",
|
|
480
|
-
};
|
|
288
|
+
reason: "no conversation id",
|
|
289
|
+
});
|
|
481
290
|
}
|
|
482
291
|
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
if (userTurnStarts.length === 0) {
|
|
486
|
-
return {
|
|
487
|
-
messages,
|
|
488
|
-
compacted: false,
|
|
489
|
-
previousEstimatedInputTokens,
|
|
490
|
-
estimatedInputTokens: previousEstimatedInputTokens,
|
|
292
|
+
if (!options?.force && previousEstimatedInputTokens < thresholdTokens) {
|
|
293
|
+
return noopResult(messages, previousEstimatedInputTokens, {
|
|
491
294
|
maxInputTokens: this.config.maxInputTokens,
|
|
492
295
|
thresholdTokens,
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
summaryCalls: 0,
|
|
496
|
-
summaryInputTokens: 0,
|
|
497
|
-
summaryOutputTokens: 0,
|
|
498
|
-
summaryModel: "",
|
|
499
|
-
summaryText: existingSummary ?? "",
|
|
500
|
-
reason: "no user turns available for compaction",
|
|
501
|
-
};
|
|
296
|
+
reason: "below auto threshold",
|
|
297
|
+
});
|
|
502
298
|
}
|
|
503
299
|
|
|
504
|
-
const
|
|
505
|
-
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
300
|
+
const args: CompactionRunArgs = {
|
|
301
|
+
conversationId: this.conversationId,
|
|
302
|
+
messages,
|
|
303
|
+
provider: this.provider,
|
|
304
|
+
systemPrompt: this.systemPrompt,
|
|
305
|
+
tools: this.resolveTools?.(),
|
|
306
|
+
compaction,
|
|
307
|
+
maxInputTokens: this.config.maxInputTokens,
|
|
509
308
|
previousEstimatedInputTokens,
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
// projection-optimism clamp in pickKeepBoundary does not cover:
|
|
516
|
-
// 1. `adjustForToolPairs` walked the boundary back through a
|
|
517
|
-
// tool_use/tool_result chain at the start of the conversation.
|
|
518
|
-
// 2. The binary search settled below `userTurnStarts.length` (so
|
|
519
|
-
// the clamp at the top of pickKeepBoundary did not fire) but
|
|
520
|
-
// `adjustForToolPairs` still walked the resulting boundary
|
|
521
|
-
// backwards past `summaryOffset`.
|
|
522
|
-
// Rescue: restore the binary search's intended keep depth (capped at
|
|
523
|
-
// `length - 1` so we always summarize at least one turn) and bypass
|
|
524
|
-
// `adjustForToolPairs`. The kept region's first message may then
|
|
525
|
-
// contain a `tool_result` whose matching `tool_use` lives in the
|
|
526
|
-
// compacted region; we strip such orphans below before assembling
|
|
527
|
-
// the final messages array so the next agent turn does not fail
|
|
528
|
-
// when sending to the LLM.
|
|
529
|
-
const forceRescueApplied =
|
|
530
|
-
options?.force === true &&
|
|
531
|
-
keepPlanInitial.keepFromIndex <= summaryOffset &&
|
|
532
|
-
userTurnStarts.length >= 2;
|
|
533
|
-
const safeKeepTurns = Math.max(
|
|
534
|
-
1,
|
|
535
|
-
Math.min(keepPlanInitial.keepTurns, userTurnStarts.length - 1),
|
|
536
|
-
);
|
|
537
|
-
const keepPlan = forceRescueApplied
|
|
538
|
-
? {
|
|
539
|
-
keepFromIndex: userTurnStarts[userTurnStarts.length - safeKeepTurns],
|
|
540
|
-
keepTurns: safeKeepTurns,
|
|
541
|
-
}
|
|
542
|
-
: keepPlanInitial;
|
|
543
|
-
if (keepPlan.keepFromIndex <= summaryOffset) {
|
|
544
|
-
// All turns fit after truncation projection, but the real in-memory
|
|
545
|
-
// messages may still contain un-truncated tool results. Apply truncation
|
|
546
|
-
// so the caller gets the token savings even without summarization.
|
|
547
|
-
const { messages: truncatedMessages, truncatedCount } =
|
|
548
|
-
truncateToolResultsAcrossHistory(
|
|
549
|
-
messages,
|
|
550
|
-
COMPACTION_TOOL_RESULT_MAX_CHARS,
|
|
551
|
-
);
|
|
552
|
-
const didTruncate = truncatedCount > 0;
|
|
553
|
-
const estimatedAfterTruncation = didTruncate
|
|
554
|
-
? estimatePromptTokens(truncatedMessages, this.systemPrompt, {
|
|
555
|
-
providerName: this.estimationProviderName,
|
|
556
|
-
toolTokenBudget: this.toolTokenBudget,
|
|
557
|
-
})
|
|
558
|
-
: previousEstimatedInputTokens;
|
|
559
|
-
// Under force with only one user turn, the rescue above could not
|
|
560
|
-
// fire — there is nothing earlier to summarize. Surface that
|
|
561
|
-
// explicitly instead of "conversation already fits..." so the user
|
|
562
|
-
// knows why `/compact` did not produce a summary.
|
|
563
|
-
const noSummarizationReason =
|
|
564
|
-
options?.force && userTurnStarts.length < 2
|
|
565
|
-
? "only one user turn — nothing earlier to compact"
|
|
566
|
-
: "conversation already fits within the compaction target";
|
|
567
|
-
return {
|
|
568
|
-
messages: truncatedMessages,
|
|
569
|
-
compacted: didTruncate,
|
|
570
|
-
previousEstimatedInputTokens,
|
|
571
|
-
estimatedInputTokens: estimatedAfterTruncation,
|
|
572
|
-
maxInputTokens: this.config.maxInputTokens,
|
|
573
|
-
thresholdTokens,
|
|
574
|
-
compactedMessages: 0,
|
|
575
|
-
compactedPersistedMessages: 0,
|
|
576
|
-
summaryCalls: 0,
|
|
577
|
-
summaryInputTokens: 0,
|
|
578
|
-
summaryOutputTokens: 0,
|
|
579
|
-
summaryModel: "",
|
|
580
|
-
summaryText: existingSummary ?? "",
|
|
581
|
-
reason: didTruncate
|
|
582
|
-
? "truncated tool results without summarization"
|
|
583
|
-
: noSummarizationReason,
|
|
584
|
-
};
|
|
585
|
-
}
|
|
586
|
-
|
|
587
|
-
const compactableMessages = messages.slice(
|
|
588
|
-
summaryOffset,
|
|
589
|
-
keepPlan.keepFromIndex,
|
|
590
|
-
);
|
|
591
|
-
if (compactableMessages.length === 0) {
|
|
592
|
-
return {
|
|
593
|
-
messages,
|
|
594
|
-
compacted: false,
|
|
595
|
-
previousEstimatedInputTokens,
|
|
596
|
-
estimatedInputTokens: previousEstimatedInputTokens,
|
|
597
|
-
maxInputTokens: this.config.maxInputTokens,
|
|
598
|
-
thresholdTokens,
|
|
599
|
-
compactedMessages: 0,
|
|
600
|
-
compactedPersistedMessages: 0,
|
|
601
|
-
summaryCalls: 0,
|
|
602
|
-
summaryInputTokens: 0,
|
|
603
|
-
summaryOutputTokens: 0,
|
|
604
|
-
summaryModel: "",
|
|
605
|
-
summaryText: existingSummary ?? "",
|
|
606
|
-
reason: "no eligible messages to compact",
|
|
607
|
-
};
|
|
608
|
-
}
|
|
309
|
+
force: options?.force,
|
|
310
|
+
signal,
|
|
311
|
+
overrideProfile: options?.overrideProfile ?? null,
|
|
312
|
+
nonPersistedPrefixCount: this.nonPersistedPrefixCount,
|
|
313
|
+
};
|
|
609
314
|
|
|
610
|
-
|
|
611
|
-
// contributes 1 to `nonPersistedPrefixCount` but is excluded from
|
|
612
|
-
// `compactableMessages` by `summaryOffset`; subtract it here so the
|
|
613
|
-
// remaining injected count lines up with compactableMessages. A summary
|
|
614
|
-
// produced by this conversation's own prior compaction is not part of
|
|
615
|
-
// `nonPersistedPrefixCount` (already decremented), so no subtraction.
|
|
616
|
-
const injectedSummaryOffset = this.summaryIsInjected ? summaryOffset : 0;
|
|
617
|
-
const injectedInCompactable = Math.min(
|
|
618
|
-
Math.max(0, this.nonPersistedPrefixCount - injectedSummaryOffset),
|
|
619
|
-
compactableMessages.length,
|
|
620
|
-
);
|
|
621
|
-
const compactedPersistedMessages =
|
|
622
|
-
countPersistedMessages(compactableMessages) - injectedInCompactable;
|
|
623
|
-
const rawProjectedMessages = [
|
|
624
|
-
createContextSummaryMessage(existingSummary ?? "Projected summary"),
|
|
625
|
-
...messages.slice(keepPlan.keepFromIndex),
|
|
626
|
-
];
|
|
627
|
-
const { messages: projectedMessages } = truncateToolResultsAcrossHistory(
|
|
628
|
-
rawProjectedMessages,
|
|
629
|
-
COMPACTION_TOOL_RESULT_MAX_CHARS,
|
|
630
|
-
);
|
|
631
|
-
const projectedInputTokens = estimatePromptTokens(
|
|
632
|
-
projectedMessages,
|
|
633
|
-
this.systemPrompt,
|
|
634
|
-
{
|
|
635
|
-
providerName: this.estimationProviderName,
|
|
636
|
-
toolTokenBudget: this.toolTokenBudget,
|
|
637
|
-
},
|
|
638
|
-
);
|
|
639
|
-
const projectedGainTokens = Math.max(
|
|
640
|
-
0,
|
|
641
|
-
previousEstimatedInputTokens - projectedInputTokens,
|
|
642
|
-
);
|
|
643
|
-
const severePressure =
|
|
644
|
-
previousEstimatedInputTokens >=
|
|
645
|
-
Math.floor(this.config.maxInputTokens * SEVERE_PRESSURE_RATIO);
|
|
646
|
-
const lastCompactedAt = options?.lastCompactedAt;
|
|
315
|
+
const result = await runAssistantDrivenCompaction(args);
|
|
647
316
|
|
|
648
|
-
|
|
649
|
-
// sooner. Scale the cooldown inversely with the growth-rate multiplier, capped at
|
|
650
|
-
// 1/4 of the base cooldown so we never check more than 4× as frequently.
|
|
651
|
-
const growthRateMultiplier = Math.max(
|
|
652
|
-
1,
|
|
653
|
-
projectedGainTokens / MIN_GAIN_TOKENS_DURING_COOLDOWN,
|
|
654
|
-
);
|
|
655
|
-
const adaptiveCooldownMs = Math.max(
|
|
656
|
-
COMPACTION_COOLDOWN_MS / 4,
|
|
657
|
-
COMPACTION_COOLDOWN_MS / growthRateMultiplier,
|
|
658
|
-
);
|
|
659
|
-
const withinCooldown =
|
|
660
|
-
typeof lastCompactedAt === "number" &&
|
|
661
|
-
Date.now() - lastCompactedAt < adaptiveCooldownMs;
|
|
317
|
+
if (!result.compacted) return result;
|
|
662
318
|
|
|
663
|
-
//
|
|
664
|
-
//
|
|
665
|
-
//
|
|
666
|
-
//
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
319
|
+
// Recompute the post-compaction token estimate now that the message
|
|
320
|
+
// array has been rebuilt. The compactor returns a conservative
|
|
321
|
+
// placeholder; the agent loop wants the real number for its next
|
|
322
|
+
// budget decision.
|
|
323
|
+
let estimatedInputTokens = result.estimatedInputTokens;
|
|
324
|
+
try {
|
|
325
|
+
estimatedInputTokens = estimatePromptTokens(
|
|
326
|
+
result.messages,
|
|
327
|
+
this.systemPrompt,
|
|
672
328
|
{
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
growthRateMultiplier,
|
|
676
|
-
msSinceCompaction:
|
|
677
|
-
typeof lastCompactedAt === "number"
|
|
678
|
-
? Date.now() - lastCompactedAt
|
|
679
|
-
: null,
|
|
329
|
+
providerName: this.estimationProviderName,
|
|
330
|
+
toolTokenBudget: this.toolTokenBudget,
|
|
680
331
|
},
|
|
681
|
-
"Compaction cooldown active",
|
|
682
332
|
);
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
compacted: false,
|
|
686
|
-
previousEstimatedInputTokens,
|
|
687
|
-
estimatedInputTokens: previousEstimatedInputTokens,
|
|
688
|
-
maxInputTokens: this.config.maxInputTokens,
|
|
689
|
-
thresholdTokens,
|
|
690
|
-
compactedMessages: 0,
|
|
691
|
-
compactedPersistedMessages: 0,
|
|
692
|
-
summaryCalls: 0,
|
|
693
|
-
summaryInputTokens: 0,
|
|
694
|
-
summaryOutputTokens: 0,
|
|
695
|
-
summaryModel: "",
|
|
696
|
-
summaryText: existingSummary ?? "",
|
|
697
|
-
reason: "compaction cooldown active",
|
|
698
|
-
};
|
|
699
|
-
}
|
|
700
|
-
|
|
701
|
-
// `severePressure` already bypasses this guard to keep context from
|
|
702
|
-
// overflowing. Forced compaction also bypasses: when the user
|
|
703
|
-
// explicitly types `/compact` we must summarize whatever is
|
|
704
|
-
// available rather than return "insufficient compactable persisted
|
|
705
|
-
// messages" — that is a no-op response to a direct user command.
|
|
706
|
-
if (
|
|
707
|
-
compactedPersistedMessages < MIN_COMPACTABLE_PERSISTED_MESSAGES &&
|
|
708
|
-
!severePressure &&
|
|
709
|
-
!options?.force
|
|
710
|
-
) {
|
|
711
|
-
return {
|
|
712
|
-
messages,
|
|
713
|
-
compacted: false,
|
|
714
|
-
previousEstimatedInputTokens,
|
|
715
|
-
estimatedInputTokens: previousEstimatedInputTokens,
|
|
716
|
-
maxInputTokens: this.config.maxInputTokens,
|
|
717
|
-
thresholdTokens,
|
|
718
|
-
compactedMessages: 0,
|
|
719
|
-
compactedPersistedMessages: 0,
|
|
720
|
-
summaryCalls: 0,
|
|
721
|
-
summaryInputTokens: 0,
|
|
722
|
-
summaryOutputTokens: 0,
|
|
723
|
-
summaryModel: "",
|
|
724
|
-
summaryText: existingSummary ?? "",
|
|
725
|
-
reason: "insufficient compactable persisted messages",
|
|
726
|
-
};
|
|
727
|
-
}
|
|
728
|
-
|
|
729
|
-
const retainedThreadRefs = collectRetainedThreadReferences(
|
|
730
|
-
messages.slice(keepPlan.keepFromIndex),
|
|
731
|
-
);
|
|
732
|
-
// Strip runtime injections (memory, turn context, workspace hints, etc.)
|
|
733
|
-
// from the messages fed to the summarizer. These blocks are system
|
|
734
|
-
// metadata; leaving them in causes the summary to echo rotating memory
|
|
735
|
-
// content instead of the actual conversation. The caller's live message
|
|
736
|
-
// array is untouched so prefix caching stays intact.
|
|
737
|
-
const transcriptSource = stripCompactionOnlyInjections(compactableMessages);
|
|
738
|
-
const transcriptBlocks = this.capTranscriptBlocksToTokenBudget(
|
|
739
|
-
serializeMessagesToContentBlocks(transcriptSource),
|
|
740
|
-
existingSummary ?? "No previous summary.",
|
|
741
|
-
retainedThreadRefs,
|
|
742
|
-
);
|
|
743
|
-
const summaryUpdate = await this.updateSummary(
|
|
744
|
-
existingSummary ?? "No previous summary.",
|
|
745
|
-
transcriptBlocks,
|
|
746
|
-
retainedThreadRefs,
|
|
747
|
-
signal,
|
|
748
|
-
options?.overrideProfile ?? null,
|
|
749
|
-
);
|
|
750
|
-
const summaryInputTokens = summaryUpdate.inputTokens;
|
|
751
|
-
const summaryOutputTokens = summaryUpdate.outputTokens;
|
|
752
|
-
const summaryModel = summaryUpdate.model;
|
|
753
|
-
const summaryCacheCreationInputTokens =
|
|
754
|
-
summaryUpdate.cacheCreationInputTokens;
|
|
755
|
-
const summaryCacheReadInputTokens = summaryUpdate.cacheReadInputTokens;
|
|
756
|
-
const summaryFailed = summaryUpdate.failed;
|
|
757
|
-
const summaryRawResponses: unknown[] = [];
|
|
758
|
-
if (Array.isArray(summaryUpdate.rawResponse)) {
|
|
759
|
-
summaryRawResponses.push(...summaryUpdate.rawResponse);
|
|
760
|
-
} else if (summaryUpdate.rawResponse !== undefined) {
|
|
761
|
-
summaryRawResponses.push(summaryUpdate.rawResponse);
|
|
333
|
+
} catch (err) {
|
|
334
|
+
log.warn({ err }, "Post-compaction token estimate failed");
|
|
762
335
|
}
|
|
763
|
-
const summaryCalls = 1;
|
|
764
|
-
|
|
765
|
-
// Force-keep the most recent assistant text from the compactable region
|
|
766
|
-
// by splicing it verbatim into the summary message. This is independent
|
|
767
|
-
// of what the LLM summarizer chose to surface — when compaction
|
|
768
|
-
// interrupts a long assistant work span, this anchor preserves the
|
|
769
|
-
// model's last self-narration ("Next step: …", "About to …") so the
|
|
770
|
-
// post-compaction model has unambiguous continuity instead of falling
|
|
771
|
-
// back to a "where am I?" recovery shape.
|
|
772
|
-
const tailAnchorText = extractTailAssistantText(compactableMessages);
|
|
773
|
-
const summary =
|
|
774
|
-
tailAnchorText != null
|
|
775
|
-
? appendTailAnchorToSummary(summaryUpdate.summary, tailAnchorText)
|
|
776
|
-
: summaryUpdate.summary;
|
|
777
|
-
|
|
778
|
-
// Media (images, files) in kept turns is preserved naturally — those
|
|
779
|
-
// turns are carried forward as-is and their token cost is already
|
|
780
|
-
// accounted for by pickKeepBoundary's estimatePromptTokens call.
|
|
781
|
-
// Images in compacted turns are passed to the summarizer so it can
|
|
782
|
-
// describe their visual content in the summary text.
|
|
783
|
-
const summaryMessage = createContextSummaryMessage(summary);
|
|
784
336
|
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
// The force-rescue boundary bypasses `adjustForToolPairs`, so the
|
|
791
|
-
// kept region may contain `tool_result` blocks whose matching
|
|
792
|
-
// `tool_use` is in the (now-compacted) prefix. Strip those orphans
|
|
793
|
-
// so the next agent turn does not fail with an LLM API error.
|
|
794
|
-
const keptMessages = forceRescueApplied
|
|
795
|
-
? stripOrphanToolResults(truncatedKeptMessages)
|
|
796
|
-
: truncatedKeptMessages;
|
|
797
|
-
const compactedMessages = [summaryMessage, ...keptMessages];
|
|
798
|
-
const estimatedInputTokens = estimatePromptTokens(
|
|
799
|
-
compactedMessages,
|
|
800
|
-
this.systemPrompt,
|
|
801
|
-
{
|
|
802
|
-
providerName: this.estimationProviderName,
|
|
803
|
-
toolTokenBudget: this.toolTokenBudget,
|
|
804
|
-
},
|
|
337
|
+
// Consume any non-persisted prefix messages that were compacted away
|
|
338
|
+
// and clear the injected-summary flag.
|
|
339
|
+
const compactedAway = Math.min(
|
|
340
|
+
this.nonPersistedPrefixCount,
|
|
341
|
+
result.compactedMessages,
|
|
805
342
|
);
|
|
806
|
-
// Consume the injected prefix messages that were compacted away. When the
|
|
807
|
-
// parent-injected summary was replaced by a freshly produced child summary,
|
|
808
|
-
// also consume its slot (it was excluded from injectedInCompactable via
|
|
809
|
-
// injectedSummaryOffset) and clear the flag so subsequent compactions treat
|
|
810
|
-
// the summary at index 0 as child-owned.
|
|
811
343
|
this.nonPersistedPrefixCount = Math.max(
|
|
812
344
|
0,
|
|
813
|
-
this.nonPersistedPrefixCount -
|
|
814
|
-
injectedInCompactable -
|
|
815
|
-
injectedSummaryOffset,
|
|
345
|
+
this.nonPersistedPrefixCount - compactedAway,
|
|
816
346
|
);
|
|
817
347
|
this.summaryIsInjected = false;
|
|
818
348
|
|
|
819
|
-
|
|
820
|
-
{
|
|
821
|
-
previousEstimatedInputTokens,
|
|
822
|
-
estimatedInputTokens,
|
|
823
|
-
compactedMessages: compactableMessages.length,
|
|
824
|
-
compactedPersistedMessages,
|
|
825
|
-
keepTurns: keepPlan.keepTurns,
|
|
826
|
-
summaryCalls,
|
|
827
|
-
},
|
|
828
|
-
"Compacted conversation context window",
|
|
829
|
-
);
|
|
830
|
-
|
|
831
|
-
return {
|
|
832
|
-
messages: compactedMessages,
|
|
833
|
-
compacted: true,
|
|
834
|
-
previousEstimatedInputTokens,
|
|
835
|
-
estimatedInputTokens,
|
|
836
|
-
maxInputTokens: this.config.maxInputTokens,
|
|
837
|
-
thresholdTokens,
|
|
838
|
-
compactedMessages: compactableMessages.length,
|
|
839
|
-
compactedPersistedMessages,
|
|
840
|
-
summaryCalls,
|
|
841
|
-
summaryInputTokens,
|
|
842
|
-
summaryOutputTokens,
|
|
843
|
-
summaryModel,
|
|
844
|
-
summaryCallSite: CONVERSATION_SUMMARY_CALL_SITE,
|
|
845
|
-
summaryOverrideProfile: options?.overrideProfile ?? null,
|
|
846
|
-
summaryCacheCreationInputTokens,
|
|
847
|
-
summaryCacheReadInputTokens,
|
|
848
|
-
summaryRawResponses,
|
|
849
|
-
summaryText: summary,
|
|
850
|
-
summaryFailed,
|
|
851
|
-
};
|
|
852
|
-
}
|
|
853
|
-
|
|
854
|
-
private get targetInputTokens(): number {
|
|
855
|
-
return Math.floor(
|
|
856
|
-
this.config.maxInputTokens *
|
|
857
|
-
(this.config.targetBudgetRatio - this.config.summaryBudgetRatio),
|
|
858
|
-
);
|
|
349
|
+
return { ...result, estimatedInputTokens };
|
|
859
350
|
}
|
|
860
|
-
|
|
861
|
-
private pickKeepBoundary(
|
|
862
|
-
messages: Message[],
|
|
863
|
-
userTurnStarts: number[],
|
|
864
|
-
opts?: {
|
|
865
|
-
minKeepRecentUserTurns?: number;
|
|
866
|
-
targetInputTokensOverride?: number;
|
|
867
|
-
conversationOriginChannel?: string;
|
|
868
|
-
force?: boolean;
|
|
869
|
-
previousEstimatedInputTokens?: number;
|
|
870
|
-
},
|
|
871
|
-
): { keepFromIndex: number; keepTurns: number } {
|
|
872
|
-
// Slack-originated conversations rely on multi-turn thread context
|
|
873
|
-
// (reply chains, quoted messages, contextual references). Bump the
|
|
874
|
-
// default keep floor for them so compaction does not summarize away
|
|
875
|
-
// recent turns that the next reply may directly cite. Explicit
|
|
876
|
-
// `minKeepRecentUserTurns` (including emergency `0`) wins.
|
|
877
|
-
const defaultTurns = opts?.conversationOriginChannel === "slack" ? 8 : 1;
|
|
878
|
-
const minFloor = Math.min(
|
|
879
|
-
Math.max(0, Math.floor(opts?.minKeepRecentUserTurns ?? defaultTurns)),
|
|
880
|
-
userTurnStarts.length,
|
|
881
|
-
);
|
|
882
|
-
const targetTokens = Math.min(
|
|
883
|
-
opts?.targetInputTokensOverride ?? this.targetInputTokens,
|
|
884
|
-
this.targetInputTokens,
|
|
885
|
-
);
|
|
886
|
-
|
|
887
|
-
// Binary search for the maximum keepTurns whose projected tokens fit
|
|
888
|
-
// within the budget. Token count is monotonically non-decreasing with
|
|
889
|
-
// keepTurns (more turns = more tokens), so binary search is valid.
|
|
890
|
-
const projectedTokensForKeep = (turns: number): number => {
|
|
891
|
-
const fromIndex =
|
|
892
|
-
turns === 0
|
|
893
|
-
? messages.length
|
|
894
|
-
: (userTurnStarts[userTurnStarts.length - turns] ?? messages.length);
|
|
895
|
-
const rawProjected = [
|
|
896
|
-
createContextSummaryMessage("Projected summary"),
|
|
897
|
-
...messages.slice(fromIndex),
|
|
898
|
-
];
|
|
899
|
-
const { messages: projectedMessages } = truncateToolResultsAcrossHistory(
|
|
900
|
-
rawProjected,
|
|
901
|
-
COMPACTION_TOOL_RESULT_MAX_CHARS,
|
|
902
|
-
);
|
|
903
|
-
return estimatePromptTokens(projectedMessages, this.systemPrompt, {
|
|
904
|
-
providerName: this.estimationProviderName,
|
|
905
|
-
toolTokenBudget: this.toolTokenBudget,
|
|
906
|
-
});
|
|
907
|
-
};
|
|
908
|
-
|
|
909
|
-
let lo = minFloor;
|
|
910
|
-
let hi = userTurnStarts.length;
|
|
911
|
-
|
|
912
|
-
// Fast path: if keeping all turns already fits, skip the search.
|
|
913
|
-
if (hi > lo && projectedTokensForKeep(hi) > targetTokens) {
|
|
914
|
-
// Binary search: find the largest keepTurns where projected tokens fit.
|
|
915
|
-
while (lo < hi) {
|
|
916
|
-
const mid = lo + Math.ceil((hi - lo) / 2);
|
|
917
|
-
if (projectedTokensForKeep(mid) <= targetTokens) {
|
|
918
|
-
lo = mid;
|
|
919
|
-
} else {
|
|
920
|
-
hi = mid - 1;
|
|
921
|
-
}
|
|
922
|
-
}
|
|
923
|
-
} else {
|
|
924
|
-
lo = hi;
|
|
925
|
-
}
|
|
926
|
-
|
|
927
|
-
// Under forced compaction with only the implicit default floor in play,
|
|
928
|
-
// that floor stops being an absolute override when the kept region still
|
|
929
|
-
// exceeds the target. Walk keepTurns below the floor — down to 0 if
|
|
930
|
-
// needed — so /compact can always drive the conversation toward target,
|
|
931
|
-
// even when the floor turn itself is oversized (e.g. a huge paste in the
|
|
932
|
-
// last user message). Exceptions that still treat the floor as hard:
|
|
933
|
-
// - Explicit `minKeepRecentUserTurns` (the caller opted in to that
|
|
934
|
-
// floor; emergency recovery already passes 0 when it wants to go all
|
|
935
|
-
// the way down).
|
|
936
|
-
// - Slack origin (the bumped 8-turn floor protects thread reply chains
|
|
937
|
-
// and quoted-message context that the next reply may directly cite).
|
|
938
|
-
// Automatic mid-loop compaction (force !== true) always honors the floor
|
|
939
|
-
// so the in-flight agent turn isn't summarized away.
|
|
940
|
-
const floorIsImplicitDefault =
|
|
941
|
-
opts?.minKeepRecentUserTurns === undefined &&
|
|
942
|
-
opts?.conversationOriginChannel !== "slack";
|
|
943
|
-
if (
|
|
944
|
-
opts?.force &&
|
|
945
|
-
floorIsImplicitDefault &&
|
|
946
|
-
projectedTokensForKeep(lo) > targetTokens
|
|
947
|
-
) {
|
|
948
|
-
while (lo > 0 && projectedTokensForKeep(lo) > targetTokens) {
|
|
949
|
-
lo--;
|
|
950
|
-
}
|
|
951
|
-
}
|
|
952
|
-
|
|
953
|
-
// The projection's summary-swap and tool_result truncation can make
|
|
954
|
-
// projectedTokensForKeep(hi) optimistically fit even when the live
|
|
955
|
-
// conversation is well over target — sending /compact through the
|
|
956
|
-
// "already fits" skip path as a no-op. Clamp lo so summarization runs.
|
|
957
|
-
if (
|
|
958
|
-
opts?.force &&
|
|
959
|
-
floorIsImplicitDefault &&
|
|
960
|
-
lo === userTurnStarts.length &&
|
|
961
|
-
lo > 0 &&
|
|
962
|
-
(opts?.previousEstimatedInputTokens ?? 0) > targetTokens
|
|
963
|
-
) {
|
|
964
|
-
lo -= 1;
|
|
965
|
-
}
|
|
966
|
-
|
|
967
|
-
const keepTurns = lo;
|
|
968
|
-
const rawKeepFromIndex =
|
|
969
|
-
keepTurns === 0
|
|
970
|
-
? messages.length
|
|
971
|
-
: (userTurnStarts[userTurnStarts.length - keepTurns] ??
|
|
972
|
-
messages.length);
|
|
973
|
-
const keepFromIndex = adjustForToolPairs(messages, rawKeepFromIndex);
|
|
974
|
-
return { keepFromIndex, keepTurns };
|
|
975
|
-
}
|
|
976
|
-
|
|
977
|
-
private get summaryMaxTokens(): number {
|
|
978
|
-
return Math.max(
|
|
979
|
-
1,
|
|
980
|
-
Math.floor(this.config.maxInputTokens * this.config.summaryBudgetRatio),
|
|
981
|
-
);
|
|
982
|
-
}
|
|
983
|
-
|
|
984
|
-
/**
|
|
985
|
-
* Trim the serialized transcript content blocks so that the summary prompt
|
|
986
|
-
* (system prompt + existing summary + transcript + scaffolding) fits within
|
|
987
|
-
* the provider's input token limit, minus the output budget reserved for the
|
|
988
|
-
* summary itself.
|
|
989
|
-
*
|
|
990
|
-
* When the transcript exceeds the budget, blocks are dropped from the
|
|
991
|
-
* beginning (oldest messages first) to preserve recent context. Image blocks
|
|
992
|
-
* are dropped before text blocks within each pass since they are expensive
|
|
993
|
-
* and their surrounding text context already captures the conversation flow.
|
|
994
|
-
*/
|
|
995
|
-
private capTranscriptBlocksToTokenBudget(
|
|
996
|
-
blocks: ContentBlock[],
|
|
997
|
-
currentSummary: string,
|
|
998
|
-
retainedThreadRefs: string[],
|
|
999
|
-
): ContentBlock[] {
|
|
1000
|
-
const retainedRefsText = retainedThreadRefs.join("\n");
|
|
1001
|
-
const overheadTokens =
|
|
1002
|
-
estimateTextTokens(SUMMARY_SYSTEM_PROMPT) +
|
|
1003
|
-
estimateTextTokens(currentSummary) +
|
|
1004
|
-
estimateTextTokens(retainedRefsText) +
|
|
1005
|
-
// Scaffolding text in buildSummaryContentBlocks ("Update the summary...",
|
|
1006
|
-
// section headers, etc.) — generous fixed estimate.
|
|
1007
|
-
200 +
|
|
1008
|
-
this.summaryMaxTokens;
|
|
1009
|
-
|
|
1010
|
-
const maxTranscriptTokens = Math.max(
|
|
1011
|
-
0,
|
|
1012
|
-
this.config.maxInputTokens - overheadTokens,
|
|
1013
|
-
);
|
|
1014
|
-
|
|
1015
|
-
const estimateBlockTokens = (b: ContentBlock): number =>
|
|
1016
|
-
estimateContentBlockTokens(b, {
|
|
1017
|
-
providerName: this.estimationProviderName,
|
|
1018
|
-
});
|
|
1019
|
-
|
|
1020
|
-
let totalTokens = 0;
|
|
1021
|
-
for (const block of blocks) {
|
|
1022
|
-
totalTokens += estimateBlockTokens(block);
|
|
1023
|
-
}
|
|
1024
|
-
const originalTotalTokens = totalTokens;
|
|
1025
|
-
if (totalTokens <= maxTranscriptTokens) return blocks;
|
|
1026
|
-
|
|
1027
|
-
// First pass: drop images from the beginning until we fit or run out of
|
|
1028
|
-
// images to drop. Images are high-cost and their text context (message
|
|
1029
|
-
// headers, surrounding tool_use/tool_result serializations) is preserved.
|
|
1030
|
-
const result = [...blocks];
|
|
1031
|
-
for (
|
|
1032
|
-
let i = 0;
|
|
1033
|
-
i < result.length && totalTokens > maxTranscriptTokens;
|
|
1034
|
-
i++
|
|
1035
|
-
) {
|
|
1036
|
-
if (result[i].type === "image") {
|
|
1037
|
-
totalTokens -= estimateBlockTokens(result[i]);
|
|
1038
|
-
const stub: ContentBlock = {
|
|
1039
|
-
type: "text",
|
|
1040
|
-
text: `[image omitted from summary context]`,
|
|
1041
|
-
};
|
|
1042
|
-
totalTokens += estimateBlockTokens(stub);
|
|
1043
|
-
result[i] = stub;
|
|
1044
|
-
}
|
|
1045
|
-
}
|
|
1046
|
-
if (totalTokens <= maxTranscriptTokens) return result;
|
|
1047
|
-
|
|
1048
|
-
// Second pass: drop text blocks from the beginning (oldest) until we fit.
|
|
1049
|
-
// If a single text block exceeds the remaining budget, truncate it rather
|
|
1050
|
-
// than dropping it entirely so the summarizer always has content to work with.
|
|
1051
|
-
let dropUntil = 0;
|
|
1052
|
-
let droppedTokens = 0;
|
|
1053
|
-
for (
|
|
1054
|
-
let i = 0;
|
|
1055
|
-
i < result.length && totalTokens > maxTranscriptTokens;
|
|
1056
|
-
i++
|
|
1057
|
-
) {
|
|
1058
|
-
const blockTokens = estimateBlockTokens(result[i]);
|
|
1059
|
-
const excess = totalTokens - maxTranscriptTokens;
|
|
1060
|
-
if (blockTokens > excess && result[i].type === "text") {
|
|
1061
|
-
// Truncate this block to shed exactly the excess tokens.
|
|
1062
|
-
// Subtract the cost of the "[...truncated] " prefix so the final
|
|
1063
|
-
// block (prefix + kept text) stays within budget.
|
|
1064
|
-
const truncationPrefix = "[...truncated] ";
|
|
1065
|
-
const prefixTokens = estimateTextTokens(truncationPrefix);
|
|
1066
|
-
const keepTokens = Math.max(1, blockTokens - excess - prefixTokens);
|
|
1067
|
-
const text = (result[i] as { type: "text"; text: string }).text;
|
|
1068
|
-
// Approximate: 1 token ≈ 4 characters for truncation purposes.
|
|
1069
|
-
const keepChars = Math.max(1, Math.floor(keepTokens * 4));
|
|
1070
|
-
const truncatedText = text.slice(-keepChars);
|
|
1071
|
-
const truncatedBlock: ContentBlock = {
|
|
1072
|
-
type: "text",
|
|
1073
|
-
text: `${truncationPrefix}${truncatedText}`,
|
|
1074
|
-
};
|
|
1075
|
-
const newBlockTokens = estimateBlockTokens(truncatedBlock);
|
|
1076
|
-
droppedTokens += blockTokens - newBlockTokens;
|
|
1077
|
-
totalTokens -= blockTokens - newBlockTokens;
|
|
1078
|
-
result[i] = truncatedBlock;
|
|
1079
|
-
dropUntil = i;
|
|
1080
|
-
break;
|
|
1081
|
-
}
|
|
1082
|
-
droppedTokens += blockTokens;
|
|
1083
|
-
totalTokens -= blockTokens;
|
|
1084
|
-
dropUntil = i + 1;
|
|
1085
|
-
}
|
|
1086
|
-
|
|
1087
|
-
log.info(
|
|
1088
|
-
{
|
|
1089
|
-
originalTokens: originalTotalTokens,
|
|
1090
|
-
cappedTokens: maxTranscriptTokens,
|
|
1091
|
-
droppedTokens,
|
|
1092
|
-
},
|
|
1093
|
-
"Capped summary transcript blocks to fit provider input limit",
|
|
1094
|
-
);
|
|
1095
|
-
|
|
1096
|
-
return [
|
|
1097
|
-
{ type: "text", text: "[earlier messages truncated]" } as ContentBlock,
|
|
1098
|
-
...result.slice(dropUntil),
|
|
1099
|
-
];
|
|
1100
|
-
}
|
|
1101
|
-
|
|
1102
|
-
private async updateSummary(
|
|
1103
|
-
currentSummary: string,
|
|
1104
|
-
transcriptBlocks: ContentBlock[],
|
|
1105
|
-
retainedThreadRefs: string[],
|
|
1106
|
-
signal?: AbortSignal,
|
|
1107
|
-
overrideProfile?: string | null,
|
|
1108
|
-
): Promise<{
|
|
1109
|
-
summary: string;
|
|
1110
|
-
inputTokens: number;
|
|
1111
|
-
outputTokens: number;
|
|
1112
|
-
model: string;
|
|
1113
|
-
cacheCreationInputTokens: number;
|
|
1114
|
-
cacheReadInputTokens: number;
|
|
1115
|
-
rawResponse?: unknown;
|
|
1116
|
-
/**
|
|
1117
|
-
* True when the provider.sendMessage call threw and the local fallback
|
|
1118
|
-
* was used. Callers (the agent loop) use this to drive circuit-breaker
|
|
1119
|
-
* state without having to reimplement the fallback themselves.
|
|
1120
|
-
*/
|
|
1121
|
-
failed: boolean;
|
|
1122
|
-
}> {
|
|
1123
|
-
// When the existing summary is already consuming most of its budget,
|
|
1124
|
-
// nudge the model to compress older durable content aggressively so
|
|
1125
|
-
// incremental-update passes don't let the summary grow unboundedly.
|
|
1126
|
-
const existingSummaryTokens = estimateTextTokens(currentSummary);
|
|
1127
|
-
const compressionPressure =
|
|
1128
|
-
existingSummaryTokens >=
|
|
1129
|
-
this.summaryMaxTokens * SUMMARY_COMPRESSION_PRESSURE_RATIO;
|
|
1130
|
-
const contentBlocks = buildSummaryContentBlocks(
|
|
1131
|
-
currentSummary,
|
|
1132
|
-
transcriptBlocks,
|
|
1133
|
-
retainedThreadRefs,
|
|
1134
|
-
{ compressionPressure },
|
|
1135
|
-
);
|
|
1136
|
-
const summaryMessage: Message = { role: "user", content: contentBlocks };
|
|
1137
|
-
let failed = false;
|
|
1138
|
-
try {
|
|
1139
|
-
const providerConfig: Record<string, unknown> = {
|
|
1140
|
-
callSite: CONVERSATION_SUMMARY_CALL_SITE,
|
|
1141
|
-
usageTracking: "manual",
|
|
1142
|
-
max_tokens: this.summaryMaxTokens,
|
|
1143
|
-
};
|
|
1144
|
-
if (overrideProfile) {
|
|
1145
|
-
providerConfig.overrideProfile = overrideProfile;
|
|
1146
|
-
}
|
|
1147
|
-
const response = await this.provider.sendMessage(
|
|
1148
|
-
[summaryMessage],
|
|
1149
|
-
undefined,
|
|
1150
|
-
SUMMARY_SYSTEM_PROMPT,
|
|
1151
|
-
{
|
|
1152
|
-
config: providerConfig,
|
|
1153
|
-
signal,
|
|
1154
|
-
},
|
|
1155
|
-
);
|
|
1156
|
-
|
|
1157
|
-
const nextSummary = extractText(response.content).trim();
|
|
1158
|
-
if (nextSummary.length > 0) {
|
|
1159
|
-
return {
|
|
1160
|
-
summary: this.clampSummary(nextSummary),
|
|
1161
|
-
inputTokens: response.usage.inputTokens,
|
|
1162
|
-
outputTokens: response.usage.outputTokens,
|
|
1163
|
-
model: response.model,
|
|
1164
|
-
cacheCreationInputTokens:
|
|
1165
|
-
response.usage.cacheCreationInputTokens ?? 0,
|
|
1166
|
-
cacheReadInputTokens: response.usage.cacheReadInputTokens ?? 0,
|
|
1167
|
-
rawResponse: response.rawResponse,
|
|
1168
|
-
failed: false,
|
|
1169
|
-
};
|
|
1170
|
-
}
|
|
1171
|
-
} catch (err) {
|
|
1172
|
-
failed = true;
|
|
1173
|
-
log.warn({ err }, "Summary generation failed, using local fallback");
|
|
1174
|
-
}
|
|
1175
|
-
|
|
1176
|
-
// Fallback: extract text-only transcript for local summary generation.
|
|
1177
|
-
const textTranscript = transcriptBlocks
|
|
1178
|
-
.filter(
|
|
1179
|
-
(b): b is Extract<ContentBlock, { type: "text" }> => b.type === "text",
|
|
1180
|
-
)
|
|
1181
|
-
.map((b) => b.text)
|
|
1182
|
-
.join("\n\n");
|
|
1183
|
-
|
|
1184
|
-
return {
|
|
1185
|
-
summary: fallbackSummary(currentSummary, textTranscript),
|
|
1186
|
-
inputTokens: 0,
|
|
1187
|
-
outputTokens: 0,
|
|
1188
|
-
model: "",
|
|
1189
|
-
cacheCreationInputTokens: 0,
|
|
1190
|
-
cacheReadInputTokens: 0,
|
|
1191
|
-
failed,
|
|
1192
|
-
};
|
|
1193
|
-
}
|
|
1194
|
-
|
|
1195
|
-
private clampSummary(summary: string): string {
|
|
1196
|
-
// Budget in tokens → approximate char limit (4 chars ≈ 1 token).
|
|
1197
|
-
const maxChars = this.summaryMaxTokens * 4;
|
|
1198
|
-
if (summary.length <= maxChars) return summary;
|
|
1199
|
-
return clampSummaryAtSectionBoundary(summary, maxChars);
|
|
1200
|
-
}
|
|
1201
|
-
}
|
|
1202
|
-
|
|
1203
|
-
/**
|
|
1204
|
-
* Truncate a markdown summary that exceeds `maxChars`, preferring a
|
|
1205
|
-
* section boundary (`\n## `) so we never cut a heading mid-text. Falls
|
|
1206
|
-
* back to a hard character slice when no boundary exists in the safe
|
|
1207
|
-
* region (first half of the budget).
|
|
1208
|
-
*/
|
|
1209
|
-
export function clampSummaryAtSectionBoundary(
|
|
1210
|
-
summary: string,
|
|
1211
|
-
maxChars: number,
|
|
1212
|
-
): string {
|
|
1213
|
-
if (summary.length <= maxChars) return summary;
|
|
1214
|
-
const ELLIPSIS = "...";
|
|
1215
|
-
// Hard limit we must stay under, leaving room for the ellipsis suffix.
|
|
1216
|
-
const cutoff = maxChars - ELLIPSIS.length;
|
|
1217
|
-
if (cutoff <= 0) return ELLIPSIS;
|
|
1218
|
-
const head = safeStringSlice(summary, 0, cutoff);
|
|
1219
|
-
// Find the last `## ` heading at a line start. Require it to be past the
|
|
1220
|
-
// midpoint of the allowed region so we don't drop most of the summary
|
|
1221
|
-
// just to hit a boundary — better to cut mid-section late than to keep
|
|
1222
|
-
// almost nothing.
|
|
1223
|
-
const halfway = Math.floor(cutoff / 2);
|
|
1224
|
-
const boundary = head.lastIndexOf("\n## ");
|
|
1225
|
-
if (boundary >= halfway) {
|
|
1226
|
-
return `${head.slice(0, boundary).trimEnd()}\n${ELLIPSIS}`;
|
|
1227
|
-
}
|
|
1228
|
-
return `${head}${ELLIPSIS}`;
|
|
1229
|
-
}
|
|
1230
|
-
|
|
1231
|
-
function collectUserTurnStartIndexes(messages: Message[]): number[] {
|
|
1232
|
-
const starts: number[] = [];
|
|
1233
|
-
for (let i = 0; i < messages.length; i++) {
|
|
1234
|
-
const message = messages[i];
|
|
1235
|
-
if (message.role !== "user") continue;
|
|
1236
|
-
if (getSummaryFromContextMessage(message) != null) continue;
|
|
1237
|
-
if (isToolResultOnly(message)) continue;
|
|
1238
|
-
starts.push(i);
|
|
1239
|
-
}
|
|
1240
|
-
return starts;
|
|
1241
|
-
}
|
|
1242
|
-
|
|
1243
|
-
/**
|
|
1244
|
-
* Count messages that have DB counterparts. Context-summary messages are
|
|
1245
|
-
* in-memory-only and excluded; ALL other messages (including tool-result-only
|
|
1246
|
-
* user messages) have a corresponding row in the DB and must be counted so
|
|
1247
|
-
* that `contextCompactedMessageCount` indexes the DB array correctly.
|
|
1248
|
-
*/
|
|
1249
|
-
function countPersistedMessages(messages: Message[]): number {
|
|
1250
|
-
return messages.filter((message) => {
|
|
1251
|
-
return getSummaryFromContextMessage(message) == null;
|
|
1252
|
-
}).length;
|
|
1253
|
-
}
|
|
1254
|
-
|
|
1255
|
-
function isSystemNoticeBlock(block: ContentBlock): boolean {
|
|
1256
|
-
if (block.type !== "text") return false;
|
|
1257
|
-
const text = (block as { text?: string }).text ?? "";
|
|
1258
|
-
return (
|
|
1259
|
-
text.startsWith("<system_notice>") && text.endsWith("</system_notice>")
|
|
1260
|
-
);
|
|
1261
351
|
}
|
|
1262
352
|
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
return (
|
|
1267
|
-
message.content.length > 0 &&
|
|
1268
|
-
message.content.every(
|
|
1269
|
-
(block) =>
|
|
1270
|
-
block.type === "tool_result" ||
|
|
1271
|
-
block.type === "web_search_tool_result" ||
|
|
1272
|
-
isSystemNoticeBlock(block),
|
|
1273
|
-
)
|
|
1274
|
-
);
|
|
1275
|
-
}
|
|
353
|
+
// ---------------------------------------------------------------------------
|
|
354
|
+
// Helpers
|
|
355
|
+
// ---------------------------------------------------------------------------
|
|
1276
356
|
|
|
1277
|
-
|
|
1278
|
-
* Walk the keep boundary backward to ensure tool_use/tool_result pairs are
|
|
1279
|
-
* never split across the compaction boundary. If the first kept message is
|
|
1280
|
-
* a user message containing tool_result blocks whose matching tool_use blocks
|
|
1281
|
-
* live in the preceding (compacted-away) assistant message, include that
|
|
1282
|
-
* assistant message in the kept set.
|
|
1283
|
-
*/
|
|
1284
|
-
function adjustForToolPairs(
|
|
357
|
+
function noopResult(
|
|
1285
358
|
messages: Message[],
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
|
|
1298
|
-
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
if (referencedIds.size === 0) break;
|
|
1305
|
-
|
|
1306
|
-
// Check if the preceding assistant message contains matching tool_uses
|
|
1307
|
-
const prev = messages[idx - 1];
|
|
1308
|
-
if (!prev || prev.role !== "assistant") break;
|
|
1309
|
-
|
|
1310
|
-
const hasOrphanedPair = prev.content.some(
|
|
1311
|
-
(block) =>
|
|
1312
|
-
(block.type === "tool_use" || block.type === "server_tool_use") &&
|
|
1313
|
-
"id" in block &&
|
|
1314
|
-
referencedIds.has((block as { id: string }).id),
|
|
1315
|
-
);
|
|
1316
|
-
if (!hasOrphanedPair) break;
|
|
1317
|
-
|
|
1318
|
-
// Include the assistant message
|
|
1319
|
-
idx--;
|
|
1320
|
-
|
|
1321
|
-
// The assistant message may itself be preceded by a tool_result user
|
|
1322
|
-
// message that pairs with an even earlier assistant — continue the check
|
|
1323
|
-
if (idx > 0 && messages[idx - 1]?.role === "user") {
|
|
1324
|
-
idx--;
|
|
1325
|
-
} else {
|
|
1326
|
-
break;
|
|
1327
|
-
}
|
|
1328
|
-
}
|
|
1329
|
-
return idx;
|
|
1330
|
-
}
|
|
1331
|
-
|
|
1332
|
-
/**
|
|
1333
|
-
* Strip `tool_result` blocks whose matching `tool_use` is not present in
|
|
1334
|
-
* the message array. Used by the force-rescue path in `_maybeCompact`
|
|
1335
|
-
* which bypasses `adjustForToolPairs` to honor user-explicit `/compact`
|
|
1336
|
-
* commands — the kept region's first user message can otherwise contain
|
|
1337
|
-
* an orphan `tool_result`, which the LLM API rejects.
|
|
1338
|
-
*
|
|
1339
|
-
* A user message that contains only orphan `tool_result` blocks is
|
|
1340
|
-
* dropped entirely; partial messages keep the surviving content blocks.
|
|
1341
|
-
*/
|
|
1342
|
-
function stripOrphanToolResults(messages: Message[]): Message[] {
|
|
1343
|
-
const knownToolUseIds = new Set<string>();
|
|
1344
|
-
for (const msg of messages) {
|
|
1345
|
-
if (msg.role !== "assistant") continue;
|
|
1346
|
-
for (const block of msg.content) {
|
|
1347
|
-
if (
|
|
1348
|
-
(block.type === "tool_use" || block.type === "server_tool_use") &&
|
|
1349
|
-
"id" in block
|
|
1350
|
-
) {
|
|
1351
|
-
knownToolUseIds.add((block as { id: string }).id);
|
|
1352
|
-
}
|
|
1353
|
-
}
|
|
1354
|
-
}
|
|
1355
|
-
|
|
1356
|
-
return messages.flatMap((msg) => {
|
|
1357
|
-
if (msg.role !== "user") return [msg];
|
|
1358
|
-
let stripped = false;
|
|
1359
|
-
const filtered = msg.content.filter((block) => {
|
|
1360
|
-
if (
|
|
1361
|
-
(block.type === "tool_result" ||
|
|
1362
|
-
block.type === "web_search_tool_result") &&
|
|
1363
|
-
"tool_use_id" in block
|
|
1364
|
-
) {
|
|
1365
|
-
const id = (block as { tool_use_id: string }).tool_use_id;
|
|
1366
|
-
if (!knownToolUseIds.has(id)) {
|
|
1367
|
-
stripped = true;
|
|
1368
|
-
return false;
|
|
1369
|
-
}
|
|
1370
|
-
}
|
|
1371
|
-
return true;
|
|
1372
|
-
});
|
|
1373
|
-
if (!stripped) return [msg];
|
|
1374
|
-
if (filtered.length === 0) return [];
|
|
1375
|
-
return [{ ...msg, content: filtered }];
|
|
1376
|
-
});
|
|
1377
|
-
}
|
|
1378
|
-
|
|
1379
|
-
export function getSummaryFromContextMessage(
|
|
1380
|
-
message: Message | undefined,
|
|
1381
|
-
): string | null {
|
|
1382
|
-
if (!message) return null;
|
|
1383
|
-
const text = extractText(message.content).trim();
|
|
1384
|
-
if (!text.startsWith(CONTEXT_SUMMARY_MARKER)) return null;
|
|
1385
|
-
if (INTERNAL_CONTEXT_SUMMARY_MESSAGES.has(message)) {
|
|
1386
|
-
return stripContextSummaryTags(text);
|
|
1387
|
-
}
|
|
1388
|
-
return null;
|
|
1389
|
-
}
|
|
1390
|
-
|
|
1391
|
-
function stripContextSummaryTags(text: string): string {
|
|
1392
|
-
let inner = text.slice(CONTEXT_SUMMARY_MARKER.length);
|
|
1393
|
-
const closeIdx = inner.lastIndexOf("</context_summary>");
|
|
1394
|
-
if (closeIdx !== -1) {
|
|
1395
|
-
inner = inner.slice(0, closeIdx);
|
|
1396
|
-
}
|
|
1397
|
-
return inner.trim();
|
|
1398
|
-
}
|
|
1399
|
-
|
|
1400
|
-
export function createContextSummaryMessage(summary: string): Message {
|
|
1401
|
-
const message: Message = {
|
|
1402
|
-
role: "user",
|
|
1403
|
-
content: [
|
|
1404
|
-
{
|
|
1405
|
-
type: "text",
|
|
1406
|
-
text: `${CONTEXT_SUMMARY_MARKER}\n${summary}\n</context_summary>`,
|
|
1407
|
-
},
|
|
1408
|
-
],
|
|
359
|
+
estimated: number,
|
|
360
|
+
opts: { maxInputTokens: number; thresholdTokens: number; reason: string },
|
|
361
|
+
): ContextWindowResult {
|
|
362
|
+
return {
|
|
363
|
+
messages,
|
|
364
|
+
compacted: false,
|
|
365
|
+
previousEstimatedInputTokens: estimated,
|
|
366
|
+
estimatedInputTokens: estimated,
|
|
367
|
+
maxInputTokens: opts.maxInputTokens,
|
|
368
|
+
thresholdTokens: opts.thresholdTokens,
|
|
369
|
+
compactedMessages: 0,
|
|
370
|
+
compactedPersistedMessages: 0,
|
|
371
|
+
summaryCalls: 0,
|
|
372
|
+
summaryInputTokens: 0,
|
|
373
|
+
summaryOutputTokens: 0,
|
|
374
|
+
summaryModel: "",
|
|
375
|
+
summaryText: getSummaryFromContextMessage(messages[0]) ?? "",
|
|
376
|
+
reason: opts.reason,
|
|
1409
377
|
};
|
|
1410
|
-
INTERNAL_CONTEXT_SUMMARY_MESSAGES.add(message);
|
|
1411
|
-
return message;
|
|
1412
|
-
}
|
|
1413
|
-
|
|
1414
|
-
/**
|
|
1415
|
-
* Walk `messages` backward and return the concatenated text content of the
|
|
1416
|
-
* most recent assistant message that contains at least one non-empty text
|
|
1417
|
-
* block. tool_use / tool_result / image / unknown blocks are skipped. The
|
|
1418
|
-
* result is trimmed and (if longer than `maxChars`) clamped from the START
|
|
1419
|
-
* so the END — where "next step" / "now I'll …" narration tends to land —
|
|
1420
|
-
* is preserved.
|
|
1421
|
-
*
|
|
1422
|
-
* Returns `null` when no eligible assistant text is found (e.g. compactable
|
|
1423
|
-
* region was all user/tool messages, or all assistant messages were
|
|
1424
|
-
* tool_use-only). The caller treats `null` as "no anchor to splice".
|
|
1425
|
-
*
|
|
1426
|
-
* Used by `_maybeCompact` to force-keep the last assistant text from the
|
|
1427
|
-
* compactable region into the post-compaction summary message, so the
|
|
1428
|
-
* model's most recent self-narration survives summarization regardless of
|
|
1429
|
-
* whether the LLM summarizer chose to surface it.
|
|
1430
|
-
*/
|
|
1431
|
-
export function extractTailAssistantText(
|
|
1432
|
-
messages: Message[],
|
|
1433
|
-
maxChars: number = TAIL_ANCHOR_MAX_CHARS,
|
|
1434
|
-
): string | null {
|
|
1435
|
-
for (let i = messages.length - 1; i >= 0; i--) {
|
|
1436
|
-
const message = messages[i];
|
|
1437
|
-
if (message?.role !== "assistant") continue;
|
|
1438
|
-
const text = extractText(message.content).trim();
|
|
1439
|
-
if (text.length === 0) continue;
|
|
1440
|
-
if (text.length <= maxChars) return text;
|
|
1441
|
-
// Keep the END — most recent narration wins.
|
|
1442
|
-
const truncated = safeStringSlice(
|
|
1443
|
-
text,
|
|
1444
|
-
text.length - maxChars,
|
|
1445
|
-
text.length,
|
|
1446
|
-
);
|
|
1447
|
-
return `[...truncated] ${truncated}`;
|
|
1448
|
-
}
|
|
1449
|
-
return null;
|
|
1450
|
-
}
|
|
1451
|
-
|
|
1452
|
-
/**
|
|
1453
|
-
* Splice a verbatim tail-anchor block onto the end of the LLM-produced
|
|
1454
|
-
* summary text. The tag-wrapped block is structurally distinct from any
|
|
1455
|
-
* `## ` section the LLM might generate, so it survives section-boundary
|
|
1456
|
-
* clamping in `clampSummaryAtSectionBoundary` (which only runs on the LLM
|
|
1457
|
-
* summary itself, before this splice).
|
|
1458
|
-
*
|
|
1459
|
-
* Idempotent: if the summary already ends with a `<verbatim_tail>…` block
|
|
1460
|
-
* (e.g. from a prior compaction whose summary was carried forward as
|
|
1461
|
-
* `existingSummary`), it is replaced rather than stacked, so successive
|
|
1462
|
-
* compactions don't accumulate stale tails.
|
|
1463
|
-
*/
|
|
1464
|
-
export function appendTailAnchorToSummary(
|
|
1465
|
-
summary: string,
|
|
1466
|
-
tailText: string,
|
|
1467
|
-
): string {
|
|
1468
|
-
const trimmed = summary.trimEnd();
|
|
1469
|
-
const existingOpen = trimmed.lastIndexOf(TAIL_ANCHOR_OPEN_TAG);
|
|
1470
|
-
const base =
|
|
1471
|
-
existingOpen >= 0 ? trimmed.slice(0, existingOpen).trimEnd() : trimmed;
|
|
1472
|
-
return `${base}\n\n${TAIL_ANCHOR_OPEN_TAG}\n${tailText.trim()}\n${TAIL_ANCHOR_CLOSE_TAG}`;
|
|
1473
|
-
}
|
|
1474
|
-
|
|
1475
|
-
/**
|
|
1476
|
-
* Build content blocks for the summary prompt. Returns a mix of text blocks
|
|
1477
|
-
* (for the scaffolding, existing summary, and serialized non-image content)
|
|
1478
|
-
* and image blocks (preserved from the original messages so the summarizer
|
|
1479
|
-
* can describe what was in them).
|
|
1480
|
-
*/
|
|
1481
|
-
function buildSummaryContentBlocks(
|
|
1482
|
-
currentSummary: string,
|
|
1483
|
-
transcriptBlocks: ContentBlock[],
|
|
1484
|
-
retainedThreadRefs: string[],
|
|
1485
|
-
options: { compressionPressure: boolean } = { compressionPressure: false },
|
|
1486
|
-
): ContentBlock[] {
|
|
1487
|
-
const lines = [
|
|
1488
|
-
"Update the summary with new transcript data.",
|
|
1489
|
-
"If new information conflicts with older notes, keep the most recent and explicit detail.",
|
|
1490
|
-
"Keep all unresolved asks and next steps.",
|
|
1491
|
-
"For any images included below, describe their visual content in the summary so the information is preserved after compaction.",
|
|
1492
|
-
];
|
|
1493
|
-
if (options.compressionPressure) {
|
|
1494
|
-
lines.push(
|
|
1495
|
-
"The existing summary is approaching its token budget. Compress older durable content aggressively (drop detail that is no longer load-bearing, merge bullets, tighten prose) while preserving the most recent turns' nuance.",
|
|
1496
|
-
);
|
|
1497
|
-
}
|
|
1498
|
-
lines.push(
|
|
1499
|
-
"",
|
|
1500
|
-
"### Existing Summary",
|
|
1501
|
-
currentSummary.trim().length > 0 ? currentSummary.trim() : "None.",
|
|
1502
|
-
"",
|
|
1503
|
-
);
|
|
1504
|
-
if (retainedThreadRefs.length > 0) {
|
|
1505
|
-
lines.push(
|
|
1506
|
-
"### Retained Thread References",
|
|
1507
|
-
"These reply tag lines remain in the live context after compaction. Each `→ Mxxxxxx` cites a parent message by alias; if that parent appears in the Transcript below, preserve its text verbatim.",
|
|
1508
|
-
...retainedThreadRefs.map((ref) => `- ${ref}`),
|
|
1509
|
-
"",
|
|
1510
|
-
);
|
|
1511
|
-
}
|
|
1512
|
-
lines.push("### Transcript");
|
|
1513
|
-
return [
|
|
1514
|
-
{
|
|
1515
|
-
type: "text",
|
|
1516
|
-
text: lines.join("\n"),
|
|
1517
|
-
} as ContentBlock,
|
|
1518
|
-
...transcriptBlocks,
|
|
1519
|
-
];
|
|
1520
|
-
}
|
|
1521
|
-
|
|
1522
|
-
/**
|
|
1523
|
-
* Scan retained-tail messages for Slack-style reply tag lines that cite a
|
|
1524
|
-
* thread parent via the `→ Mxxxxxx` alias convention. Returns the full tag
|
|
1525
|
-
* line for each match (de-duplicated, order-preserved) so the summarizer
|
|
1526
|
-
* has a concrete list of parents whose text must be preserved verbatim.
|
|
1527
|
-
*
|
|
1528
|
-
* Non-slack conversations and retained tails without any reply markers
|
|
1529
|
-
* produce an empty list — in that case the summarizer is told explicitly
|
|
1530
|
-
* that no verbatim preservation is required.
|
|
1531
|
-
*/
|
|
1532
|
-
function collectRetainedThreadReferences(
|
|
1533
|
-
retainedMessages: Message[],
|
|
1534
|
-
): string[] {
|
|
1535
|
-
const seen = new Set<string>();
|
|
1536
|
-
const out: string[] = [];
|
|
1537
|
-
for (const msg of retainedMessages) {
|
|
1538
|
-
for (const block of msg.content) {
|
|
1539
|
-
if (block.type !== "text") continue;
|
|
1540
|
-
const text = (block as { text: string }).text;
|
|
1541
|
-
for (const line of text.split("\n")) {
|
|
1542
|
-
if (!THREAD_REPLY_REFERENCE_PATTERN.test(line)) continue;
|
|
1543
|
-
const trimmed = line.trim();
|
|
1544
|
-
if (trimmed.length === 0) continue;
|
|
1545
|
-
if (seen.has(trimmed)) continue;
|
|
1546
|
-
seen.add(trimmed);
|
|
1547
|
-
out.push(trimmed);
|
|
1548
|
-
}
|
|
1549
|
-
}
|
|
1550
|
-
}
|
|
1551
|
-
return out;
|
|
1552
|
-
}
|
|
1553
|
-
|
|
1554
|
-
/**
|
|
1555
|
-
* Serialize messages into a sequence of content blocks. Text-based content
|
|
1556
|
-
* (tool calls, tool results, thinking, etc.) is serialized into text blocks.
|
|
1557
|
-
* Image blocks — both top-level and nested inside tool_result contentBlocks —
|
|
1558
|
-
* are preserved as-is so the summarizer LLM can see them.
|
|
1559
|
-
*/
|
|
1560
|
-
function serializeMessagesToContentBlocks(messages: Message[]): ContentBlock[] {
|
|
1561
|
-
const blocks: ContentBlock[] = [];
|
|
1562
|
-
for (let i = 0; i < messages.length; i++) {
|
|
1563
|
-
const msg = messages[i];
|
|
1564
|
-
const textLines: string[] = [`Message #${i + 1} (${msg.role})`];
|
|
1565
|
-
|
|
1566
|
-
for (const block of msg.content) {
|
|
1567
|
-
if (block.type === "image") {
|
|
1568
|
-
// Flush accumulated text lines before the image.
|
|
1569
|
-
if (textLines.length > 0) {
|
|
1570
|
-
blocks.push({ type: "text", text: textLines.join("\n") });
|
|
1571
|
-
textLines.length = 0;
|
|
1572
|
-
}
|
|
1573
|
-
blocks.push(block);
|
|
1574
|
-
} else if (block.type === "tool_result") {
|
|
1575
|
-
// guard:allow-tool-result-only — web_search_tool_result handled by serializeBlock via else branch
|
|
1576
|
-
// Extract images from tool_result contentBlocks before serializing.
|
|
1577
|
-
const collectedImages: ImageContent[] = [];
|
|
1578
|
-
textLines.push(serializeToolResultBlock(block, collectedImages));
|
|
1579
|
-
if (collectedImages.length > 0) {
|
|
1580
|
-
// Flush text, emit collected images, then continue.
|
|
1581
|
-
if (textLines.length > 0) {
|
|
1582
|
-
blocks.push({ type: "text", text: textLines.join("\n") });
|
|
1583
|
-
textLines.length = 0;
|
|
1584
|
-
}
|
|
1585
|
-
blocks.push(...collectedImages);
|
|
1586
|
-
}
|
|
1587
|
-
} else {
|
|
1588
|
-
textLines.push(serializeBlock(block));
|
|
1589
|
-
}
|
|
1590
|
-
}
|
|
1591
|
-
|
|
1592
|
-
// Flush remaining text lines for this message.
|
|
1593
|
-
if (textLines.length > 0) {
|
|
1594
|
-
blocks.push({ type: "text", text: textLines.join("\n") });
|
|
1595
|
-
}
|
|
1596
|
-
}
|
|
1597
|
-
return blocks;
|
|
1598
|
-
}
|
|
1599
|
-
|
|
1600
|
-
/**
|
|
1601
|
-
* Serialize images nested inside tool_result contentBlocks, returning them
|
|
1602
|
-
* as separate content blocks to preserve for the summarizer.
|
|
1603
|
-
*/
|
|
1604
|
-
function serializeToolResultBlock(
|
|
1605
|
-
block: Extract<ContentBlock, { type: "tool_result" }>,
|
|
1606
|
-
collectedImages: ImageContent[],
|
|
1607
|
-
): string {
|
|
1608
|
-
if (block.contentBlocks) {
|
|
1609
|
-
for (const cb of block.contentBlocks) {
|
|
1610
|
-
if (cb.type === "image") {
|
|
1611
|
-
collectedImages.push(cb);
|
|
1612
|
-
}
|
|
1613
|
-
}
|
|
1614
|
-
}
|
|
1615
|
-
return `tool_result ${block.tool_use_id}${
|
|
1616
|
-
block.is_error ? " (error)" : ""
|
|
1617
|
-
}: ${clampText(block.content)}`;
|
|
1618
|
-
}
|
|
1619
|
-
|
|
1620
|
-
function serializeBlock(block: ContentBlock): string {
|
|
1621
|
-
switch (block.type) {
|
|
1622
|
-
case "text":
|
|
1623
|
-
return `text: ${clampText(block.text)}`;
|
|
1624
|
-
case "tool_use":
|
|
1625
|
-
return `tool_use ${block.name}: ${clampText(stableJson(block.input))}`;
|
|
1626
|
-
case "tool_result":
|
|
1627
|
-
return `tool_result ${block.tool_use_id}${
|
|
1628
|
-
block.is_error ? " (error)" : ""
|
|
1629
|
-
}: ${clampText(block.content)}`;
|
|
1630
|
-
case "image":
|
|
1631
|
-
// Top-level images are handled by serializeMessagesToContentBlocks.
|
|
1632
|
-
// This path is only hit for images in unexpected positions.
|
|
1633
|
-
return `image: ${block.source.media_type}, ${
|
|
1634
|
-
Math.ceil(block.source.data.length / 4) * 3
|
|
1635
|
-
} bytes(base64)`;
|
|
1636
|
-
case "file": {
|
|
1637
|
-
const sizeBytes = Math.ceil(block.source.data.length / 4) * 3;
|
|
1638
|
-
const parts = [
|
|
1639
|
-
`file: ${block.source.filename}`,
|
|
1640
|
-
block.source.media_type,
|
|
1641
|
-
`${sizeBytes} bytes(base64)`,
|
|
1642
|
-
];
|
|
1643
|
-
if (block.extracted_text) {
|
|
1644
|
-
parts.push(`text=${clampText(block.extracted_text)}`);
|
|
1645
|
-
}
|
|
1646
|
-
return parts.join(", ");
|
|
1647
|
-
}
|
|
1648
|
-
case "thinking":
|
|
1649
|
-
return `thinking: ${clampText(block.thinking)}`;
|
|
1650
|
-
case "redacted_thinking":
|
|
1651
|
-
return "redacted_thinking";
|
|
1652
|
-
case "server_tool_use":
|
|
1653
|
-
return `server_tool_use ${block.name}: ${clampText(stableJson(block.input))}`;
|
|
1654
|
-
case "web_search_tool_result":
|
|
1655
|
-
return `web_search_tool_result ${block.tool_use_id}`;
|
|
1656
|
-
default:
|
|
1657
|
-
return "unknown_block";
|
|
1658
|
-
}
|
|
1659
|
-
}
|
|
1660
|
-
|
|
1661
|
-
function clampText(text: string): string {
|
|
1662
|
-
if (text.length <= MAX_BLOCK_PREVIEW_CHARS) return text;
|
|
1663
|
-
return `${safeStringSlice(text, 0, MAX_BLOCK_PREVIEW_CHARS)}... [truncated ${
|
|
1664
|
-
text.length - MAX_BLOCK_PREVIEW_CHARS
|
|
1665
|
-
} chars]`;
|
|
1666
|
-
}
|
|
1667
|
-
|
|
1668
|
-
function fallbackSummary(currentSummary: string, chunk: string): string {
|
|
1669
|
-
const lines = chunk
|
|
1670
|
-
.split("\n")
|
|
1671
|
-
.map((line) => line.trim())
|
|
1672
|
-
.filter((line) => line.length > 0);
|
|
1673
|
-
const recentLines = lines.slice(-120).join("\n");
|
|
1674
|
-
const merged = [
|
|
1675
|
-
currentSummary.trim(),
|
|
1676
|
-
"## Recent Progress",
|
|
1677
|
-
recentLines.length > 0 ? recentLines : "No new details.",
|
|
1678
|
-
]
|
|
1679
|
-
.filter((part) => part.length > 0)
|
|
1680
|
-
.join("\n\n");
|
|
1681
|
-
if (merged.length <= MAX_FALLBACK_SUMMARY_CHARS) return merged;
|
|
1682
|
-
return merged.slice(merged.length - MAX_FALLBACK_SUMMARY_CHARS);
|
|
1683
|
-
}
|
|
1684
|
-
|
|
1685
|
-
function extractText(content: ContentBlock[]): string {
|
|
1686
|
-
return content
|
|
1687
|
-
.filter(
|
|
1688
|
-
(block): block is Extract<ContentBlock, { type: "text" }> =>
|
|
1689
|
-
block.type === "text",
|
|
1690
|
-
)
|
|
1691
|
-
.map((block) => block.text)
|
|
1692
|
-
.join("\n");
|
|
1693
|
-
}
|
|
1694
|
-
|
|
1695
|
-
function stableJson(value: unknown): string {
|
|
1696
|
-
try {
|
|
1697
|
-
return JSON.stringify(value);
|
|
1698
|
-
} catch {
|
|
1699
|
-
return "[unserializable]";
|
|
1700
|
-
}
|
|
1701
378
|
}
|