@vellumai/assistant 0.8.7 → 0.8.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Dockerfile +20 -4
- package/docker-entrypoint.sh +4 -2
- package/docker-init-apt-root.sh +3 -1
- package/docker-kata-apt-env.sh +3 -1
- package/docker-kata-runtime-family.sh +12 -0
- package/docs/architecture/memory.md +1 -1
- package/docs/plugins.md +75 -79
- package/examples/plugins/echo/README.md +6 -12
- package/examples/plugins/echo/register.ts +0 -41
- package/node_modules/@vellumai/skill-host-contracts/src/server-message.ts +3 -3
- package/openapi.yaml +3381 -348
- package/package.json +1 -1
- package/scripts/generate-openapi.ts +68 -41
- package/src/__tests__/agent-loop-exit-reason.test.ts +34 -39
- package/src/__tests__/agent-loop-provider-error-recording.test.ts +1 -1
- package/src/__tests__/agent-loop.test.ts +37 -87
- package/src/__tests__/agent-wake-disk-pressure-callsite.test.ts +2 -0
- package/src/__tests__/annotate-activity-metadata.test.ts +262 -0
- package/src/__tests__/annotate-risk-options.test.ts +2 -3
- package/src/__tests__/anthropic-provider.test.ts +95 -2
- package/src/__tests__/assistant-event-hub.test.ts +25 -0
- package/src/__tests__/assistant-events-sse-shed.test.ts +8 -0
- package/src/__tests__/{conversation-stream-state.test.ts → assistant-stream-state.test.ts} +252 -91
- package/src/__tests__/auth-fallback-events-store.test.ts +116 -0
- package/src/__tests__/background-workers-disk-pressure.test.ts +6 -0
- package/src/__tests__/btw-routes.test.ts +62 -3
- package/src/__tests__/build-persisted-content.test.ts +184 -0
- package/src/__tests__/catalog-files.test.ts +1 -1
- package/src/__tests__/clawhub-files.test.ts +1 -1
- package/src/__tests__/compaction-pipeline.test.ts +1 -1
- package/src/__tests__/compaction.benchmark.test.ts +0 -30
- package/src/__tests__/config-watcher.test.ts +1 -1
- package/src/__tests__/conversation-abort-tool-results.test.ts +57 -19
- package/src/__tests__/conversation-agent-loop-disk-pressure.test.ts +6 -2
- package/src/__tests__/conversation-agent-loop-inference-profile.test.ts +10 -4
- package/src/__tests__/conversation-agent-loop-overflow.test.ts +313 -1136
- package/src/__tests__/conversation-agent-loop.test.ts +596 -1616
- package/src/__tests__/conversation-analysis-routes.test.ts +6 -0
- package/src/__tests__/conversation-history-web-search.test.ts +11 -1
- package/src/__tests__/conversation-pairing.test.ts +4 -31
- package/src/__tests__/conversation-process-app-control-preactivation.test.ts +6 -0
- package/src/__tests__/conversation-provider-retry-repair.test.ts +26 -5
- package/src/__tests__/conversation-queue.test.ts +2 -0
- package/src/__tests__/conversation-routes-disk-view.test.ts +3 -0
- package/src/__tests__/conversation-routes-slash-commands.test.ts +6 -5
- package/src/__tests__/conversation-runtime-assembly.test.ts +170 -229
- package/src/__tests__/conversation-runtime-workspace.test.ts +3 -24
- package/src/__tests__/conversation-slash-commands.test.ts +8 -42
- package/src/__tests__/conversation-slash-queue.test.ts +6 -1
- package/src/__tests__/conversation-surfaces-action-delivery.test.ts +84 -0
- package/src/__tests__/conversation-sync-tags.test.ts +27 -15
- package/src/__tests__/conversation-title-service.test.ts +135 -2
- package/src/__tests__/conversation-workspace-injection.test.ts +6 -1
- package/src/__tests__/cross-provider-web-search.test.ts +214 -1
- package/src/__tests__/db-schedule-syntax-migration.test.ts +5 -0
- package/src/__tests__/dm-persistence.test.ts +5 -1
- package/src/__tests__/empty-response-hook.test.ts +304 -0
- package/src/__tests__/feature-flag-test-helpers.ts +2 -2
- package/src/__tests__/gemini-image-service.test.ts +13 -0
- package/src/__tests__/helpers/mock-provider.ts +110 -0
- package/src/__tests__/helpers/native-web-search-harness.ts +129 -0
- package/src/__tests__/history-repair-hook.test.ts +1 -0
- package/src/__tests__/identity-intro-cache.test.ts +12 -100
- package/src/__tests__/identity-routes.test.ts +248 -7
- package/src/__tests__/inbound-slack-persistence.test.ts +5 -1
- package/src/__tests__/injector-background-turn.test.ts +2 -8
- package/src/__tests__/injector-chain.test.ts +106 -270
- package/src/__tests__/injector-disk-pressure.test.ts +3 -12
- package/src/__tests__/injector-document-comments.test.ts +2 -2
- package/src/__tests__/injector-pkb-v2-silenced.test.ts +30 -22
- package/src/__tests__/injector-v3-suppression.test.ts +31 -37
- package/src/__tests__/internal-telemetry-routes.test.ts +109 -0
- package/src/__tests__/list-messages-page-latest.test.ts +60 -0
- package/src/__tests__/list-messages-tool-merge.test.ts +20 -0
- package/src/__tests__/llm-usage-store.test.ts +223 -1
- package/src/__tests__/memory-retrieval-hook.test.ts +297 -0
- package/src/__tests__/memory-v2-static-injector.test.ts +103 -35
- package/src/__tests__/native-web-search.test.ts +191 -0
- package/src/__tests__/onboarding-template-contract.test.ts +2 -0
- package/src/__tests__/openai-image-service.test.ts +17 -0
- package/src/__tests__/openai-provider.test.ts +31 -1
- package/src/__tests__/persist-unsendable-image.test.ts +215 -0
- package/src/__tests__/persistence-secret-redaction.test.ts +1 -0
- package/src/__tests__/pipeline-runner.test.ts +29 -39
- package/src/__tests__/pkb-autoinject.test.ts +2 -5
- package/src/__tests__/plugin-bootstrap.test.ts +13 -28
- package/src/__tests__/plugin-registry.test.ts +0 -27
- package/src/__tests__/plugin-types.test.ts +2 -125
- package/src/__tests__/process-message-display-content.test.ts +6 -2
- package/src/__tests__/regenerate-fire-and-forget-trace.test.ts +5 -1
- package/src/__tests__/resolve-trust-class.test.ts +4 -4
- package/src/__tests__/runtime-events-sse-reconnect.test.ts +60 -23
- package/src/__tests__/schedule-routes.test.ts +603 -2
- package/src/__tests__/schedule-store.test.ts +41 -0
- package/src/__tests__/schedule-tools.test.ts +35 -0
- package/src/__tests__/server-history-render.test.ts +314 -1
- package/src/__tests__/skillssh-files.test.ts +1 -1
- package/src/__tests__/system-prompt.test.ts +20 -0
- package/src/__tests__/task-scheduler.test.ts +162 -1
- package/src/__tests__/terminal-tools.test.ts +6 -1
- package/src/__tests__/title-generate-hook.test.ts +319 -0
- package/src/__tests__/tool-error-hook.test.ts +278 -0
- package/src/__tests__/tool-preview-lifecycle.test.ts +468 -5
- package/src/__tests__/tool-result-metadata-plumbing.test.ts +1 -0
- package/src/__tests__/tool-result-truncate-hook.test.ts +127 -0
- package/src/__tests__/tool-result-truncation.test.ts +0 -2
- package/src/__tests__/ui-choice-copy-surfaces.test.ts +254 -0
- package/src/__tests__/ui-work-result-surface.test.ts +159 -0
- package/src/__tests__/usage-routes.test.ts +285 -1
- package/src/__tests__/user-plugin-loader.test.ts +2 -2
- package/src/__tests__/voice-session-bridge.test.ts +6 -3
- package/src/__tests__/web-search-backend-failure.test.ts +166 -0
- package/src/agent/loop.ts +346 -442
- package/src/api/events/assistant-thinking-delta.ts +33 -0
- package/src/api/events/tool-output-chunk.ts +45 -0
- package/src/api/events/tool-use-preview-start.ts +32 -0
- package/src/api/events/trace-event.ts +69 -0
- package/src/api/index.ts +48 -13
- package/src/api/responses/conversation-message.ts +368 -0
- package/src/avatar/__tests__/avatar-store.test.ts +34 -29
- package/src/cli/commands/__tests__/notifications.test.ts +58 -14
- package/src/cli/commands/notifications.ts +112 -60
- package/src/config/assistant-feature-flags.ts +22 -11
- package/src/config/bundled-skills/app-builder/SKILL.md +3 -20
- package/src/config/bundled-skills/app-builder/references/examples/README.md +17 -0
- package/src/config/bundled-skills/app-builder/references/examples/expense-tracker.md +515 -0
- package/src/config/bundled-skills/app-builder/references/examples/focus-timer.md +342 -0
- package/src/config/bundled-skills/app-builder/references/examples/habit-tracker.md +490 -0
- package/src/config/bundled-skills/document-editor/SKILL.md +1 -1
- package/src/config/bundled-skills/messaging/SKILL.md +0 -7
- package/src/config/feature-flag-cache.ts +3 -3
- package/src/config/feature-flag-registry.json +35 -3
- package/src/config/schemas/__tests__/memory-v2.test.ts +1 -0
- package/src/config/schemas/__tests__/memory-v3.test.ts +25 -0
- package/src/config/schemas/llm.ts +1 -0
- package/src/config/schemas/memory-v2.ts +8 -0
- package/src/config/schemas/memory-v3.ts +8 -0
- package/src/config/schemas/platform.ts +8 -0
- package/src/config/seed-inference-profiles.ts +2 -2
- package/src/config/skills.ts +13 -0
- package/src/context/compactor.ts +1 -1
- package/src/context/strip-injections.ts +122 -0
- package/src/context/token-estimator.ts +23 -0
- package/src/context/tool-result-truncation.ts +0 -23
- package/src/context/window-manager.ts +3 -6
- package/src/credential-execution/executable-discovery.ts +16 -0
- package/src/daemon/__tests__/conversation-lifecycle-auto-analyze.test.ts +6 -0
- package/src/daemon/__tests__/inference-profile-notification.test.ts +153 -0
- package/src/daemon/__tests__/native-web-search-metadata.test.ts +10 -8
- package/src/daemon/assistant-attachments.ts +1 -1
- package/src/daemon/config-watcher.ts +2 -2
- package/src/daemon/context-overflow-reducer.ts +0 -1
- package/src/daemon/conversation-agent-loop-handlers.ts +605 -153
- package/src/daemon/conversation-agent-loop.ts +281 -760
- package/src/daemon/conversation-history.ts +5 -4
- package/src/daemon/conversation-lifecycle.ts +3 -4
- package/src/daemon/conversation-messaging.ts +7 -6
- package/src/daemon/conversation-process.ts +11 -16
- package/src/daemon/conversation-runtime-assembly.ts +130 -347
- package/src/daemon/conversation-slash.ts +6 -25
- package/src/daemon/conversation-surfaces.ts +222 -4
- package/src/daemon/conversation-tool-setup.ts +2 -29
- package/src/daemon/conversation.ts +32 -14
- package/src/daemon/external-plugins-bootstrap.ts +9 -10
- package/src/daemon/handlers/config-a2a.ts +51 -36
- package/src/daemon/handlers/config-slack-channel.ts +20 -14
- package/src/daemon/handlers/config-telegram.ts +16 -2
- package/src/daemon/handlers/shared.ts +156 -84
- package/src/daemon/handlers/skills.ts +39 -10
- package/src/daemon/lifecycle.ts +4 -0
- package/src/daemon/message-types/apps.ts +1 -29
- package/src/daemon/message-types/messages.ts +9 -57
- package/src/daemon/message-types/skills.ts +2 -0
- package/src/daemon/message-types/surfaces.ts +136 -3
- package/src/daemon/now-scratchpad.ts +21 -0
- package/src/daemon/orphan-reaper.test.ts +210 -0
- package/src/daemon/orphan-reaper.ts +240 -0
- package/src/daemon/persist-unsendable-image.ts +117 -0
- package/src/daemon/process-message.ts +1 -3
- package/src/daemon/trace-emitter.ts +6 -4
- package/src/daemon/trust-context.ts +19 -0
- package/src/daemon/wake-target-adapter.ts +3 -1
- package/src/home/home-greeting-cache.ts +24 -1
- package/src/ipc/gateway-client.test.ts +2 -2
- package/src/ipc/gateway-client.ts +3 -3
- package/src/media/gemini-image-service.ts +15 -0
- package/src/media/openai-image-service.ts +14 -0
- package/src/media/types.ts +34 -0
- package/src/memory/__tests__/jobs-worker-v2-schedule.test.ts +56 -0
- package/src/memory/auth-fallback-events-store.ts +94 -0
- package/src/memory/conversation-title-service.ts +65 -41
- package/src/memory/db-init.ts +4 -0
- package/src/memory/graph/__tests__/conversation-graph-memory-registry.test.ts +119 -0
- package/src/memory/graph/conversation-graph-memory.ts +65 -0
- package/src/memory/jobs-store.ts +33 -0
- package/src/memory/jobs-worker.ts +31 -4
- package/src/memory/llm-usage-store.ts +224 -50
- package/src/memory/migrations/222-strip-placeholder-sentinels-from-messages.ts +6 -5
- package/src/memory/migrations/270-schedule-source-conversation.ts +13 -0
- package/src/memory/migrations/271-create-auth-fallback-events.ts +21 -0
- package/src/memory/migrations/index.ts +2 -0
- package/src/memory/pkb/autoinject.ts +61 -0
- package/src/memory/pkb/context.ts +50 -0
- package/src/memory/pkb/types.ts +14 -0
- package/src/memory/schedule-attribution-sql.ts +104 -0
- package/src/memory/schema/infrastructure.ts +16 -0
- package/src/memory/usage-grouped-buckets.ts +6 -1
- package/src/memory/v2/__tests__/consolidation-job.test.ts +1 -1
- package/src/memory/v2/consolidation-job.ts +1 -1
- package/src/memory/v3/__tests__/health.test.ts +16 -0
- package/src/memory/v3/__tests__/orchestrate.test.ts +45 -9
- package/src/memory/v3/__tests__/provider-blocks.test.ts +13 -0
- package/src/memory/v3/__tests__/router.test.ts +101 -29
- package/src/memory/v3/__tests__/selector.test.ts +93 -27
- package/src/memory/v3/__tests__/shadow-plugin.test.ts +23 -5
- package/src/memory/v3/health.ts +0 -0
- package/src/memory/v3/llm-retry.ts +32 -0
- package/src/memory/v3/orchestrate.ts +26 -14
- package/src/memory/v3/provider-blocks.ts +15 -5
- package/src/memory/v3/router.ts +48 -42
- package/src/memory/v3/selector.ts +57 -42
- package/src/memory/v3/shadow-plugin.ts +47 -15
- package/src/memory/v3/types.ts +8 -0
- package/src/notifications/conversation-pairing.ts +8 -15
- package/src/notifications/decision-engine.ts +6 -3
- package/src/notifications/home-feed-side-effect.ts +12 -1
- package/src/permissions/prompter.ts +4 -0
- package/src/plugin-api/constants.ts +4 -0
- package/src/plugin-api/index.ts +8 -1
- package/src/plugin-api/types.ts +151 -1
- package/src/plugins/defaults/empty-response/hooks/stop.ts +126 -0
- package/src/plugins/defaults/empty-response/register.ts +8 -13
- package/src/plugins/defaults/index.ts +1 -15
- package/src/plugins/defaults/injectors/register.ts +243 -74
- package/src/plugins/defaults/memory-retrieval/hooks/post-compact.ts +91 -0
- package/src/plugins/defaults/memory-retrieval/hooks/user-prompt-submit-temp.ts +216 -0
- package/src/plugins/defaults/memory-retrieval/injector-chain.ts +35 -0
- package/src/plugins/defaults/title-generate/hooks/stop.ts +75 -0
- package/src/plugins/defaults/title-generate/hooks/user-prompt-submit.ts +35 -0
- package/src/plugins/defaults/title-generate/package.json +1 -1
- package/src/plugins/defaults/title-generate/register.ts +18 -18
- package/src/plugins/defaults/tool-error/hooks/post-tool-use.ts +118 -0
- package/src/plugins/defaults/tool-error/package.json +1 -1
- package/src/plugins/defaults/tool-error/register.ts +9 -21
- package/src/plugins/defaults/tool-result-truncate/hooks/post-tool-use.ts +32 -0
- package/src/plugins/defaults/tool-result-truncate/register.ts +10 -21
- package/src/plugins/defaults/tool-result-truncate/terminal.ts +37 -18
- package/src/plugins/pipeline.ts +6 -18
- package/src/plugins/registry.ts +8 -25
- package/src/plugins/types.ts +43 -474
- package/src/proactive-artifact/aux-message-injector.ts +3 -3
- package/src/proactive-artifact/job.test.ts +7 -12
- package/src/prompts/__tests__/system-prompt.test.ts +36 -0
- package/src/prompts/templates/BOOTSTRAP-ACTIVATION-RAIL.md +62 -0
- package/src/prompts/templates/BOOTSTRAP.md +2 -2
- package/src/prompts/templates/system-sections.ts +15 -0
- package/src/providers/anthropic/client.ts +37 -29
- package/src/providers/openai/__tests__/chat-completions-provider-reasoning.test.ts +112 -0
- package/src/providers/openai/chat-completions-provider.ts +44 -0
- package/src/providers/openrouter/client.ts +1 -0
- package/src/providers/placeholder-sentinels.ts +35 -0
- package/src/runtime/__tests__/agent-wake.test.ts +5 -1
- package/src/runtime/agent-wake.ts +2 -2
- package/src/runtime/assistant-event-hub.ts +36 -6
- package/src/runtime/{conversation-stream-state.ts → assistant-stream-state.ts} +132 -58
- package/src/runtime/http-router.ts +16 -21
- package/src/runtime/http-types.ts +16 -70
- package/src/runtime/pending-interactions.ts +1 -0
- package/src/runtime/routes/__tests__/consolidation-routes.test.ts +265 -2
- package/src/runtime/routes/__tests__/conversation-query-routes.test.ts +31 -1
- package/src/runtime/routes/__tests__/memory-v2-routes.test.ts +6 -2
- package/src/runtime/routes/__tests__/tts-routes.test.ts +6 -2
- package/src/runtime/routes/app-management-routes.ts +6 -117
- package/src/runtime/routes/app-routes.ts +13 -15
- package/src/runtime/routes/attachment-routes.ts +26 -15
- package/src/runtime/routes/avatar-routes.ts +26 -0
- package/src/runtime/routes/btw-routes.ts +29 -23
- package/src/runtime/routes/consolidation-routes.ts +120 -20
- package/src/runtime/routes/conversation-query-routes.ts +2 -0
- package/src/runtime/routes/conversation-routes.ts +358 -184
- package/src/runtime/routes/documents-routes.ts +4 -0
- package/src/runtime/routes/domain-routes.ts +51 -37
- package/src/runtime/routes/epoch-millis-range.ts +34 -0
- package/src/runtime/routes/events-routes.ts +28 -34
- package/src/runtime/routes/gateway-log-routes.ts +26 -4
- package/src/runtime/routes/heartbeat-routes.ts +32 -12
- package/src/runtime/routes/identity-intro-cache.ts +11 -34
- package/src/runtime/routes/identity-routes.ts +208 -17
- package/src/runtime/routes/image-generation-routes.ts +40 -2
- package/src/runtime/routes/index.ts +2 -0
- package/src/runtime/routes/integrations/a2a.ts +12 -10
- package/src/runtime/routes/integrations/slack/__tests__/channel.test.ts +16 -0
- package/src/runtime/routes/integrations/slack/channel.ts +4 -0
- package/src/runtime/routes/integrations/slack/share.ts +27 -6
- package/src/runtime/routes/integrations/telegram.ts +6 -0
- package/src/runtime/routes/integrations/twilio.ts +42 -0
- package/src/runtime/routes/internal-telemetry-routes.ts +88 -0
- package/src/runtime/routes/log-export-routes.ts +8 -0
- package/src/runtime/routes/memory-v2-routes.ts +15 -8
- package/src/runtime/routes/memory-v3-routes.ts +50 -28
- package/src/runtime/routes/oauth-apps.ts +66 -12
- package/src/runtime/routes/oauth-providers.ts +44 -5
- package/src/runtime/routes/platform-routes.ts +81 -5
- package/src/runtime/routes/playground/__tests__/force-compact.test.ts +6 -4
- package/src/runtime/routes/playground/force-compact.ts +1 -1
- package/src/runtime/routes/rename-conversation-routes.ts +5 -0
- package/src/runtime/routes/schedule-routes.ts +152 -42
- package/src/runtime/routes/secret-routes.ts +14 -2
- package/src/runtime/routes/skills-routes.ts +43 -14
- package/src/runtime/routes/tool-call-confirmation-enrichment.test.ts +161 -0
- package/src/runtime/routes/tool-call-confirmation-enrichment.ts +107 -0
- package/src/runtime/routes/trust-rules-routes.ts +26 -2
- package/src/runtime/routes/tts-routes.ts +35 -0
- package/src/runtime/routes/types.ts +66 -8
- package/src/runtime/routes/usage-routes.ts +47 -39
- package/src/runtime/routes/webhook-routes.ts +41 -2
- package/src/runtime/routes/workspace-routes.ts +4 -0
- package/src/runtime/services/__tests__/analyze-conversation.test.ts +6 -0
- package/src/runtime/services/analyze-conversation.ts +2 -2
- package/src/schedule/schedule-store.ts +20 -1
- package/src/schedule/schedule-usage-store.ts +83 -0
- package/src/schedule/scheduler.ts +12 -5
- package/src/skills/catalog-files.ts +2 -2
- package/src/skills/catalog-install.ts +3 -0
- package/src/skills/categories-cache.ts +118 -0
- package/src/skills/clawhub-files.ts +1 -2
- package/src/skills/skillssh-files.ts +1 -2
- package/src/telemetry/types.ts +29 -1
- package/src/telemetry/usage-telemetry-reporter.test.ts +112 -3
- package/src/telemetry/usage-telemetry-reporter.ts +57 -2
- package/src/tools/executor.ts +1 -53
- package/src/tools/network/__tests__/web-search-metadata.test.ts +7 -1
- package/src/tools/network/__tests__/web-search.test.ts +11 -3
- package/src/tools/network/web-search-error.test.ts +248 -0
- package/src/tools/network/web-search-error.ts +267 -0
- package/src/tools/network/web-search.ts +207 -48
- package/src/tools/schedule/create.ts +2 -0
- package/src/tools/terminal/safe-env.ts +10 -1
- package/src/tools/ui-surface/definitions.ts +9 -1
- package/src/tts/__tests__/provider-catalog-consistency.test.ts +85 -1
- package/src/tts/provider-catalog.ts +76 -1
- package/src/util/mutex.ts +47 -0
- package/src/workspace/git-service.ts +1 -42
- package/src/workspace/migrations/095-bump-heartbeat-interval-30m-to-60m.ts +51 -0
- package/src/workspace/migrations/096-reduce-quality-profile-effort.ts +72 -0
- package/src/workspace/migrations/097-enable-adaptive-thinking-managed-profiles.ts +93 -0
- package/src/workspace/migrations/registry.ts +6 -0
- package/src/__tests__/bootstrap-turn-cleanup.test.ts +0 -44
- package/src/__tests__/empty-response-pipeline.test.ts +0 -423
- package/src/__tests__/llm-call-pipeline.test.ts +0 -287
- package/src/__tests__/memory-retrieval-pipeline.test.ts +0 -418
- package/src/__tests__/persistence-pipeline.test.ts +0 -503
- package/src/__tests__/title-generate-pipeline.test.ts +0 -211
- package/src/__tests__/token-estimate-pipeline.test.ts +0 -479
- package/src/__tests__/tool-error-pipeline.test.ts +0 -241
- package/src/__tests__/tool-execute-pipeline.test.ts +0 -417
- package/src/__tests__/tool-result-truncate-pipeline.test.ts +0 -341
- package/src/daemon/bootstrap-turn-cleanup.ts +0 -45
- package/src/gallery/default-gallery.ts +0 -1359
- package/src/gallery/gallery-manifest.ts +0 -28
- package/src/home/feature-gate.ts +0 -22
- package/src/plugins/defaults/empty-response/middlewares/emptyResponse.ts +0 -22
- package/src/plugins/defaults/empty-response/terminal.ts +0 -106
- package/src/plugins/defaults/injectors/package.json +0 -15
- package/src/plugins/defaults/llm-call/middlewares/llmCall.ts +0 -17
- package/src/plugins/defaults/llm-call/package.json +0 -15
- package/src/plugins/defaults/llm-call/register.ts +0 -45
- package/src/plugins/defaults/memory-retrieval/middlewares/memoryRetrieval.ts +0 -17
- package/src/plugins/defaults/memory-retrieval/package.json +0 -15
- package/src/plugins/defaults/memory-retrieval/register.ts +0 -181
- package/src/plugins/defaults/persistence/middlewares/persistence.ts +0 -19
- package/src/plugins/defaults/persistence/package.json +0 -15
- package/src/plugins/defaults/persistence/register.ts +0 -38
- package/src/plugins/defaults/persistence/terminal.ts +0 -83
- package/src/plugins/defaults/title-generate/terminal.ts +0 -31
- package/src/plugins/defaults/token-estimate/middlewares/tokenEstimate.ts +0 -23
- package/src/plugins/defaults/token-estimate/package.json +0 -15
- package/src/plugins/defaults/token-estimate/register.ts +0 -34
- package/src/plugins/defaults/token-estimate/terminal.ts +0 -40
- package/src/plugins/defaults/tool-error/middlewares/toolError.ts +0 -21
- package/src/plugins/defaults/tool-error/terminal.ts +0 -47
- package/src/plugins/defaults/tool-execute/middlewares/toolExecute.ts +0 -23
- package/src/plugins/defaults/tool-execute/package.json +0 -15
- package/src/plugins/defaults/tool-execute/register.ts +0 -49
- package/src/plugins/defaults/tool-result-truncate/middlewares/toolResultTruncate.ts +0 -23
- package/src/plugins/defaults/tool-result-truncate/types.ts +0 -22
- package/src/skills/category-inference.ts +0 -111
|
@@ -4,9 +4,12 @@
|
|
|
4
4
|
* Coverage matrix:
|
|
5
5
|
* - Returned IDs map to the right member slugs by 1-based index, with
|
|
6
6
|
* `pinned` driven by `pinned_ids`.
|
|
7
|
-
* - Omitted `ids` → ALL members of the leaf (recall-safe
|
|
7
|
+
* - Omitted `ids` → ALL members of the leaf (recall-safe; bounded to one
|
|
8
|
+
* leaf, so this stays a select-all unlike the L1 router).
|
|
8
9
|
* - Explicit `ids: []` → no pages (deliberate abstention).
|
|
9
|
-
* - No provider / missing tool_use / schema mismatch / throw →
|
|
10
|
+
* - No provider / missing tool_use / schema mismatch / throw → no pages
|
|
11
|
+
* (degrade to the deterministic lanes), the last three after a re-prompt
|
|
12
|
+
* retry; a malformed response that recovers on retry returns its pages.
|
|
10
13
|
* - The per-leaf `<pages>` prefix is byte-identical across two calls with
|
|
11
14
|
* different turns (the cache invariant).
|
|
12
15
|
* - `selectAcrossLeaves` flattens per-leaf results and never exceeds the
|
|
@@ -83,6 +86,45 @@ function toolUseResponse(input: Record<string, unknown>): ProviderResponse {
|
|
|
83
86
|
};
|
|
84
87
|
}
|
|
85
88
|
|
|
89
|
+
/** A 200 response that carries no tool_use — the malformed-but-successful case
|
|
90
|
+
* the re-prompt retry exists to recover from. */
|
|
91
|
+
function noToolResponse(): ProviderResponse {
|
|
92
|
+
return {
|
|
93
|
+
model: "stub-model",
|
|
94
|
+
stopReason: "end_turn",
|
|
95
|
+
usage: { inputTokens: 0, outputTokens: 0 },
|
|
96
|
+
content: [{ type: "text", text: "no tool call" }],
|
|
97
|
+
};
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
/** Provider returning a different response per call (the i-th call returns
|
|
101
|
+
* responses[i], or the last entry once exhausted), recording each call so a
|
|
102
|
+
* test can assert how many attempts were made. */
|
|
103
|
+
function makeSequenceProvider(responses: ProviderResponse[]): Provider {
|
|
104
|
+
let i = 0;
|
|
105
|
+
return {
|
|
106
|
+
name: "sequence",
|
|
107
|
+
sendMessage: async (messages, options) => {
|
|
108
|
+
providerCalls.push({ messages, options });
|
|
109
|
+
const response = responses[Math.min(i, responses.length - 1)];
|
|
110
|
+
i += 1;
|
|
111
|
+
return response;
|
|
112
|
+
},
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
/** Provider that records each call and then throws — for the throw-after-retries
|
|
117
|
+
* path (the provider's own RetryProvider has already exhausted its backoff). */
|
|
118
|
+
function makeThrowingProvider(): Provider {
|
|
119
|
+
return {
|
|
120
|
+
name: "throwing",
|
|
121
|
+
sendMessage: async (messages, options) => {
|
|
122
|
+
providerCalls.push({ messages, options });
|
|
123
|
+
throw new Error("boom");
|
|
124
|
+
},
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
|
|
86
128
|
function makeLeaf(path: LeafPath, members: Slug[]): LeafNode {
|
|
87
129
|
return {
|
|
88
130
|
path,
|
|
@@ -206,10 +248,8 @@ describe("selectFromLeaf — id mapping", () => {
|
|
|
206
248
|
// selectFromLeaf — recall-safe fallbacks.
|
|
207
249
|
// ---------------------------------------------------------------------------
|
|
208
250
|
|
|
209
|
-
describe("selectFromLeaf —
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
test("no provider → ALL members", async () => {
|
|
251
|
+
describe("selectFromLeaf — degradation on failure", () => {
|
|
252
|
+
test("no provider → no pages, without calling the provider", async () => {
|
|
213
253
|
providerStub = null;
|
|
214
254
|
const result = await selectFromLeaf(
|
|
215
255
|
"people/alice",
|
|
@@ -217,26 +257,23 @@ describe("selectFromLeaf — recall-safe fallbacks", () => {
|
|
|
217
257
|
makeTree(),
|
|
218
258
|
summaryOf,
|
|
219
259
|
);
|
|
220
|
-
expect(result).toEqual(
|
|
260
|
+
expect(result).toEqual([]);
|
|
261
|
+
expect(providerCalls).toHaveLength(0);
|
|
221
262
|
});
|
|
222
263
|
|
|
223
|
-
test("missing tool_use →
|
|
224
|
-
providerStub = makeProvider(
|
|
225
|
-
model: "stub-model",
|
|
226
|
-
stopReason: "end_turn",
|
|
227
|
-
usage: { inputTokens: 0, outputTokens: 0 },
|
|
228
|
-
content: [{ type: "text", text: "no tool call" }],
|
|
229
|
-
});
|
|
264
|
+
test("missing tool_use → no pages after retrying", async () => {
|
|
265
|
+
providerStub = makeProvider(noToolResponse());
|
|
230
266
|
const result = await selectFromLeaf(
|
|
231
267
|
"people/alice",
|
|
232
268
|
makeTurn("x"),
|
|
233
269
|
makeTree(),
|
|
234
270
|
summaryOf,
|
|
235
271
|
);
|
|
236
|
-
expect(result).toEqual(
|
|
272
|
+
expect(result).toEqual([]);
|
|
273
|
+
expect(providerCalls).toHaveLength(3);
|
|
237
274
|
});
|
|
238
275
|
|
|
239
|
-
test("schema mismatch →
|
|
276
|
+
test("schema mismatch → no pages after retrying", async () => {
|
|
240
277
|
providerStub = makeProvider(toolUseResponse({ ids: "not-an-array" }));
|
|
241
278
|
const result = await selectFromLeaf(
|
|
242
279
|
"people/alice",
|
|
@@ -244,23 +281,35 @@ describe("selectFromLeaf — recall-safe fallbacks", () => {
|
|
|
244
281
|
makeTree(),
|
|
245
282
|
summaryOf,
|
|
246
283
|
);
|
|
247
|
-
expect(result).toEqual(
|
|
284
|
+
expect(result).toEqual([]);
|
|
285
|
+
expect(providerCalls).toHaveLength(3);
|
|
248
286
|
});
|
|
249
287
|
|
|
250
|
-
test("provider throw →
|
|
251
|
-
providerStub =
|
|
252
|
-
name: "throwing",
|
|
253
|
-
sendMessage: async () => {
|
|
254
|
-
throw new Error("boom");
|
|
255
|
-
},
|
|
256
|
-
};
|
|
288
|
+
test("provider throw → no pages after retrying", async () => {
|
|
289
|
+
providerStub = makeThrowingProvider();
|
|
257
290
|
const result = await selectFromLeaf(
|
|
258
291
|
"people/alice",
|
|
259
292
|
makeTurn("x"),
|
|
260
293
|
makeTree(),
|
|
261
294
|
summaryOf,
|
|
262
295
|
);
|
|
263
|
-
expect(result).toEqual(
|
|
296
|
+
expect(result).toEqual([]);
|
|
297
|
+
expect(providerCalls).toHaveLength(3);
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
test("a malformed response that recovers on retry returns its pages", async () => {
|
|
301
|
+
providerStub = makeSequenceProvider([
|
|
302
|
+
noToolResponse(),
|
|
303
|
+
toolUseResponse({ ids: [2] }),
|
|
304
|
+
]);
|
|
305
|
+
const result = await selectFromLeaf(
|
|
306
|
+
"people/alice",
|
|
307
|
+
makeTurn("the 1:1"),
|
|
308
|
+
makeTree(),
|
|
309
|
+
summaryOf,
|
|
310
|
+
);
|
|
311
|
+
expect(result).toEqual([{ slug: "alice-1on1", pinned: false }]);
|
|
312
|
+
expect(providerCalls).toHaveLength(2);
|
|
264
313
|
});
|
|
265
314
|
});
|
|
266
315
|
|
|
@@ -298,13 +347,13 @@ describe("selectFromLeaf — request shape", () => {
|
|
|
298
347
|
const [blockA, blockB] = providerCalls[0].messages[0].content as Array<{
|
|
299
348
|
type: string;
|
|
300
349
|
text: string;
|
|
301
|
-
cache_control?: { type: string };
|
|
350
|
+
cache_control?: { type: string; ttl?: string };
|
|
302
351
|
}>;
|
|
303
352
|
expect(blockA.type).toBe("text");
|
|
304
353
|
expect(blockA.text).toContain("<leaf>people/alice</leaf>");
|
|
305
354
|
expect(blockA.text).toContain("<pages>");
|
|
306
355
|
expect(blockA.text).toContain("[1] alice-bio — summary of alice-bio");
|
|
307
|
-
expect(blockA.cache_control).toEqual({ type: "ephemeral" });
|
|
356
|
+
expect(blockA.cache_control).toEqual({ type: "ephemeral", ttl: "1h" });
|
|
308
357
|
|
|
309
358
|
expect(blockB.type).toBe("text");
|
|
310
359
|
expect(blockB.text).toContain("<current_message>alice?</current_message>");
|
|
@@ -312,6 +361,23 @@ describe("selectFromLeaf — request shape", () => {
|
|
|
312
361
|
expect(blockB.cache_control).toBeUndefined();
|
|
313
362
|
});
|
|
314
363
|
|
|
364
|
+
test("situational context renders in the per-turn block when present", async () => {
|
|
365
|
+
providerStub = makeProvider(toolUseResponse({ ids: [1] }));
|
|
366
|
+
await selectFromLeaf(
|
|
367
|
+
"people/alice",
|
|
368
|
+
{
|
|
369
|
+
...makeTurn("alice?"),
|
|
370
|
+
situationalContext: "Today is Saturday. Alice's anniversary is today.",
|
|
371
|
+
},
|
|
372
|
+
makeTree(),
|
|
373
|
+
summaryOf,
|
|
374
|
+
);
|
|
375
|
+
const blockB = providerCalls[0].messages[0].content[1] as { text: string };
|
|
376
|
+
expect(blockB.text).toContain(
|
|
377
|
+
"<situation>Today is Saturday. Alice's anniversary is today.</situation>",
|
|
378
|
+
);
|
|
379
|
+
});
|
|
380
|
+
|
|
315
381
|
test("system prompt mentions pinned (locks the pinning commitment)", async () => {
|
|
316
382
|
providerStub = makeProvider(toolUseResponse({ ids: [1] }));
|
|
317
383
|
await selectFromLeaf("people/alice", makeTurn("x"), makeTree(), summaryOf);
|
|
@@ -68,6 +68,7 @@ const orchestrateSpy = mock(async () => ({
|
|
|
68
68
|
let treeLoads = 0;
|
|
69
69
|
let coreLoads = 0;
|
|
70
70
|
let needleBuilds = 0;
|
|
71
|
+
let configL2Concurrency = 16;
|
|
71
72
|
|
|
72
73
|
// Shared in-memory DB so writes are observable from the test. We hold the raw
|
|
73
74
|
// sqlite handle alongside the drizzle wrapper so the test can both read rows
|
|
@@ -105,7 +106,12 @@ mock.module("../../../config/assistant-feature-flags.js", () => ({
|
|
|
105
106
|
|
|
106
107
|
mock.module("../../../config/loader.js", () => ({
|
|
107
108
|
getConfig: () => ({
|
|
108
|
-
memory: {
|
|
109
|
+
memory: {
|
|
110
|
+
v3: {
|
|
111
|
+
workingSet: { maxPages: 150, evictWindow: 5 },
|
|
112
|
+
l2Concurrency: configL2Concurrency,
|
|
113
|
+
},
|
|
114
|
+
},
|
|
109
115
|
}),
|
|
110
116
|
}));
|
|
111
117
|
|
|
@@ -203,7 +209,7 @@ const {
|
|
|
203
209
|
runShadowObservation,
|
|
204
210
|
resetShadowLanesForTests,
|
|
205
211
|
invalidateLanes,
|
|
206
|
-
|
|
212
|
+
memoryV3Injector,
|
|
207
213
|
} = await import("../shadow-plugin.js");
|
|
208
214
|
|
|
209
215
|
// The module stubs above stay installed for the rest of the process (Bun can't
|
|
@@ -235,14 +241,14 @@ beforeEach(() => {
|
|
|
235
241
|
treeLoads = 0;
|
|
236
242
|
coreLoads = 0;
|
|
237
243
|
needleBuilds = 0;
|
|
244
|
+
configL2Concurrency = 16;
|
|
238
245
|
testDb = makeDb();
|
|
239
246
|
resetShadowLanesForTests();
|
|
240
247
|
});
|
|
241
248
|
|
|
242
|
-
/** Invoke the
|
|
249
|
+
/** Invoke the memory-v3 injector's `produce()` for a turn. */
|
|
243
250
|
function produce(conversationId: string, turnIndex: number) {
|
|
244
|
-
|
|
245
|
-
return injector.produce({
|
|
251
|
+
return memoryV3Injector.produce({
|
|
246
252
|
requestId: "r1",
|
|
247
253
|
conversationId,
|
|
248
254
|
turnIndex,
|
|
@@ -289,6 +295,18 @@ describe("memory-v3 shadow plugin", () => {
|
|
|
289
295
|
expect(turn.currentMessage).toBe("hello world");
|
|
290
296
|
});
|
|
291
297
|
|
|
298
|
+
// The value returned by the mocked config for `memory.v3.l2Concurrency` must be
// handed to `orchestrate` as part of its second argument.
test("orchestrate receives the configured L2 concurrency", async () => {
  shadowEnabled = true;
  configL2Concurrency = 9;

  await runShadowObservation("conv-1", 0);

  // The spy's call list is loosely typed, so widen it before indexing.
  const recordedCalls = orchestrateSpy.mock.calls as unknown as unknown[][];
  const firstCallArgs = recordedCalls[0]!;
  const deps = firstCallArgs[1] as { l2Concurrency?: number };
  expect(deps.l2Concurrency).toBe(9);
});
|
|
309
|
+
|
|
292
310
|
test("both flags OFF → produce returns null, no orchestrate, no writes", async () => {
|
|
293
311
|
liveEnabled = false;
|
|
294
312
|
shadowEnabled = false;
|
package/src/memory/v3/health.ts
CHANGED
|
Binary file
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
/**
 * Memory v3 — shared retry helper for the L1 router and L2 selector model calls.
 *
 * The configured provider is already wrapped in `RetryProvider`
 * (`../../providers/retry.ts`), which retries transient transport failures
 * (network errors, 429s, 5xx, stream aborts) with exponential backoff before it
 * ever throws. This helper therefore adds NO backoff of its own; it exists to:
 *   (a) re-prompt on a malformed-but-successful response — a 200 whose body has
 *       no usable forced `tool_use`, or whose tool input fails schema validation
 *       (the provider's retry never re-runs these, since nothing threw); and
 *   (b) cheaply re-attempt a call that threw after the provider exhausted its
 *       own retries, before the lane degrades to its deterministic fallback.
 *
 * `attempt` signals "unusable, retry me" by returning `null` (or throwing). The
 * first non-null result wins; `null` after `maxAttempts` tells the caller to
 * degrade to the deterministic recall lanes.
 *
 * @param attempt Performs one try. Resolve with a value to finish, resolve with
 *   `null` (or reject) to request another try.
 * @param maxAttempts Upper bound on the number of `attempt` calls. Defaults to
 *   3. A value that is zero, negative, or `NaN` makes no call at all and
 *   resolves to `null`.
 * @param onError Optional observer invoked with each error thrown by `attempt`
 *   and the 1-based number of the attempt that threw. Without it, thrown errors
 *   are dropped exactly as before; with it, the caller can log the cause of a
 *   degradation. An exception thrown by the observer itself is not caught here.
 * @returns The first non-null result, or `null` when every attempt was unusable.
 */
export async function retryForResult<T>(
  attempt: () => Promise<T | null>,
  maxAttempts = 3,
  onError?: (error: unknown, attemptNumber: number) => void,
): Promise<T | null> {
  for (let i = 0; i < maxAttempts; i++) {
    try {
      const result = await attempt();
      if (result !== null) return result;
    } catch (error: unknown) {
      // Treat a throw like an unusable result and retry. The provider layer has
      // already backed off transient errors, so there is nothing to wait for —
      // but surface the error so the eventual degradation is diagnosable.
      onError?.(error, i + 1);
    }
  }
  return null;
}
|
|
@@ -13,11 +13,16 @@
|
|
|
13
13
|
* then dedup by slug (a page assigned to multiple opened leaves comes back
|
|
14
14
|
* once per leaf) ORing the pinned flag so a page pinned anywhere stays
|
|
15
15
|
* pinned.
|
|
16
|
-
* 4.
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
*
|
|
16
|
+
* 4. Age the carry-forward working set to this turn (evict core slugs, stale
|
|
17
|
+
* non-pinned entries, then the cap) and snapshot it — the pages carried in
|
|
18
|
+
* from EARLIER turns.
|
|
19
|
+
* 5. Final injection = unique union of this turn's selected slugs and that
|
|
20
|
+
* carried-forward set, so pages selected on earlier turns carry forward
|
|
21
|
+
* even when this turn does not re-select them.
|
|
22
|
+
* 6. Record this turn's selections into the working set for LATER turns. This
|
|
23
|
+
* runs AFTER the snapshot so the cap is spent on genuinely carried pages,
|
|
24
|
+
* not on this turn's selections (which are injected directly) — otherwise a
|
|
25
|
+
* turn selecting more pages than the cap would evict the entire carry.
|
|
21
26
|
*/
|
|
22
27
|
|
|
23
28
|
import type { NeedleIndex } from "./needle.js";
|
|
@@ -48,7 +53,8 @@ export interface OrchestrateResult {
|
|
|
48
53
|
openedLeaves: LeafPath[];
|
|
49
54
|
/** This turn's L2 selections, deduped by slug (pinned flags ORed). */
|
|
50
55
|
currentSelections: SelectedPage[];
|
|
51
|
-
/**
|
|
56
|
+
/** The carried-forward set: selections from EARLIER turns, aged to this turn
|
|
57
|
+
* (snapshotted before this turn's selections are recorded). */
|
|
52
58
|
workingSetUnion: Set<Slug>;
|
|
53
59
|
/** Slugs to inject: this turn's selections ∪ the carried-forward working set. */
|
|
54
60
|
finalInjection: Slug[];
|
|
@@ -95,20 +101,26 @@ export async function orchestrate(
|
|
|
95
101
|
}
|
|
96
102
|
const currentSelections = [...bySlug.values()];
|
|
97
103
|
|
|
98
|
-
// Step 4:
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
// Step 5: evict. Core slugs are owned by core, not the working set.
|
|
104
|
+
// Step 4: age the carry-forward set to this turn (drop core slugs, stale
|
|
105
|
+
// non-pinned entries, then the cap) and snapshot it. This is the set carried
|
|
106
|
+
// in from EARLIER turns; recording this turn happens afterward (step 6) so the
|
|
107
|
+
// cap is spent on genuinely carried pages, not on this turn's selections
|
|
108
|
+
// (which are injected directly anyway).
|
|
104
109
|
deps.workingSet.evict(turn.turnNumber, coreSlugs(deps.tree, deps.core));
|
|
105
|
-
|
|
106
|
-
// Step 6: final injection = this turn's selections ∪ carried-forward set.
|
|
107
110
|
const workingSetUnion = deps.workingSet.union();
|
|
111
|
+
|
|
112
|
+
// Step 5: final injection = this turn's selections ∪ the carried-forward set,
|
|
113
|
+
// so pages selected on earlier turns carry forward even when this turn does
|
|
114
|
+
// not re-select them.
|
|
108
115
|
const finalInjection = unique<Slug>([
|
|
109
116
|
...currentSelections.map((s) => s.slug),
|
|
110
117
|
...workingSetUnion,
|
|
111
118
|
]);
|
|
112
119
|
|
|
120
|
+
// Step 6: record this turn's selections so they carry forward to LATER turns.
|
|
121
|
+
for (const sel of currentSelections) {
|
|
122
|
+
deps.workingSet.recordSelection(sel.slug, turn.turnNumber, sel.pinned);
|
|
123
|
+
}
|
|
124
|
+
|
|
113
125
|
return { openedLeaves, currentSelections, workingSetUnion, finalInjection };
|
|
114
126
|
}
|
|
@@ -1,16 +1,26 @@
|
|
|
1
1
|
import type { ContentBlock } from "../../providers/types.js";
|
|
2
2
|
|
|
3
3
|
/**
 * Build a text content block that carries a `cache_control` breakpoint with a
 * 1-hour TTL.
 *
 * Used by the v3 router for the static leaf-tree block and by the selector for
 * each leaf's static `<pages>` block. Those prefixes are stable across turns —
 * the leaf tree is byte-identical every turn, and a leaf's pages block changes
 * only when its pages/summaries do — while v3 turns are frequently more than
 * the default 5-minute cache window apart. The 1h TTL keeps the prefix warm
 * across those gaps so it is read from cache instead of being re-created each
 * turn; the volatile current-message block is rendered after this one and is
 * left un-cached. Haiku does not support the extended-cache-ttl beta, so the
 * Anthropic provider strips this `ttl` for Haiku models.
 *
 * The internal `TextContent` type has no `cache_control` field (only the
 * Anthropic provider transforms it onto the wire), so the field is attached as
 * an extra property instead of widening the provider-agnostic core types.
 *
 * @param text The static text to place in the cached block.
 * @returns A `text` block with `cache_control: { type: "ephemeral", ttl: "1h" }`.
 */
export function cachedTextBlock(text: string): ContentBlock {
  const textBlock: ContentBlock = { type: "text", text };
  const cacheControl = { type: "ephemeral", ttl: "1h" };
  // Object.assign mutates and returns `textBlock`, adding the extra property.
  return Object.assign(textBlock, { cache_control: cacheControl });
}
|
package/src/memory/v3/router.ts
CHANGED
|
@@ -15,12 +15,17 @@
|
|
|
15
15
|
* cache turn after turn. The trailing recent-context / current-message block
|
|
16
16
|
* changes every turn, so it carries no breakpoint.
|
|
17
17
|
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
20
|
-
* -
|
|
21
|
-
* -
|
|
22
|
-
*
|
|
23
|
-
*
|
|
18
|
+
* Failure handling. A *model-call* failure is not the same as the model
|
|
19
|
+
* choosing to open everything, so the two no longer share an outcome:
|
|
20
|
+
* - explicit `ids` → open exactly those leaves,
|
|
21
|
+
* - explicit empty array (`ids: []`) → open nothing (deliberate abstention),
|
|
22
|
+
* - omitted `ids` → open nothing: the router must name the leaves it wants,
|
|
23
|
+
* never the whole tree (~137 leaves would fan out a full L2 pass per turn),
|
|
24
|
+
* - infrastructure failure (provider unavailable, a throw that survived the
|
|
25
|
+
* provider's own retries, no usable `tool_use`, or a schema mismatch) →
|
|
26
|
+
* open nothing after a short re-prompt retry, degrading to the deterministic
|
|
27
|
+
* recall lanes (always-on core, the BM25 needle, the carry-forward working
|
|
28
|
+
* set) that the orchestrator unions in regardless.
|
|
24
29
|
*/
|
|
25
30
|
|
|
26
31
|
import { z } from "zod";
|
|
@@ -31,6 +36,7 @@ import {
|
|
|
31
36
|
} from "../../providers/provider-send-message.js";
|
|
32
37
|
import type { Message, ToolDefinition } from "../../providers/types.js";
|
|
33
38
|
import { getLogger } from "../../util/logger.js";
|
|
39
|
+
import { retryForResult } from "./llm-retry.js";
|
|
34
40
|
import { cachedTextBlock } from "./provider-blocks.js";
|
|
35
41
|
import type { LeafPath, LeafTree, TurnContext } from "./types.js";
|
|
36
42
|
|
|
@@ -40,8 +46,8 @@ const log = getLogger("memory-v3-router");
|
|
|
40
46
|
const OPEN_LEAVES_TOOL_NAME = "open_leaves";
|
|
41
47
|
|
|
42
48
|
const OpenLeavesSchema = z.object({
|
|
43
|
-
// Optional
|
|
44
|
-
//
|
|
49
|
+
// Optional so the field can be absent on the wire, but an omitted `ids` opens
|
|
50
|
+
// nothing — the router must name the leaves it wants, never the whole tree.
|
|
45
51
|
ids: z.array(z.number().int()).optional(),
|
|
46
52
|
});
|
|
47
53
|
|
|
@@ -50,8 +56,8 @@ const OPEN_LEAVES_TOOL: ToolDefinition = {
|
|
|
50
56
|
description:
|
|
51
57
|
"Open the leaves whose contents could plausibly bear on the next reply. " +
|
|
52
58
|
"Lean toward inclusion — a missed relevant leaf is a worse error than an " +
|
|
53
|
-
"unused one.
|
|
54
|
-
"
|
|
59
|
+
"unused one. Pass the chosen IDs explicitly; return `[]` only when nothing " +
|
|
60
|
+
"in the tree could possibly help.",
|
|
55
61
|
input_schema: {
|
|
56
62
|
type: "object",
|
|
57
63
|
properties: {
|
|
@@ -65,13 +71,14 @@ const OPEN_LEAVES_TOOL: ToolDefinition = {
|
|
|
65
71
|
|
|
66
72
|
const SYSTEM_PROMPT = `You route a conversation turn to the leaves of a topic tree that should be opened for the next reply.
|
|
67
73
|
|
|
68
|
-
Each leaf has a numbered ID, a path, and a description of what it holds. Decide which leaves to open by weighing
|
|
74
|
+
Each leaf has a numbered ID, a path, and a description of what it holds. Decide which leaves to open by weighing four signals:
|
|
69
75
|
|
|
70
76
|
- Topic — entities, projects, and events named or implied by the turn.
|
|
71
77
|
- Register — the affect and mode of the message (e.g. playful, distressed, formal). A register signal is enough to open a leaf even when no entity is named.
|
|
72
78
|
- Recent context — the immediately preceding exchange, which resolves references like "this", "that", or "the same thing" to concrete topics.
|
|
79
|
+
- Situation — the current date and a live scratchpad of what is salient right now. A date or state cue can make a leaf relevant even when the message never names it (e.g. a person whose anniversary is today, an active thread).
|
|
73
80
|
|
|
74
|
-
Include on doubt: open every leaf that could plausibly hold something useful. Missing a relevant leaf is a worse error than opening an unused one. Call \`open_leaves\` with the chosen IDs
|
|
81
|
+
Include on doubt: open every leaf that could plausibly hold something useful. Missing a relevant leaf is a worse error than opening an unused one. Call \`open_leaves\` with the chosen IDs explicitly; return \`[]\` only when nothing in the tree could possibly help.`;
|
|
75
82
|
|
|
76
83
|
/** Leaves sorted deterministically by path so the numbered block is stable. */
|
|
77
84
|
function sortedLeaves(tree: LeafTree): LeafPath[] {
|
|
@@ -104,10 +111,10 @@ export function renderLeafBlock(tree: LeafTree): string {
|
|
|
104
111
|
}
|
|
105
112
|
|
|
106
113
|
/**
|
|
107
|
-
* Run the L1 router for one turn. Returns the leaf paths to open
|
|
108
|
-
*
|
|
109
|
-
*
|
|
110
|
-
*
|
|
114
|
+
* Run the L1 router for one turn. Returns the leaf paths to open — only ever the
|
|
115
|
+
* leaves the model names explicitly. An omitted `ids`, an explicit `[]`, or an
|
|
116
|
+
* infrastructure failure (after a short re-prompt retry) all open nothing,
|
|
117
|
+
* degrading to the deterministic recall lanes the orchestrator unions in.
|
|
111
118
|
*/
|
|
112
119
|
export async function routeL1(
|
|
113
120
|
turn: TurnContext,
|
|
@@ -118,8 +125,10 @@ export async function routeL1(
|
|
|
118
125
|
|
|
119
126
|
const provider = await getConfiguredProvider("memoryV3RouteL1");
|
|
120
127
|
if (!provider) {
|
|
121
|
-
log.warn(
|
|
122
|
-
|
|
128
|
+
log.warn(
|
|
129
|
+
"L1 router provider unavailable; degrading to deterministic lanes",
|
|
130
|
+
);
|
|
131
|
+
return [];
|
|
123
132
|
}
|
|
124
133
|
|
|
125
134
|
const userMsg: Message = {
|
|
@@ -129,15 +138,21 @@ export async function routeL1(
|
|
|
129
138
|
{
|
|
130
139
|
type: "text",
|
|
131
140
|
text:
|
|
141
|
+
(turn.situationalContext
|
|
142
|
+
? `<situation>${turn.situationalContext}</situation>\n`
|
|
143
|
+
: "") +
|
|
132
144
|
`<recent_context>${turn.recentContext}</recent_context>\n` +
|
|
133
145
|
`<current_message>${turn.currentMessage}</current_message>`,
|
|
134
146
|
},
|
|
135
147
|
],
|
|
136
148
|
};
|
|
137
149
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
150
|
+
// One forced-tool call, retried a few times so a transient malformed response
|
|
151
|
+
// (no usable tool_use, or tool input that fails the schema) re-prompts before
|
|
152
|
+
// we give up. `null` from an attempt means "unusable, retry"; the provider
|
|
153
|
+
// layer already backs off transient throws, so this loop adds no delay.
|
|
154
|
+
const parsed = await retryForResult(async () => {
|
|
155
|
+
const response = await provider.sendMessage([userMsg], {
|
|
141
156
|
tools: [OPEN_LEAVES_TOOL],
|
|
142
157
|
systemPrompt: SYSTEM_PROMPT,
|
|
143
158
|
config: {
|
|
@@ -145,37 +160,28 @@ export async function routeL1(
|
|
|
145
160
|
tool_choice: { type: "tool" as const, name: OPEN_LEAVES_TOOL_NAME },
|
|
146
161
|
},
|
|
147
162
|
});
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
const toolBlock = extractToolUse(response);
|
|
154
|
-
if (!toolBlock || toolBlock.name !== OPEN_LEAVES_TOOL_NAME) {
|
|
155
|
-
log.warn(
|
|
156
|
-
{ stopReason: response.stopReason },
|
|
157
|
-
"L1 router returned no open_leaves tool_use; opening all leaves",
|
|
158
|
-
);
|
|
159
|
-
return paths;
|
|
160
|
-
}
|
|
163
|
+
const toolBlock = extractToolUse(response);
|
|
164
|
+
if (!toolBlock || toolBlock.name !== OPEN_LEAVES_TOOL_NAME) return null;
|
|
165
|
+
const result = OpenLeavesSchema.safeParse(toolBlock.input);
|
|
166
|
+
return result.success ? result.data : null;
|
|
167
|
+
});
|
|
161
168
|
|
|
162
|
-
|
|
163
|
-
if (!parsed.success) {
|
|
169
|
+
if (parsed === null) {
|
|
164
170
|
log.warn(
|
|
165
|
-
|
|
166
|
-
"L1 router tool input did not match schema; opening all leaves",
|
|
171
|
+
"L1 router could not obtain a selection after retries; degrading to deterministic lanes",
|
|
167
172
|
);
|
|
168
|
-
return
|
|
173
|
+
return [];
|
|
169
174
|
}
|
|
170
175
|
|
|
171
|
-
//
|
|
172
|
-
|
|
176
|
+
// An omitted `ids` field means the model named no leaves — open nothing rather
|
|
177
|
+
// than the whole tree. Only explicitly listed IDs open leaves.
|
|
178
|
+
if (parsed.ids === undefined) return [];
|
|
173
179
|
|
|
174
180
|
// Map 1-based IDs back to leaf paths, dropping out-of-range IDs without
|
|
175
181
|
// throwing. De-duplicate while preserving model-returned order.
|
|
176
182
|
const seen = new Set<number>();
|
|
177
183
|
const selected: LeafPath[] = [];
|
|
178
|
-
for (const id of parsed.
|
|
184
|
+
for (const id of parsed.ids) {
|
|
179
185
|
if (id < 1 || id > paths.length || seen.has(id)) continue;
|
|
180
186
|
seen.add(id);
|
|
181
187
|
selected.push(paths[id - 1]);
|