create-walle 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/package.json +2 -2
- package/template/CLAUDE.md +2 -2
- package/template/LICENSE +1 -1
- package/template/bin/ctm-dev-cleanup.js +24 -3
- package/template/bin/ctm-launch.sh +13 -0
- package/template/bin/dev.sh +156 -18
- package/template/bin/node-bin.sh +84 -0
- package/template/bin/pin-node.sh +51 -0
- package/template/claude-task-manager/api-prompts.js +1203 -182
- package/template/claude-task-manager/api-reviews.js +109 -15
- package/template/claude-task-manager/approval-agent.js +1360 -280
- package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
- package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
- package/template/claude-task-manager/db.js +4417 -295
- package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
- package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
- package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
- package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
- package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
- package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
- package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
- package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
- package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
- package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
- package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
- package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
- package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
- package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
- package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
- package/template/claude-task-manager/docs/phone-access-design.md +53 -15
- package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
- package/template/claude-task-manager/docs/phone-setup.md +3 -0
- package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
- package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
- package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
- package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
- package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
- package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
- package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
- package/template/claude-task-manager/docs/session-title-authority.md +32 -0
- package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
- package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
- package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
- package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
- package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
- package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
- package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
- package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
- package/template/claude-task-manager/git-utils.js +897 -27
- package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
- package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
- package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
- package/template/claude-task-manager/lib/agent-presets.js +17 -1
- package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
- package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
- package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
- package/template/claude-task-manager/lib/async-semaphore.js +44 -0
- package/template/claude-task-manager/lib/auth-context.js +5 -0
- package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
- package/template/claude-task-manager/lib/auth-rules.js +29 -2
- package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
- package/template/claude-task-manager/lib/background-llm.js +144 -17
- package/template/claude-task-manager/lib/branch-inventory.js +212 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
- package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
- package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
- package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
- package/template/claude-task-manager/lib/codex-zst.js +124 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
- package/template/claude-task-manager/lib/connection-health.js +232 -0
- package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
- package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
- package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
- package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
- package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
- package/template/claude-task-manager/lib/document-review.js +141 -6
- package/template/claude-task-manager/lib/escalation-review.js +152 -0
- package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
- package/template/claude-task-manager/lib/headless-term-service.js +678 -0
- package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
- package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
- package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
- package/template/claude-task-manager/lib/main-db-census.js +216 -0
- package/template/claude-task-manager/lib/message-pagination.js +106 -4
- package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
- package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
- package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
- package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
- package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
- package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
- package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
- package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
- package/template/claude-task-manager/lib/perf-tracker.js +242 -6
- package/template/claude-task-manager/lib/permission-match.js +76 -0
- package/template/claude-task-manager/lib/permission-sync.js +133 -20
- package/template/claude-task-manager/lib/process-title.js +35 -0
- package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
- package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
- package/template/claude-task-manager/lib/prompt-intent.js +132 -0
- package/template/claude-task-manager/lib/provider-user-context.js +34 -0
- package/template/claude-task-manager/lib/read-pool-client.js +313 -0
- package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
- package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
- package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
- package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
- package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
- package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
- package/template/claude-task-manager/lib/restart-guard.js +109 -0
- package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
- package/template/claude-task-manager/lib/restore-policy.js +13 -0
- package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
- package/template/claude-task-manager/lib/restore-runtime.js +68 -0
- package/template/claude-task-manager/lib/restore-storm.js +34 -0
- package/template/claude-task-manager/lib/resume-cwd.js +36 -0
- package/template/claude-task-manager/lib/resume-preflight.js +313 -0
- package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
- package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
- package/template/claude-task-manager/lib/scheduler.js +21 -1
- package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
- package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
- package/template/claude-task-manager/lib/server-listeners.js +239 -0
- package/template/claude-task-manager/lib/session-capture.js +42 -7
- package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
- package/template/claude-task-manager/lib/session-history.js +388 -43
- package/template/claude-task-manager/lib/session-host-manager.js +287 -0
- package/template/claude-task-manager/lib/session-image-refs.js +209 -0
- package/template/claude-task-manager/lib/session-jobs.js +399 -59
- package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
- package/template/claude-task-manager/lib/session-restore.js +53 -0
- package/template/claude-task-manager/lib/session-standup.js +123 -23
- package/template/claude-task-manager/lib/session-state-bus.js +14 -0
- package/template/claude-task-manager/lib/session-stream.js +64 -16
- package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
- package/template/claude-task-manager/lib/session-token-usage.js +494 -0
- package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
- package/template/claude-task-manager/lib/setup-network-config.js +9 -0
- package/template/claude-task-manager/lib/size-cap.js +45 -0
- package/template/claude-task-manager/lib/size-cap.test.js +62 -0
- package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
- package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
- package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
- package/template/claude-task-manager/lib/standup-attention.js +7 -3
- package/template/claude-task-manager/lib/status-authority.js +39 -0
- package/template/claude-task-manager/lib/status-hooks.js +4 -0
- package/template/claude-task-manager/lib/storage-migration.js +235 -0
- package/template/claude-task-manager/lib/structured-capture.js +298 -0
- package/template/claude-task-manager/lib/sync-io-census.js +163 -0
- package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
- package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
- package/template/claude-task-manager/lib/terminal-choice.js +364 -0
- package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
- package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
- package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
- package/template/claude-task-manager/lib/timeline-order.js +122 -0
- package/template/claude-task-manager/lib/transcript-store.js +348 -43
- package/template/claude-task-manager/lib/transport-security.js +84 -1
- package/template/claude-task-manager/lib/wait-state.js +184 -0
- package/template/claude-task-manager/lib/walle-client.js +47 -5
- package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
- package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
- package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
- package/template/claude-task-manager/lib/walle-native-health.js +403 -0
- package/template/claude-task-manager/lib/walle-repair.js +701 -0
- package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
- package/template/claude-task-manager/lib/walle-session-context.js +57 -21
- package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
- package/template/claude-task-manager/lib/walle-transcript.js +52 -0
- package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
- package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
- package/template/claude-task-manager/package.json +1 -1
- package/template/claude-task-manager/prompt-harvest.js +89 -66
- package/template/claude-task-manager/providers/claude-code.js +51 -3
- package/template/claude-task-manager/providers/cursor.js +140 -45
- package/template/claude-task-manager/public/css/reviews.css +551 -61
- package/template/claude-task-manager/public/css/setup.css +191 -0
- package/template/claude-task-manager/public/css/walle-session.css +865 -10
- package/template/claude-task-manager/public/css/walle.css +154 -0
- package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
- package/template/claude-task-manager/public/index.html +18516 -2058
- package/template/claude-task-manager/public/ipad.html +363 -0
- package/template/claude-task-manager/public/js/document-review-links.js +301 -0
- package/template/claude-task-manager/public/js/image-normalize.js +69 -36
- package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
- package/template/claude-task-manager/public/js/prompts.js +66 -29
- package/template/claude-task-manager/public/js/reviews.js +901 -133
- package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
- package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
- package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
- package/template/claude-task-manager/public/js/setup.js +1273 -176
- package/template/claude-task-manager/public/js/stream-view.js +691 -73
- package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
- package/template/claude-task-manager/public/js/walle-session.js +2455 -158
- package/template/claude-task-manager/public/js/walle.js +455 -28
- package/template/claude-task-manager/public/m/app.css +2909 -262
- package/template/claude-task-manager/public/m/app.js +6601 -398
- package/template/claude-task-manager/public/m/claim.html +224 -17
- package/template/claude-task-manager/public/m/index.html +117 -21
- package/template/claude-task-manager/public/m/sw.js +3 -1
- package/template/claude-task-manager/public/manifest.json +2 -2
- package/template/claude-task-manager/public/prompts.html +30 -14
- package/template/claude-task-manager/queue-engine.js +507 -28
- package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
- package/template/claude-task-manager/server.js +14341 -2197
- package/template/claude-task-manager/session-integrity.js +160 -18
- package/template/claude-task-manager/session-search-ranking.js +1 -0
- package/template/claude-task-manager/session-utils.js +25 -5
- package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
- package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
- package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
- package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
- package/template/claude-task-manager/workers/harvest-worker.js +9 -55
- package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
- package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
- package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
- package/template/claude-task-manager/workers/session-host-process.js +146 -0
- package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
- package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
- package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
- package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
- package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
- package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
- package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
- package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
- package/template/docs/design/markdown-review-pane.md +206 -0
- package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
- package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
- package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
- package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
- package/template/docs/private-memory-and-pii-policy.md +69 -0
- package/template/package.json +2 -1
- package/template/scripts/check-private-data.js +201 -0
- package/template/shared/sqlite-owner-guard.js +30 -0
- package/template/shared/sqlite-owner-write-queue.js +225 -0
- package/template/shared/sqlite-storage-policy.js +111 -0
- package/template/shared/sqlite-write-lock.js +428 -0
- package/template/wall-e/agent-runners/claude-code.js +5 -0
- package/template/wall-e/agent.js +166 -22
- package/template/wall-e/api-walle.js +524 -70
- package/template/wall-e/auth/provider-flows.js +11 -1
- package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
- package/template/wall-e/brain.js +1614 -141
- package/template/wall-e/chat/attachment-blocks.js +96 -0
- package/template/wall-e/chat/attachments.js +2 -1
- package/template/wall-e/chat/capability-resolver.js +7 -7
- package/template/wall-e/chat/context-messages.js +28 -0
- package/template/wall-e/chat/conversation-frame.js +630 -0
- package/template/wall-e/chat/provider-messages.js +125 -0
- package/template/wall-e/chat.js +1002 -233
- package/template/wall-e/coding/acceptance-contract.js +170 -0
- package/template/wall-e/coding/acp-adapter.js +1 -1
- package/template/wall-e/coding/agent-catalog.js +3 -0
- package/template/wall-e/coding/artifact-store.js +93 -0
- package/template/wall-e/coding/capability-router.js +120 -0
- package/template/wall-e/coding/coding-run-controller.js +423 -0
- package/template/wall-e/coding/compaction-service.js +157 -12
- package/template/wall-e/coding/frontend-verification.js +258 -0
- package/template/wall-e/coding/lifecycle-hooks.js +75 -0
- package/template/wall-e/coding/local-preview-contract.js +157 -0
- package/template/wall-e/coding/permission-service.js +57 -13
- package/template/wall-e/coding/prompt-bundle.js +19 -1
- package/template/wall-e/coding/prompt-section-registry.js +227 -0
- package/template/wall-e/coding/provider-compat.js +15 -0
- package/template/wall-e/coding/runtime-events.js +224 -0
- package/template/wall-e/coding/runtime-mode.js +3 -0
- package/template/wall-e/coding/side-git-snapshot.js +160 -4
- package/template/wall-e/coding/snapshot-service.js +143 -1
- package/template/wall-e/coding/stream-processor.js +388 -34
- package/template/wall-e/coding/task-tool.js +141 -4
- package/template/wall-e/coding/tool-execution-controller.js +365 -0
- package/template/wall-e/coding/tool-registry.js +43 -5
- package/template/wall-e/coding/user-hooks.js +217 -0
- package/template/wall-e/coding-orchestrator.js +1330 -221
- package/template/wall-e/coding-prompts.js +20 -4
- package/template/wall-e/context/context-builder.js +15 -2
- package/template/wall-e/decision/confidence.js +1 -1
- package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
- package/template/wall-e/docs/external-action-controller.md +26 -6
- package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
- package/template/wall-e/embeddings.js +591 -53
- package/template/wall-e/external-action-controller.js +12 -0
- package/template/wall-e/http/auth.js +1 -0
- package/template/wall-e/http/chat-api.js +46 -11
- package/template/wall-e/http/model-admin.js +836 -34
- package/template/wall-e/lib/boot-profile.js +88 -0
- package/template/wall-e/lib/event-loop-monitor.js +93 -0
- package/template/wall-e/lib/service-health.js +194 -0
- package/template/wall-e/llm/anthropic.js +130 -5
- package/template/wall-e/llm/client.js +266 -63
- package/template/wall-e/llm/default-fallback.js +382 -0
- package/template/wall-e/llm/health.js +19 -0
- package/template/wall-e/llm/message-guard.js +78 -0
- package/template/wall-e/llm/model-catalog.js +252 -1
- package/template/wall-e/llm/openai.js +26 -4
- package/template/wall-e/llm/portkey-sync.js +654 -0
- package/template/wall-e/llm/provider-error.js +30 -2
- package/template/wall-e/llm/registry.js +5 -1
- package/template/wall-e/llm/request-compat.js +67 -0
- package/template/wall-e/loops/backfill.js +79 -23
- package/template/wall-e/loops/brain-optimize.js +67 -0
- package/template/wall-e/loops/ingest.js +25 -10
- package/template/wall-e/loops/question-digest.js +160 -0
- package/template/wall-e/loops/reflect.js +6 -4
- package/template/wall-e/loops/think.js +39 -12
- package/template/wall-e/mcp-server.js +318 -36
- package/template/wall-e/memory/ctm-context-client.js +52 -14
- package/template/wall-e/memory/ctm-operational-context.js +237 -0
- package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
- package/template/wall-e/memory/ctm-session-context.js +111 -63
- package/template/wall-e/prompts/coding/deepseek.txt +3 -0
- package/template/wall-e/prompts/coding/gemini.txt +6 -0
- package/template/wall-e/prompts/coding/gpt.txt +6 -0
- package/template/wall-e/prompts/coding/local.txt +7 -0
- package/template/wall-e/runtime/decision-hooks.js +115 -0
- package/template/wall-e/runtime/devbox-gateway.js +82 -8
- package/template/wall-e/runtime/prompt-manifest.js +86 -0
- package/template/wall-e/runtime/tool-executor.js +269 -0
- package/template/wall-e/runtime/tool-result-envelope.js +138 -0
- package/template/wall-e/runtime/transcript-projection.js +60 -0
- package/template/wall-e/runtime/walle-runtime.js +224 -0
- package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
- package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
- package/template/wall-e/server.js +15 -0
- package/template/wall-e/session-files.js +9 -0
- package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
- package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
- package/template/wall-e/skills/claude-code-reader.js +7 -3
- package/template/wall-e/skills/script-skill-runner.js +10 -0
- package/template/wall-e/skills/skill-planner.js +38 -0
- package/template/wall-e/tools/builtin-middleware.js +19 -9
- package/template/wall-e/tools/local-tools.js +1428 -16
- package/template/wall-e/tools/permission-checker.js +73 -5
- package/template/wall-e/tools/question-manager.js +117 -7
- package/template/wall-e/training/harvester.js +12 -28
- package/template/wall-e/training/replay.js +25 -80
- package/template/website/index.html +10 -10
- package/template/wall-e/eval/ab-test.js +0 -203
- package/template/wall-e/eval/agent-runner.js +0 -772
- package/template/wall-e/eval/agent-scorer.js +0 -461
- package/template/wall-e/eval/aggregator.js +0 -414
- package/template/wall-e/eval/allowed-test-commands.js +0 -34
- package/template/wall-e/eval/benchmark-generator.js +0 -113
- package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
- package/template/wall-e/eval/benchmarks/chat.json +0 -82
- package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
- package/template/wall-e/eval/benchmarks/coding.json +0 -122
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
- package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
- package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
- package/template/wall-e/eval/benchmarks.js +0 -669
- package/template/wall-e/eval/cc-replay.js +0 -719
- package/template/wall-e/eval/chat-eval.js +0 -525
- package/template/wall-e/eval/check-keys.js +0 -15
- package/template/wall-e/eval/check-providers.js +0 -42
- package/template/wall-e/eval/codex-cli-baseline.js +0 -669
- package/template/wall-e/eval/coding-agent-real.js +0 -570
- package/template/wall-e/eval/context-compactor.js +0 -251
- package/template/wall-e/eval/debug-agent003.js +0 -68
- package/template/wall-e/eval/diagnostics.js +0 -216
- package/template/wall-e/eval/eval-orchestrator.js +0 -642
- package/template/wall-e/eval/evaluate.js +0 -202
- package/template/wall-e/eval/evaluator.js +0 -373
- package/template/wall-e/eval/exporter.js +0 -212
- package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
- package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
- package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
- package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
- package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
- package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
- package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
- package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
- package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
- package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
- package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
- package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
- package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
- package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
- package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
- package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
- package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
- package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
- package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
- package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
- package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
- package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
- package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
- package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
- package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
- package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
- package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
- package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
- package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
- package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
- package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
- package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
- package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
- package/template/wall-e/eval/harvester.js +0 -685
- package/template/wall-e/eval/head-to-head.js +0 -388
- package/template/wall-e/eval/humaneval-adapter.js +0 -321
- package/template/wall-e/eval/list-models.js +0 -31
- package/template/wall-e/eval/livecodebench-adapter.js +0 -291
- package/template/wall-e/eval/mail-integration.js +0 -443
- package/template/wall-e/eval/manifest.js +0 -186
- package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
- package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
- package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
- package/template/wall-e/eval/meta-harness/cli.js +0 -86
- package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
- package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
- package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
- package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
- package/template/wall-e/eval/meta-harness/frontier.js +0 -96
- package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
- package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
- package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
- package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
- package/template/wall-e/eval/meta-harness/reporting.js +0 -58
- package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
- package/template/wall-e/eval/meta-harness/validation.js +0 -81
- package/template/wall-e/eval/promoter.js +0 -228
- package/template/wall-e/eval/provider-normalizer.js +0 -33
- package/template/wall-e/eval/replay.js +0 -395
- package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
- package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
- package/template/wall-e/eval/run-coding-agent-real.js +0 -187
- package/template/wall-e/eval/run-eval.js +0 -435
- package/template/wall-e/eval/run-model-comparison.js +0 -142
- package/template/wall-e/eval/session-evaluator.js +0 -187
- package/template/wall-e/eval/session-miner.js +0 -207
- package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
- package/template/wall-e/eval/session-transcripts.js +0 -509
- package/template/wall-e/eval/shadow.js +0 -161
- package/template/wall-e/eval/swebench-adapter.js +0 -345
- package/template/wall-e/eval/swebench-docker.js +0 -192
- package/template/wall-e/eval/train.py +0 -320
- package/template/wall-e/eval/trainer.js +0 -232
- package/template/wall-e/eval/weekly-eval-loop.js +0 -241
|
@@ -0,0 +1,488 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const crypto = require('crypto');
|
|
4
|
+
const { callBackgroundLlm } = require('./background-llm');
|
|
5
|
+
|
|
6
|
+
// Status labels for learned refinement rules.
const ACTIVE_STATUS = 'active';
const FAILED_STATUS = 'failed';
const CANDIDATE_STATUS = 'candidate';

// Character limits: one regex source, and one redacted terminal sample.
const MAX_PATTERN_LENGTH = 320;
const MAX_SAMPLE_LENGTH = 2_000;

// Miss classifications a proposal may carry; anything else is normalized to 'unknown'.
const ALLOWED_MISS_TYPES = new Set([
  'detector_failure',
  'parser_failure',
  'gate_too_strict',
  'partial_snapshot_race',
  'policy_gap',
  'stale_or_false_positive',
  'unknown',
]);
|
|
21
|
+
|
|
22
|
+
// Resolves the database module: an injected `options.dbModule` wins,
// otherwise '../db' is required lazily at call time.
function _db(options = {}) {
  const { dbModule } = options;
  if (dbModule) return dbModule;
  return require('../db');
}
|
|
25
|
+
|
|
26
|
+
// Coerces any value to a string; null and undefined become the empty string.
function _asString(value) {
  if (value === null || value === undefined) return '';
  return String(value);
}
|
|
29
|
+
|
|
30
|
+
// Stringifies `value` (null/undefined -> ''), collapses every whitespace run to a
// single space and drops leading/trailing whitespace.
function _normalizeSpace(value) {
  const text = value == null ? '' : String(value);
  return text.split(/\s+/).filter(Boolean).join(' ');
}
|
|
33
|
+
|
|
34
|
+
// Scrubs e-mail addresses, token-looking strings, macOS home-directory user names
// and Bearer credentials from terminal text, then keeps only the last
// MAX_SAMPLE_LENGTH characters.
function redactApprovalText(text) {
  // Applied in this order; each pair is [matcher, placeholder].
  const substitutions = [
    [/[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}/gi, '<email>'],
    [/(?:sk|pk|rk|xox[baprs]|ghp|github_pat|glpat|AIza)[A-Za-z0-9_\-]{12,}/g, '<secret>'],
    [/\/Users\/[^/\s]+/g, '/Users/<user>'],
    [/Bearer\s+[A-Za-z0-9._~+/=-]+/gi, 'Bearer <token>'],
  ];
  let scrubbed = _asString(text);
  for (const [matcher, placeholder] of substitutions) {
    scrubbed = scrubbed.replace(matcher, placeholder);
  }
  return scrubbed.slice(-MAX_SAMPLE_LENGTH);
}
|
|
42
|
+
|
|
43
|
+
// Redacts a regex source before it is sent as telemetry: runs the normal text
// redaction, replaces whatever follows a `$\s*` prompt marker (escaped or not)
// up to the next `|`, `)` or newline with `<command>`, and caps the result at
// MAX_PATTERN_LENGTH characters.
function redactTelemetryPattern(pattern) {
  const scrubbed = redactApprovalText(pattern);
  const escapedPromptMasked = scrubbed.replace(/\\\$\\s\*[^|)\n]+/g, '\\$\\s*<command>');
  const promptMasked = escapedPromptMasked.replace(/\$\\s\*[^|)\n]+/g, '$\\s*<command>');
  return promptMasked.slice(0, MAX_PATTERN_LENGTH);
}
|
|
49
|
+
|
|
50
|
+
// Derives a stable 32-hex-character id for a missed approval prompt from its
// provider, gate reason, source and the last 1200 characters of the redacted,
// whitespace-normalized terminal text (SHA-256 over the NUL-separated parts).
function fingerprintMiss(miss = {}) {
  const provider = _asString(miss.providerId || miss.provider_id || '');
  const reason = _asString(miss.gateReason || miss.gate_reason || miss.reason || '');
  const source = _asString(miss.source || '');
  const tail = _normalizeSpace(redactApprovalText(miss.rawText || miss.text || '')).slice(-1200);

  // Hashing the '\0'-joined string is byte-equivalent to feeding the parts and
  // separators to the hash one at a time.
  const digest = crypto
    .createHash('sha256')
    .update([provider, reason, source, tail].join('\0'))
    .digest('hex');
  return digest.slice(0, 32);
}
|
|
61
|
+
|
|
62
|
+
// Returns the index of the `}` that closes the object opened at `start`, or -1
// when the braces never balance. Braces inside double-quoted strings are ignored.
function _balancedObjectEnd(raw, start) {
  let depth = 0;
  let inString = false;
  let escaped = false;
  for (let i = start; i < raw.length; i += 1) {
    const ch = raw[i];
    if (inString) {
      if (escaped) escaped = false;
      else if (ch === '\\') escaped = true;
      else if (ch === '"') inString = false;
      continue;
    }
    if (ch === '"') {
      inString = true;
    } else if (ch === '{') {
      depth += 1;
    } else if (ch === '}') {
      depth -= 1;
      if (depth === 0) return i;
    }
  }
  return -1;
}

// Pulls a JSON object out of free-form model output.
//
// First tries the span from the first `{` to the last `}`. If that does not
// parse (for example because the model added brace-containing prose after the
// answer), falls back to the first brace-balanced span that does parse.
// Returns the parsed value, or null when nothing parses.
function extractJsonObject(text) {
  const raw = (text == null ? '' : String(text)).trim();
  const firstBrace = raw.indexOf('{');
  const lastBrace = raw.lastIndexOf('}');
  if (firstBrace === -1 || lastBrace < firstBrace) return null;

  try {
    return JSON.parse(raw.slice(firstBrace, lastBrace + 1));
  } catch {
    // Fall through to the balanced-span scan below.
  }

  let start = firstBrace;
  while (start !== -1) {
    const end = _balancedObjectEnd(raw, start);
    if (end === -1) {
      // Unbalanced from here; a later `{` may still open a complete object.
      start = raw.indexOf('{', start + 1);
      continue;
    }
    try {
      return JSON.parse(raw.slice(start, end + 1));
    } catch {
      // Balanced but not JSON (e.g. an echoed template); resume after it.
      start = raw.indexOf('{', end + 1);
    }
  }
  return null;
}
|
|
68
|
+
|
|
69
|
+
// Heuristic guard for model-proposed regex sources. An empty pattern counts as
// safe (it is simply not applied). A pattern is rejected when it is longer
// than MAX_PATTERN_LENGTH or contains any construct listed below.
function _isSafePattern(pattern) {
  const source = _asString(pattern);
  if (!source) return true;
  if (source.length > MAX_PATTERN_LENGTH) return false;

  const forbidden = [
    /\(\?(?:[=!]|<[=!])/, // lookahead / lookbehind
    /\\[1-9]/, // numbered backreference
    /\\k</, // named backreference (previously slipped through)
    /\([^)]*[+*][^)]*\)[+*{]/, // quantified group that itself contains + or *
    /(?:\.\*){3,}/, // three or more consecutive `.*`
  ];
  return !forbidden.some((construct) => construct.test(source));
}
|
|
79
|
+
|
|
80
|
+
// Compiles a trimmed regex source with the given flags (default 'im').
// Returns null for a blank pattern, one rejected by _isSafePattern, or one
// that the RegExp constructor refuses.
function _compilePattern(pattern, flags = 'im') {
  const source = _asString(pattern).trim();
  if (source === '' || !_isSafePattern(source)) return null;
  try {
    return new RegExp(source, flags);
  } catch {
    return null;
  }
}
|
|
86
|
+
|
|
87
|
+
// Picks the captured text to use from a regex match: the first non-empty
// named group among `preferred` (in order), otherwise the first non-empty
// numbered group. The result is trimmed; '' when there is no match or capture.
function _firstCapture(match, preferred = []) {
  if (!match) return '';

  const named = match.groups;
  for (const key of preferred) {
    const value = named ? named[key] : undefined;
    if (value) return String(value).trim();
  }

  let index = 1;
  while (index < match.length) {
    const value = match[index];
    if (value) return String(value).trim();
    index += 1;
  }
  return '';
}
|
|
97
|
+
|
|
98
|
+
// Reads a rule field that may be stored under a camelCase or a snake_case key.
// The camelCase value wins unless it is null/undefined; '' when neither is set.
function _ruleValue(rule, camel, snake) {
  if (rule == null) return '';
  const primary = rule[camel];
  if (primary != null) return primary;
  const fallback = rule[snake];
  return fallback != null ? fallback : '';
}
|
|
101
|
+
|
|
102
|
+
// Applies one learned refinement rule to a terminal snapshot and, when the
// rule matches, returns the extracted approval context.
//
// Gate patterns (detector, question, yes, anchor) are optional, but every one
// that is supplied must match `text`. The command and warning patterns extract
// their text via a named or first capture group.
//
// A pattern that is non-empty but cannot be compiled (invalid or rejected by
// the safety check) makes the rule NOT match. Previously such a pattern was
// silently skipped, so a corrupt detector let the rule match any screen.
//
// Returns null when the text is blank, the rule is missing, a gate fails, a
// supplied pattern is unusable, or a bash/shell/command tool has no command.
function parseWithRule(text, rule = {}) {
  const raw = _asString(text);
  if (!raw.trim() || !rule) return null;

  // `regex` is null when the field is blank; `broken` marks a supplied pattern
  // that did not compile.
  const compileField = (camel, snake) => {
    const source = _asString(_ruleValue(rule, camel, snake)).trim();
    if (!source) return { regex: null, broken: false };
    const regex = _compilePattern(source);
    return { regex, broken: !regex };
  };

  const gateFields = [
    ['detectorPattern', 'detector_pattern'],
    ['questionPattern', 'question_pattern'],
    ['yesPattern', 'yes_pattern'],
    ['anchorPattern', 'anchor_pattern'],
  ];
  for (const [camel, snake] of gateFields) {
    const gate = compileField(camel, snake);
    if (gate.broken) return null;
    if (gate.regex && !gate.regex.test(raw)) return null;
  }

  const warningField = compileField('warningPattern', 'warning_pattern');
  if (warningField.broken) return null;
  const warningMatch = warningField.regex ? warningField.regex.exec(raw) : null;
  const warning = _firstCapture(warningMatch, ['warning']);

  const commandField = compileField('commandPattern', 'command_pattern');
  if (commandField.broken) return null;
  const commandMatch = commandField.regex ? commandField.regex.exec(raw) : null;
  const command = _firstCapture(commandMatch, ['command', 'cmd', 'path']);

  const toolName = _asString(_ruleValue(rule, 'toolName', 'tool_name') || _firstCapture(commandMatch, ['tool'])).trim()
    || 'AI refined approval';

  // A command-style tool without an extracted command is not actionable.
  if (/bash|shell|command/i.test(toolName) && !command) return null;

  const approveShortcut = _asString(_ruleValue(rule, 'approveKey', 'approve_key') || '1').trim() || '1';
  return {
    providerId: _asString(_ruleValue(rule, 'providerId', 'provider_id') || 'generic'),
    toolName,
    command: command.slice(0, 2000),
    warning: warning.slice(0, 1000),
    fullContext: raw.slice(-2000),
    hasAllowAll: false,
    approveShortcut,
    _aiRefinementFingerprint: _asString(rule.fingerprint || ''),
  };
}
|
|
141
|
+
|
|
142
|
+
// Tries every active learned rule for `providerId` against `text` and returns
// the first context that parses, or null. Rules come from
// findActiveApprovalAiRefinementRules when the db module provides it, else
// from listApprovalAiRefinementRules (status active, limit 50). Any error
// while loading rules is treated as "no rules".
function parseWithActiveRules(text, providerId, options = {}) {
  const store = _db(options);

  const loadRules = () => {
    if (typeof store.findActiveApprovalAiRefinementRules === 'function') {
      return store.findActiveApprovalAiRefinementRules(providerId || '');
    }
    if (typeof store.listApprovalAiRefinementRules === 'function') {
      return store.listApprovalAiRefinementRules({ status: ACTIVE_STATUS, providerId, limit: 50 });
    }
    return [];
  };

  let candidates;
  try {
    candidates = loadRules();
  } catch {
    candidates = [];
  }

  for (const candidate of candidates || []) {
    const parsed = parseWithRule(text, candidate);
    if (parsed) return parsed;
  }
  return null;
}
|
|
160
|
+
|
|
161
|
+
// Normalizes a raw model proposal (camelCase or snake_case keys) into the
// canonical camelCase shape used by validation and storage.
//
// - missType falls back to 'unknown' when it is not in ALLOWED_MISS_TYPES.
// - safeToInstall is true only for boolean true or the string "true"; other
//   values (including the string "false", which used to coerce to true) are
//   treated as not safe.
// - confidence is clamped to [0, 1]; a value that is not a number becomes 0
//   (it used to become NaN, which slipped past the `< 0.65` confidence gate).
// - rationale is capped at 1000 characters; approveKey defaults to '1'.
// A null/undefined input is treated as an empty proposal.
function normalizeProposal(raw = {}) {
  const src = raw ?? {};
  const missType = _asString(src.missType || src.miss_type || 'unknown').trim();

  const safeFlag = src.safeToInstall ?? src.safe_to_install;
  const safeToInstall = safeFlag === true
    || (typeof safeFlag === 'string' && safeFlag.trim().toLowerCase() === 'true');

  const numericConfidence = Number(src.confidence || 0);
  const confidence = Number.isNaN(numericConfidence)
    ? 0
    : Math.max(0, Math.min(1, numericConfidence));

  return {
    missType: ALLOWED_MISS_TYPES.has(missType) ? missType : 'unknown',
    safeToInstall,
    detectorPattern: _asString(src.detectorPattern || src.detector_pattern).trim(),
    questionPattern: _asString(src.questionPattern || src.question_pattern).trim(),
    yesPattern: _asString(src.yesPattern || src.yes_pattern).trim(),
    anchorPattern: _asString(src.anchorPattern || src.anchor_pattern).trim(),
    toolName: _asString(src.toolName || src.tool_name).trim(),
    commandPattern: _asString(src.commandPattern || src.command_pattern).trim(),
    warningPattern: _asString(src.warningPattern || src.warning_pattern).trim(),
    approveKey: _asString(src.approveKey || src.approve_key || '1').trim() || '1',
    confidence,
    rationale: _asString(src.rationale || src.reasoning).trim().slice(0, 1000),
  };
}
|
|
178
|
+
|
|
179
|
+
// Validates a model proposal locally before it may be installed as a rule.
// Collects error codes for: not marked safe, confidence below 0.65, missing
// required fields, and unsafe/uncompilable patterns. Only when no error was
// found is the candidate rule run against `rawText`; a missing context adds
// 'candidate_rule_did_not_parse_sample'.
// Returns { ok, errors, proposal (normalized), context }.
function validateRefinementProposal(proposal, rawText) {
  const p = normalizeProposal(proposal);
  const errors = [];

  const requirementFailures = [
    [!p.safeToInstall, 'proposal_not_safe_to_install'],
    [p.confidence < 0.65, 'low_confidence'],
    [!p.detectorPattern, 'missing_detector_pattern'],
    [!p.yesPattern, 'missing_yes_pattern'],
    [!p.toolName, 'missing_tool_name'],
    [!p.commandPattern, 'missing_command_pattern'],
  ];
  for (const [failed, code] of requirementFailures) {
    if (failed) errors.push(code);
  }

  const patternFields = [
    'detectorPattern',
    'questionPattern',
    'yesPattern',
    'anchorPattern',
    'commandPattern',
    'warningPattern',
  ];
  patternFields.forEach((name) => {
    const value = p[name];
    if (!_isSafePattern(value)) errors.push(`unsafe_${name}`);
    if (value && !_compilePattern(value)) errors.push(`invalid_${name}`);
  });

  let context = null;
  if (errors.length === 0) {
    context = parseWithRule(rawText, {
      status: ACTIVE_STATUS,
      detector_pattern: p.detectorPattern,
      question_pattern: p.questionPattern,
      yes_pattern: p.yesPattern,
      anchor_pattern: p.anchorPattern,
      tool_name: p.toolName,
      command_pattern: p.commandPattern,
      warning_pattern: p.warningPattern,
      approve_key: p.approveKey,
    });
  }
  if (!context) errors.push('candidate_rule_did_not_parse_sample');

  return { ok: errors.length === 0, errors, proposal: p, context };
}
|
|
218
|
+
|
|
219
|
+
// Asks the background model for a parser/detector refinement rule for a missed
// approval prompt. The terminal text is redacted before it enters the prompt.
// Resolves to { ok: true, proposal, model } or { ok: false, error }; it does not
// reject — model errors are returned as `error`.
async function proposeRefinement(miss = {}, options = {}) {
  const callModel = options.callModel || callBackgroundLlm;
  if (typeof callModel !== 'function') {
    return { ok: false, error: 'model_unavailable' };
  }

  const redacted = redactApprovalText(miss.rawText || miss.text || '');
  const provider = miss.providerId || miss.provider_id || 'unknown';
  const source = miss.source || 'unknown';
  const gateReason = miss.gateReason || miss.gate_reason || miss.reason || 'unknown';
  const rawDetected = miss.rawDetected ? 'yes' : 'no';
  const hintDetected = miss.hintDetected ? 'yes' : 'no';
  const parseStatus = miss.parseStatus || miss.parse_status || 'unknown';

  const prompt = `You are CTM's approval prompt refinement engine.

The deterministic terminal approval pipeline saw an approval-shaped screen but missed or rejected it before the normal approval policy could run.

Your job is NOT to decide whether the command is allowed.
Your job is to propose a narrow parser/detector refinement rule so CTM can extract a normal approval context, then the existing blocklist/policy/live-terminal gates will decide.

Hard rules:
- Only propose a rule for an active approval widget or active permission prompt.
- Never create a durable "always allow" rule.
- Never approve by policy. This rule only extracts detector/parser context.
- Prefer provider-specific text anchors over broad words like "allow" alone.
- Regexes must be JavaScript-compatible, case-insensitive safe, and under ${MAX_PATTERN_LENGTH} characters.
- commandPattern must capture the operation in a named group "command" when possible.
- yesPattern must match the one-time Yes/Allow option, not an always-allow option.
- If uncertain or stale, set safeToInstall false.

Provider: ${provider}
Source: ${source}
Gate reason: ${gateReason}
Raw detected: ${rawDetected}
Hint detected: ${hintDetected}
Parse status: ${parseStatus}

Redacted terminal tail:
${redacted}

Return only JSON:
{
"missType": "detector_failure" | "parser_failure" | "gate_too_strict" | "partial_snapshot_race" | "policy_gap" | "stale_or_false_positive" | "unknown",
"safeToInstall": true,
"detectorPattern": "regex that proves this is the approval widget",
"questionPattern": "optional regex for the active question",
"yesPattern": "regex for the one-time yes/allow option",
"anchorPattern": "optional provider/tool anchor regex",
"toolName": "short tool label",
"commandPattern": "regex with named capture (?<command>...) or first capture",
"warningPattern": "optional regex with named capture (?<warning>...)",
"approveKey": "1",
"confidence": 0.0,
"rationale": "one sentence"
}`;

  const modelOptions = {
    task: 'approval-ai-refinement',
    modelTier: 'fast',
    maxTokens: 700,
    temperature: 0,
    thinking: 'disabled',
    reasoningEffort: 'low',
    timeoutMs: Number(options.modelTimeoutMs || 45000),
  };

  try {
    const response = await callModel(prompt, modelOptions);
    // The model may answer with an object carrying `text` or with a bare string.
    const parsed = extractJsonObject(response?.text || response);
    if (!parsed) return { ok: false, error: 'invalid_model_json' };
    return { ok: true, proposal: normalizeProposal(parsed), model: response?.model || '' };
  } catch (err) {
    return { ok: false, error: err?.message || String(err) };
  }
}
|
|
285
|
+
|
|
286
|
+
// Builds the row persisted for a refinement rule. The row is 'active' when
// local validation passed and 'failed' otherwise; it stores the redacted
// sample tail, the validation outcome and optional telemetry status.
function _ruleRow(fingerprint, miss, proposal, validation, extra = {}) {
  const passed = Boolean(validation.ok);
  const validationMessage = passed
    ? 'candidate rule parsed the sample'
    : validation.errors.join(',');

  return {
    fingerprint,
    providerId: miss.providerId || miss.provider_id || '',
    missType: proposal.missType,
    source: miss.source || '',
    status: passed ? ACTIVE_STATUS : FAILED_STATUS,
    detectorPattern: proposal.detectorPattern,
    questionPattern: proposal.questionPattern,
    yesPattern: proposal.yesPattern,
    anchorPattern: proposal.anchorPattern,
    toolName: proposal.toolName,
    commandPattern: proposal.commandPattern,
    warningPattern: proposal.warningPattern,
    approveKey: proposal.approveKey,
    confidence: proposal.confidence,
    rationale: proposal.rationale,
    sampleRedactedTail: redactApprovalText(miss.rawText || miss.text || ''),
    validationStatus: passed ? 'passed' : 'failed',
    validationMessage,
    telemetryStatus: extra.telemetryStatus || '',
    telemetryError: extra.telemetryError || '',
    lastSeenAtMs: Date.now(),
  };
}
|
|
311
|
+
|
|
312
|
+
// Builds the snake_case rule summary attached to telemetry events. The
// detector, question, yes, command and warning patterns are passed through
// redactTelemetryPattern; the anchor pattern is not included.
function telemetryRulePayload(proposal = {}) {
  const normalized = normalizeProposal(proposal);
  const payload = {
    miss_type: normalized.missType,
    tool_name: normalized.toolName,
  };

  const redactedFields = [
    ['detector_pattern', 'detectorPattern'],
    ['question_pattern', 'questionPattern'],
    ['yes_pattern', 'yesPattern'],
    ['command_pattern', 'commandPattern'],
    ['warning_pattern', 'warningPattern'],
  ];
  for (const [outKey, field] of redactedFields) {
    payload[outKey] = redactTelemetryPattern(normalized[field]);
  }

  payload.approve_key = normalized.approveKey;
  return payload;
}
|
|
325
|
+
|
|
326
|
+
// Sends a 'ctm_approval_ai_refinement' telemetry event through `eventClient`.
// Resolves to the client's result, to a `skipped` marker when no usable client
// is supplied, or to { ok: false, error } when the client throws or rejects.
async function _submitTelemetry(eventClient, meta) {
  const canSubmit = Boolean(eventClient) && typeof eventClient.submitTelemetryEvent === 'function';
  if (!canSubmit) {
    return { ok: false, skipped: true, reason: 'telemetry_client_unavailable' };
  }
  try {
    const outcome = await eventClient.submitTelemetryEvent('ctm_approval_ai_refinement', meta);
    return outcome;
  } catch (failure) {
    return { ok: false, error: failure?.message || String(failure) };
  }
}
|
|
336
|
+
|
|
337
|
+
// Builds an open, 'warning'-severity row describing a refinement problem.
// When no `evidence` is supplied, the redacted terminal text of the miss is used.
function _warningRow(miss, fingerprint, title, message, evidence = '') {
  const row = {
    fingerprint,
    sessionId: miss.sessionId || miss.session_id || '',
    providerId: miss.providerId || miss.provider_id || '',
    severity: 'warning',
    status: 'open',
    title,
    message,
  };
  row.evidence = evidence ? evidence : redactApprovalText(miss.rawText || miss.text || '');
  return row;
}
|
|
349
|
+
|
|
350
|
+
// Persists a warning through db.saveApprovalAiRefinementWarning when available.
// Returns what the store returned, or the original warning when the store is
// missing, returns nothing, or throws.
function _saveWarning(db, warning) {
  let stored;
  try {
    stored = db.saveApprovalAiRefinementWarning?.(warning);
  } catch {
    return warning;
  }
  return stored || warning;
}
|
|
357
|
+
|
|
358
|
+
// Handles one missed approval prompt end to end:
//   1. If an active rule already exists for the fingerprint and still parses
//      the text, reuse it.
//   2. Otherwise ask the model for a proposal. On failure, save a warning and
//      a failed rule row, report telemetry, and return a failed status.
//   3. Validate the proposal locally, report telemetry, and persist the rule
//      (active when validation passed, failed otherwise, plus a warning).
//
// Returns { status, fingerprint, ... } where status is ACTIVE_STATUS or
// FAILED_STATUS. `rule` is whatever the store returned, or the row that was
// handed to it. The row is built once, so the fallback carries the same
// telemetry status as the stored row (the old fallback dropped it).
async function handleMiss(miss = {}, options = {}) {
  const rawText = _asString(miss.rawText || miss.text || '');
  const db = _db(options);
  const fingerprint = miss.fingerprint || fingerprintMiss({ ...miss, rawText });
  const telemetryClient = options.remoteFeedbackClient || options.telemetryClient;
  const providerId = miss.providerId || miss.provider_id || '';
  const source = miss.source || '';

  const existing = db.getApprovalAiRefinementRule?.(fingerprint);
  if (existing?.status === ACTIVE_STATUS) {
    const context = parseWithRule(rawText, existing);
    if (context) return { status: ACTIVE_STATUS, fingerprint, rule: existing, context, reused: true };
  }

  const proposalResult = await proposeRefinement({ ...miss, rawText }, options);
  if (!proposalResult.ok) {
    const error = proposalResult.error || 'proposal_failed';
    const warning = _saveWarning(db, _warningRow(
      miss,
      fingerprint,
      'AI approval refinement failed',
      `CTM detected a missed approval prompt, but AI refinement could not propose a rule: ${proposalResult.error || 'unknown error'}.`,
    ));
    const fallbackProposal = normalizeProposal({ missType: 'unknown', safeToInstall: false });
    const failedValidation = { ok: false, errors: [error], proposal: fallbackProposal, context: null };
    db.saveApprovalAiRefinementRule?.(_ruleRow(fingerprint, miss, fallbackProposal, failedValidation));
    await _submitTelemetry(telemetryClient, {
      fingerprint,
      provider_id: providerId,
      source,
      miss_type: 'unknown',
      status: FAILED_STATUS,
      validation_status: 'proposal_failed',
      error,
    });
    return { status: FAILED_STATUS, fingerprint, warning, error };
  }

  const validation = validateRefinementProposal(proposalResult.proposal, rawText);
  const telemetry = await _submitTelemetry(telemetryClient, {
    fingerprint,
    provider_id: providerId,
    source,
    gate_reason: miss.gateReason || miss.gate_reason || miss.reason || '',
    miss_type: validation.proposal.missType,
    status: validation.ok ? ACTIVE_STATUS : FAILED_STATUS,
    validation_status: validation.ok ? 'passed' : 'failed',
    validation_message: validation.ok ? 'candidate rule parsed the sample' : validation.errors.join(','),
    confidence: validation.proposal.confidence,
    has_detector_pattern: !!validation.proposal.detectorPattern,
    has_command_pattern: !!validation.proposal.commandPattern,
    rule: telemetryRulePayload(validation.proposal),
  });

  // Build the row once so the stored row and the fallback are identical.
  const row = _ruleRow(fingerprint, miss, validation.proposal, validation, {
    telemetryStatus: telemetry.ok ? 'sent' : (telemetry.skipped ? 'skipped' : 'failed'),
    telemetryError: telemetry.ok ? '' : (telemetry.error || telemetry.reason || ''),
  });
  const rule = db.saveApprovalAiRefinementRule?.(row) || row;

  if (!validation.ok) {
    const warning = _saveWarning(db, _warningRow(
      miss,
      fingerprint,
      'AI approval refinement did not validate',
      `CTM detected a missed approval prompt, but the generated rule did not pass local validation: ${validation.errors.join(', ')}.`,
    ));
    return { status: FAILED_STATUS, fingerprint, rule, warning, validation };
  }

  return { status: ACTIVE_STATUS, fingerprint, rule, context: validation.context, validation };
}
|
|
423
|
+
|
|
424
|
+
// Records that an installed rule did not prove itself when approval detection
// was rerun: saves a warning and, if a rule exists for the fingerprint, marks
// it failed with validationStatus 'rerun_failed'. Updating the rule is best
// effort; errors there are ignored. Returns the saved warning.
function recordRerunFailure(miss = {}, refinement = {}, rerunResult = {}, options = {}) {
  const store = _db(options);
  const fingerprint = refinement.fingerprint || miss.fingerprint || fingerprintMiss(miss);
  const reason = rerunResult?.reason || rerunResult?.outcome || 'rerun_failed';

  const saved = _saveWarning(store, _warningRow(
    miss,
    fingerprint,
    'AI approval refinement failed after install',
    `CTM installed a local AI approval parser rule, but rerunning the approval detection did not prove it worked (${reason}). The prompt needs user attention.`,
  ));

  try {
    const current = store.getApprovalAiRefinementRule?.(fingerprint);
    if (current) {
      const retired = Object.assign({}, current, {
        status: FAILED_STATUS,
        validationStatus: 'rerun_failed',
        validationMessage: reason,
        lastSeenAtMs: Date.now(),
      });
      store.saveApprovalAiRefinementRule?.(retired);
    }
  } catch {}
  return saved;
}
|
|
449
|
+
|
|
450
|
+
// Integrity check for a learned rule. The rule is healthy when it has a
// non-blank detector pattern and every supplied pattern (detector, question,
// yes, anchor, command, warning) compiles via _compilePattern, which also
// rejects patterns that fail the safety check. Blank optional patterns are
// ignored. Used by the self-adapt maintenance pass to retire corrupt or
// unsafe rules.
function isActiveRuleHealthy(rule = {}) {
  const patternKeys = [
    ['detectorPattern', 'detector_pattern'],
    ['questionPattern', 'question_pattern'],
    ['yesPattern', 'yes_pattern'],
    ['anchorPattern', 'anchor_pattern'],
    ['commandPattern', 'command_pattern'],
    ['warningPattern', 'warning_pattern'],
  ];

  const detectorSource = _asString(_ruleValue(rule, 'detectorPattern', 'detector_pattern')).trim();
  if (!detectorSource) return false;

  return patternKeys.every(([camel, snake]) => {
    const source = _asString(_ruleValue(rule, camel, snake)).trim();
    return !source || Boolean(_compilePattern(source));
  });
}
|
|
470
|
+
|
|
471
|
+
module.exports = {
|
|
472
|
+
ACTIVE_STATUS,
|
|
473
|
+
FAILED_STATUS,
|
|
474
|
+
CANDIDATE_STATUS,
|
|
475
|
+
redactApprovalText,
|
|
476
|
+
redactTelemetryPattern,
|
|
477
|
+
fingerprintMiss,
|
|
478
|
+
extractJsonObject,
|
|
479
|
+
isActiveRuleHealthy,
|
|
480
|
+
parseWithRule,
|
|
481
|
+
parseWithActiveRules,
|
|
482
|
+
normalizeProposal,
|
|
483
|
+
validateRefinementProposal,
|
|
484
|
+
telemetryRulePayload,
|
|
485
|
+
proposeRefinement,
|
|
486
|
+
handleMiss,
|
|
487
|
+
recordRerunFailure,
|
|
488
|
+
};
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Shadow-approver self-adaptation: learn from the user's own corrections.
|
|
4
|
+
//
|
|
5
|
+
// Two behavioral signals (correlated with the recent approval_decisions log):
|
|
6
|
+
// 1. interrupt_after_autoapprove — the approver auto-approved via a learned
|
|
7
|
+
// rule, then the user immediately interrupted (Ctrl-C). Strong signal the
|
|
8
|
+
// rule is over-broad → disable it.
|
|
9
|
+
// 2. approve_after_escalation — the approver escalated, then the user manually
|
|
10
|
+
// approved the same prompt. Signal the class is safe → record it so the
|
|
11
|
+
// history-scan / maintenance loop can promote a narrow rule.
|
|
12
|
+
//
|
|
13
|
+
// Plus a periodic maintenance pass (runSelfAdaptMaintenance) that re-validates
|
|
14
|
+
// active AI-refinement detection rules for integrity (detector present, every
// pattern safe and compilable) and disables corrupt ones — so the learned set self-heals over
|
|
16
|
+
// time instead of accumulating stale rules.
|
|
17
|
+
//
|
|
18
|
+
// This module is pure orchestration over an injected dbModule + refinement
|
|
19
|
+
// module; all timing is injectable for tests.
|
|
20
|
+
|
|
21
|
+
// Default time windows (ms) used when correlating a user action with a
// recent approval decision: 8 seconds for interrupts, 2 minutes for approvals.
const DEFAULT_INTERRUPT_WINDOW_MS = 8 * 1000;
const DEFAULT_APPROVE_WINDOW_MS = 2 * 60 * 1000;
|
|
23
|
+
|
|
24
|
+
// SQLite datetime('now') stores UTC as 'YYYY-MM-DD HH:MM:SS'. Parse to epoch ms.
//
// A value in that shape (optionally with fractional seconds) is read as UTC
// unless it already ends with `Z` or a `+hh:mm` / `-hh:mm` offset, in which
// case the offset is honoured. Previously `Z` was appended blindly, which
// turned zone-qualified timestamps into invalid dates. Any other string is
// handed to Date.parse unchanged. Returns 0 for empty or unparseable input.
function _sqliteTimeMs(value) {
  if (!value) return 0;
  const text = String(value).trim();
  const sqliteShape = /^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}(?:\.\d+)?)(Z|[+-]\d{2}:\d{2})?$/.exec(text);
  const iso = sqliteShape
    ? `${sqliteShape[1]}T${sqliteShape[2]}${sqliteShape[3] || 'Z'}`
    : text;
  const ms = Date.parse(iso);
  return Number.isFinite(ms) ? ms : 0;
}
|
|
32
|
+
|
|
33
|
+
// Fetches the most recent approval decisions for a session via
// dbModule.listApprovalDecisions. Returns [] when the call throws or returns
// a falsy value.
function _recentDecisions(dbModule, sessionId, limit = 6) {
  let rows;
  try {
    rows = dbModule.listApprovalDecisions({ sessionId, limit });
  } catch {
    return [];
  }
  return rows || [];
}
|
|
40
|
+
|
|
41
|
+
// User interrupted (Ctrl-C) right after we auto-approved via a learned rule →
// retire that rule. Returns { action: 'disabled'|'noop', ruleId?, reason? }.
//
// Only the most recent decision is considered. It must be an approval made by
// a rule (decided_by === 'rule' with a rule_id) and fall within `windowMs` of
// `now`. 'disabled' is reported only when toggleApprovalRule was actually
// called; if the db module has no such function the result is a noop with
// reason 'disable_unsupported' (this case used to be reported as 'disabled').
function learnFromInterrupt({ sessionId, dbModule, now = Date.now(), windowMs = DEFAULT_INTERRUPT_WINDOW_MS } = {}) {
  const decisions = _recentDecisions(dbModule, sessionId, 4);
  const last = decisions[0];
  if (!last) return { action: 'noop', reason: 'no_recent_decision' };
  if (last.decision !== 'approved') return { action: 'noop', reason: 'last_not_approved' };
  if ((now - _sqliteTimeMs(last.created_at)) > windowMs) return { action: 'noop', reason: 'outside_window' };
  // Only retire when a rule caused the approval; never touch other decisions.
  if (last.decided_by !== 'rule' || !last.rule_id) return { action: 'noop', reason: 'not_rule_based' };

  if (typeof dbModule.toggleApprovalRule !== 'function') {
    return { action: 'noop', reason: 'disable_unsupported', ruleId: last.rule_id };
  }
  try {
    dbModule.toggleApprovalRule(last.rule_id, false);
  } catch (e) {
    return { action: 'noop', reason: 'disable_failed', error: e?.message || String(e) };
  }
  return { action: 'disabled', ruleId: last.rule_id, label: last.command_summary || '' };
}
|
|
60
|
+
|
|
61
|
+
// Escapes every regex metacharacter in `s` so the result matches the text
// literally. Falsy input yields ''.
function _escapeRegex(s) {
  const text = String(s || '');
  return text.replace(/[.*+?^${}()|[\]\\]/g, (metachar) => `\\${metachar}`);
}
|
|
64
|
+
|
|
65
|
+
// User manually approved right after we escalated → learn it. When the
// escalated decision captured a command signature, we promote it into a narrow
// auto-approve rule keyed on that exact signature (so the same command class is
// auto-approved next time). We never promote blocklist hits or high-risk
// escalations, and a promoted rule keeps 'medium' risk when the escalation was
// medium (otherwise 'low'). An audit observation is attempted for every
// escalation found; failures there are ignored.
//
// Returns { action, ... }:
//   'noop'            — no escalation within `windowMs` of `now`
//   'signal_recorded' — escalation seen but no rule stored (`reason` says why)
//   'rule_created'    — upsertApprovalRule was called for the signature
// 'rule_created' is reported only when upsertApprovalRule exists and was
// called; a db module without it yields 'signal_recorded' with reason
// 'rule_store_unavailable' (this case used to be reported as 'rule_created').
function learnFromApproveAfterEscalation({ sessionId, dbModule, now = Date.now(), windowMs = DEFAULT_APPROVE_WINDOW_MS } = {}) {
  const decisions = _recentDecisions(dbModule, sessionId, 6);
  const escalation = decisions.find(d =>
    d.decision === 'escalated' && (now - _sqliteTimeMs(d.created_at)) <= windowMs);
  if (!escalation) return { action: 'noop', reason: 'no_recent_escalation' };

  // Audit observation (always attempted).
  try {
    if (typeof dbModule.addApprovalObservation === 'function') {
      dbModule.addApprovalObservation({
        sessionId,
        source: 'self-adapt',
        rawDetected: true,
        gated: false,
        policyDecision: 'user_approved_after_escalation',
        decidedBy: 'user',
        keystrokeStatus: 'manual_approve',
        redactedScreenTail: String(escalation.command_summary || escalation.tool_name || '').slice(0, 200),
      });
    }
  } catch { /* non-fatal */ }

  const recorded = (reason, extra = {}) => ({
    action: 'signal_recorded',
    escalationId: escalation.id,
    reason,
    ...extra,
  });

  // Promote into a learned rule only when we have a reliable signature and the
  // escalation is safe to learn from.
  const signature = escalation.command_signature || '';
  if (!signature) return recorded('no_signature');
  if (escalation.decided_by === 'blocklist') return recorded('blocklist_not_promotable');
  if (escalation.risk_level === 'high') return recorded('high_risk_not_promotable');
  if (typeof dbModule.upsertApprovalRule !== 'function') return recorded('rule_store_unavailable');

  const label = escalation.command_summary || escalation.tool_name || 'Approved after escalation';
  const riskLevel = escalation.risk_level === 'medium' ? 'medium' : 'low';
  try {
    dbModule.upsertApprovalRule({
      pattern: _escapeRegex(signature),
      label,
      description: 'Auto-learned: you approved this after CTM escalated it.',
      category: String(escalation.tool_name || '').toLowerCase().replace(/\s+/g, '-'),
      riskLevel,
      enabled: true,
      commandSignature: signature,
      autoLearnedSource: 'approved_after_escalation',
    });
  } catch (e) {
    return recorded('rule_create_failed', { error: e?.message || String(e) });
  }
  return { action: 'rule_created', escalationId: escalation.id, signature, label, riskLevel };
}
|
|
121
|
+
|
|
122
|
+
// Periodic maintenance over the active AI-refinement detection rules. Each
// rule is checked with refinement.isActiveRuleHealthy (treated as healthy when
// that function is absent). An unhealthy rule is re-saved with the failed
// status so it no longer shadows detection; if it cannot be saved it is
// counted as kept. The stored sample is redacted and is not re-parsed here —
// only rule integrity is checked. Returns { checked, disabled, kept }.
function runSelfAdaptMaintenance({ dbModule, refinement, now = Date.now() } = {}) {
  const tally = { checked: 0, disabled: 0, kept: 0 };

  let activeRules;
  try {
    activeRules = dbModule.listApprovalAiRefinementRules
      ? dbModule.listApprovalAiRefinementRules({ status: refinement.ACTIVE_STATUS, limit: 200 })
      : [];
  } catch {
    activeRules = [];
  }

  for (const rule of activeRules || []) {
    tally.checked += 1;

    let healthy = true;
    if (typeof refinement.isActiveRuleHealthy === 'function') {
      healthy = refinement.isActiveRuleHealthy(rule);
    }

    let retired = false;
    if (!healthy && typeof dbModule.saveApprovalAiRefinementRule === 'function') {
      try {
        dbModule.saveApprovalAiRefinementRule({
          ...rule,
          fingerprint: rule.fingerprint,
          status: refinement.FAILED_STATUS || 'failed',
          validationMessage: 'self-adapt maintenance: rule patterns no longer compile/safe',
          updatedAt: now,
        });
        retired = true;
      } catch { /* could not retire; counted as kept below */ }
    }

    if (retired) {
      tally.disabled += 1;
    } else {
      tally.kept += 1;
    }
  }
  return tally;
}
|
|
160
|
+
|
|
161
|
+
module.exports = {
|
|
162
|
+
learnFromInterrupt,
|
|
163
|
+
learnFromApproveAfterEscalation,
|
|
164
|
+
runSelfAdaptMaintenance,
|
|
165
|
+
_sqliteTimeMs,
|
|
166
|
+
DEFAULT_INTERRUPT_WINDOW_MS,
|
|
167
|
+
DEFAULT_APPROVE_WINDOW_MS,
|
|
168
|
+
};
|