create-walle 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/package.json +2 -2
- package/template/CLAUDE.md +2 -2
- package/template/LICENSE +1 -1
- package/template/bin/ctm-dev-cleanup.js +24 -3
- package/template/bin/ctm-launch.sh +13 -0
- package/template/bin/dev.sh +156 -18
- package/template/bin/node-bin.sh +84 -0
- package/template/bin/pin-node.sh +51 -0
- package/template/claude-task-manager/api-prompts.js +1203 -182
- package/template/claude-task-manager/api-reviews.js +109 -15
- package/template/claude-task-manager/approval-agent.js +1360 -280
- package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
- package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
- package/template/claude-task-manager/db.js +4417 -295
- package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
- package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
- package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
- package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
- package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
- package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
- package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
- package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
- package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
- package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
- package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
- package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
- package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
- package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
- package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
- package/template/claude-task-manager/docs/phone-access-design.md +53 -15
- package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
- package/template/claude-task-manager/docs/phone-setup.md +3 -0
- package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
- package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
- package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
- package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
- package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
- package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
- package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
- package/template/claude-task-manager/docs/session-title-authority.md +32 -0
- package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
- package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
- package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
- package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
- package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
- package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
- package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
- package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
- package/template/claude-task-manager/git-utils.js +897 -27
- package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
- package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
- package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
- package/template/claude-task-manager/lib/agent-presets.js +17 -1
- package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
- package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
- package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
- package/template/claude-task-manager/lib/async-semaphore.js +44 -0
- package/template/claude-task-manager/lib/auth-context.js +5 -0
- package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
- package/template/claude-task-manager/lib/auth-rules.js +29 -2
- package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
- package/template/claude-task-manager/lib/background-llm.js +144 -17
- package/template/claude-task-manager/lib/branch-inventory.js +212 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
- package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
- package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
- package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
- package/template/claude-task-manager/lib/codex-zst.js +124 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
- package/template/claude-task-manager/lib/connection-health.js +232 -0
- package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
- package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
- package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
- package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
- package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
- package/template/claude-task-manager/lib/document-review.js +141 -6
- package/template/claude-task-manager/lib/escalation-review.js +152 -0
- package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
- package/template/claude-task-manager/lib/headless-term-service.js +678 -0
- package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
- package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
- package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
- package/template/claude-task-manager/lib/main-db-census.js +216 -0
- package/template/claude-task-manager/lib/message-pagination.js +106 -4
- package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
- package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
- package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
- package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
- package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
- package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
- package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
- package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
- package/template/claude-task-manager/lib/perf-tracker.js +242 -6
- package/template/claude-task-manager/lib/permission-match.js +76 -0
- package/template/claude-task-manager/lib/permission-sync.js +133 -20
- package/template/claude-task-manager/lib/process-title.js +35 -0
- package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
- package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
- package/template/claude-task-manager/lib/prompt-intent.js +132 -0
- package/template/claude-task-manager/lib/provider-user-context.js +34 -0
- package/template/claude-task-manager/lib/read-pool-client.js +313 -0
- package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
- package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
- package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
- package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
- package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
- package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
- package/template/claude-task-manager/lib/restart-guard.js +109 -0
- package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
- package/template/claude-task-manager/lib/restore-policy.js +13 -0
- package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
- package/template/claude-task-manager/lib/restore-runtime.js +68 -0
- package/template/claude-task-manager/lib/restore-storm.js +34 -0
- package/template/claude-task-manager/lib/resume-cwd.js +36 -0
- package/template/claude-task-manager/lib/resume-preflight.js +313 -0
- package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
- package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
- package/template/claude-task-manager/lib/scheduler.js +21 -1
- package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
- package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
- package/template/claude-task-manager/lib/server-listeners.js +239 -0
- package/template/claude-task-manager/lib/session-capture.js +42 -7
- package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
- package/template/claude-task-manager/lib/session-history.js +388 -43
- package/template/claude-task-manager/lib/session-host-manager.js +287 -0
- package/template/claude-task-manager/lib/session-image-refs.js +209 -0
- package/template/claude-task-manager/lib/session-jobs.js +399 -59
- package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
- package/template/claude-task-manager/lib/session-restore.js +53 -0
- package/template/claude-task-manager/lib/session-standup.js +123 -23
- package/template/claude-task-manager/lib/session-state-bus.js +14 -0
- package/template/claude-task-manager/lib/session-stream.js +64 -16
- package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
- package/template/claude-task-manager/lib/session-token-usage.js +494 -0
- package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
- package/template/claude-task-manager/lib/setup-network-config.js +9 -0
- package/template/claude-task-manager/lib/size-cap.js +45 -0
- package/template/claude-task-manager/lib/size-cap.test.js +62 -0
- package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
- package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
- package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
- package/template/claude-task-manager/lib/standup-attention.js +7 -3
- package/template/claude-task-manager/lib/status-authority.js +39 -0
- package/template/claude-task-manager/lib/status-hooks.js +4 -0
- package/template/claude-task-manager/lib/storage-migration.js +235 -0
- package/template/claude-task-manager/lib/structured-capture.js +298 -0
- package/template/claude-task-manager/lib/sync-io-census.js +163 -0
- package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
- package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
- package/template/claude-task-manager/lib/terminal-choice.js +364 -0
- package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
- package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
- package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
- package/template/claude-task-manager/lib/timeline-order.js +122 -0
- package/template/claude-task-manager/lib/transcript-store.js +348 -43
- package/template/claude-task-manager/lib/transport-security.js +84 -1
- package/template/claude-task-manager/lib/wait-state.js +184 -0
- package/template/claude-task-manager/lib/walle-client.js +47 -5
- package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
- package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
- package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
- package/template/claude-task-manager/lib/walle-native-health.js +403 -0
- package/template/claude-task-manager/lib/walle-repair.js +701 -0
- package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
- package/template/claude-task-manager/lib/walle-session-context.js +57 -21
- package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
- package/template/claude-task-manager/lib/walle-transcript.js +52 -0
- package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
- package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
- package/template/claude-task-manager/package.json +1 -1
- package/template/claude-task-manager/prompt-harvest.js +89 -66
- package/template/claude-task-manager/providers/claude-code.js +51 -3
- package/template/claude-task-manager/providers/cursor.js +140 -45
- package/template/claude-task-manager/public/css/reviews.css +551 -61
- package/template/claude-task-manager/public/css/setup.css +191 -0
- package/template/claude-task-manager/public/css/walle-session.css +865 -10
- package/template/claude-task-manager/public/css/walle.css +154 -0
- package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
- package/template/claude-task-manager/public/index.html +18516 -2058
- package/template/claude-task-manager/public/ipad.html +363 -0
- package/template/claude-task-manager/public/js/document-review-links.js +301 -0
- package/template/claude-task-manager/public/js/image-normalize.js +69 -36
- package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
- package/template/claude-task-manager/public/js/prompts.js +66 -29
- package/template/claude-task-manager/public/js/reviews.js +901 -133
- package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
- package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
- package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
- package/template/claude-task-manager/public/js/setup.js +1273 -176
- package/template/claude-task-manager/public/js/stream-view.js +691 -73
- package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
- package/template/claude-task-manager/public/js/walle-session.js +2455 -158
- package/template/claude-task-manager/public/js/walle.js +455 -28
- package/template/claude-task-manager/public/m/app.css +2909 -262
- package/template/claude-task-manager/public/m/app.js +6601 -398
- package/template/claude-task-manager/public/m/claim.html +224 -17
- package/template/claude-task-manager/public/m/index.html +117 -21
- package/template/claude-task-manager/public/m/sw.js +3 -1
- package/template/claude-task-manager/public/manifest.json +2 -2
- package/template/claude-task-manager/public/prompts.html +30 -14
- package/template/claude-task-manager/queue-engine.js +507 -28
- package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
- package/template/claude-task-manager/server.js +14341 -2197
- package/template/claude-task-manager/session-integrity.js +160 -18
- package/template/claude-task-manager/session-search-ranking.js +1 -0
- package/template/claude-task-manager/session-utils.js +25 -5
- package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
- package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
- package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
- package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
- package/template/claude-task-manager/workers/harvest-worker.js +9 -55
- package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
- package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
- package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
- package/template/claude-task-manager/workers/session-host-process.js +146 -0
- package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
- package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
- package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
- package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
- package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
- package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
- package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
- package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
- package/template/docs/design/markdown-review-pane.md +206 -0
- package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
- package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
- package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
- package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
- package/template/docs/private-memory-and-pii-policy.md +69 -0
- package/template/package.json +2 -1
- package/template/scripts/check-private-data.js +201 -0
- package/template/shared/sqlite-owner-guard.js +30 -0
- package/template/shared/sqlite-owner-write-queue.js +225 -0
- package/template/shared/sqlite-storage-policy.js +111 -0
- package/template/shared/sqlite-write-lock.js +428 -0
- package/template/wall-e/agent-runners/claude-code.js +5 -0
- package/template/wall-e/agent.js +166 -22
- package/template/wall-e/api-walle.js +524 -70
- package/template/wall-e/auth/provider-flows.js +11 -1
- package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
- package/template/wall-e/brain.js +1614 -141
- package/template/wall-e/chat/attachment-blocks.js +96 -0
- package/template/wall-e/chat/attachments.js +2 -1
- package/template/wall-e/chat/capability-resolver.js +7 -7
- package/template/wall-e/chat/context-messages.js +28 -0
- package/template/wall-e/chat/conversation-frame.js +630 -0
- package/template/wall-e/chat/provider-messages.js +125 -0
- package/template/wall-e/chat.js +1002 -233
- package/template/wall-e/coding/acceptance-contract.js +170 -0
- package/template/wall-e/coding/acp-adapter.js +1 -1
- package/template/wall-e/coding/agent-catalog.js +3 -0
- package/template/wall-e/coding/artifact-store.js +93 -0
- package/template/wall-e/coding/capability-router.js +120 -0
- package/template/wall-e/coding/coding-run-controller.js +423 -0
- package/template/wall-e/coding/compaction-service.js +157 -12
- package/template/wall-e/coding/frontend-verification.js +258 -0
- package/template/wall-e/coding/lifecycle-hooks.js +75 -0
- package/template/wall-e/coding/local-preview-contract.js +157 -0
- package/template/wall-e/coding/permission-service.js +57 -13
- package/template/wall-e/coding/prompt-bundle.js +19 -1
- package/template/wall-e/coding/prompt-section-registry.js +227 -0
- package/template/wall-e/coding/provider-compat.js +15 -0
- package/template/wall-e/coding/runtime-events.js +224 -0
- package/template/wall-e/coding/runtime-mode.js +3 -0
- package/template/wall-e/coding/side-git-snapshot.js +160 -4
- package/template/wall-e/coding/snapshot-service.js +143 -1
- package/template/wall-e/coding/stream-processor.js +388 -34
- package/template/wall-e/coding/task-tool.js +141 -4
- package/template/wall-e/coding/tool-execution-controller.js +365 -0
- package/template/wall-e/coding/tool-registry.js +43 -5
- package/template/wall-e/coding/user-hooks.js +217 -0
- package/template/wall-e/coding-orchestrator.js +1330 -221
- package/template/wall-e/coding-prompts.js +20 -4
- package/template/wall-e/context/context-builder.js +15 -2
- package/template/wall-e/decision/confidence.js +1 -1
- package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
- package/template/wall-e/docs/external-action-controller.md +26 -6
- package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
- package/template/wall-e/embeddings.js +591 -53
- package/template/wall-e/external-action-controller.js +12 -0
- package/template/wall-e/http/auth.js +1 -0
- package/template/wall-e/http/chat-api.js +46 -11
- package/template/wall-e/http/model-admin.js +836 -34
- package/template/wall-e/lib/boot-profile.js +88 -0
- package/template/wall-e/lib/event-loop-monitor.js +93 -0
- package/template/wall-e/lib/service-health.js +194 -0
- package/template/wall-e/llm/anthropic.js +130 -5
- package/template/wall-e/llm/client.js +266 -63
- package/template/wall-e/llm/default-fallback.js +382 -0
- package/template/wall-e/llm/health.js +19 -0
- package/template/wall-e/llm/message-guard.js +78 -0
- package/template/wall-e/llm/model-catalog.js +252 -1
- package/template/wall-e/llm/openai.js +26 -4
- package/template/wall-e/llm/portkey-sync.js +654 -0
- package/template/wall-e/llm/provider-error.js +30 -2
- package/template/wall-e/llm/registry.js +5 -1
- package/template/wall-e/llm/request-compat.js +67 -0
- package/template/wall-e/loops/backfill.js +79 -23
- package/template/wall-e/loops/brain-optimize.js +67 -0
- package/template/wall-e/loops/ingest.js +25 -10
- package/template/wall-e/loops/question-digest.js +160 -0
- package/template/wall-e/loops/reflect.js +6 -4
- package/template/wall-e/loops/think.js +39 -12
- package/template/wall-e/mcp-server.js +318 -36
- package/template/wall-e/memory/ctm-context-client.js +52 -14
- package/template/wall-e/memory/ctm-operational-context.js +237 -0
- package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
- package/template/wall-e/memory/ctm-session-context.js +111 -63
- package/template/wall-e/prompts/coding/deepseek.txt +3 -0
- package/template/wall-e/prompts/coding/gemini.txt +6 -0
- package/template/wall-e/prompts/coding/gpt.txt +6 -0
- package/template/wall-e/prompts/coding/local.txt +7 -0
- package/template/wall-e/runtime/decision-hooks.js +115 -0
- package/template/wall-e/runtime/devbox-gateway.js +82 -8
- package/template/wall-e/runtime/prompt-manifest.js +86 -0
- package/template/wall-e/runtime/tool-executor.js +269 -0
- package/template/wall-e/runtime/tool-result-envelope.js +138 -0
- package/template/wall-e/runtime/transcript-projection.js +60 -0
- package/template/wall-e/runtime/walle-runtime.js +224 -0
- package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
- package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
- package/template/wall-e/server.js +15 -0
- package/template/wall-e/session-files.js +9 -0
- package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
- package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
- package/template/wall-e/skills/claude-code-reader.js +7 -3
- package/template/wall-e/skills/script-skill-runner.js +10 -0
- package/template/wall-e/skills/skill-planner.js +38 -0
- package/template/wall-e/tools/builtin-middleware.js +19 -9
- package/template/wall-e/tools/local-tools.js +1428 -16
- package/template/wall-e/tools/permission-checker.js +73 -5
- package/template/wall-e/tools/question-manager.js +117 -7
- package/template/wall-e/training/harvester.js +12 -28
- package/template/wall-e/training/replay.js +25 -80
- package/template/website/index.html +10 -10
- package/template/wall-e/eval/ab-test.js +0 -203
- package/template/wall-e/eval/agent-runner.js +0 -772
- package/template/wall-e/eval/agent-scorer.js +0 -461
- package/template/wall-e/eval/aggregator.js +0 -414
- package/template/wall-e/eval/allowed-test-commands.js +0 -34
- package/template/wall-e/eval/benchmark-generator.js +0 -113
- package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
- package/template/wall-e/eval/benchmarks/chat.json +0 -82
- package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
- package/template/wall-e/eval/benchmarks/coding.json +0 -122
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
- package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
- package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
- package/template/wall-e/eval/benchmarks.js +0 -669
- package/template/wall-e/eval/cc-replay.js +0 -719
- package/template/wall-e/eval/chat-eval.js +0 -525
- package/template/wall-e/eval/check-keys.js +0 -15
- package/template/wall-e/eval/check-providers.js +0 -42
- package/template/wall-e/eval/codex-cli-baseline.js +0 -669
- package/template/wall-e/eval/coding-agent-real.js +0 -570
- package/template/wall-e/eval/context-compactor.js +0 -251
- package/template/wall-e/eval/debug-agent003.js +0 -68
- package/template/wall-e/eval/diagnostics.js +0 -216
- package/template/wall-e/eval/eval-orchestrator.js +0 -642
- package/template/wall-e/eval/evaluate.js +0 -202
- package/template/wall-e/eval/evaluator.js +0 -373
- package/template/wall-e/eval/exporter.js +0 -212
- package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
- package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
- package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
- package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
- package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
- package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
- package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
- package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
- package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
- package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
- package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
- package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
- package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
- package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
- package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
- package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
- package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
- package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
- package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
- package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
- package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
- package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
- package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
- package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
- package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
- package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
- package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
- package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
- package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
- package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
- package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
- package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
- package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
- package/template/wall-e/eval/harvester.js +0 -685
- package/template/wall-e/eval/head-to-head.js +0 -388
- package/template/wall-e/eval/humaneval-adapter.js +0 -321
- package/template/wall-e/eval/list-models.js +0 -31
- package/template/wall-e/eval/livecodebench-adapter.js +0 -291
- package/template/wall-e/eval/mail-integration.js +0 -443
- package/template/wall-e/eval/manifest.js +0 -186
- package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
- package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
- package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
- package/template/wall-e/eval/meta-harness/cli.js +0 -86
- package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
- package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
- package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
- package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
- package/template/wall-e/eval/meta-harness/frontier.js +0 -96
- package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
- package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
- package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
- package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
- package/template/wall-e/eval/meta-harness/reporting.js +0 -58
- package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
- package/template/wall-e/eval/meta-harness/validation.js +0 -81
- package/template/wall-e/eval/promoter.js +0 -228
- package/template/wall-e/eval/provider-normalizer.js +0 -33
- package/template/wall-e/eval/replay.js +0 -395
- package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
- package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
- package/template/wall-e/eval/run-coding-agent-real.js +0 -187
- package/template/wall-e/eval/run-eval.js +0 -435
- package/template/wall-e/eval/run-model-comparison.js +0 -142
- package/template/wall-e/eval/session-evaluator.js +0 -187
- package/template/wall-e/eval/session-miner.js +0 -207
- package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
- package/template/wall-e/eval/session-transcripts.js +0 -509
- package/template/wall-e/eval/shadow.js +0 -161
- package/template/wall-e/eval/swebench-adapter.js +0 -345
- package/template/wall-e/eval/swebench-docker.js +0 -192
- package/template/wall-e/eval/train.py +0 -320
- package/template/wall-e/eval/trainer.js +0 -232
- package/template/wall-e/eval/weekly-eval-loop.js +0 -241
|
@@ -6,24 +6,44 @@
|
|
|
6
6
|
// escalate to the user.
|
|
7
7
|
|
|
8
8
|
const dbModule = require('./db');
|
|
9
|
-
const
|
|
9
|
+
const crypto = require('crypto');
|
|
10
|
+
const { getProvider, detectProvider, providers } = require('./providers');
|
|
10
11
|
const { checkBlocklist } = require('./workers/approval-blocklist');
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
const { commandHead } = require('./lib/escalation-review');
|
|
13
|
+
const { callBackgroundLlm } = require('./lib/background-llm');
|
|
14
|
+
const { verifyIfEnabled } = require('./lib/auto-approval-verifier');
|
|
15
|
+
const { matchPermission } = require('./lib/permission-match');
|
|
16
|
+
const approvalAiRefinement = require('./lib/approval-ai-refinement');
|
|
17
|
+
|
|
18
|
+
// Dangerous-command blocklist: when enabled, any command matching
|
|
19
|
+
// ./workers/approval-blocklist is force-escalated even if learned rules /
|
|
20
|
+
// heuristics / AI would approve. Default ON (defense-in-depth); turn off via the
|
|
21
|
+
// Permission Manager UI (Shadow Approver panel). Cached read on each check.
|
|
16
22
|
/**
 * Report whether the dangerous-command blocklist is switched on.
 *
 * Reads the `approval_blocklist_enabled` setting with a default of `true` and
 * coerces it to a boolean. If the settings read throws, the blocklist is
 * treated as enabled.
 *
 * @returns {boolean} true when the blocklist should be applied.
 */
function isBlocklistEnabled() {
  let enabled;
  try {
    enabled = Boolean(dbModule.getSetting('approval_blocklist_enabled', true));
  } catch {
    // Settings store unavailable: keep the blocklist on.
    enabled = true;
  }
  return enabled;
}
|
|
29
|
+
|
|
30
|
+
// The user's Permission-tab edits to the blocklist: disabled default ids + custom
|
|
31
|
+
// patterns. Stored as one JSON setting; checkBlocklist() layers it over the
|
|
32
|
+
// shipped defaults. Null/absent → defaults only. Read fresh each check (cheap —
|
|
33
|
+
// a single settings row) so edits take effect without a restart.
|
|
34
|
+
/**
 * Load the user's blocklist overrides from the `approval_blocklist_config`
 * setting.
 *
 * @returns {object|null} the stored value when it is a truthy object
 *   (arrays included, since `typeof [] === 'object'`); otherwise null. A
 *   failing settings read also yields null.
 */
function getBlocklistConfig() {
  let stored;
  try {
    stored = dbModule.getSetting('approval_blocklist_config', null);
  } catch {
    return null;
  }
  if (stored && typeof stored === 'object') return stored;
  return null;
}
|
|
23
42
|
|
|
24
43
|
// Legacy patterns kept for backward compatibility with tests
|
|
25
44
|
// Strict form of the Claude Code confirmation question: proceed / make this
// edit to <file> / create <file> / overwrite <file>, with an optional "?".
const PROCEED_PATTERN = /Do you want to (proceed|make this edit to .+|create .+|overwrite .+)\??/;
// Looser form: any "Do you want to …?" question.
const BROAD_PROCEED_PATTERN = /Do you want to .+\?/;
// Option "2. Yes, …" variants that grant a durable permission (allow all,
// allow for/from this project, allow access to … and similar commands / for
// this session, allow Claude … this session, don't ask again, always allow).
// An optional selection cursor may precede the number.
const CLAUDE_DURABLE_YES_OPTION_RE = /^\s*(?:[❯›▶▸>]\s*)?2\.\s*Yes,\s*(allow all|allow\b.*\b(?:for|from) this project\b|(?:and\s+)?allow\s+access\s+to\b.*\b(?:and\s+similar\s+commands|for\s+this\s+session)\b|and\s+allow\s+Claude\b.*\bthis\s+session\b|and don't ask again|and always allow)/i;

// Wait (in milliseconds) before the auto-approve keystroke is sent; a smaller
// value means a faster response.
const APPROVE_DELAY_MS = 100;
|
|
@@ -112,12 +132,53 @@ async function _currentPromptVisibility(sessionId, context, headlessWorker) {
|
|
|
112
132
|
}
|
|
113
133
|
}
|
|
114
134
|
|
|
135
|
+
/**
 * Normalize a list of provider ids: stringify, trim, drop empties, and remove
 * duplicates while keeping first-seen order.
 *
 * @param {...*} ids candidate ids; falsy values are treated as empty.
 * @returns {string[]} unique, non-empty, trimmed ids in their original order.
 */
function _uniqueProviderIds(...ids) {
  const cleaned = ids
    .map((candidate) => String(candidate || '').trim())
    .filter((candidate) => candidate !== '');
  // A Set iterates in insertion order, so the first occurrence wins.
  return [...new Set(cleaned)];
}
|
|
146
|
+
|
|
147
|
+
/**
 * Try each plausible provider parser against the raw screen text and return
 * the first context one of them produces.
 *
 * Candidate order: the caller-supplied id, then the id reported by
 * `detectProvider`, then every registered provider. `claude-code` is only
 * included in that last group when the text carries a Claude-looking anchor
 * ("Esc to cancel", "Tab to amend", "ctrl+e to explain" or "❯"), so unrelated
 * numbered prompts are not relabeled as Claude prompts.
 *
 * @param {string} rawText screen text to parse.
 * @param {string} [suppliedProviderId] provider id already known to the caller.
 * @returns {{providerId: string, context: object}|null} the winning provider
 *   and its context (stamped with that provider id), or null if none parsed.
 */
function _parseKnownProviderContext(rawText, suppliedProviderId) {
  let detectedProviderId = '';
  try {
    detectedProviderId = detectProvider(rawText)?.id || '';
  } catch {
    // Detection is best-effort; a throwing detector counts as "nothing detected".
    detectedProviderId = '';
  }

  const hasClaudeAnchor = /Esc to cancel|Tab to amend|ctrl\+e to explain|❯/.test(String(rawText || ''));
  const fallbackProviderIds = [];
  for (const entry of providers) {
    if (entry.id === 'claude-code' && !hasClaudeAnchor) continue;
    fallbackProviderIds.push(entry.id);
  }

  const ordered = _uniqueProviderIds(suppliedProviderId, detectedProviderId, ...fallbackProviderIds);
  for (const providerId of ordered) {
    const candidate = getProvider(providerId);
    const canParse = Boolean(candidate) && typeof candidate.parse === 'function';
    if (!canParse) continue;
    try {
      const parsed = candidate.parse(rawText);
      if (parsed) return { providerId, context: { ...parsed, providerId } };
    } catch {}
  }
  return null;
}
|
|
175
|
+
|
|
115
176
|
// Guarded setTimeout: schedules sendApprovalKeystroke after APPROVE_DELAY_MS,
|
|
116
177
|
// but first revalidates the currently rendered headless-terminal prompt when
|
|
117
178
|
// the worker is available. This prevents low-byte stale transitions from
|
|
118
179
|
// leaking approval shortcuts into the next Codex/Claude input box. If the
|
|
119
180
|
// worker cannot answer and output advanced significantly, skip as before.
|
|
120
|
-
function _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, decisionPayload) {
|
|
181
|
+
function _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, decisionPayload, options = {}) {
|
|
121
182
|
const outputBytesAtDecision = session._outputBytesCounter || 0;
|
|
122
183
|
// Stash decision provenance on the context so sendApprovalKeystroke can log it.
|
|
123
184
|
context._decidedBy = decisionPayload.decidedBy;
|
|
@@ -132,6 +193,7 @@ function _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn,
|
|
|
132
193
|
console.log(`[approval-agent] Skipping keystroke for session ${sessionId.slice(0, 8)} — live preflight no longer validates the approval prompt after ${outputAdvanced} bytes. decidedBy=${decisionPayload.decidedBy} label="${decisionPayload.label}"`);
|
|
133
194
|
// Still notify clients about the decision for telemetry/UI purposes.
|
|
134
195
|
broadcastFn(sessionId, session, { ...decisionPayload, decision: 'skipped-stale' });
|
|
196
|
+
try { options.onResult?.({ status: 'skipped-stale', sent: false, outputAdvanced, promptVisibility }); } catch {}
|
|
135
197
|
return;
|
|
136
198
|
}
|
|
137
199
|
}
|
|
@@ -140,12 +202,14 @@ function _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn,
|
|
|
140
202
|
console.log(`[approval-agent] Skipping keystroke for session ${sessionId.slice(0, 8)} — ${outputAdvanced} bytes of PTY output since decision and prompt could not be revalidated. decidedBy=${decisionPayload.decidedBy} label="${decisionPayload.label}"`);
|
|
141
203
|
// Still notify clients about the decision for telemetry/UI purposes.
|
|
142
204
|
broadcastFn(sessionId, session, { ...decisionPayload, decision: 'skipped-stale' });
|
|
205
|
+
try { options.onResult?.({ status: 'skipped-stale', sent: false, outputAdvanced, promptVisibility }); } catch {}
|
|
143
206
|
return;
|
|
144
207
|
}
|
|
145
208
|
console.log(`[approval-agent] Proceeding with approval for session ${sessionId.slice(0, 8)} despite ${outputAdvanced} bytes of PTY output — same prompt is still visible. decidedBy=${decisionPayload.decidedBy} label="${decisionPayload.label}"`);
|
|
146
209
|
}
|
|
147
|
-
sendApprovalKeystroke(session, context, headlessWorker);
|
|
210
|
+
sendApprovalKeystroke(session, context, headlessWorker, options.keystrokeOptions || {});
|
|
148
211
|
broadcastFn(sessionId, session, decisionPayload);
|
|
212
|
+
try { options.onResult?.({ status: 'sent', sent: true, outputAdvanced, promptVisibility }); } catch {}
|
|
149
213
|
}, APPROVE_DELAY_MS);
|
|
150
214
|
}
|
|
151
215
|
|
|
@@ -156,20 +220,88 @@ const DEDUP_WINDOW_MS = 3000;
|
|
|
156
220
|
|
|
157
221
|
// Determine which option to send — delegates to provider if available,
|
|
158
222
|
// falls back to Claude Code behavior ("2" for allow-all, "1" for plain Yes).
|
|
159
|
-
/**
 * Pick the keystroke that approves the prompt described by `context`.
 *
 * Default mode: delegate to the provider's `approveKeystroke` when the context
 * names a known provider; otherwise fall back to "2" when an allow-all option
 * exists and "1" for a plain Yes.
 *
 * One-shot mode (`options.preferAllowAll === false`): the provider is asked
 * with the allow-all fields blanked out, so it cannot choose a durable grant.
 * Without a provider, the parsed `approveShortcut` is used, else "1".
 *
 * @param {object} context parsed approval context.
 * @param {{preferAllowAll?: boolean}} [options]
 * @returns {string} keystroke to send.
 */
function getApproveKeystroke(context, options = {}) {
  const provider = context.providerId ? getProvider(context.providerId) : null;
  const oneShotOnly = options.preferAllowAll === false;

  if (!oneShotOnly) {
    if (provider) return provider.approveKeystroke(context);
    return context.hasAllowAll ? '2' : '1';
  }

  if (provider) {
    const withoutDurableOptions = {
      ...context,
      hasAllowAll: false,
      approveAllShortcut: null,
      alwaysAllowShortcut: null,
    };
    return provider.approveKeystroke(withoutDurableOptions);
  }
  return context.approveShortcut || '1';
}
|
|
164
240
|
|
|
241
|
+
/**
 * Provider-agnostic fallback parser for approval prompts.
 *
 * Looks (from the bottom up) for a question line that mentions approving /
 * allowing / proceeding, then scans up to the next 11 lines for an approve
 * option — either numbered ("1. Yes") or single-key ("y) Yes", "(y) Yes",
 * "y - Yes"). The lines above the question become the command text, stopping
 * at the nearest tool header (Bash, Edit, Write, …) if one is found within 30
 * lines.
 *
 * @param {string} cleanText screen text.
 * @param {string} [providerId] provider id to stamp on the result; defaults
 *   to 'generic'.
 * @returns {object|null} `{ providerId, toolName, command, warning,
 *   fullContext, hasAllowAll, approveShortcut }`, or null when no question or
 *   no approve option is found.
 */
function _parseGenericApprovalContext(cleanText, providerId) {
  const lines = String(cleanText || '').split('\n').map(l => l.trim()).filter(Boolean);
  if (!lines.length) return null;

  // The live question is the LAST matching line on screen.
  let proceedIdx = -1;
  for (let i = lines.length - 1; i >= 0; i--) {
    if (/(do you want|would you like|approve|allow|permission|proceed|run this command).*\?/i.test(lines[i])) {
      proceedIdx = i;
      break;
    }
  }
  if (proceedIdx < 0) return null;

  let approveShortcut = '';
  let hasYesNo = false;
  let hasAllowAll = false;
  for (let i = proceedIdx + 1; i < Math.min(proceedIdx + 12, lines.length); i++) {
    const line = lines[i];
    const numbered = line.match(/^\D*([1-9])\.\s*(Yes|Allow|Approve|Proceed)\b/i);
    // The hotkey must be the FIRST word character on the line (only cursor
    // glyphs / brackets / spaces may precede it). A `\D*` prefix would let
    // the "y" at the end of an ordinary word ("You may proceed …") pass for a
    // `y` hotkey and make prose look like an approvable prompt.
    const single = line.match(/^\W*([yY])\s*[-.)]?\s*(Yes|Allow|Approve|Proceed)\b/i);
    if (!hasYesNo && (numbered || single)) {
      approveShortcut = numbered ? numbered[1] : single[1].toLowerCase();
      hasYesNo = true;
    }
    if (/always|allow all|don't ask again|for this project|for this session|similar commands/i.test(line)) {
      hasAllowAll = true;
    }
  }
  if (!hasYesNo) return null;

  // Walk upward collecting command lines until a tool header is reached.
  let toolName = 'Generic approval';
  const contextLines = [];
  for (let i = proceedIdx - 1; i >= Math.max(0, proceedIdx - 30); i--) {
    const line = lines[i];
    if (/^[⏺●]?\s*(Bash command|Bash|Edit|Write|Read|Glob|Grep|Fetch|WebFetch|NotebookEdit|TodoWrite|Agent|MCP)\b/i.test(line)) {
      toolName = line.trim();
      break;
    }
    contextLines.unshift(line);
  }

  const fullContext = lines.slice(Math.max(0, proceedIdx - 20), Math.min(lines.length, proceedIdx + 12)).join('\n');
  return {
    providerId: providerId || 'generic',
    toolName,
    command: contextLines.join('\n').slice(0, 2000),
    warning: '',
    fullContext: fullContext.slice(0, 2000),
    hasAllowAll,
    approveShortcut,
  };
}
|
|
293
|
+
|
|
165
294
|
// Parse the terminal buffer to extract the approval context.
|
|
166
295
|
// If providerId is given and a matching provider exists, delegates to it.
|
|
167
296
|
// Defaults to Claude Code parsing for backward compatibility.
|
|
168
297
|
function parseApprovalContext(cleanText, providerId) {
|
|
169
298
|
const provider = providerId ? getProvider(providerId) : null;
|
|
170
299
|
if (provider) {
|
|
171
|
-
|
|
172
|
-
|
|
300
|
+
let ctx = null;
|
|
301
|
+
try { ctx = provider.parse(cleanText); } catch { ctx = null; }
|
|
302
|
+
if (ctx && !ctx.providerId) ctx.providerId = providerId;
|
|
303
|
+
if (ctx) return ctx;
|
|
304
|
+
return approvalAiRefinement.parseWithActiveRules(cleanText, providerId, { dbModule }) || null;
|
|
173
305
|
}
|
|
174
306
|
// Fall through to legacy Claude Code parsing below
|
|
175
307
|
const lines = cleanText.split('\n').map(l => l.trim()).filter(Boolean);
|
|
@@ -185,14 +317,20 @@ function parseApprovalContext(cleanText, providerId) {
|
|
|
185
317
|
if (BROAD_PROCEED_PATTERN.test(lines[i])) { proceedIdx = i; break; }
|
|
186
318
|
}
|
|
187
319
|
}
|
|
188
|
-
if (proceedIdx < 0)
|
|
320
|
+
if (proceedIdx < 0) {
|
|
321
|
+
return _parseGenericApprovalContext(cleanText, providerId)
|
|
322
|
+
|| approvalAiRefinement.parseWithActiveRules(cleanText, providerId, { dbModule });
|
|
323
|
+
}
|
|
189
324
|
|
|
190
325
|
// Find "1. Yes" after it (Edit prompts may have more options so search further)
|
|
191
326
|
let hasYesNo = false;
|
|
192
327
|
for (let i = proceedIdx + 1; i < Math.min(proceedIdx + 6, lines.length); i++) {
|
|
193
328
|
if (/^\D*1\.\s*Yes\b/.test(lines[i])) { hasYesNo = true; break; }
|
|
194
329
|
}
|
|
195
|
-
if (!hasYesNo)
|
|
330
|
+
if (!hasYesNo) {
|
|
331
|
+
return _parseGenericApprovalContext(cleanText, providerId)
|
|
332
|
+
|| approvalAiRefinement.parseWithActiveRules(cleanText, providerId, { dbModule });
|
|
333
|
+
}
|
|
196
334
|
|
|
197
335
|
// Extract warning (line before "Do you want to proceed?")
|
|
198
336
|
let warning = '';
|
|
@@ -200,7 +338,7 @@ function parseApprovalContext(cleanText, providerId) {
|
|
|
200
338
|
const line = lines[i];
|
|
201
339
|
if (!line) continue;
|
|
202
340
|
// Warning lines typically describe the risk
|
|
203
|
-
if (/command contains|could write|could modify|could delete|could overwrite|which can|permission|dangerous|destructive|overwrite|will modify|will delete|will overwrite|execute arbitrary|shell command substitution/i.test(line)) {
|
|
341
|
+
if (/command contains|could write|could modify|could delete|could overwrite|which can|permission|dangerous|destructive|overwrite|will modify|will delete|will overwrite|execute arbitrary|executes commands|modifies files|cannot be auto-allowed|shell command substitution/i.test(line)) {
|
|
204
342
|
warning = line;
|
|
205
343
|
break;
|
|
206
344
|
}
|
|
@@ -227,10 +365,33 @@ function parseApprovalContext(cleanText, providerId) {
|
|
|
227
365
|
toolName = line.trim();
|
|
228
366
|
break;
|
|
229
367
|
}
|
|
368
|
+
// MCP tool-use prompt: capture the real tool name (line ending in "(MCP)" or
|
|
369
|
+
// a raw mcp__/plugin: tool id) so it doesn't collapse to "Unknown".
|
|
370
|
+
if (/\(MCP\)\s*$/i.test(line) || /^(?:[⏺●]\s*)?(?:plugin:|mcp__)/i.test(line)) {
|
|
371
|
+
let name = line.trim().replace(/^[⏺●]\s*/, '').replace(/\s*\(MCP\)\s*$/i, '');
|
|
372
|
+
const argIdx = name.indexOf('(');
|
|
373
|
+
if (argIdx > 0) name = name.slice(0, argIdx).trim();
|
|
374
|
+
toolName = name || 'MCP tool';
|
|
375
|
+
contextLines.unshift(line.trim());
|
|
376
|
+
break;
|
|
377
|
+
}
|
|
230
378
|
contextLines.unshift(line);
|
|
231
379
|
}
|
|
232
380
|
|
|
233
|
-
|
|
381
|
+
// No tool header above the diff → Claude's file-operation prompt
|
|
382
|
+
// ("overwrite/create/make this edit to <file>?"). Derive Edit/Write + target
|
|
383
|
+
// file so it auto-approves as a normal edit instead of an Unknown command that
|
|
384
|
+
// risk-scores the file's code. (Mirror of the claude-code provider parse path,
|
|
385
|
+
// for the no-detected-provider fallback.)
|
|
386
|
+
let fileOpCommand = '';
|
|
387
|
+
if (!toolName) {
|
|
388
|
+
const ccProvider = getProvider('claude-code');
|
|
389
|
+
const fileOp = ccProvider && typeof ccProvider.deriveFileOpTool === 'function'
|
|
390
|
+
? ccProvider.deriveFileOpTool(lines[proceedIdx]) : null;
|
|
391
|
+
if (fileOp) { toolName = fileOp.toolName; fileOpCommand = fileOp.command; }
|
|
392
|
+
}
|
|
393
|
+
|
|
394
|
+
const command = (fileOpCommand || contextLines.join('\n')).trim();
|
|
234
395
|
|
|
235
396
|
// Build focused context: tool header + command + warning + prompt (not the whole screen)
|
|
236
397
|
const ctxStart = Math.max(0, endIdx - (contextLines.length + 1));
|
|
@@ -241,12 +402,16 @@ function parseApprovalContext(cleanText, providerId) {
|
|
|
241
402
|
// - Edit/Write: "2. Yes, allow all edits in foo/ during this session"
|
|
242
403
|
// - Bash: "2. Yes, and don't ask again for: sqlite3 $BRAIN_DB:*"
|
|
243
404
|
// - Claude Code 2.x: "2. Yes, allow reading from foo/ from this project"
|
|
405
|
+
// - Claude Code path groups: "2. Yes, and allow access to tmp/ and similar commands"
|
|
244
406
|
let hasAllowAll = false;
|
|
245
407
|
for (let i = proceedIdx + 1; i < Math.min(proceedIdx + 8, lines.length); i++) {
|
|
246
|
-
if (
|
|
408
|
+
if (CLAUDE_DURABLE_YES_OPTION_RE.test(lines[i])) { hasAllowAll = true; break; }
|
|
247
409
|
}
|
|
248
410
|
|
|
249
411
|
return {
|
|
412
|
+
// Preserve the legacy no-provider path. Provider-specific keystroke
|
|
413
|
+
// behavior should only activate when detection supplied a provider id.
|
|
414
|
+
providerId: providerId || '',
|
|
250
415
|
toolName: toolName || 'Unknown',
|
|
251
416
|
command: command.slice(0, 2000),
|
|
252
417
|
warning: warning || '',
|
|
@@ -255,6 +420,70 @@ function parseApprovalContext(cleanText, providerId) {
|
|
|
255
420
|
};
|
|
256
421
|
}
|
|
257
422
|
|
|
423
|
+
// Distinguish a GENUINELY LIVE approval prompt (the agent is blocked, waiting at an
|
|
424
|
+
// interactive selection at the bottom of the screen) from mere approval-shaped
|
|
425
|
+
// PROSE that happens to contain "Do you want to proceed?" / "1. Yes" — e.g. a
|
|
426
|
+
// coding agent, docs, or this very tool discussing approval prompts. parseApproval-
|
|
427
|
+
// Context matches the structure; this adds the "it's actually live" gate the auto-
|
|
428
|
+
// approver needs so it never fires on prompt-shaped output and injects stray
|
|
429
|
+
// keystrokes (the spurious "y" bug). A real TUI prompt renders a selection cursor on
|
|
430
|
+
// the active option and/or an interactive footer (Esc to cancel, ctrl+e to explain,
|
|
431
|
+
// ↑/↓ to select); prose has neither. We scan only the TAIL because the live prompt
|
|
432
|
+
// always renders at the bottom of the screen. Erring toward "not live" is the safe
|
|
433
|
+
// direction: a missed real prompt just falls back to manual approval, whereas a
|
|
434
|
+
// false positive auto-types into the session.
|
|
435
|
+
// How many trailing screen lines the live-prompt checks inspect.
const LIVE_PROMPT_TAIL_LINES = 18;
// A "fancy" selection cursor sitting directly in front of an approval option.
// A bare ">" is deliberately excluded because it shows up in quoted prose such
// as "> 2. Yes". "→" is included for Cursor Agent (e.g. "→ Run (once) (y)").
const LIVE_SELECTION_CURSOR_RE = /^\s*[▸❯›▶➤◆→]\s*(?:\d+[.)]\s*)?(?:yes|allow|approve|proceed|accept|run\b|make edits|don'?t ask|always)/i;
// Footers an interactive TUI shows while it waits at a prompt, including
// Cursor Agent's hotkey footer: "Run (once) (y)" / "Skip (esc or n)" /
// "Auto-run everything (shift+tab)" / "… to allowlist? (tab)".
// NOTE: "esc to interrupt", the generic "(shift+)tab to cycle/toggle/switch",
// "← for agents" and "↓ to manage" were intentionally removed from this list.
// They are working / mode-cycle composer footers, not approval footers, and
// matching them made every idle or working session look like a live approval.
// They now feed the negative gate, COMPOSER_STATUS_FOOTER_RE.
const LIVE_PROMPT_FOOTER_RE = /\besc to (?:cancel|reject|go back)\b|\bctrl\+e to explain\b|\bpress enter to (?:confirm|continue|select|submit)\b|\benter to confirm\b|[↑↓]\s*(?:\/\s*[↑↓]\s*)?(?:to\s+)?(?:select|navigate|choose)\b|\bup\/down (?:arrows? )?to (?:select|navigate)\b|\(esc or n\)|Run \(once\) \(y\)|Auto-run everything[^\n]*\(shift\+tab\)|to allowlist\?\s*\(tab\)/i;
// Footers and hints of the agent's READY COMPOSER or WORKING status. A live
// approval widget replaces the composer, so seeing one of these means the agent
// is idle or generating, not blocked at an approval. This is the decisive
// negative gate: a session that merely displays approval-shaped text still
// renders its composer at the bottom and so is never mistaken for a live
// prompt. None of these appear in a real Claude/Codex/Cursor approval widget.
const COMPOSER_STATUS_FOOTER_RE = /\besc to interrupt\b|\b(?:shift\+tab|tab) to (?:cycle|toggle|switch)\b|\bauto[- ]?mode on\b|\baccept edits on\b|\bplan mode on\b|\bbypass(?:ing)? permissions\b|\?\s*for shortcuts\b|(?:⏵⏵?|\*)\s*for agents\b|\bctrl\+t to (?:show|hide|toggle)\b/i;
|
|
457
|
+
// True when the agent's READY-COMPOSER or WORKING status footer is visible in the
|
|
458
|
+
// last few lines. A live approval/choice widget REPLACES the composer, so a
|
|
459
|
+
// composer footer at the bottom is decisive proof the agent is idle/generating at
|
|
460
|
+
// its prompt, NOT blocked at a live approval or selection menu. Shared by the
|
|
461
|
+
// auto-approver (isLiveApprovalPrompt) and the idle waiting-input detector
|
|
462
|
+
// (server.js _idlePromptDetections) so both surfaces agree on "composer present ⇒
|
|
463
|
+
// not a live prompt" instead of duplicating the heuristic.
|
|
464
|
+
/**
 * Check whether any of the last LIVE_PROMPT_TAIL_LINES lines of the screen
 * matches COMPOSER_STATUS_FOOTER_RE (the agent's ready-composer / working
 * status footer).
 *
 * @param {string} cleanText screen text; nullish is treated as empty.
 * @returns {boolean} true when a composer/working footer is in the tail.
 */
function hasComposerStatusFooter(cleanText) {
  return String(cleanText || '')
    .split('\n')
    .slice(-LIVE_PROMPT_TAIL_LINES)
    .some((row) => COMPOSER_STATUS_FOOTER_RE.test(String(row || '')));
}
|
|
472
|
+
|
|
473
|
+
/**
 * Decide whether the screen tail shows a prompt the agent is actually waiting
 * on, as opposed to text that merely looks like one.
 *
 * A composer/working footer in the tail is an immediate "no". Otherwise the
 * answer is "yes" as soon as any tail line carries a selection cursor on an
 * approval option or an interactive prompt footer.
 *
 * @param {string} cleanText screen text; nullish is treated as empty.
 * @returns {boolean} true only when a live prompt marker is present.
 */
function isLiveApprovalPrompt(cleanText) {
  // Negative gate first: approval-shaped text above a composer is not live.
  if (hasComposerStatusFooter(cleanText)) return false;

  const tailRows = String(cleanText || '').split('\n').slice(-LIVE_PROMPT_TAIL_LINES);
  return tailRows.some((row) => {
    const text = String(row || '');
    return LIVE_SELECTION_CURSOR_RE.test(text) || LIVE_PROMPT_FOOTER_RE.test(text);
  });
}
|
|
486
|
+
|
|
258
487
|
// Normalize a command into a stable "signature" by extracting the command structure
|
|
259
488
|
// and replacing variable parts (paths, strings, numbers) with placeholders.
|
|
260
489
|
// Examples:
|
|
@@ -359,6 +588,328 @@ function findMatchingRule(context) {
|
|
|
359
588
|
return null;
|
|
360
589
|
}
|
|
361
590
|
|
|
591
|
+
/**
 * Split a command fragment into whitespace-separated words, keeping a
 * double- or single-quoted run together and removing the quotes when they
 * wrap the whole word.
 *
 * This is a lightweight splitter, not a shell parser: escapes and quotes
 * embedded mid-word are left as they are.
 *
 * @param {string} segment command fragment; nullish is treated as empty.
 * @returns {string[]} the words, in order.
 */
function _shellTokens(segment) {
  const unquote = (word) => word.replace(/^(['"])([\s\S]*)\1$/, '$2');
  const words = String(segment || '').match(/"[^"]*"|'[^']*'|[^\s]+/g);
  return words === null ? [] : words.map((word) => unquote(word));
}
|
|
595
|
+
|
|
596
|
+
/**
 * Pull out the nested command that follows each "-exec" in a command line.
 *
 * For every occurrence of the literal text "-exec", the text after it is cut
 * at the first terminator — an escaped `\;`, a whitespace-delimited `;`, or a
 * whitespace-delimited `+`. When no terminator is found, at most the first
 * 240 characters are kept. Empty results are dropped.
 *
 * @param {string} command full command line; nullish is treated as empty.
 * @returns {string[]} nested commands, in order of appearance.
 */
function _extractFindExecSegments(command) {
  const text = String(command || '');
  const terminator = /(?:\\;|\s;\s|\s\+(?:\s|$))/;
  const nestedCommands = [];
  for (const hit of text.matchAll(/-exec/g)) {
    const rest = text.slice(hit.index + 5).trim();
    const stop = terminator.exec(rest);
    const cutAt = stop ? stop.index : Math.min(rest.length, 240);
    const nested = rest.slice(0, cutAt).trim();
    if (nested) nestedCommands.push(nested);
  }
  return nestedCommands;
}
|
|
610
|
+
|
|
611
|
+
/**
 * Decide whether one `find -exec` nested command is provably read-only.
 *
 * A segment is rejected when it:
 *   - contains command substitution (`$(…)` or backticks), which runs another
 *     command regardless of the leading tool;
 *   - contains an unquoted shell control operator (`;`, `&`, `|`), which ends
 *     the nested command and starts a different one;
 *   - mentions a shell, interpreter, file-mutating tool or downloader;
 *   - contains a redirection character (`<` or `>`).
 * Otherwise it is accepted when the leading tool is a plain inspection tool
 * (cat, head, tail, grep, rg, wc, stat, file, ls), or `sed` used strictly as
 * `sed -n <range>p` with no in-place flag.
 *
 * Erring toward `false` only causes a manual review.
 *
 * @param {string} segment nested command text (without the -exec terminator).
 * @returns {boolean} true only when the segment is recognised as read-only.
 */
function _isReadOnlyFindExecSegment(segment) {
  const text = String(segment || '');
  // Command substitution executes arbitrary code even inside double quotes,
  // so check the raw text.
  if (/\$\(|`/.test(text)) return false;
  // Control operators only matter outside quotes (a quoted grep alternation
  // like "foo|bar" is harmless), so blank out quoted runs first.
  const outsideQuotes = text.replace(/"[^"]*"|'[^']*'/g, ' ');
  if (/[;&|]/.test(outsideQuotes)) return false;

  const tokens = _shellTokens(segment).filter(Boolean);
  if (!tokens.length) return false;
  // Compare on the basename so "/bin/cat" is treated like "cat".
  const tool = tokens[0].split('/').pop().toLowerCase();
  const unsafeTokens = /(^|[\s;&|])(?:sh|bash|zsh|fish|python|python3|node|ruby|perl|osascript|rm|mv|cp|chmod|chown|touch|mkdir|rmdir|truncate|tee|dd|curl|wget)\b/i;
  if (unsafeTokens.test(segment)) return false;
  if (/[<>]/.test(segment)) return false;

  const readOnlyTools = new Set(['cat', 'head', 'tail', 'grep', 'rg', 'wc', 'stat', 'file', 'ls']);
  if (readOnlyTools.has(tool)) return true;

  if (tool === 'sed') {
    // Any flag containing "i" (e.g. -i, -ni) or --in-place may edit files.
    if (tokens.some(t => /^-.*i/.test(t) || t === '--in-place')) return false;
    // Without -n, sed could still be running a mutating script; require it.
    if (!tokens.some(t => t === '-n' || /^-n[a-zA-Z]*$/.test(t))) return false;
    const expressions = tokens.slice(1).filter(t =>
      t !== '-n'
      && !/^-n[a-zA-Z]*$/.test(t)
      && t !== '{}'
      && !/^\{\}\+?$/.test(t)
      && !/^--/.test(t)
    );
    // Only pure print expressions such as "p", "5p", "1,20p" or "1,$p".
    return expressions.length > 0
      && expressions.every(expr => /^(\d+|\$)?(,(\d+|\$))?[pP]$/.test(expr));
  }

  return false;
}
|
|
638
|
+
|
|
639
|
+
/**
 * Policy verdict for commands of the form `find … -exec …`.
 *
 * @param {string} command full command line.
 * @returns {object|null} null when the command is not a `find … -exec`;
 *   otherwise a final policy object:
 *   - `escalate` when the find expression also carries an action that
 *     modifies the filesystem or runs a command this check does not inspect
 *     (-delete, -ok, -okdir, -execdir, -fprint, -fprint0, -fprintf, -fls);
 *   - `escalate` when no nested command could be extracted;
 *   - `approve` when every nested command is read-only inspection;
 *   - `escalate` otherwise.
 */
function classifyFindExecPolicy(command) {
  const cmd = String(command || '');
  if (!/\bfind\b[\s\S]*\s-exec\s/.test(cmd)) return null;

  // A read-only -exec payload says nothing about the REST of the find
  // expression: `-delete` removes every match, `-fprint*`/`-fls` write files,
  // and `-ok`/`-okdir`/`-execdir` run commands that are not vetted here.
  // Never certify such a command as "read-only inspection".
  if (/\s-(?:delete|okdir|ok|execdir|fprintf|fprint0|fprint|fls)(?=\s|$)/.test(cmd)) {
    return {
      decision: 'escalate',
      reasoning: 'find combines -exec with a destructive or unvetted action (-delete / -ok / -execdir / -fprint) (heuristic)',
      riskLevel: 'high',
      policyFinal: true,
      ruleLabel: 'find -exec needs review',
      ruleDescription: 'Do not auto-approve find -exec when the expression also deletes, writes, or runs unvetted commands',
    };
  }
  // NOTE(review): anything after the -exec terminator (e.g. `… \; | sh`) is not
  // inspected here — confirm the caller vets trailing pipelines separately.

  const segments = _extractFindExecSegments(cmd);
  if (!segments.length) {
    return {
      decision: 'escalate',
      reasoning: 'find -exec could not be parsed safely (heuristic)',
      riskLevel: 'high',
      policyFinal: true,
      ruleLabel: 'find -exec needs review',
      ruleDescription: 'find -exec must expose a read-only nested command before one-shot approval',
    };
  }
  if (segments.every(_isReadOnlyFindExecSegment)) {
    return {
      decision: 'approve',
      reasoning: 'find -exec only runs read-only file inspection commands (heuristic)',
      riskLevel: 'low',
      policyFinal: true,
      ruleLabel: 'find -exec read-only inspection',
      rulePattern: String.raw`\bfind\b[\s\S]*\s-exec\s+(sed\s+-n|cat|head|tail|grep|rg|wc|stat|file|ls)\b`,
      ruleDescription: 'One-shot approve find -exec only when every nested command is read-only file inspection',
    };
  }
  return {
    decision: 'escalate',
    reasoning: 'find -exec can run arbitrary commands; nested command is not proven read-only (heuristic)',
    riskLevel: 'high',
    policyFinal: true,
    ruleLabel: 'find -exec needs review',
    ruleDescription: 'Do not auto-approve find -exec unless every nested command is read-only',
  };
}
|
|
673
|
+
|
|
674
|
+
// Build a readable TITLE + grouping SIGNATURE for an escalation from its command
|
|
675
|
+
// context. Escalations were being stored as "Bash command" with an empty signature,
|
|
676
|
+
// so the Permission UI couldn't tell them apart or group them. We strip a leading
|
|
677
|
+
// run of `export VAR=… &&` / `VAR=…` env assignments (they carry credentials and
|
|
678
|
+
// aren't the operation being judged) so the title shows the ACTUAL command
|
|
679
|
+
// (`aws eks update-kubeconfig …`), and we normalize args to placeholders for a
|
|
680
|
+
// stable group key.
|
|
681
|
+
/**
 * Get the command text to describe an escalation with.
 *
 * Uses `context.command` when it is non-empty after trimming. Otherwise it
 * falls back to the first non-blank line of `context.fullContext` that does
 * not look like prompt chrome (question text, option rows, cursor glyphs,
 * footers, tool headers, warnings).
 *
 * @param {object} [context] approval context.
 * @returns {string} the command text, or '' when nothing suitable exists.
 */
function _escalationCommandText(context) {
  const explicit = String(context?.command || '').trim();
  if (explicit) return explicit;

  const chrome = /^(do you want|would you like|❯|›|>|\d+\.\s|esc to|ctrl\+e|bash command|warning|this command|⏺|●)/i;
  const candidates = String(context?.fullContext || '')
    .split('\n')
    .map((row) => row.trim())
    .filter(Boolean);
  for (const row of candidates) {
    if (!chrome.test(row)) return row;
  }
  return '';
}
|
|
690
|
+
// Leading FRAMING segments are not the operation being judged — strip a leading run
|
|
691
|
+
// of them so the title/signature show the ACTUAL command. Three kinds, separated by
|
|
692
|
+
// `&&` / `;` / newline:
|
|
693
|
+
// - env assignments (`export VAR=…` / `VAR=…`) — carry credentials, fragment groups.
|
|
694
|
+
// - `cd <path>` — navigation; the long worktree path otherwise eats the 120-char
|
|
695
|
+
// title budget AND collapses every `cd …` escalation into one group keyed "cd"
|
|
696
|
+
// with a dangerously broad `Bash(cd:*)` suggested rule.
|
|
697
|
+
// - pure `echo "…"` banners — section labels the agent prints, not actions.
|
|
698
|
+
// Guarded against stripping to empty (a bare `cd x` / `echo x` keeps itself).
|
|
699
|
+
// Leading "framing" segments that are peeled off a command, each followed by
// one or more `&&` / `;` / newline separators:
//   1. an environment assignment, optionally prefixed with `export`
//   2. a `cd <path>` navigation step
//   3. an `echo …` banner
const _FRAMING_PREFIX_RES = [
  /^(?:export\s+)?[A-Za-z_][A-Za-z0-9_]*=(?:"[^"]*"|'[^']*'|[^\s&;|]+)\s*(?:&&|;|\n)+\s*/,
  /^cd\s+(?:"[^"]*"|'[^']*'|[^\s&;|]+)\s*(?:&&|;|\n)+\s*/,
  /^echo\s+(?:"[^"]*"|'[^']*'|[^&;|\n]+?)\s*(?:&&|;|\n)+\s*/,
];

/**
 * Remove a leading run of framing segments (env assignments, `cd`, `echo`
 * banners) so that what remains is the operation itself.
 *
 * At most 16 segments are removed. If stripping would leave nothing, the
 * trimmed input is returned unchanged.
 *
 * @param {string} cmd command text; nullish is treated as empty.
 * @returns {string} the command without its framing prefix.
 */
function _stripFramingPrefix(cmd) {
  const trimmedInput = String(cmd || '').trim();
  let remaining = trimmedInput;
  let passes = 0;
  while (passes < 16) {
    // First pattern (in list order) that matches the current head wins.
    const hit = _FRAMING_PREFIX_RES
      .map((pattern) => remaining.match(pattern))
      .find((result) => result !== null);
    if (!hit) break;
    remaining = remaining.slice(hit[0].length).trim();
    passes += 1;
  }
  return remaining || trimmedInput;
}
|
|
716
|
+
/**
 * Build the display title and grouping signature for an escalation.
 *
 * The title is the framing-stripped command with whitespace runs collapsed,
 * limited to 120 characters. The signature is computed from the stripped
 * command, falling back to the unstripped command text when that yields
 * nothing.
 *
 * @param {object} [context] approval context (`toolName`, `command`,
 *   `fullContext`).
 * @returns {{title: string, signature: *}} title and signature.
 */
function escalationCommandParts(context) {
  const toolName = context && context.toolName;
  const rawCommand = _escalationCommandText(context);
  const operation = _stripFramingPrefix(rawCommand);

  const title = operation.replace(/\s+/g, ' ').slice(0, 120).trim();
  let signature = normalizeCommandSignature(toolName, operation);
  if (!signature) signature = normalizeCommandSignature(toolName, rawCommand);
  return { title, signature };
}
|
|
724
|
+
|
|
725
|
+
// A rescue candidate is "actionable" only when we have a concrete command to show
|
|
726
|
+
// the operator AND the parser classified the tool. An empty command or an
|
|
727
|
+
// "Unknown" tool means the parse degraded (almost always approval-shaped PROSE,
|
|
728
|
+
// not a live prompt) — escalating it just yields a confusing, meaningless banner.
|
|
729
|
+
/**
 * Tell whether a rescue candidate is worth showing to the operator.
 *
 * It is actionable only when a context exists, `escalationCommandParts`
 * produces a non-empty title for it, and its tool name (after removing any
 * leading bullet glyphs or whitespace) is neither empty nor "unknown".
 *
 * @param {object} [context] parsed approval context.
 * @returns {boolean} true when the candidate is actionable.
 */
function _rescueCandidateActionable(context) {
  if (!context) return false;

  const { title } = escalationCommandParts(context);
  if (!title) return false;

  const toolLabel = String(context.toolName || '')
    .replace(/^[⏺●\s]+/, '')
    .trim()
    .toLowerCase();
  return toolLabel !== '' && toolLabel !== 'unknown';
}
|
|
736
|
+
|
|
737
|
+
// The crisp, OBJECTIVE reason a command was sent to review instead of auto-approved
|
|
738
|
+
// — a category + sentence, NOT the AI verifier's vague free-text. First match wins
|
|
739
|
+
// (priority order). Shown in the session banner and the Pending group's "Why
|
|
740
|
+
// escalated" so the user can tell at a glance WHY (e.g. "Runs arbitrary code
|
|
741
|
+
// (node -e)") rather than reading a misleading `cd …` label.
|
|
742
|
+
/**
 * Give a short, objective reason why a command was sent to review.
 *
 * The lower-cased command is checked in priority order and the first hit wins:
 *   1. a high-risk shell pattern,
 *   2. an inline-code runner (node -e, python -c, sh -c, perl -e, ruby -e, eval),
 *   3. `find … -exec`,
 *   4. otherwise "not on the allowlist".
 * An empty command is reported as unparseable.
 *
 * @param {object} [context] approval context; only `command` is read.
 * @returns {{category: string, reason: string}} category is one of
 *   'unrecognized', 'high-risk-pattern', 'arbitrary-code', 'find-exec'.
 */
function classifyBlockReason(context) {
  const cmd = String((context && context.command) || '').trim();
  if (cmd === '') {
    return { category: 'unrecognized', reason: 'Could not parse the command — review before allowing.' };
  }
  const lc = cmd.toLowerCase();

  // 1) High-risk shell patterns.
  const highRisk = [
    { pattern: /rm\s+-rf?\s+(?!\/tmp\/)[\/~]/, why: 'recursive delete outside /tmp (rm -rf)' },
    { pattern: /--force\b|force.?push/, why: 'a force operation (--force / force-push)' },
    { pattern: /\bsudo\s/, why: 'running as root (sudo)' },
    { pattern: /\bchmod\s+777\b/, why: 'world-writable permissions (chmod 777)' },
    { pattern: /(?:curl|wget)[\s\S]*\|\s*sh\b/, why: 'pipe-to-shell (curl … | sh)' },
    { pattern: /\bdrop\s+table\b/, why: 'dropping a database table' },
    { pattern: />\s*\/(?:etc|usr|var)\//, why: 'writing to a system directory' },
    { pattern: /\bmkfs\b|\bdd\s+if=/, why: 'a low-level disk write (dd / mkfs)' },
  ];
  const risky = highRisk.find((entry) => entry.pattern.test(lc));
  if (risky) {
    return { category: 'high-risk-pattern', reason: `Matches a high-risk pattern: ${risky.why}.` };
  }

  // 2) Inline code execution.
  const arbitrary = [
    { pattern: /\bnode\s+-e\b/, name: 'node -e' },
    { pattern: /\bnode\s+--eval\b/, name: 'node --eval' },
    { pattern: /\bpython3?\s+-c\b/, name: 'python -c' },
    { pattern: /\b(?:ba)?sh\s+-c\b/, name: 'sh -c' },
    { pattern: /\bperl\s+-e\b/, name: 'perl -e' },
    { pattern: /\bruby\s+-e\b/, name: 'ruby -e' },
    { pattern: /\beval\s/, name: 'eval' },
  ];
  const runner = arbitrary.find((entry) => entry.pattern.test(lc));
  if (runner) {
    return { category: 'arbitrary-code', reason: `Runs arbitrary code (\`${runner.name}\`) — can't be statically certified safe; review to allow.` };
  }

  // 3) find -exec runs a command for every match.
  const usesFindExec = /\bfind\b[\s\S]*-exec\b/.test(lc);
  if (usesFindExec) {
    return { category: 'find-exec', reason: 'Uses `find -exec`, which runs an arbitrary command for each match.' };
  }

  // 4) Nothing specific matched.
  return { category: 'unrecognized', reason: 'Not on the safe-command allowlist — needs review.' };
}
|
|
780
|
+
|
|
781
|
+
// The lightweight context packet the goal-alignment judge reasons over. Built
|
|
782
|
+
// from WARM data only — no transcript blob read in the approval hot path (that
|
|
783
|
+
// would JSON.parse multi-MB on the main loop and freeze every session). The
|
|
784
|
+
// pieces:
|
|
785
|
+
// - goal: CTM's session title (its running summary of the task) — the cheapest
|
|
786
|
+
// stand-in for "what is this session trying to do?".
|
|
787
|
+
// - cwd: the session's working directory (scopes "local/dev" vs "elsewhere").
|
|
788
|
+
// - the agent's recent reasoning + the command itself are ALREADY on screen in
|
|
789
|
+
// context.fullContext (the viewport the verifier prompt includes) — that IS
|
|
790
|
+
// the recent transcript, for free.
|
|
791
|
+
// The judge uses goal + cwd + on-screen context to auto-approve goal-aligned
|
|
792
|
+
// actions (even irreversible local ones) and pause off-goal / real-world-
|
|
793
|
+
// destructive ones. The dangerous-command blocklist stays the hard floor above.
|
|
794
|
+
/**
 * Build the small { goal, cwd } packet attached to an approval context.
 *
 * @param {object|null|undefined} session - Session record; reads `title`, `label`, `cwd`, `meta.cwd`.
 * @returns {{goal: string, cwd: string}} `goal` is the title (or label) with whitespace
 *   runs collapsed, trimmed and capped at 300 characters; `cwd` is the trimmed
 *   working directory. Both are empty strings when the source value is missing.
 */
function _buildSessionContext(session) {
  const rawGoal = session?.title || session?.label || '';
  const rawCwd = session?.cwd || session?.meta?.cwd || '';
  return {
    goal: String(rawGoal).replace(/\s+/g, ' ').trim().slice(0, 300),
    cwd: String(rawCwd).trim(),
  };
}
|
|
799
|
+
|
|
800
|
+
/**
 * Run the optional auto-approval verifier and, on an "unsafe" verdict, record
 * and broadcast an escalation instead of letting the auto-approval proceed.
 *
 * Side effects: sets `context.sessionContext` when the caller did not supply
 * one; on a block, writes one decision via `dbModule.addApprovalDecision` (if
 * present) and emits one `approval-decision` message through `broadcastFn`.
 * A DB failure is logged and a broadcast failure is ignored; neither prevents
 * the block from being returned.
 *
 * @param {string} sessionId - Id stored on the decision and sent in the broadcast.
 * @param {object} session - Session record; used for the context packet and passed to `broadcastFn`.
 * @param {object} context - Approval context (reads toolName, command, fullContext, warning).
 * @param {Function} broadcastFn - Called as `broadcastFn(sessionId, session, message)`.
 * @param {string} label - Fallback label when no command title can be derived.
 * @param {string} source - Forwarded to the client as `verifierSource`.
 * @param {string} riskLevel - When set to anything other than 'medium'/'high' the verifier is skipped; an unset value is verified.
 * @param {Function} callModel - Forwarded to `verifyIfEnabled`.
 * @returns {Promise<null|{blocked: true, verifier: object, reason: string}>}
 *   `null` means "do not block"; otherwise the block result.
 */
async function _verifyAutoApprovalOrBlock(sessionId, session, context, broadcastFn, label, source, riskLevel, callModel) {
  // Verifier scope: medium+ risk only. Clearly low-risk/read-only approvals skip
  // the LLM second opinion — keeps the fast path fast and usable offline.
  if (riskLevel && riskLevel !== 'medium' && riskLevel !== 'high') return null;
  // Attach the warm session-context packet (goal + cwd) so the built-in verifier
  // can judge GOAL-ALIGNMENT, not just the command in isolation. Single point —
  // covers both call sites (allow-by-default + approval-rescue). Never overwrite a
  // packet the caller already supplied.
  if (context && !context.sessionContext) context.sessionContext = _buildSessionContext(session);
  const verifier = await verifyIfEnabled({ context, dbModule, callModel });
  // Approve-by-default: ONLY a confident "unsafe" verdict blocks an auto-approval.
  // A disabled verifier, a 'safe' verdict, or an 'unknown'/errored verdict (AI
  // unavailable, timeout, bad response) all fall through to APPROVE. The approver
  // escalates only when it is sure the command is high risk — never merely because
  // the AI gate could not produce an answer. (The dangerous-command blocklist
  // remains the deterministic hard gate above this.)
  if (!verifier.enabled || verifier.verdict !== 'unsafe') return null;

  // Prefer the OBJECTIVE block reason (the category — e.g. "Runs arbitrary code
  // (node -e)") over the verifier's vague free-text, so the banner + Pending group
  // tell the user WHY it was held. Fall back to the verifier's reason, then a default.
  const blocked = classifyBlockReason(context);
  const reason = blocked.reason || verifier.reason || 'Auto-approval verifier flagged this command as high risk.';
  const parts = escalationCommandParts(context);
  // The group key the Permission "Pending" tab will bucket this under — same fn the
  // endpoint uses (lib/escalation-review.commandHead over the signature), so the
  // banner's Review → can deep-link to the exact card.
  const groupKey = (() => { try { return commandHead(parts.signature) || ''; } catch { return ''; } })();
  const decision = {
    sessionId,
    toolName: context.toolName,
    // Record the ACTUAL command (not "Bash command") + a stable grouping signature
    // so the Permission "Needs Review" surface can group escalations by type.
    commandSummary: parts.title || label || context.toolName,
    commandSignature: parts.signature || '',
    fullContext: String(context.fullContext || '').slice(0, 2000),
    warning: context.warning || '',
    decision: 'escalated',
    reasoning: reason,
    decidedBy: 'verifier',
    // A verifier block is always stored as high risk, regardless of the incoming riskLevel.
    riskLevel: 'high',
  };
  let decisionId;
  // Persisting is best-effort: on failure decisionId stays undefined and the broadcast still goes out.
  try { decisionId = dbModule.addApprovalDecision?.(decision); } catch (e) { console.error('[approval-agent] verifier DB error:', e.message); }
  try {
    broadcastFn(sessionId, session, {
      type: 'approval-decision',
      sessionId,
      decision: 'escalated',
      decidedBy: 'verifier',
      decisionId,
      // Banner shows the actual command (matches the recorded commandSummary),
      // not the heuristic rule label.
      label: parts.title || label || context.toolName || 'Approval needs review',
      reasoning: reason,
      // The objective category + the Pending group key, so the client can show the
      // right reason and the Review → button can deep-link to the matching group.
      blockCategory: blocked.category || '',
      groupKey,
      riskLevel: decision.riskLevel,
      verifierSource: source || '',
      verifierVerdict: verifier.verdict,
      command: String(context.command || '').slice(0, 500),
      warning: context.warning || '',
    });
  } catch {}
  return { blocked: true, verifier, reason };
}
|
|
868
|
+
|
|
869
|
+
// Split a shell command into its top-level clauses on the same separators Claude
|
|
870
|
+
// Code's own permission model recognizes (&& || ; | |& & and newlines), while
|
|
871
|
+
// respecting single/double quotes, backtick and $( )/$(( )) substitution nesting so
|
|
872
|
+
// an operator INSIDE a quote or substitution does not split. A compound command is
|
|
873
|
+
// only as safe as its riskiest clause, so risk is evaluated per-clause and MAX'd —
|
|
874
|
+
// otherwise `cat x; kill -9 PID` inherits `cat`'s low risk (a real auto-approver
|
|
875
|
+
// hole, and the same class as Cursor's `&&`-allowlist-bypass CVE).
|
|
876
|
+
/**
 * Split a shell command into its top-level clauses.
 *
 * Separators are `;`, newline, `&&`, `||`, `|`, `|&` and `&`. A separator is
 * ignored while inside single quotes, double quotes, backticks, or a
 * `$( … )` / `$(( … ))` substitution, and when it is backslash-escaped.
 *
 * Backslash handling follows shell quoting rules: outside single quotes a
 * backslash protects the next character, so the pair is copied through as a
 * unit. This matters for safety — checking only "is the previous character a
 * backslash?" misreads `"a\\"` (escaped backslash, then a real closing quote)
 * as still-open, and misreads `\"` outside quotes as an opening quote. In both
 * cases every later separator would be swallowed and a trailing command would
 * be evaluated as part of a harmless-looking clause.
 *
 * @param {string} cmd - Raw command text; falsy values are treated as empty.
 * @returns {string[]} Trimmed, non-empty clauses in source order.
 */
function _splitShellClauses(cmd) {
  const s = String(cmd || '');
  const clauses = [];
  let buf = '';
  let sq = false;
  let dq = false;
  let bt = false;
  let depth = 0;
  for (let i = 0; i < s.length; i += 1) {
    const ch = s[i];
    const next = s[i + 1];
    // Single quotes are fully literal: no escapes, only the closing quote matters.
    if (sq) { buf += ch; if (ch === "'") sq = false; continue; }
    // Everywhere else a backslash escapes the following character (including a
    // newline, i.e. a line continuation), so neither can open/close a quote or
    // act as a separator.
    if (ch === '\\' && next !== undefined) { buf += ch + next; i += 1; continue; }
    if (dq) { buf += ch; if (ch === '"') dq = false; continue; }
    if (bt) { buf += ch; if (ch === '`') bt = false; continue; }
    if (ch === "'") { sq = true; buf += ch; continue; }
    if (ch === '"') { dq = true; buf += ch; continue; }
    if (ch === '`') { bt = true; buf += ch; continue; }
    if (ch === '$' && next === '(') { depth += 1; buf += '$('; i += 1; continue; } // $( and $((
    // Plain parentheses only count while already inside a substitution.
    if (ch === '(') { if (depth > 0) depth += 1; buf += ch; continue; }
    if (ch === ')') { if (depth > 0) depth -= 1; buf += ch; continue; }
    if (depth > 0) { buf += ch; continue; }
    if (ch === '\n' || ch === ';') { clauses.push(buf); buf = ''; continue; }
    if (ch === '&' || ch === '|') {
      // && || |& all consume two chars; single & or | consume one.
      if (next === ch || (ch === '|' && next === '&')) { i += 1; }
      clauses.push(buf); buf = ''; continue;
    }
    buf += ch;
  }
  if (buf) clauses.push(buf);
  return clauses.map((c) => c.trim()).filter(Boolean);
}
|
|
904
|
+
|
|
905
|
+
// #2: process-control clause — terminating processes (kill/pkill/killall, or a
|
|
906
|
+
// pipeline ending in `xargs kill`). Never blanket auto-approved as "low": it's a
|
|
907
|
+
// legitimate dev action (kill a dev server) but the user/AI must vouch for the
|
|
908
|
+
// target, so it routes to review (and, in the context-aware judge, goal-alignment).
|
|
909
|
+
/**
 * Report whether a clause terminates processes.
 *
 * @param {string} clause - One shell clause.
 * @returns {boolean} True when the clause contains `kill`, `pkill` or `killall`
 *   as a whole word, or `xargs` followed later by `kill`.
 */
function _isProcessControlClause(clause) {
  const killers = [
    /\b(?:kill|pkill|killall)\b/,
    /\bxargs\b[\s\S]*\bkill\b/,
  ];
  return killers.some((pattern) => pattern.test(clause));
}
|
|
912
|
+
|
|
362
913
|
// Simple heuristic review when no API key is available
|
|
363
914
|
function reviewWithHeuristics(context) {
|
|
364
915
|
const cmd = (context.command || '').toLowerCase();
|
|
@@ -376,6 +927,29 @@ function reviewWithHeuristics(context) {
|
|
|
376
927
|
ruleDescription: 'Read Wall-E MCP memory status' };
|
|
377
928
|
}
|
|
378
929
|
|
|
930
|
+
// MCP tool calls: auto-approve clearly READ-ONLY operations (navigate,
|
|
931
|
+
// snapshot, read, list, query, etc.). Mutating MCP ops (click, type, fill,
|
|
932
|
+
// run_code, evaluate, upload, write, delete, …) deliberately fall through to
|
|
933
|
+
// the AI reviewer/verifier (medium). Matches both the cleaned tool name
|
|
934
|
+
// (e.g. "plugin:playwright:playwright – navigate to a url") and the raw
|
|
935
|
+
// "mcp__…" / "(mcp)" command text.
|
|
936
|
+
const isMcp = /^(?:plugin:|mcp__|mcp\b)/i.test(tool) || /\(mcp\)/i.test(cmd);
|
|
937
|
+
if (isMcp) {
|
|
938
|
+
const readOnlyMcp = /\b(navigate(?:_back)?|snapshot|take_screenshot|screenshot|read|list|get|query|search|console_messages|network_requests?|wait_for|hover|tabs|resolve[-_ ]library[-_ ]id|get[-_ ]library[-_ ]docs|browser_snapshot|browser_navigate)\b/i;
|
|
939
|
+
const mutatingMcp = /\b(click|type|fill|drag|drop|file_upload|upload|run_code|evaluate|press_key|select_option|handle_dialog|write|create|delete|remove|update|send|post|install|deploy|exec)\b/i;
|
|
940
|
+
if (readOnlyMcp.test(tool) && !mutatingMcp.test(tool)) {
|
|
941
|
+
return { decision: 'approve', reasoning: 'Read-only MCP operation (heuristic)', riskLevel: 'low',
|
|
942
|
+
ruleLabel: context.toolName || 'MCP read-only operation',
|
|
943
|
+
rulePattern: '',
|
|
944
|
+
ruleDescription: 'Auto-approve read-only MCP operations (navigate, snapshot, read, list, query)' };
|
|
945
|
+
}
|
|
946
|
+
// Any other MCP op: do not blanket-approve — return medium so it routes to
|
|
947
|
+
// the AI reviewer/verifier (which can still approve or escalate).
|
|
948
|
+
return { decision: 'approve', reasoning: 'MCP operation — needs review', riskLevel: 'medium', fallback: true,
|
|
949
|
+
ruleLabel: context.toolName || 'MCP operation', rulePattern: '',
|
|
950
|
+
ruleDescription: 'Routed to AI reviewer/verifier for a decision' };
|
|
951
|
+
}
|
|
952
|
+
|
|
379
953
|
// Low-risk tools — auto-approve immediately (before high-risk content check,
|
|
380
954
|
// because Edit/Write diffs may contain code with "drop table" or "rm -rf" as
|
|
381
955
|
// string literals — those are code content, not dangerous operations).
|
|
@@ -391,7 +965,12 @@ function reviewWithHeuristics(context) {
|
|
|
391
965
|
}
|
|
392
966
|
}
|
|
393
967
|
|
|
394
|
-
|
|
968
|
+
const findExecPolicy = classifyFindExecPolicy(cmdUnwrapped || cmd);
|
|
969
|
+
if (findExecPolicy) return findExecPolicy;
|
|
970
|
+
|
|
971
|
+
// High-risk patterns — escalate. Checked against the WHOLE command (and the
|
|
972
|
+
// provider warning) because a danger substring (rm -rf /, curl|sh) is dangerous
|
|
973
|
+
// regardless of where clause boundaries fall.
|
|
395
974
|
const highRisk = [
|
|
396
975
|
/rm\s+-rf?\s+(?!\/tmp\/)[\/~]/, /force.?push/, /--force/, /drop\s+table/i,
|
|
397
976
|
/delete.*production/i, /sudo\s/, /chmod\s+777/, /curl.*\|\s*sh/,
|
|
@@ -404,22 +983,23 @@ function reviewWithHeuristics(context) {
|
|
|
404
983
|
}
|
|
405
984
|
}
|
|
406
985
|
|
|
407
|
-
//
|
|
986
|
+
// Local dev operations that are safe to auto-approve — matched PER CLAUSE.
|
|
408
987
|
const devSafe = [
|
|
409
988
|
{ re: /echo\s+.*>\s*\/tmp\//, label: 'Write to /tmp', desc: 'Echo output to temp files' },
|
|
410
|
-
{ re:
|
|
411
|
-
{ re:
|
|
412
|
-
{ re:
|
|
989
|
+
{ re: /^\s*cd\s/, label: 'Change directory', desc: 'Change the working directory' },
|
|
990
|
+
{ re: /^\s*sleep\s+[\d.]+\s*$/, label: 'Sleep', desc: 'Pause for a fixed duration' },
|
|
991
|
+
{ re: /\bcat\s/, label: 'Read file contents', desc: 'View file contents with cat' },
|
|
992
|
+
{ re: /\bls\b/, label: 'List directory', desc: 'List files and directories' },
|
|
993
|
+
{ re: /\bpwd\b/, label: 'Print working directory', desc: 'Show current directory path' },
|
|
413
994
|
{ re: /git\s+(status|log|diff|branch|show|stash\s+list|tag|remote)/, label: 'Git read operations', desc: 'Read-only git commands (status, log, diff, branch, show, tag, remote)' },
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
995
|
+
// NOTE: `node -e`, `python -c`, `cp`, `mv`, and `sqlite3` are intentionally
|
|
996
|
+
// NOT here — they can run arbitrary code or mutate/overwrite arbitrary files
|
|
997
|
+
// (incl. databases) and must go through the AI reviewer/verifier (medium),
|
|
998
|
+
// not blanket low-risk auto-approve.
|
|
417
999
|
{ re: /npm\s+(run|test|start)/, label: 'npm script', desc: 'Run npm scripts (run, test, start)' },
|
|
418
1000
|
{ re: /mkdir\s+-?p?\s/, label: 'Create directory', desc: 'Create directories with mkdir' },
|
|
419
1001
|
{ re: />\s*\/tmp\//, label: 'Write to /tmp', desc: 'Redirect output to temp files' },
|
|
420
1002
|
{ re: /touch\s/, label: 'Create empty file', desc: 'Create or update file timestamps' },
|
|
421
|
-
{ re: /cp\s/, label: 'Copy files', desc: 'Copy files or directories' },
|
|
422
|
-
{ re: /mv\s/, label: 'Move/rename files', desc: 'Move or rename files' },
|
|
423
1003
|
{ re: /\bcurl\s[\s\S]*?(https?:\/\/localhost|http:\/\/127\.0\.0\.1)/, label: 'Curl localhost', desc: 'HTTP requests to local dev servers' },
|
|
424
1004
|
{ re: /grep\s+-?[crn]/, label: 'Grep search', desc: 'Search file contents with grep' },
|
|
425
1005
|
{ re: /wc\s/, label: 'Word count', desc: 'Count lines/words/bytes' },
|
|
@@ -428,7 +1008,6 @@ function reviewWithHeuristics(context) {
|
|
|
428
1008
|
{ re: /echo\s[^|>]+$/, label: 'Echo output', desc: 'Print text to stdout (no redirect/pipe)' },
|
|
429
1009
|
{ re: /find\s.*-name/, label: 'Find files', desc: 'Search for files by name' },
|
|
430
1010
|
{ re: /sort\s|uniq\s/, label: 'Sort/unique', desc: 'Sort or deduplicate output' },
|
|
431
|
-
{ re: /\bsqlite3\s/, label: 'SQLite query', desc: 'Run SQLite3 database queries' },
|
|
432
1011
|
{ re: /\bjq\s/, label: 'JSON processing', desc: 'Process JSON with jq' },
|
|
433
1012
|
{ re: /\bsed\s+-?[ne]/, label: 'Sed filter', desc: 'Stream editing with sed (non-destructive)' },
|
|
434
1013
|
{ re: /\bawk\s/, label: 'Awk processing', desc: 'Text processing with awk' },
|
|
@@ -440,121 +1019,119 @@ function reviewWithHeuristics(context) {
|
|
|
440
1019
|
{ re: /\blsof\s/, label: 'List open files', desc: 'List open files and ports' },
|
|
441
1020
|
{ re: /\bps\s/, label: 'Process list', desc: 'List running processes' },
|
|
442
1021
|
];
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
1022
|
+
|
|
1023
|
+
// Per-clause MAX-risk: the command auto-approves as low ONLY if EVERY clause is
|
|
1024
|
+
// a known-safe dev operation. Any clause that terminates processes (#2) or is
|
|
1025
|
+
// unrecognized makes the whole command 'medium' → AI reviewer/verifier (which,
|
|
1026
|
+
// with session context, can still auto-approve a goal-aligned action).
|
|
1027
|
+
const clauses = _splitShellClauses(cmdUnwrapped || cmd);
|
|
1028
|
+
let firstSafe = null;
|
|
1029
|
+
let review = null;
|
|
1030
|
+
for (const clause of clauses) {
|
|
1031
|
+
// A bare assignment with no command substitution just sets a variable
|
|
1032
|
+
// (literal or arithmetic) — harmless. `VAR=$(cmd)` keeps the inner command,
|
|
1033
|
+
// so it falls through to be classified by that command below.
|
|
1034
|
+
if (/^\w+=/.test(clause) && !/\$\((?!\()/.test(clause) && !/`/.test(clause)) continue;
|
|
1035
|
+
if (_isProcessControlClause(clause)) {
|
|
1036
|
+
review = review || { label: 'Process control', desc: 'Terminates processes (kill/pkill) — review the target' };
|
|
1037
|
+
continue;
|
|
448
1038
|
}
|
|
1039
|
+
const safe = devSafe.find(({ re }) => re.test(clause));
|
|
1040
|
+
if (safe) { firstSafe = firstSafe || safe; continue; }
|
|
1041
|
+
review = review || { label: context.toolName || 'Bash command', desc: 'Routed to AI reviewer/verifier for a decision' };
|
|
1042
|
+
}
|
|
1043
|
+
if (review) {
|
|
1044
|
+
return { decision: 'approve', reasoning: 'Compound/unrecognized command — needs review', riskLevel: 'medium', fallback: true,
|
|
1045
|
+
ruleLabel: review.label, rulePattern: '', ruleDescription: review.desc };
|
|
1046
|
+
}
|
|
1047
|
+
if (firstSafe) {
|
|
1048
|
+
return { decision: 'approve', reasoning: 'Common dev operation (heuristic, all clauses safe)', riskLevel: 'low',
|
|
1049
|
+
ruleLabel: firstSafe.label, rulePattern: firstSafe.re.source, ruleDescription: firstSafe.desc };
|
|
449
1050
|
}
|
|
450
1051
|
|
|
451
|
-
// Default:
|
|
1052
|
+
// Default: medium risk — NOT auto-approved here. Routed to the AI reviewer +
|
|
1053
|
+
// verifier; if the AI gate is unavailable it escalates to the user (fail-safe).
|
|
452
1054
|
return { decision: 'approve', reasoning: 'Unrecognized command — needs review', riskLevel: 'medium', fallback: true,
|
|
453
1055
|
ruleLabel: context.toolName || 'Unknown', rulePattern: '',
|
|
454
|
-
ruleDescription: '
|
|
1056
|
+
ruleDescription: 'Routed to AI reviewer/verifier for a decision' };
|
|
455
1057
|
}
|
|
456
1058
|
|
|
457
|
-
//
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
}
|
|
472
|
-
}
|
|
473
|
-
} catch {}
|
|
1059
|
+
// Fail-safe verdict when the AI reviewer cannot produce an answer. We escalate
|
|
1060
|
+
// (ask the user) rather than fall back to heuristic approval — a missing/erroring
|
|
1061
|
+
// AI gate must never silently widen auto-approval (see docs/approval-ai-refinement.md).
|
|
1062
|
+
/**
 * Build the verdict returned when the AI reviewer produced no usable answer.
 *
 * @param {string} [detail] - Optional cause, appended in parentheses to the reasoning.
 * @returns {object} An `escalate` / `medium` verdict with empty rule fields and
 *   `aiUnavailable: true`.
 */
function _aiUnavailableEscalation(detail) {
  const suffix = detail ? ` (${detail})` : '';
  const verdict = {
    decision: 'escalate',
    riskLevel: 'medium',
    reasoning: `AI reviewer unavailable — escalating for safety${suffix}`,
    ruleLabel: '',
    rulePattern: '',
    ruleDescription: '',
    aiUnavailable: true,
  };
  return verdict;
}
|
|
474
1073
|
|
|
475
|
-
|
|
476
|
-
|
|
1074
|
+
// Review the command as a TL/Code Reviewer using the user's configured default
|
|
1075
|
+
// AI provider (via callBackgroundLlm — Anthropic/OpenAI/Gemini/Ollama/etc.).
|
|
1076
|
+
// No hardcoded provider or model. `options.callModel` is injectable for tests.
|
|
1077
|
+
async function reviewWithAI(context, learnedRules, options = {}) {
|
|
1078
|
+
const callModel = options.callModel || callBackgroundLlm;
|
|
1079
|
+
const rules = Array.isArray(learnedRules) ? learnedRules : [];
|
|
1080
|
+
const rulesContext = rules.length > 0
|
|
1081
|
+
? `\nPreviously approved patterns (the user has approved these before):\n${rules.map(r => `- ${r.label}: ${r.description || r.pattern}`).join('\n')}\n`
|
|
477
1082
|
: '';
|
|
478
1083
|
|
|
479
|
-
const
|
|
480
|
-
|
|
481
|
-
Your job: Review commands that Claude Code wants to execute and decide whether to AUTO-APPROVE (safe) or ESCALATE to the developer (risky).
|
|
1084
|
+
const system = `You are a senior TL/Code Reviewer acting as a gatekeeper for a developer's AI coding CLI sessions. Decide whether a requested command/tool call is safe to AUTO-APPROVE or should ESCALATE to the developer.
|
|
482
1085
|
|
|
483
1086
|
The developer's general approach:
|
|
484
|
-
-
|
|
485
|
-
-
|
|
486
|
-
-
|
|
487
|
-
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
1087
|
+
- Approve read-only operations, file reads, searches, and code edits within their project
|
|
1088
|
+
- Approve git read/commit operations and running their own dev scripts
|
|
1089
|
+
- Approve npm/pip install for known dependencies and local dev-server restarts
|
|
1090
|
+
- Be cautious about: force push, deleting production/shared data, modifying CI/CD, running unknown binaries, writing to system directories, destructive DB ops, exfiltrating data
|
|
1091
|
+
|
|
1092
|
+
Be pragmatic for a local dev environment, but ESCALATE anything that could cause irreversible damage or affect production/shared systems. Return ONLY valid JSON (no markdown fences).`;
|
|
1093
|
+
|
|
1094
|
+
const prompt = `${rulesContext}
|
|
492
1095
|
Current request being reviewed:
|
|
493
1096
|
Tool: ${context.toolName}
|
|
494
1097
|
Command/Content:
|
|
495
|
-
${context.command.slice(0, 1500)}
|
|
1098
|
+
${(context.command || '').slice(0, 1500)}
|
|
496
1099
|
|
|
497
1100
|
Safety Warning: ${context.warning || 'None'}
|
|
498
1101
|
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
Return ONLY valid JSON (no markdown fences):
|
|
1102
|
+
Return ONLY this JSON shape:
|
|
502
1103
|
{
|
|
503
|
-
"decision": "approve"
|
|
504
|
-
"riskLevel": "low"
|
|
1104
|
+
"decision": "approve" | "escalate",
|
|
1105
|
+
"riskLevel": "low" | "medium" | "high",
|
|
505
1106
|
"reasoning": "brief explanation (1-2 sentences)",
|
|
506
|
-
"ruleLabel": "short label for this type of operation
|
|
1107
|
+
"ruleLabel": "short label for this type of operation",
|
|
507
1108
|
"rulePattern": "regex pattern that would match similar future requests",
|
|
508
1109
|
"ruleDescription": "human-readable description of what this rule covers"
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
Be pragmatic. Most development operations in a local dev environment are safe. Only escalate things that could cause irreversible damage or affect production/shared systems.`;
|
|
1110
|
+
}`;
|
|
512
1111
|
|
|
1112
|
+
let response;
|
|
513
1113
|
try {
|
|
514
|
-
|
|
515
|
-
method: 'POST',
|
|
516
|
-
headers: {
|
|
517
|
-
'Content-Type': 'application/json',
|
|
518
|
-
'x-api-key': apiKey,
|
|
519
|
-
'anthropic-version': '2023-06-01',
|
|
520
|
-
...customHeaders,
|
|
521
|
-
},
|
|
522
|
-
body: JSON.stringify({
|
|
523
|
-
model: 'claude-sonnet-4-20250514',
|
|
524
|
-
max_tokens: 512,
|
|
525
|
-
messages: [{ role: 'user', content: prompt }],
|
|
526
|
-
}),
|
|
527
|
-
});
|
|
528
|
-
|
|
529
|
-
if (!res.ok) {
|
|
530
|
-
const text = await res.text();
|
|
531
|
-
console.error('[approval-agent] Claude API error:', res.status, text);
|
|
532
|
-
// Fall back to heuristic approval instead of escalating on API failure
|
|
533
|
-
return reviewWithHeuristics(context);
|
|
534
|
-
}
|
|
535
|
-
|
|
536
|
-
const data = await res.json();
|
|
537
|
-
const text = data.content?.[0]?.text || '';
|
|
538
|
-
const match = text.match(/\{[\s\S]*\}/);
|
|
539
|
-
if (!match) {
|
|
540
|
-
console.error('[approval-agent] Could not parse AI response, falling back to heuristics');
|
|
541
|
-
return reviewWithHeuristics(context);
|
|
542
|
-
}
|
|
543
|
-
|
|
544
|
-
const result = JSON.parse(match[0]);
|
|
545
|
-
return {
|
|
546
|
-
decision: result.decision || 'escalate',
|
|
547
|
-
riskLevel: result.riskLevel || 'medium',
|
|
548
|
-
reasoning: result.reasoning || '',
|
|
549
|
-
ruleLabel: result.ruleLabel || '',
|
|
550
|
-
rulePattern: result.rulePattern || '',
|
|
551
|
-
ruleDescription: result.ruleDescription || '',
|
|
552
|
-
};
|
|
1114
|
+
response = await callModel(prompt, { system, maxTokens: 512, temperature: 0.1 });
|
|
553
1115
|
} catch (e) {
|
|
554
|
-
console.error('[approval-agent]
|
|
555
|
-
|
|
556
|
-
return reviewWithHeuristics(context);
|
|
1116
|
+
console.error('[approval-agent] AI reviewer call failed:', e.message);
|
|
1117
|
+
return _aiUnavailableEscalation(e.reason || e.message);
|
|
557
1118
|
}
|
|
1119
|
+
|
|
1120
|
+
const text = (response && (response.text ?? response)) || '';
|
|
1121
|
+
const result = _extractJsonObject(typeof text === 'string' ? text : '');
|
|
1122
|
+
if (!result) {
|
|
1123
|
+
console.error('[approval-agent] Could not parse AI reviewer response — escalating for safety');
|
|
1124
|
+
return _aiUnavailableEscalation('unparseable response');
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
return {
|
|
1128
|
+
decision: result.decision === 'approve' ? 'approve' : 'escalate',
|
|
1129
|
+
riskLevel: result.riskLevel || 'medium',
|
|
1130
|
+
reasoning: result.reasoning || '',
|
|
1131
|
+
ruleLabel: result.ruleLabel || '',
|
|
1132
|
+
rulePattern: result.rulePattern || '',
|
|
1133
|
+
ruleDescription: result.ruleDescription || '',
|
|
1134
|
+
};
|
|
558
1135
|
}
|
|
559
1136
|
|
|
560
1137
|
// Duration (ms) to suppress WS output after sending an approval keystroke.
|
|
@@ -577,6 +1154,15 @@ const VERIFY_WINDOW_MS = 500;
|
|
|
577
1154
|
// bytes. An echo of "1" or "2" into the input box is just a few bytes.
|
|
578
1155
|
const VERIFY_TRANSITION_BYTES = 8;
|
|
579
1156
|
|
|
1157
|
+
// Approval rescue is intentionally bounded. It is not a replacement for the
|
|
1158
|
+
// provider parser/gate; it tries one missed prompt, verifies the outcome, and
|
|
1159
|
+
// suppresses repeated failures by exact fingerprint.
|
|
1160
|
+
// 10 minutes, in milliseconds.
const RESCUE_RETRY_COOLDOWN_MS = 10 * 60 * 1000;
// 60 minutes, in milliseconds.
const RESCUE_FAILURE_COOLDOWN_MS = 60 * 60 * 1000;
// 30 minutes, in milliseconds.
const RESCUE_WARN_COOLDOWN_MS = 30 * 60 * 1000;
// NOTE(review): presumably the consecutive-failure count at which rescue stops
// retrying a fingerprint — confirm against the code that reads it.
const RESCUE_MAX_CONSECUTIVE_FAILURES = 2;
// Sum of the approve delay, the verify window and the transition settle time,
// plus a fixed 300 ms margin.
const RESCUE_DEFAULT_VERIFY_DELAY_MS = APPROVE_DELAY_MS + VERIFY_WINDOW_MS + APPROVAL_TRANSITION_SETTLE_MS + 300;
|
|
1165
|
+
|
|
580
1166
|
// Backspace character used to erase a stray keystroke that landed in the
|
|
581
1167
|
// input box (false-positive detection). Modern terminals interpret \x7f (DEL)
|
|
582
1168
|
// as backspace; \b (BS = 0x08) is unreliable on macOS PTYs.
|
|
@@ -603,9 +1189,9 @@ const BACKSPACE = '\x7f';
|
|
|
603
1189
|
//
|
|
604
1190
|
// Legacy path (no headlessWorker, e.g. unit tests): keep original
|
|
605
1191
|
// keystroke + ENTER_DELAY_MS Enter behavior so existing tests still pass.
|
|
606
|
-
function sendApprovalKeystroke(session, context, headlessWorker) {
|
|
1192
|
+
function sendApprovalKeystroke(session, context, headlessWorker, options = {}) {
|
|
607
1193
|
const provider = context.providerId ? getProvider(context.providerId) : null;
|
|
608
|
-
const keystroke = getApproveKeystroke(context);
|
|
1194
|
+
const keystroke = options.keystroke || getApproveKeystroke(context, options);
|
|
609
1195
|
const sid = session.id ? session.id.slice(0, 8) : '?';
|
|
610
1196
|
const decidedBy = context._decidedBy || 'unknown';
|
|
611
1197
|
const ruleLabel = context._ruleLabel || context.toolName || 'Unknown';
|
|
@@ -701,10 +1287,579 @@ function sendApprovalKeystroke(session, context, headlessWorker) {
|
|
|
701
1287
|
}, VERIFY_WINDOW_MS);
|
|
702
1288
|
}
|
|
703
1289
|
|
|
1290
|
+
/**
 * Hash a value into a short, stable identifier.
 *
 * @param {*} value - Anything; falsy values are hashed as the empty string.
 * @returns {string} The first 32 hex characters of the SHA-256 digest of `String(value)`.
 */
function _hashRescue(value) {
  const hasher = crypto.createHash('sha256');
  hasher.update(String(value || ''));
  const hex = hasher.digest('hex');
  return hex.slice(0, 32);
}
|
|
1293
|
+
|
|
1294
|
+
/**
 * Compute the fingerprint that identifies one rescue candidate.
 *
 * The hashed text is four newline-separated fields: provider, gate reason,
 * tool name and a body. The body is the normalized command signature when a
 * context is given and the signature is non-empty; otherwise it is the last
 * 1000 characters of the whitespace-collapsed raw text.
 *
 * @param {object|null|undefined} context - Approval context (reads providerId, toolName, command).
 * @param {string} providerId - Provider id; falls back to `context.providerId`, then 'unknown'.
 * @param {string} rawText - Raw terminal text used when no signature is available.
 * @param {string} gateReason - Reason the deterministic gate rejected the prompt.
 * @returns {string} The `_hashRescue` digest of the combined fields.
 */
function approvalRescueFingerprint(context, providerId, rawText, gateReason) {
  const provider = providerId || context?.providerId || 'unknown';
  let body = context ? normalizeCommandSignature(context.toolName, context.command) : '';
  if (!body) {
    body = String(rawText || '').replace(/\s+/g, ' ').trim().slice(-1000);
  }
  const fields = [provider, gateReason || '', context?.toolName || '', body];
  return _hashRescue(fields.join('\n'));
}
|
|
1302
|
+
|
|
1303
|
+
/**
 * Look up the stored rescue record for a fingerprint.
 *
 * @param {string} fingerprint - Key passed to `dbModule.getApprovalRescuePattern`.
 * @returns {object|null} The stored row, or `null` when the accessor is absent,
 *   returns a falsy value, or throws.
 */
function _getRescuePattern(fingerprint) {
  try {
    const row = dbModule.getApprovalRescuePattern?.(fingerprint);
    return row || null;
  } catch {
    return null;
  }
}
|
|
1306
|
+
|
|
1307
|
+
/**
 * Persist a rescue record.
 *
 * @param {object} row - Record handed to `dbModule.saveApprovalRescuePattern`.
 * @returns {object} Whatever the accessor returns when truthy; otherwise the
 *   input `row` (also when the accessor is absent or throws — a throw is logged).
 */
function _saveRescuePattern(row) {
  try {
    const saved = dbModule.saveApprovalRescuePattern?.(row);
    return saved || row;
  } catch (e) {
    console.error('[approval-rescue] DB error:', e.message);
    return row;
  }
}
|
|
1313
|
+
|
|
1314
|
+
/**
 * Build the camelCase rescue record used for saving, seeded from an existing
 * snake_case row when there is one.
 *
 * Provider, detection source and gate reason prefer `meta` over the existing
 * row. Counters and timestamps are coerced to numbers (0 when missing), text
 * fields default to '', `status` defaults to 'candidate', `requiresEnter`
 * defaults to 1 only when the stored value is null/undefined, and
 * `lastSeenAtMs` is always the current time.
 *
 * @param {string} fingerprint - Record key.
 * @param {object|null|undefined} existing - Previously stored row, if any.
 * @param {{providerId?: string, source?: string, gateReason?: string}} [meta] - Fresh detection details.
 * @returns {object} The merged record.
 */
function _baseRescueRow(fingerprint, existing, meta = {}) {
  const seenAt = Date.now();
  const prior = existing ?? {};
  const count = (column) => Number(prior[column] || 0);
  const text = (column) => prior[column] || '';
  return {
    fingerprint,
    providerId: meta.providerId || text('provider_id'),
    detectionSource: meta.source || text('detection_source'),
    gateReason: meta.gateReason || text('gate_reason'),
    status: prior.status || 'candidate',
    attempts: count('attempts'),
    successes: count('successes'),
    failures: count('failures'),
    consecutiveFailures: count('consecutive_failures'),
    lastDecision: text('last_decision'),
    lastOutcome: text('last_outcome'),
    lastDiagnosis: text('last_diagnosis'),
    ruleLabel: text('rule_label'),
    ruleDescription: text('rule_description'),
    approvalKey: text('approval_key'),
    // `??` rather than `||`: a stored 0 ("no Enter needed") must survive.
    requiresEnter: prior.requires_enter ?? 1,
    promotedRuleId: prior.promoted_rule_id || null,
    cooldownUntilMs: count('cooldown_until_ms'),
    lastWarningAtMs: count('last_warning_at_ms'),
    lastSeenAtMs: seenAt,
    lastAttemptAtMs: count('last_attempt_at_ms'),
  };
}
|
|
1340
|
+
|
|
1341
|
+
/**
 * Pull a JSON value out of free-form model output.
 *
 * The candidate is the span from the first `{` to the last `}` in the text;
 * it is parsed as a whole. Nothing narrower is ever tried, so output that
 * wraps several brace groups in prose fails to parse and yields `null`.
 *
 * @param {*} text - Model output; coerced to a string (falsy → '').
 * @returns {*} The parsed value, or `null` when there is no brace span or it is not valid JSON.
 */
function _extractJsonObject(text) {
  const raw = String(text || '').trim();
  const open = raw.indexOf('{');
  const close = raw.lastIndexOf('}');
  if (open === -1 || close < open) return null;
  try {
    return JSON.parse(raw.slice(open, close + 1));
  } catch {
    return null;
  }
}
|
|
1347
|
+
|
|
1348
|
+
// Decide whether a missed approval prompt may be auto-approved exactly once.
//
// context: parsed approval context; reads toolName, command, warning,
//          fullContext and providerId.
// meta:    detection metadata interpolated into the model prompt
//          (providerId, gateReason, source, rawDetected, hintDetected).
// options: callModel (defaults to callBackgroundLlm), disableAi,
//          modelTimeoutMs (defaults to 45000), logModelErrors.
//
// Resolution order:
//   1. Heuristics that are final + approve + low risk -> safeToTry: true.
//   2. Heuristics that are high risk or escalate      -> safeToTry: false.
//   3. Otherwise ask the model (unless disableAi === true) and use its JSON.
//   4. If the model is unavailable or returns no JSON, fall back to the
//      heuristic verdict; only an explicit, non-fallback low-risk approve
//      is considered safe.
//
// Returns a review object: { safeToTry, decidedBy, missType, reasoning,
// ruleLabel, ruleDescription, ... }.
async function reviewApprovalRescueCandidate(context, meta = {}, options = {}) {
  const heuristic = reviewWithHeuristics(context);
  if (heuristic.policyFinal && heuristic.decision === 'approve' && heuristic.riskLevel === 'low') {
    return {
      safeToTry: true,
      decidedBy: 'heuristic-rescue',
      missType: 'structural_gate_miss',
      reasoning: heuristic.reasoning,
      ruleLabel: heuristic.ruleLabel || context.toolName || 'Approval',
      ruleDescription: heuristic.ruleDescription || '',
      fallbackHeuristic: true,
    };
  }
  if (heuristic.riskLevel === 'high' || heuristic.decision === 'escalate') {
    return {
      safeToTry: false,
      decidedBy: 'heuristic-rescue',
      missType: 'blocked_by_policy',
      shouldWarnUser: true,
      reasoning: heuristic.reasoning || 'High-risk operation detected.',
      ruleLabel: heuristic.ruleLabel || context.toolName || 'Approval',
      ruleDescription: heuristic.ruleDescription || '',
      fallbackHeuristic: true,
    };
  }

  // Model output is untrusted text: a flag counts as set only when it is the
  // boolean true or the string "true". Plain truthiness would treat the string
  // "false" as true and green-light an auto-approval.
  const isExplicitTrue = (value) => value === true
    || (typeof value === 'string' && value.trim().toLowerCase() === 'true');

  const callModel = options.callModel || callBackgroundLlm;
  if (options.disableAi !== true && typeof callModel === 'function') {
    const prompt = `You are CTM's approval-rescue monitor.

The deterministic approval pipeline saw approval-shaped terminal text but rejected it before policy could act.

Decide whether this is an ACTIVE approval prompt that should be auto-approved exactly once.

Rules:
- Only approve if the prompt is active, current, and the operation is safe for a local coding session.
- Never choose a durable allow-all option during rescue.
- If the provider parser already detected the prompt but a structural gate rejected it, this is not a new provider pattern.
- If the deterministic path missed this because an unknown/new provider has a new prompt shape, set missType to "new_provider_pattern".
- If it looks like an existing provider parser/gate/race bug, set missType to "structural_gate_miss", "parser_bug", or "race".
- If stale or uncertain, safeToTry must be false.

Provider: ${meta.providerId || context.providerId || 'unknown'}
Gate reason: ${meta.gateReason || 'unknown'}
Source: ${meta.source || 'unknown'}
Provider parser detected prompt: ${meta.rawDetected ? 'yes' : 'no'}
Generic hint detected prompt: ${meta.hintDetected ? 'yes' : 'no'}
Tool: ${context.toolName}
Command:
${String(context.command || '').slice(0, 1500)}

Warning: ${context.warning || 'none'}
Detected context:
${String(context.fullContext || '').slice(0, 2000)}

Heuristic policy says: ${heuristic.decision || 'unknown'} / ${heuristic.riskLevel || 'unknown'} / ${heuristic.reasoning || ''}

Return only JSON:
{
  "safeToTry": true or false,
  "missType": "new_provider_pattern" or "structural_gate_miss" or "parser_bug" or "race" or "stale_screen" or "blocked_by_policy" or "unknown",
  "reasoning": "one sentence",
  "ruleLabel": "short label",
  "ruleDescription": "short description",
  "approvalKey": "optional one-time key such as 1 or y",
  "shouldWarnUser": true or false
}`;

    try {
      const response = await callModel(prompt, {
        task: 'approval-rescue',
        modelTier: 'fast',
        maxTokens: 512,
        temperature: 0,
        thinking: 'disabled',
        reasoningEffort: 'low',
        timeoutMs: Number(options.modelTimeoutMs || 45000),
      });
      // The model may hand back either { text, model } or a bare string.
      const parsed = _extractJsonObject(response?.text || response);
      if (parsed) {
        return {
          safeToTry: isExplicitTrue(parsed.safeToTry),
          decidedBy: 'ai-rescue',
          missType: String(parsed.missType || 'unknown'),
          reasoning: String(parsed.reasoning || ''),
          ruleLabel: String(parsed.ruleLabel || context.toolName || 'Approval'),
          ruleDescription: String(parsed.ruleDescription || ''),
          approvalKey: String(parsed.approvalKey || ''),
          shouldWarnUser: isExplicitTrue(parsed.shouldWarnUser),
          model: response?.model || '',
        };
      }
      // No JSON in the reply: fall through to the deterministic fallback.
    } catch (e) {
      if (options.logModelErrors !== false) {
        console.warn('[approval-rescue] AI review unavailable, falling back to deterministic low-risk policy:', e.message);
      }
    }
  }

  // Deterministic fallback: only an explicit (non-fallback) low-risk approve
  // from the heuristics is trusted; everything else warns the user instead.
  const explicitLowRisk = heuristic.decision === 'approve'
    && heuristic.riskLevel === 'low'
    && !heuristic.fallback;
  return {
    safeToTry: explicitLowRisk,
    decidedBy: 'heuristic-rescue',
    missType: explicitLowRisk ? 'structural_gate_miss' : 'unknown',
    reasoning: explicitLowRisk
      ? heuristic.reasoning
      : 'No confident AI or deterministic low-risk approval decision was available.',
    ruleLabel: heuristic.ruleLabel || context.toolName || 'Approval',
    ruleDescription: heuristic.ruleDescription || '',
    shouldWarnUser: !explicitLowRisk,
    fallbackHeuristic: true,
  };
}
|
|
1463
|
+
|
|
1464
|
+
// Normalise the miss type reported by a rescue review.
//
// review:  review object; only review.missType is read (defaults to 'unknown').
// context: parsed approval context; only context.providerId is read.
// meta:    { providerId, gateReason | reason, source, rawDetected }.
//
// Returns the reported miss type as a string, except that a reported
// 'new_provider_pattern' is downgraded to 'structural_gate_miss' when the
// provider is registered (getProvider finds it) and there is evidence the
// provider path already saw the prompt (rawDetected, a gate reason, or a
// source containing "gate-miss"). In that case the root cause is the
// parser/gate path, not a new prompt shape.
function _diagnoseApprovalRescueMissType(review, context, meta = {}) {
  const providerKey = meta.providerId || context?.providerId || '';
  const providerIsRegistered = Boolean(providerKey && getProvider(providerKey));
  const reportedType = String(review?.missType || 'unknown');
  const gateText = String(meta.gateReason || meta.reason || '');
  const sourceText = String(meta.source || '');

  const promptWasAlreadySeen = Boolean(meta.rawDetected)
    || gateText.length > 0
    || /gate-miss/i.test(sourceText);

  const isMislabelledGateMiss = reportedType === 'new_provider_pattern'
    && providerIsRegistered
    && promptWasAlreadySeen;

  return isMislabelledGateMiss ? 'structural_gate_miss' : reportedType;
}
|
|
1480
|
+
|
|
1481
|
+
// Tell whether a successful rescue should be promoted to a reusable pattern.
//
// Promotion requires both:
//   - the normalised diagnosis is 'new_provider_pattern', and
//   - the provider id is missing or not registered (getProvider returns falsy).
// Returns a boolean.
function _shouldPromoteApprovalRescuePattern(review, context, meta = {}) {
  if (_diagnoseApprovalRescueMissType(review, context, meta) !== 'new_provider_pattern') {
    return false;
  }
  const providerKey = meta.providerId || context?.providerId || '';
  if (!providerKey) return true;
  return !getProvider(providerKey);
}
|
|
1488
|
+
|
|
1489
|
+
// Surface an "approval needs attention" banner for a rescue candidate, at most
// once per RESCUE_WARN_COOLDOWN_MS for a given pattern row.
//
// row:    rescue pattern row; reads lastWarningAtMs and status.
// review: supplies ruleLabel (shown as the verdict) and reasoning.
//
// Returns the row unchanged while still inside the warning cooldown.
// Otherwise it persists the row with a fresh lastWarningAtMs (status stays
// 'promoted' if it already was, else becomes 'suppressed'), broadcasts an
// 'escalated' approval-decision, and returns the saved row (or the input row
// if the save returned nothing).
function _broadcastRescueWarning(sessionId, session, broadcastFn, context, review, row) {
  const timestamp = Date.now();
  const lastWarnedAt = row.lastWarningAtMs;
  if (lastWarnedAt && timestamp - lastWarnedAt < RESCUE_WARN_COOLDOWN_MS) {
    return row;
  }

  const nextStatus = row.status === 'promoted' ? 'promoted' : 'suppressed';
  const persisted = _saveRescuePattern({ ...row, lastWarningAtMs: timestamp, status: nextStatus });

  try {
    // The banner headline is the command itself. The rescue monitor's free-text
    // ruleLabel describes its own verdict (e.g. "approve-once-new-pattern") and
    // means nothing to the operator, so it is only carried as the secondary
    // verdict/reasoning.
    const { title } = escalationCommandParts(context);
    const banner = {
      type: 'approval-decision',
      sessionId,
      decision: 'escalated',
      decidedBy: 'rescue-monitor',
      label: title || context.toolName || 'Approval needs attention',
      verdict: review.ruleLabel || '',
      reasoning: review.reasoning || 'CTM detected a possible missed approval, but the rescue attempt was not safe or did not work.',
      riskLevel: 'medium',
      approvalRescue: true,
      command: String(context.command || '').slice(0, 500),
      warning: context.warning || '',
    };
    broadcastFn(sessionId, session, banner);
  } catch {
    // Best effort: a failed banner must not break the rescue bookkeeping.
  }

  return persisted || row;
}
|
|
1519
|
+
|
|
1520
|
+
// Promise adapter around _scheduleGuardedApproval for a one-shot rescue.
//
// decisionPayload: passed through to _scheduleGuardedApproval.
// options: keystroke (optional explicit key to send) and scheduleTimeoutMs
//          (defaults to 2000).
//
// Resolves with whatever _scheduleGuardedApproval reports through onResult,
// or with { status: 'timeout', sent: false } if no result arrives within the
// timeout. preferAllowAll is always false, so a rescue never picks a durable
// allow-all option. The first of result/timeout wins; later ones are ignored.
//
// Rejects with the original error if _scheduleGuardedApproval throws
// synchronously. The pending timer is cleared in that case so it does not
// outlive the failed attempt.
async function _scheduleRescueAttempt(sessionId, session, context, headlessWorker, broadcastFn, decisionPayload, options = {}) {
  return new Promise((resolve, reject) => {
    let settled = false;
    const timeout = setTimeout(() => {
      if (settled) return;
      settled = true;
      resolve({ status: 'timeout', sent: false });
    }, Number(options.scheduleTimeoutMs || 2000));

    try {
      _scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, decisionPayload, {
        keystrokeOptions: {
          preferAllowAll: false,
          ...(options.keystroke ? { keystroke: options.keystroke } : {}),
        },
        onResult(result) {
          if (settled) return;
          settled = true;
          clearTimeout(timeout);
          resolve(result);
        },
      });
    } catch (err) {
      // Same rejection the caller saw before, but without leaving the timer armed.
      settled = true;
      clearTimeout(timeout);
      reject(err);
    }
  });
}
|
|
1543
|
+
|
|
1544
|
+
// Rescue path for approval-shaped terminal text that the deterministic
// pipeline rejected: parse it, review it, optionally send one approval
// keystroke, verify the terminal moved on, and record the outcome on a
// per-fingerprint "rescue pattern" row.
//
// sessionId / session: target session; session._outputBytesCounter is read
//                      before and after the attempt to detect progress.
// cleanText:           terminal text to parse (falls back to meta.rawText).
// broadcastFn:         used for warning banners and the approval decision.
// providerId:          optional provider hint for parsing.
// headlessWorker:      forwarded to the keystroke scheduler and prompt check.
// meta:                { rawText, source, gateReason | reason, rawDetected, hintDetected }.
// options:             forwarded to reviewApprovalRescueCandidate; also reads
//                      scheduleTimeoutMs and verifyDelayMs.
//
// Returns { handled, reason, fingerprint?, decidedBy?, diagnosis?, outcome?, promoted? }.
// `reason` is one of: disabled, empty, unparsed, cooldown, blocked-repeat-failures,
// blocklist, not-safe, verifier-blocked, <send status>, approved, verify-failed.
async function handleApprovalRescueCandidate(sessionId, session, cleanText, broadcastFn, providerId, headlessWorker, meta = {}, options = {}) {
  // Feature switch: defaults to enabled when the setting or getter is missing
  // or the read throws.
  let enabled = true;
  try { enabled = dbModule.getSetting ? !!dbModule.getSetting('approval_rescue_enabled', true) : true; } catch {}
  if (!enabled) return { handled: false, reason: 'disabled' };

  const rawText = String(cleanText || meta.rawText || '');
  if (!rawText) return { handled: false, reason: 'empty' };

  // Parse: known-provider parser first, then the provider-hinted generic
  // parser, then the generic parser with no provider hint.
  const providerContext = _parseKnownProviderContext(rawText, providerId);
  let context = providerContext?.context || parseApprovalContext(rawText, providerId);
  if (!context && providerId) context = parseApprovalContext(rawText, null);
  if (!context) return { handled: false, reason: 'unparsed' };
  // Prefer a provider id that getProvider recognises; otherwise keep whatever
  // id is available, ending at 'generic'.
  const effectiveProviderId = providerContext?.providerId
    || (providerId && getProvider(providerId) ? providerId : '')
    || (context.providerId && getProvider(context.providerId) ? context.providerId : '')
    || context.providerId
    || providerId
    || 'generic';
  context.providerId = effectiveProviderId;

  // Load (or create) the pattern row for this fingerprint and persist it
  // before any decision is made.
  const fingerprint = approvalRescueFingerprint(context, effectiveProviderId, rawText, meta.gateReason);
  const existing = _getRescuePattern(fingerprint);
  let row = _baseRescueRow(fingerprint, existing, {
    providerId: effectiveProviderId,
    source: meta.source || 'gate-miss',
    gateReason: meta.gateReason || meta.reason || '',
  });
  row = _saveRescuePattern(row) || row;
  // NOTE(review): the row is rebuilt from the value just saved. This presumably
  // relies on _saveRescuePattern returning the stored record in the shape
  // _baseRescueRow expects for `existing` — confirm.
  row = _baseRescueRow(fingerprint, row, {
    providerId: effectiveProviderId,
    source: meta.source || 'gate-miss',
    gateReason: meta.gateReason || meta.reason || '',
  });

  // Rate limiting: honour an active cooldown, and block patterns that keep
  // failing (promoted patterns are exempt from the failure block).
  const now = Date.now();
  if (row.cooldownUntilMs && row.cooldownUntilMs > now) {
    return { handled: false, reason: 'cooldown', fingerprint };
  }
  if (row.consecutiveFailures >= RESCUE_MAX_CONSECUTIVE_FAILURES && row.status !== 'promoted') {
    row.status = 'blocked';
    row.cooldownUntilMs = now + RESCUE_FAILURE_COOLDOWN_MS;
    row.lastOutcome = 'blocked-repeat-failures';
    _saveRescuePattern(row);
    return { handled: false, reason: 'blocked-repeat-failures', fingerprint };
  }

  // Dangerous-command blocklist: a hit is never rescued; it is recorded and
  // surfaced to the user as a warning instead.
  if (isBlocklistEnabled()) {
    const blockCheck = checkBlocklist(context.command || '', getBlocklistConfig());
    if (blockCheck.blocked) {
      row.status = 'suppressed';
      row.lastDecision = 'blocked';
      row.lastOutcome = 'blocklist';
      row.lastDiagnosis = blockCheck.category || 'blocked_by_policy';
      _saveRescuePattern(row);
      _broadcastRescueWarning(sessionId, session, broadcastFn, context, {
        ruleLabel: `Blocklist: ${blockCheck.reason}`,
        reasoning: `Dangerous-command blocklist matched (${blockCheck.category}): ${blockCheck.reason}`,
      }, row);
      return { handled: true, reason: 'blocklist', fingerprint, outcome: 'blocked' };
    }
  }

  // Review: a promoted pattern whose last decision was 'approve' is reused
  // without a fresh review; everything else goes through the heuristic/AI review.
  let review;
  if (row.status === 'promoted' && row.lastDecision === 'approve') {
    review = {
      safeToTry: true,
      decidedBy: 'rescue-rule',
      missType: row.lastDiagnosis || 'new_provider_pattern',
      reasoning: row.ruleDescription || 'Matched verified approval rescue pattern.',
      ruleLabel: row.ruleLabel || context.toolName || 'Approval',
      ruleDescription: row.ruleDescription || '',
      approvalKey: row.approvalKey || '',
      shouldWarnUser: false,
    };
  } else {
    review = await reviewApprovalRescueCandidate(context, {
      providerId: effectiveProviderId,
      source: meta.source || 'gate-miss',
      gateReason: meta.gateReason || meta.reason || '',
      rawDetected: !!meta.rawDetected,
      hintDetected: !!meta.hintDetected,
    }, options);
  }
  // Normalise the reported miss type and carry the normalised value on the review.
  const diagnosis = _diagnoseApprovalRescueMissType(review, context, {
    providerId: effectiveProviderId,
    source: meta.source || 'gate-miss',
    gateReason: meta.gateReason || meta.reason || '',
    rawDetected: !!meta.rawDetected,
    hintDetected: !!meta.hintDetected,
  });
  review = { ...review, missType: diagnosis };

  // Not safe: record the suppression, start the retry cooldown, and warn only
  // when there is something concrete to show.
  if (!review.safeToTry) {
    row.status = row.status === 'promoted' ? 'promoted' : 'suppressed';
    row.lastDecision = 'suppress';
    row.lastOutcome = 'not-safe';
    row.lastDiagnosis = diagnosis || 'unknown';
    row.cooldownUntilMs = now + RESCUE_RETRY_COOLDOWN_MS;
    row.ruleLabel = review.ruleLabel || row.ruleLabel;
    row.ruleDescription = review.ruleDescription || row.ruleDescription;
    row = _saveRescuePattern(row) || row;
    // Only pin a "review needed" banner when there is a concrete, classified
    // command to show the operator. A non-actionable candidate (no parsed command
    // or an unclassified "Unknown" tool) is almost always approval-shaped PROSE,
    // not a live prompt — escalating it produces a confusing, meaningless banner.
    // The refinement loop (handleMiss) still runs separately and learns the shape.
    if (review.shouldWarnUser && _rescueCandidateActionable(context)) {
      _broadcastRescueWarning(sessionId, session, broadcastFn, context, review, row);
    }
    return { handled: false, reason: 'not-safe', fingerprint, decidedBy: review.decidedBy, diagnosis };
  }

  // Second opinion from the auto-approval verifier; a truthy result means it
  // blocked the approval.
  // NOTE(review): other visible call sites pass riskLevel and callModel as 7th
  // and 8th arguments; here they are omitted — confirm the verifier still runs
  // for rescue candidates.
  const verifierBlock = await _verifyAutoApprovalOrBlock(
    sessionId,
    session,
    context,
    broadcastFn,
    review.ruleLabel || context.toolName || 'Approval',
    'approval-rescue'
  );
  if (verifierBlock) {
    row.status = row.status === 'promoted' ? 'promoted' : 'suppressed';
    row.lastDecision = 'verifier-blocked';
    row.lastOutcome = verifierBlock.verifier?.verdict || 'verifier-blocked';
    row.lastDiagnosis = 'blocked_by_verifier';
    row.cooldownUntilMs = now + RESCUE_RETRY_COOLDOWN_MS;
    row.ruleLabel = review.ruleLabel || row.ruleLabel;
    row.ruleDescription = verifierBlock.reason || row.ruleDescription;
    _saveRescuePattern(row);
    return { handled: true, reason: 'verifier-blocked', fingerprint, decidedBy: 'verifier', diagnosis: 'blocked_by_verifier' };
  }

  // Attempt bookkeeping: snapshot the output counter so progress can be
  // measured afterwards, and persist the attempt before sending anything.
  const outputBytesAtAttempt = session._outputBytesCounter || 0;
  row.attempts += 1;
  row.lastAttemptAtMs = now;
  row.lastDecision = 'approve';
  row.lastOutcome = 'attempting';
  row.lastDiagnosis = diagnosis || 'unknown';
  row.ruleLabel = review.ruleLabel || row.ruleLabel || context.toolName || 'Approval';
  row.ruleDescription = review.ruleDescription || row.ruleDescription || '';
  row.approvalKey = review.approvalKey || row.approvalKey || '';
  row.cooldownUntilMs = now + RESCUE_RETRY_COOLDOWN_MS;
  row = _saveRescuePattern(row) || row;

  // Record/surface the ACTUAL command (not the AI rescue-monitor's free-text
  // ruleLabel) so the decisions log + any banner read as the operation.
  const cmdTitle = escalationCommandParts(context).title;
  const decision = {
    sessionId,
    toolName: context.toolName,
    commandSummary: cmdTitle || context.toolName,
    fullContext: String(context.fullContext || '').slice(0, 2000),
    warning: context.warning || '',
    decision: 'approved',
    reasoning: review.reasoning || 'Approval rescue approved one missed active prompt.',
    decidedBy: review.decidedBy || 'ai-rescue',
    riskLevel: 'low',
  };
  // The decision is logged before the keystroke is scheduled; a DB failure is
  // reported but does not stop the attempt.
  try { dbModule.addApprovalDecision?.(decision); } catch (e) { console.error('[approval-rescue] decision DB error:', e.message); }

  // Send the one-shot approval (resolves with a timeout status if the
  // scheduler never reports back).
  const sent = await _scheduleRescueAttempt(sessionId, session, context, headlessWorker, broadcastFn, {
    type: 'approval-decision',
    sessionId,
    decision: 'approved',
    decidedBy: review.decidedBy || 'ai-rescue',
    label: cmdTitle || context.toolName || 'Approval',
    reasoning: decision.reasoning,
    riskLevel: 'low',
    approvalRescue: true,
  }, {
    keystroke: review.approvalKey || row.approvalKey || '',
    scheduleTimeoutMs: options.scheduleTimeoutMs,
  });

  // Nothing was sent: record why. This is not counted as a failure.
  if (!sent.sent) {
    row.lastOutcome = sent.status || 'skipped';
    row.consecutiveFailures = Math.max(row.consecutiveFailures, 0);
    _saveRescuePattern(row);
    return { handled: false, reason: row.lastOutcome, fingerprint, decidedBy: review.decidedBy, outcome: row.lastOutcome };
  }

  // Verify after a short delay: success means the prompt is reported as no
  // longer visible, or the terminal produced at least VERIFY_TRANSITION_BYTES
  // of new output.
  const verifyDelayMs = Number(options.verifyDelayMs || RESCUE_DEFAULT_VERIFY_DELAY_MS);
  await new Promise(resolve => setTimeout(resolve, verifyDelayMs));
  const outputBytesNow = session._outputBytesCounter || 0;
  const outputAdvanced = outputBytesNow - outputBytesAtAttempt;
  let promptVisibility = null;
  // A failed visibility check leaves promptVisibility null, so only the
  // output-bytes signal can then count as success.
  try { promptVisibility = await _currentPromptVisibility(sessionId, context, headlessWorker); } catch {}
  const success = promptVisibility === false || outputAdvanced >= VERIFY_TRANSITION_BYTES;

  if (success) {
    row.successes += 1;
    row.consecutiveFailures = 0;
    row.lastOutcome = promptVisibility === false ? 'prompt-cleared' : 'output-advanced';
    // Promote only genuinely new provider patterns; otherwise keep the row as
    // a candidate (an already promoted row stays promoted).
    if (_shouldPromoteApprovalRescuePattern(review, context, {
      providerId: effectiveProviderId,
      source: meta.source || 'gate-miss',
      gateReason: meta.gateReason || meta.reason || '',
      rawDetected: !!meta.rawDetected,
      hintDetected: !!meta.hintDetected,
    })) {
      row.status = 'promoted';
    } else if (row.status !== 'promoted') {
      row.status = 'candidate';
    }
    _saveRescuePattern(row);
    return {
      handled: true,
      reason: 'approved',
      fingerprint,
      decidedBy: review.decidedBy,
      diagnosis,
      outcome: row.lastOutcome,
      promoted: row.status === 'promoted',
    };
  }

  // Verification failed: count the failure, apply the longer cooldown and
  // block the pattern once the consecutive-failure limit is reached, then
  // tell the user.
  row.failures += 1;
  row.consecutiveFailures += 1;
  row.lastOutcome = 'verify-failed';
  row.cooldownUntilMs = now + (row.consecutiveFailures >= RESCUE_MAX_CONSECUTIVE_FAILURES
    ? RESCUE_FAILURE_COOLDOWN_MS
    : RESCUE_RETRY_COOLDOWN_MS);
  if (row.consecutiveFailures >= RESCUE_MAX_CONSECUTIVE_FAILURES) row.status = 'blocked';
  row = _saveRescuePattern(row) || row;
  _broadcastRescueWarning(sessionId, session, broadcastFn, context, {
    ...review,
    reasoning: `CTM tried to auto-approve a missed prompt, but the terminal did not advance (${outputAdvanced} bytes).`,
  }, row);
  return {
    handled: true,
    reason: 'verify-failed',
    fingerprint,
    decidedBy: review.decidedBy,
    diagnosis,
    outcome: 'verify-failed',
  };
}
|
|
1781
|
+
|
|
704
1782
|
// handleApprovalCheck (defined after decideApproval, below) is the main PTY entry
// point: it checks the terminal buffer for approval prompts and handles them.
// Its providerId argument is optional — when present, parsing is delegated to the matching provider.
// Its headlessWorker argument is optional — when present, it enables Phase 3 post-keystroke verification.
|
|
707
|
-
|
|
1785
|
+
// Shared auto-approval decision: "should this command auto-approve?".
// Intended to be reused by the Claude/Codex PTY path (handleApprovalCheck) and
// the Wall-E coding bridge (/api/permissions/walle-check) so that every
// provider runs the same cascade:
//   1. dangerous-command blocklist (editable; hard floor)
//   2. Permission Manager rules (explicit user allow/deny)
//   3. learned approval rules / per-clause heuristic risk
//   4. goal-aligned LLM verifier (medium+ risk; user-allowed commands skip it)
//
// context: parsed approval context; reads toolName, command, sessionContext.
// session: only used to build context.sessionContext when it is missing.
// options: { callModel } forwarded to the verifier (null when absent).
//
// Returns { decision: 'allow' | 'ask', decidedBy, riskLevel, reason, label, ... }.
// 'ask' means "escalate to a human" (the PTY path surfaces a card; Wall-E coding
// raises a permission request). Nothing here hard-denies: blocklist hits, deny
// rules and an "unsafe" verifier verdict all come back as 'ask'.
//
// This function sends no broadcasts or keystrokes and records no decision —
// callers own those side effects. It does populate context.sessionContext when
// the verifier step runs and the field is empty.
async function decideApproval(context, session, options = {}) {
  const callModel = options.callModel || null;
  const command = context.command || '';

  // Step 1: dangerous-command blocklist. Checked first; no later signal overrides it.
  if (isBlocklistEnabled()) {
    const hit = checkBlocklist(command, getBlocklistConfig());
    if (hit.blocked) {
      return {
        decision: 'ask',
        decidedBy: 'blocklist',
        riskLevel: 'high',
        reason: `Dangerous-command blocklist matched (${hit.category}): ${hit.reason}`,
        label: `⚠️ Blocklist: ${hit.reason}`,
        blocklistCategory: hit.category,
        blocklistPatternId: hit.patternId,
      };
    }
  }

  // Step 2: Permission Manager rules. A failed or unavailable rule lookup is
  // treated as "no rules".
  let permRules = [];
  try {
    permRules = typeof dbModule.listPermRules === 'function' ? dbModule.listPermRules({}) : [];
  } catch {
    permRules = [];
  }
  const permMatch = matchPermission({ toolName: context.toolName, command }, permRules);
  if (permMatch?.action === 'deny') {
    return {
      decision: 'ask',
      decidedBy: 'user-deny',
      riskLevel: 'high',
      reason: `Permission Manager deny rule matched: ${permMatch.rule}`,
      label: permMatch.rule,
    };
  }
  const userAllowed = permMatch?.action === 'allow';

  // Step 3: learned rule first; heuristics are consulted only when no rule matches.
  const matchingRule = findMatchingRule(context);
  const heuristic = matchingRule ? null : reviewWithHeuristics(context);
  const riskLevel = (matchingRule ? matchingRule.risk_level : heuristic?.riskLevel) || 'low';

  let decidedBy;
  let label;
  let reason;
  if (userAllowed) {
    decidedBy = 'user-allow';
    label = `Allowed: ${permMatch.rule}`;
    reason = `Permission Manager allow rule matched: ${permMatch.rule}`;
  } else if (matchingRule) {
    decidedBy = 'rule';
    label = matchingRule.label;
    reason = `Matched learned rule: ${matchingRule.label}`;
  } else {
    decidedBy = 'auto';
    label = heuristic?.ruleLabel || context.toolName;
    reason = 'Auto-approved by default (not on the denylist)';
  }

  // Step 4: goal-aligned verifier, medium+ risk only. User-allowed commands
  // skip it because the user has explicitly vouched for them. Only an enabled
  // verifier returning "unsafe" escalates; disabled, safe, unknown or a thrown
  // error all fall through to allow.
  const needsVerifier = !userAllowed && ['medium', 'high'].includes(riskLevel);
  if (needsVerifier) {
    if (!context.sessionContext) {
      context.sessionContext = _buildSessionContext(session);
    }
    let verifier;
    try {
      verifier = await verifyIfEnabled({ context, dbModule, callModel });
    } catch {
      verifier = { enabled: false, verdict: 'unknown' };
    }
    if (verifier.enabled && verifier.verdict === 'unsafe') {
      const blocked = classifyBlockReason(context);
      return {
        decision: 'ask',
        decidedBy: 'verifier',
        riskLevel: 'high',
        reason: blocked.reason || verifier.reason || 'Auto-approval verifier flagged this command as high risk.',
        blockCategory: blocked.category || '',
        verifierVerdict: verifier.verdict,
        label,
      };
    }
  }

  return {
    decision: 'allow',
    decidedBy,
    riskLevel,
    reason,
    label,
    ruleId: matchingRule ? matchingRule.id : null,
  };
}
|
|
1860
|
+
|
|
1861
|
+
async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, providerId, headlessWorker, options = {}) {
|
|
1862
|
+
const callModel = options.callModel || null;
|
|
708
1863
|
const context = parseApprovalContext(cleanText, providerId);
|
|
709
1864
|
if (!context) return false;
|
|
710
1865
|
|
|
@@ -719,11 +1874,16 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
|
|
|
719
1874
|
}
|
|
720
1875
|
_lastApproval.set(sessionId, { fingerprint, ts: now });
|
|
721
1876
|
|
|
1877
|
+
// Normalized signature for this command — recorded on every decision so the
|
|
1878
|
+
// self-adapt loop can reliably promote an "approved-after-escalation" into a
|
|
1879
|
+
// learned rule keyed on this exact signature.
|
|
1880
|
+
const commandSignature = normalizeCommandSignature(context.toolName, context.command);
|
|
1881
|
+
|
|
722
1882
|
// Dangerous-command blocklist (defense-in-depth refusal gate).
|
|
723
1883
|
// Runs BEFORE learned rules / heuristics / AI — a blocklisted command is
|
|
724
1884
|
// never auto-approved regardless of what other signals say. Opt-in.
|
|
725
1885
|
if (isBlocklistEnabled()) {
|
|
726
|
-
const blockCheck = checkBlocklist(context.command || '');
|
|
1886
|
+
const blockCheck = checkBlocklist(context.command || '', getBlocklistConfig());
|
|
727
1887
|
if (blockCheck.blocked) {
|
|
728
1888
|
console.log(`[approval-agent] BLOCKLIST hit session=${sessionId} category=${blockCheck.category} reason="${blockCheck.reason}" cmd="${(context.command || '').slice(0, 200)}"`);
|
|
729
1889
|
const decision = {
|
|
@@ -736,6 +1896,7 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
|
|
|
736
1896
|
reasoning: `Dangerous-command blocklist matched (${blockCheck.category}): ${blockCheck.reason}`,
|
|
737
1897
|
decidedBy: 'blocklist',
|
|
738
1898
|
riskLevel: 'high',
|
|
1899
|
+
commandSignature,
|
|
739
1900
|
};
|
|
740
1901
|
let decisionId;
|
|
741
1902
|
try { decisionId = dbModule.addApprovalDecision(decision); } catch (e) { console.error('[approval-agent] DB error:', e.message); }
|
|
@@ -757,187 +1918,106 @@ async function handleApprovalCheck(sessionId, session, cleanText, broadcastFn, p
|
|
|
757
1918
|
}
|
|
758
1919
|
}
|
|
759
1920
|
|
|
760
|
-
//
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
reasoning: `Matched learned rule: ${matchingRule.label}`,
|
|
772
|
-
decidedBy: 'rule',
|
|
773
|
-
ruleId: matchingRule.id,
|
|
774
|
-
riskLevel: matchingRule.risk_level || 'low',
|
|
775
|
-
};
|
|
776
|
-
|
|
777
|
-
// Record and execute
|
|
1921
|
+
// ── Permission Manager rules (the user's explicit allow/deny) ─────────────
|
|
1922
|
+
// perm_rules are the user's "permissions tab" decisions (e.g. Bash(node:*)).
|
|
1923
|
+
// They normally only configure Claude Code's own settings.json; honoring them
|
|
1924
|
+
// here makes them authoritative across every provider (Codex included).
|
|
1925
|
+
// deny → escalate; allow (without always_ask) → auto-approve and skip the
|
|
1926
|
+
// verifier (the user has explicitly vouched for it).
|
|
1927
|
+
let permRules = [];
|
|
1928
|
+
try { permRules = typeof dbModule.listPermRules === 'function' ? dbModule.listPermRules({}) : []; } catch { permRules = []; }
|
|
1929
|
+
const permMatch = matchPermission({ toolName: context.toolName, command: context.command }, permRules);
|
|
1930
|
+
if (permMatch && permMatch.action === 'deny') {
|
|
1931
|
+
const reasoning = `Permission Manager deny rule matched: ${permMatch.rule}`;
|
|
778
1932
|
try {
|
|
779
|
-
dbModule.addApprovalDecision(
|
|
780
|
-
|
|
1933
|
+
dbModule.addApprovalDecision({
|
|
1934
|
+
sessionId, toolName: context.toolName, commandSummary: `Denied: ${permMatch.rule}`,
|
|
1935
|
+
fullContext: context.fullContext.slice(0, 2000), warning: context.warning,
|
|
1936
|
+
decision: 'escalated', reasoning, decidedBy: 'user-deny', riskLevel: 'high', commandSignature,
|
|
1937
|
+
});
|
|
781
1938
|
} catch (e) { console.error('[approval-agent] DB error:', e.message); }
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
_scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, {
|
|
787
|
-
type: 'approval-decision',
|
|
788
|
-
sessionId,
|
|
789
|
-
decision: 'approved',
|
|
790
|
-
decidedBy: 'rule',
|
|
791
|
-
label: matchingRule.label,
|
|
792
|
-
reasoning: decision.reasoning,
|
|
793
|
-
riskLevel: decision.riskLevel,
|
|
1939
|
+
broadcastFn(sessionId, session, {
|
|
1940
|
+
type: 'approval-decision', sessionId, decision: 'escalated', decidedBy: 'user-deny',
|
|
1941
|
+
label: permMatch.rule, reasoning, riskLevel: 'high',
|
|
1942
|
+
command: (context.command || '').slice(0, 500), warning: context.warning,
|
|
794
1943
|
});
|
|
795
|
-
|
|
796
1944
|
return true;
|
|
797
1945
|
}
|
|
1946
|
+
const userAllowed = !!(permMatch && permMatch.action === 'allow');
|
|
798
1947
|
|
|
799
|
-
//
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
//
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
if (heuristicSig && heuristic.rulePattern) {
|
|
820
|
-
try {
|
|
821
|
-
if (!dbModule.findApprovalRuleBySignature(heuristicSig)) {
|
|
822
|
-
dbModule.upsertApprovalRule({
|
|
823
|
-
pattern: heuristic.rulePattern,
|
|
824
|
-
label: heuristic.ruleLabel || context.toolName,
|
|
825
|
-
description: heuristic.ruleDescription || '',
|
|
826
|
-
category: context.toolName.toLowerCase().replace(/\s+/g, '-'),
|
|
827
|
-
riskLevel: 'low',
|
|
828
|
-
enabled: true,
|
|
829
|
-
commandSignature: heuristicSig,
|
|
830
|
-
});
|
|
831
|
-
}
|
|
832
|
-
} catch {}
|
|
833
|
-
}
|
|
834
|
-
|
|
835
|
-
_scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, {
|
|
836
|
-
type: 'approval-decision', sessionId, decision: 'approved', decidedBy: 'heuristic',
|
|
837
|
-
label: heuristic.ruleLabel || context.toolName, reasoning: heuristic.reasoning, riskLevel: 'low',
|
|
838
|
-
});
|
|
839
|
-
return true;
|
|
1948
|
+
// ── Allow-by-default ──────────────────────────────────────────────────────
|
|
1949
|
+
// Auto-approve everything not on the denylist. The blocklist above is the
|
|
1950
|
+
// denylist. For commands the user has NOT explicitly allowed, an LLM verifier
|
|
1951
|
+
// (on by default; ctm_settings.auto_approval_verifier_enabled) gives a second
|
|
1952
|
+
// opinion on medium+ risk and can escalate. User-allowed commands skip it.
|
|
1953
|
+
const matchingRule = findMatchingRule(context);
|
|
1954
|
+
const heuristic = matchingRule ? null : reviewWithHeuristics(context);
|
|
1955
|
+
const label = userAllowed ? `Allowed: ${permMatch.rule}`
|
|
1956
|
+
: matchingRule ? matchingRule.label : (heuristic.ruleLabel || context.toolName);
|
|
1957
|
+
const decidedBy = userAllowed ? 'user-allow' : (matchingRule ? 'rule' : 'auto');
|
|
1958
|
+
const riskLevel = matchingRule ? (matchingRule.risk_level || 'low') : (heuristic ? (heuristic.riskLevel || 'low') : 'low');
|
|
1959
|
+
const reasoning = userAllowed
|
|
1960
|
+
? `Permission Manager allow rule matched: ${permMatch.rule}`
|
|
1961
|
+
: matchingRule ? `Matched learned rule: ${matchingRule.label}`
|
|
1962
|
+
: 'Auto-approved by default (not on the denylist)';
|
|
1963
|
+
|
|
1964
|
+
if (!userAllowed) {
|
|
1965
|
+
// Verifier scope: medium+ risk only — read-only/low-risk ops auto-approve fast.
|
|
1966
|
+
const verifierBlock = await _verifyAutoApprovalOrBlock(sessionId, session, context, broadcastFn, label, decidedBy, riskLevel, callModel);
|
|
1967
|
+
if (verifierBlock) return true;
|
|
840
1968
|
}
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
1969
|
+
|
|
1970
|
+
try {
|
|
1971
|
+
dbModule.addApprovalDecision({
|
|
844
1972
|
sessionId,
|
|
845
1973
|
toolName: context.toolName,
|
|
846
|
-
commandSummary:
|
|
1974
|
+
commandSummary: label,
|
|
847
1975
|
fullContext: context.fullContext.slice(0, 2000),
|
|
848
1976
|
warning: context.warning,
|
|
849
|
-
decision: '
|
|
850
|
-
reasoning
|
|
851
|
-
decidedBy
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
broadcastFn(sessionId, session, {
|
|
856
|
-
type: 'approval-decision', sessionId, decision: 'escalated', decidedBy: 'heuristic',
|
|
857
|
-
label: context.toolName, reasoning: heuristic.reasoning, riskLevel: 'high',
|
|
1977
|
+
decision: 'approved',
|
|
1978
|
+
reasoning,
|
|
1979
|
+
decidedBy,
|
|
1980
|
+
ruleId: matchingRule ? matchingRule.id : null,
|
|
1981
|
+
riskLevel,
|
|
1982
|
+
commandSignature,
|
|
858
1983
|
});
|
|
859
|
-
|
|
860
|
-
}
|
|
861
|
-
|
|
862
|
-
// Medium risk — call AI for review
|
|
863
|
-
let learnedRules;
|
|
864
|
-
try { learnedRules = dbModule.listApprovalRules(); } catch { learnedRules = []; }
|
|
1984
|
+
if (matchingRule) dbModule.incrementApprovalRuleMatch(matchingRule.id);
|
|
1985
|
+
} catch (e) { console.error('[approval-agent] DB error:', e.message); }
|
|
865
1986
|
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
|
|
1987
|
+
// Send the one-time approval keystroke, guarded against stale prompts.
|
|
1988
|
+
_scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, {
|
|
1989
|
+
type: 'approval-decision',
|
|
869
1990
|
sessionId,
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
decidedBy: 'ai',
|
|
877
|
-
riskLevel: review.riskLevel,
|
|
878
|
-
};
|
|
879
|
-
|
|
880
|
-
// Record decision
|
|
881
|
-
let decisionId;
|
|
882
|
-
try { decisionId = dbModule.addApprovalDecision(decision); } catch (e) { console.error('[approval-agent] DB error:', e.message); }
|
|
883
|
-
|
|
884
|
-
if (review.decision === 'approve') {
|
|
885
|
-
// Auto-approve and learn a new rule with command signature for fast future matching
|
|
886
|
-
const signature = normalizeCommandSignature(context.toolName, context.command);
|
|
887
|
-
// Prefer AI-generated regex; fall back to escaped signature (signatures contain
|
|
888
|
-
// shell metacharacters like ||, (), * that are NOT valid regex patterns).
|
|
889
|
-
const aiPattern = review.rulePattern || '';
|
|
890
|
-
const rulePattern = (aiPattern && isSafeRegex(aiPattern)) ? aiPattern
|
|
891
|
-
: signature ? signature.replace(/[.*+?^${}()|[\]\\]/g, '\\$&') : '';
|
|
892
|
-
const ruleLabel = review.ruleLabel || context.toolName || 'Unknown';
|
|
893
|
-
if (rulePattern) {
|
|
894
|
-
try {
|
|
895
|
-
dbModule.upsertApprovalRule({
|
|
896
|
-
pattern: rulePattern,
|
|
897
|
-
label: ruleLabel,
|
|
898
|
-
description: review.ruleDescription || '',
|
|
899
|
-
category: context.toolName.toLowerCase().replace(/\s+/g, '-'),
|
|
900
|
-
riskLevel: review.riskLevel || 'low',
|
|
901
|
-
enabled: true,
|
|
902
|
-
commandSignature: signature,
|
|
903
|
-
});
|
|
904
|
-
console.log(`[approval-agent] Learned rule: "${ruleLabel}" sig="${signature}" pattern="${rulePattern}"`);
|
|
905
|
-
} catch (e) { console.error('[approval-agent] Rule save error:', e.message); }
|
|
906
|
-
}
|
|
907
|
-
|
|
908
|
-
_scheduleGuardedApproval(session, context, headlessWorker, broadcastFn, sessionId, {
|
|
909
|
-
type: 'approval-decision',
|
|
910
|
-
sessionId,
|
|
911
|
-
decision: 'approved',
|
|
912
|
-
decidedBy: 'ai',
|
|
913
|
-
label: review.ruleLabel || context.toolName,
|
|
914
|
-
reasoning: review.reasoning,
|
|
915
|
-
riskLevel: review.riskLevel,
|
|
916
|
-
});
|
|
917
|
-
} else {
|
|
918
|
-
// Escalate to user
|
|
919
|
-
broadcastFn(sessionId, session, {
|
|
920
|
-
type: 'approval-decision',
|
|
921
|
-
sessionId,
|
|
922
|
-
decision: 'escalated',
|
|
923
|
-
decidedBy: 'ai',
|
|
924
|
-
decisionId,
|
|
925
|
-
label: review.ruleLabel || context.toolName,
|
|
926
|
-
reasoning: review.reasoning,
|
|
927
|
-
riskLevel: review.riskLevel,
|
|
928
|
-
command: context.command.slice(0, 500),
|
|
929
|
-
warning: context.warning,
|
|
930
|
-
});
|
|
931
|
-
}
|
|
1991
|
+
decision: 'approved',
|
|
1992
|
+
decidedBy,
|
|
1993
|
+
label,
|
|
1994
|
+
reasoning,
|
|
1995
|
+
riskLevel,
|
|
1996
|
+
}, { keystrokeOptions: { preferAllowAll: false } });
|
|
932
1997
|
|
|
933
1998
|
return true;
|
|
934
1999
|
}
|
|
935
2000
|
|
|
936
2001
|
module.exports = {
|
|
937
2002
|
parseApprovalContext,
|
|
2003
|
+
isLiveApprovalPrompt,
|
|
2004
|
+
hasComposerStatusFooter,
|
|
2005
|
+
reviewWithHeuristics,
|
|
2006
|
+
_splitShellClauses,
|
|
2007
|
+
_isProcessControlClause,
|
|
2008
|
+
_buildSessionContext,
|
|
938
2009
|
normalizeCommandSignature,
|
|
2010
|
+
escalationCommandParts,
|
|
2011
|
+
classifyBlockReason,
|
|
2012
|
+
_rescueCandidateActionable,
|
|
939
2013
|
findMatchingRule,
|
|
2014
|
+
getApproveKeystroke,
|
|
2015
|
+
sendApprovalKeystroke,
|
|
940
2016
|
reviewWithAI,
|
|
2017
|
+
reviewApprovalRescueCandidate,
|
|
2018
|
+
approvalRescueFingerprint,
|
|
2019
|
+
handleApprovalRescueCandidate,
|
|
941
2020
|
handleApprovalCheck,
|
|
2021
|
+
decideApproval,
|
|
942
2022
|
clearSessionDedup(sessionId) { _lastApproval.delete(sessionId); },
|
|
943
2023
|
};
|