create-walle 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/package.json +2 -2
- package/template/CLAUDE.md +2 -2
- package/template/LICENSE +1 -1
- package/template/bin/ctm-dev-cleanup.js +24 -3
- package/template/bin/ctm-launch.sh +13 -0
- package/template/bin/dev.sh +156 -18
- package/template/bin/node-bin.sh +84 -0
- package/template/bin/pin-node.sh +51 -0
- package/template/claude-task-manager/api-prompts.js +1203 -182
- package/template/claude-task-manager/api-reviews.js +109 -15
- package/template/claude-task-manager/approval-agent.js +1360 -280
- package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
- package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
- package/template/claude-task-manager/db.js +4417 -295
- package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
- package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
- package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
- package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
- package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
- package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
- package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
- package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
- package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
- package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
- package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
- package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
- package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
- package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
- package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
- package/template/claude-task-manager/docs/phone-access-design.md +53 -15
- package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
- package/template/claude-task-manager/docs/phone-setup.md +3 -0
- package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
- package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
- package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
- package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
- package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
- package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
- package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
- package/template/claude-task-manager/docs/session-title-authority.md +32 -0
- package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
- package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
- package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
- package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
- package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
- package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
- package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
- package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
- package/template/claude-task-manager/git-utils.js +897 -27
- package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
- package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
- package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
- package/template/claude-task-manager/lib/agent-presets.js +17 -1
- package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
- package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
- package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
- package/template/claude-task-manager/lib/async-semaphore.js +44 -0
- package/template/claude-task-manager/lib/auth-context.js +5 -0
- package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
- package/template/claude-task-manager/lib/auth-rules.js +29 -2
- package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
- package/template/claude-task-manager/lib/background-llm.js +144 -17
- package/template/claude-task-manager/lib/branch-inventory.js +212 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
- package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
- package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
- package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
- package/template/claude-task-manager/lib/codex-zst.js +124 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
- package/template/claude-task-manager/lib/connection-health.js +232 -0
- package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
- package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
- package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
- package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
- package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
- package/template/claude-task-manager/lib/document-review.js +141 -6
- package/template/claude-task-manager/lib/escalation-review.js +152 -0
- package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
- package/template/claude-task-manager/lib/headless-term-service.js +678 -0
- package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
- package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
- package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
- package/template/claude-task-manager/lib/main-db-census.js +216 -0
- package/template/claude-task-manager/lib/message-pagination.js +106 -4
- package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
- package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
- package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
- package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
- package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
- package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
- package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
- package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
- package/template/claude-task-manager/lib/perf-tracker.js +242 -6
- package/template/claude-task-manager/lib/permission-match.js +76 -0
- package/template/claude-task-manager/lib/permission-sync.js +133 -20
- package/template/claude-task-manager/lib/process-title.js +35 -0
- package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
- package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
- package/template/claude-task-manager/lib/prompt-intent.js +132 -0
- package/template/claude-task-manager/lib/provider-user-context.js +34 -0
- package/template/claude-task-manager/lib/read-pool-client.js +313 -0
- package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
- package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
- package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
- package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
- package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
- package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
- package/template/claude-task-manager/lib/restart-guard.js +109 -0
- package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
- package/template/claude-task-manager/lib/restore-policy.js +13 -0
- package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
- package/template/claude-task-manager/lib/restore-runtime.js +68 -0
- package/template/claude-task-manager/lib/restore-storm.js +34 -0
- package/template/claude-task-manager/lib/resume-cwd.js +36 -0
- package/template/claude-task-manager/lib/resume-preflight.js +313 -0
- package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
- package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
- package/template/claude-task-manager/lib/scheduler.js +21 -1
- package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
- package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
- package/template/claude-task-manager/lib/server-listeners.js +239 -0
- package/template/claude-task-manager/lib/session-capture.js +42 -7
- package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
- package/template/claude-task-manager/lib/session-history.js +388 -43
- package/template/claude-task-manager/lib/session-host-manager.js +287 -0
- package/template/claude-task-manager/lib/session-image-refs.js +209 -0
- package/template/claude-task-manager/lib/session-jobs.js +399 -59
- package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
- package/template/claude-task-manager/lib/session-restore.js +53 -0
- package/template/claude-task-manager/lib/session-standup.js +123 -23
- package/template/claude-task-manager/lib/session-state-bus.js +14 -0
- package/template/claude-task-manager/lib/session-stream.js +64 -16
- package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
- package/template/claude-task-manager/lib/session-token-usage.js +494 -0
- package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
- package/template/claude-task-manager/lib/setup-network-config.js +9 -0
- package/template/claude-task-manager/lib/size-cap.js +45 -0
- package/template/claude-task-manager/lib/size-cap.test.js +62 -0
- package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
- package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
- package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
- package/template/claude-task-manager/lib/standup-attention.js +7 -3
- package/template/claude-task-manager/lib/status-authority.js +39 -0
- package/template/claude-task-manager/lib/status-hooks.js +4 -0
- package/template/claude-task-manager/lib/storage-migration.js +235 -0
- package/template/claude-task-manager/lib/structured-capture.js +298 -0
- package/template/claude-task-manager/lib/sync-io-census.js +163 -0
- package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
- package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
- package/template/claude-task-manager/lib/terminal-choice.js +364 -0
- package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
- package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
- package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
- package/template/claude-task-manager/lib/timeline-order.js +122 -0
- package/template/claude-task-manager/lib/transcript-store.js +348 -43
- package/template/claude-task-manager/lib/transport-security.js +84 -1
- package/template/claude-task-manager/lib/wait-state.js +184 -0
- package/template/claude-task-manager/lib/walle-client.js +47 -5
- package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
- package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
- package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
- package/template/claude-task-manager/lib/walle-native-health.js +403 -0
- package/template/claude-task-manager/lib/walle-repair.js +701 -0
- package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
- package/template/claude-task-manager/lib/walle-session-context.js +57 -21
- package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
- package/template/claude-task-manager/lib/walle-transcript.js +52 -0
- package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
- package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
- package/template/claude-task-manager/package.json +1 -1
- package/template/claude-task-manager/prompt-harvest.js +89 -66
- package/template/claude-task-manager/providers/claude-code.js +51 -3
- package/template/claude-task-manager/providers/cursor.js +140 -45
- package/template/claude-task-manager/public/css/reviews.css +551 -61
- package/template/claude-task-manager/public/css/setup.css +191 -0
- package/template/claude-task-manager/public/css/walle-session.css +865 -10
- package/template/claude-task-manager/public/css/walle.css +154 -0
- package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
- package/template/claude-task-manager/public/index.html +18516 -2058
- package/template/claude-task-manager/public/ipad.html +363 -0
- package/template/claude-task-manager/public/js/document-review-links.js +301 -0
- package/template/claude-task-manager/public/js/image-normalize.js +69 -36
- package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
- package/template/claude-task-manager/public/js/prompts.js +66 -29
- package/template/claude-task-manager/public/js/reviews.js +901 -133
- package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
- package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
- package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
- package/template/claude-task-manager/public/js/setup.js +1273 -176
- package/template/claude-task-manager/public/js/stream-view.js +691 -73
- package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
- package/template/claude-task-manager/public/js/walle-session.js +2455 -158
- package/template/claude-task-manager/public/js/walle.js +455 -28
- package/template/claude-task-manager/public/m/app.css +2909 -262
- package/template/claude-task-manager/public/m/app.js +6601 -398
- package/template/claude-task-manager/public/m/claim.html +224 -17
- package/template/claude-task-manager/public/m/index.html +117 -21
- package/template/claude-task-manager/public/m/sw.js +3 -1
- package/template/claude-task-manager/public/manifest.json +2 -2
- package/template/claude-task-manager/public/prompts.html +30 -14
- package/template/claude-task-manager/queue-engine.js +507 -28
- package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
- package/template/claude-task-manager/server.js +14341 -2197
- package/template/claude-task-manager/session-integrity.js +160 -18
- package/template/claude-task-manager/session-search-ranking.js +1 -0
- package/template/claude-task-manager/session-utils.js +25 -5
- package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
- package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
- package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
- package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
- package/template/claude-task-manager/workers/harvest-worker.js +9 -55
- package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
- package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
- package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
- package/template/claude-task-manager/workers/session-host-process.js +146 -0
- package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
- package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
- package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
- package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
- package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
- package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
- package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
- package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
- package/template/docs/design/markdown-review-pane.md +206 -0
- package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
- package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
- package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
- package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
- package/template/docs/private-memory-and-pii-policy.md +69 -0
- package/template/package.json +2 -1
- package/template/scripts/check-private-data.js +201 -0
- package/template/shared/sqlite-owner-guard.js +30 -0
- package/template/shared/sqlite-owner-write-queue.js +225 -0
- package/template/shared/sqlite-storage-policy.js +111 -0
- package/template/shared/sqlite-write-lock.js +428 -0
- package/template/wall-e/agent-runners/claude-code.js +5 -0
- package/template/wall-e/agent.js +166 -22
- package/template/wall-e/api-walle.js +524 -70
- package/template/wall-e/auth/provider-flows.js +11 -1
- package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
- package/template/wall-e/brain.js +1614 -141
- package/template/wall-e/chat/attachment-blocks.js +96 -0
- package/template/wall-e/chat/attachments.js +2 -1
- package/template/wall-e/chat/capability-resolver.js +7 -7
- package/template/wall-e/chat/context-messages.js +28 -0
- package/template/wall-e/chat/conversation-frame.js +630 -0
- package/template/wall-e/chat/provider-messages.js +125 -0
- package/template/wall-e/chat.js +1002 -233
- package/template/wall-e/coding/acceptance-contract.js +170 -0
- package/template/wall-e/coding/acp-adapter.js +1 -1
- package/template/wall-e/coding/agent-catalog.js +3 -0
- package/template/wall-e/coding/artifact-store.js +93 -0
- package/template/wall-e/coding/capability-router.js +120 -0
- package/template/wall-e/coding/coding-run-controller.js +423 -0
- package/template/wall-e/coding/compaction-service.js +157 -12
- package/template/wall-e/coding/frontend-verification.js +258 -0
- package/template/wall-e/coding/lifecycle-hooks.js +75 -0
- package/template/wall-e/coding/local-preview-contract.js +157 -0
- package/template/wall-e/coding/permission-service.js +57 -13
- package/template/wall-e/coding/prompt-bundle.js +19 -1
- package/template/wall-e/coding/prompt-section-registry.js +227 -0
- package/template/wall-e/coding/provider-compat.js +15 -0
- package/template/wall-e/coding/runtime-events.js +224 -0
- package/template/wall-e/coding/runtime-mode.js +3 -0
- package/template/wall-e/coding/side-git-snapshot.js +160 -4
- package/template/wall-e/coding/snapshot-service.js +143 -1
- package/template/wall-e/coding/stream-processor.js +388 -34
- package/template/wall-e/coding/task-tool.js +141 -4
- package/template/wall-e/coding/tool-execution-controller.js +365 -0
- package/template/wall-e/coding/tool-registry.js +43 -5
- package/template/wall-e/coding/user-hooks.js +217 -0
- package/template/wall-e/coding-orchestrator.js +1330 -221
- package/template/wall-e/coding-prompts.js +20 -4
- package/template/wall-e/context/context-builder.js +15 -2
- package/template/wall-e/decision/confidence.js +1 -1
- package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
- package/template/wall-e/docs/external-action-controller.md +26 -6
- package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
- package/template/wall-e/embeddings.js +591 -53
- package/template/wall-e/external-action-controller.js +12 -0
- package/template/wall-e/http/auth.js +1 -0
- package/template/wall-e/http/chat-api.js +46 -11
- package/template/wall-e/http/model-admin.js +836 -34
- package/template/wall-e/lib/boot-profile.js +88 -0
- package/template/wall-e/lib/event-loop-monitor.js +93 -0
- package/template/wall-e/lib/service-health.js +194 -0
- package/template/wall-e/llm/anthropic.js +130 -5
- package/template/wall-e/llm/client.js +266 -63
- package/template/wall-e/llm/default-fallback.js +382 -0
- package/template/wall-e/llm/health.js +19 -0
- package/template/wall-e/llm/message-guard.js +78 -0
- package/template/wall-e/llm/model-catalog.js +252 -1
- package/template/wall-e/llm/openai.js +26 -4
- package/template/wall-e/llm/portkey-sync.js +654 -0
- package/template/wall-e/llm/provider-error.js +30 -2
- package/template/wall-e/llm/registry.js +5 -1
- package/template/wall-e/llm/request-compat.js +67 -0
- package/template/wall-e/loops/backfill.js +79 -23
- package/template/wall-e/loops/brain-optimize.js +67 -0
- package/template/wall-e/loops/ingest.js +25 -10
- package/template/wall-e/loops/question-digest.js +160 -0
- package/template/wall-e/loops/reflect.js +6 -4
- package/template/wall-e/loops/think.js +39 -12
- package/template/wall-e/mcp-server.js +318 -36
- package/template/wall-e/memory/ctm-context-client.js +52 -14
- package/template/wall-e/memory/ctm-operational-context.js +237 -0
- package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
- package/template/wall-e/memory/ctm-session-context.js +111 -63
- package/template/wall-e/prompts/coding/deepseek.txt +3 -0
- package/template/wall-e/prompts/coding/gemini.txt +6 -0
- package/template/wall-e/prompts/coding/gpt.txt +6 -0
- package/template/wall-e/prompts/coding/local.txt +7 -0
- package/template/wall-e/runtime/decision-hooks.js +115 -0
- package/template/wall-e/runtime/devbox-gateway.js +82 -8
- package/template/wall-e/runtime/prompt-manifest.js +86 -0
- package/template/wall-e/runtime/tool-executor.js +269 -0
- package/template/wall-e/runtime/tool-result-envelope.js +138 -0
- package/template/wall-e/runtime/transcript-projection.js +60 -0
- package/template/wall-e/runtime/walle-runtime.js +224 -0
- package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
- package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
- package/template/wall-e/server.js +15 -0
- package/template/wall-e/session-files.js +9 -0
- package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
- package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
- package/template/wall-e/skills/claude-code-reader.js +7 -3
- package/template/wall-e/skills/script-skill-runner.js +10 -0
- package/template/wall-e/skills/skill-planner.js +38 -0
- package/template/wall-e/tools/builtin-middleware.js +19 -9
- package/template/wall-e/tools/local-tools.js +1428 -16
- package/template/wall-e/tools/permission-checker.js +73 -5
- package/template/wall-e/tools/question-manager.js +117 -7
- package/template/wall-e/training/harvester.js +12 -28
- package/template/wall-e/training/replay.js +25 -80
- package/template/website/index.html +10 -10
- package/template/wall-e/eval/ab-test.js +0 -203
- package/template/wall-e/eval/agent-runner.js +0 -772
- package/template/wall-e/eval/agent-scorer.js +0 -461
- package/template/wall-e/eval/aggregator.js +0 -414
- package/template/wall-e/eval/allowed-test-commands.js +0 -34
- package/template/wall-e/eval/benchmark-generator.js +0 -113
- package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
- package/template/wall-e/eval/benchmarks/chat.json +0 -82
- package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
- package/template/wall-e/eval/benchmarks/coding.json +0 -122
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
- package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
- package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
- package/template/wall-e/eval/benchmarks.js +0 -669
- package/template/wall-e/eval/cc-replay.js +0 -719
- package/template/wall-e/eval/chat-eval.js +0 -525
- package/template/wall-e/eval/check-keys.js +0 -15
- package/template/wall-e/eval/check-providers.js +0 -42
- package/template/wall-e/eval/codex-cli-baseline.js +0 -669
- package/template/wall-e/eval/coding-agent-real.js +0 -570
- package/template/wall-e/eval/context-compactor.js +0 -251
- package/template/wall-e/eval/debug-agent003.js +0 -68
- package/template/wall-e/eval/diagnostics.js +0 -216
- package/template/wall-e/eval/eval-orchestrator.js +0 -642
- package/template/wall-e/eval/evaluate.js +0 -202
- package/template/wall-e/eval/evaluator.js +0 -373
- package/template/wall-e/eval/exporter.js +0 -212
- package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
- package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
- package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
- package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
- package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
- package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
- package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
- package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
- package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
- package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
- package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
- package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
- package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
- package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
- package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
- package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
- package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
- package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
- package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
- package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
- package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
- package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
- package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
- package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
- package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
- package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
- package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
- package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
- package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
- package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
- package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
- package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
- package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
- package/template/wall-e/eval/harvester.js +0 -685
- package/template/wall-e/eval/head-to-head.js +0 -388
- package/template/wall-e/eval/humaneval-adapter.js +0 -321
- package/template/wall-e/eval/list-models.js +0 -31
- package/template/wall-e/eval/livecodebench-adapter.js +0 -291
- package/template/wall-e/eval/mail-integration.js +0 -443
- package/template/wall-e/eval/manifest.js +0 -186
- package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
- package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
- package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
- package/template/wall-e/eval/meta-harness/cli.js +0 -86
- package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
- package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
- package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
- package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
- package/template/wall-e/eval/meta-harness/frontier.js +0 -96
- package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
- package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
- package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
- package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
- package/template/wall-e/eval/meta-harness/reporting.js +0 -58
- package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
- package/template/wall-e/eval/meta-harness/validation.js +0 -81
- package/template/wall-e/eval/promoter.js +0 -228
- package/template/wall-e/eval/provider-normalizer.js +0 -33
- package/template/wall-e/eval/replay.js +0 -395
- package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
- package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
- package/template/wall-e/eval/run-coding-agent-real.js +0 -187
- package/template/wall-e/eval/run-eval.js +0 -435
- package/template/wall-e/eval/run-model-comparison.js +0 -142
- package/template/wall-e/eval/session-evaluator.js +0 -187
- package/template/wall-e/eval/session-miner.js +0 -207
- package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
- package/template/wall-e/eval/session-transcripts.js +0 -509
- package/template/wall-e/eval/shadow.js +0 -161
- package/template/wall-e/eval/swebench-adapter.js +0 -345
- package/template/wall-e/eval/swebench-docker.js +0 -192
- package/template/wall-e/eval/train.py +0 -320
- package/template/wall-e/eval/trainer.js +0 -232
- package/template/wall-e/eval/weekly-eval-loop.js +0 -241
|
@@ -15,6 +15,24 @@ const agentRunners = require('./agent-runners');
|
|
|
15
15
|
const { ASK_USER_TOOL, QuestionManager } = require('./tools/question-manager');
|
|
16
16
|
const { detectProject } = require('./tools/project-detector');
|
|
17
17
|
const { normalizeToolCall } = require('./llm/text-tool-calls');
|
|
18
|
+
const {
|
|
19
|
+
hasVerificationEvidence,
|
|
20
|
+
hasFailedVerificationAttempt,
|
|
21
|
+
callName,
|
|
22
|
+
toolResultSucceeded,
|
|
23
|
+
normalizeToolCallEvidence,
|
|
24
|
+
buildAcceptanceContract,
|
|
25
|
+
collectToolEvidence,
|
|
26
|
+
validatorFailure,
|
|
27
|
+
validatorPass,
|
|
28
|
+
summarizeValidatorFailures,
|
|
29
|
+
} = require('./coding/acceptance-contract');
|
|
30
|
+
const {
|
|
31
|
+
isFrontendFile,
|
|
32
|
+
checkFrontendStaticContracts,
|
|
33
|
+
resolveFrontendEntrypoints,
|
|
34
|
+
} = require('./coding/frontend-verification');
|
|
35
|
+
const { pathToFileURL } = require('node:url');
|
|
18
36
|
|
|
19
37
|
// ─── Progress Streaming (Phase 8) ────────────────────────────────────────────
|
|
20
38
|
// Global progress emitter — SSE endpoint and chat handler subscribe to this.
|
|
@@ -64,6 +82,10 @@ const {
|
|
|
64
82
|
resolvePromptCapabilities,
|
|
65
83
|
loadRequestedSkillInstructions,
|
|
66
84
|
} = require('./coding/prompt-capabilities');
|
|
85
|
+
const {
|
|
86
|
+
routeArtifactCapabilities,
|
|
87
|
+
hasCapability,
|
|
88
|
+
} = require('./coding/capability-router');
|
|
67
89
|
const { createCodingTranscript } = require('./coding/transcript-writer');
|
|
68
90
|
const { createCodingCapabilities } = require('./coding/capability-broker');
|
|
69
91
|
const {
|
|
@@ -72,8 +94,10 @@ const {
|
|
|
72
94
|
} = require('./coding/compaction-service');
|
|
73
95
|
const {
|
|
74
96
|
emitAgentRunContextWarnings,
|
|
75
|
-
resolveAgentRunContext,
|
|
76
97
|
} = require('./runtime/agent-run-context');
|
|
98
|
+
const {
|
|
99
|
+
resolveWallERuntimeProfile,
|
|
100
|
+
} = require('./runtime/walle-runtime');
|
|
77
101
|
const { estimateTokens, estimateMessagesTokens } = require('./context/token-counter');
|
|
78
102
|
const { recoverAllowedTextToolCalls } = require('./llm/text-tool-calls');
|
|
79
103
|
|
|
@@ -112,17 +136,42 @@ const CODING_TOOLS = [
|
|
|
112
136
|
{
|
|
113
137
|
name: 'run_shell',
|
|
114
138
|
description: 'Run a shell command. Supports pipes, redirects, and subshells. '
|
|
115
|
-
+ 'Destructive commands (rm, sudo, etc.) are blocked.'
|
|
139
|
+
+ 'Destructive commands (rm, sudo, etc.) are blocked. '
|
|
140
|
+
+ 'For dev servers, watchers, or long builds set background:true (never `&`) and poll with bg_output.',
|
|
116
141
|
input_schema: {
|
|
117
142
|
type: 'object',
|
|
118
143
|
properties: {
|
|
119
144
|
command: { type: 'string', description: 'Shell command to run (e.g., "npm test | tail -20")' },
|
|
120
145
|
timeout_ms: { type: 'number', description: 'Timeout in ms (default 30000)' },
|
|
121
146
|
cwd: { type: 'string', description: 'Working directory (optional)' },
|
|
147
|
+
background: { type: 'boolean', description: 'Run detached in the background; returns resource_id immediately. Use for dev servers/watchers/long builds instead of `&`.' },
|
|
122
148
|
},
|
|
123
149
|
required: ['command'],
|
|
124
150
|
},
|
|
125
151
|
},
|
|
152
|
+
{
|
|
153
|
+
name: 'bg_output',
|
|
154
|
+
description: 'Read the latest output of a background process started with run_shell {background:true}. Returns status (running/exited), exit code, and the log tail.',
|
|
155
|
+
input_schema: {
|
|
156
|
+
type: 'object',
|
|
157
|
+
properties: {
|
|
158
|
+
resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
|
|
159
|
+
tail_lines: { type: 'number', description: 'Trailing log lines to return (default 100)' },
|
|
160
|
+
},
|
|
161
|
+
required: ['resource_id'],
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
name: 'bg_kill',
|
|
166
|
+
description: 'Stop a background process started with run_shell {background:true}.',
|
|
167
|
+
input_schema: {
|
|
168
|
+
type: 'object',
|
|
169
|
+
properties: {
|
|
170
|
+
resource_id: { type: 'string', description: 'resource_id returned by run_shell {background:true}' },
|
|
171
|
+
},
|
|
172
|
+
required: ['resource_id'],
|
|
173
|
+
},
|
|
174
|
+
},
|
|
126
175
|
{
|
|
127
176
|
name: 'glob',
|
|
128
177
|
description: 'Find files matching a glob pattern (e.g., "**/*.js").',
|
|
@@ -174,6 +223,140 @@ const CODING_TOOLS = [
|
|
|
174
223
|
required: ['url'],
|
|
175
224
|
},
|
|
176
225
|
},
|
|
226
|
+
{
|
|
227
|
+
name: 'browser_smoke_test',
|
|
228
|
+
description: 'Render a URL in headless Chrome, collect JavaScript exceptions, console errors, failed requests, and safely click interactive elements. Use after frontend/UI work; screenshots prove appearance, this proves the page does not break when loaded or clicked.',
|
|
229
|
+
input_schema: {
|
|
230
|
+
type: 'object',
|
|
231
|
+
properties: {
|
|
232
|
+
url: { type: 'string', description: 'URL to validate; supports file:// local HTML files or localhost URLs from start_static_server.' },
|
|
233
|
+
viewport: { type: 'string', enum: ['desktop', 'mobile', 'tablet'], description: 'Viewport preset. Default: desktop.' },
|
|
234
|
+
click_selectors: { type: 'array', items: { type: 'string' }, description: 'Optional selectors to click. Defaults to [onclick], button, [role=button], and hash links.' },
|
|
235
|
+
max_clicks: { type: 'number', description: 'Maximum interactive elements to click. Default: 20.' },
|
|
236
|
+
settle_ms: { type: 'number', description: 'Milliseconds to wait after load/clicks. Default: 750.' },
|
|
237
|
+
timeout_ms: { type: 'number', description: 'Overall timeout. Default: 45000.' },
|
|
238
|
+
},
|
|
239
|
+
required: ['url'],
|
|
240
|
+
},
|
|
241
|
+
},
|
|
242
|
+
{
|
|
243
|
+
name: 'check_url',
|
|
244
|
+
description: 'Fetch an http:// or https:// URL and report whether it returns a 2xx/3xx response. Use this before claiming a local dev/static server is reachable.',
|
|
245
|
+
input_schema: {
|
|
246
|
+
type: 'object',
|
|
247
|
+
properties: {
|
|
248
|
+
url: { type: 'string', description: 'URL to fetch.' },
|
|
249
|
+
timeout_ms: { type: 'number', description: 'Timeout in ms (default 5000).' },
|
|
250
|
+
},
|
|
251
|
+
required: ['url'],
|
|
252
|
+
},
|
|
253
|
+
},
|
|
254
|
+
{
|
|
255
|
+
name: 'web_search',
|
|
256
|
+
description: 'Search the public web and return result titles, URLs, and snippets. Use to find documentation or error-message references when you do not know the URL; then read the page with web_fetch.',
|
|
257
|
+
input_schema: {
|
|
258
|
+
type: 'object',
|
|
259
|
+
properties: {
|
|
260
|
+
query: { type: 'string', description: 'Search query' },
|
|
261
|
+
max_results: { type: 'number', description: 'Max results (default 8)' },
|
|
262
|
+
},
|
|
263
|
+
required: ['query'],
|
|
264
|
+
},
|
|
265
|
+
},
|
|
266
|
+
{
|
|
267
|
+
name: 'web_fetch',
|
|
268
|
+
description: 'Fetch a web page or API endpoint and return extracted text. Use for reading documentation or references found via web_search.',
|
|
269
|
+
input_schema: {
|
|
270
|
+
type: 'object',
|
|
271
|
+
properties: {
|
|
272
|
+
url: { type: 'string', description: 'URL to fetch' },
|
|
273
|
+
extract_text: { type: 'boolean', description: 'Strip HTML tags (default true)' },
|
|
274
|
+
},
|
|
275
|
+
required: ['url'],
|
|
276
|
+
},
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
name: 'start_static_server',
|
|
280
|
+
description: 'Start a managed local static file server for a directory, wait for its health URL, and return a verified URL plus resource_id. Prefer this over run_shell background servers.',
|
|
281
|
+
input_schema: {
|
|
282
|
+
type: 'object',
|
|
283
|
+
properties: {
|
|
284
|
+
directory: { type: 'string', description: 'Directory to serve. Defaults to project cwd.' },
|
|
285
|
+
port: { type: 'number', description: 'Port to bind. Use 0 or omit for an available port.' },
|
|
286
|
+
route: { type: 'string', description: 'Route to health-check after start. Default: /index.html.' },
|
|
287
|
+
timeout_ms: { type: 'number', description: 'Startup timeout in ms (default 5000).' },
|
|
288
|
+
},
|
|
289
|
+
},
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
name: 'stop_static_server',
|
|
293
|
+
description: 'Stop a static server started by start_static_server using its resource_id.',
|
|
294
|
+
input_schema: {
|
|
295
|
+
type: 'object',
|
|
296
|
+
properties: {
|
|
297
|
+
resource_id: { type: 'string', description: 'resource_id returned by start_static_server.' },
|
|
298
|
+
},
|
|
299
|
+
required: ['resource_id'],
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
name: 'pdf_info',
|
|
304
|
+
description: 'Validate a PDF file and return structured metadata such as bytes, page count when available, and hash. Use before reading, summarizing, or claiming a PDF artifact is valid.',
|
|
305
|
+
input_schema: {
|
|
306
|
+
type: 'object',
|
|
307
|
+
properties: {
|
|
308
|
+
file_path: { type: 'string', description: 'Path to the PDF file.' },
|
|
309
|
+
max_bytes: { type: 'number', description: 'Maximum allowed file size in bytes (default 32MB).' },
|
|
310
|
+
},
|
|
311
|
+
required: ['file_path'],
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
{
|
|
315
|
+
name: 'pdf_render_pages',
|
|
316
|
+
description: 'Render a bounded PDF page range to image previews using pdftoppm when available. Use page previews to visually inspect generated or input PDFs before claiming success.',
|
|
317
|
+
input_schema: {
|
|
318
|
+
type: 'object',
|
|
319
|
+
properties: {
|
|
320
|
+
file_path: { type: 'string', description: 'Path to the PDF file.' },
|
|
321
|
+
pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1". Maximum 20 pages.' },
|
|
322
|
+
output_dir: { type: 'string', description: 'Directory for rendered preview images. Defaults to a temp directory.' },
|
|
323
|
+
dpi: { type: 'number', description: 'Render DPI, 72-200. Default 144.' },
|
|
324
|
+
},
|
|
325
|
+
required: ['file_path'],
|
|
326
|
+
},
|
|
327
|
+
},
|
|
328
|
+
{
|
|
329
|
+
name: 'pdf_read_pages',
|
|
330
|
+
description: 'Read text from a bounded PDF page range using pdftotext when available, and include PDF metadata. Use for PDF analysis before answering from a document.',
|
|
331
|
+
input_schema: {
|
|
332
|
+
type: 'object',
|
|
333
|
+
properties: {
|
|
334
|
+
file_path: { type: 'string', description: 'Path to the PDF file.' },
|
|
335
|
+
pages: { type: 'string', description: 'Page range like "1", "1-3", or "2-". Defaults to "1-5". Maximum 20 pages.' },
|
|
336
|
+
max_chars: { type: 'number', description: 'Maximum text characters to return (default 20000).' },
|
|
337
|
+
},
|
|
338
|
+
required: ['file_path'],
|
|
339
|
+
},
|
|
340
|
+
},
|
|
341
|
+
{
|
|
342
|
+
name: 'make_pdf',
|
|
343
|
+
description: 'Generate a PDF from Markdown or HTML through the configured make-pdf renderer, then validate the output and optionally render page previews. Use for PDF creation instead of claiming a document is done from source text alone.',
|
|
344
|
+
input_schema: {
|
|
345
|
+
type: 'object',
|
|
346
|
+
properties: {
|
|
347
|
+
input_path: { type: 'string', description: 'Markdown or HTML source file to render.' },
|
|
348
|
+
output_path: { type: 'string', description: 'Desired PDF output path. Defaults next to input.' },
|
|
349
|
+
title: { type: 'string', description: 'Optional document title metadata.' },
|
|
350
|
+
page_size: { type: 'string', description: 'Optional page size such as Letter or A4.' },
|
|
351
|
+
margins: { type: 'string', description: 'Optional margin preset/string if supported by renderer.' },
|
|
352
|
+
cover: { type: 'boolean', description: 'Ask the renderer for a cover page when supported.' },
|
|
353
|
+
toc: { type: 'boolean', description: 'Ask the renderer for a table of contents when supported.' },
|
|
354
|
+
watermark: { type: 'string', description: 'Optional watermark text when supported.' },
|
|
355
|
+
render_preview: { type: 'boolean', description: 'Render first-page previews after generation. Default true.' },
|
|
356
|
+
},
|
|
357
|
+
required: ['input_path'],
|
|
358
|
+
},
|
|
359
|
+
},
|
|
177
360
|
{
|
|
178
361
|
name: 'edit_file',
|
|
179
362
|
description: 'Make a targeted edit to a file by replacing a string match. Uses a 9-strategy fuzzy matching chain — tolerates minor whitespace, indentation, and Unicode differences. More efficient than write_file for modifying existing files.',
|
|
@@ -332,8 +515,8 @@ const CODING_TOOLS = [
|
|
|
332
515
|
// Inspired by OpenCode's agent types (build/plan/explore).
|
|
333
516
|
// We simplify to tool filtering per phase since Wall-E has its own permission checker.
|
|
334
517
|
|
|
335
|
-
const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
|
|
336
|
-
const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
|
|
518
|
+
const READ_ONLY_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
|
|
519
|
+
const REVIEW_TOOL_NAMES = new Set(['read_file', 'glob', 'grep_files', 'list_directory', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages', 'lsp_symbols', 'lsp_definition', 'lsp_references', 'lsp_diagnostics', 'lsp_hover', 'lsp_implementation']);
|
|
337
520
|
// BUILD uses all CODING_TOOLS (default)
|
|
338
521
|
|
|
339
522
|
const READ_ONLY_TOOLS = CODING_TOOLS.filter(t => READ_ONLY_TOOL_NAMES.has(t.name));
|
|
@@ -420,6 +603,7 @@ function toolRequiresPermission(name) {
|
|
|
420
603
|
'apply_patch',
|
|
421
604
|
'multi_edit',
|
|
422
605
|
'browser_screenshot',
|
|
606
|
+
'browser_smoke_test',
|
|
423
607
|
'applescript',
|
|
424
608
|
'claude_code',
|
|
425
609
|
'mail_send',
|
|
@@ -486,6 +670,78 @@ function parsePlan(output) {
|
|
|
486
670
|
throw new Error('Failed to parse plan: no valid JSON with non-empty subtasks array found');
|
|
487
671
|
}
|
|
488
672
|
|
|
673
|
+
function safeBranchSlug(text, fallback = 'task') {
|
|
674
|
+
const slug = String(text || '')
|
|
675
|
+
.toLowerCase()
|
|
676
|
+
.replace(/[^a-z0-9]+/g, '-')
|
|
677
|
+
.replace(/^-+|-+$/g, '')
|
|
678
|
+
.slice(0, 48);
|
|
679
|
+
return slug || fallback;
|
|
680
|
+
}
|
|
681
|
+
|
|
682
|
+
function plannerOutputRequestsClarification(output = '') {
|
|
683
|
+
const text = contentToText(output).toLowerCase();
|
|
684
|
+
if (!text.trim()) return false;
|
|
685
|
+
return /\b(?:please|can you|could you)\s+(?:provide|clarify|tell me|share)\b/.test(text)
|
|
686
|
+
|| /\b(?:need|needs|require|required)\s+(?:more|additional)\s+(?:information|context|details)\b/.test(text)
|
|
687
|
+
|| /\b(?:which|what)\s+(?:file|directory|project|repo|repository|path)\b[\s\S]{0,120}\?/.test(text);
|
|
688
|
+
}
|
|
689
|
+
|
|
690
|
+
function plannerOutputRefusesTask(output = '') {
|
|
691
|
+
const text = contentToText(output).toLowerCase();
|
|
692
|
+
if (!text.trim()) return false;
|
|
693
|
+
return /\b(?:i\s+)?(?:cannot|can't|unable to|not able to)\b[\s\S]{0,160}\b(?:help|comply|perform|complete|do this task)\b/.test(text)
|
|
694
|
+
|| /\b(?:unsafe|not allowed|forbidden|against policy)\b/.test(text);
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
function shouldRecoverPlannerParseFailure({ request, output, cwd } = {}) {
|
|
698
|
+
const requestText = contentToText(request);
|
|
699
|
+
if (!isActionRequiredPrompt(requestText, { mode: 'build' })) return false;
|
|
700
|
+
if (!cwd) return false;
|
|
701
|
+
const outputText = contentToText(output);
|
|
702
|
+
if (plannerOutputRequestsClarification(outputText)) return false;
|
|
703
|
+
if (plannerOutputRefusesTask(outputText)) return false;
|
|
704
|
+
return true;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
function buildPlannerRecoveryPlan(request, context = {}, parseErr, plannerOutput = '') {
|
|
708
|
+
const filesHint = Object.keys(context.relevantFiles || {}).slice(0, 12);
|
|
709
|
+
const plannerNotes = [
|
|
710
|
+
context.plannerNotes ? `Planner exploration notes:\n${String(context.plannerNotes).slice(0, 2400)}` : '',
|
|
711
|
+
plannerOutput ? `Unstructured planner output excerpt:\n${contentToText(plannerOutput).slice(0, 1600)}` : '',
|
|
712
|
+
].filter(Boolean).join('\n\n');
|
|
713
|
+
const promptLines = [
|
|
714
|
+
'The planning model failed to return the strict JSON plan, so this is a recovery build pass.',
|
|
715
|
+
'Do not stop at analysis, an audit, or another implementation plan.',
|
|
716
|
+
'Inspect the current workspace, make the concrete code/file changes requested by the user, then run the most relevant verification available.',
|
|
717
|
+
'If verification is blocked, provide tool-backed evidence of the blocker instead of claiming success.',
|
|
718
|
+
'',
|
|
719
|
+
`User request:\n${contentToText(request).trim()}`,
|
|
720
|
+
];
|
|
721
|
+
if (plannerNotes) {
|
|
722
|
+
promptLines.push('', plannerNotes);
|
|
723
|
+
}
|
|
724
|
+
if (parseErr?.message) {
|
|
725
|
+
promptLines.push('', `Planner failure: ${parseErr.message}`);
|
|
726
|
+
}
|
|
727
|
+
return {
|
|
728
|
+
branch_name: `walle/direct-${safeBranchSlug(request)}`,
|
|
729
|
+
estimated_scope: 'recovered-single-pass',
|
|
730
|
+
planning_recovery: {
|
|
731
|
+
strategy: 'single_build_subtask',
|
|
732
|
+
reason: parseErr?.message || 'planner did not return valid plan JSON',
|
|
733
|
+
},
|
|
734
|
+
subtasks: [{
|
|
735
|
+
id: '1',
|
|
736
|
+
title: 'Implement request directly',
|
|
737
|
+
prompt: promptLines.join('\n'),
|
|
738
|
+
depends_on: [],
|
|
739
|
+
verify: { test: true, review: true },
|
|
740
|
+
files_hint: filesHint,
|
|
741
|
+
}],
|
|
742
|
+
};
|
|
743
|
+
}
|
|
744
|
+
|
|
489
745
|
// buildSubtaskPrompt moved to coding-prompts.js (imported above).
|
|
490
746
|
|
|
491
747
|
function contentToText(content) {
|
|
@@ -541,13 +797,29 @@ function isLegitimateNoEditResponse(content, toolCallHistory = []) {
|
|
|
541
797
|
}
|
|
542
798
|
|
|
543
799
|
const EDIT_TOOL_NAMES = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
|
|
544
|
-
const
|
|
800
|
+
const CODING_EXECUTION_TOOL_NAMES = new Set([
|
|
545
801
|
...EDIT_TOOL_NAMES,
|
|
802
|
+
'start_coding',
|
|
803
|
+
'run_skill',
|
|
804
|
+
]);
|
|
805
|
+
const MEANINGFUL_ACTION_TOOL_NAMES = new Set([
|
|
806
|
+
...CODING_EXECUTION_TOOL_NAMES,
|
|
546
807
|
'run_shell',
|
|
547
808
|
'browser_screenshot',
|
|
809
|
+
'browser_smoke_test',
|
|
810
|
+
'check_url',
|
|
811
|
+
'url_check',
|
|
812
|
+
'pdf_info',
|
|
813
|
+
'pdf_render_pages',
|
|
814
|
+
'pdf_read_pages',
|
|
815
|
+
'make_pdf',
|
|
548
816
|
'mcp_call',
|
|
817
|
+
]);
|
|
818
|
+
const SETUP_ONLY_TOOL_NAMES = new Set([
|
|
549
819
|
'load_skill',
|
|
550
820
|
'skill',
|
|
821
|
+
'skill_loaded',
|
|
822
|
+
'skill_load_failed',
|
|
551
823
|
]);
|
|
552
824
|
|
|
553
825
|
const ACTION_REQUIRED_PROMPT_RE = /\b(fix|implement|improve|update|change|edit|modify|add|remove|refactor|build|create|write|generate|convert|repair|apply|run|test|verify|make)\b/i;
|
|
@@ -562,21 +834,12 @@ const PROSPECTIVE_WORK_RE = new RegExp([
|
|
|
562
834
|
].join(''), 'i');
|
|
563
835
|
|
|
564
836
|
function hasToolCall(toolCallHistory = [], names = new Set()) {
|
|
565
|
-
return (toolCallHistory || []).some((call) => names.has(call
|
|
837
|
+
return (toolCallHistory || []).some((call) => names.has(callName(call)));
|
|
566
838
|
}
|
|
567
839
|
|
|
568
|
-
function
|
|
569
|
-
const
|
|
570
|
-
|
|
571
|
-
if (name === 'browser_screenshot') return true;
|
|
572
|
-
if (name === 'run_shell') {
|
|
573
|
-
return /\b(?:test|spec|lint|build|typecheck|tsc|pytest|jest|mocha|vitest|playwright|node\s+--(?:test|check)|npm\s+(?:test|run)|pnpm\s+(?:test|run)|yarn\s+(?:test|run)|git\s+diff\s+--check)\b/i.test(input);
|
|
574
|
-
}
|
|
575
|
-
return /(?:test|verify|screenshot|diagnostic|lint|build)/i.test(name);
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
function hasVerificationEvidence(toolCallHistory = []) {
|
|
579
|
-
return (toolCallHistory || []).some(isVerificationToolCall);
|
|
840
|
+
function onlySetupToolCalls(toolCallHistory = []) {
|
|
841
|
+
const calls = toolCallHistory || [];
|
|
842
|
+
return calls.length > 0 && calls.every((call) => SETUP_ONLY_TOOL_NAMES.has(callName(call)));
|
|
580
843
|
}
|
|
581
844
|
|
|
582
845
|
function isVerificationBlockerResponse(content) {
|
|
@@ -602,15 +865,60 @@ function isActionRequiredPrompt(prompt, { mode } = {}) {
|
|
|
602
865
|
return true;
|
|
603
866
|
}
|
|
604
867
|
|
|
868
|
+
function promptRequiresFileChanges(prompt, { mode } = {}) {
|
|
869
|
+
if (!isActionRequiredPrompt(prompt, { mode })) return false;
|
|
870
|
+
const intentText = stripPathLikeTokens(contentToText(prompt));
|
|
871
|
+
if (!FILE_CHANGE_PROMPT_RE.test(intentText)) return false;
|
|
872
|
+
if (NO_CHANGE_TASK_RE.test(intentText) && !/\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(intentText)) {
|
|
873
|
+
return false;
|
|
874
|
+
}
|
|
875
|
+
return true;
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
function isReadOnlyCodingIntent(intent = null) {
|
|
879
|
+
if (!intent || typeof intent !== 'object') return false;
|
|
880
|
+
if (intent.readOnly === true || intent.kind === 'read_only') return true;
|
|
881
|
+
return intent.expectsChange === false && intent.reason === 'conversational_update_language';
|
|
882
|
+
}
|
|
883
|
+
|
|
605
884
|
function isPrematureActionResponse(content) {
|
|
606
885
|
const text = contentToText(content);
|
|
607
886
|
if (!text.trim()) return false;
|
|
887
|
+
if (/\btool budget exhausted\b/i.test(text)) return true;
|
|
888
|
+
if (/\bwhat was not completed\b/i.test(text)) return true;
|
|
889
|
+
if (/\bnone of the proposed implementations were written\b/i.test(text)) return true;
|
|
890
|
+
if (/\bno changes were made\b/i.test(text) && /\b(?:not completed|failed|exhausted|recovery path)\b/i.test(text)) return true;
|
|
608
891
|
if (PROSPECTIVE_WORK_RE.test(text)) return true;
|
|
609
892
|
if (/\bwhat['’]?s wrong\b[\s\S]{0,400}\bfix:/i.test(text)) return true;
|
|
893
|
+
if (/\b(?:should i|shall i|do you want me to)\s+(?:proceed|continue|apply|implement|make|start|do)\b/i.test(text)) return true;
|
|
894
|
+
if (/\byour call\b[\s\S]{0,220}\b(?:proceed|continue|phase|prioriti[sz]e|pick|choose|apply|implement)\b/i.test(text)) return true;
|
|
895
|
+
if (/\b(?:implementation|fix|improvement)\s+plan\b/i.test(text)
|
|
896
|
+
&& /\b(?:next steps?|recommendations?|roadmap|proceed|continue|apply|implement)\b/i.test(text)) return true;
|
|
610
897
|
return false;
|
|
611
898
|
}
|
|
612
899
|
|
|
900
|
+
// "Screenshot, self-critique, and fix visual issues" is conditional: the
|
|
901
|
+
// agent only writes files if it FINDS a problem in the screenshot. A clean
|
|
902
|
+
// run that finds nothing to fix is the GOOD outcome — but the title contains
|
|
903
|
+
// "fix", which the FILE_CHANGE_PROMPT_RE below would otherwise treat as an
|
|
904
|
+
// edit task and fail with "Subtask ended without file changes".
|
|
905
|
+
//
|
|
906
|
+
// We trigger only when (a) the title contains an explicit visual-verification
|
|
907
|
+
// keyword (screenshot / self-critique / visual review) AND (b) it LEADS with
|
|
908
|
+
// one. "Review and improve UX" is excluded by (a) — the "improve" is a real
|
|
909
|
+
// edit task, not conditional. "Fix issues found in screenshot" is excluded by
|
|
910
|
+
// (b) — the primary verb is "Fix", screenshot is just context.
|
|
911
|
+
const VERIFICATION_KEYWORD_RE = /\b(screenshot|self[-\s]?critique|browser[-\s]?screenshot|visual\s*review)\b/i;
|
|
912
|
+
const VERIFICATION_LEADS_RE = /^\s*(screenshot|self[-\s]?critique|visual\s*review|browser[-\s]?screenshot)\b/i;
|
|
913
|
+
|
|
914
|
+
function isVerificationPrimarySubtask(subtask = {}) {
|
|
915
|
+
const title = String(subtask.title || '');
|
|
916
|
+
if (!VERIFICATION_KEYWORD_RE.test(title)) return false;
|
|
917
|
+
return VERIFICATION_LEADS_RE.test(title);
|
|
918
|
+
}
|
|
919
|
+
|
|
613
920
|
function subtaskRequiresFileChanges(subtask = {}) {
|
|
921
|
+
if (isVerificationPrimarySubtask(subtask)) return false;
|
|
614
922
|
const text = stripPathLikeTokens(`${subtask.title || ''}\n${subtask.prompt || ''}`);
|
|
615
923
|
if (!FILE_CHANGE_PROMPT_RE.test(text)) return false;
|
|
616
924
|
if (NO_CHANGE_TASK_RE.test(text) && !/\b(improve|fix|implement|update|change|edit|modify|apply|make|write|create|build)\b/i.test(text)) {
|
|
@@ -620,19 +928,81 @@ function subtaskRequiresFileChanges(subtask = {}) {
|
|
|
620
928
|
}
|
|
621
929
|
|
|
622
930
|
function toolCallHistoryFromLog(log = []) {
|
|
623
|
-
return (log || [])
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
931
|
+
return (log || []).flatMap((turn) => {
|
|
932
|
+
const results = turn.toolResults || [];
|
|
933
|
+
return (turn.toolCalls || []).map((call, index) => {
|
|
934
|
+
const resultRecord = results[index] || {};
|
|
935
|
+
return normalizeToolCallEvidence(call, resultRecord.result || resultRecord);
|
|
936
|
+
});
|
|
937
|
+
});
|
|
628
938
|
}
|
|
629
939
|
|
|
630
|
-
function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd } = {}) {
|
|
940
|
+
function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode, toolsAvailable, nudges = 0, maxNudges = 2, cwd, codingIntent, intent } = {}) {
|
|
941
|
+
if (isReadOnlyCodingIntent(codingIntent || intent)) return null;
|
|
631
942
|
if (!isActionRequiredPrompt(prompt, { mode })) return null;
|
|
632
943
|
|
|
633
944
|
const madeEdits = hasToolCall(toolCallHistory, EDIT_TOOL_NAMES);
|
|
945
|
+
const requiresVisualEvidence = /\b(?:website|web\s*page|frontend|ui|ux|visual|responsive|mobile|layout|css|html)\b/i.test(contentToText(prompt));
|
|
946
|
+
const hasSuccessfulScreenshot = (toolCallHistory || []).some((call) => (
|
|
947
|
+
callName(call) === 'browser_screenshot' && toolResultSucceeded(call)
|
|
948
|
+
));
|
|
949
|
+
const hasSuccessfulBrowserSmoke = (toolCallHistory || []).some((call) => (
|
|
950
|
+
callName(call) === 'browser_smoke_test' && toolResultSucceeded(call)
|
|
951
|
+
));
|
|
952
|
+
const requiresPdfEvidence = /\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b[\s\S]{0,80}\bpdf\b|\bpdf\b[\s\S]{0,80}\b(?:make|generate|create|export|render|design|format|style|polish|typeset|print|convert)\b/i.test(contentToText(prompt));
|
|
953
|
+
const touchedPdfFlow = madeEdits || hasToolCall(toolCallHistory, new Set(['run_shell', 'pdf_info', 'pdf_read_pages', 'pdf_render_pages']));
|
|
954
|
+
const hasSuccessfulPdfArtifact = (toolCallHistory || []).some((call) => {
|
|
955
|
+
const name = callName(call);
|
|
956
|
+
if (name !== 'make_pdf' && name !== 'pdf_info') return false;
|
|
957
|
+
if (!toolResultSucceeded(call)) return false;
|
|
958
|
+
const result = call.result && typeof call.result === 'object' ? call.result : call;
|
|
959
|
+
return Boolean(result.path || result.artifact?.path || result.bytes || result.sha256);
|
|
960
|
+
});
|
|
961
|
+
if (touchedPdfFlow && requiresPdfEvidence && !hasSuccessfulPdfArtifact && !isVerificationBlockerResponse(content)) {
|
|
962
|
+
const reason = hasFailedVerificationAttempt(toolCallHistory)
|
|
963
|
+
? 'The assistant worked on PDF/document output but PDF artifact verification failed or produced no successful evidence.'
|
|
964
|
+
: 'The assistant worked on PDF/document output but ended before successful PDF artifact verification.';
|
|
965
|
+
if (!toolsAvailable) return { action: 'fail', reason: `${reason} No tool turns remain.` };
|
|
966
|
+
if (nudges >= maxNudges) return { action: 'fail', reason: `${reason} Verification continuation limit reached.` };
|
|
967
|
+
return {
|
|
968
|
+
action: 'continue',
|
|
969
|
+
reason,
|
|
970
|
+
message: `[SYSTEM] ${reason} This is not complete.\n` +
|
|
971
|
+
`For PDF/document generation, call make_pdf or otherwise validate the generated PDF with pdf_info and render at least one page with pdf_render_pages before claiming success.\n` +
|
|
972
|
+
`If PDF verification is genuinely impossible, state the exact failed tool result and do not claim the PDF is done.\n` +
|
|
973
|
+
`Working directory: ${cwd}`,
|
|
974
|
+
};
|
|
975
|
+
}
|
|
976
|
+
if (madeEdits && requiresVisualEvidence && (!hasSuccessfulScreenshot || !hasSuccessfulBrowserSmoke) && !isVerificationBlockerResponse(content)) {
|
|
977
|
+
const reason = hasFailedVerificationAttempt(toolCallHistory)
|
|
978
|
+
? 'The assistant made frontend/UI changes but browser verification failed or produced incomplete evidence.'
|
|
979
|
+
: 'The assistant made frontend/UI changes but ended before successful browser screenshot and runtime smoke verification.';
|
|
980
|
+
if (!toolsAvailable) {
|
|
981
|
+
return {
|
|
982
|
+
action: 'fail',
|
|
983
|
+
reason: `${reason} No tool turns remain.`,
|
|
984
|
+
};
|
|
985
|
+
}
|
|
986
|
+
if (nudges >= maxNudges) {
|
|
987
|
+
return {
|
|
988
|
+
action: 'fail',
|
|
989
|
+
reason: `${reason} Verification continuation limit reached.`,
|
|
990
|
+
};
|
|
991
|
+
}
|
|
992
|
+
return {
|
|
993
|
+
action: 'continue',
|
|
994
|
+
reason,
|
|
995
|
+
message: `[SYSTEM] ${reason} This is not complete.\n` +
|
|
996
|
+
`For website/UI/UX/frontend work, capture browser_screenshot and run browser_smoke_test at the relevant file:// or verified local URL before claiming success. If a server is needed, use start_static_server then check_url before browser verification.\n` +
|
|
997
|
+
`If browser verification is genuinely impossible, state that blocker explicitly with the failed tool result and do not claim the website is ready.\n` +
|
|
998
|
+
`Working directory: ${cwd}`,
|
|
999
|
+
};
|
|
1000
|
+
}
|
|
1001
|
+
const failedVerification = hasFailedVerificationAttempt(toolCallHistory);
|
|
634
1002
|
if (madeEdits && !hasVerificationEvidence(toolCallHistory) && !isVerificationBlockerResponse(content)) {
|
|
635
|
-
const reason =
|
|
1003
|
+
const reason = failedVerification
|
|
1004
|
+
? 'The assistant made file changes but verification failed or produced no successful evidence.'
|
|
1005
|
+
: 'The assistant made file changes but ended before running verification.';
|
|
636
1006
|
if (!toolsAvailable) {
|
|
637
1007
|
return {
|
|
638
1008
|
action: 'fail',
|
|
@@ -649,7 +1019,7 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
|
|
|
649
1019
|
action: 'continue',
|
|
650
1020
|
reason,
|
|
651
1021
|
message: `[SYSTEM] ${reason} This is not complete.\n` +
|
|
652
|
-
`Run the relevant verification now: tests, lint, build, typecheck,
|
|
1022
|
+
`Run the relevant verification now: tests, lint, build, typecheck, check_url/browser_screenshot for websites, or at minimum git diff --check when no project test exists.\n` +
|
|
653
1023
|
`Only summarize success after a tool result proves the work. If verification is genuinely impossible, state the blocker with tool-backed evidence.\n` +
|
|
654
1024
|
`Working directory: ${cwd}`,
|
|
655
1025
|
};
|
|
@@ -657,8 +1027,41 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
|
|
|
657
1027
|
if (madeEdits) return null;
|
|
658
1028
|
if (isLegitimateNoEditResponse(content, toolCallHistory)) return null;
|
|
659
1029
|
|
|
1030
|
+
const ranCodingExecution = hasToolCall(toolCallHistory, CODING_EXECUTION_TOOL_NAMES);
|
|
660
1031
|
const didMeaningfulAction = hasToolCall(toolCallHistory, MEANINGFUL_ACTION_TOOL_NAMES);
|
|
661
1032
|
const premature = isPrematureActionResponse(content);
|
|
1033
|
+
if (promptRequiresFileChanges(prompt, { mode }) && !ranCodingExecution) {
|
|
1034
|
+
const reason = !toolCallHistory.length
|
|
1035
|
+
? 'The assistant ended an action-oriented coding turn without using any tools.'
|
|
1036
|
+
: onlySetupToolCalls(toolCallHistory)
|
|
1037
|
+
? 'The assistant only loaded skills or capability context and did not execute the requested coding change.'
|
|
1038
|
+
: premature
|
|
1039
|
+
? 'The assistant ended with prospective work instead of executing it.'
|
|
1040
|
+
: 'The assistant inspected or diagnosed the requested change but did not execute a coding change.';
|
|
1041
|
+
|
|
1042
|
+
if (!toolsAvailable) {
|
|
1043
|
+
return {
|
|
1044
|
+
action: 'fail',
|
|
1045
|
+
reason: `${reason} No tool turns remain.`,
|
|
1046
|
+
};
|
|
1047
|
+
}
|
|
1048
|
+
|
|
1049
|
+
if (nudges >= maxNudges) {
|
|
1050
|
+
return {
|
|
1051
|
+
action: 'fail',
|
|
1052
|
+
reason: `${reason} Coding-execution continuation limit reached.`,
|
|
1053
|
+
};
|
|
1054
|
+
}
|
|
1055
|
+
|
|
1056
|
+
return {
|
|
1057
|
+
action: 'continue',
|
|
1058
|
+
reason,
|
|
1059
|
+
message: `[SYSTEM] ${reason} This is not complete.\n` +
|
|
1060
|
+
`Use the available tools now. In Wall-E chat, call start_coding for coding-agent work; in coding mode, inspect files, then edit/write/apply_patch to make the change, and run relevant verification.\n` +
|
|
1061
|
+
`Do not end with a plan, audit, diagnostic report, or "I will..." statement. Finish only after work is executed, or state a concrete blocker/no-change reason backed by tool results.\n` +
|
|
1062
|
+
`Working directory: ${cwd}`,
|
|
1063
|
+
};
|
|
1064
|
+
}
|
|
662
1065
|
if (didMeaningfulAction && !premature) return null;
|
|
663
1066
|
|
|
664
1067
|
const reason = !toolCallHistory.length
|
|
@@ -692,6 +1095,18 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
|
|
|
692
1095
|
}
|
|
693
1096
|
|
|
694
1097
|
/**
 * List the files that changed in `cwd` relative to a previously captured baseline.
 *
 * Two baseline shapes are accepted:
 *  - Structured: a plain object carrying a boolean `isGit` (the shape produced by
 *    captureChangedFilesBaseline()). When `isGit` is false the lookup is delegated to
 *    collectFilesModifiedSince() using `startedAtMs` (0 when absent); when it is true,
 *    `dirty` holds the paths that were already dirty before the run.
 *  - Legacy: a Set or array of paths that were already dirty before the run.
 *
 * @param {string} cwd - Working directory to inspect.
 * @param {Set<string>|string[]|{isGit: boolean, dirty?: Set<string>|string[], startedAtMs?: number}} [baseline]
 *   Baseline to compare against; defaults to an empty Set.
 * @returns {string[]} For git baselines, the entries of getGitChangedFiles(cwd) that were
 *   not in the baseline. For non-git baselines, whatever collectFilesModifiedSince()
 *   returns (presumably a list of paths — confirm against that helper).
 */
function changedFilesSince(cwd, baseline = new Set()) {
  const isStructured = Boolean(baseline)
    && typeof baseline === 'object'
    && !(baseline instanceof Set)
    && !Array.isArray(baseline)
    && typeof baseline.isGit === 'boolean';

  // Non-git working dir: there is no git status to diff against, so fall back to the
  // modification-time scan anchored at the moment the baseline was captured.
  if (isStructured && !baseline.isGit) {
    return collectFilesModifiedSince(cwd, baseline.startedAtMs || 0);
  }

  // Git working dir (structured or legacy): subtract the paths that were already dirty.
  const preexisting = isStructured ? baseline.dirty : baseline;
  const alreadyDirty = preexisting instanceof Set ? preexisting : new Set(preexisting || []);
  return [...getGitChangedFiles(cwd)].filter((rel) => !alreadyDirty.has(rel));
}
|
|
@@ -700,6 +1115,25 @@ function isTimeoutOnlyOutput(output) {
|
|
|
700
1115
|
return /^\s*\[Timeout reached\]\s*$/i.test(contentToText(output));
|
|
701
1116
|
}
|
|
702
1117
|
|
|
1118
|
+
// Resolve the run's wall-clock timeout (ms). An explicit timeoutMs always wins.
|
|
1119
|
+
// Otherwise: headless/automated runs keep the 300s safety cap so CI/background work
|
|
1120
|
+
// can't hang; interactive runs (a user is present) get 0 = "no deadline" so they run
|
|
1121
|
+
// until the agent finishes or the user stops them (matches Claude Code / opencode).
|
|
1122
|
+
// Single source of truth for "is a human present this turn?" — used by both the run
|
|
1123
|
+
// deadline (resolveRunTimeoutMs) and the in-loop permission/acceptance behavior, so the
|
|
1124
|
+
// two can never drift. Interactive = no headless/benchmark and not explicitly opted out.
|
|
1125
|
+
/**
 * Decide whether a human is considered present for this run.
 *
 * An explicit boolean `opts.interactive` is authoritative. Any other value (including
 * truthy/falsy non-booleans) is ignored, and the run counts as interactive only when
 * neither `opts.headless` nor `opts.benchmark` is truthy.
 *
 * @param {{interactive?: boolean, headless?: boolean, benchmark?: boolean}} [opts]
 * @returns {boolean} True when the run should be treated as interactive.
 */
function isInteractiveRun(opts = {}) {
  const { interactive, headless, benchmark } = opts;
  if (typeof interactive === 'boolean') return interactive;
  return !(headless || benchmark);
}
|
|
1130
|
+
|
|
1131
|
+
/**
 * Resolve the wall-clock timeout for a run, in milliseconds.
 *
 * A truthy `opts.timeoutMs` is returned as-is. A falsy value (absent, `0`, `null`) is
 * treated as "not provided": interactive runs then get `0`, and every other run gets
 * the 300000 ms cap.
 *
 * @param {{timeoutMs?: number, interactive?: boolean, headless?: boolean, benchmark?: boolean}} [opts]
 * @returns {number} The explicit timeout, `0` for interactive runs, or `300000`.
 */
function resolveRunTimeoutMs(opts = {}) {
  const explicit = opts.timeoutMs;
  if (explicit) return explicit;
  if (isInteractiveRun(opts)) return 0;
  return 300000;
}
|
|
1136
|
+
|
|
703
1137
|
function providerSupportsToolCalls(provider) {
|
|
704
1138
|
if (!provider) return true;
|
|
705
1139
|
if (provider.capabilities?.tools === false) return false;
|
|
@@ -763,6 +1197,25 @@ function createCodingCompactionService(provider, modelId, opts = {}) {
|
|
|
763
1197
|
});
|
|
764
1198
|
}
|
|
765
1199
|
|
|
1200
|
+
// A `stop` user hook can refuse to let the run finish (e.g. "tests must
|
|
1201
|
+
// pass"). Bounded: a flaky or unsatisfiable hook must not loop the agent
|
|
1202
|
+
// forever — after MAX_STOP_HOOK_BOUNCES the honest-failure path proceeds.
|
|
1203
|
+
const MAX_STOP_HOOK_BOUNCES = 3;

/**
 * Ask the user's `stop` hooks whether the run is allowed to finish.
 *
 * The number of rejections so far is kept on `log._stopHookBounces`; once it reaches
 * MAX_STOP_HOOK_BOUNCES the hooks are no longer consulted and the run may finish.
 *
 * @param {object} params
 * @param {object|null} params.userHooks - Hook runner exposing `hasHooks(name)` and `run(name, payload)`.
 * @param {object} params.log - Mutable per-run object used to persist the bounce counter.
 * @param {string} params.sessionId - Forwarded to the hook payload.
 * @param {string} params.cwd - Forwarded to the hook payload.
 * @param {string} params.mode - Forwarded to the hook payload.
 * @param {number} params.turn - Forwarded to the hook payload.
 * @param {string} params.text - Forwarded to the hook payload.
 * @returns {Promise<{reason: (string|undefined), message: string}|null>} A rejection
 *   descriptor when a hook denied finishing, otherwise `null`.
 */
async function evaluateStopGate({ userHooks, log, sessionId, cwd, mode, turn, text }) {
  if (!userHooks || typeof userHooks.hasHooks !== 'function' || !userHooks.hasHooks('stop')) return null;

  const bounces = log._stopHookBounces || 0;
  if (bounces >= MAX_STOP_HOOK_BOUNCES) return null;

  const verdict = await userHooks.run('stop', { sessionId, cwd, mode, turn, text });
  // A hook runner that resolves without a verdict has not denied anything; treat it as
  // "allow" rather than crashing the run on a property access of undefined/null.
  if (verdict?.decision !== 'deny') return null;

  log._stopHookBounces = bounces + 1;
  return {
    reason: verdict.reason,
    message: `A stop hook rejected finishing this task (attempt ${bounces + 1}/${MAX_STOP_HOOK_BOUNCES}): ${verdict.reason || 'no reason given'}\n` +
      'Address the issue, then finish. If it is genuinely unresolvable, explain the exact blocker in your final summary.',
  };
}
|
|
1218
|
+
|
|
766
1219
|
async function maybeCompactCodingContext({
|
|
767
1220
|
messages,
|
|
768
1221
|
compactionService,
|
|
@@ -775,13 +1228,43 @@ async function maybeCompactCodingContext({
|
|
|
775
1228
|
mode,
|
|
776
1229
|
step = -1,
|
|
777
1230
|
sessionMemory,
|
|
1231
|
+
userHooks = null,
|
|
778
1232
|
reason = 'context_threshold',
|
|
779
1233
|
opts = {},
|
|
780
1234
|
} = {}) {
|
|
781
1235
|
if (!compactionService || !Array.isArray(messages) || messages.length < 2) return null;
|
|
782
1236
|
const systemTokens = estimateTokens(systemPrompt || '');
|
|
783
1237
|
const estimatedInputTokens = systemTokens + estimateMessagesTokens(messages);
|
|
784
|
-
|
|
1238
|
+
|
|
1239
|
+
// Cheap layer first: truncate OLD tool outputs before reaching for LLM
|
|
1240
|
+
// summarization. Rewriting old messages resets the prompt-cache prefix,
|
|
1241
|
+
// but pruning fires rarely (threshold crossing) and shrinks input enough
|
|
1242
|
+
// to amortize the one-turn cache miss.
|
|
1243
|
+
let pruneDetail = null;
|
|
1244
|
+
if (typeof compactionService.shouldPrune === 'function'
|
|
1245
|
+
&& typeof compactionService.prune === 'function'
|
|
1246
|
+
&& compactionService.shouldPrune({ messages, systemTokens })) {
|
|
1247
|
+
const pruneResult = compactionService.prune(messages);
|
|
1248
|
+
if (pruneResult?.pruned && Array.isArray(pruneResult.messages)) {
|
|
1249
|
+
messages.splice(0, messages.length, ...pruneResult.messages);
|
|
1250
|
+
pruneDetail = {
|
|
1251
|
+
prunedBlocks: pruneResult.prunedBlocks,
|
|
1252
|
+
tokensBefore: pruneResult.tokensBefore,
|
|
1253
|
+
tokensAfter: pruneResult.tokensAfter,
|
|
1254
|
+
};
|
|
1255
|
+
events?.emit?.('context.pruned', { sessionId, reason, ...pruneDetail });
|
|
1256
|
+
emitProgress?.({
|
|
1257
|
+
phase: mode || 'executing',
|
|
1258
|
+
step,
|
|
1259
|
+
message: `Pruned ${pruneResult.prunedBlocks} old tool output(s) (~${Math.max(0, pruneResult.tokensBefore - pruneResult.tokensAfter)} tokens)`,
|
|
1260
|
+
detail: pruneDetail,
|
|
1261
|
+
});
|
|
1262
|
+
}
|
|
1263
|
+
}
|
|
1264
|
+
|
|
1265
|
+
if (!compactionService.shouldCompact({ messages, systemTokens })) {
|
|
1266
|
+
return pruneDetail ? { compacted: false, pruned: true, ...pruneDetail } : null;
|
|
1267
|
+
}
|
|
785
1268
|
|
|
786
1269
|
emitProgress?.({
|
|
787
1270
|
phase: mode || 'executing',
|
|
@@ -789,6 +1272,8 @@ async function maybeCompactCodingContext({
|
|
|
789
1272
|
message: 'Compacting coding context...',
|
|
790
1273
|
});
|
|
791
1274
|
|
|
1275
|
+
if (userHooks?.runObserved) await userHooks.runObserved('pre_compact', { sessionId, cwd, reason });
|
|
1276
|
+
|
|
792
1277
|
const result = await compactionService.compact(messages, {
|
|
793
1278
|
sessionId,
|
|
794
1279
|
cwd,
|
|
@@ -849,6 +1334,16 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
|
|
|
849
1334
|
detail: { reason, fromProvider },
|
|
850
1335
|
});
|
|
851
1336
|
}
|
|
1337
|
+
// Forward the run's auto-approval intent to the spawned CLI. The stream-native path
|
|
1338
|
+
// answers tool-permission requests in-process via headlessPolicy; the CLI fallback
|
|
1339
|
+
// spawns a real `claude`, so unless it is told to bypass permissions it silently
|
|
1340
|
+
// stalls in ask-mode and writes nothing. Mirror runAgentLoop's effective policy
|
|
1341
|
+
// (see headlessPolicy default below): an explicit opts.permissionMode wins, else
|
|
1342
|
+
// headlessPolicy:'allow' (or a benchmark run) maps to bypassPermissions; any other
|
|
1343
|
+
// policy leaves the CLI's default ask-mode intact.
|
|
1344
|
+
const effectiveHeadlessPolicy = opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject');
|
|
1345
|
+
const permissionMode = opts.permissionMode
|
|
1346
|
+
|| (effectiveHeadlessPolicy === 'allow' ? 'bypassPermissions' : undefined);
|
|
852
1347
|
const result = await runHeadless(prompt, {
|
|
853
1348
|
cwd,
|
|
854
1349
|
sessionId: sid,
|
|
@@ -857,6 +1352,8 @@ async function runCliFallback(prompt, opts = {}, { sid, cwd, reason, fromProvide
|
|
|
857
1352
|
runnerId,
|
|
858
1353
|
model,
|
|
859
1354
|
mode: opts.mode || 'build',
|
|
1355
|
+
permissionMode,
|
|
1356
|
+
maxTurns: opts.maxTurns,
|
|
860
1357
|
});
|
|
861
1358
|
return {
|
|
862
1359
|
...result,
|
|
@@ -1031,6 +1528,226 @@ function collectEmptyChangedFiles(cwd, changedFiles) {
|
|
|
1031
1528
|
return empties;
|
|
1032
1529
|
}
|
|
1033
1530
|
|
|
1531
|
+
/**
 * Report whether any of the given changed files is a frontend file.
 *
 * @param {string[]} [files] - Changed file paths; a falsy value is treated as an empty list.
 * @returns {boolean} True when isFrontendFile() accepts at least one entry.
 */
function changedFilesTouchFrontend(files = []) {
  const candidates = files || [];
  return candidates.some((candidate) => isFrontendFile(candidate));
}
|
|
1534
|
+
|
|
1535
|
+
/**
 * Publish one acceptance-validator progress event to the progress callback and to telemetry.
 *
 * Both sinks are best-effort: an exception thrown by either one is swallowed so that
 * reporting can never fail the validation itself.
 *
 * @param {Function} [onProgress] - Receives the normalized progress payload.
 * @param {object} [event]
 * @param {number} [event.step] - Defaults to -1 when null/undefined.
 * @param {string} [event.validator] - Defaults to ''.
 * @param {string} [event.status] - Defaults to ''.
 * @param {string} [event.message] - Defaults to ''.
 * @param {object} [event.detail] - Defaults to {}.
 * @param {string} [event.taskKind] - Telemetry only; defaults to ''.
 * @param {number} [event.failures] - Telemetry only; defaults to 0.
 * @returns {void}
 */
function emitAcceptanceValidatorProgress(onProgress, event = {}) {
  const { step, validator, status, message, detail, taskKind, failures } = event;
  const payload = {
    type: 'acceptance_validator',
    phase: 'validating',
    step: step ?? -1,
    validator: validator || '',
    status: status || '',
    message: message || '',
    detail: detail || {},
  };

  try {
    if (onProgress != null) onProgress(payload);
  } catch {}

  try {
    const telemetry = safeTelemetry();
    telemetry?.track?.('coding_acceptance_validator', {
      validator: payload.validator,
      status: payload.status,
      task_kind: taskKind || '',
      failures: failures || 0,
    });
  } catch {}
}
|
|
1555
|
+
|
|
1556
|
+
/**
 * Determine whether the run produced any screenshot evidence.
 *
 * Evidence is either a recorded screenshot entry with a truthy `path` or `url`, or a
 * `browser_screenshot` tool call in the history whose result succeeded.
 *
 * @param {Array<{path?: string, url?: string}>} [screenshots] - Recorded screenshots; non-arrays are ignored.
 * @param {Array<object>} [toolCallHistory] - Tool calls made during the run; a falsy value is treated as empty.
 * @returns {boolean} True when at least one piece of screenshot evidence exists.
 */
function screenshotEvidenceExists(screenshots = [], toolCallHistory = []) {
  if (Array.isArray(screenshots)) {
    const recorded = screenshots.some((shot) => shot && (shot.path || shot.url));
    if (recorded) return true;
  }

  const history = toolCallHistory || [];
  return history.some((call) => {
    if (callName(call) !== 'browser_screenshot') return false;
    return toolResultSucceeded(call);
  });
}
|
|
1562
|
+
|
|
1563
|
+
/**
 * Run the frontend acceptance validators for a coding run and collect a report.
 *
 * Nothing is validated unless `contract.requiresFrontendValidation` is truthy. Otherwise
 * the gates run in this order:
 *  1. Static contract (checkFrontendStaticContracts) — a failure returns immediately.
 *  2. Screenshot evidence — a failure is recorded but evaluation continues.
 *  3. Browser runtime smoke:
 *     - an existing failed `browser_smoke_test` result fails the gate;
 *     - an existing successful one passes it;
 *     - with `requireBrowserRuntime` false the gate is recorded as deferred;
 *     - with `autoBrowser` false the gate fails for lack of evidence;
 *     - otherwise up to two entrypoints are smoke-tested at the `desktop` and `mobile`
 *       viewports, stopping at the first failing run.
 *
 * @param {object} [params]
 * @param {string} params.cwd - Project directory the changed files belong to.
 * @param {object} params.contract - Acceptance contract; reads `requiresFrontendValidation` and `taskKind`.
 * @param {string[]} [params.changedFiles] - Files changed by the run.
 * @param {Array<object>} [params.screenshots] - Screenshots recorded during the run.
 * @param {Array<object>} [params.toolCallHistory] - Tool calls made during the run.
 * @param {boolean} [params.autoBrowser] - Whether this function may launch the smoke test itself.
 * @param {boolean} [params.requireBrowserRuntime] - Whether missing smoke evidence is a failure now rather than deferred.
 * @param {Function} [params.onProgress] - Progress callback handed to emitAcceptanceValidatorProgress.
 * @param {number} [params.step] - Step index attached to progress events.
 * @returns {Promise<{ok: boolean, validators: object[], concerns: string[], frontend: (object|null)}>}
 *   The validation report; `frontend` stays null when no frontend validation was required.
 */
async function runAcceptanceValidators({
  cwd,
  contract,
  changedFiles = [],
  screenshots = [],
  toolCallHistory = [],
  autoBrowser = false,
  requireBrowserRuntime = false,
  onProgress,
  step = -1,
} = {}) {
  const validators = [];
  const concerns = [];
  const report = {
    ok: true,
    validators,
    concerns,
    frontend: null,
  };
  if (!contract?.requiresFrontendValidation) return report;

  // --- Gate 1: static contract -------------------------------------------------------
  emitAcceptanceValidatorProgress(onProgress, {
    step,
    validator: 'frontend.static_contract',
    status: 'started',
    taskKind: contract.taskKind,
  });
  const staticVerdict = checkFrontendStaticContracts(cwd, changedFiles);
  report.frontend = { static: staticVerdict };
  if (!staticVerdict.ok) {
    const failure = validatorFailure(
      'frontend.static_contract',
      `Frontend static contract failed: ${staticVerdict.concerns.slice(0, 3).join('; ')}`,
      staticVerdict
    );
    validators.push(failure);
    concerns.push(...staticVerdict.concerns);
    emitAcceptanceValidatorProgress(onProgress, {
      step,
      validator: failure.name,
      status: 'failed',
      message: failure.message,
      taskKind: contract.taskKind,
      failures: staticVerdict.concerns.length,
    });
    report.ok = false;
    return report;
  }
  validators.push(validatorPass('frontend.static_contract', 'Frontend static contract passed', staticVerdict));
  emitAcceptanceValidatorProgress(onProgress, {
    step,
    validator: 'frontend.static_contract',
    status: 'passed',
    taskKind: contract.taskKind,
  });

  // --- Gate 2: screenshot evidence ---------------------------------------------------
  // screenshotEvidenceExists() tolerates a non-array `screenshots`, so the count reported
  // in the validator detail must tolerate it too instead of throwing on `.length`.
  const screenshotCount = Array.isArray(screenshots) ? screenshots.length : 0;
  const hasScreenshot = screenshotEvidenceExists(screenshots, toolCallHistory);
  if (!hasScreenshot) {
    const failure = validatorFailure(
      'frontend.screenshot_evidence',
      'Frontend verification failed: no successful browser_screenshot evidence captured',
      { screenshots: screenshotCount }
    );
    validators.push(failure);
    concerns.push('[frontend-visual] No successful browser_screenshot evidence captured for frontend changes');
    // Recorded as a failure, but the runtime gate below is still evaluated.
    report.ok = false;
  } else {
    validators.push(validatorPass('frontend.screenshot_evidence', 'Frontend screenshot evidence present', {
      screenshots: screenshotCount,
    }));
  }

  // --- Gate 3: browser runtime smoke -------------------------------------------------
  const smokeEvidence = collectToolEvidence(toolCallHistory, 'browser_smoke_test');
  const failedSmoke = smokeEvidence.find((item) => !item.ok);
  if (failedSmoke) {
    const failure = validatorFailure(
      'frontend.browser_runtime',
      'Frontend browser runtime smoke test failed',
      failedSmoke.result
    );
    validators.push(failure);
    concerns.push('[frontend-runtime] Browser runtime smoke test failed');
    report.ok = false;
    return report;
  }
  if (smokeEvidence.some((item) => item.ok)) {
    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke evidence present', {
      evidence: smokeEvidence.length,
    }));
    return report;
  }

  if (!requireBrowserRuntime) {
    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke deferred to final gate', {
      deferred: true,
    }));
    return report;
  }

  if (!autoBrowser) {
    const failure = validatorFailure(
      'frontend.browser_runtime',
      'Frontend verification failed: no successful browser_smoke_test evidence captured',
      {}
    );
    validators.push(failure);
    concerns.push('[frontend-runtime] No successful browser_smoke_test evidence captured');
    report.ok = false;
    return report;
  }

  const entrypoints = resolveFrontendEntrypoints(cwd, changedFiles);
  if (entrypoints.length === 0) {
    validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke skipped: no HTML entrypoint found', {
      skipped: true,
    }));
    return report;
  }

  const localTools = getLocalTools();
  if (!localTools.findChromeExecutable()) {
    const failure = validatorFailure(
      'frontend.browser_runtime',
      'Frontend browser runtime smoke test blocked: no Chromium-based browser found',
      { entrypoints }
    );
    validators.push(failure);
    concerns.push('[frontend-runtime] No Chromium-based browser available for browser_smoke_test');
    report.ok = false;
    return report;
  }

  const smokeResults = [];
  for (const entrypoint of entrypoints.slice(0, 2)) {
    for (const viewport of ['desktop', 'mobile']) {
      emitAcceptanceValidatorProgress(onProgress, {
        step,
        validator: 'frontend.browser_runtime',
        status: 'started',
        taskKind: contract.taskKind,
        detail: { entrypoint, viewport },
      });
      const result = await localTools.browserSmokeTest({
        url: pathToFileURL(entrypoint).href,
        viewport,
        max_clicks: 25,
        settle_ms: 750,
      });
      smokeResults.push(result);
      if (!result.ok) {
        const failure = validatorFailure(
          'frontend.browser_runtime',
          `Frontend browser runtime smoke failed for ${path.relative(cwd, entrypoint)} (${viewport})`,
          result
        );
        validators.push(failure);
        const runtimeConcerns = (result.failures || []).slice(0, 5).map((item) => (
          `[frontend-runtime] ${item.type || 'failure'} ${item.exception || item.args || item.errorText || item.error || ''}`.trim()
        ));
        // Decide on the fallback from the concerns produced by THIS failure, not from the
        // shared list: an earlier missing-screenshot concern must not suppress it.
        if (runtimeConcerns.length === 0) runtimeConcerns.push(`[frontend-runtime] ${failure.message}`);
        concerns.push(...runtimeConcerns);
        report.ok = false;
        report.frontend.browserSmoke = smokeResults;
        emitAcceptanceValidatorProgress(onProgress, {
          step,
          validator: failure.name,
          status: 'failed',
          message: failure.message,
          taskKind: contract.taskKind,
          failures: result.failures?.length || 1,
        });
        return report;
      }
    }
  }
  report.frontend.browserSmoke = smokeResults;
  validators.push(validatorPass('frontend.browser_runtime', 'Frontend browser runtime smoke passed', {
    entrypoints: entrypoints.map((file) => path.relative(cwd, file)),
    runs: smokeResults.length,
  }));
  emitAcceptanceValidatorProgress(onProgress, {
    step,
    validator: 'frontend.browser_runtime',
    status: 'passed',
    taskKind: contract.taskKind,
  });
  return report;
}
|
|
1750
|
+
|
|
1034
1751
|
function collectEditedFilePaths(toolName, args = {}, result = {}) {
|
|
1035
1752
|
const editTools = new Set(['edit_file', 'write_file', 'apply_patch', 'multi_edit']);
|
|
1036
1753
|
if (!editTools.has(toolName)) return [];
|
|
@@ -1150,10 +1867,11 @@ async function shutdownPostEditMiddleware(state) {
|
|
|
1150
1867
|
async function runAgentLoop(prompt, opts = {}) {
|
|
1151
1868
|
const { cwd, timeoutMs, maxTurns, provider, model, tools, onProgress } = opts;
|
|
1152
1869
|
const explicitProvider = !!provider;
|
|
1153
|
-
const sid = opts._resumeSessionId || crypto.randomUUID();
|
|
1870
|
+
const sid = opts._resumeSessionId || opts.runSessionId || opts.agentRunId || crypto.randomUUID();
|
|
1871
|
+
const codingIntent = opts.codingIntent || opts.intent || null;
|
|
1154
1872
|
|
|
1155
1873
|
// Persist activity start (Phase 2: Activity History)
|
|
1156
|
-
const isResume =
|
|
1874
|
+
const isResume = Boolean(opts._resumeSessionId && opts._resumeMessages);
|
|
1157
1875
|
try { getActivityLog().log({ session_id: sid, type: isResume ? 'coding_resume' : 'coding_start', title: isResume ? 'Coding session resumed' : 'Coding session started', body: prompt.slice(0, 200) }); } catch {}
|
|
1158
1876
|
|
|
1159
1877
|
// Helper: emit progress both to callback and global emitter
|
|
@@ -1164,8 +1882,34 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1164
1882
|
// Also forward to per-task event bus if available (A3 unification)
|
|
1165
1883
|
if (events) events.emit('progress', full);
|
|
1166
1884
|
}
|
|
1167
|
-
const
|
|
1168
|
-
const
|
|
1885
|
+
const externalSignal = opts.signal || opts.abortSignal || null;
|
|
1886
|
+
const throwIfExternalAbort = () => {
|
|
1887
|
+
if (!externalSignal?.aborted) return;
|
|
1888
|
+
const err = new Error('Cancelled');
|
|
1889
|
+
err.code = 'WALLE_CANCELLED';
|
|
1890
|
+
throw err;
|
|
1891
|
+
};
|
|
1892
|
+
const linkExternalAbort = (controller) => {
|
|
1893
|
+
if (!externalSignal || !controller) return () => {};
|
|
1894
|
+
if (externalSignal.aborted) {
|
|
1895
|
+
try { controller.abort(); } catch {}
|
|
1896
|
+
return () => {};
|
|
1897
|
+
}
|
|
1898
|
+
const onAbort = () => {
|
|
1899
|
+
try { controller.abort(); } catch {}
|
|
1900
|
+
};
|
|
1901
|
+
externalSignal.addEventListener('abort', onAbort, { once: true });
|
|
1902
|
+
return () => {
|
|
1903
|
+
try { externalSignal.removeEventListener('abort', onAbort); } catch {}
|
|
1904
|
+
};
|
|
1905
|
+
};
|
|
1906
|
+
// Interactive sessions (a user is watching and approving) must not be killed by a
|
|
1907
|
+
// wall-clock deadline — they run until the agent finishes or the user stops them,
|
|
1908
|
+
// like Claude Code / opencode. The 300s default is only for headless/automated
|
|
1909
|
+
// runs (so CI/background work can't hang). An explicit timeoutMs always wins.
|
|
1910
|
+
const interactiveRun = isInteractiveRun(opts);
|
|
1911
|
+
const timeout = resolveRunTimeoutMs(opts);
|
|
1912
|
+
const deadline = timeout > 0 ? Date.now() + timeout : Infinity;
|
|
1169
1913
|
let turns = maxTurns || MAX_AGENT_TURNS;
|
|
1170
1914
|
const log = []; // training data: every turn logged
|
|
1171
1915
|
|
|
@@ -1178,8 +1922,10 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1178
1922
|
}
|
|
1179
1923
|
const modelId = resolveModelId(model, llm);
|
|
1180
1924
|
const resolvedCwd = realpathBestEffort(cwd || process.cwd());
|
|
1181
|
-
|
|
1182
|
-
|
|
1925
|
+
// Baseline for post-run change detection. Works in non-git cwds too (mtime-based) so
|
|
1926
|
+
// an agent that writes into a plain folder doesn't report changedFiles: [].
|
|
1927
|
+
const preRunFileBaseline = captureChangedFilesBaseline(resolvedCwd);
|
|
1928
|
+
const wallERuntimeProfile = resolveWallERuntimeProfile({
|
|
1183
1929
|
...opts,
|
|
1184
1930
|
channel: opts.channel || 'coding',
|
|
1185
1931
|
agentMode: opts.agentMode || 'coding',
|
|
@@ -1190,16 +1936,30 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1190
1936
|
chatSessionId: opts.chatSessionId || opts.session_id || '',
|
|
1191
1937
|
cwd: resolvedCwd,
|
|
1192
1938
|
});
|
|
1193
|
-
|
|
1939
|
+
const agentRunContext = wallERuntimeProfile.context;
|
|
1940
|
+
emitAgentRunContextWarnings({ ...agentRunContext, warnings: wallERuntimeProfile.warnings }, { telemetry: safeTelemetry() });
|
|
1194
1941
|
const promptCapabilityHints = opts.promptCapabilityHints || parsePromptCapabilityHints(prompt);
|
|
1195
1942
|
const capabilities = resolveCodingCapabilities({ ...opts, promptCapabilityHints }, {
|
|
1196
1943
|
cwd: resolvedCwd,
|
|
1197
1944
|
brain: opts.brain || null,
|
|
1198
1945
|
});
|
|
1199
1946
|
const taskFileHints = extractTaskFileHints(prompt);
|
|
1947
|
+
const artifactCapabilities = routeArtifactCapabilities({
|
|
1948
|
+
prompt,
|
|
1949
|
+
taskFileHints,
|
|
1950
|
+
projectInfo: null,
|
|
1951
|
+
});
|
|
1200
1952
|
const runtimeMode = resolveRuntimeMode(opts);
|
|
1201
1953
|
const baseTools = Array.isArray(tools) ? tools : getToolsForMode(opts.mode || 'build');
|
|
1202
1954
|
const requestedTools = filterToolsForRuntimeMode(baseTools, runtimeMode);
|
|
1955
|
+
const transcriptMessageOwner = String(opts.transcriptMessageOwner || opts.transcript_message_owner || '').toLowerCase();
|
|
1956
|
+
const externalTranscriptMessages = opts.externalTranscriptMessages === true
|
|
1957
|
+
|| opts.external_transcript_messages === true
|
|
1958
|
+
|| opts.skipTranscriptMessages === true
|
|
1959
|
+
|| opts.skip_transcript_messages === true
|
|
1960
|
+
|| transcriptMessageOwner === 'ctm'
|
|
1961
|
+
|| transcriptMessageOwner === 'host'
|
|
1962
|
+
|| transcriptMessageOwner === 'external';
|
|
1203
1963
|
const transcript = createCodingTranscript({
|
|
1204
1964
|
transcript: opts.transcript,
|
|
1205
1965
|
persistTranscript: opts.persistTranscript,
|
|
@@ -1230,7 +1990,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1230
1990
|
mode: opts.mode || '',
|
|
1231
1991
|
});
|
|
1232
1992
|
}
|
|
1233
|
-
if (!opts._resumeMessages && transcript?.appendUserMessage) {
|
|
1993
|
+
if (!externalTranscriptMessages && !opts._resumeMessages && transcript?.appendUserMessage) {
|
|
1234
1994
|
transcript.appendUserMessage(prompt, {
|
|
1235
1995
|
sessionId: sid,
|
|
1236
1996
|
cwd: resolvedCwd,
|
|
@@ -1288,7 +2048,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1288
2048
|
}
|
|
1289
2049
|
} catch {}
|
|
1290
2050
|
|
|
1291
|
-
if (isFrontendTask(taskFileHints, prompt)
|
|
2051
|
+
if ((hasCapability(artifactCapabilities, 'frontend_design') || isFrontendTask(taskFileHints, prompt))
|
|
1292
2052
|
&& !projectSkills.some((s) => s && s.name === 'frontend-design')) {
|
|
1293
2053
|
projectSkills = [
|
|
1294
2054
|
...projectSkills,
|
|
@@ -1322,6 +2082,24 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1322
2082
|
}
|
|
1323
2083
|
promptCapabilities = await loadRequestedSkillInstructions(promptCapabilities, capabilities.skillRunner);
|
|
1324
2084
|
|
|
2085
|
+
if (artifactCapabilities.length && transcript?.appendPart) {
|
|
2086
|
+
transcript.appendPart({
|
|
2087
|
+
sessionId: sid,
|
|
2088
|
+
cwd: resolvedCwd,
|
|
2089
|
+
partType: 'capability_routed',
|
|
2090
|
+
data: {
|
|
2091
|
+
type: 'capability_routed',
|
|
2092
|
+
capabilities: artifactCapabilities.map((capability) => ({
|
|
2093
|
+
id: capability.id,
|
|
2094
|
+
label: capability.label,
|
|
2095
|
+
tools: capability.tools,
|
|
2096
|
+
requiredArtifacts: capability.requiredArtifacts,
|
|
2097
|
+
completionGate: capability.completionGate,
|
|
2098
|
+
})),
|
|
2099
|
+
},
|
|
2100
|
+
});
|
|
2101
|
+
}
|
|
2102
|
+
|
|
1325
2103
|
// Build system prompt with project context.
|
|
1326
2104
|
const systemPrompt = buildAgentSystemPrompt({
|
|
1327
2105
|
resolvedCwd,
|
|
@@ -1329,9 +2107,13 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1329
2107
|
projectSkills,
|
|
1330
2108
|
taskFileHints,
|
|
1331
2109
|
runtimeMode,
|
|
2110
|
+
mode: opts.mode,
|
|
2111
|
+
provider: llm.type || '',
|
|
2112
|
+
model: modelId,
|
|
1332
2113
|
runtimeContext: {
|
|
1333
2114
|
memoryToolsAvailable: Boolean(capabilities.mcpClient),
|
|
1334
2115
|
promptCapabilities,
|
|
2116
|
+
artifactCapabilities,
|
|
1335
2117
|
userTask: prompt,
|
|
1336
2118
|
},
|
|
1337
2119
|
});
|
|
@@ -1339,6 +2121,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1339
2121
|
// Resume support: use restored messages if resuming from checkpoint
|
|
1340
2122
|
const messages = opts._resumeMessages || [{ role: 'user', content: prompt }];
|
|
1341
2123
|
let finalOutput = '';
|
|
2124
|
+
let finalAnswerDelivered = false;
|
|
1342
2125
|
let totalInput = 0;
|
|
1343
2126
|
let totalOutput = 0;
|
|
1344
2127
|
let consecutiveErrors = 0;
|
|
@@ -1372,10 +2155,12 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1372
2155
|
|
|
1373
2156
|
mw.use('tool.after', screenshotTrackerHook(screenshotsTaken));
|
|
1374
2157
|
const events = opts.events || new CodingEvents();
|
|
1375
|
-
const { PermissionService } = require('./coding/permission-service');
|
|
2158
|
+
const { PermissionService, WAIT_FOR_REPLY } = require('./coding/permission-service');
|
|
1376
2159
|
const permissionService = opts.permissionService || new PermissionService({
|
|
1377
2160
|
events,
|
|
1378
|
-
|
|
2161
|
+
// Interactive runs wait for the user to approve (no auto-deny timeout); headless
|
|
2162
|
+
// runs resolve immediately via headlessPolicy, so the timeout never applies there.
|
|
2163
|
+
timeoutMs: opts.permissionTimeoutMs ?? (interactiveRun ? WAIT_FOR_REPLY : undefined),
|
|
1379
2164
|
headlessPolicy: opts.headlessPolicy || (opts.benchmark ? 'allow' : 'reject'),
|
|
1380
2165
|
});
|
|
1381
2166
|
const { AgentCatalog } = require('./coding/agent-catalog');
|
|
@@ -1401,7 +2186,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1401
2186
|
headless: opts.headless,
|
|
1402
2187
|
benchmark: opts.benchmark,
|
|
1403
2188
|
headlessPolicy: opts.headlessPolicy,
|
|
1404
|
-
|
|
2189
|
+
runSessionId: taskId,
|
|
1405
2190
|
enableTaskTool: false,
|
|
1406
2191
|
brain: opts.brain || null,
|
|
1407
2192
|
mcpClient: capabilities.mcpClient,
|
|
@@ -1437,6 +2222,102 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1437
2222
|
// Inspired by OpenCode Question service (packages/opencode/src/question/index.ts)
|
|
1438
2223
|
const questionManager = opts.questionManager || new QuestionManager(events);
|
|
1439
2224
|
const compactionService = createCodingCompactionService(llm, modelId, opts);
|
|
2225
|
+
const { RuntimeEventWriter } = require('./coding/runtime-events');
|
|
2226
|
+
const {
|
|
2227
|
+
appendPromptManifest,
|
|
2228
|
+
buildCodingPromptManifest,
|
|
2229
|
+
} = require('./coding/prompt-section-registry');
|
|
2230
|
+
const runtimeEvents = opts.runtimeEvents || new RuntimeEventWriter({
|
|
2231
|
+
transcript,
|
|
2232
|
+
events,
|
|
2233
|
+
defaults: {
|
|
2234
|
+
sessionId: sid,
|
|
2235
|
+
agentSessionId: agentRunContext.agentSessionId || sid,
|
|
2236
|
+
cwd: resolvedCwd,
|
|
2237
|
+
provider: llm.type || '',
|
|
2238
|
+
model: modelId,
|
|
2239
|
+
actor: agentRunContext.agentKind || 'walle-coding',
|
|
2240
|
+
},
|
|
2241
|
+
});
|
|
2242
|
+
const promptManifest = opts.promptManifest || buildCodingPromptManifest({
|
|
2243
|
+
systemPrompt,
|
|
2244
|
+
userTask: prompt,
|
|
2245
|
+
provider: llm.type || '',
|
|
2246
|
+
model: modelId,
|
|
2247
|
+
runtimeMode: runtimeMode.id,
|
|
2248
|
+
tools: requestedTools,
|
|
2249
|
+
promptCapabilities,
|
|
2250
|
+
metadata: {
|
|
2251
|
+
sessionId: sid,
|
|
2252
|
+
agentKind: agentRunContext.agentKind,
|
|
2253
|
+
agentMode: agentRunContext.agentMode,
|
|
2254
|
+
runtimeProfile: wallERuntimeProfile.profileId,
|
|
2255
|
+
persistenceProfile: wallERuntimeProfile.persistenceProfile,
|
|
2256
|
+
permissionProfile: wallERuntimeProfile.permissionProfile,
|
|
2257
|
+
outputContract: wallERuntimeProfile.outputContract,
|
|
2258
|
+
mode: opts.mode || '',
|
|
2259
|
+
},
|
|
2260
|
+
});
|
|
2261
|
+
appendPromptManifest(transcript, promptManifest, {
|
|
2262
|
+
sessionId: sid,
|
|
2263
|
+
cwd: resolvedCwd,
|
|
2264
|
+
chatSessionId: opts.chatSessionId || '',
|
|
2265
|
+
});
|
|
2266
|
+
runtimeEvents.emit({
|
|
2267
|
+
type: 'prompt_built',
|
|
2268
|
+
payload: {
|
|
2269
|
+
promptManifestId: promptManifest.promptManifestId,
|
|
2270
|
+
stableHash: promptManifest.stableHash,
|
|
2271
|
+
dynamicHash: promptManifest.dynamicHash,
|
|
2272
|
+
stableSectionCount: promptManifest.stableSectionCount,
|
|
2273
|
+
dynamicSectionCount: promptManifest.dynamicSectionCount,
|
|
2274
|
+
tokenEstimate: promptManifest.tokenEstimate,
|
|
2275
|
+
},
|
|
2276
|
+
});
|
|
2277
|
+
const { LifecycleHookBus } = require('./coding/lifecycle-hooks');
|
|
2278
|
+
const { ToolExecutionController } = require('./coding/tool-execution-controller');
|
|
2279
|
+
const lifecycleHooks = opts.lifecycleHooks || new LifecycleHookBus({
|
|
2280
|
+
events,
|
|
2281
|
+
middleware: mw,
|
|
2282
|
+
runtimeEvents,
|
|
2283
|
+
defaults: {
|
|
2284
|
+
sessionId: sid,
|
|
2285
|
+
agentSessionId: agentRunContext.agentSessionId || sid,
|
|
2286
|
+
cwd: resolvedCwd,
|
|
2287
|
+
provider: llm.type || '',
|
|
2288
|
+
model: modelId,
|
|
2289
|
+
actor: agentRunContext.agentKind || 'walle-coding',
|
|
2290
|
+
},
|
|
2291
|
+
});
|
|
2292
|
+
// User-defined lifecycle hooks (.walle/hooks.json). `opts.userHooks` may
|
|
2293
|
+
// inject a prebuilt instance (tests) or `null` to disable.
|
|
2294
|
+
const { createUserHooks } = require('./coding/user-hooks');
|
|
2295
|
+
const userHooks = opts.userHooks !== undefined
|
|
2296
|
+
? opts.userHooks
|
|
2297
|
+
: createUserHooks({ projectRoot: resolvedCwd, cwd: resolvedCwd });
|
|
2298
|
+
if (userHooks) {
|
|
2299
|
+
emitProgress({ phase: opts.mode || 'executing', step: -1, message: `User hooks active (${userHooks.hooks.length})` });
|
|
2300
|
+
userHooks.runObserved('session_start', { sessionId: sid, cwd: resolvedCwd, mode: opts.mode || 'build' });
|
|
2301
|
+
}
|
|
2302
|
+
|
|
2303
|
+
const toolExecutionController = opts.toolExecutionController || new ToolExecutionController({
|
|
2304
|
+
toolRegistry,
|
|
2305
|
+
middleware: mw,
|
|
2306
|
+
permissionService,
|
|
2307
|
+
questionManager,
|
|
2308
|
+
events,
|
|
2309
|
+
lifecycleHooks,
|
|
2310
|
+
cwd: resolvedCwd,
|
|
2311
|
+
projectRoot: resolvedCwd,
|
|
2312
|
+
sessionId: sid,
|
|
2313
|
+
provider: llm.type || '',
|
|
2314
|
+
model: modelId,
|
|
2315
|
+
mode: opts.mode || '',
|
|
2316
|
+
runtimeMode: runtimeMode.id,
|
|
2317
|
+
headless: Boolean(opts.headless),
|
|
2318
|
+
benchmark: Boolean(opts.benchmark),
|
|
2319
|
+
userHooks,
|
|
2320
|
+
});
|
|
1440
2321
|
|
|
1441
2322
|
// projectInfo already detected above (before system prompt)
|
|
1442
2323
|
const llmCtxRef = { current: null }; // populated each turn (see llmCtx below)
|
|
@@ -1448,47 +2329,87 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1448
2329
|
// fall back to the legacy whole-response loop.
|
|
1449
2330
|
if (shouldUseStreamProcessor(opts)) {
|
|
1450
2331
|
const { StreamProcessor } = require('./coding/stream-processor');
|
|
1451
|
-
const { SnapshotService } = require('./coding/snapshot-service');
|
|
2332
|
+
const { SnapshotService, BoundaryStore } = require('./coding/snapshot-service');
|
|
2333
|
+
const streamToolExecutionController = new ToolExecutionController({
|
|
2334
|
+
toolRegistry,
|
|
2335
|
+
middleware: mw,
|
|
2336
|
+
permissionService: null,
|
|
2337
|
+
questionManager,
|
|
2338
|
+
events,
|
|
2339
|
+
lifecycleHooks,
|
|
2340
|
+
cwd: resolvedCwd,
|
|
2341
|
+
projectRoot: resolvedCwd,
|
|
2342
|
+
sessionId: sid,
|
|
2343
|
+
provider: llm.type || '',
|
|
2344
|
+
model: modelId,
|
|
2345
|
+
mode: opts.mode || '',
|
|
2346
|
+
runtimeMode: runtimeMode.id,
|
|
2347
|
+
headless: Boolean(opts.headless),
|
|
2348
|
+
benchmark: Boolean(opts.benchmark),
|
|
2349
|
+
handlePermissions: false,
|
|
2350
|
+
userHooks,
|
|
2351
|
+
});
|
|
1452
2352
|
const processor = new StreamProcessor({
|
|
1453
2353
|
provider: llm,
|
|
1454
2354
|
model: modelId,
|
|
1455
2355
|
transcript,
|
|
1456
|
-
snapshotService: opts.snapshotService || new SnapshotService({
|
|
2356
|
+
snapshotService: opts.snapshotService || new SnapshotService({
|
|
2357
|
+
cwd: resolvedCwd,
|
|
2358
|
+
// Whole-worktree step snapshots + restart-surviving boundaries for
|
|
2359
|
+
// the rewind API. WALLE_WORKTREE_SNAPSHOTS=0 disables.
|
|
2360
|
+
worktreeSnapshots: process.env.WALLE_WORKTREE_SNAPSHOTS !== '0' && !opts.benchmark,
|
|
2361
|
+
boundaryStore: new BoundaryStore(),
|
|
2362
|
+
}),
|
|
1457
2363
|
permissionService,
|
|
1458
2364
|
headless: Boolean(opts.headless || opts.benchmark),
|
|
1459
2365
|
toolExecutor: async (call) => {
|
|
1460
|
-
const
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
}
|
|
1471
|
-
|
|
1472
|
-
input.projectRoot = resolvedCwd;
|
|
1473
|
-
const toolCtx = { sessionId: sid, cwd: resolvedCwd, model: modelId, provider: llm.type, runtimeMode: runtimeMode.id };
|
|
1474
|
-
const finalInput = await mw.run('tool.before', toolCtx, call.name, input);
|
|
1475
|
-
const result = await toolRegistry.execute(call.name, finalInput, toolCtx);
|
|
1476
|
-
return mw.run('tool.after', toolCtx, call.name, finalInput, result);
|
|
2366
|
+
const execution = await streamToolExecutionController.execute(call, {
|
|
2367
|
+
sessionId: sid,
|
|
2368
|
+
cwd: resolvedCwd,
|
|
2369
|
+
projectRoot: resolvedCwd,
|
|
2370
|
+
model: modelId,
|
|
2371
|
+
provider: llm.type,
|
|
2372
|
+
mode: opts.mode || '',
|
|
2373
|
+
runtimeMode: runtimeMode.id,
|
|
2374
|
+
interactive: opts.interactive,
|
|
2375
|
+
onTodos: (todos) => { currentTodos = todos; },
|
|
2376
|
+
});
|
|
2377
|
+
return execution.result;
|
|
1477
2378
|
},
|
|
1478
2379
|
});
|
|
1479
|
-
processor.on('event', (evt) =>
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
2380
|
+
processor.on('event', (evt) => {
|
|
2381
|
+
// Forward structured runtime events with their top-level `type` intact.
|
|
2382
|
+
// CTM (server.js onEvent: event.type === 'lane_event'/'permission_resolved')
|
|
2383
|
+
// and the browser (walle-session.js: switch(ev.type) → case 'permission_request')
|
|
2384
|
+
// both dispatch on the top-level type, so wrapping these into
|
|
2385
|
+
// {phase,step,message,detail} silently swallowed the live approval card and
|
|
2386
|
+
// the "Needs You" wait state — the request then parked until the user
|
|
2387
|
+
// reloaded (the durable restore card in walle-ctm-history.js still worked).
|
|
2388
|
+
// Keep approval + lane events un-wrapped so a watching client surfaces the
|
|
2389
|
+
// card and waiting state without a reload.
|
|
2390
|
+
if (evt && [
|
|
2391
|
+
'tool_call', 'tool_result', 'tool_done', 'skill_loaded', 'skill_load_failed',
|
|
2392
|
+
'permission_request', 'permission_resolved', 'permission_denied', 'lane_event',
|
|
2393
|
+
].includes(evt.type)) {
|
|
2394
|
+
emitProgress(evt);
|
|
2395
|
+
return;
|
|
2396
|
+
}
|
|
2397
|
+
emitProgress({
|
|
2398
|
+
phase: opts.mode || 'executing',
|
|
2399
|
+
step: 0,
|
|
2400
|
+
message: evt?.type || 'event',
|
|
2401
|
+
detail: evt,
|
|
2402
|
+
});
|
|
2403
|
+
});
|
|
1485
2404
|
|
|
1486
2405
|
let streamStatus = 'finished';
|
|
1487
2406
|
let streamStopReason = '';
|
|
1488
2407
|
let streamModel = modelId;
|
|
1489
2408
|
const streamErrors = [];
|
|
2409
|
+
let streamProviderError = null;
|
|
1490
2410
|
let streamHadEdit = false;
|
|
1491
2411
|
for (let turnIndex = opts._resumeTurn || 0; turnIndex < turns; turnIndex++) {
|
|
2412
|
+
throwIfExternalAbort();
|
|
1492
2413
|
const remaining = deadline - Date.now();
|
|
1493
2414
|
if (remaining <= 0) {
|
|
1494
2415
|
streamStatus = 'error';
|
|
@@ -1503,6 +2424,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1503
2424
|
});
|
|
1504
2425
|
const perTurnCap = opts.perTurnTimeoutMs || (/ollama|mlx/.test(llm.type || '') ? 600000 : 300000);
|
|
1505
2426
|
const ac = new AbortController();
|
|
2427
|
+
const unlinkExternalAbort = linkExternalAbort(ac);
|
|
1506
2428
|
const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
|
|
1507
2429
|
let turn;
|
|
1508
2430
|
let toolsForTurn = [];
|
|
@@ -1518,7 +2440,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1518
2440
|
});
|
|
1519
2441
|
const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
|
|
1520
2442
|
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
|
|
1521
|
-
toolsAvailable: toolsForTurn.length > 0, promptCapabilities };
|
|
2443
|
+
toolsAvailable: toolsForTurn.length > 0, promptCapabilities, promptManifest };
|
|
1522
2444
|
llmCtxRef.current = llmCtx;
|
|
1523
2445
|
await mw.run('llm.before', llmCtx);
|
|
1524
2446
|
await maybeCompactCodingContext({
|
|
@@ -1533,6 +2455,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1533
2455
|
mode: opts.mode || 'executing',
|
|
1534
2456
|
step: turnIndex,
|
|
1535
2457
|
sessionMemory: opts.sessionMemory,
|
|
2458
|
+
userHooks,
|
|
1536
2459
|
reason: 'stream_pre_turn',
|
|
1537
2460
|
opts,
|
|
1538
2461
|
});
|
|
@@ -1549,9 +2472,11 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1549
2472
|
thinking: llmCtx.params.thinking,
|
|
1550
2473
|
reasoningEffort: llmCtx.params.reasoningEffort,
|
|
1551
2474
|
options: llmCtx.params.options,
|
|
2475
|
+
promptCache: true,
|
|
1552
2476
|
});
|
|
1553
2477
|
} finally {
|
|
1554
2478
|
clearTimeout(timer);
|
|
2479
|
+
unlinkExternalAbort();
|
|
1555
2480
|
}
|
|
1556
2481
|
|
|
1557
2482
|
totalInput += turn.usage?.input || 0;
|
|
@@ -1560,17 +2485,25 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1560
2485
|
streamStopReason = turn.stopReason || streamStopReason;
|
|
1561
2486
|
streamModel = turn.model || streamModel;
|
|
1562
2487
|
if (turn.errors?.length) streamErrors.push(...turn.errors);
|
|
2488
|
+
if (turn.providerError) streamProviderError = turn.providerError;
|
|
1563
2489
|
if (turn.text) finalOutput += turn.text;
|
|
1564
2490
|
const streamToolCalls = (turn.toolCalls || []).map(tc => ({ name: tc.name, input: tc.input }));
|
|
1565
|
-
|
|
1566
|
-
|
|
1567
|
-
|
|
1568
|
-
|
|
2491
|
+
const streamToolResults = turn.toolResults || [];
|
|
2492
|
+
toolCallHistory.push(...streamToolCalls.map((tc, index) => {
|
|
2493
|
+
const resultRecord = streamToolResults[index] || {};
|
|
2494
|
+
return normalizeToolCallEvidence(tc, resultRecord.result || resultRecord);
|
|
2495
|
+
}));
|
|
1569
2496
|
log.push({
|
|
1570
2497
|
turn: turnIndex,
|
|
1571
2498
|
model: turn.model || modelId,
|
|
1572
2499
|
provider: turn.provider || llm.type,
|
|
1573
2500
|
toolCalls: streamToolCalls,
|
|
2501
|
+
toolResults: streamToolResults.map((record) => ({
|
|
2502
|
+
name: record.name,
|
|
2503
|
+
ok: normalizeToolCallEvidence(record, record.result || record).ok === true,
|
|
2504
|
+
error: record.error || record.result?.error || null,
|
|
2505
|
+
result: record.result || null,
|
|
2506
|
+
})),
|
|
1574
2507
|
content: turn.text,
|
|
1575
2508
|
stopReason: turn.stopReason,
|
|
1576
2509
|
});
|
|
@@ -1586,6 +2519,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1586
2519
|
toolsAvailable: toolsForTurn.length > 0,
|
|
1587
2520
|
nudges: log._noActionNudges || 0,
|
|
1588
2521
|
cwd: resolvedCwd,
|
|
2522
|
+
codingIntent,
|
|
1589
2523
|
});
|
|
1590
2524
|
if (continuation?.action === 'continue') {
|
|
1591
2525
|
log._noActionNudges = (log._noActionNudges || 0) + 1;
|
|
@@ -1600,6 +2534,16 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1600
2534
|
emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Action guard failed incomplete no-tool response', detail: { reason: continuation.reason } });
|
|
1601
2535
|
break;
|
|
1602
2536
|
}
|
|
2537
|
+
const stopGate = await evaluateStopGate({
|
|
2538
|
+
userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn: turnIndex, text: contentToText(turn.text),
|
|
2539
|
+
});
|
|
2540
|
+
if (stopGate) {
|
|
2541
|
+
if (turn.assistantMessage) messages.push(turn.assistantMessage);
|
|
2542
|
+
messages.push({ role: 'user', content: stopGate.message });
|
|
2543
|
+
emitProgress({ phase: opts.mode || 'executing', step: turnIndex, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
|
|
2544
|
+
continue;
|
|
2545
|
+
}
|
|
2546
|
+
if (contentToText(turn.text).trim()) finalAnswerDelivered = true;
|
|
1603
2547
|
}
|
|
1604
2548
|
if (turn.assistantMessage) messages.push(turn.assistantMessage);
|
|
1605
2549
|
if (turn.toolResultMessage) messages.push(turn.toolResultMessage);
|
|
@@ -1627,10 +2571,15 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1627
2571
|
sessionId: sid,
|
|
1628
2572
|
cwd: resolvedCwd,
|
|
1629
2573
|
partType: 'error',
|
|
1630
|
-
data:
|
|
2574
|
+
data: streamProviderError
|
|
2575
|
+
// Classified provider failure: surface the friendly, actionable message
|
|
2576
|
+
// (parity with the chat path) instead of a raw "fetch failed". Raw text
|
|
2577
|
+
// is retained in `errors` for debugging.
|
|
2578
|
+
? { message: streamProviderError.userMessage, providerError: streamProviderError, errors: streamErrors }
|
|
2579
|
+
: { errors: streamErrors },
|
|
1631
2580
|
});
|
|
1632
2581
|
}
|
|
1633
|
-
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
2582
|
+
if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
|
|
1634
2583
|
transcript.appendAssistantMessage(finalOutput, {
|
|
1635
2584
|
sessionId: sid,
|
|
1636
2585
|
cwd: resolvedCwd,
|
|
@@ -1642,11 +2591,15 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1642
2591
|
}
|
|
1643
2592
|
|
|
1644
2593
|
await shutdownPostEditMiddleware(postEditMiddleware);
|
|
1645
|
-
const changedFiles = changedFilesSince(resolvedCwd,
|
|
2594
|
+
const changedFiles = changedFilesSince(resolvedCwd, preRunFileBaseline);
|
|
1646
2595
|
return {
|
|
1647
2596
|
success: streamStatus !== 'error',
|
|
1648
2597
|
output: finalOutput,
|
|
1649
|
-
|
|
2598
|
+
// Surface the classified, friendly provider message (e.g. "AI provider network
|
|
2599
|
+
// error: …could not reach the provider endpoint…") to the caller/chat reply
|
|
2600
|
+
// instead of a raw "fetch failed". The raw text stays in `errors` for the
|
|
2601
|
+
// CLI-recoverability pattern match.
|
|
2602
|
+
stderr: (streamProviderError && streamProviderError.userMessage) || streamErrors.join('\n'),
|
|
1650
2603
|
sessionId: sid,
|
|
1651
2604
|
exitCode: streamStatus === 'error' ? -1 : 0,
|
|
1652
2605
|
log,
|
|
@@ -1656,6 +2609,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1656
2609
|
next: 'stop',
|
|
1657
2610
|
runtimeMode: runtimeMode.id,
|
|
1658
2611
|
changedFiles,
|
|
2612
|
+
finalAnswerDelivered,
|
|
1659
2613
|
};
|
|
1660
2614
|
}
|
|
1661
2615
|
|
|
@@ -1674,6 +2628,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1674
2628
|
try {
|
|
1675
2629
|
const startTurn = opts._resumeTurn || 0;
|
|
1676
2630
|
for (let turn = startTurn; turn < turns; turn++) {
|
|
2631
|
+
throwIfExternalAbort();
|
|
1677
2632
|
const remaining = deadline - Date.now();
|
|
1678
2633
|
if (remaining <= 0) {
|
|
1679
2634
|
finalOutput += '\n[Timeout reached]';
|
|
@@ -1692,13 +2647,14 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1692
2647
|
const isLocal = /ollama|mlx/.test(llm.type || '');
|
|
1693
2648
|
const perTurnCap = opts.perTurnTimeoutMs || (isLocal ? 600000 : 300000);
|
|
1694
2649
|
const ac = new AbortController();
|
|
2650
|
+
const unlinkExternalAbort = linkExternalAbort(ac);
|
|
1695
2651
|
const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
|
|
1696
2652
|
|
|
1697
2653
|
// Middleware: prepare LLM call
|
|
1698
2654
|
const turnsRemaining = turns - turn;
|
|
1699
2655
|
const llmCtx = { params: createInitialLlmParams(opts, taskFileHints.length >= 4 ? 8192 : 4096), system: systemPrompt, cwd: resolvedCwd,
|
|
1700
2656
|
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
|
|
1701
|
-
toolsAvailable: turnsRemaining > 1, promptCapabilities };
|
|
2657
|
+
toolsAvailable: turnsRemaining > 1, promptCapabilities, promptManifest };
|
|
1702
2658
|
llmCtxRef.current = llmCtx; // expose to event bridge (A2)
|
|
1703
2659
|
await mw.run('llm.before', llmCtx);
|
|
1704
2660
|
let adaptedTools = await toolRegistry.getDefinitions(llmCtx);
|
|
@@ -1715,6 +2671,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1715
2671
|
mode: opts.mode || 'executing',
|
|
1716
2672
|
step: turn,
|
|
1717
2673
|
sessionMemory: opts.sessionMemory,
|
|
2674
|
+
userHooks,
|
|
1718
2675
|
reason: 'legacy_pre_turn',
|
|
1719
2676
|
opts,
|
|
1720
2677
|
});
|
|
@@ -1759,10 +2716,12 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1759
2716
|
thinking: llmCtx.params.thinking,
|
|
1760
2717
|
reasoningEffort: llmCtx.params.reasoningEffort,
|
|
1761
2718
|
options: llmCtx.params.options,
|
|
2719
|
+
promptCache: true,
|
|
1762
2720
|
signal: ac.signal,
|
|
1763
2721
|
});
|
|
1764
2722
|
} finally {
|
|
1765
2723
|
clearTimeout(timer);
|
|
2724
|
+
unlinkExternalAbort();
|
|
1766
2725
|
}
|
|
1767
2726
|
response = recoverAllowedTextToolCalls(response, adaptedTools);
|
|
1768
2727
|
if (response.textToolCallFormat) {
|
|
@@ -1794,13 +2753,18 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1794
2753
|
if (response.usage) {
|
|
1795
2754
|
const inputTokens = response.usage.input || 0;
|
|
1796
2755
|
const outputTokens = response.usage.output || 0;
|
|
2756
|
+
// Cache hits cost 0.1x input price, cache writes 1.25x (Anthropic).
|
|
2757
|
+
const cacheRead = response.usage.cacheRead || 0;
|
|
2758
|
+
const cacheWrite = response.usage.cacheWrite || 0;
|
|
2759
|
+
const effectiveInput = Math.max(0, inputTokens - cacheRead - cacheWrite)
|
|
2760
|
+
+ cacheRead * 0.1 + cacheWrite * 1.25;
|
|
1797
2761
|
// Cost estimate: rough pricing per 1M tokens
|
|
1798
2762
|
const costPer1M = {
|
|
1799
2763
|
input: modelId.includes('haiku') ? 0.25 : modelId.includes('sonnet') ? 3.0 : 15.0,
|
|
1800
2764
|
output: modelId.includes('haiku') ? 1.25 : modelId.includes('sonnet') ? 15.0 : 75.0,
|
|
1801
2765
|
};
|
|
1802
|
-
const turnCost = (
|
|
1803
|
-
turnCosts.push({ turn, inputTokens, outputTokens, cost: turnCost });
|
|
2766
|
+
const turnCost = (effectiveInput * costPer1M.input + outputTokens * costPer1M.output) / 1_000_000;
|
|
2767
|
+
turnCosts.push({ turn, inputTokens, outputTokens, cacheRead, cacheWrite, cost: turnCost });
|
|
1804
2768
|
budgetUsed += turnCost;
|
|
1805
2769
|
if (opts.budgetUsd && budgetUsed > opts.budgetUsd) {
|
|
1806
2770
|
finalOutput += '\n[Budget exceeded]';
|
|
@@ -1833,6 +2797,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1833
2797
|
toolsAvailable: adaptedTools.length > 0,
|
|
1834
2798
|
nudges: log._noActionNudges || 0,
|
|
1835
2799
|
cwd: resolvedCwd,
|
|
2800
|
+
codingIntent,
|
|
1836
2801
|
});
|
|
1837
2802
|
if (continuation?.action === 'continue') {
|
|
1838
2803
|
log._noActionNudges = (log._noActionNudges || 0) + 1;
|
|
@@ -1844,12 +2809,22 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1844
2809
|
if (continuation?.action === 'fail') {
|
|
1845
2810
|
throw new Error(continuation.reason);
|
|
1846
2811
|
}
|
|
2812
|
+
const stopGate = await evaluateStopGate({
|
|
2813
|
+
userHooks, log, sessionId: sid, cwd: resolvedCwd, mode: opts.mode, turn, text: contentToText(response.content),
|
|
2814
|
+
});
|
|
2815
|
+
if (stopGate) {
|
|
2816
|
+
messages.push({ role: 'assistant', content: assistantHistoryContent(response) });
|
|
2817
|
+
messages.push({ role: 'user', content: stopGate.message });
|
|
2818
|
+
emitProgress({ phase: opts.mode || 'executing', step: turn, message: 'Stop hook rejected completion — continuing', detail: { reason: stopGate.reason } });
|
|
2819
|
+
continue;
|
|
2820
|
+
}
|
|
1847
2821
|
emitProgress({
|
|
1848
2822
|
phase: opts.mode || 'executing',
|
|
1849
2823
|
step: turn,
|
|
1850
2824
|
message: 'Agent finished',
|
|
1851
2825
|
});
|
|
1852
2826
|
finalOutput += (typeof response.content === 'string' ? response.content : '') || '';
|
|
2827
|
+
if (contentToText(response.content).trim()) finalAnswerDelivered = true;
|
|
1853
2828
|
break;
|
|
1854
2829
|
}
|
|
1855
2830
|
|
|
@@ -1866,125 +2841,34 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1866
2841
|
detail: { tool: tc.name, input: tc.input },
|
|
1867
2842
|
});
|
|
1868
2843
|
|
|
1869
|
-
|
|
1870
|
-
|
|
1871
|
-
|
|
1872
|
-
|
|
1873
|
-
|
|
1874
|
-
|
|
1875
|
-
|
|
1876
|
-
|
|
1877
|
-
|
|
1878
|
-
|
|
1879
|
-
|
|
1880
|
-
|
|
1881
|
-
|
|
1882
|
-
|
|
1883
|
-
|
|
1884
|
-
if (tc.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
|
|
1885
|
-
input.directory = path.join(resolvedCwd, input.directory);
|
|
1886
|
-
}
|
|
1887
|
-
|
|
1888
|
-
// Path traversal guard: file tools must stay within cwd
|
|
1889
|
-
if (['read_file', 'write_file', 'edit_file'].includes(tc.name) && input.file_path) {
|
|
1890
|
-
if (!isWithinDirectory(input.file_path, resolvedCwd)) {
|
|
1891
|
-
result = { error: `Path ${input.file_path} is outside allowed directory ${resolvedCwd}` };
|
|
1892
|
-
turnHadError = true;
|
|
1893
|
-
throw new Error('path_blocked'); // skip to result push
|
|
1894
|
-
}
|
|
1895
|
-
}
|
|
1896
|
-
|
|
1897
|
-
// Override directory for search tools
|
|
1898
|
-
if (tc.name === 'glob' && !input.directory) input.directory = resolvedCwd;
|
|
1899
|
-
if (tc.name === 'grep_files' && !input.directory) input.directory = resolvedCwd;
|
|
1900
|
-
if (tc.name === 'run_shell') {
|
|
1901
|
-
input.timeout_ms = input.timeout_ms || 30000;
|
|
1902
|
-
input.cwd = input.cwd || resolvedCwd;
|
|
1903
|
-
}
|
|
1904
|
-
|
|
1905
|
-
if (toolRequiresPermission(tc.name)) {
|
|
1906
|
-
const permResult = await permissionService.authorize({
|
|
1907
|
-
sessionId: sid,
|
|
1908
|
-
tool: tc.name,
|
|
1909
|
-
input,
|
|
1910
|
-
cwd: input.cwd || resolvedCwd,
|
|
1911
|
-
projectRoot: resolvedCwd,
|
|
1912
|
-
mode: opts.mode,
|
|
1913
|
-
headless: Boolean(opts.headless || opts.benchmark),
|
|
1914
|
-
metadata: { toolCallId: tc.id || tc.toolCallId || '' },
|
|
1915
|
-
});
|
|
1916
|
-
if (permResult.decision !== 'allow') {
|
|
1917
|
-
result = { error: `Permission denied: ${permResult.reason || permResult.message || permResult.decision}` };
|
|
1918
|
-
turnHadError = true;
|
|
1919
|
-
throw new Error('path_blocked');
|
|
1920
|
-
}
|
|
1921
|
-
}
|
|
1922
|
-
|
|
1923
|
-
// Middleware: before tool
|
|
1924
|
-
const modifiedInput = await mw.run('tool.before', llmCtx, tc.name, input);
|
|
1925
|
-
const finalInput = (modifiedInput && typeof modifiedInput === 'object') ? modifiedInput : input;
|
|
1926
|
-
if (['read_file', 'write_file', 'edit_file', 'apply_patch', 'multi_edit', 'glob', 'grep_files', 'list_directory'].includes(tc.name)) {
|
|
1927
|
-
finalInput.sessionId = sid;
|
|
1928
|
-
finalInput.projectRoot = resolvedCwd;
|
|
1929
|
-
}
|
|
1930
|
-
|
|
1931
|
-
// In-flight todo tracking (6m)
|
|
1932
|
-
if (tc.name === 'update_todos') {
|
|
1933
|
-
currentTodos = finalInput.todos || [];
|
|
1934
|
-
result = { ok: true, todos: currentTodos };
|
|
1935
|
-
} else if (tc.name === 'ask_user') {
|
|
1936
|
-
// In headless/benchmark mode, auto-dismiss ask_user to avoid blocking
|
|
1937
|
-
if (opts.mode === 'build' && !opts.interactive) {
|
|
1938
|
-
result = { dismissed: true, message: 'Running in non-interactive mode. Please proceed with your best judgment based on the code you have read.' };
|
|
1939
|
-
} else {
|
|
1940
|
-
// Interactive question (B1) — ask the user and wait for answer
|
|
1941
|
-
try {
|
|
1942
|
-
const answer = await questionManager.ask(sid, {
|
|
1943
|
-
question: finalInput.question,
|
|
1944
|
-
header: finalInput.header,
|
|
1945
|
-
options: finalInput.options,
|
|
1946
|
-
multiple: finalInput.multiple,
|
|
1947
|
-
});
|
|
1948
|
-
result = answer ? { answers: answer } : { dismissed: true, message: 'Question timed out or was dismissed' };
|
|
1949
|
-
} catch (e) {
|
|
1950
|
-
result = { error: `Question failed: ${e.message}` };
|
|
1951
|
-
}
|
|
1952
|
-
}
|
|
1953
|
-
} else {
|
|
1954
|
-
result = await toolRegistry.execute(tc.name, finalInput, {
|
|
1955
|
-
sessionId: sid,
|
|
1956
|
-
cwd: resolvedCwd,
|
|
1957
|
-
model: modelId,
|
|
1958
|
-
provider: llm.type,
|
|
1959
|
-
llmCtx,
|
|
1960
|
-
});
|
|
1961
|
-
}
|
|
1962
|
-
|
|
1963
|
-
// Middleware: after tool
|
|
1964
|
-
result = await mw.run('tool.after', llmCtx, tc.name, finalInput, result) || result;
|
|
2844
|
+
const execution = await toolExecutionController.execute(tc, {
|
|
2845
|
+
sessionId: sid,
|
|
2846
|
+
cwd: resolvedCwd,
|
|
2847
|
+
projectRoot: resolvedCwd,
|
|
2848
|
+
model: modelId,
|
|
2849
|
+
provider: llm.type,
|
|
2850
|
+
mode: opts.mode || '',
|
|
2851
|
+
runtimeMode: runtimeMode.id,
|
|
2852
|
+
llmCtx,
|
|
2853
|
+
interactive: opts.interactive,
|
|
2854
|
+
onTodos: (todos) => { currentTodos = todos; },
|
|
2855
|
+
});
|
|
2856
|
+
const result = execution.result;
|
|
2857
|
+
const evidenceInput = execution.evidenceInput || tc.input || {};
|
|
2858
|
+
if (!execution.ok) turnHadError = true;
|
|
1965
2859
|
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
|
|
1973
|
-
|
|
1974
|
-
|
|
1975
|
-
]
|
|
1976
|
-
|
|
1977
|
-
|
|
1978
|
-
}
|
|
1979
|
-
}
|
|
1980
|
-
if (tc.name === 'read_file' && result && !result.error) {
|
|
1981
|
-
events.emit('file.read', { filePath: finalInput.file_path, sessionId: sid });
|
|
1982
|
-
}
|
|
1983
|
-
} catch (err) {
|
|
1984
|
-
if (err.message !== 'path_blocked') {
|
|
1985
|
-
result = { error: err.message };
|
|
1986
|
-
}
|
|
1987
|
-
turnHadError = true;
|
|
2860
|
+
const typedArtifacts = storeTypedArtifactsForTranscript(result, {
|
|
2861
|
+
sessionId: sid,
|
|
2862
|
+
cwd: resolvedCwd,
|
|
2863
|
+
toolCallId: tc.id || '',
|
|
2864
|
+
toolName: tc.name,
|
|
2865
|
+
transcript,
|
|
2866
|
+
});
|
|
2867
|
+
if (typedArtifacts.length && log[log.length - 1]) {
|
|
2868
|
+
log[log.length - 1].artifacts = [
|
|
2869
|
+
...(log[log.length - 1].artifacts || []),
|
|
2870
|
+
...typedArtifacts,
|
|
2871
|
+
];
|
|
1988
2872
|
}
|
|
1989
2873
|
|
|
1990
2874
|
const resultStr = typeof result === 'string' ? result : JSON.stringify(result);
|
|
@@ -2003,11 +2887,18 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2003
2887
|
toolResults.push({ type: 'tool_result', tool_use_id: tc.id, content: capped });
|
|
2004
2888
|
|
|
2005
2889
|
log[log.length - 1].toolResults = log[log.length - 1].toolResults || [];
|
|
2006
|
-
|
|
2890
|
+
const evidence = normalizeToolCallEvidence({ name: tc.name, input: evidenceInput }, result);
|
|
2891
|
+
log[log.length - 1].toolResults.push({
|
|
2892
|
+
name: tc.name,
|
|
2893
|
+
resultLength: resultStr.length,
|
|
2894
|
+
ok: evidence.ok === true,
|
|
2895
|
+
error: result?.error || null,
|
|
2896
|
+
exitCode: result?.exitCode,
|
|
2897
|
+
result,
|
|
2898
|
+
});
|
|
2007
2899
|
|
|
2008
2900
|
// Doom loop detection (6a) -- track tool calls for identical pattern
|
|
2009
|
-
|
|
2010
|
-
toolCallHistory.push({ name: tc.name, inputHash });
|
|
2901
|
+
toolCallHistory.push(evidence);
|
|
2011
2902
|
|
|
2012
2903
|
if (toolCallHistory.length >= DOOM_LOOP_THRESHOLD) {
|
|
2013
2904
|
const recent = toolCallHistory.slice(-DOOM_LOOP_THRESHOLD);
|
|
@@ -2066,7 +2957,20 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2066
2957
|
if (response.stopReason === 'end_turn' || response.stopReason === 'max_tokens') break;
|
|
2067
2958
|
}
|
|
2068
2959
|
} catch (err) {
|
|
2069
|
-
|
|
2960
|
+
// Classify provider/LLM failures into a clear, human message (naming the model) so the
|
|
2961
|
+
// coding agent surfaces e.g. "AI provider network error: … could not reach the provider
|
|
2962
|
+
// endpoint…" instead of a raw "fetch failed" — mirrors the stream path
|
|
2963
|
+
// (stream-processor.js), which already decorates. Raw err.message is kept below for the
|
|
2964
|
+
// CLI-recoverability pattern match (which keys on the actual error text).
|
|
2965
|
+
let friendlyError = (err && err.message) || 'Coding session failed';
|
|
2966
|
+
try {
|
|
2967
|
+
const { decorateProviderError } = require('./llm/provider-error');
|
|
2968
|
+
const decorated = decorateProviderError(err, { provider: llm?.type || '', model: modelId || model || '' });
|
|
2969
|
+
if (decorated && decorated.providerError && decorated.providerError.userMessage) {
|
|
2970
|
+
friendlyError = decorated.providerError.userMessage;
|
|
2971
|
+
}
|
|
2972
|
+
} catch {}
|
|
2973
|
+
emitProgress({ phase: 'error', step: -1, message: friendlyError });
|
|
2070
2974
|
|
|
2071
2975
|
// Persist activity error (Phase 2: Activity History)
|
|
2072
2976
|
try { getActivityLog().log({ session_id: sid, type: 'coding_error', title: 'Coding session failed', body: err.message, detail: JSON.stringify({ turns: log.length }) }); } catch {}
|
|
@@ -2078,10 +2982,10 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2078
2982
|
sessionId: sid,
|
|
2079
2983
|
cwd: resolvedCwd,
|
|
2080
2984
|
partType: 'error',
|
|
2081
|
-
data: { message:
|
|
2985
|
+
data: { message: friendlyError },
|
|
2082
2986
|
});
|
|
2083
2987
|
}
|
|
2084
|
-
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
2988
|
+
if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
|
|
2085
2989
|
transcript.appendAssistantMessage(finalOutput, {
|
|
2086
2990
|
sessionId: sid,
|
|
2087
2991
|
cwd: resolvedCwd,
|
|
@@ -2093,9 +2997,10 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2093
2997
|
}
|
|
2094
2998
|
|
|
2095
2999
|
// Graceful cleanup (6r)
|
|
3000
|
+
try { require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid }); } catch {}
|
|
2096
3001
|
const cleanup = {
|
|
2097
3002
|
lastCompletedTurn: log.length - 1,
|
|
2098
|
-
error:
|
|
3003
|
+
error: friendlyError,
|
|
2099
3004
|
todosAtAbort: currentTodos,
|
|
2100
3005
|
};
|
|
2101
3006
|
|
|
@@ -2141,7 +3046,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2141
3046
|
return {
|
|
2142
3047
|
success: false,
|
|
2143
3048
|
output: finalOutput,
|
|
2144
|
-
stderr:
|
|
3049
|
+
stderr: friendlyError,
|
|
2145
3050
|
sessionId: sid,
|
|
2146
3051
|
exitCode: -1,
|
|
2147
3052
|
log,
|
|
@@ -2149,10 +3054,11 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2149
3054
|
provider: llm?.type,
|
|
2150
3055
|
model: modelId,
|
|
2151
3056
|
runtimeMode: runtimeMode.id,
|
|
3057
|
+
finalAnswerDelivered,
|
|
2152
3058
|
turnCosts,
|
|
2153
3059
|
budgetUsed,
|
|
2154
3060
|
cleanup,
|
|
2155
|
-
changedFiles: changedFilesSince(resolvedCwd,
|
|
3061
|
+
changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
|
|
2156
3062
|
};
|
|
2157
3063
|
}
|
|
2158
3064
|
|
|
@@ -2168,6 +3074,20 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2168
3074
|
if (questionManager) questionManager.clear();
|
|
2169
3075
|
try { require('./tools/file-tracker').clearSession(sid); } catch {}
|
|
2170
3076
|
|
|
3077
|
+
// Stop session-scoped background processes (dev servers, watchers).
|
|
3078
|
+
// Persistent ones are reported so the summary can mention them.
|
|
3079
|
+
try {
|
|
3080
|
+
const bg = require('./tools/local-tools').cleanupBackgroundProcesses({ sessionId: sid });
|
|
3081
|
+
if (bg.stopped.length || bg.persisted.length) {
|
|
3082
|
+
emitProgress({
|
|
3083
|
+
phase: 'done',
|
|
3084
|
+
step: -1,
|
|
3085
|
+
message: `Background processes: stopped ${bg.stopped.length}, left running ${bg.persisted.length}`,
|
|
3086
|
+
detail: bg,
|
|
3087
|
+
});
|
|
3088
|
+
}
|
|
3089
|
+
} catch {}
|
|
3090
|
+
|
|
2171
3091
|
emitProgress({ phase: 'done', step: -1, message: 'Agent loop finished' });
|
|
2172
3092
|
|
|
2173
3093
|
// Delete checkpoint on successful completion (no longer needed)
|
|
@@ -2175,7 +3095,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2175
3095
|
|
|
2176
3096
|
// Persist activity completion (Phase 2: Activity History)
|
|
2177
3097
|
try { getActivityLog().log({ session_id: sid, type: 'coding_complete', title: 'Coding session completed', body: finalOutput.slice(0, 500), detail: JSON.stringify({ turns: log.length, tokens: totalInput + totalOutput }) }); } catch {}
|
|
2178
|
-
if (finalOutput && transcript?.appendAssistantMessage) {
|
|
3098
|
+
if (!externalTranscriptMessages && finalOutput && transcript?.appendAssistantMessage) {
|
|
2179
3099
|
transcript.appendAssistantMessage(finalOutput, {
|
|
2180
3100
|
sessionId: sid,
|
|
2181
3101
|
cwd: resolvedCwd,
|
|
@@ -2208,7 +3128,8 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
2208
3128
|
turnCosts,
|
|
2209
3129
|
budgetUsed,
|
|
2210
3130
|
screenshots: screenshotsTaken,
|
|
2211
|
-
changedFiles: changedFilesSince(resolvedCwd,
|
|
3131
|
+
changedFiles: changedFilesSince(resolvedCwd, preRunFileBaseline),
|
|
3132
|
+
finalAnswerDelivered,
|
|
2212
3133
|
};
|
|
2213
3134
|
}
|
|
2214
3135
|
|
|
@@ -2542,6 +3463,63 @@ function getGitChangedFiles(cwd) {
|
|
|
2542
3463
|
}
|
|
2543
3464
|
}
|
|
2544
3465
|
|
|
3466
|
+
// Directories we never descend into when scanning a non-git working tree for changes.
|
|
3467
|
+
// Dependency/build/VCS dirs would balloon the walk and never represent the agent's edits.
|
|
3468
|
+
const _CHANGED_SCAN_SKIP_DIRS = new Set([
|
|
3469
|
+
'.git', '.hg', '.svn', 'node_modules', 'bower_components', '.next', '.nuxt',
|
|
3470
|
+
'dist', 'build', 'out', 'target', 'vendor', '.venv', 'venv', '__pycache__',
|
|
3471
|
+
'.cache', '.turbo', '.gradle', '.idea', '.vscode', 'coverage', '.parcel-cache',
|
|
3472
|
+
]);
|
|
3473
|
+
|
|
3474
|
+
// Captures a baseline for changedFilesSince() that works in BOTH git and non-git cwds.
|
|
3475
|
+
// Git: the set of already-dirty paths, so the agent's own edits can be isolated from
|
|
3476
|
+
// pre-existing uncommitted changes. Non-git: a wall-clock marker captured before the
|
|
3477
|
+
// agent runs, so files it creates/modifies can be detected by mtime afterward.
|
|
3478
|
+
function captureChangedFilesBaseline(cwd) {
|
|
3479
|
+
if (isGitRepository(cwd)) {
|
|
3480
|
+
return { isGit: true, dirty: getGitChangedFiles(cwd) };
|
|
3481
|
+
}
|
|
3482
|
+
return { isGit: false, startedAtMs: Date.now() };
|
|
3483
|
+
}
|
|
3484
|
+
|
|
3485
|
+
// Walks a non-git working tree and returns relative paths of files created or modified
|
|
3486
|
+
// at/after `sinceMs`. Bounded (skip-dirs + entry/result caps) so a huge tree — e.g. a
|
|
3487
|
+
// folder holding a multi-hundred-MB archive — can't make change detection runaway.
|
|
3488
|
+
function collectFilesModifiedSince(cwd, sinceMs, { maxEntries = 60000, maxResults = 5000 } = {}) {
|
|
3489
|
+
const results = [];
|
|
3490
|
+
let visited = 0;
|
|
3491
|
+
const stack = [''];
|
|
3492
|
+
while (stack.length) {
|
|
3493
|
+
const relDir = stack.pop();
|
|
3494
|
+
let entries;
|
|
3495
|
+
try {
|
|
3496
|
+
entries = fs.readdirSync(path.join(cwd, relDir), { withFileTypes: true });
|
|
3497
|
+
} catch {
|
|
3498
|
+
continue;
|
|
3499
|
+
}
|
|
3500
|
+
for (const ent of entries) {
|
|
3501
|
+
if (visited++ >= maxEntries || results.length >= maxResults) return results;
|
|
3502
|
+
const rel = relDir ? `${relDir}/${ent.name}` : ent.name;
|
|
3503
|
+
if (ent.isDirectory()) {
|
|
3504
|
+
if (_CHANGED_SCAN_SKIP_DIRS.has(ent.name)) continue;
|
|
3505
|
+
stack.push(rel);
|
|
3506
|
+
} else if (ent.isFile()) {
|
|
3507
|
+
if (ent.name === '.DS_Store') continue;
|
|
3508
|
+
let st;
|
|
3509
|
+
try {
|
|
3510
|
+
st = fs.statSync(path.join(cwd, rel));
|
|
3511
|
+
} catch {
|
|
3512
|
+
continue;
|
|
3513
|
+
}
|
|
3514
|
+
if (st.mtimeMs >= sinceMs && _isPathSafeRelative(rel)) results.push(rel);
|
|
3515
|
+
}
|
|
3516
|
+
// Symlinks (and other non-file/non-dir entries) are intentionally skipped to avoid
|
|
3517
|
+
// following them out of the working tree or into cycles.
|
|
3518
|
+
}
|
|
3519
|
+
}
|
|
3520
|
+
return results;
|
|
3521
|
+
}
|
|
3522
|
+
|
|
2545
3523
|
function getGitTrackedAndUntrackedFiles(cwd) {
|
|
2546
3524
|
try {
|
|
2547
3525
|
const stdout = execFileSync('git', ['ls-files', '-z', '--cached', '--others', '--exclude-standard'], {
|
|
@@ -2747,20 +3725,34 @@ async function plan(request, cwd, options = {}) {
|
|
|
2747
3725
|
if (!result.success) {
|
|
2748
3726
|
parseErr.message = `Planning failed before producing valid JSON (${result.stderr || 'provider error'}): ${parseErr.message}`;
|
|
2749
3727
|
}
|
|
2750
|
-
if (
|
|
2751
|
-
|
|
2752
|
-
|
|
2753
|
-
|
|
2754
|
-
|
|
2755
|
-
|
|
2756
|
-
|
|
2757
|
-
|
|
2758
|
-
|
|
3728
|
+
if (shouldRecoverPlannerParseFailure({ request, output: result.output, cwd })) {
|
|
3729
|
+
if (onProgress) {
|
|
3730
|
+
onProgress({
|
|
3731
|
+
type: 'planning_recovery',
|
|
3732
|
+
phase: 'planning',
|
|
3733
|
+
step: -1,
|
|
3734
|
+
message: 'Planner returned unstructured output; recovering with a direct implementation subtask.',
|
|
3735
|
+
detail: { reason: parseErr.message },
|
|
3736
|
+
});
|
|
3737
|
+
}
|
|
3738
|
+
planObj = buildPlannerRecoveryPlan(request, context, parseErr, result.output);
|
|
3739
|
+
config._planningRecovery = planObj.planning_recovery;
|
|
3740
|
+
} else {
|
|
3741
|
+
if (process.env.WALLE_PLAN_DEBUG) {
|
|
3742
|
+
const dumpPath = path.join(
|
|
3743
|
+
process.env.WALL_E_DATA_DIR || '/tmp',
|
|
3744
|
+
`planner-debug-${Date.now()}.txt`,
|
|
2759
3745
|
);
|
|
2760
|
-
|
|
2761
|
-
|
|
3746
|
+
try {
|
|
3747
|
+
fs.writeFileSync(
|
|
3748
|
+
dumpPath,
|
|
3749
|
+
`=== prompt ===\n${prompt}\n\n=== output ===\n${result.output || ''}\n\n=== outputRaw ===\n${result.outputRaw || ''}\n`,
|
|
3750
|
+
);
|
|
3751
|
+
parseErr.message += ` (planner debug dumped to ${dumpPath})`;
|
|
3752
|
+
} catch {}
|
|
3753
|
+
}
|
|
3754
|
+
throw parseErr;
|
|
2762
3755
|
}
|
|
2763
|
-
throw parseErr;
|
|
2764
3756
|
}
|
|
2765
3757
|
|
|
2766
3758
|
// Enforce max_subtasks
|
|
@@ -2919,6 +3911,13 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
|
|
|
2919
3911
|
const attemptChangedFiles = changedFilesSince(cwd, attemptStartDirtyFiles);
|
|
2920
3912
|
const reportedChangedFiles = Array.isArray(result.changedFiles) ? result.changedFiles : [];
|
|
2921
3913
|
const changedFilesForValidation = [...new Set([...reportedChangedFiles, ...attemptChangedFiles])];
|
|
3914
|
+
const acceptanceContract = buildAcceptanceContract({
|
|
3915
|
+
request: subtask.prompt || subtask.title || '',
|
|
3916
|
+
subtask,
|
|
3917
|
+
changedFiles: changedFilesForValidation,
|
|
3918
|
+
frontend: changedFilesTouchFrontend(changedFilesForValidation),
|
|
3919
|
+
requiresFileChanges,
|
|
3920
|
+
});
|
|
2922
3921
|
|
|
2923
3922
|
if (isTimeoutOnlyOutput(result.output)) {
|
|
2924
3923
|
lastError = `Subtask timed out before producing a usable result${result.stderr ? `: ${result.stderr}` : ''}`;
|
|
@@ -2960,6 +3959,24 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
|
|
|
2960
3959
|
continue;
|
|
2961
3960
|
}
|
|
2962
3961
|
|
|
3962
|
+
const acceptanceReport = await runAcceptanceValidators({
|
|
3963
|
+
cwd,
|
|
3964
|
+
contract: acceptanceContract,
|
|
3965
|
+
changedFiles: changedFilesForValidation,
|
|
3966
|
+
screenshots: result.screenshots || [],
|
|
3967
|
+
toolCallHistory: toolCallHistoryFromLog(result.log),
|
|
3968
|
+
autoBrowser: false,
|
|
3969
|
+
requireBrowserRuntime: false,
|
|
3970
|
+
onProgress,
|
|
3971
|
+
step: i,
|
|
3972
|
+
});
|
|
3973
|
+
if (!acceptanceReport.ok) {
|
|
3974
|
+
lastError = `Acceptance validation failed: ${summarizeValidatorFailures(acceptanceReport).join('; ')}`;
|
|
3975
|
+
if (onProgress) onProgress({ type: 'retry', index: i, retry, error: lastError, acceptance: acceptanceReport });
|
|
3976
|
+
if (retry < config.max_retries) restoreSubtaskSnapshot(snapshot, cwd, baselineUntracked);
|
|
3977
|
+
continue;
|
|
3978
|
+
}
|
|
3979
|
+
|
|
2963
3980
|
// Run tests if configured
|
|
2964
3981
|
let testsOk = true;
|
|
2965
3982
|
if (config.test_command) {
|
|
@@ -3080,6 +4097,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
|
|
|
3080
4097
|
failed_subtask: i,
|
|
3081
4098
|
files_changed: [...agentChangedFiles],
|
|
3082
4099
|
pre_existing_dirty_files: [...preExistingDirtyFiles],
|
|
4100
|
+
screenshots: allScreenshots,
|
|
3083
4101
|
};
|
|
3084
4102
|
}
|
|
3085
4103
|
|
|
@@ -3095,6 +4113,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
|
|
|
3095
4113
|
state_path: statePath,
|
|
3096
4114
|
files_changed: [...agentChangedFiles],
|
|
3097
4115
|
pre_existing_dirty_files: [...preExistingDirtyFiles],
|
|
4116
|
+
screenshots: allScreenshots,
|
|
3098
4117
|
};
|
|
3099
4118
|
}
|
|
3100
4119
|
|
|
@@ -3104,6 +4123,7 @@ async function execute(planData, { cwd, onProgress, startFrom = 0 } = {}) {
|
|
|
3104
4123
|
async function complete(request, planData, executeResult, { cwd, brain, onProgress } = {}) {
|
|
3105
4124
|
const { plan: planObj, config } = planData;
|
|
3106
4125
|
const agentFiles = (executeResult.files_changed || []).filter(_isPathSafeRelative);
|
|
4126
|
+
const screenshots = Array.isArray(executeResult.screenshots) ? executeResult.screenshots : [];
|
|
3107
4127
|
const report = {
|
|
3108
4128
|
success: executeResult.success,
|
|
3109
4129
|
branch: planObj.branch_name,
|
|
@@ -3124,10 +4144,42 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
|
|
|
3124
4144
|
return report;
|
|
3125
4145
|
}
|
|
3126
4146
|
|
|
4147
|
+
let diff = '';
|
|
4148
|
+
if (agentFiles.length > 0) {
|
|
4149
|
+
diff = await getGitDiffForFiles(cwd, agentFiles);
|
|
4150
|
+
}
|
|
4151
|
+
|
|
4152
|
+
if (diff && codingReview.diffTouchesFrontend(diff)) {
|
|
4153
|
+
const acceptanceContract = buildAcceptanceContract({
|
|
4154
|
+
request,
|
|
4155
|
+
subtask: { title: 'Final frontend acceptance', prompt: request },
|
|
4156
|
+
changedFiles: agentFiles,
|
|
4157
|
+
frontend: true,
|
|
4158
|
+
requiresFileChanges: config.require_changes !== false,
|
|
4159
|
+
});
|
|
4160
|
+
const acceptanceReport = await runAcceptanceValidators({
|
|
4161
|
+
cwd,
|
|
4162
|
+
contract: acceptanceContract,
|
|
4163
|
+
changedFiles: agentFiles,
|
|
4164
|
+
screenshots,
|
|
4165
|
+
toolCallHistory: [],
|
|
4166
|
+
autoBrowser: config.browser_smoke !== false && config.browserSmoke !== false,
|
|
4167
|
+
requireBrowserRuntime: config.browser_smoke !== false && config.browserSmoke !== false,
|
|
4168
|
+
onProgress,
|
|
4169
|
+
step: -1,
|
|
4170
|
+
});
|
|
4171
|
+
report.frontendVerification = acceptanceReport;
|
|
4172
|
+
if (!acceptanceReport.ok) {
|
|
4173
|
+
report.success = false;
|
|
4174
|
+
report.error = summarizeValidatorFailures(acceptanceReport)[0] || 'Frontend acceptance validation failed';
|
|
4175
|
+
report.concerns.push(...acceptanceReport.concerns.slice(0, 10));
|
|
4176
|
+
return report;
|
|
4177
|
+
}
|
|
4178
|
+
}
|
|
4179
|
+
|
|
3127
4180
|
// Final review
|
|
3128
4181
|
if (config.review) {
|
|
3129
4182
|
if (onProgress) onProgress({ phase: 'reviewing', step: -1, message: 'Final review...' });
|
|
3130
|
-
const diff = await getGitDiffForFiles(cwd, agentFiles);
|
|
3131
4183
|
if (diff) {
|
|
3132
4184
|
const verdict = config.review_quorum
|
|
3133
4185
|
? await codingQuorum.runCodingQuorum({
|
|
@@ -3155,8 +4207,8 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
|
|
|
3155
4207
|
return report;
|
|
3156
4208
|
}
|
|
3157
4209
|
|
|
3158
|
-
if (codingReview.diffTouchesFrontend(diff) &&
|
|
3159
|
-
const visualVerdict = await codingReview.reviewVisual(request,
|
|
4210
|
+
if (codingReview.diffTouchesFrontend(diff) && screenshots.length > 0) {
|
|
4211
|
+
const visualVerdict = await codingReview.reviewVisual(request, screenshots, diff, {
|
|
3160
4212
|
cwd,
|
|
3161
4213
|
reviewer: config.reviewer,
|
|
3162
4214
|
reviewers: config.reviewers,
|
|
@@ -3208,7 +4260,7 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
|
|
|
3208
4260
|
if (diffErr.code !== 1) throw diffErr;
|
|
3209
4261
|
}
|
|
3210
4262
|
const sanitizedRequest = request.replace(/[\r\n]+/g, ' ').trim().slice(0, 72);
|
|
3211
|
-
const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by:
|
|
4263
|
+
const commitMsg = `feat: ${sanitizedRequest}\n\nOrchestrated by Wall-E coding agent.\nSubtasks: ${planObj.subtasks.length}\n\nCo-authored-by: Wall-E <noreply@example.invalid>`;
|
|
3212
4264
|
const { stdout } = await execFileAsync('git', ['commit', '-m', commitMsg], { cwd });
|
|
3213
4265
|
// Extract commit hash
|
|
3214
4266
|
const hashMatch = stdout.match(/\[[\w/.-]+ ([a-f0-9]+)\]/);
|
|
@@ -3285,6 +4337,56 @@ async function complete(request, planData, executeResult, { cwd, brain, onProgre
|
|
|
3285
4337
|
return report;
|
|
3286
4338
|
}
|
|
3287
4339
|
|
|
4340
|
+
function storeTypedArtifactsForTranscript(result, { sessionId, cwd, toolCallId, toolName, transcript } = {}) {
|
|
4341
|
+
if (!result || typeof result !== 'object' || !transcript?.appendArtifact) return [];
|
|
4342
|
+
let descriptors = [];
|
|
4343
|
+
try {
|
|
4344
|
+
const { extractTypedArtifactDescriptors } = require('./coding/stream-processor');
|
|
4345
|
+
descriptors = extractTypedArtifactDescriptors(result, { id: toolCallId, name: toolName });
|
|
4346
|
+
} catch {
|
|
4347
|
+
descriptors = [];
|
|
4348
|
+
}
|
|
4349
|
+
if (!descriptors.length) return [];
|
|
4350
|
+
let artifactStore = null;
|
|
4351
|
+
try {
|
|
4352
|
+
const { ArtifactStore } = require('./coding/artifact-store');
|
|
4353
|
+
artifactStore = new ArtifactStore();
|
|
4354
|
+
} catch {}
|
|
4355
|
+
const stored = [];
|
|
4356
|
+
const seen = new Set();
|
|
4357
|
+
for (const descriptor of descriptors) {
|
|
4358
|
+
try {
|
|
4359
|
+
const artifact = artifactStore?.storeArtifact
|
|
4360
|
+
? artifactStore.storeArtifact({
|
|
4361
|
+
sessionId,
|
|
4362
|
+
toolCallId,
|
|
4363
|
+
toolName,
|
|
4364
|
+
kind: descriptor.kind,
|
|
4365
|
+
sourcePath: descriptor.path || descriptor.sourcePath,
|
|
4366
|
+
content: descriptor.content,
|
|
4367
|
+
mimeType: descriptor.mimeType,
|
|
4368
|
+
bytes: descriptor.bytes,
|
|
4369
|
+
sha256: descriptor.sha256,
|
|
4370
|
+
metadata: descriptor.metadata || {},
|
|
4371
|
+
})
|
|
4372
|
+
: descriptor;
|
|
4373
|
+
const key = artifact.artifactId || `${artifact.kind}:${artifact.path}`;
|
|
4374
|
+
if (seen.has(key)) continue;
|
|
4375
|
+
seen.add(key);
|
|
4376
|
+
transcript.appendArtifact({
|
|
4377
|
+
sessionId,
|
|
4378
|
+
cwd,
|
|
4379
|
+
type: artifact.kind || 'artifact',
|
|
4380
|
+
toolCallId,
|
|
4381
|
+
name: toolName,
|
|
4382
|
+
artifact,
|
|
4383
|
+
});
|
|
4384
|
+
stored.push(artifact);
|
|
4385
|
+
} catch {}
|
|
4386
|
+
}
|
|
4387
|
+
return stored;
|
|
4388
|
+
}
|
|
4389
|
+
|
|
3288
4390
|
/**
|
|
3289
4391
|
* Resume a coding session from a saved checkpoint.
|
|
3290
4392
|
* Loads checkpoint data from brain DB, reconstructs messages, and re-enters runAgentLoop.
|
|
@@ -3335,13 +4437,20 @@ module.exports = {
|
|
|
3335
4437
|
readCheckpoint,
|
|
3336
4438
|
formatReport,
|
|
3337
4439
|
isTimeoutOnlyOutput,
|
|
4440
|
+
resolveRunTimeoutMs,
|
|
4441
|
+
isInteractiveRun,
|
|
3338
4442
|
isActionRequiredPrompt,
|
|
3339
4443
|
isPrematureActionResponse,
|
|
3340
4444
|
getNoActionContinuation,
|
|
4445
|
+
evaluateStopGate,
|
|
4446
|
+
MAX_STOP_HOOK_BOUNCES,
|
|
3341
4447
|
hasVerificationEvidence,
|
|
3342
4448
|
subtaskRequiresFileChanges,
|
|
3343
4449
|
screenshotTrackerHook,
|
|
3344
4450
|
collectEmptyChangedFiles,
|
|
4451
|
+
changedFilesSince,
|
|
4452
|
+
captureChangedFilesBaseline,
|
|
4453
|
+
collectFilesModifiedSince,
|
|
3345
4454
|
CODING_TOOLS,
|
|
3346
4455
|
READ_ONLY_TOOLS,
|
|
3347
4456
|
BUILD_TOOLS,
|