create-walle 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/package.json +2 -2
- package/template/CLAUDE.md +2 -2
- package/template/LICENSE +1 -1
- package/template/bin/ctm-dev-cleanup.js +24 -3
- package/template/bin/ctm-launch.sh +13 -0
- package/template/bin/dev.sh +156 -18
- package/template/bin/node-bin.sh +84 -0
- package/template/bin/pin-node.sh +51 -0
- package/template/claude-task-manager/api-prompts.js +1203 -182
- package/template/claude-task-manager/api-reviews.js +109 -15
- package/template/claude-task-manager/approval-agent.js +1360 -280
- package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
- package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
- package/template/claude-task-manager/db.js +4417 -295
- package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
- package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
- package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
- package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
- package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
- package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
- package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
- package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
- package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
- package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
- package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
- package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
- package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
- package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
- package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
- package/template/claude-task-manager/docs/phone-access-design.md +53 -15
- package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
- package/template/claude-task-manager/docs/phone-setup.md +3 -0
- package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
- package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
- package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
- package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
- package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
- package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
- package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
- package/template/claude-task-manager/docs/session-title-authority.md +32 -0
- package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
- package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
- package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
- package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
- package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
- package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
- package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
- package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
- package/template/claude-task-manager/git-utils.js +897 -27
- package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
- package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
- package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
- package/template/claude-task-manager/lib/agent-presets.js +17 -1
- package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
- package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
- package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
- package/template/claude-task-manager/lib/async-semaphore.js +44 -0
- package/template/claude-task-manager/lib/auth-context.js +5 -0
- package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
- package/template/claude-task-manager/lib/auth-rules.js +29 -2
- package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
- package/template/claude-task-manager/lib/background-llm.js +144 -17
- package/template/claude-task-manager/lib/branch-inventory.js +212 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
- package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
- package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
- package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
- package/template/claude-task-manager/lib/codex-zst.js +124 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
- package/template/claude-task-manager/lib/connection-health.js +232 -0
- package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
- package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
- package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
- package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
- package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
- package/template/claude-task-manager/lib/document-review.js +141 -6
- package/template/claude-task-manager/lib/escalation-review.js +152 -0
- package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
- package/template/claude-task-manager/lib/headless-term-service.js +678 -0
- package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
- package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
- package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
- package/template/claude-task-manager/lib/main-db-census.js +216 -0
- package/template/claude-task-manager/lib/message-pagination.js +106 -4
- package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
- package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
- package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
- package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
- package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
- package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
- package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
- package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
- package/template/claude-task-manager/lib/perf-tracker.js +242 -6
- package/template/claude-task-manager/lib/permission-match.js +76 -0
- package/template/claude-task-manager/lib/permission-sync.js +133 -20
- package/template/claude-task-manager/lib/process-title.js +35 -0
- package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
- package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
- package/template/claude-task-manager/lib/prompt-intent.js +132 -0
- package/template/claude-task-manager/lib/provider-user-context.js +34 -0
- package/template/claude-task-manager/lib/read-pool-client.js +313 -0
- package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
- package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
- package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
- package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
- package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
- package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
- package/template/claude-task-manager/lib/restart-guard.js +109 -0
- package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
- package/template/claude-task-manager/lib/restore-policy.js +13 -0
- package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
- package/template/claude-task-manager/lib/restore-runtime.js +68 -0
- package/template/claude-task-manager/lib/restore-storm.js +34 -0
- package/template/claude-task-manager/lib/resume-cwd.js +36 -0
- package/template/claude-task-manager/lib/resume-preflight.js +313 -0
- package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
- package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
- package/template/claude-task-manager/lib/scheduler.js +21 -1
- package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
- package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
- package/template/claude-task-manager/lib/server-listeners.js +239 -0
- package/template/claude-task-manager/lib/session-capture.js +42 -7
- package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
- package/template/claude-task-manager/lib/session-history.js +388 -43
- package/template/claude-task-manager/lib/session-host-manager.js +287 -0
- package/template/claude-task-manager/lib/session-image-refs.js +209 -0
- package/template/claude-task-manager/lib/session-jobs.js +399 -59
- package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
- package/template/claude-task-manager/lib/session-restore.js +53 -0
- package/template/claude-task-manager/lib/session-standup.js +123 -23
- package/template/claude-task-manager/lib/session-state-bus.js +14 -0
- package/template/claude-task-manager/lib/session-stream.js +64 -16
- package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
- package/template/claude-task-manager/lib/session-token-usage.js +494 -0
- package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
- package/template/claude-task-manager/lib/setup-network-config.js +9 -0
- package/template/claude-task-manager/lib/size-cap.js +45 -0
- package/template/claude-task-manager/lib/size-cap.test.js +62 -0
- package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
- package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
- package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
- package/template/claude-task-manager/lib/standup-attention.js +7 -3
- package/template/claude-task-manager/lib/status-authority.js +39 -0
- package/template/claude-task-manager/lib/status-hooks.js +4 -0
- package/template/claude-task-manager/lib/storage-migration.js +235 -0
- package/template/claude-task-manager/lib/structured-capture.js +298 -0
- package/template/claude-task-manager/lib/sync-io-census.js +163 -0
- package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
- package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
- package/template/claude-task-manager/lib/terminal-choice.js +364 -0
- package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
- package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
- package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
- package/template/claude-task-manager/lib/timeline-order.js +122 -0
- package/template/claude-task-manager/lib/transcript-store.js +348 -43
- package/template/claude-task-manager/lib/transport-security.js +84 -1
- package/template/claude-task-manager/lib/wait-state.js +184 -0
- package/template/claude-task-manager/lib/walle-client.js +47 -5
- package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
- package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
- package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
- package/template/claude-task-manager/lib/walle-native-health.js +403 -0
- package/template/claude-task-manager/lib/walle-repair.js +701 -0
- package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
- package/template/claude-task-manager/lib/walle-session-context.js +57 -21
- package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
- package/template/claude-task-manager/lib/walle-transcript.js +52 -0
- package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
- package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
- package/template/claude-task-manager/package.json +1 -1
- package/template/claude-task-manager/prompt-harvest.js +89 -66
- package/template/claude-task-manager/providers/claude-code.js +51 -3
- package/template/claude-task-manager/providers/cursor.js +140 -45
- package/template/claude-task-manager/public/css/reviews.css +551 -61
- package/template/claude-task-manager/public/css/setup.css +191 -0
- package/template/claude-task-manager/public/css/walle-session.css +865 -10
- package/template/claude-task-manager/public/css/walle.css +154 -0
- package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
- package/template/claude-task-manager/public/index.html +18516 -2058
- package/template/claude-task-manager/public/ipad.html +363 -0
- package/template/claude-task-manager/public/js/document-review-links.js +301 -0
- package/template/claude-task-manager/public/js/image-normalize.js +69 -36
- package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
- package/template/claude-task-manager/public/js/prompts.js +66 -29
- package/template/claude-task-manager/public/js/reviews.js +901 -133
- package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
- package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
- package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
- package/template/claude-task-manager/public/js/setup.js +1273 -176
- package/template/claude-task-manager/public/js/stream-view.js +691 -73
- package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
- package/template/claude-task-manager/public/js/walle-session.js +2455 -158
- package/template/claude-task-manager/public/js/walle.js +455 -28
- package/template/claude-task-manager/public/m/app.css +2909 -262
- package/template/claude-task-manager/public/m/app.js +6601 -398
- package/template/claude-task-manager/public/m/claim.html +224 -17
- package/template/claude-task-manager/public/m/index.html +117 -21
- package/template/claude-task-manager/public/m/sw.js +3 -1
- package/template/claude-task-manager/public/manifest.json +2 -2
- package/template/claude-task-manager/public/prompts.html +30 -14
- package/template/claude-task-manager/queue-engine.js +507 -28
- package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
- package/template/claude-task-manager/server.js +14341 -2197
- package/template/claude-task-manager/session-integrity.js +160 -18
- package/template/claude-task-manager/session-search-ranking.js +1 -0
- package/template/claude-task-manager/session-utils.js +25 -5
- package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
- package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
- package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
- package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
- package/template/claude-task-manager/workers/harvest-worker.js +9 -55
- package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
- package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
- package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
- package/template/claude-task-manager/workers/session-host-process.js +146 -0
- package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
- package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
- package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
- package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
- package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
- package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
- package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
- package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
- package/template/docs/design/markdown-review-pane.md +206 -0
- package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
- package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
- package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
- package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
- package/template/docs/private-memory-and-pii-policy.md +69 -0
- package/template/package.json +2 -1
- package/template/scripts/check-private-data.js +201 -0
- package/template/shared/sqlite-owner-guard.js +30 -0
- package/template/shared/sqlite-owner-write-queue.js +225 -0
- package/template/shared/sqlite-storage-policy.js +111 -0
- package/template/shared/sqlite-write-lock.js +428 -0
- package/template/wall-e/agent-runners/claude-code.js +5 -0
- package/template/wall-e/agent.js +166 -22
- package/template/wall-e/api-walle.js +524 -70
- package/template/wall-e/auth/provider-flows.js +11 -1
- package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
- package/template/wall-e/brain.js +1614 -141
- package/template/wall-e/chat/attachment-blocks.js +96 -0
- package/template/wall-e/chat/attachments.js +2 -1
- package/template/wall-e/chat/capability-resolver.js +7 -7
- package/template/wall-e/chat/context-messages.js +28 -0
- package/template/wall-e/chat/conversation-frame.js +630 -0
- package/template/wall-e/chat/provider-messages.js +125 -0
- package/template/wall-e/chat.js +1002 -233
- package/template/wall-e/coding/acceptance-contract.js +170 -0
- package/template/wall-e/coding/acp-adapter.js +1 -1
- package/template/wall-e/coding/agent-catalog.js +3 -0
- package/template/wall-e/coding/artifact-store.js +93 -0
- package/template/wall-e/coding/capability-router.js +120 -0
- package/template/wall-e/coding/coding-run-controller.js +423 -0
- package/template/wall-e/coding/compaction-service.js +157 -12
- package/template/wall-e/coding/frontend-verification.js +258 -0
- package/template/wall-e/coding/lifecycle-hooks.js +75 -0
- package/template/wall-e/coding/local-preview-contract.js +157 -0
- package/template/wall-e/coding/permission-service.js +57 -13
- package/template/wall-e/coding/prompt-bundle.js +19 -1
- package/template/wall-e/coding/prompt-section-registry.js +227 -0
- package/template/wall-e/coding/provider-compat.js +15 -0
- package/template/wall-e/coding/runtime-events.js +224 -0
- package/template/wall-e/coding/runtime-mode.js +3 -0
- package/template/wall-e/coding/side-git-snapshot.js +160 -4
- package/template/wall-e/coding/snapshot-service.js +143 -1
- package/template/wall-e/coding/stream-processor.js +388 -34
- package/template/wall-e/coding/task-tool.js +141 -4
- package/template/wall-e/coding/tool-execution-controller.js +365 -0
- package/template/wall-e/coding/tool-registry.js +43 -5
- package/template/wall-e/coding/user-hooks.js +217 -0
- package/template/wall-e/coding-orchestrator.js +1330 -221
- package/template/wall-e/coding-prompts.js +20 -4
- package/template/wall-e/context/context-builder.js +15 -2
- package/template/wall-e/decision/confidence.js +1 -1
- package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
- package/template/wall-e/docs/external-action-controller.md +26 -6
- package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
- package/template/wall-e/embeddings.js +591 -53
- package/template/wall-e/external-action-controller.js +12 -0
- package/template/wall-e/http/auth.js +1 -0
- package/template/wall-e/http/chat-api.js +46 -11
- package/template/wall-e/http/model-admin.js +836 -34
- package/template/wall-e/lib/boot-profile.js +88 -0
- package/template/wall-e/lib/event-loop-monitor.js +93 -0
- package/template/wall-e/lib/service-health.js +194 -0
- package/template/wall-e/llm/anthropic.js +130 -5
- package/template/wall-e/llm/client.js +266 -63
- package/template/wall-e/llm/default-fallback.js +382 -0
- package/template/wall-e/llm/health.js +19 -0
- package/template/wall-e/llm/message-guard.js +78 -0
- package/template/wall-e/llm/model-catalog.js +252 -1
- package/template/wall-e/llm/openai.js +26 -4
- package/template/wall-e/llm/portkey-sync.js +654 -0
- package/template/wall-e/llm/provider-error.js +30 -2
- package/template/wall-e/llm/registry.js +5 -1
- package/template/wall-e/llm/request-compat.js +67 -0
- package/template/wall-e/loops/backfill.js +79 -23
- package/template/wall-e/loops/brain-optimize.js +67 -0
- package/template/wall-e/loops/ingest.js +25 -10
- package/template/wall-e/loops/question-digest.js +160 -0
- package/template/wall-e/loops/reflect.js +6 -4
- package/template/wall-e/loops/think.js +39 -12
- package/template/wall-e/mcp-server.js +318 -36
- package/template/wall-e/memory/ctm-context-client.js +52 -14
- package/template/wall-e/memory/ctm-operational-context.js +237 -0
- package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
- package/template/wall-e/memory/ctm-session-context.js +111 -63
- package/template/wall-e/prompts/coding/deepseek.txt +3 -0
- package/template/wall-e/prompts/coding/gemini.txt +6 -0
- package/template/wall-e/prompts/coding/gpt.txt +6 -0
- package/template/wall-e/prompts/coding/local.txt +7 -0
- package/template/wall-e/runtime/decision-hooks.js +115 -0
- package/template/wall-e/runtime/devbox-gateway.js +82 -8
- package/template/wall-e/runtime/prompt-manifest.js +86 -0
- package/template/wall-e/runtime/tool-executor.js +269 -0
- package/template/wall-e/runtime/tool-result-envelope.js +138 -0
- package/template/wall-e/runtime/transcript-projection.js +60 -0
- package/template/wall-e/runtime/walle-runtime.js +224 -0
- package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
- package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
- package/template/wall-e/server.js +15 -0
- package/template/wall-e/session-files.js +9 -0
- package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
- package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
- package/template/wall-e/skills/claude-code-reader.js +7 -3
- package/template/wall-e/skills/script-skill-runner.js +10 -0
- package/template/wall-e/skills/skill-planner.js +38 -0
- package/template/wall-e/tools/builtin-middleware.js +19 -9
- package/template/wall-e/tools/local-tools.js +1428 -16
- package/template/wall-e/tools/permission-checker.js +73 -5
- package/template/wall-e/tools/question-manager.js +117 -7
- package/template/wall-e/training/harvester.js +12 -28
- package/template/wall-e/training/replay.js +25 -80
- package/template/website/index.html +10 -10
- package/template/wall-e/eval/ab-test.js +0 -203
- package/template/wall-e/eval/agent-runner.js +0 -772
- package/template/wall-e/eval/agent-scorer.js +0 -461
- package/template/wall-e/eval/aggregator.js +0 -414
- package/template/wall-e/eval/allowed-test-commands.js +0 -34
- package/template/wall-e/eval/benchmark-generator.js +0 -113
- package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
- package/template/wall-e/eval/benchmarks/chat.json +0 -82
- package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
- package/template/wall-e/eval/benchmarks/coding.json +0 -122
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
- package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
- package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
- package/template/wall-e/eval/benchmarks.js +0 -669
- package/template/wall-e/eval/cc-replay.js +0 -719
- package/template/wall-e/eval/chat-eval.js +0 -525
- package/template/wall-e/eval/check-keys.js +0 -15
- package/template/wall-e/eval/check-providers.js +0 -42
- package/template/wall-e/eval/codex-cli-baseline.js +0 -669
- package/template/wall-e/eval/coding-agent-real.js +0 -570
- package/template/wall-e/eval/context-compactor.js +0 -251
- package/template/wall-e/eval/debug-agent003.js +0 -68
- package/template/wall-e/eval/diagnostics.js +0 -216
- package/template/wall-e/eval/eval-orchestrator.js +0 -642
- package/template/wall-e/eval/evaluate.js +0 -202
- package/template/wall-e/eval/evaluator.js +0 -373
- package/template/wall-e/eval/exporter.js +0 -212
- package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
- package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
- package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
- package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
- package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
- package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
- package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
- package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
- package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
- package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
- package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
- package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
- package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
- package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
- package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
- package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
- package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
- package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
- package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
- package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
- package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
- package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
- package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
- package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
- package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
- package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
- package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
- package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
- package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
- package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
- package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
- package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
- package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
- package/template/wall-e/eval/harvester.js +0 -685
- package/template/wall-e/eval/head-to-head.js +0 -388
- package/template/wall-e/eval/humaneval-adapter.js +0 -321
- package/template/wall-e/eval/list-models.js +0 -31
- package/template/wall-e/eval/livecodebench-adapter.js +0 -291
- package/template/wall-e/eval/mail-integration.js +0 -443
- package/template/wall-e/eval/manifest.js +0 -186
- package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
- package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
- package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
- package/template/wall-e/eval/meta-harness/cli.js +0 -86
- package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
- package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
- package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
- package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
- package/template/wall-e/eval/meta-harness/frontier.js +0 -96
- package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
- package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
- package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
- package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
- package/template/wall-e/eval/meta-harness/reporting.js +0 -58
- package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
- package/template/wall-e/eval/meta-harness/validation.js +0 -81
- package/template/wall-e/eval/promoter.js +0 -228
- package/template/wall-e/eval/provider-normalizer.js +0 -33
- package/template/wall-e/eval/replay.js +0 -395
- package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
- package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
- package/template/wall-e/eval/run-coding-agent-real.js +0 -187
- package/template/wall-e/eval/run-eval.js +0 -435
- package/template/wall-e/eval/run-model-comparison.js +0 -142
- package/template/wall-e/eval/session-evaluator.js +0 -187
- package/template/wall-e/eval/session-miner.js +0 -207
- package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
- package/template/wall-e/eval/session-transcripts.js +0 -509
- package/template/wall-e/eval/shadow.js +0 -161
- package/template/wall-e/eval/swebench-adapter.js +0 -345
- package/template/wall-e/eval/swebench-docker.js +0 -192
- package/template/wall-e/eval/train.py +0 -320
- package/template/wall-e/eval/trainer.js +0 -232
- package/template/wall-e/eval/weekly-eval-loop.js +0 -241
|
@@ -15,6 +15,56 @@ const ALWAYS_ASK_TOOLS = new Set([
|
|
|
15
15
|
'applescript', 'claude_code', 'mail_send', 'mail_reply', 'slack_send_message',
|
|
16
16
|
]);
|
|
17
17
|
|
|
18
|
+
// Shell builtins that only navigate / change shell state — harmless on their
// own. Treated as allow ONLY inside per-clause evaluation, where every sibling
// clause of a compound command is independently checked, so a safe `cd` can
// never whitewash a dangerous neighbour (e.g. `cd /x && rm -rf /`).
const NAVIGATION_BUILTINS = new Set(['cd', 'pushd', 'popd', 'dirs']);

// Restrictiveness order used to pick the worst decision across clauses.
const _ACTION_RANK = { allow: 0, ask: 1, deny: 2 };

/**
 * Evaluate every clause of a (possibly compound) shell command and return the
 * MOST-RESTRICTIVE decision (deny > ask > allow).
 *
 * `clauses` is an array of raw token arrays — one per clause — exactly as
 * produced by analyzeShellCommand().commandTokens (split on &&, ||, ;, |, &).
 * Judging the whole command by its first clause alone is both a usability bug
 * (a leading `cd` forces an ask and parks the agent) and a security hole (an
 * allow-listed first clause masks a destructive trailing clause).
 *
 * Fail-closed behaviour:
 *  - a non-array `clauses`, or one with no usable clause, yields the default ask;
 *  - a rule result whose action is missing or not one of allow/ask/deny is
 *    treated as ask, so it can neither be returned as-is nor block a later
 *    deny from winning the comparison;
 *  - a navigation builtin whose arguments contain command or process
 *    substitution is asked about instead of auto-allowed, because the
 *    substitution runs an arbitrary command.
 *
 * @param {string[][]} clauses
 * @param {Array} allRules - merged ruleset (DEFAULT_RULES + agent + user)
 * @returns {{ action: 'allow'|'ask'|'deny', pattern: string, reason: string }}
 */
function evaluateBashClauses(clauses, allRules) {
  const fallback = { action: 'ask', pattern: '*', reason: 'Rule: bash/* -> ask' };
  if (!Array.isArray(clauses)) return fallback;

  // `$(`, backticks and `<(` / `>(` all execute a nested command.
  const substitutionRe = /\$\(|`|[<>]\(/;

  let worst = null;
  for (const tokens of clauses) {
    if (!Array.isArray(tokens) || tokens.length === 0) continue;
    const cmd = tokens[0];
    let action, pattern, reason;
    if (DENYLIST.has(cmd)) {
      action = 'deny';
      pattern = `${cmd} *`;
      reason = `${cmd} is a destructive command`;
    } else if (NAVIGATION_BUILTINS.has(cmd)) {
      pattern = `${cmd} *`;
      const runsNestedCommand = tokens
        .slice(1)
        .some((token) => typeof token === 'string' && substitutionRe.test(token));
      if (runsNestedCommand) {
        action = 'ask';
        reason = `${cmd} argument contains a command substitution`;
      } else {
        action = 'allow';
        reason = `Rule: bash/${cmd} * -> allow (navigation)`;
      }
    } else {
      const prefix = bashArityPrefix(tokens);
      pattern = prefix.join(' ') + ' *';
      const verdict = evaluate('bash', pattern, allRules);
      action = verdict?.action;
      if (Object.prototype.hasOwnProperty.call(_ACTION_RANK, action)) {
        reason = `Rule: bash/${pattern} -> ${action}`;
      } else {
        // Unknown or missing action: never let it through unranked.
        action = 'ask';
        reason = `Rule: bash/${pattern} -> ask (unrecognised rule action)`;
      }
    }
    if (!worst || _ACTION_RANK[action] > _ACTION_RANK[worst.action]) {
      worst = { action, pattern, reason };
      if (action === 'deny') break; // deny is the floor — nothing is more restrictive
    }
  }
  return worst || fallback;
}
|
|
67
|
+
|
|
18
68
|
let _ctmBaseUrl = null;
|
|
19
69
|
let _ctmToken = null;
|
|
20
70
|
let _userRulesCache = null;
|
|
@@ -104,7 +154,7 @@ function listTrustedDirectories() {
|
|
|
104
154
|
*
|
|
105
155
|
* Returns: { decision: 'allow'|'deny'|'ask', source, reason, ruleId? }
|
|
106
156
|
*/
|
|
107
|
-
async function checkPermission({ tool, command, commandTokens, args, projectPath, sessionId, mode }) {
|
|
157
|
+
async function checkPermission({ tool, command, commandTokens, commandClauses, args, projectPath, sessionId, mode }) {
|
|
108
158
|
// Layer 1: Hardcoded denylist (safety floor — never overridable)
|
|
109
159
|
if (tool === 'run_shell') {
|
|
110
160
|
// Extract first command token for denylist check
|
|
@@ -120,8 +170,15 @@ async function checkPermission({ tool, command, commandTokens, args, projectPath
|
|
|
120
170
|
return { decision: 'allow', source: 'trust', reason: `Directory ${projectPath} is trusted` };
|
|
121
171
|
}
|
|
122
172
|
|
|
123
|
-
// Layer 2: CTM
|
|
124
|
-
|
|
173
|
+
// Layer 2: CTM unified auto-approver (when connected).
|
|
174
|
+
// Shell commands are decided by the SAME auto-approver Claude/Codex use —
|
|
175
|
+
// editable blocklist -> per-clause heuristic risk -> goal-aligned verifier —
|
|
176
|
+
// so policy is a single source of truth. Only `run_shell` is routed: file
|
|
177
|
+
// edits keep the local path-aware rules below (which protect .env/.ssh/.aws —
|
|
178
|
+
// protections the shell approver does not have), and read/mcp/skill keep their
|
|
179
|
+
// local defaults. If CTM is unreachable we fall through to the local rules,
|
|
180
|
+
// so coding never hard-breaks offline.
|
|
181
|
+
if (_ctmBaseUrl && tool === 'run_shell') {
|
|
125
182
|
try {
|
|
126
183
|
const body = { tool, command: command || '', args: args || [], project_path: projectPath || '', session_id: sessionId || '', agent: 'walle' };
|
|
127
184
|
const url = `${_ctmBaseUrl}/api/permissions/walle-check${_ctmToken ? '?token=' + encodeURIComponent(_ctmToken) : ''}`;
|
|
@@ -149,7 +206,17 @@ async function checkPermission({ tool, command, commandTokens, args, projectPath
|
|
|
149
206
|
const allRules = merge(DEFAULT_RULES, agentRules, _userRulesCache);
|
|
150
207
|
|
|
151
208
|
if (tool === 'run_shell') {
|
|
152
|
-
//
|
|
209
|
+
// Per-clause evaluation when the full clause list is available. A compound
|
|
210
|
+
// command like `cd ~/x && wc -l f && cat f` must be judged by ALL clauses
|
|
211
|
+
// (most-restrictive wins), not just the first — otherwise a leading `cd`
|
|
212
|
+
// forces an ask (parking the agent) and an allow-listed first clause masks
|
|
213
|
+
// a destructive trailing clause (the `&&`-bypass class).
|
|
214
|
+
if (Array.isArray(commandClauses) && commandClauses.length > 0) {
|
|
215
|
+
const r = evaluateBashClauses(commandClauses, allRules);
|
|
216
|
+
return { decision: r.action, source: 'rules', reason: r.reason };
|
|
217
|
+
}
|
|
218
|
+
// Legacy fallback: only the first clause's tokens (or the raw string) are
|
|
219
|
+
// available. Stay conservative — derive the pattern from BashArity.
|
|
153
220
|
let pattern;
|
|
154
221
|
if (commandTokens && commandTokens.length > 0) {
|
|
155
222
|
const prefix = bashArityPrefix(commandTokens);
|
|
@@ -220,6 +287,7 @@ async function recordDecision({ tool, command, args, decision, sessionId, projec
|
|
|
220
287
|
|
|
221
288
|
module.exports = {
|
|
222
289
|
checkPermission, recordDecision, configure,
|
|
223
|
-
|
|
290
|
+
evaluateBashClauses,
|
|
291
|
+
DENYLIST, ALWAYS_ASK_TOOLS, NAVIGATION_BUILTINS,
|
|
224
292
|
isDirectoryTrusted, trustDirectory, untrustDirectory, listTrustedDirectories,
|
|
225
293
|
};
|
|
@@ -11,6 +11,16 @@
|
|
|
11
11
|
const crypto = require('node:crypto');
|
|
12
12
|
|
|
13
13
|
const DEFAULT_TIMEOUT_MS = 5 * 60 * 1000; // 5 minutes
|
|
14
|
+
const SAFE_HTML_PREVIEW_TAGS = new Set([
|
|
15
|
+
'a', 'article', 'b', 'blockquote', 'br', 'code', 'div', 'em', 'footer',
|
|
16
|
+
'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr', 'i', 'img', 'li',
|
|
17
|
+
'ol', 'p', 'pre', 'section', 'span', 'strong', 'table', 'tbody', 'td',
|
|
18
|
+
'th', 'thead', 'tr', 'ul',
|
|
19
|
+
]);
|
|
20
|
+
const SAFE_HTML_PREVIEW_ATTRS = new Set([
|
|
21
|
+
'alt', 'aria-label', 'aria-describedby', 'aria-labelledby', 'class', 'colspan',
|
|
22
|
+
'height', 'href', 'role', 'rowspan', 'src', 'title', 'width',
|
|
23
|
+
]);
|
|
14
24
|
|
|
15
25
|
// Shared pending map — all QuestionManager instances share the same questions
|
|
16
26
|
// so the API registry and session instances can find the same pending questions.
|
|
@@ -51,13 +61,14 @@ class QuestionManager {
|
|
|
51
61
|
ask(sessionId, question, askOpts = {}) {
|
|
52
62
|
const id = `q_${crypto.randomUUID().slice(0, 8)}`;
|
|
53
63
|
const timeoutMs = askOpts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
|
|
64
|
+
const normalizedQuestion = normalizeQuestion(question || {});
|
|
54
65
|
|
|
55
66
|
return new Promise((resolve) => {
|
|
56
67
|
const timer = timeoutMs > 0
|
|
57
68
|
? setTimeout(() => this._expire(id), timeoutMs)
|
|
58
69
|
: null;
|
|
59
70
|
|
|
60
|
-
const entry = { sessionId, question, resolve, timer };
|
|
71
|
+
const entry = { sessionId, question: normalizedQuestion, resolve, timer };
|
|
61
72
|
this._pending.set(id, entry);
|
|
62
73
|
|
|
63
74
|
// Publish on bus so UI / SSE subscribers can show the question
|
|
@@ -65,10 +76,11 @@ class QuestionManager {
|
|
|
65
76
|
this._events.emit('question.asked', {
|
|
66
77
|
id,
|
|
67
78
|
sessionId,
|
|
68
|
-
question:
|
|
69
|
-
header:
|
|
70
|
-
options:
|
|
71
|
-
multiple: !!
|
|
79
|
+
question: normalizedQuestion.question,
|
|
80
|
+
header: normalizedQuestion.header || '',
|
|
81
|
+
options: normalizedQuestion.options || [],
|
|
82
|
+
multiple: !!normalizedQuestion.multiple,
|
|
83
|
+
preview: normalizedQuestion.preview || null,
|
|
72
84
|
timestamp: Date.now(),
|
|
73
85
|
});
|
|
74
86
|
}
|
|
@@ -186,12 +198,22 @@ class QuestionManager {
|
|
|
186
198
|
*/
|
|
187
199
|
const ASK_USER_TOOL = {
|
|
188
200
|
name: 'ask_user',
|
|
189
|
-
description: 'Ask the user a clarifying question and wait for their answer.
|
|
201
|
+
description: 'Ask the user a clarifying question and wait for their answer. Supports safe markdown previews and sanitized HTML-fragment previews for visual choices. Use when you need more information before proceeding.',
|
|
190
202
|
input_schema: {
|
|
191
203
|
type: 'object',
|
|
192
204
|
properties: {
|
|
193
205
|
question: { type: 'string', description: 'The question to ask' },
|
|
194
206
|
header: { type: 'string', description: 'Short label (max 30 chars)' },
|
|
207
|
+
preview: {
|
|
208
|
+
type: 'object',
|
|
209
|
+
description: 'Optional question-level preview. Use type=markdown for prose, type=html for a sanitized fragment, or type=image/pdf_page with a local path.',
|
|
210
|
+
properties: {
|
|
211
|
+
type: { type: 'string', enum: ['markdown', 'html', 'image', 'pdf_page'] },
|
|
212
|
+
content: { type: 'string', description: 'Markdown or safe HTML fragment.' },
|
|
213
|
+
path: { type: 'string', description: 'Optional local artifact path for image/pdf_page previews.' },
|
|
214
|
+
title: { type: 'string', description: 'Optional preview title.' },
|
|
215
|
+
},
|
|
216
|
+
},
|
|
195
217
|
options: {
|
|
196
218
|
type: 'array',
|
|
197
219
|
items: {
|
|
@@ -199,6 +221,16 @@ const ASK_USER_TOOL = {
|
|
|
199
221
|
properties: {
|
|
200
222
|
label: { type: 'string', description: 'Display text (1-5 words)' },
|
|
201
223
|
description: { type: 'string', description: 'Explanation of this choice' },
|
|
224
|
+
preview: {
|
|
225
|
+
type: 'object',
|
|
226
|
+
description: 'Optional preview for this option. Same shape as question preview.',
|
|
227
|
+
properties: {
|
|
228
|
+
type: { type: 'string', enum: ['markdown', 'html', 'image', 'pdf_page'] },
|
|
229
|
+
content: { type: 'string' },
|
|
230
|
+
path: { type: 'string' },
|
|
231
|
+
title: { type: 'string' },
|
|
232
|
+
},
|
|
233
|
+
},
|
|
202
234
|
},
|
|
203
235
|
required: ['label'],
|
|
204
236
|
},
|
|
@@ -210,4 +242,82 @@ const ASK_USER_TOOL = {
|
|
|
210
242
|
},
|
|
211
243
|
};
|
|
212
244
|
|
|
213
|
-
|
|
245
|
+
/**
 * Coerce an ask_user payload into a bounded, predictable shape.
 *
 * Accepts the usual object form, a bare string (used as the question text,
 * mirroring how sanitizePreview accepts a bare string), or nothing at all.
 * Any other input is treated as an empty question rather than throwing, since
 * this function is exported and may be called without the `|| {}` guard.
 *
 * Limits applied: question 4000 chars, header 30 chars, at most the first 10
 * options, option label 80 chars, option description 1000 chars. Options whose
 * label is empty or whitespace-only are dropped so the UI never shows a blank
 * choice. Previews are passed through sanitizePreview.
 *
 * @param {object|string|null|undefined} question
 * @returns {{ question: string, header: string, multiple: boolean, preview: object|null, options: Array<{label: string, description: string, preview: object|null}> }}
 */
function normalizeQuestion(question) {
  let source = {};
  if (typeof question === 'string') {
    source = { question };
  } else if (question && typeof question === 'object') {
    source = question;
  }

  const rawOptions = Array.isArray(source.options) ? source.options.slice(0, 10) : [];
  const options = rawOptions
    .map((option) => ({
      label: String(option?.label || '').slice(0, 80),
      description: String(option?.description || '').slice(0, 1000),
      preview: sanitizePreview(option?.preview),
    }))
    .filter((option) => option.label.trim() !== '');

  return {
    question: String(source.question || '').slice(0, 4000),
    header: String(source.header || '').slice(0, 30),
    multiple: !!source.multiple,
    preview: sanitizePreview(source.preview),
    options,
  };
}
|
|
260
|
+
|
|
261
|
+
/**
 * Normalize a question- or option-level preview into one of the shapes the UI
 * understands, or null when there is nothing to show.
 *
 * - falsy input, or a non-string primitive            -> null
 * - bare string                                       -> markdown preview
 * - { type|kind: 'markdown' } (also the default type) -> markdown preview
 * - { type: 'html' }      -> html preview, or a `rejected` record carrying the
 *                            reason from unsafeHtmlFragmentReason
 * - { type: 'image' | 'pdf_page' } -> path / mimeType / content passthrough
 * - anything else         -> `rejected` record naming the unsupported type
 *
 * All strings are length-capped. HTML that exceeds the cap is cut back to the
 * last complete tag boundary: a blind slice can end inside a tag and leave a
 * dangling `<a href="...` that swallows whatever markup the renderer appends
 * after the preview.
 *
 * NOTE(review): `path` is only length-capped here; confirm that whatever
 * serves image/pdf_page previews validates the path.
 *
 * @param {object|string|null|undefined} preview
 * @returns {object|null}
 */
function sanitizePreview(preview) {
  const MAX_TEXT_CHARS = 12000;
  const MAX_SHORT_CHARS = 2000;

  if (!preview) return null;
  if (typeof preview === 'string') {
    return { type: 'markdown', content: preview.slice(0, MAX_TEXT_CHARS) };
  }
  if (typeof preview !== 'object') return null;

  // Trim so a stray space around the type (" html") does not cause a rejection.
  const type = String(preview.type || preview.kind || 'markdown').trim().toLowerCase();
  const title = String(preview.title || '').slice(0, 160);

  if (type === 'markdown') {
    return {
      type: 'markdown',
      title,
      content: String(preview.content || '').slice(0, MAX_TEXT_CHARS),
    };
  }

  if (type === 'html') {
    const raw = String(preview.content || '');
    let content = raw.slice(0, MAX_TEXT_CHARS);
    if (raw.length > content.length) {
      // Truncation happened: drop a trailing partially-cut tag, if any.
      const lastOpen = content.lastIndexOf('<');
      if (lastOpen > content.lastIndexOf('>')) content = content.slice(0, lastOpen);
    }
    const error = unsafeHtmlFragmentReason(content);
    if (error) return { type: 'rejected', title, error };
    return { type: 'html', title, content };
  }

  if (type === 'image' || type === 'pdf_page') {
    return {
      type,
      title,
      path: String(preview.path || '').slice(0, MAX_SHORT_CHARS),
      mimeType: String(preview.mimeType || preview.mime_type || '').slice(0, 120),
      content: String(preview.content || '').slice(0, MAX_SHORT_CHARS),
    };
  }

  return { type: 'rejected', title, error: `Unsupported preview type: ${type}` };
}
|
|
293
|
+
|
|
294
|
+
/**
 * Check a URL-bearing attribute value (href/src) for script-capable schemes.
 *
 * Whitespace and control characters are stripped first because URL parsers
 * ignore them, so `java\tscript:` is still `javascript:`. A character
 * reference (`&...;`) in the scheme position is rejected outright rather than
 * decoded: there is no legitimate reason to entity-encode a scheme, and it is
 * the standard way to smuggle `javascript:` past a literal match.
 *
 * @param {string} rawValue - attribute value as written, quotes included
 * @returns {string} reason the value is unsafe, or '' when acceptable
 */
function unsafeUrlAttributeReason(rawValue) {
  const first = rawValue[0];
  const value = first === '"' || first === "'" ? rawValue.slice(1, -1) : rawValue;
  const compact = value.replace(/[\u0000-\u0020\u007f]+/g, '');
  // Only text before the first path/query/fragment delimiter can be a scheme.
  const head = compact.split(/[\/?#]/, 1)[0];
  if (head.includes('&')) return 'HTML preview contains an obfuscated URL.';
  const colon = head.indexOf(':');
  if (colon < 0) return ''; // relative URL
  const scheme = head.slice(0, colon).toLowerCase();
  if (scheme === 'javascript' || scheme === 'vbscript') {
    return `HTML preview contains a ${scheme}: URL.`;
  }
  if (scheme === 'data' && !/^data:image\//i.test(compact)) {
    return 'HTML preview contains a non-image data: URL.';
  }
  return '';
}

/**
 * Explain why an HTML preview fragment must not be rendered, or return ''
 * when it passes every check.
 *
 * Checks, in order:
 *  1. blocked tags, inline event handlers and literal `javascript:` anywhere;
 *  2. every tag name must be in SAFE_HTML_PREVIEW_TAGS;
 *  3. every tag's attribute text must parse COMPLETELY as whitespace-separated
 *     attributes. Anything left over (`src="x"onerror=...`, `/onerror=...`,
 *     an unclosed quote) is rejected, because browsers still read those as
 *     attributes even though a lenient scan skips them;
 *  4. attribute names must be data-*, aria-* or in SAFE_HTML_PREVIEW_ATTRS;
 *  5. href/src values must not use a script-capable or obfuscated scheme;
 *  6. no unterminated tag may trail the fragment, since markup appended by
 *     the renderer would close it and turn its text into live attributes.
 *
 * @param {string} html
 * @returns {string} human-readable rejection reason, or '' when safe
 */
function unsafeHtmlFragmentReason(html) {
  const source = String(html ?? '');
  if (/<\s*(?:script|style|iframe|object|embed|link|meta|html|head|body)\b/i.test(source)) {
    return 'HTML preview contains a blocked tag.';
  }
  if (/\son[a-z]+\s*=/i.test(source)) return 'HTML preview contains an event handler attribute.';
  if (/\bjavascript\s*:/i.test(source)) return 'HTML preview contains a javascript: URL.';

  const tagRe = /<\s*\/?\s*([a-z][a-z0-9-]*)([^>]*)>/gi;
  // Sticky: each attribute must start exactly where the previous one ended.
  const attrRe = /\s+([:@a-zA-Z_][:@a-zA-Z0-9_.-]*)(?:\s*=\s*("[^"]*"|'[^']*'|[^\s"'=<>`]+))?/y;
  let scannedTo = 0;

  for (const match of source.matchAll(tagRe)) {
    scannedTo = match.index + match[0].length;
    const tag = match[1].toLowerCase();
    const attrs = match[2] || '';
    if (!SAFE_HTML_PREVIEW_TAGS.has(tag)) return `HTML preview contains unsupported tag: ${tag}.`;
    if (/<|>/.test(attrs)) return 'HTML preview contains malformed tag attributes.';

    let offset = 0;
    for (;;) {
      attrRe.lastIndex = offset;
      const attrMatch = attrRe.exec(attrs);
      if (!attrMatch) break;
      offset = attrRe.lastIndex;
      const attr = attrMatch[1].toLowerCase();
      if (attr.startsWith('data-') || attr.startsWith('aria-')) continue;
      if (!SAFE_HTML_PREVIEW_ATTRS.has(attr)) return `HTML preview contains unsupported attribute: ${attr}.`;
      if (attr === 'href' || attr === 'src') {
        const urlError = unsafeUrlAttributeReason(attrMatch[2] || '');
        if (urlError) return urlError;
      }
    }
    // Only a self-closing slash may remain once all attributes are consumed.
    const leftover = attrs.slice(offset).trim();
    if (leftover !== '' && leftover !== '/') return 'HTML preview contains malformed tag attributes.';
  }

  if (/<\/?[a-z]/i.test(source.slice(scannedTo))) return 'HTML preview contains an unterminated tag.';
  return '';
}
|
|
315
|
+
|
|
316
|
+
module.exports = {
|
|
317
|
+
QuestionManager,
|
|
318
|
+
ASK_USER_TOOL,
|
|
319
|
+
DEFAULT_TIMEOUT_MS,
|
|
320
|
+
normalizeQuestion,
|
|
321
|
+
sanitizePreview,
|
|
322
|
+
unsafeHtmlFragmentReason,
|
|
323
|
+
};
|
|
@@ -196,33 +196,18 @@ async function harvestCodexSessions(since) {
|
|
|
196
196
|
// --- CTM Session Harvesting ---
|
|
197
197
|
|
|
198
198
|
async function harvestCtmSessions(since, dataDirOverride = null) {
|
|
199
|
-
const dataDir = dataDirOverride || process.env.WALL_E_DATA_DIR || path.join(process.env.HOME, '.walle', 'data');
|
|
200
|
-
const ctmDbPath = path.join(dataDir, 'task-manager.db');
|
|
201
|
-
if (!fs.existsSync(ctmDbPath)) return [];
|
|
202
|
-
|
|
203
|
-
let Database;
|
|
204
|
-
try { Database = require('better-sqlite3'); } catch { return []; }
|
|
205
|
-
let ctmDb;
|
|
206
199
|
try {
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
WHERE u.role = 'user' AND a.role = 'assistant'
|
|
219
|
-
AND length(u.message_text) >= 20 AND length(a.message_text) >= 20
|
|
220
|
-
`;
|
|
221
|
-
if (since) { sql += ` AND u.executed_at > ?`; }
|
|
222
|
-
sql += ' ORDER BY u.executed_at DESC LIMIT 500';
|
|
223
|
-
|
|
224
|
-
const rows = since ? ctmDb.prepare(sql).all(since) : ctmDb.prepare(sql).all();
|
|
225
|
-
ctmDb.close();
|
|
200
|
+
const {
|
|
201
|
+
listPromptExecutions,
|
|
202
|
+
pairPromptExecutions,
|
|
203
|
+
} = require('../memory/ctm-prompt-executions-client');
|
|
204
|
+
const executions = await listPromptExecutions({
|
|
205
|
+
since,
|
|
206
|
+
limit: 5000,
|
|
207
|
+
order: 'desc',
|
|
208
|
+
timeoutMs: 1500,
|
|
209
|
+
});
|
|
210
|
+
const rows = pairPromptExecutions(executions, { limit: 500 });
|
|
226
211
|
|
|
227
212
|
return rows.map(row => ({
|
|
228
213
|
id: contentHash('ctm', row.prompt),
|
|
@@ -239,8 +224,7 @@ async function harvestCtmSessions(since, dataDirOverride = null) {
|
|
|
239
224
|
quality_label: 0.7,
|
|
240
225
|
}));
|
|
241
226
|
} catch (err) {
|
|
242
|
-
|
|
243
|
-
console.error('[harvester] Error reading CTM sessions:', err.message);
|
|
227
|
+
console.error('[harvester] Error reading CTM sessions through API:', err.message);
|
|
244
228
|
return [];
|
|
245
229
|
}
|
|
246
230
|
}
|
|
@@ -1,29 +1,18 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
const fs = require('fs');
|
|
4
|
-
const path = require('path');
|
|
5
3
|
const { classifyTaskType } = require('./harvester');
|
|
6
4
|
const { getShadowConfig } = require('./shadow');
|
|
7
5
|
const { createClient } = require('../llm/client');
|
|
6
|
+
const {
|
|
7
|
+
groupPromptExecutionsBySession,
|
|
8
|
+
listPromptExecutions,
|
|
9
|
+
pairPromptExecutions,
|
|
10
|
+
} = require('../memory/ctm-prompt-executions-client');
|
|
8
11
|
|
|
9
12
|
const INTER_CALL_DELAY_MS = 1000;
|
|
10
13
|
const CALL_TIMEOUT_MS = 600000; // 10 min safety ceiling — no real timeout for training; local models can take minutes on complex prompts
|
|
11
14
|
const MAX_HISTORY_CHARS = 32000; // Cap conversation history sent to shadow model
|
|
12
15
|
|
|
13
|
-
// --- Shared helpers ---
|
|
14
|
-
|
|
15
|
-
function openCtmDb() {
|
|
16
|
-
const dataDir = process.env.WALL_E_DATA_DIR || path.join(process.env.HOME, '.walle', 'data');
|
|
17
|
-
const ctmDbPath = path.join(dataDir, 'task-manager.db');
|
|
18
|
-
if (!fs.existsSync(ctmDbPath)) return null;
|
|
19
|
-
const Database = require('better-sqlite3');
|
|
20
|
-
try {
|
|
21
|
-
return new Database(ctmDbPath, { readonly: true, fileMustExist: true });
|
|
22
|
-
} catch {
|
|
23
|
-
return null;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
16
|
function makeShadowClient(models) {
|
|
28
17
|
const shadowModel = models[0];
|
|
29
18
|
const client = createClient(shadowModel.provider || 'ollama', {
|
|
@@ -70,36 +59,21 @@ async function runReplay(brain, options = {}) {
|
|
|
70
59
|
const state = brain.getHarvestState('ctm-replay');
|
|
71
60
|
const lastReplayAt = state?.last_processed_at || '2000-01-01';
|
|
72
61
|
|
|
73
|
-
const ctmDb = openCtmDb();
|
|
74
|
-
if (!ctmDb) return { replayed: 0 };
|
|
75
|
-
|
|
76
62
|
let rows;
|
|
77
63
|
try {
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
AND length(u.message_text) >= 20 AND length(a.message_text) >= 20
|
|
88
|
-
AND u.executed_at > ?
|
|
89
|
-
AND u.session_id IN (
|
|
90
|
-
SELECT session_id FROM prompt_executions
|
|
91
|
-
WHERE role = 'user'
|
|
92
|
-
GROUP BY session_id HAVING COUNT(*) = 1
|
|
93
|
-
)
|
|
94
|
-
ORDER BY u.executed_at ASC
|
|
95
|
-
LIMIT ?
|
|
96
|
-
`).all(lastReplayAt, limit);
|
|
64
|
+
const executions = await listPromptExecutions({
|
|
65
|
+
since: lastReplayAt,
|
|
66
|
+
limit: 5000,
|
|
67
|
+
order: 'asc',
|
|
68
|
+
timeoutMs: 1500,
|
|
69
|
+
});
|
|
70
|
+
// Only pick single-turn sessions. Multi-turn sessions are handled by
|
|
71
|
+
// runMultiTurnReplay.
|
|
72
|
+
rows = pairPromptExecutions(executions, { limit, singleTurnOnly: true });
|
|
97
73
|
} catch (err) {
|
|
98
|
-
|
|
99
|
-
console.error('[replay] Error querying CTM DB:', err.message);
|
|
74
|
+
console.error('[replay] Error querying CTM API:', err.message);
|
|
100
75
|
return { replayed: 0 };
|
|
101
76
|
}
|
|
102
|
-
ctmDb.close();
|
|
103
77
|
|
|
104
78
|
if (!rows.length) return { replayed: 0 };
|
|
105
79
|
|
|
@@ -186,50 +160,21 @@ async function runMultiTurnReplay(brain, options = {}) {
|
|
|
186
160
|
const state = brain.getHarvestState('ctm-replay-mt');
|
|
187
161
|
const lastReplayAt = state?.last_processed_at || '2000-01-01';
|
|
188
162
|
|
|
189
|
-
const ctmDb = openCtmDb();
|
|
190
|
-
if (!ctmDb) return { replayed: 0, sessions: 0 };
|
|
191
|
-
|
|
192
163
|
let sessions;
|
|
193
164
|
try {
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
ORDER BY MIN(executed_at) ASC
|
|
202
|
-
LIMIT ?
|
|
203
|
-
`).all(lastReplayAt, sessionLimit);
|
|
165
|
+
const executions = await listPromptExecutions({
|
|
166
|
+
since: lastReplayAt,
|
|
167
|
+
limit: 10000,
|
|
168
|
+
order: 'asc',
|
|
169
|
+
timeoutMs: 1500,
|
|
170
|
+
});
|
|
171
|
+
sessions = groupPromptExecutionsBySession(executions, { multiTurnOnly: true, limit: sessionLimit });
|
|
204
172
|
} catch (err) {
|
|
205
|
-
|
|
206
|
-
console.error('[replay-mt] Error finding sessions:', err.message);
|
|
173
|
+
console.error('[replay-mt] Error finding sessions through CTM API:', err.message);
|
|
207
174
|
return { replayed: 0, sessions: 0 };
|
|
208
175
|
}
|
|
209
176
|
|
|
210
|
-
if (!sessions.length) {
|
|
211
|
-
ctmDb.close();
|
|
212
|
-
return { replayed: 0, sessions: 0 };
|
|
213
|
-
}
|
|
214
|
-
|
|
215
|
-
// Pre-load all messages for selected sessions
|
|
216
|
-
const sessionMessages = new Map();
|
|
217
|
-
try {
|
|
218
|
-
for (const s of sessions) {
|
|
219
|
-
const msgs = ctmDb.prepare(`
|
|
220
|
-
SELECT message_text, role, message_index, tool_uses, executed_at
|
|
221
|
-
FROM prompt_executions
|
|
222
|
-
WHERE session_id = ?
|
|
223
|
-
ORDER BY message_index ASC
|
|
224
|
-
`).all(s.session_id);
|
|
225
|
-
sessionMessages.set(s.session_id, msgs);
|
|
226
|
-
}
|
|
227
|
-
} catch (err) {
|
|
228
|
-
try { ctmDb.close(); } catch {}
|
|
229
|
-
console.error('[replay-mt] Error loading messages:', err.message);
|
|
230
|
-
return { replayed: 0, sessions: 0 };
|
|
231
|
-
}
|
|
232
|
-
ctmDb.close();
|
|
177
|
+
if (!sessions.length) return { replayed: 0, sessions: 0 };
|
|
233
178
|
|
|
234
179
|
const existing = loadExistingPromptPrefixes(brain);
|
|
235
180
|
const { client, shadowModel } = makeShadowClient(models);
|
|
@@ -239,7 +184,7 @@ async function runMultiTurnReplay(brain, options = {}) {
|
|
|
239
184
|
let lastTimestamp = lastReplayAt;
|
|
240
185
|
|
|
241
186
|
for (const session of sessions) {
|
|
242
|
-
const msgs =
|
|
187
|
+
const msgs = session.messages;
|
|
243
188
|
if (!msgs || msgs.length < 4) continue; // need at least 2 user + 2 assistant
|
|
244
189
|
|
|
245
190
|
// Build progressive history and replay each assistant turn
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<meta charset="UTF-8">
|
|
5
5
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
6
|
<title>Wall-E — AI Coding Dashboard + Personal Agent</title>
|
|
7
|
-
<meta name="description" content="Run Claude Code, Codex, Gemini, and Aider sessions side by side. Manage prompts, queue tasks, review code and docs, use remote phone access, and let an AI agent build a second brain from your work life. Runs locally.">
|
|
7
|
+
<meta name="description" content="Run Claude Code, Codex, Gemini, and Aider sessions side by side. Manage prompts, queue tasks, review code and docs, use remote phone and tablet access, and let an AI agent build a second brain from your work life. Runs locally.">
|
|
8
8
|
<link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><text y='.9em' font-size='90'>🤖</text></svg>">
|
|
9
9
|
<style>
|
|
10
10
|
:root {
|
|
@@ -241,7 +241,7 @@
|
|
|
241
241
|
<h1>Your AI Coding <span class="accent">Command Center</span> +<br>Personal <span class="accent">Agent</span></h1>
|
|
242
242
|
<p class="sub">
|
|
243
243
|
Run Claude Code, Codex, Gemini, Aider, OpenCode, and Cursor Agent sessions side by side. Manage prompts,
|
|
244
|
-
queue tasks, review code and docs, open a
|
|
244
|
+
queue tasks, review code and docs, open a touch-friendly remote UI for phone and tablet with live prompts and model controls, and let an AI agent build a second brain from your work life.
|
|
245
245
|
</p>
|
|
246
246
|
<div class="install-box" onclick="navigator.clipboard.writeText('npx create-walle install ./walle');this.querySelector('.copy-hint').textContent='Copied!'">
|
|
247
247
|
<code>npx create-walle install ./walle</code>
|
|
@@ -250,7 +250,7 @@
|
|
|
250
250
|
<div class="badge-row">
|
|
251
251
|
<span class="badge">MIT License</span>
|
|
252
252
|
<span class="badge">Local-first</span>
|
|
253
|
-
<span class="badge">
|
|
253
|
+
<span class="badge">Phone & Tablet Access</span>
|
|
254
254
|
<span class="badge">Code & Doc Review</span>
|
|
255
255
|
<span class="badge">SQLite</span>
|
|
256
256
|
<span class="badge">No Cloud Required</span>
|
|
@@ -281,7 +281,7 @@
|
|
|
281
281
|
<h2>One dashboard for every AI coding agent</h2>
|
|
282
282
|
<p class="desc">
|
|
283
283
|
Run Claude Code, Codex, Gemini CLI, and Aider sessions side by side.
|
|
284
|
-
Manage prompts, queue tasks, review code and docs, and respond from your phone.
|
|
284
|
+
Manage prompts, queue tasks, review code and docs, and respond from your phone or tablet.
|
|
285
285
|
</p>
|
|
286
286
|
<div class="features">
|
|
287
287
|
<div class="feature-card">
|
|
@@ -306,8 +306,8 @@
|
|
|
306
306
|
</div>
|
|
307
307
|
<div class="feature-card">
|
|
308
308
|
<span class="icon">📱</span>
|
|
309
|
-
<h3>Remote Phone Access</h3>
|
|
310
|
-
<p>Pair your phone with a QR code and use a
|
|
309
|
+
<h3>Remote Phone & Tablet Access</h3>
|
|
310
|
+
<p>Pair your phone or tablet with a QR code and use a responsive CTM UI through Microsoft Dev Tunnels, Tailscale, Cloudflare Tunnel, or Walle Remote, including live prompts and model controls.</p>
|
|
311
311
|
</div>
|
|
312
312
|
<div class="feature-card">
|
|
313
313
|
<span class="icon">Δ</span>
|
|
@@ -354,7 +354,7 @@
|
|
|
354
354
|
<div class="feature-card">
|
|
355
355
|
<span class="icon">📱</span>
|
|
356
356
|
<h3>Multi-Device</h3>
|
|
357
|
-
<p>Share your brain across machines via Dropbox, iCloud, or any file sync. Pair your phone when you need to monitor or respond away from the Mac.</p>
|
|
357
|
+
<p>Share your brain across machines via Dropbox, iCloud, or any file sync. Pair your phone or tablet when you need to monitor or respond away from the Mac.</p>
|
|
358
358
|
</div>
|
|
359
359
|
</div>
|
|
360
360
|
</section>
|
|
@@ -378,8 +378,8 @@
|
|
|
378
378
|
<p>Click through to connect Slack (OAuth), email, and calendar. All optional — Wall-E works without them.</p>
|
|
379
379
|
</div>
|
|
380
380
|
<div class="step">
|
|
381
|
-
<h3>Pair your
|
|
382
|
-
<p>Choose Microsoft Dev Tunnels, Tailscale, Cloudflare Tunnel, or Walle Remote from Setup and scan the QR code.</p>
|
|
381
|
+
<h3>Pair your device</h3>
|
|
382
|
+
<p>Choose Microsoft Dev Tunnels, Tailscale, Cloudflare Tunnel, or Walle Remote from Setup and scan the QR code on a phone or tablet.</p>
|
|
383
383
|
</div>
|
|
384
384
|
<div class="step">
|
|
385
385
|
<h3>Start working</h3>
|
|
@@ -405,7 +405,7 @@
|
|
|
405
405
|
<li>Multi-agent terminal multiplexer</li>
|
|
406
406
|
<li>Prompt store & task queue</li>
|
|
407
407
|
<li>Approval engine & model registry</li>
|
|
408
|
-
<li>Remote phone pairing &
|
|
408
|
+
<li>Remote phone/tablet pairing & responsive UI</li>
|
|
409
409
|
<li>Code and document review workspace</li>
|
|
410
410
|
</ul>
|
|
411
411
|
</div>
|