create-walle 0.9.21 → 0.9.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/package.json +2 -2
- package/template/CLAUDE.md +2 -2
- package/template/LICENSE +1 -1
- package/template/bin/ctm-dev-cleanup.js +24 -3
- package/template/bin/ctm-launch.sh +13 -0
- package/template/bin/dev.sh +156 -18
- package/template/bin/node-bin.sh +84 -0
- package/template/bin/pin-node.sh +51 -0
- package/template/claude-task-manager/api-prompts.js +1203 -182
- package/template/claude-task-manager/api-reviews.js +109 -15
- package/template/claude-task-manager/approval-agent.js +1360 -280
- package/template/claude-task-manager/bin/restart-ctm.sh +64 -23
- package/template/claude-task-manager/bin/storage-migration-supervisor.js +338 -0
- package/template/claude-task-manager/db.js +4417 -295
- package/template/claude-task-manager/docs/app-update-refresh-protocol.md +69 -0
- package/template/claude-task-manager/docs/approval-ai-refinement.md +138 -0
- package/template/claude-task-manager/docs/approval-rescue-loop.md +74 -0
- package/template/claude-task-manager/docs/codex-operational-warning-health.md +107 -0
- package/template/claude-task-manager/docs/codex-resume-state-guard-design.md +17 -12
- package/template/claude-task-manager/docs/codex-terminal-render-controller-handoff.md +311 -0
- package/template/claude-task-manager/docs/coding-agent-hooks-architecture.md +418 -0
- package/template/claude-task-manager/docs/conversation-import-freshness.md +20 -0
- package/template/claude-task-manager/docs/google-workspace-auth-health.md +77 -0
- package/template/claude-task-manager/docs/image-paste-ux.md +13 -0
- package/template/claude-task-manager/docs/ipad-web-preview.md +88 -0
- package/template/claude-task-manager/docs/main-loop-offload-architecture.md +66 -0
- package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +274 -519
- package/template/claude-task-manager/docs/mobile-live-streaming.md +27 -5
- package/template/claude-task-manager/docs/mobile-remote-submission-lifecycle.md +69 -0
- package/template/claude-task-manager/docs/phone-access-design.md +53 -15
- package/template/claude-task-manager/docs/phone-passkey-identity.md +122 -0
- package/template/claude-task-manager/docs/phone-setup.md +3 -0
- package/template/claude-task-manager/docs/prompt-editing-tree-design.md +25 -1
- package/template/claude-task-manager/docs/remote-desktop-access-design.md +268 -0
- package/template/claude-task-manager/docs/restart-lifecycle-architecture.md +95 -0
- package/template/claude-task-manager/docs/runtime-work-control-plane.md +53 -0
- package/template/claude-task-manager/docs/session-interactive-wait-surfaces.md +38 -0
- package/template/claude-task-manager/docs/session-needs-you-dismissal.md +84 -0
- package/template/claude-task-manager/docs/session-render-state-management-design.md +91 -3
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +25 -1
- package/template/claude-task-manager/docs/session-title-authority.md +32 -0
- package/template/claude-task-manager/docs/session-workspace-binding.md +33 -0
- package/template/claude-task-manager/docs/skill-intent-resolution-design.md +72 -0
- package/template/claude-task-manager/docs/walle-mcp-supervisor-health.md +86 -0
- package/template/claude-task-manager/docs/walle-relay-phone-access-design.md +24 -15
- package/template/claude-task-manager/docs/walle-session-history-hydration.md +114 -0
- package/template/claude-task-manager/docs/walle-session-input-queue.md +104 -0
- package/template/claude-task-manager/docs/walle-session-model-catalog.md +90 -0
- package/template/claude-task-manager/docs/walle-session-model-preferences.md +15 -6
- package/template/claude-task-manager/git-utils.js +897 -27
- package/template/claude-task-manager/lib/agent-capabilities.js +33 -0
- package/template/claude-task-manager/lib/agent-cli-cache.js +37 -7
- package/template/claude-task-manager/lib/agent-hooks-installer.js +26 -2
- package/template/claude-task-manager/lib/agent-presets.js +17 -1
- package/template/claude-task-manager/lib/all-sessions-query.js +108 -0
- package/template/claude-task-manager/lib/approval-ai-refinement.js +488 -0
- package/template/claude-task-manager/lib/approval-self-adapt.js +168 -0
- package/template/claude-task-manager/lib/async-semaphore.js +44 -0
- package/template/claude-task-manager/lib/auth-context.js +5 -0
- package/template/claude-task-manager/lib/auth-rate-limit.js +47 -4
- package/template/claude-task-manager/lib/auth-rules.js +29 -2
- package/template/claude-task-manager/lib/auto-approval-verifier.js +129 -16
- package/template/claude-task-manager/lib/background-llm.js +144 -17
- package/template/claude-task-manager/lib/branch-inventory.js +212 -0
- package/template/claude-task-manager/lib/claude-desktop-sessions.js +15 -3
- package/template/claude-task-manager/lib/coalesce-sync-frames.js +151 -0
- package/template/claude-task-manager/lib/codex-launch-health.js +762 -0
- package/template/claude-task-manager/lib/codex-transcript-pager.js +51 -0
- package/template/claude-task-manager/lib/codex-zst.js +124 -0
- package/template/claude-task-manager/lib/coding-agent-models.js +233 -30
- package/template/claude-task-manager/lib/connection-health.js +232 -0
- package/template/claude-task-manager/lib/conversation-blob-parser.js +42 -0
- package/template/claude-task-manager/lib/conversation-tail-merge.js +89 -26
- package/template/claude-task-manager/lib/ctm-session-context-api.js +39 -10
- package/template/claude-task-manager/lib/cursor-conversation-store.js +354 -0
- package/template/claude-task-manager/lib/db-owner-worker-client.js +315 -0
- package/template/claude-task-manager/lib/document-review.js +141 -6
- package/template/claude-task-manager/lib/escalation-review.js +152 -0
- package/template/claude-task-manager/lib/graceful-shutdown.js +159 -0
- package/template/claude-task-manager/lib/headless-term-service.js +678 -0
- package/template/claude-task-manager/lib/heavy-worker-fallback.js +38 -0
- package/template/claude-task-manager/lib/jsonl-conversation-parser.js +542 -0
- package/template/claude-task-manager/lib/jsonl-range-reader.js +112 -0
- package/template/claude-task-manager/lib/main-db-census.js +216 -0
- package/template/claude-task-manager/lib/message-pagination.js +106 -4
- package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +750 -26
- package/template/claude-task-manager/lib/mobile-auth-api.js +274 -7
- package/template/claude-task-manager/lib/mobile-auth-store.js +592 -10
- package/template/claude-task-manager/lib/mobile-notification-dispatcher.js +15 -0
- package/template/claude-task-manager/lib/model-overview-brain-fallback.js +311 -0
- package/template/claude-task-manager/lib/model-overview-cache.js +141 -0
- package/template/claude-task-manager/lib/models-health-routing-notice.js +126 -0
- package/template/claude-task-manager/lib/node-pin-guard.js +93 -0
- package/template/claude-task-manager/lib/perf-tracker.js +242 -6
- package/template/claude-task-manager/lib/permission-match.js +76 -0
- package/template/claude-task-manager/lib/permission-sync.js +133 -20
- package/template/claude-task-manager/lib/process-title.js +35 -0
- package/template/claude-task-manager/lib/prompt-executions-query.js +25 -0
- package/template/claude-task-manager/lib/prompt-index-disk-cache.js +44 -0
- package/template/claude-task-manager/lib/prompt-intent.js +132 -0
- package/template/claude-task-manager/lib/provider-user-context.js +34 -0
- package/template/claude-task-manager/lib/read-pool-client.js +313 -0
- package/template/claude-task-manager/lib/readpool-breaker.js +31 -0
- package/template/claude-task-manager/lib/recent-sessions-breaker.js +12 -0
- package/template/claude-task-manager/lib/remote-feedback-client.js +72 -0
- package/template/claude-task-manager/lib/remote-relay-protocol.js +37 -4
- package/template/claude-task-manager/lib/remote-relay-store.js +159 -0
- package/template/claude-task-manager/lib/remote-submission-observer.js +278 -0
- package/template/claude-task-manager/lib/restart-guard.js +109 -0
- package/template/claude-task-manager/lib/restore-interruption-detector.js +439 -0
- package/template/claude-task-manager/lib/restore-policy.js +13 -0
- package/template/claude-task-manager/lib/restore-resume-batch.js +74 -0
- package/template/claude-task-manager/lib/restore-runtime.js +68 -0
- package/template/claude-task-manager/lib/restore-storm.js +34 -0
- package/template/claude-task-manager/lib/resume-cwd.js +36 -0
- package/template/claude-task-manager/lib/resume-preflight.js +313 -0
- package/template/claude-task-manager/lib/runtime-work-registry.js +444 -0
- package/template/claude-task-manager/lib/sanitize-openai-auth.js +31 -0
- package/template/claude-task-manager/lib/scheduler.js +21 -1
- package/template/claude-task-manager/lib/scrollback-snapshot-store.js +159 -0
- package/template/claude-task-manager/lib/serial-task-queue.js +64 -0
- package/template/claude-task-manager/lib/server-listeners.js +239 -0
- package/template/claude-task-manager/lib/session-capture.js +42 -7
- package/template/claude-task-manager/lib/session-content-backfill.js +131 -0
- package/template/claude-task-manager/lib/session-history.js +388 -43
- package/template/claude-task-manager/lib/session-host-manager.js +287 -0
- package/template/claude-task-manager/lib/session-image-refs.js +209 -0
- package/template/claude-task-manager/lib/session-jobs.js +399 -59
- package/template/claude-task-manager/lib/session-prompt-index.js +137 -0
- package/template/claude-task-manager/lib/session-restore.js +53 -0
- package/template/claude-task-manager/lib/session-standup.js +123 -23
- package/template/claude-task-manager/lib/session-state-bus.js +14 -0
- package/template/claude-task-manager/lib/session-stream.js +64 -16
- package/template/claude-task-manager/lib/session-timeline-summary.js +260 -0
- package/template/claude-task-manager/lib/session-token-usage.js +494 -0
- package/template/claude-task-manager/lib/session-workspace-binding.js +356 -0
- package/template/claude-task-manager/lib/setup-network-config.js +9 -0
- package/template/claude-task-manager/lib/size-cap.js +45 -0
- package/template/claude-task-manager/lib/size-cap.test.js +62 -0
- package/template/claude-task-manager/lib/skill-autocomplete.js +180 -1
- package/template/claude-task-manager/lib/skill-intent-resolver.js +304 -0
- package/template/claude-task-manager/lib/sqlite-driver.js +19 -3
- package/template/claude-task-manager/lib/standup-attention.js +7 -3
- package/template/claude-task-manager/lib/status-authority.js +39 -0
- package/template/claude-task-manager/lib/status-hooks.js +4 -0
- package/template/claude-task-manager/lib/storage-migration.js +235 -0
- package/template/claude-task-manager/lib/structured-capture.js +298 -0
- package/template/claude-task-manager/lib/sync-io-census.js +163 -0
- package/template/claude-task-manager/lib/tailscale-setup.js +6 -0
- package/template/claude-task-manager/lib/terminal-activity-evidence.js +33 -0
- package/template/claude-task-manager/lib/terminal-choice.js +364 -0
- package/template/claude-task-manager/lib/terminal-control-sanitize.js +17 -0
- package/template/claude-task-manager/lib/terminal-fingerprint.js +48 -0
- package/template/claude-task-manager/lib/terminal-output-flush.js +84 -0
- package/template/claude-task-manager/lib/timeline-order.js +122 -0
- package/template/claude-task-manager/lib/transcript-store.js +348 -43
- package/template/claude-task-manager/lib/transport-security.js +84 -1
- package/template/claude-task-manager/lib/wait-state.js +184 -0
- package/template/claude-task-manager/lib/walle-client.js +47 -5
- package/template/claude-task-manager/lib/walle-ctm-history.js +564 -4
- package/template/claude-task-manager/lib/walle-external-actions.js +135 -16
- package/template/claude-task-manager/lib/walle-history-hydration.js +46 -0
- package/template/claude-task-manager/lib/walle-native-health.js +403 -0
- package/template/claude-task-manager/lib/walle-repair.js +701 -0
- package/template/claude-task-manager/lib/walle-session-cache.js +109 -0
- package/template/claude-task-manager/lib/walle-session-context.js +57 -21
- package/template/claude-task-manager/lib/walle-session-model-catalog.js +34 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +539 -63
- package/template/claude-task-manager/lib/walle-transcript.js +52 -0
- package/template/claude-task-manager/lib/worktree-active-sync.js +11 -7
- package/template/claude-task-manager/lib/worktree-cwd.js +32 -1
- package/template/claude-task-manager/package.json +1 -1
- package/template/claude-task-manager/prompt-harvest.js +89 -66
- package/template/claude-task-manager/providers/claude-code.js +51 -3
- package/template/claude-task-manager/providers/cursor.js +140 -45
- package/template/claude-task-manager/public/css/reviews.css +551 -61
- package/template/claude-task-manager/public/css/setup.css +191 -0
- package/template/claude-task-manager/public/css/walle-session.css +865 -10
- package/template/claude-task-manager/public/css/walle.css +154 -0
- package/template/claude-task-manager/public/designs/ai-providers-consolidation-v2.html +830 -0
- package/template/claude-task-manager/public/index.html +18516 -2058
- package/template/claude-task-manager/public/ipad.html +363 -0
- package/template/claude-task-manager/public/js/document-review-links.js +301 -0
- package/template/claude-task-manager/public/js/image-normalize.js +69 -36
- package/template/claude-task-manager/public/js/message-renderer.js +1265 -77
- package/template/claude-task-manager/public/js/prompts.js +66 -29
- package/template/claude-task-manager/public/js/reviews.js +901 -133
- package/template/claude-task-manager/public/js/session-activity-utils.js +11 -1
- package/template/claude-task-manager/public/js/session-search-utils.js +94 -10
- package/template/claude-task-manager/public/js/session-status-precedence.js +23 -5
- package/template/claude-task-manager/public/js/setup.js +1273 -176
- package/template/claude-task-manager/public/js/stream-view.js +691 -73
- package/template/claude-task-manager/public/js/terminal-reconciler.js +210 -0
- package/template/claude-task-manager/public/js/walle-session.js +2455 -158
- package/template/claude-task-manager/public/js/walle.js +455 -28
- package/template/claude-task-manager/public/m/app.css +2909 -262
- package/template/claude-task-manager/public/m/app.js +6601 -398
- package/template/claude-task-manager/public/m/claim.html +224 -17
- package/template/claude-task-manager/public/m/index.html +117 -21
- package/template/claude-task-manager/public/m/sw.js +3 -1
- package/template/claude-task-manager/public/manifest.json +2 -2
- package/template/claude-task-manager/public/prompts.html +30 -14
- package/template/claude-task-manager/queue-engine.js +507 -28
- package/template/claude-task-manager/scripts/repair-claude-session-images.js +27 -8
- package/template/claude-task-manager/server.js +14341 -2197
- package/template/claude-task-manager/session-integrity.js +160 -18
- package/template/claude-task-manager/session-search-ranking.js +1 -0
- package/template/claude-task-manager/session-utils.js +25 -5
- package/template/claude-task-manager/workers/approval-blocklist.js +96 -6
- package/template/claude-task-manager/workers/approval-widget-validator.js +14 -8
- package/template/claude-task-manager/workers/conversation-import-worker.js +11 -50
- package/template/claude-task-manager/workers/db-owner-worker.js +386 -0
- package/template/claude-task-manager/workers/harvest-worker.js +9 -55
- package/template/claude-task-manager/workers/headless-term-worker.js +9 -530
- package/template/claude-task-manager/workers/read-pool-worker.js +387 -0
- package/template/claude-task-manager/workers/scrollback-worker.js +11 -72
- package/template/claude-task-manager/workers/session-host-process.js +146 -0
- package/template/claude-task-manager/workers/session-integrity-worker.js +10 -54
- package/template/claude-task-manager/workers/state-detectors/base.js +18 -1
- package/template/claude-task-manager/workers/state-detectors/claude-code.js +182 -9
- package/template/claude-task-manager/workers/state-detectors/codex.js +150 -2
- package/template/claude-task-manager/workers/state-detectors/cursor.js +127 -0
- package/template/claude-task-manager/workers/state-detectors/gemini.js +21 -0
- package/template/claude-task-manager/workers/state-detectors/index.js +29 -0
- package/template/claude-task-manager/workers/state-detectors/opencode.js +103 -0
- package/template/docs/design/markdown-review-pane.md +206 -0
- package/template/docs/designs/2026-05-17-portkey-gateway-provider-ux.md +129 -38
- package/template/docs/designs/2026-05-20-mobile-worktree-finish-command.md +27 -0
- package/template/docs/designs/2026-05-22-ai-configuration-consolidation.md +248 -0
- package/template/docs/designs/ai-configuration-consolidation-mock.html +812 -0
- package/template/docs/private-memory-and-pii-policy.md +69 -0
- package/template/package.json +2 -1
- package/template/scripts/check-private-data.js +201 -0
- package/template/shared/sqlite-owner-guard.js +30 -0
- package/template/shared/sqlite-owner-write-queue.js +225 -0
- package/template/shared/sqlite-storage-policy.js +111 -0
- package/template/shared/sqlite-write-lock.js +428 -0
- package/template/wall-e/agent-runners/claude-code.js +5 -0
- package/template/wall-e/agent.js +166 -22
- package/template/wall-e/api-walle.js +524 -70
- package/template/wall-e/auth/provider-flows.js +11 -1
- package/template/wall-e/bin/walle-mcp-stdio.js +341 -17
- package/template/wall-e/brain.js +1614 -141
- package/template/wall-e/chat/attachment-blocks.js +96 -0
- package/template/wall-e/chat/attachments.js +2 -1
- package/template/wall-e/chat/capability-resolver.js +7 -7
- package/template/wall-e/chat/context-messages.js +28 -0
- package/template/wall-e/chat/conversation-frame.js +630 -0
- package/template/wall-e/chat/provider-messages.js +125 -0
- package/template/wall-e/chat.js +1002 -233
- package/template/wall-e/coding/acceptance-contract.js +170 -0
- package/template/wall-e/coding/acp-adapter.js +1 -1
- package/template/wall-e/coding/agent-catalog.js +3 -0
- package/template/wall-e/coding/artifact-store.js +93 -0
- package/template/wall-e/coding/capability-router.js +120 -0
- package/template/wall-e/coding/coding-run-controller.js +423 -0
- package/template/wall-e/coding/compaction-service.js +157 -12
- package/template/wall-e/coding/frontend-verification.js +258 -0
- package/template/wall-e/coding/lifecycle-hooks.js +75 -0
- package/template/wall-e/coding/local-preview-contract.js +157 -0
- package/template/wall-e/coding/permission-service.js +57 -13
- package/template/wall-e/coding/prompt-bundle.js +19 -1
- package/template/wall-e/coding/prompt-section-registry.js +227 -0
- package/template/wall-e/coding/provider-compat.js +15 -0
- package/template/wall-e/coding/runtime-events.js +224 -0
- package/template/wall-e/coding/runtime-mode.js +3 -0
- package/template/wall-e/coding/side-git-snapshot.js +160 -4
- package/template/wall-e/coding/snapshot-service.js +143 -1
- package/template/wall-e/coding/stream-processor.js +388 -34
- package/template/wall-e/coding/task-tool.js +141 -4
- package/template/wall-e/coding/tool-execution-controller.js +365 -0
- package/template/wall-e/coding/tool-registry.js +43 -5
- package/template/wall-e/coding/user-hooks.js +217 -0
- package/template/wall-e/coding-orchestrator.js +1330 -221
- package/template/wall-e/coding-prompts.js +20 -4
- package/template/wall-e/context/context-builder.js +15 -2
- package/template/wall-e/decision/confidence.js +1 -1
- package/template/wall-e/docs/coding-acceptance-contract.md +41 -0
- package/template/wall-e/docs/external-action-controller.md +26 -6
- package/template/wall-e/docs/telemetry-lifecycle.md +8 -2
- package/template/wall-e/embeddings.js +591 -53
- package/template/wall-e/external-action-controller.js +12 -0
- package/template/wall-e/http/auth.js +1 -0
- package/template/wall-e/http/chat-api.js +46 -11
- package/template/wall-e/http/model-admin.js +836 -34
- package/template/wall-e/lib/boot-profile.js +88 -0
- package/template/wall-e/lib/event-loop-monitor.js +93 -0
- package/template/wall-e/lib/service-health.js +194 -0
- package/template/wall-e/llm/anthropic.js +130 -5
- package/template/wall-e/llm/client.js +266 -63
- package/template/wall-e/llm/default-fallback.js +382 -0
- package/template/wall-e/llm/health.js +19 -0
- package/template/wall-e/llm/message-guard.js +78 -0
- package/template/wall-e/llm/model-catalog.js +252 -1
- package/template/wall-e/llm/openai.js +26 -4
- package/template/wall-e/llm/portkey-sync.js +654 -0
- package/template/wall-e/llm/provider-error.js +30 -2
- package/template/wall-e/llm/registry.js +5 -1
- package/template/wall-e/llm/request-compat.js +67 -0
- package/template/wall-e/loops/backfill.js +79 -23
- package/template/wall-e/loops/brain-optimize.js +67 -0
- package/template/wall-e/loops/ingest.js +25 -10
- package/template/wall-e/loops/question-digest.js +160 -0
- package/template/wall-e/loops/reflect.js +6 -4
- package/template/wall-e/loops/think.js +39 -12
- package/template/wall-e/mcp-server.js +318 -36
- package/template/wall-e/memory/ctm-context-client.js +52 -14
- package/template/wall-e/memory/ctm-operational-context.js +237 -0
- package/template/wall-e/memory/ctm-prompt-executions-client.js +128 -0
- package/template/wall-e/memory/ctm-session-context.js +111 -63
- package/template/wall-e/prompts/coding/deepseek.txt +3 -0
- package/template/wall-e/prompts/coding/gemini.txt +6 -0
- package/template/wall-e/prompts/coding/gpt.txt +6 -0
- package/template/wall-e/prompts/coding/local.txt +7 -0
- package/template/wall-e/runtime/decision-hooks.js +115 -0
- package/template/wall-e/runtime/devbox-gateway.js +82 -8
- package/template/wall-e/runtime/prompt-manifest.js +86 -0
- package/template/wall-e/runtime/tool-executor.js +269 -0
- package/template/wall-e/runtime/tool-result-envelope.js +138 -0
- package/template/wall-e/runtime/transcript-projection.js +60 -0
- package/template/wall-e/runtime/walle-runtime.js +224 -0
- package/template/wall-e/scripts/db-optimize/migrate.js +162 -0
- package/template/wall-e/scripts/db-optimize/recall-eval.js +117 -0
- package/template/wall-e/server.js +15 -0
- package/template/wall-e/session-files.js +9 -0
- package/template/wall-e/skills/_bundled/google-calendar/run.js +1 -1
- package/template/wall-e/skills/_bundled/gws-workspace/run.js +1 -1
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +76 -6
- package/template/wall-e/skills/claude-code-reader.js +7 -3
- package/template/wall-e/skills/script-skill-runner.js +10 -0
- package/template/wall-e/skills/skill-planner.js +38 -0
- package/template/wall-e/tools/builtin-middleware.js +19 -9
- package/template/wall-e/tools/local-tools.js +1428 -16
- package/template/wall-e/tools/permission-checker.js +73 -5
- package/template/wall-e/tools/question-manager.js +117 -7
- package/template/wall-e/training/harvester.js +12 -28
- package/template/wall-e/training/replay.js +25 -80
- package/template/website/index.html +10 -10
- package/template/wall-e/eval/ab-test.js +0 -203
- package/template/wall-e/eval/agent-runner.js +0 -772
- package/template/wall-e/eval/agent-scorer.js +0 -461
- package/template/wall-e/eval/aggregator.js +0 -414
- package/template/wall-e/eval/allowed-test-commands.js +0 -34
- package/template/wall-e/eval/benchmark-generator.js +0 -113
- package/template/wall-e/eval/benchmarks/chat-eval.json +0 -1662
- package/template/wall-e/eval/benchmarks/chat.json +0 -82
- package/template/wall-e/eval/benchmarks/coding-agent-real.json +0 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -1581
- package/template/wall-e/eval/benchmarks/coding.json +0 -122
- package/template/wall-e/eval/benchmarks/memory-retrieval.json +0 -234
- package/template/wall-e/eval/benchmarks/reasoning.json +0 -82
- package/template/wall-e/eval/benchmarks/swebench-lite-30.json +0 -212
- package/template/wall-e/eval/benchmarks.js +0 -669
- package/template/wall-e/eval/cc-replay.js +0 -719
- package/template/wall-e/eval/chat-eval.js +0 -525
- package/template/wall-e/eval/check-keys.js +0 -15
- package/template/wall-e/eval/check-providers.js +0 -42
- package/template/wall-e/eval/codex-cli-baseline.js +0 -669
- package/template/wall-e/eval/coding-agent-real.js +0 -570
- package/template/wall-e/eval/context-compactor.js +0 -251
- package/template/wall-e/eval/debug-agent003.js +0 -68
- package/template/wall-e/eval/diagnostics.js +0 -216
- package/template/wall-e/eval/eval-orchestrator.js +0 -642
- package/template/wall-e/eval/evaluate.js +0 -202
- package/template/wall-e/eval/evaluator.js +0 -373
- package/template/wall-e/eval/exporter.js +0 -212
- package/template/wall-e/eval/fixtures/express-basic/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-basic/server.js +0 -115
- package/template/wall-e/eval/fixtures/express-basic/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy/server.js +0 -113
- package/template/wall-e/eval/fixtures/express-buggy/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-items/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-items/server.js +0 -112
- package/template/wall-e/eval/fixtures/express-buggy-items/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-buggy-search/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-buggy-search/server.js +0 -121
- package/template/wall-e/eval/fixtures/express-buggy-search/test.js +0 -83
- package/template/wall-e/eval/fixtures/express-rename-data/data.js +0 -34
- package/template/wall-e/eval/fixtures/express-rename-data/package.json +0 -9
- package/template/wall-e/eval/fixtures/express-rename-data/server.js +0 -97
- package/template/wall-e/eval/fixtures/express-rename-data/test.js +0 -88
- package/template/wall-e/eval/fixtures/express-xss/package.json +0 -12
- package/template/wall-e/eval/fixtures/express-xss/server.js +0 -90
- package/template/wall-e/eval/fixtures/express-xss/test.js +0 -67
- package/template/wall-e/eval/fixtures/express-xss/views/profile.ejs +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/default.js +0 -9
- package/template/wall-e/eval/fixtures/fullstack-app/config/test.js +0 -13
- package/template/wall-e/eval/fixtures/fullstack-app/package.json +0 -11
- package/template/wall-e/eval/fixtures/fullstack-app/public/css/style.css +0 -137
- package/template/wall-e/eval/fixtures/fullstack-app/public/index.html +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/app.js +0 -121
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/auth.js +0 -71
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/items.js +0 -80
- package/template/wall-e/eval/fixtures/fullstack-app/public/js/users.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/public/login.html +0 -45
- package/template/wall-e/eval/fixtures/fullstack-app/public/register.html +0 -38
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/migrate.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/scripts/seed.js +0 -46
- package/template/wall-e/eval/fixtures/fullstack-app/server/db.js +0 -99
- package/template/wall-e/eval/fixtures/fullstack-app/server/index.js +0 -94
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/auth.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/middleware/logger.js +0 -19
- package/template/wall-e/eval/fixtures/fullstack-app/server/router.js +0 -50
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/auth.js +0 -69
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/health.js +0 -23
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/items.js +0 -88
- package/template/wall-e/eval/fixtures/fullstack-app/server/routes/users.js +0 -75
- package/template/wall-e/eval/fixtures/fullstack-app/server/test.js +0 -198
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/response.js +0 -34
- package/template/wall-e/eval/fixtures/fullstack-app/server/utils/validate.js +0 -26
- package/template/wall-e/eval/fixtures/fullstack-app/server.js +0 -8
- package/template/wall-e/eval/fixtures/fullstack-app/test.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/data.js +0 -58
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/middleware.js +0 -46
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/package.json +0 -8
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/routes.js +0 -64
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/server.js +0 -56
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/api/test.js +0 -116
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/commands.js +0 -61
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/index.js +0 -62
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/output.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/package.json +0 -11
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/cli/test.js +0 -44
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/formatters.js +0 -43
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/index.js +0 -12
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/package.json +0 -5
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/test.js +0 -55
- package/template/wall-e/eval/fixtures/monorepo-basic/packages/shared/validators.js +0 -29
- package/template/wall-e/eval/fixtures/monorepo-basic/test.js +0 -46
- package/template/wall-e/eval/fixtures/node-cli/index.js +0 -78
- package/template/wall-e/eval/fixtures/node-cli/package.json +0 -10
- package/template/wall-e/eval/fixtures/node-cli/test.js +0 -57
- package/template/wall-e/eval/fixtures/node-typed/package.json +0 -8
- package/template/wall-e/eval/fixtures/node-typed/src/handlers.js +0 -31
- package/template/wall-e/eval/fixtures/node-typed/src/utils.js +0 -33
- package/template/wall-e/eval/fixtures/node-typed/test.js +0 -36
- package/template/wall-e/eval/fixtures/python-flask/app.py +0 -14
- package/template/wall-e/eval/fixtures/python-flask/requirements.txt +0 -2
- package/template/wall-e/eval/fixtures/python-flask/test_app.py +0 -25
- package/template/wall-e/eval/fixtures/wall-e-subset/brain.js +0 -105
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/aggregator.js +0 -101
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/chat.json +0 -20
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks/coding.json +0 -32
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/benchmarks.js +0 -64
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/package.json +0 -6
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/server.js +0 -31
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/test.js +0 -18
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/fixtures/simple-project/utils.js +0 -34
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/runner.js +0 -104
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/scorer.js +0 -73
- package/template/wall-e/eval/fixtures/wall-e-subset/eval/test.js +0 -134
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/client.js +0 -99
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/providers.js +0 -63
- package/template/wall-e/eval/fixtures/wall-e-subset/llm/test.js +0 -70
- package/template/wall-e/eval/fixtures/wall-e-subset/package.json +0 -10
- package/template/wall-e/eval/fixtures/wall-e-subset/test.js +0 -86
- package/template/wall-e/eval/harvester.js +0 -685
- package/template/wall-e/eval/head-to-head.js +0 -388
- package/template/wall-e/eval/humaneval-adapter.js +0 -321
- package/template/wall-e/eval/list-models.js +0 -31
- package/template/wall-e/eval/livecodebench-adapter.js +0 -291
- package/template/wall-e/eval/mail-integration.js +0 -443
- package/template/wall-e/eval/manifest.js +0 -186
- package/template/wall-e/eval/meta-harness/adapters/coding-agent.js +0 -57
- package/template/wall-e/eval/meta-harness/bootstrap-snapshot.js +0 -149
- package/template/wall-e/eval/meta-harness/candidate-store.js +0 -117
- package/template/wall-e/eval/meta-harness/cli.js +0 -86
- package/template/wall-e/eval/meta-harness/domain-spec.js +0 -154
- package/template/wall-e/eval/meta-harness/domains/coding-agent.domain.json +0 -84
- package/template/wall-e/eval/meta-harness/examples/env-bootstrap-candidate.js +0 -29
- package/template/wall-e/eval/meta-harness/experience-store.js +0 -174
- package/template/wall-e/eval/meta-harness/frontier.js +0 -96
- package/template/wall-e/eval/meta-harness/harness-interface.js +0 -90
- package/template/wall-e/eval/meta-harness/leakage-guard.js +0 -80
- package/template/wall-e/eval/meta-harness/optimizer.js +0 -207
- package/template/wall-e/eval/meta-harness/proposer-runner.js +0 -110
- package/template/wall-e/eval/meta-harness/reporting.js +0 -58
- package/template/wall-e/eval/meta-harness/telemetry.js +0 -27
- package/template/wall-e/eval/meta-harness/validation.js +0 -81
- package/template/wall-e/eval/promoter.js +0 -228
- package/template/wall-e/eval/provider-normalizer.js +0 -33
- package/template/wall-e/eval/replay.js +0 -395
- package/template/wall-e/eval/run-agent-benchmarks.js +0 -386
- package/template/wall-e/eval/run-codex-cli-baseline.js +0 -177
- package/template/wall-e/eval/run-coding-agent-real.js +0 -187
- package/template/wall-e/eval/run-eval.js +0 -435
- package/template/wall-e/eval/run-model-comparison.js +0 -142
- package/template/wall-e/eval/session-evaluator.js +0 -187
- package/template/wall-e/eval/session-miner.js +0 -207
- package/template/wall-e/eval/session-retrieval-benchmark.js +0 -150
- package/template/wall-e/eval/session-transcripts.js +0 -509
- package/template/wall-e/eval/shadow.js +0 -161
- package/template/wall-e/eval/swebench-adapter.js +0 -345
- package/template/wall-e/eval/swebench-docker.js +0 -192
- package/template/wall-e/eval/train.py +0 -320
- package/template/wall-e/eval/trainer.js +0 -232
- package/template/wall-e/eval/weekly-eval-loop.js +0 -241
|
@@ -0,0 +1,494 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Per-session token-usage summary.
|
|
4
|
+
//
|
|
5
|
+
// Produces the numbers shown next to the model-name chip in the top bar:
|
|
6
|
+
// { total, ctx, ctxWindow, ctxPct, exact, totalLabel, breakdown }
|
|
7
|
+
//
|
|
8
|
+
// Sources, in order of fidelity:
|
|
9
|
+
// - Claude Code: ~/.claude/projects/<hash>/<id>.jsonl — every assistant entry
|
|
10
|
+
// carries message.usage (input/output/cache_read/cache_creation tokens). We
|
|
11
|
+
// sum them for `total` and take the last turn for `ctx`. EXACT.
|
|
12
|
+
// - Codex: ~/.codex/sessions/.../rollout-*.jsonl — periodic `token_count`
|
|
13
|
+
// events whose `total_token_usage` is already cumulative and which also
|
|
14
|
+
// carry `model_context_window`. We read the LAST such event. EXACT.
|
|
15
|
+
// - Everything else (Cursor/Gemini/Aider/OpenCode/Wall-E): estimate by
|
|
16
|
+
// tokenizing the parsed message text. Marked `exact:false` so the UI can
|
|
17
|
+
// render a `~` prefix.
|
|
18
|
+
//
|
|
19
|
+
// Claude files can be tens of MB and we recompute on every turn-finish, so the
|
|
20
|
+
// Claude path scans INCREMENTALLY: JSONL is append-only, so we keep a running
|
|
21
|
+
// total + byte offset per file and only read the bytes appended since last time.
|
|
22
|
+
// Codex events are cumulative, so we only ever read the file tail.
|
|
23
|
+
|
|
24
|
+
const fs = require('fs');
|
|
25
|
+
|
|
26
|
+
const DEFAULT_CTX_WINDOW = 200000;
|
|
27
|
+
// Codex: a cumulative token_count event normally sits within the last few KB, so
|
|
28
|
+
// read a small tail first and grow only if it isn't found (huge final tool blob).
|
|
29
|
+
const CODEX_TAIL_BYTES = 64 * 1024;
|
|
30
|
+
const CODEX_TAIL_BYTES_MAX = 4 * 1024 * 1024;
|
|
31
|
+
|
|
32
|
+
/**
 * Normalise a caller-supplied context-window override.
 * @param {*} value raw override (number, numeric string, or anything else)
 * @returns {number} the rounded positive window, or 0 when the value is unusable
 */
function _contextWindowOverride(value) {
  const rounded = Math.round(Number(value) || 0);
  if (!Number.isFinite(rounded)) return 0;
  if (rounded <= 0) return 0;
  return rounded;
}
|
|
36
|
+
|
|
37
|
+
// Fallback context windows by model id (Codex carries its own window in-band, so
|
|
38
|
+
// this is only consulted for Claude + the estimate tier). Conservative defaults.
|
|
39
|
+
// `override` is the preferred path for provider/catalog metadata. The static map
|
|
40
|
+
// is only a cold-start safety net for transcripts seen before the registry cache
|
|
41
|
+
// has refreshed.
|
|
42
|
+
/**
 * Resolve the context window (in tokens) for a model id.
 * @param {string} modelId model identifier; matched case-insensitively
 * @param {*} [override] registry/catalog window; wins whenever it is a positive number
 * @returns {number} the override, the first matching cold-start rule, or DEFAULT_CTX_WINDOW
 */
function ctxWindowForModel(modelId, override) {
  const registryWindow = _contextWindowOverride(override);
  if (registryWindow) return registryWindow;

  const id = String(modelId || '').toLowerCase();
  if (!id) return DEFAULT_CTX_WINDOW;

  // Cold-start defaults only, checked in order (first match wins). The live
  // window comes from the model registry when available, and per-session
  // transcript evidence can still correct it.
  const coldStartRules = [
    [/\[1m\]|-1m\b|context-1m/, 1000000],
    [/^claude-(opus-4-8|fable-5|mythos-5)\b/, 1000000],
    [/^claude-/, 200000],
    [/^gpt-5|^o[34]\b/, 256000],
    [/^gpt-4/, 128000],
    [/^gemini/, 1000000],
    [/^(deepseek|kimi|moonshot|qwen)/, 128000],
  ];
  const hit = coldStartRules.find(([pattern]) => pattern.test(id));
  return hit ? hit[1] : DEFAULT_CTX_WINDOW;
}
|
|
58
|
+
|
|
59
|
+
// Claude's usable context window is NOT a fixed property of the model id — Claude Code
|
|
60
|
+
// runs a per-session 200K vs 1M ("extended context") mode, so a static model→window map
|
|
61
|
+
// is fundamentally incomplete. Read the window from the session's own authoritative
|
|
62
|
+
// signals instead (the Claude analog of Codex's in-band model_context_window):
|
|
63
|
+
// • AUTO compact_boundary `preTokens` — Claude Code auto-compacts near the limit, so
|
|
64
|
+
// its recorded pre-compaction token count is its OWN tight measurement of the window.
|
|
65
|
+
// This is authoritative: it can correct the static estimate UP *or* DOWN. (Manual
|
|
66
|
+
// /compact is ignored — the user can compact at any size.)
|
|
67
|
+
// • Largest single-turn prompt (input+cache_read+cache_creation) — a hard lower bound,
|
|
68
|
+
// since the API cannot accept a prompt larger than the window. Only raises, never
|
|
69
|
+
// lowers, the static default (it's a floor, not a measurement of the true window).
|
|
70
|
+
// The evidence is classified to the nearest standard window with a tolerance, so an
|
|
71
|
+
// auto-compact that overshoots a round 1M (e.g. 1,008,316) reads as the 1M tier instead
|
|
72
|
+
// of rounding up to 2M. A fresh session with no evidence falls back to the model map.
|
|
73
|
+
const _CTX_WINDOW_TIERS = [200000, 1000000, 2000000, 5000000, 10000000];
const _CTX_WINDOW_TOLERANCE = 1.05;

/**
 * Classify an observed token count to the smallest standard window that can
 * hold it, allowing an overshoot of up to _CTX_WINDOW_TOLERANCE.
 * @param {*} observed observed token count
 * @returns {number} the matching tier, or the observed value itself when it
 *   exceeds every tier
 */
function _snapToCtxTier(observed) {
  const size = Math.max(0, Number(observed) || 0);
  const fit = _CTX_WINDOW_TIERS.find((tier) => size <= tier * _CTX_WINDOW_TOLERANCE);
  return fit === undefined ? size : fit;
}
|
|
80
|
+
/**
 * Combine the static window estimate with evidence read from the transcript.
 * @param {*} baseWindow static estimate; falls back to DEFAULT_CTX_WINDOW when not positive
 * @param {{autoCompactAt?: number, maxPrompt?: number}} [evidence]
 * @returns {number} the effective context window
 */
function _effectiveCtxWindow(baseWindow, evidence) {
  const requested = Number(baseWindow);
  const base = requested > 0 ? requested : DEFAULT_CTX_WINDOW;
  const signals = evidence || {};
  const autoAt = Math.max(0, Number(signals.autoCompactAt) || 0);
  const largestPrompt = Math.max(0, Number(signals.maxPrompt) || 0);

  // An auto-compact measurement is authoritative: it may move the window up or down.
  if (autoAt > 0) return _snapToCtxTier(autoAt);

  // A prompt that fits inside the estimate tells us nothing new.
  if (largestPrompt <= base) return base;

  // A larger prompt proves the estimate is too small; it can only raise it.
  return Math.max(_snapToCtxTier(largestPrompt), base);
}
|
|
91
|
+
|
|
92
|
+
/**
 * Compact human label: 980 -> "980", 1234 -> "1.2k", 1_200_000 -> "1.2M".
 *
 * Values below ten units keep one decimal; larger values are rounded to an
 * integer. Rounding never leaves the unit it was computed in: a value that
 * would render as "1000k" is promoted to "1.0M", and one that would render as
 * "10.0k"/"10.0M" is shown as "10k"/"10M" to match its neighbours.
 *
 * @param {*} n token count; non-numeric and negative inputs are treated as 0
 * @returns {string} compact label
 */
function formatTokens(n) {
  const v = Math.max(0, Math.round(Number(n) || 0));
  if (v < 1000) return String(v);

  // Render a scaled magnitude: one decimal below 10, integer otherwise.
  const compact = (scaled) => {
    if (scaled < 10) {
      const oneDecimal = scaled.toFixed(1);
      // toFixed can round 9.96 up to "10.0"; fall through to the integer form.
      if (oneDecimal !== '10.0') return oneDecimal;
    }
    return String(Math.round(scaled));
  };

  if (v < 1_000_000) {
    const k = compact(v / 1000);
    // 999.5k and above rounds to 1000; promote it to the M scale instead.
    if (k !== '1000') return k + 'k';
  }
  return compact(v / 1_000_000) + 'M';
}
|
|
103
|
+
|
|
104
|
+
/** True when `provider` names the Claude agent (case-insensitive exact match). */
function _isClaude(provider) {
  const normalized = String(provider || '').toLowerCase();
  return normalized === 'claude';
}
|
|
105
|
+
/** True when `provider` names the Codex agent (case-insensitive exact match). */
function _isCodex(provider) {
  const normalized = String(provider || '').toLowerCase();
  return normalized === 'codex';
}
|
|
106
|
+
|
|
107
|
+
// Match a brace-balanced JSON object beginning at the `{` that follows `key` at
|
|
108
|
+
// or after `from`. Returns the parsed object or null. Used for Codex events,
|
|
109
|
+
// whose token objects are small.
|
|
110
|
+
/**
 * Parse the brace-balanced JSON object that starts at the first `{` found
 * after `key` (searching for `key` at or after `from`).
 * @param {string} text haystack
 * @param {string} key literal text to locate first
 * @param {number} [from=0] index to start searching for `key`
 * @returns {object|null} the parsed object, or null when the key or brace is
 *   missing, the braces never balance, or the slice is not valid JSON
 */
function _matchObjectAfter(text, key, from = 0) {
  const keyAt = text.indexOf(key, from);
  if (keyAt < 0) return null;
  const open = text.indexOf('{', keyAt);
  if (open < 0) return null;

  let nesting = 0;
  let quoted = false;
  let escaped = false;
  for (let pos = open; pos < text.length; pos += 1) {
    const c = text.charAt(pos);
    if (quoted) {
      // Inside a string literal braces are data; only track escapes and the closing quote.
      if (escaped) escaped = false;
      else if (c === '\\') escaped = true;
      else if (c === '"') quoted = false;
      continue;
    }
    switch (c) {
      case '"':
        quoted = true;
        break;
      case '{':
        nesting += 1;
        break;
      case '}':
        nesting -= 1;
        if (nesting === 0) {
          try {
            return JSON.parse(text.slice(open, pos + 1));
          } catch {
            return null;
          }
        }
        break;
      default:
        break;
    }
  }
  return null;
}
|
|
139
|
+
|
|
140
|
+
const _CODEX_TOKEN_COUNT_PAYLOAD_RE = /"payload"\s*:\s*\{\s*"type"\s*:\s*"token_count"/;

/**
 * Walk `text` line by line from the end and return the summary of the last
 * line that is a parseable Codex token_count event.
 * @param {string} text JSONL text (typically a file tail)
 * @returns {object|null} the summary from _codexSummaryFromTokenCountLine, or
 *   null when no line qualifies
 */
function _codexSummaryFromTokenCountLines(text) {
  if (!text) return null;
  let lineEnd = text.length;
  for (;;) {
    if (lineEnd <= 0) return null;
    const breakAt = text.lastIndexOf('\n', lineEnd - 1);
    const candidate = text.slice(breakAt + 1, lineEnd).trim();
    if (candidate !== '' && _CODEX_TOKEN_COUNT_PAYLOAD_RE.test(candidate)) {
      const parsed = _codexSummaryFromTokenCountLine(candidate);
      if (parsed) return parsed;
    }
    // No earlier newline means the first line was just examined.
    if (breakAt === -1) return null;
    lineEnd = breakAt;
  }
}
|
|
157
|
+
|
|
158
|
+
/**
 * Parse one JSONL line as a Codex token_count event.
 * @param {string} line a single JSON line
 * @returns {{total: object, last: (object|null), window: number}|null} the
 *   cumulative usage, the last-turn usage (null when absent) and the in-band
 *   context window (0 when absent); null when the line is not a usable
 *   token_count event or is not valid JSON
 */
function _codexSummaryFromTokenCountLine(line) {
  if (!line) return null;
  try {
    const event = JSON.parse(line);
    const payload = event && event.payload;
    if (!payload || payload.type !== 'token_count') return null;
    const info = payload.info;
    if (!info) return null;
    const cumulative = info.total_token_usage;
    if (!cumulative || typeof cumulative !== 'object') return null;
    const lastTurn = info.last_token_usage;
    return {
      total: cumulative,
      last: lastTurn && typeof lastTurn === 'object' ? lastTurn : null,
      window: Number(info.model_context_window) || 0,
    };
  } catch {
    return null;
  }
}
|
|
175
|
+
|
|
176
|
+
/**
 * Current context occupancy for a Codex summary.
 * @param {{last?: object, total?: object}} summary as produced by the token_count parsers
 * @returns {number} tokens currently in context
 */
function _codexCurrentContextTokens(summary) {
  const asCount = (value) => Number(value) || 0;
  const latest = summary && summary.last;

  if (!latest || typeof latest !== 'object') {
    // No per-turn usage recorded: fall back to the cumulative input count.
    const cumulative = summary && summary.total;
    return asCount(cumulative && cumulative.input_tokens);
  }

  // Codex emits a post-compaction token_count where input/output are zero but
  // total_tokens is the new compacted context size. Once a last-turn object
  // exists we answer from it (0 included) and never fall back to the
  // cumulative total, which would pin the badge at 100%.
  for (const field of ['total_tokens', 'input_tokens']) {
    const value = Number(latest[field]);
    if (Number.isFinite(value) && value > 0) return value;
  }
  const parts = asCount(latest.input_tokens)
    + asCount(latest.output_tokens)
    + asCount(latest.reasoning_output_tokens);
  return Math.max(0, parts);
}
|
|
194
|
+
|
|
195
|
+
// --- Claude: sum message.usage across assistant entries ------------------------
|
|
196
|
+
//
|
|
197
|
+
// Extract usage numbers from a single JSONL line WITHOUT JSON.parsing the whole
|
|
198
|
+
// (possibly multi-MB) line: the four counters are uniquely-named keys, so a field
|
|
199
|
+
// regex is both faster and immune to giant tool-result blobs.
|
|
200
|
+
const _CLAUDE_FIELD_RE = {
  input: /"input_tokens":\s*(\d+)/,
  output: /"output_tokens":\s*(\d+)/,
  cacheRead: /"cache_read_input_tokens":\s*(\d+)/,
  cacheCreate: /"cache_creation_input_tokens":\s*(\d+)/,
};

/**
 * Pull the four usage counters out of one JSONL line with field regexes.
 * @param {string} line a single transcript line
 * @returns {{input: number, output: number, cacheRead: number, cacheCreate: number}|null}
 *   the counters, or null when the line has no `"usage"` key or every counter is 0
 */
function _claudeUsageFromLine(line) {
  // Cheap guard before regex work: lines without a usage key cannot contribute.
  if (line.indexOf('"usage"') === -1) return null;
  const num = (re) => { const m = line.match(re); return m ? parseInt(m[1], 10) || 0 : 0; };
  const u = {
    input: num(_CLAUDE_FIELD_RE.input),
    output: num(_CLAUDE_FIELD_RE.output),
    cacheRead: num(_CLAUDE_FIELD_RE.cacheRead),
    cacheCreate: num(_CLAUDE_FIELD_RE.cacheCreate),
  };
  if (!(u.input || u.output || u.cacheRead || u.cacheCreate)) return null;
  return u;
}

/**
 * Scan a chunk of JSONL text and fold what it finds into `acc` in place.
 *
 * For every line carrying usage counters this adds them to
 * `acc.input/output/cacheRead/cacheCreate`, sets `acc.lastTurn` to that
 * line's prompt + reply size, and raises `acc.maxPrompt` to the largest
 * prompt seen. For lines without usage that mention `compact_boundary` with
 * `"trigger":"auto"`, it raises `acc.autoCompactAt` to the largest
 * `preTokens` seen; manual compactions are ignored because the user can
 * compact at any size.
 *
 * @param {string} text JSONL text; lines are separated by `\n`
 * @param {object} acc running accumulator, mutated in place
 * @returns {void}
 */
function _scanClaudeChunk(text, acc) {
  let start = 0;
  while (start < text.length) {
    let end = text.indexOf('\n', start);
    if (end === -1) end = text.length;
    const line = text.slice(start, end);
    start = end + 1;
    if (!line) continue;

    const u = _claudeUsageFromLine(line);
    if (!u) {
      // Only usage-free lines are considered boundary markers. A line that
      // carries usage but merely mentions "compact_boundary" in its text must
      // still be counted, so the substring alone never causes a skip.
      if (line.indexOf('compact_boundary') !== -1 && /"trigger"\s*:\s*"auto"/.test(line)) {
        const pt = line.match(/"preTokens"\s*:\s*(\d+)/);
        if (pt) {
          const v = Number(pt[1]) || 0;
          if (v > (acc.autoCompactAt || 0)) acc.autoCompactAt = v;
        }
      }
      continue;
    }

    acc.input += u.input;
    acc.output += u.output;
    acc.cacheRead += u.cacheRead;
    acc.cacheCreate += u.cacheCreate;
    // High-water prompt size (input side only): a lower bound on the real
    // context window, used to correct a stale static estimate.
    const prompt = u.input + u.cacheRead + u.cacheCreate;
    if (prompt > (acc.maxPrompt || 0)) acc.maxPrompt = prompt;
    // Current context occupancy ~ the most recent turn's prompt + reply.
    acc.lastTurn = prompt + u.output;
  }
}
|
|
256
|
+
|
|
257
|
+
/**
 * Read up to the last `readLen` bytes of a file and decode them as UTF-8.
 *
 * `fileSize` is the caller's view of the file length and may be stale. Only
 * the bytes `readSync` actually returned are decoded, so a file shorter than
 * `fileSize` never yields zero-byte padding in the result.
 *
 * @param {string} jsonlPath file to read
 * @param {number} fileSize file length in bytes as known to the caller
 * @param {number} readLen maximum number of trailing bytes to read
 * @returns {string} decoded tail; '' when there is nothing to read
 * @throws when the file cannot be opened or read
 */
function _readFileTail(jsonlPath, fileSize, readLen) {
  const len = Math.max(0, Math.min(readLen, fileSize));
  // Open first so a missing or unreadable file still throws for the caller.
  const fd = fs.openSync(jsonlPath, 'r');
  try {
    if (len === 0) return '';
    const buf = Buffer.alloc(len);
    const bytesRead = fs.readSync(fd, buf, 0, len, Math.max(0, fileSize - len));
    return buf.toString('utf8', 0, bytesRead);
  } finally { fs.closeSync(fd); }
}
|
|
267
|
+
|
|
268
|
+
// Read a small tail first; grow (×8) only when the window didn't contain a
|
|
269
|
+
// token_count event. lastIndexOf+forward-scan means a window that includes the
|
|
270
|
+
// key includes its (EOF-ward) object, so a partial leading line never corrupts.
|
|
271
|
+
/**
 * Look for the last Codex token_count event in progressively larger file
 * tails: CODEX_TAIL_BYTES first, then x8 per attempt, capped by
 * CODEX_TAIL_BYTES_MAX and the file size.
 * @param {string} jsonlPath rollout file
 * @param {number} fileSize file length in bytes
 * @returns {object|null} the summary, or null when unreadable or not found
 */
function _codexSummaryGrowing(jsonlPath, fileSize) {
  let span = CODEX_TAIL_BYTES;
  while (true) {
    let tail;
    try {
      tail = _readFileTail(jsonlPath, fileSize, span);
    } catch {
      return null;
    }
    const found = _codexSummaryFromTail(tail);
    if (found) return found;
    // Stop once the whole file (or the maximum tail) has been examined.
    const exhausted = span >= fileSize || span >= CODEX_TAIL_BYTES_MAX;
    if (exhausted) return null;
    span = Math.min(span * 8, CODEX_TAIL_BYTES_MAX, fileSize);
  }
}
|
|
282
|
+
|
|
283
|
+
// --- Codex: read the last cumulative token_count event -------------------------
|
|
284
|
+
/**
 * Extract the last cumulative token_count summary from a file tail.
 * Whole-line parsing is tried first; otherwise the last `"total_token_usage"`
 * key is located and the objects after it are brace-matched.
 * @param {string} text tail of a Codex rollout file
 * @returns {{total: object, last: (object|null), window: number}|null}
 */
function _codexSummaryFromTail(text) {
  const fromLines = _codexSummaryFromTokenCountLines(text);
  if (fromLines) return fromLines;

  // The last token_count event carries the cumulative total_token_usage.
  const anchor = text.lastIndexOf('"total_token_usage"');
  if (anchor === -1) return null;
  const total = _matchObjectAfter(text, '"total_token_usage"', anchor);
  if (!total) return null;

  const last = _matchObjectAfter(text, '"last_token_usage"', anchor);
  const windowMatch = /"model_context_window":\s*(\d+)/.exec(text.slice(anchor));
  return {
    total,
    last,
    window: windowMatch ? parseInt(windowMatch[1], 10) || 0 : 0,
  };
}
|
|
298
|
+
|
|
299
|
+
// --- Estimate tier -------------------------------------------------------------
|
|
300
|
+
let _encoder = null;
let _encoderTried = false;

/**
 * Estimate the token count of a piece of text.
 * Tries the optional `gpt-tokenizer` module (loaded once, on first use) and
 * falls back to a ~4 characters per token heuristic when it is unavailable
 * or throws.
 * @param {*} str text to measure; falsy values count as empty
 * @returns {number} estimated tokens (0 for empty input)
 */
function _estimateTokens(str) {
  const text = String(str || '');
  if (text.length === 0) return 0;

  if (_encoderTried === false) {
    _encoderTried = true;
    try {
      _encoder = require('gpt-tokenizer');
    } catch {
      _encoder = null;
    }
  }

  const canEncode = Boolean(_encoder) && typeof _encoder.encode === 'function';
  if (canEncode) {
    try {
      return _encoder.encode(text).length;
    } catch {
      /* fall through */
    }
  }
  // Heuristic fallback (~4 chars/token) when the tokenizer dep isn't present.
  return Math.ceil(text.length / 4);
}
|
|
316
|
+
|
|
317
|
+
/**
 * Estimate tier: build an inexact summary by tokenizing parsed message text.
 * @param {Array} messages parsed messages; each contributes its `text` or `content`
 * @param {string} model model id, for the context-window fallback
 * @param {{contextWindow?: number, ctxWindow?: number}} [opts] window override
 * @returns {object} summary with `exact: false`
 */
function estimateFromMessages(messages, model, opts = {}) {
  const rows = Array.isArray(messages) ? messages : [];
  let estimated = 0;
  for (const msg of rows) {
    // Structured-capture rows (tool calls/results, reasoning, patches) carry
    // capped derived text; counting them would shift the estimate for
    // sessions imported after the capture upgrade. Only user/assistant prose
    // is counted, matching what the pre-capture importer produced.
    const isCaptureRow = Boolean(msg && msg.role === 'system' && msg.metadata && msg.metadata.kind);
    if (!isCaptureRow) {
      estimated += _estimateTokens(msg && (msg.text || msg.content));
    }
  }
  const breakdown = {
    input: 0, output: 0, cacheRead: 0, cacheCreate: 0, reasoning: 0, estimated,
  };
  return _finalize({
    total: estimated,
    ctx: estimated,
    ctxWindow: ctxWindowForModel(model, opts.contextWindow || opts.ctxWindow),
    exact: false,
    breakdown,
  });
}
|
|
333
|
+
|
|
334
|
+
// --- Shaping -------------------------------------------------------------------
|
|
335
|
+
/**
 * Shape raw numbers into the summary object returned to callers.
 * @param {object} raw
 * @param {*} raw.total session token total
 * @param {*} raw.ctx current context occupancy
 * @param {number} raw.ctxWindow context window; DEFAULT_CTX_WINDOW when not positive
 * @param {*} raw.exact whether the numbers come from provider-reported usage
 * @param {object} [raw.breakdown] per-category counters
 * @param {*} [raw.cost] optional cost; null when omitted
 * @returns {{total: number, ctx: number, ctxWindow: number, ctxPct: number,
 *   exact: boolean, totalLabel: string, breakdown: (object|null), cost: *}}
 */
function _finalize({ total, ctx, ctxWindow, exact, breakdown, cost }) {
  const window = ctxWindow > 0 ? ctxWindow : DEFAULT_CTX_WINDOW;
  const ctxValue = Number(ctx) || 0;
  // The percentage uses the occupancy clamped to [0, window]; `ctx` itself is
  // reported unclamped (only floored at 0).
  const ctxClamped = Math.max(0, Math.min(ctxValue, window));
  const ctxPct = window > 0 ? Math.round((ctxClamped / window) * 100) : 0;
  const nonNegativeInt = (value) => Math.max(0, Math.round(Number(value) || 0));
  return {
    total: nonNegativeInt(total),
    ctx: nonNegativeInt(ctx),
    ctxWindow: window,
    ctxPct,
    exact: !!exact,
    totalLabel: formatTokens(total),
    breakdown: breakdown || null,
    cost: (cost === undefined ? null : cost),
  };
}
|
|
350
|
+
|
|
351
|
+
// --- Per-file cache (incremental for Claude, tail for Codex) -------------------
|
|
352
|
+
// keyed by jsonlPath -> { size, mtimeMs, offset, acc, summary }
|
|
353
|
+
// Bounded LRU: a long-lived primary sees many transcripts (incl. rotated/compacted
|
|
354
|
+
// ones whose paths never recur), so an unbounded Map would leak. Map preserves
|
|
355
|
+
// insertion order; we evict the oldest key and refresh recency on hit.
|
|
356
|
+
const _cache = new Map();
const _CACHE_MAX = 500;

/**
 * Insert or refresh a cache entry, evicting the least recently set key when
 * the cache grows beyond _CACHE_MAX.
 * @param {string} key cache key (transcript path)
 * @param {object} val cache entry
 * @returns {void}
 */
function _cacheSet(key, val) {
  // Delete-then-set moves the key to the end of the Map's insertion order.
  _cache.delete(key);
  _cache.set(key, val);
  if (_cache.size <= _CACHE_MAX) return;
  const [oldest] = _cache.keys();
  if (oldest !== undefined) _cache.delete(oldest);
}
|
|
367
|
+
|
|
368
|
+
/**
 * `fs.statSync` that reports failure as null instead of throwing.
 * @param {string} p path to stat
 * @returns {fs.Stats|null}
 */
function _statSafe(p) {
  let info = null;
  try {
    info = fs.statSync(p);
  } catch {
    info = null;
  }
  return info;
}
|
|
371
|
+
|
|
372
|
+
// `statHint` ({size, mtimeMs}) lets a caller that already freshly stat'd the file
|
|
373
|
+
// (e.g. _sessionPayload via _activeSessionFileInfo) skip a redundant statSync.
|
|
374
|
+
/**
 * Compute the exact token summary for a transcript file, using the per-file
 * cache.
 *
 * Codex transcripts are answered from the last cumulative token_count event
 * in the file tail. Everything else is treated as a Claude-shaped JSONL and
 * scanned incrementally: only bytes after the cached offset are read.
 *
 * The cached offset advances only past complete (newline-terminated) lines. A
 * trailing line without a newline is still reflected in the returned summary,
 * but is scanned into a throwaway copy of the accumulator, so a line caught
 * mid-append is re-read from its start on the next call rather than being
 * split into two fragments.
 *
 * @param {string} provider agent type
 * @param {string} jsonlPath transcript path
 * @param {string} model model id, for the context-window fallback
 * @param {{size: number, mtimeMs: number}} [statHint] fresh stat from the caller
 * @param {{contextWindow?: number, ctxWindow?: number}} [opts] window override
 * @returns {object|null} summary, or null when the file is missing, unreadable,
 *   or contains no usage
 */
function _computeFromFile(provider, jsonlPath, model, statHint, opts = {}) {
  const st = (statHint && Number.isFinite(statHint.size) && Number.isFinite(statHint.mtimeMs))
    ? statHint
    : _statSafe(jsonlPath);
  if (!st || (typeof st.isFile === 'function' && !st.isFile())) return null;
  const sizeKey = st.size;
  const mtimeKey = st.mtimeMs;
  const cached = _cache.get(jsonlPath);
  if (cached && cached.size === sizeKey && cached.mtimeMs === mtimeKey && cached.summary) {
    _cacheSet(jsonlPath, cached); // refresh LRU recency
    return cached.summary;
  }

  if (_isCodex(provider)) {
    const c = _codexSummaryGrowing(jsonlPath, st.size);
    if (!c) return null;
    const tt = c.total || {};
    // "total" = genuinely-new tokens this session (exclude cached re-reads, which
    // are re-counted every turn and would balloon into the billions). Codex's
    // input_tokens INCLUDES cached_input_tokens, so subtract them out.
    const cachedIn = Number(tt.cached_input_tokens) || 0;
    const nonCachedIn = Math.max(0, (Number(tt.input_tokens) || 0) - cachedIn);
    const total = nonCachedIn + (Number(tt.output_tokens) || 0) + (Number(tt.reasoning_output_tokens) || 0);
    const ctx = _codexCurrentContextTokens(c);
    const summary = _finalize({
      total, ctx, ctxWindow: c.window || ctxWindowForModel(model, opts.contextWindow || opts.ctxWindow), exact: true,
      breakdown: {
        input: Number(tt.input_tokens) || 0,
        output: Number(tt.output_tokens) || 0,
        cacheRead: Number(tt.cached_input_tokens) || 0,
        cacheCreate: 0,
        reasoning: Number(tt.reasoning_output_tokens) || 0,
      },
    });
    _cacheSet(jsonlPath, { size: sizeKey, mtimeMs: mtimeKey, summary });
    return summary;
  }

  // Claude (and any other provider whose transcript matches the Claude usage
  // shape): incremental append-scan.
  let acc;
  let offset;
  if (cached && cached.acc && st.size >= cached.offset) {
    acc = cached.acc;
    offset = cached.offset;
  } else {
    acc = { input: 0, output: 0, cacheRead: 0, cacheCreate: 0, lastTurn: 0, maxPrompt: 0, autoCompactAt: 0 };
    offset = 0;
  }
  // `view` holds the totals reported for this call. It is `acc` itself unless
  // an unterminated trailing line has to be included provisionally.
  let view = acc;
  if (st.size > offset) {
    let data;
    try {
      const fd = fs.openSync(jsonlPath, 'r');
      try {
        const buf = Buffer.alloc(st.size - offset);
        // Trust the bytes actually read: a stale size hint must not add padding.
        const bytesRead = fs.readSync(fd, buf, 0, buf.length, offset);
        data = buf.subarray(0, bytesRead);
      } finally { fs.closeSync(fd); }
    } catch { return null; }
    // Commit only whole lines to the persistent accumulator and offset.
    const completeLen = data.lastIndexOf(0x0a) + 1;
    if (completeLen > 0) {
      _scanClaudeChunk(data.toString('utf8', 0, completeLen), acc);
      offset += completeLen;
    }
    if (completeLen < data.length) {
      view = { ...acc };
      _scanClaudeChunk(data.toString('utf8', completeLen), view);
    }
  }
  // "total" = genuinely-new tokens (non-cached input + new cache-creation +
  // output). cache_READ is the context re-read every turn; including it balloons
  // the count into the billions, so it lives in the breakdown/tooltip only.
  const total = view.input + view.cacheCreate + view.output;
  if (view.input + view.output + view.cacheRead + view.cacheCreate === 0) {
    // No usage found (e.g. not actually a Claude transcript). Let caller fall
    // back to estimation rather than showing 0.
    _cacheSet(jsonlPath, { size: sizeKey, mtimeMs: mtimeKey, acc, offset, summary: null });
    return null;
  }
  const summary = _finalize({
    total,
    ctx: view.lastTurn || total,
    ctxWindow: _effectiveCtxWindow(ctxWindowForModel(model, opts.contextWindow || opts.ctxWindow), { maxPrompt: view.maxPrompt, autoCompactAt: view.autoCompactAt }),
    exact: true,
    breakdown: {
      input: view.input, output: view.output,
      cacheRead: view.cacheRead, cacheCreate: view.cacheCreate, reasoning: 0,
    },
  });
  _cacheSet(jsonlPath, { size: sizeKey, mtimeMs: mtimeKey, acc, offset, summary });
  return summary;
}
|
|
457
|
+
|
|
458
|
+
/**
 * Compute the token-usage summary for a session.
 *
 * The transcript file is tried first when a path is given and the provider is
 * Claude, Codex, or unspecified; otherwise (or when the file yields nothing)
 * the summary is estimated from the parsed messages.
 *
 * @param {object} o
 * @param {string} o.provider        agent type: 'claude' | 'codex' | 'cursor' | ...
 * @param {string} [o.jsonlPath]     transcript path (Claude/Codex authoritative)
 * @param {string} [o.model]         model id, for the ctx-window fallback
 * @param {Array}  [o.messages]      parsed messages, for the estimate tier
 * @param {number} [o.contextWindow] provider/catalog context-window override
 * @param {number} [o.ctxWindow]     alias used when `contextWindow` is not set
 * @param {object} [o.stat]          a fresh {size, mtimeMs} the caller already stat'd,
 *   to skip a redundant statSync (must be current — used as the cache key + read range)
 * @returns {object|null} summary, or null when nothing is computable
 */
function computeSessionTokenUsage({ provider, jsonlPath, model, messages, stat, contextWindow, ctxWindow } = {}) {
  const opts = { contextWindow: contextWindow || ctxWindow };

  if (jsonlPath) {
    const readsTranscript = !provider || _isClaude(provider) || _isCodex(provider);
    if (readsTranscript) {
      const fromFile = _computeFromFile(provider, jsonlPath, model, stat, opts);
      if (fromFile) return fromFile;
    }
  }

  const hasMessages = Array.isArray(messages) && messages.length;
  if (!hasMessages) return null;
  return estimateFromMessages(messages, model, opts);
}
|
|
481
|
+
|
|
482
|
+
/** Test hook: drop every entry from the per-file cache. */
function _resetCacheForTests() {
  _cache.clear();
}
|
|
483
|
+
/** Test hook: number of entries currently held in the per-file cache. */
function _cacheSizeForTests() {
  const entryCount = _cache.size;
  return entryCount;
}
|
|
484
|
+
|
|
485
|
+
module.exports = {
|
|
486
|
+
computeSessionTokenUsage,
|
|
487
|
+
estimateFromMessages,
|
|
488
|
+
formatTokens,
|
|
489
|
+
ctxWindowForModel,
|
|
490
|
+
_effectiveCtxWindow,
|
|
491
|
+
_resetCacheForTests,
|
|
492
|
+
_cacheSizeForTests,
|
|
493
|
+
_CACHE_MAX,
|
|
494
|
+
};
|