create-walle 0.9.26 → 0.9.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "create-walle",
3
- "version": "0.9.26",
3
+ "version": "0.9.27",
4
4
  "description": "CTM + Wall-E — AI coding dashboard and personal digital twin agent. Multi-agent terminal for Claude Code, Codex, Gemini, Aider, OpenCode, and more, plus prompt editor, task queue, remote phone and tablet access, code/doc review, and an agent that learns from Slack, email & calendar.",
5
5
  "bin": {
6
6
  "create-walle": "bin/create-walle.js"
@@ -2522,10 +2522,12 @@ const HOTKEY_BUILD_MARKER = path.join(os.homedir(), '.walle', 'bundles', '.ctm-h
2522
2522
  const SCREEN_AUTH_BINARY = path.join(os.homedir(), '.local', 'bin', 'ctm-screen-auth');
2523
2523
  const SCREEN_AUTH_SWIFT_SOURCE = path.join(__dirname, 'bin', 'ctm-screen-auth.swift');
2524
2524
  const SCREEN_AUTH_SIGN_IDENTIFIER = 'com.walle.ctm';
2525
- // Full-screen, non-interactive capture (-x = no camera sound). The hotkey/button should produce a
2526
- // screenshot immediately; the old '-i' popped a region-select crosshair that, when not drag-completed,
2527
- // exited with no file and logged as "cancelled" — the #1 "the screenshot does nothing" report.
2528
- const SCREEN_CAPTURE_NODE_SCRIPT = "require('child_process').execFileSync('/usr/sbin/screencapture',['-x',process.argv[1]],{stdio:'inherit'})";
2525
+ // Interactive region capture (-i): pops the crosshair so the user drags a region (or hits Space for
2526
+ // a window, or Escape to cancel). This is what users expect from a screenshot hotkey/button — '-x'
2527
+ // (full-screen) grabbed everything with no choice. A cancelled selection exits 0 with no file; the
2528
+ // capture handler treats a missing file as a clean cancel (HTTP 400 "Screenshot cancelled"), so the
2529
+ // old "-i logs as a failure" concern no longer applies.
2530
+ const SCREEN_CAPTURE_NODE_SCRIPT = "require('child_process').execFileSync('/usr/sbin/screencapture',['-i',process.argv[1]],{stdio:'inherit'})";
2529
2531
  const SCREEN_AUTH_NODE_SCRIPT = "const cp=require('child_process');const r=cp.spawnSync(process.argv[1],{encoding:'utf8'});if(r.stdout)process.stdout.write(r.stdout);if(r.stderr)process.stderr.write(r.stderr);process.exit(r.status==null?1:r.status);";
2530
2532
  // Like SCREEN_AUTH_NODE_SCRIPT but runs the helper in --preflight mode (report grant, NEVER prompt)
2531
2533
  // so we can probe several node identities and pick one already granted without popping dialogs.
@@ -2935,7 +2937,7 @@ function screenshotCaptureCommand(tmpFile, context = {}) {
2935
2937
  cmdArgs: [context.realNode, '-e', SCREEN_CAPTURE_NODE_SCRIPT, tmpFile],
2936
2938
  };
2937
2939
  }
2938
- return { cmd: '/usr/sbin/screencapture', cmdArgs: ['-x', tmpFile] };
2940
+ return { cmd: '/usr/sbin/screencapture', cmdArgs: ['-i', tmpFile] };
2939
2941
  }
2940
2942
 
2941
2943
  // Spawn the Screen Recording permission helper under the same macOS responsible process
@@ -3037,7 +3039,10 @@ async function handleScreenshot(req, res) {
3037
3039
  // remain responsive and allows screencapture to work across all monitors.
3038
3040
  const tCap = Date.now();
3039
3041
  await new Promise((resolve, reject) => {
3040
- execFile(cmd, cmdArgs, { timeout: 30000, encoding: 'utf8' }, (err, _stdout, stderr) => {
3042
+ // Interactive (-i) keeps the crosshair up until the user drags/clicks/Escapes — give them a
3043
+ // generous window (2 min) so a deliberating user isn't killed mid-selection and mis-reported
3044
+ // as a failure. An idle crosshair costs nothing; the timeout only bounds a truly abandoned one.
3045
+ execFile(cmd, cmdArgs, { timeout: 120000, encoding: 'utf8' }, (err, _stdout, stderr) => {
3041
3046
  if (err) { if (stderr) err._stderr = stderr; reject(err); } else resolve();
3042
3047
  });
3043
3048
  });
@@ -167,4 +167,33 @@ function buildPaginatedPageResponse({
167
167
  return { ...pageMeta, messages: visibleMessages, ...finalExtra };
168
168
  }
169
169
 
170
- module.exports = { applyExclusions, sessionMessageTextHash, projectSessionMessagesPage, buildPaginatedPageResponse };
170
+ // Stage C: the NON-paginated full read. Mirrors server.js sendMessages' projection + response
171
+ // assembly EXACTLY so the worker can project + serialize the full conversation off the main loop
172
+ // (the on-main merge/dedupText + exclusions sha256/msg + image-refs + stringify over a 3k-16k-msg
173
+ // array was the profiled "reading conversational logs" freeze). Two response shapes, matching
174
+ // sendMessages: a BARE ARRAY when the caller passed no extra (the common conversation read), else
175
+ // { messages, ...finalExtra } with the same timeline fields sendMessages adds when hasCallerExtra.
176
+ function buildFullMessagesResponse({
177
+ baseMessages, streamEvents = [], skipStreamTail = false, exclusionRows = [], filterCodexSynthetic = false,
178
+ imageManifests = null, extra = {}, publicIdentity = null, bareArray = false,
179
+ } = {}) {
180
+ const proj = projectSessionMessagesPage({
181
+ baseMessages, streamEvents, skipStreamTail, exclusionRows, filterCodexSynthetic, imageManifests, allowEmptyBase: true,
182
+ });
183
+ const visibleMessages = proj.messages;
184
+ // Bare array: non-paginated read with no caller extra (sendMessages returns JSON.stringify(arr)).
185
+ if (bareArray) return visibleMessages;
186
+ const visibleTailAdded = Math.max(0, (proj.visibleMergedCount || 0) - (proj.visibleBaseCount || 0));
187
+ const finalExtra = { ...extra };
188
+ delete finalExtra.skipStreamTail;
189
+ finalExtra.timelineIdentity = publicIdentity || null;
190
+ finalExtra.timelineFreshness = visibleTailAdded > 0 ? 'hot-tail' : 'durable';
191
+ finalExtra.streamEventCount = proj.streamEventCount || 0;
192
+ if (visibleTailAdded > 0) {
193
+ finalExtra.source = _sourceWithStreamTail(finalExtra.source, visibleTailAdded);
194
+ finalExtra.streamTailAdded = visibleTailAdded;
195
+ }
196
+ return { messages: visibleMessages, ...finalExtra };
197
+ }
198
+
199
+ module.exports = { applyExclusions, sessionMessageTextHash, projectSessionMessagesPage, buildPaginatedPageResponse, buildFullMessagesResponse };
@@ -12327,6 +12327,15 @@ const _CODEX_BUSY_STATUS_LINE_RE = /^(?:(?:waiting\s+for\s+background\s+terminal
12327
12327
  const _CODEX_BUSY_COMPACT_STATUS_LINE_RE = /^working\s+(?:\d+(?::\d+){0,2}|\d+(?:\.\d+)?\s*(?:ms|s|sec|secs|m|min|mins|h|hr|hrs))$/iu;
12328
12328
  const _CODEX_BUSY_WORD = 'working';
12329
12329
  const _CODEX_BUSY_HINT_RE = /esc\s+to\s+interrupt/i;
12330
+ // A width-reflow strand can paint the Codex "Working (12s • esc to interrupt)"
12331
+ // status line over trailing scrollback content, leaving a merged line such as
12332
+ // "Working RSA-1ES046-SHA256:DHE-RSA-AES256-SHA256:HIGH:!aNULL:!…". The strict
12333
+ // anchored REs above miss it, so a genuinely-running turn whose PTY has gone
12334
+ // briefly quiet gets demoted to Idle. Recognize the surviving "Working" prefix
12335
+ // when the trailing remainder is clearly NOT prose (code/cipher-like garble),
12336
+ // and never for ordinary "Working <word>" output ("working tree", "working on").
12337
+ const _CODEX_BUSY_GARBLE_PROSE_RE = /^working\s+(?:tree|dir|directory|copy|set|version|branch|on|in|with|through|from|for|as|at|to|and|but|so|the|a|an|my|your|our|this|that|it|now|here|hard|fine|well|great|correctly|properly)\b/i;
12338
+ const _CODEX_BUSY_GARBLE_SIGNAL_RE = /[:!]|[A-Z0-9]{3,}[-:]|\besc\b/;
12330
12339
  const _CODEX_SKILL_WARNING_RE = /^(?:⚠\s*)?Skipped loading\s+\d+\s+skill\(s\)\s+due to invalid SKILL\.md files\./i;
12331
12340
  const _CODEX_SKILL_DESCRIPTION_RE = /^\/.*\/SKILL\.md:\s+invalid description:\s+exceeds maximum length of\s+\d+\s+characters$/i;
12332
12341
  const _GEMINI_STATUS_FRAGMENT_RE = /^(?:[\s\d•◦·∙●○✦✧◆◇◐◓◑◒|\/\\-]+|Thinking\.{0,3}|Working\.{0,3}|Running\.{0,3}|Responding\.{0,3}|Loading\.{0,3}|esc\s+to\s+(?:cancel|interrupt)|ctrl\+c\s+to\s+(?:quit|cancel)|press\s+enter\s+to\s+send|shift\+enter\s+for\s+newline)$/i;
@@ -12357,11 +12366,18 @@ function _normalizeCodexStatusLineText(text) {
12357
12366
  .replace(/^[\s•◦·∙●○]+\s*/, '');
12358
12367
  }
12359
12368
 
12369
+ function _isGarbledCodexBusyStatusLine(line) {
12370
+ if (!/^working\b/i.test(line)) return false;
12371
+ if (_CODEX_BUSY_GARBLE_PROSE_RE.test(line)) return false;
12372
+ return _CODEX_BUSY_GARBLE_SIGNAL_RE.test(line.slice(_CODEX_BUSY_WORD.length));
12373
+ }
12374
+
12360
12375
  function _isCodexBusyStatusLineText(text) {
12361
12376
  const line = _normalizeCodexStatusLineText(text);
12362
12377
  if (!line || line.length > 240) return false;
12363
12378
  return _CODEX_BUSY_STATUS_LINE_RE.test(line) ||
12364
- _CODEX_BUSY_COMPACT_STATUS_LINE_RE.test(line);
12379
+ _CODEX_BUSY_COMPACT_STATUS_LINE_RE.test(line) ||
12380
+ _isGarbledCodexBusyStatusLine(line);
12365
12381
  }
12366
12382
 
12367
12383
  function _inputMayResolveWaiting(data, session) {
@@ -33241,7 +33257,30 @@ function _maybeReconcile(s, id, reason, extra) {
33241
33257
  // Only compare when the server measured at the client's CURRENT dims; otherwise
33242
33258
  // the fingerprints legitimately differ on dims and a resize/reconcile is already
33243
33259
  // in flight elsewhere. Comparing across dims would cause a reconcile loop.
33244
- if (s._serverFpCols != null && s._serverFpRows != null && (s.term.cols !== s._serverFpCols || s.term.rows !== s._serverFpRows)) return;
33260
+ if (s._serverFpCols != null && s._serverFpRows != null && (s.term.cols !== s._serverFpCols || s.term.rows !== s._serverFpRows)) {
33261
+ // The server's fingerprint was measured at a DIFFERENT width than our grid now
33262
+ // holds — e.g. a background-tab pane-resize refit the grid on switch but no fresh
33263
+ // heartbeat has arrived at the new width. We still skip the cross-dims compare
33264
+ // (it would loop), but a plain skip here leaves an idle session you just switched
33265
+ // to STUCK forever: no output ⇒ no heartbeat ⇒ the onHeartbeat dims-reassert never
33266
+ // runs, so the server keeps serializing its stale-wide frame and the stranded
33267
+ // composer / branch chip never heals (the on-switch "check + redraw" the user
33268
+ // expects). Re-assert our dims ONCE here too (rate-limited via the SAME
33269
+ // _dimsReassertAt the heartbeat path uses) so the PTY resizes, the agent repaints
33270
+ // clean at our width, and the next heartbeat is comparable. Skipped while streaming
33271
+ // (force bypasses the SIGWINCH-during-stream deferral → duplicate scrollback); the
33272
+ // heartbeat reassert covers that case once output quiesces. Uniform for every
33273
+ // provider — unlike the claude-only stale-width detector, this needs no agent type
33274
+ // or stranded-fragment shape, so it also unsticks an idle Codex frame.
33275
+ const nowTs = Date.now();
33276
+ if (!_isSessionStreaming(s)
33277
+ && (!s._dimsReassertAt || nowTs - s._dimsReassertAt > 2000)
33278
+ && Number.isFinite(s.term.cols) && Number.isFinite(s.term.rows)) {
33279
+ s._dimsReassertAt = nowTs;
33280
+ try { _sendTerminalResizeIfChanged(s, id, 'reconcile-dims-reassert', { force: true, cols: s.term.cols, rows: s.term.rows }); } catch {}
33281
+ }
33282
+ return;
33283
+ }
33245
33284
  let clientFp;
33246
33285
  try { clientFp = window.TerminalReconciler.clientFingerprint(s.term); } catch { return; }
33247
33286
  const focusHelper = (typeof _activeTerminalHasFocus === 'function') ? _activeTerminalHasFocus(s) : (document.hasFocus() && state.activeTab === id);
@@ -35986,6 +36025,15 @@ function onSnapshot(msg) {
35986
36025
  _terminalRenderCheckFullyRendered(s, 'snapshot-stale-no-data-skip');
35987
36026
  if (state.activeTab === msg.id) focusTerminalIfSafe(msg.id);
35988
36027
  _primeLatestAnswerForTerminalView(msg.id);
36028
+ // The server skipped this reflow because output arrived after the request — the agent is
36029
+ // still emitting (e.g. a post-/compact burst). Nothing else re-requests promptly, so the
36030
+ // render (and the user's typed-input echo) can lag until the 3s self-check. Schedule an
36031
+ // authoritative recovery so a fresh reflow lands ~1s after the output quiets; it
36032
+ // rate-limits itself, defers while typing, and — because each retry that ALSO stale-skips
36033
+ // re-enters this branch — forms a retry-until-settle loop that self-terminates when a fresh
36034
+ // frame lands (snapshot-done clears _needsAuthoritativeSnapshot at ~36319).
36035
+ s._needsAuthoritativeSnapshot = true;
36036
+ _scheduleAuthoritativeSnapshotRecovery(s, msg.id, 'reflow-stale-skip', { delayMs: 900, minGapMs: 900 });
35989
36037
  return;
35990
36038
  }
35991
36039
  // A hot terminal that already has healthy buffer content does not need a
@@ -10524,6 +10524,45 @@ function _standupSummaryFor(source, ctmSessionId, agentSessionId, identity = nul
10524
10524
  return null;
10525
10525
  }
10526
10526
 
10527
+ // Per-session timeline-summary cache. The materialized standup snapshot refreshes every
10528
+ // STANDUP_MATERIALIZED_REFRESH_AFTER_MS (1.5s) and re-projects EVERY active session — each projection
10529
+ // is a full-conversation merge (conversation-tail-merge dedupText) + summarize. With 16 mostly-idle
10530
+ // sessions, that re-derived the whole history of every session ~every 1.5s on the main loop (the
10531
+ // CPU-profiler's top `dedupText` freeze frame). Cache the per-session summary keyed on a CHEAP
10532
+ // content-version — the durable row count (countSessionMessageRows, an indexed COUNT) plus the live
10533
+ // stream summary's latest-activity ms — so an UNCHANGED session returns its cached summary instantly
10534
+ // and only sessions with new durable rows or new stream output pay the projection. A generous max-age
10535
+ // is a safety net against any version signal a change slips past. Disable with
10536
+ // CTM_TIMELINE_SUMMARY_CACHE=0; tune staleness with CTM_TIMELINE_SUMMARY_CACHE_MAX_AGE_MS.
10537
+ const _timelineStandupSummaryCache = new Map(); // ctmId → { version, summary, at }
10538
+ const _TIMELINE_SUMMARY_CACHE_MAX_AGE_MS = Number(process.env.CTM_TIMELINE_SUMMARY_CACHE_MAX_AGE_MS ?? 300000);
10539
+ function _timelineSummaryCacheVersion(ctmId, rawSummary) {
10540
+ let count = 0;
10541
+ try { count = dbModule.countSessionMessageRows(ctmId) || 0; } catch {}
10542
+ let streamMs = 0;
10543
+ try {
10544
+ const fn = require('./lib/session-timeline-summary')._private?._summaryActivityMs;
10545
+ streamMs = rawSummary && typeof fn === 'function' ? (fn(rawSummary) || 0) : 0;
10546
+ } catch {}
10547
+ return `${count}:${streamMs}`;
10548
+ }
10549
+ async function _cachedTimelineSummaryForStandup(sessionId, identity, rawSummary) {
10550
+ if (process.env.CTM_TIMELINE_SUMMARY_CACHE === '0') return _timelineSummaryForStandup(sessionId, identity);
10551
+ const ctmId = _resolveOwnerCtmSessionId(sessionId) || sessionId;
10552
+ const version = _timelineSummaryCacheVersion(ctmId, rawSummary);
10553
+ const now = Date.now();
10554
+ const hit = _timelineStandupSummaryCache.get(ctmId);
10555
+ if (hit && hit.version === version && (now - hit.at) < _TIMELINE_SUMMARY_CACHE_MAX_AGE_MS) {
10556
+ return hit.summary;
10557
+ }
10558
+ const summary = await _timelineSummaryForStandup(sessionId, identity);
10559
+ // Bound memory: this only ever keys active sessions, but a long-lived process churns through many —
10560
+ // drop the whole map if it grows past a sane ceiling (cheap to repopulate).
10561
+ if (_timelineStandupSummaryCache.size > 512) _timelineStandupSummaryCache.clear();
10562
+ _timelineStandupSummaryCache.set(ctmId, { version, summary, at: now });
10563
+ return summary;
10564
+ }
10565
+
10527
10566
  async function _timelineSummaryForStandup(sessionId, identity = null) {
10528
10567
  const resolved = identity || _resolveSessionTimelineIdentity(sessionId);
10529
10568
  let timeline = null;
@@ -11275,7 +11314,7 @@ async function _buildStandupSnapshot(options = {}) {
11275
11314
  const timelineIdentity = _resolveSessionTimelineIdentity(payload.id);
11276
11315
  const rawSummary = _standupSummaryFor(source, payload.id, agentSessionId, timelineIdentity);
11277
11316
  const timelineSummary = includeTimeline
11278
- ? await _timelineSummaryForStandup(payload.id, timelineIdentity)
11317
+ ? await _cachedTimelineSummaryForStandup(payload.id, timelineIdentity, rawSummary)
11279
11318
  : null;
11280
11319
  const rawMergedSummary = mergeTimelineSummaries(rawSummary, timelineSummary);
11281
11320
  let summary = rawMergedSummary ? {
@@ -12915,6 +12954,28 @@ async function _buildSessionMessagesPageResponseOffThread(payload) {
12915
12954
  });
12916
12955
  }
12917
12956
 
12957
+ // Stage C: the NON-paginated full read — project (merge + exclusions + image-refs) AND serialize the
12958
+ // whole conversation OFF the main loop (the on-main projection over a 3k-16k-msg array was the
12959
+ // profiled "reading conversational logs" freeze). Same shared projection module → byte-identical
12960
+ // on-main fallback (named apiSessionMessages:serializeFullResponse) on pool failure/degrade.
12961
+ async function _buildSessionMessagesFullResponseOffThread(payload) {
12962
+ const onMain = () => {
12963
+ const obj = require('./lib/session-messages-projection').buildFullMessagesResponse(payload);
12964
+ const str = _perfTracker.runSyncProbed('apiSessionMessages:serializeFullResponse',
12965
+ () => JSON.stringify(obj),
12966
+ { context: () => ({ msgs: Array.isArray(obj) ? obj.length : (Array.isArray(obj?.messages) ? obj.messages.length : 0), fallback: 1 }) });
12967
+ return Buffer.from(str, 'utf8');
12968
+ };
12969
+ return _offthreadRead({
12970
+ breaker: _sessionMessagesPageBreaker,
12971
+ op: 'buildSessionMessagesFullResponse',
12972
+ timeoutMs: _SESSION_MESSAGES_READPOOL_TIMEOUT_MS,
12973
+ payload,
12974
+ onSuccess: (res) => (res && Buffer.isBuffer(res.__resultBuffer)) ? res.__resultBuffer : onMain(),
12975
+ onFail: () => onMain(),
12976
+ });
12977
+ }
12978
+
12918
12979
  // Wave 3.1: off-thread the durable-tail reconcile's bounded JSONL parse. The cheap gate
12919
12980
  // (resolve the live JSONL path/size + the imported byte HWM) stays on main; only when the
12920
12981
  // importer is lagging (live > imported) do we hit the pool, so the common "caught up" case
@@ -12984,8 +13045,66 @@ async function apiSessionMessages(req, res, url) {
12984
13045
  return;
12985
13046
  }
12986
13047
 
13048
+ // Off-thread NON-paginated full read: resolve the small projection inputs on main, then merge +
13049
+ // exclude + image-ref + serialize the WHOLE conversation on the read-pool worker (mirrors
13050
+ // sendPaginatedMessagePage, for the full-array case). Removes the on-main projection that froze
13051
+ // the loop while reading a large conversation. Same shared module → byte-identical on-main
13052
+ // fallback on pool failure. Cacheable with a stream-event signature folded into the key (so a hot
13053
+ // tail rebuilds), exactly like the page path — no main-thread merge needed to decide cacheability.
13054
+ const _sendFullMessagesOffThread = async (baseMessages, extra = {}) => {
13055
+ const skipStreamTail = extra.skipStreamTail === true;
13056
+ let identity = null;
13057
+ let streamEvents = [];
13058
+ if (!skipStreamTail) {
13059
+ identity = _resolveSessionTimelineIdentity(sessionId);
13060
+ streamEvents = collectTimelineStreamEvents(identity, {
13061
+ limit: 200,
13062
+ getRecentEvents: (lookupId, lookupLimit) => _getRecentStreamEventsForTimelineId(lookupId, lookupLimit),
13063
+ });
13064
+ }
13065
+ const _extraForKey = { ...extra };
13066
+ delete _extraForKey.skipStreamTail;
13067
+ // Bare array iff the caller passed no extra — matches sendMessages' `JSON.stringify(visibleMessages)`.
13068
+ const bareArray = Object.keys(_extraForKey).length === 0;
13069
+ const _cacheable = CTM_MSG_RESPONSE_CACHE_ENABLED;
13070
+ let _cacheKey = null;
13071
+ if (_cacheable) {
13072
+ _cacheKey = buildResponseCacheKey({
13073
+ sessionId,
13074
+ sourceVersion: _msgBaseSignature(baseMessages),
13075
+ exclusionsVersion: _msgExclusionsSignature(sessionId),
13076
+ offset, limit,
13077
+ mode: `mfull:${skipStreamTail ? '1' : '0'}:s${_streamEventsSignature(streamEvents)}:${JSON.stringify(_extraForKey)}`,
13078
+ });
13079
+ if (!noCache) {
13080
+ const _hit = _msgResponseCache.get(_cacheKey);
13081
+ if (_hit) {
13082
+ res.writeHead(200, { 'Content-Type': 'application/json', 'X-Msg-Cache': 'hit' });
13083
+ res.end(_hit.buffer);
13084
+ return;
13085
+ }
13086
+ }
13087
+ }
13088
+ const exclusionRows = _loadSessionMessageExclusions(sessionId);
13089
+ const filterCodexSynthetic = _shouldFilterCodexSyntheticMessages(sessionId, baseMessages, {});
13090
+ const imageManifests = _resolveImageManifestsForProjection(sessionId, baseMessages, streamEvents);
13091
+ const buffer = await _buildSessionMessagesFullResponseOffThread({
13092
+ baseMessages, streamEvents, skipStreamTail, exclusionRows, filterCodexSynthetic, imageManifests,
13093
+ extra, publicIdentity: _publicTimelineIdentity(identity), bareArray,
13094
+ });
13095
+ res.writeHead(200, { 'Content-Type': 'application/json' });
13096
+ if (_cacheable && _cacheKey) _msgResponseCache.set(_cacheKey, { buffer });
13097
+ res.end(buffer);
13098
+ };
13099
+
12987
13100
  const sendMessages = (messages, extra = {}) => {
12988
13101
  const baseMessages = Array.isArray(messages) ? messages : [];
13102
+ // Large non-paginated read → project + serialize OFF the main loop (the conversation-read freeze).
13103
+ // Small reads stay on main (a worker round-trip + array clone isn't worth it); paginated reads
13104
+ // already have their own off-thread page path. CTM_MSG_OFFTHREAD_FULL=0 forces the on-main path.
13105
+ if (!paginated && CTM_MSG_OFFTHREAD_FULL_ENABLED && baseMessages.length >= CTM_MSG_OFFTHREAD_FULL_MIN) {
13106
+ return _sendFullMessagesOffThread(baseMessages, extra);
13107
+ }
12989
13108
  // Compute the stream-tail merge first (own attribution probe) so we can both name
12990
13109
  // it AND gate the serialized-response cache on whether the live tail adds anything.
12991
13110
  const merge = extra.skipStreamTail
@@ -14936,6 +15055,11 @@ const _sessionMessageExclusionsCache = new Map(); // sessionId -> { at, rows }
14936
15055
  // function of the output and served only when the live stream tail adds nothing
14937
15056
  // (merge.added === 0) — turns a steady-state poll into a Map lookup + res.end().
14938
15057
  const CTM_MSG_RESPONSE_CACHE_ENABLED = process.env.CTM_MSG_RESPONSE_CACHE !== '0';
15058
+ // Off-thread the NON-paginated full conversation read (merge + exclusions + image-refs + serialize)
15059
+ // once the base is large enough that a worker round-trip + array clone beats blocking the loop.
15060
+ // CTM_MSG_OFFTHREAD_FULL=0 disables (force on-main); CTM_MSG_OFFTHREAD_FULL_MIN tunes the threshold.
15061
+ const CTM_MSG_OFFTHREAD_FULL_ENABLED = process.env.CTM_MSG_OFFTHREAD_FULL !== '0';
15062
+ const CTM_MSG_OFFTHREAD_FULL_MIN = Math.max(1, Number(process.env.CTM_MSG_OFFTHREAD_FULL_MIN) || 800);
14939
15063
  const _msgResponseCache = createResponseCache({
14940
15064
  maxEntries: Math.max(16, Number(process.env.CTM_MSG_RESPONSE_CACHE_MAX) || 256),
14941
15065
  });
@@ -261,6 +261,16 @@ async function _runOp(op, payload = {}) {
261
261
  return { __transfer: true, buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
262
262
  }
263
263
 
264
+ case 'buildSessionMessagesFullResponse': {
265
+ // Stage C: the NON-paginated full read — project (merge + exclusions + image-refs) AND
266
+ // serialize the WHOLE conversation in the worker, returning BYTES (zero-copy). Removes the
267
+ // on-main projection that froze the loop while reading a large conversation. Returns either a
268
+ // bare array or { messages, ...extra } per payload.bareArray (mirrors server.js sendMessages).
269
+ const proj = require('../lib/session-messages-projection');
270
+ const obj = proj.buildFullMessagesResponse(payload);
271
+ return { __transfer: true, buffer: Buffer.from(JSON.stringify(obj), 'utf8') };
272
+ }
273
+
264
274
  case 'parseJsonlTail': {
265
275
  // Bounded (≤~1MB) tail read+parse of a live agent JSONL OFF the main loop — the
266
276
  // apiSessionMessages durable-tail reconcile (lagging importer on an actively-written
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "walle",
3
- "version": "0.9.26",
3
+ "version": "0.9.27",
4
4
  "private": true,
5
5
  "description": "Wall-E — your personal digital twin",
6
6
  "scripts": {
@@ -557,6 +557,8 @@ async function main() {
557
557
  }, {
558
558
  timeoutMs: options.timeoutMs || 0,
559
559
  heavy: options.heavy !== false,
560
+ lane: options.lane,
561
+ priority: options.priority,
560
562
  terminateOnTimeout: (options.timeoutMs || 0) > 0,
561
563
  });
562
564
  } catch (err) {
@@ -734,6 +736,7 @@ async function main() {
734
736
  run: () => ingest.runOnce(adapters),
735
737
  intervalMs: clampInterval(config.intervals?.ingest_ms, 60000),
736
738
  pool: 'io',
739
+ lane: 0,
737
740
  priority: 1,
738
741
  startDelayMs: 2000,
739
742
  onResult: (r) => {
@@ -750,6 +753,7 @@ async function main() {
750
753
  run: () => think.runOnce(),
751
754
  intervalMs: clampInterval(config.intervals?.think_ms, 120000),
752
755
  pool: 'llm',
756
+ lane: 0,
753
757
  priority: 2,
754
758
  startDelayMs: 5000,
755
759
  dependsOn: ['ingest'],
@@ -770,6 +774,7 @@ async function main() {
770
774
  run: () => runDueTasks(),
771
775
  intervalMs: clampInterval(config.intervals?.tasks_ms, 30000),
772
776
  pool: 'llm',
777
+ lane: 0,
773
778
  priority: 3,
774
779
  startDelayMs: 8000,
775
780
  onResult: (r) => {
@@ -793,6 +798,7 @@ async function main() {
793
798
  run: ({ modelOverride } = {}) => runDueSkills({ modelOverride }),
794
799
  intervalMs: clampInterval(config.intervals?.skills_ms, 300000),
795
800
  pool: 'llm',
801
+ lane: 1,
796
802
  priority: 4,
797
803
  startDelayMs: 11000,
798
804
  onResult: (r) => {
@@ -842,7 +848,9 @@ async function main() {
842
848
  },
843
849
  intervalMs: clampInterval(config.intervals?.training_ms, 300000),
844
850
  pool: 'ollama',
851
+ lane: 2,
845
852
  priority: 5,
853
+ restartCatchupPolicy: 'defer',
846
854
  startDelayMs: 20000,
847
855
  shouldRun: () => getShadowConfig().enabled,
848
856
  onError: (err) => console.error('[eval] Loop error:', err.message),
@@ -865,7 +873,9 @@ async function main() {
865
873
  },
866
874
  intervalMs: clampInterval(config.intervals?.replay_ms, 300000),
867
875
  pool: 'ollama',
876
+ lane: 2,
868
877
  priority: 5,
878
+ restartCatchupPolicy: 'defer',
869
879
  startDelayMs: 15000,
870
880
  shouldRun: () => getShadowConfig().enabled,
871
881
  onError: (err) => console.error('[replay] Loop error:', err.message),
@@ -879,6 +889,7 @@ async function main() {
879
889
  },
880
890
  intervalMs: clampInterval(config.intervals?.initiative_ms, 300000),
881
891
  pool: 'llm',
892
+ lane: 1,
882
893
  priority: 6,
883
894
  startDelayMs: 25000,
884
895
  debounceMs: 30000,
@@ -899,7 +910,9 @@ async function main() {
899
910
  run: () => reflect.runOnce(),
900
911
  intervalMs: clampInterval(config.intervals?.reflect_ms, 3600000),
901
912
  pool: 'llm',
913
+ lane: 2,
902
914
  priority: 7,
915
+ restartCatchupPolicy: 'defer',
903
916
  dependsOn: ['think'],
904
917
  startDelayMs: 30000,
905
918
  onResult: (r) => {
@@ -924,7 +937,9 @@ async function main() {
924
937
  },
925
938
  intervalMs: 600000,
926
939
  pool: 'io',
940
+ lane: 2,
927
941
  priority: 8,
942
+ restartCatchupPolicy: 'defer',
928
943
  startDelayMs: 30000,
929
944
  onError: (err) => console.error('[harvest] Loop error:', err.message),
930
945
  });
@@ -947,7 +962,9 @@ async function main() {
947
962
  },
948
963
  intervalMs: 30000,
949
964
  pool: 'embedding',
965
+ lane: 2,
950
966
  priority: 9,
967
+ restartCatchupPolicy: 'defer',
951
968
  startDelayMs: 35000,
952
969
  timeoutMs: 90000,
953
970
  onError: (err) => {
@@ -961,7 +978,9 @@ async function main() {
961
978
  run: async () => require('./loops/brain-optimize').runOnce(),
962
979
  intervalMs: 3600000, // hourly; self-gates to a one-shot via kv (brain:optimize:v1)
963
980
  pool: 'io',
981
+ lane: 2,
964
982
  priority: 4,
983
+ restartCatchupPolicy: 'defer',
965
984
  startDelayMs: 180000, // 3 min after boot — well past the boot storm
966
985
  timeoutMs: 120000,
967
986
  onError: (err) => console.error('[brain-optimize] Loop error:', err.message),
@@ -975,7 +994,9 @@ async function main() {
975
994
  },
976
995
  intervalMs: 24 * 60 * 60 * 1000, // daily
977
996
  pool: 'embedding',
997
+ lane: 2,
978
998
  priority: 10,
999
+ restartCatchupPolicy: 'defer',
979
1000
  startDelayMs: 120000, // wait 2min after startup
980
1001
  onResult: (r) => {
981
1002
  if (r.archived > 0) console.log(`[wall-e] Dedup: archived ${r.archived} duplicates`);
@@ -996,8 +1017,10 @@ async function main() {
996
1017
  },
997
1018
  intervalMs: 24 * 60 * 60 * 1000, // daily
998
1019
  pool: 'io',
1020
+ lane: 2,
999
1021
  priority: 11,
1000
- startDelayMs: 180000, // 3min after startup — well off the boot/readiness path
1022
+ restartCatchupPolicy: 'defer',
1023
+ startDelayMs: 185000, // just after 3min — well off the boot/readiness path and unique in the IO pool
1001
1024
  onError: (err) => {
1002
1025
  console.error('[wall-e] Daily backup error:', err.message);
1003
1026
  telemetry.trackError('daily-backup', err);
@@ -1011,7 +1034,9 @@ async function main() {
1011
1034
  run: async () => brain.runBrainRetention(),
1012
1035
  intervalMs: 60 * 60 * 1000,
1013
1036
  pool: 'io',
1037
+ lane: 2,
1014
1038
  priority: 11,
1039
+ restartCatchupPolicy: 'defer',
1015
1040
  startDelayMs: 90000,
1016
1041
  onResult: (r) => {
1017
1042
  const nonZero = Object.entries(r || {}).filter(([, v]) => typeof v === 'number' && v > 0);
@@ -1031,7 +1056,9 @@ async function main() {
1031
1056
  },
1032
1057
  intervalMs: 60 * 60 * 1000,
1033
1058
  pool: 'io',
1059
+ lane: 2,
1034
1060
  priority: 11,
1061
+ restartCatchupPolicy: 'defer',
1035
1062
  startDelayMs: 100000,
1036
1063
  onResult: (r) => {
1037
1064
  if (r && r.sent) console.log(`[wall-e] Question digest sent (${r.count}): ${JSON.stringify(r.results)}`);
@@ -1048,6 +1075,7 @@ async function main() {
1048
1075
  },
1049
1076
  intervalMs: 300000,
1050
1077
  pool: 'io',
1078
+ lane: 1,
1051
1079
  priority: 7,
1052
1080
  startDelayMs: 12000,
1053
1081
  shouldRun: () => {
@@ -1072,7 +1100,9 @@ async function main() {
1072
1100
  },
1073
1101
  intervalMs: 24 * 60 * 60 * 1000,
1074
1102
  pool: 'io',
1103
+ lane: 2,
1075
1104
  priority: 10,
1105
+ restartCatchupPolicy: 'defer',
1076
1106
  startDelayMs: 300000,
1077
1107
  onResult: (r) => {
1078
1108
  if (r.cleaned > 0) console.log(`[wall-e] Cleaned ${r.cleaned} old recordings`);
@@ -1096,7 +1126,9 @@ async function main() {
1096
1126
  },
1097
1127
  intervalMs: 24 * 60 * 60 * 1000,
1098
1128
  pool: 'io',
1129
+ lane: 2,
1099
1130
  priority: 10,
1131
+ restartCatchupPolicy: 'defer',
1100
1132
  startDelayMs: 295000,
1101
1133
  shouldRun: () => process.env.WALLE_TELEMETRY_SERVER === '1',
1102
1134
  onResult: (r) => {
@@ -1140,7 +1172,9 @@ async function main() {
1140
1172
  },
1141
1173
  intervalMs: 7 * 24 * 60 * 60 * 1000, // Weekly
1142
1174
  pool: 'llm',
1175
+ lane: 2,
1143
1176
  priority: 10,
1177
+ restartCatchupPolicy: 'defer',
1144
1178
  startDelayMs: 300000, // 5min delay on startup
1145
1179
  shouldRun: () => {
1146
1180
  // Only run if we have coding-agent benchmarks
@@ -1171,7 +1205,9 @@ async function main() {
1171
1205
  },
1172
1206
  intervalMs: 7 * 24 * 60 * 60 * 1000, // Weekly
1173
1207
  pool: 'llm',
1208
+ lane: 2,
1174
1209
  priority: 5,
1210
+ restartCatchupPolicy: 'defer',
1175
1211
  startDelayMs: 600000, // 10min delay on startup
1176
1212
  shouldRun: () => {
1177
1213
  // Run on Sundays only
@@ -1226,31 +1262,48 @@ async function main() {
1226
1262
  telemetry.trackError('boot_check', e);
1227
1263
  }
1228
1264
 
1229
- // Restart catch-up — replay jobs whose persisted nextEligibleAt fell
1230
- // while the daemon was down. Cap of 5 immediate runs (5s stagger);
1231
- // anything beyond that is deferred to the periodic tick. Borrowed
1232
- // from OpenClaw's runMissedJobs (src/cron/service/timer.ts:962-1040).
1233
- const restartCatchupOp = runtimeHealth.beginOperation('agent.restartCatchup');
1234
- try {
1235
- const catchup = await scheduler.runMissedJobs();
1236
- if (catchup.ran > 0 || catchup.deferred > 0) {
1237
- console.log(
1238
- `[wall-e] Restart catch-up: ran ${catchup.ran}, deferred ${catchup.deferred}`
1239
- );
1240
- telemetry.track('restart_catchup', {
1241
- ran: catchup.ran,
1242
- deferred: catchup.deferred,
1243
- });
1244
- }
1245
- restartCatchupOp.end({ ok: true, meta: { ran: catchup.ran || 0, deferred: catchup.deferred || 0 } });
1246
- } catch (e) {
1247
- restartCatchupOp.end({ ok: false, error: e });
1248
- console.warn('[wall-e] runMissedJobs threw (non-fatal):', e.message);
1249
- }
1250
-
1251
- // Start the scheduler
1265
+ // Start the scheduler before restart catch-up. Missed-job replay is useful,
1266
+ // but it must never be startup-critical or block the daemon from serving
1267
+ // chat/MCP requests after a restart.
1252
1268
  scheduler.start();
1253
1269
 
1270
+ // Restart catch-up — replay only small foreground jobs immediately; defer
1271
+ // expensive harvest/eval/maintenance work and persist that decision to the
1272
+ // runtime work-item queue for observability. This keeps the main process
1273
+ // responsive even after long downtime with many missed jobs.
1274
+ setTimeout(() => {
1275
+ (async () => {
1276
+ const restartCatchupOp = runtimeHealth.beginOperation('agent.restartCatchup');
1277
+ try {
1278
+ const catchup = await scheduler.runMissedJobs({
1279
+ nonBlocking: true,
1280
+ max: 3,
1281
+ staggerMs: 0,
1282
+ maxWorkMs: 5000,
1283
+ deferDelayMs: 120000,
1284
+ enqueueWorkItem: brain.enqueueRuntimeWorkItem,
1285
+ });
1286
+ if (catchup.ran > 0 || catchup.deferred > 0) {
1287
+ console.log(
1288
+ `[wall-e] Restart catch-up: dispatched ${catchup.ran}, deferred ${catchup.deferred}`
1289
+ );
1290
+ telemetry.track('restart_catchup', {
1291
+ ran: catchup.ran,
1292
+ deferred: catchup.deferred,
1293
+ non_blocking: 1,
1294
+ });
1295
+ }
1296
+ restartCatchupOp.end({
1297
+ ok: true,
1298
+ meta: { ran: catchup.ran || 0, deferred: catchup.deferred || 0, non_blocking: 1 },
1299
+ });
1300
+ } catch (e) {
1301
+ restartCatchupOp.end({ ok: false, error: e });
1302
+ console.warn('[wall-e] runMissedJobs threw (non-fatal):', e.message);
1303
+ }
1304
+ })().catch((e) => console.warn('[wall-e] Restart catch-up failed (non-fatal):', e.message));
1305
+ }, 1000).unref?.();
1306
+
1254
1307
  // Hot-reload config on file changes
1255
1308
  const { ConfigWatcher } = require('./lib/config-watcher');
1256
1309
  const configWatcher = new ConfigWatcher();