cawdex 1.35.74 → 1.35.76

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/README.md +5 -5
  2. package/bin/anycode.js +2 -2
  3. package/bin/cawdex.js +408 -408
  4. package/bin/ecc-hooks.cjs +11 -11
  5. package/dist/agents-md.d.ts +31 -0
  6. package/dist/agents-md.js +340 -0
  7. package/dist/agents-md.js.map +1 -0
  8. package/dist/agents.js +1424 -1424
  9. package/dist/api.d.ts +1 -0
  10. package/dist/api.js +19 -14
  11. package/dist/api.js.map +1 -1
  12. package/dist/autonomous-loops.js +287 -287
  13. package/dist/benchmark-repos.d.ts +31 -0
  14. package/dist/benchmark-repos.js +234 -8
  15. package/dist/benchmark-repos.js.map +1 -1
  16. package/dist/command-palette.js +4 -2
  17. package/dist/command-palette.js.map +1 -1
  18. package/dist/compaction.js +8 -8
  19. package/dist/config.js +51 -36
  20. package/dist/config.js.map +1 -1
  21. package/dist/content-engine.js +543 -543
  22. package/dist/context-brief.d.ts +4 -0
  23. package/dist/context-brief.js +230 -0
  24. package/dist/context-brief.js.map +1 -0
  25. package/dist/cost-tracker.d.ts +33 -14
  26. package/dist/cost-tracker.js +81 -19
  27. package/dist/cost-tracker.js.map +1 -1
  28. package/dist/coverage.js +39 -39
  29. package/dist/docs-sync.js +98 -98
  30. package/dist/evaluation.js +452 -452
  31. package/dist/fixed-footer.d.ts +7 -1
  32. package/dist/fixed-footer.js +92 -18
  33. package/dist/fixed-footer.js.map +1 -1
  34. package/dist/git-workflow.js +49 -49
  35. package/dist/index.d.ts +2 -0
  36. package/dist/index.js +197 -65
  37. package/dist/index.js.map +1 -1
  38. package/dist/instant-artifact.d.ts +6 -0
  39. package/dist/instant-artifact.js +397 -0
  40. package/dist/instant-artifact.js.map +1 -0
  41. package/dist/live-queue.js +1 -1
  42. package/dist/live-queue.js.map +1 -1
  43. package/dist/model-aliases.d.ts +37 -0
  44. package/dist/model-aliases.js +203 -0
  45. package/dist/model-aliases.js.map +1 -0
  46. package/dist/orchestration.js +15 -15
  47. package/dist/permissions.d.ts +6 -0
  48. package/dist/permissions.js +53 -0
  49. package/dist/permissions.js.map +1 -1
  50. package/dist/pm2-manager.js +26 -26
  51. package/dist/query.d.ts +0 -1
  52. package/dist/query.js +74 -39
  53. package/dist/query.js.map +1 -1
  54. package/dist/refactor.js +87 -87
  55. package/dist/repo-command.js +7 -1
  56. package/dist/repo-command.js.map +1 -1
  57. package/dist/search-first.js +92 -92
  58. package/dist/skill-create.js +100 -100
  59. package/dist/stitch.js +1 -1
  60. package/dist/system-prompt.d.ts +2 -1
  61. package/dist/system-prompt.js +10 -5
  62. package/dist/system-prompt.js.map +1 -1
  63. package/dist/tools/github-repo-digest.d.ts +1 -1
  64. package/dist/tools/github-repo-digest.js +38 -6
  65. package/dist/tools/github-repo-digest.js.map +1 -1
  66. package/dist/types.d.ts +3 -0
  67. package/dist/types.js.map +1 -1
  68. package/dist/verification.js +55 -55
  69. package/package.json +1 -1
  70. package/resources/__init__.py +1 -1
  71. package/resources/exgentic/cawdex_agent/README.md +114 -114
  72. package/resources/exgentic/cawdex_agent/__init__.py +5 -5
  73. package/resources/exgentic/cawdex_agent/agent.py +605 -605
  74. package/resources/exgentic/cawdex_agent/requirements.txt +2 -2
  75. package/resources/exgentic/cawdex_agent/setup.sh +21 -21
  76. package/resources/exgentic/cawdex_agent/utils.py +1061 -1061
  77. package/resources/hal/cawdex_agent/README.md +24 -24
  78. package/resources/hal/cawdex_agent/__init__.py +1 -1
  79. package/resources/hal/cawdex_agent/main.py +550 -550
  80. package/resources/hal/cawdex_agent/requirements.txt +2 -2
  81. package/resources/kbench/cawdex_agent/README.md +107 -107
  82. package/resources/kbench/cawdex_agent/adapter.manifest.json +19 -19
  83. package/resources/kbench/cawdex_agent/runner.mjs +753 -753
  84. package/resources/open_agent_leaderboard/cawdex-agent-card.md +119 -119
  85. package/resources/terminal_bench/__init__.py +1 -1
  86. package/resources/terminal_bench/cawdex_agent.py +174 -174
  87. package/resources/terminal_bench/setup.sh +121 -121
package/dist/query.js CHANGED
@@ -9,7 +9,7 @@ import { buildSystemPrompt } from './system-prompt.js';
9
9
  import { runHooks } from './hooks.js';
10
10
  import { scanToolCall, printSecurityWarning } from './security.js';
11
11
  import { trackUsage } from './cost-tracker.js';
12
- import { shouldCompact, compactMessages, quickCompact, buildCompactionConfig, contextCapTokens, enforceContextCap, inferContextWindowTokens, } from './compaction.js';
12
+ import { shouldCompact, compactMessages, quickCompact, estimateTokens, buildCompactionConfig, contextCapTokens, enforceContextCap, inferContextWindowTokens, } from './compaction.js';
13
13
  import { assistantTranscriptPrefix, theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration, categorizeApiError } from './theme.js';
14
14
  import { isVoiceEnabled, getTtsConfig, getAccessibilityConfig, speakAssistantResponse, speak, speakUserEcho, } from './voice.js';
15
15
  import { isLikelyDestructive, describeDestructive, countWords, summarize } from './accessibility.js';
@@ -20,6 +20,7 @@ import * as liveQueue from './live-queue.js';
20
20
  import { isFooterActive, setFooterActivity, setFooterCost, writeScrollableLine } from './fixed-footer.js';
21
21
  import { applyQueuedInputChunk, drainQueuedInputBytes, queuedInputBytesToText } from './prompt-buffer.js';
22
22
  import { emit as dbgEmit } from './debug.js';
23
+ import { applyAgentToolInstructions } from './agents-md.js';
23
24
  import { buildBenchmarkCompletionReminder, buildBenchmarkTrajectorySystemBlock, makeBenchmarkInvalidToolActionEvent, makeBenchmarkTraceEvent, writeBenchmarkTrace, } from './benchmark-trace.js';
24
25
  import { buildTodoStateBlock } from './tools/todo.js';
25
26
  import { buildRuntimeInfoBlock } from './runtime-info.js';
@@ -30,11 +31,11 @@ import { archiveLargeToolOutput } from './tool-output-archive.js';
30
31
  // not persisted — restart, see hint again. Keyed by sessionId so different
31
32
  // sessions get fresh hints.
32
33
  const _thinkingHintShownForSession = new Set();
33
- const INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 12_000;
34
+ const INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 8_000;
34
35
  const INTERACTIVE_FLAKY_FIRST_TOKEN_TIMEOUT_MS = 6_000;
35
36
  const NON_INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 60_000;
36
37
  const NON_INTERACTIVE_FLAKY_FIRST_TOKEN_TIMEOUT_MS = 20_000;
37
- const FAST_DIRECT_FIRST_TOKEN_TIMEOUT_MS = 8_000;
38
+ const FAST_DIRECT_FIRST_TOKEN_TIMEOUT_MS = 5_000;
38
39
  const INTERACTIVE_STREAM_IDLE_TIMEOUT_MS = 45_000;
39
40
  const NON_INTERACTIVE_STREAM_IDLE_TIMEOUT_MS = 120_000;
40
41
  const FAST_DIRECT_STREAM_IDLE_TIMEOUT_MS = 20_000;
@@ -84,13 +85,6 @@ function fallbackModelForTurn(config, usedFallbackModel) {
84
85
  return null;
85
86
  return fallback;
86
87
  }
87
- export function fallbackModelForKnownFlakyTurn(config, usedFallbackModel = false) {
88
- if (process.env.CAWDEX_ALLOW_FLAKY_MODELS === '1')
89
- return null;
90
- if (!isKnownFlakyOpenRouterModel(config))
91
- return null;
92
- return fallbackModelForTurn(config, usedFallbackModel);
93
- }
94
88
  export function isTurnCancelKeySequence(chunk) {
95
89
  const seq = chunk.toString('utf8');
96
90
  return (seq === '\x1b' ||
@@ -154,9 +148,10 @@ function printInteractiveTurnAccepted(config) {
154
148
  }
155
149
  }
156
150
  export function formatWorkingIndicatorFrame(elapsedMs, frameIndex = 0, message = 'Working') {
157
- const frames = ['\u25e6', '\u25c7', '\u25c6', '\u25c7'];
151
+ void elapsedMs;
152
+ const frames = ['\u25dc', '\u25dd', '\u25de', '\u25df'];
158
153
  const frame = frames[Math.abs(frameIndex) % frames.length];
159
- return ` ${frame} ${message} (${formatDuration(elapsedMs)} \u2022 esc to interrupt)`;
154
+ return ` ${frame} ${message} \u2022 Esc/F5 to interrupt`;
160
155
  }
161
156
  function startWorkingIndicator(startedAtMs, screenReader, turn = 0) {
162
157
  if (screenReader)
@@ -166,11 +161,11 @@ function startWorkingIndicator(startedAtMs, screenReader, turn = 0) {
166
161
  if (!process.stdout.isTTY)
167
162
  return null;
168
163
  const messages = [
169
- 'Working hard on your request',
170
- 'Sumi ink settling',
171
- 'Edo lanterns lit',
172
- 'Kamon crest aligned',
173
- 'Neon alley quiet',
164
+ 'Sumi ink moving',
165
+ 'Edo lanterns cycling',
166
+ 'Kamon crest pulsing',
167
+ 'Neon shoji breathing',
168
+ 'Signal blade drawn',
174
169
  ];
175
170
  let frame = 0;
176
171
  let stopped = false;
@@ -1129,18 +1124,6 @@ export async function runQuery(ctx) {
1129
1124
  // user's configured fallbackModel. After we use it, this latches so we
1130
1125
  // don't bounce back and forth between failing models in a single chain.
1131
1126
  let usedFallbackModel = false;
1132
- const immediateFallback = fallbackModelForKnownFlakyTurn(ctx.config, usedFallbackModel);
1133
- if (immediateFallback) {
1134
- usedFallbackModel = true;
1135
- const failedModel = ctx.config.model;
1136
- ctx.config.model = immediateFallback;
1137
- resetClient();
1138
- console.log(theme.warning(` ${sym.warn} ${failedModel} is known to stall in interactive OpenRouter sessions; switching this turn to ${immediateFallback}.`));
1139
- console.log(theme.dim(' Override only if you really want it: CAWDEX_ALLOW_FLAKY_MODELS=1'));
1140
- }
1141
- if (!chainFastDirect) {
1142
- printInteractiveTurnAccepted(ctx.config);
1143
- }
1144
1127
  // Tracks whether ANY reasoning tokens arrived across the entire chain.
1145
1128
  // Used at chain-end to print a one-time "/thinking is ON but this model
1146
1129
  // doesn't emit reasoning" hint. Hoisted to chain scope (not per-turn)
@@ -1210,8 +1193,11 @@ export async function runQuery(ctx) {
1210
1193
  // as new text, drowning the actual response).
1211
1194
  const isScreenReader = ctx.config.voice?.accessibility?.screenReader === true;
1212
1195
  const inputGuard = startInputSuppression(isScreenReader);
1196
+ let earlyWorkingIndicator = null;
1213
1197
  try {
1214
1198
  if (!chainFastDirect) {
1199
+ printInteractiveTurnAccepted(ctx.config);
1200
+ earlyWorkingIndicator = startWorkingIndicator(chainStart, isScreenReader, 0);
1215
1201
  // Turn-boundary collapse runs BEFORE compaction. Every completed prior
1216
1202
  // turn becomes [user, "<final text>\n[Completed: used X, Y]"] — the
1217
1203
  // model no longer sees stale tool_calls that it might mistake for
@@ -1263,9 +1249,12 @@ export async function runQuery(ctx) {
1263
1249
  if (!fastDirect) {
1264
1250
  replaceMessagesInPlace(ctx.messages, quickCompact(ctx.messages));
1265
1251
  }
1252
+ const requestTools = fastDirect
1253
+ ? []
1254
+ : applyAgentToolInstructions(ALL_TOOLS, ctx.cwd, ctx.config.model);
1266
1255
  const systemPrompt = fastDirect
1267
1256
  ? FAST_DIRECT_SYSTEM_PROMPT
1268
- : buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
1257
+ : buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery, requestTools);
1269
1258
  let visibleMessages = fastDirect
1270
1259
  ? (userQuery ? [{ role: 'user', content: userQuery }] : [])
1271
1260
  : maskOldToolResults(ctx.messages);
@@ -1292,7 +1281,9 @@ export async function runQuery(ctx) {
1292
1281
  // override.
1293
1282
  const stateBlock = fastDirect ? null : buildStateBlock(visibleMessages);
1294
1283
  const runtimeInfoBlock = fastDirect ? null : buildRuntimeInfoBlock(ctx.cwd);
1295
- const repoMapBlock = fastDirect ? null : buildAutoRepoMapBlock(ctx.cwd, userQuery);
1284
+ const repoMapBlock = fastDirect || ctx.mode === 'design'
1285
+ ? null
1286
+ : buildAutoRepoMapBlock(ctx.cwd, userQuery);
1296
1287
  const globalPlanBlock = fastDirect ? null : buildGlobalPlanBlock(visibleMessages);
1297
1288
  const todoStateBlock = fastDirect ? null : buildTodoStateBlock(ctx.cwd);
1298
1289
  const benchmarkTrajectoryBlock = !fastDirect && ctx.mode === 'benchmark'
@@ -1326,6 +1317,7 @@ export async function runQuery(ctx) {
1326
1317
  let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
1327
1318
  let consecutiveNewlines = 0;
1328
1319
  const turnStart = Date.now();
1320
+ let usageRecorded = false;
1329
1321
  // Loop detection state: a stuck model can stream the SAME N-char
1330
1322
  // window of text 50+ times in a single API call (observed in the
1331
1323
  // wild with openrouter/owl-alpha emitting tool-call JSON as text).
@@ -1446,16 +1438,22 @@ export async function runQuery(ctx) {
1446
1438
  // line and then announce every subsequent token as "after the
1447
1439
  // waiting line", which is noisier than helpful).
1448
1440
  let firstTokenSeen = false;
1441
+ let firstTokenLatencyMs = null;
1449
1442
  // Note: the outer `isScreenReader` declared at the top of runQuery
1450
1443
  // (line ~340) is in scope here via closure — no need for a second
1451
1444
  // declaration. Previously this re-declared inside the while loop
1452
1445
  // and TypeScript tolerated it as a different block scope, but it
1453
1446
  // was confusing and the audit flagged it as bug-bait.
1454
1447
  //
1455
- // Live waiting indicator on the response line. It keeps elapsed time
1456
- // and the interrupt hint visible while still clearing itself before
1457
- // the first model event writes real output.
1458
- let workingIndicator = startWorkingIndicator(turnStart, isScreenReader, turns);
1448
+ // Live waiting indicator on the response line. It keeps motion and the
1449
+ // interrupt hint visible while still clearing itself before the first
1450
+ // model event writes real output.
1451
+ let workingIndicator = turns === 1 ? earlyWorkingIndicator : null;
1452
+ if (turns === 1)
1453
+ earlyWorkingIndicator = null;
1454
+ if (!workingIndicator) {
1455
+ workingIndicator = startWorkingIndicator(turnStart, isScreenReader, turns);
1456
+ }
1459
1457
  // Slow-model warning and first-token watchdog. The warning is a
1460
1458
  // UX hint; the watchdog is the hard recovery path for providers
1461
1459
  // that accept a request but then never produce a stream event.
@@ -1482,7 +1480,6 @@ export async function runQuery(ctx) {
1482
1480
  const requestConfig = fastDirect
1483
1481
  ? { ...ctx.config, maxTokens: Math.min(ctx.config.maxTokens ?? 700, 700) }
1484
1482
  : ctx.config;
1485
- const requestTools = fastDirect ? [] : ALL_TOOLS;
1486
1483
  const stream = streamChat(requestConfig, apiMessages, requestTools, streamAbort.signal);
1487
1484
  const iterator = stream[Symbol.asyncIterator]();
1488
1485
  while (true) {
@@ -1529,6 +1526,7 @@ export async function runQuery(ctx) {
1529
1526
  // model warning timer; subsequent events are normal streaming.
1530
1527
  if (!firstTokenSeen) {
1531
1528
  firstTokenSeen = true;
1529
+ firstTokenLatencyMs = Date.now() - turnStart;
1532
1530
  clearTimeout(slowTimer);
1533
1531
  if (streamWaitTimer)
1534
1532
  clearTimeout(streamWaitTimer);
@@ -1573,7 +1571,12 @@ export async function runQuery(ctx) {
1573
1571
  else if (event.type === 'done') {
1574
1572
  if (event.usage) {
1575
1573
  const u = event.usage;
1576
- const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
1574
+ const turnDurationMs = Date.now() - turnStart;
1575
+ const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion, {
1576
+ provider: ctx.config.provider,
1577
+ firstTokenMs: firstTokenLatencyMs,
1578
+ durationMs: turnDurationMs,
1579
+ });
1577
1580
  chainStats.benchmarkUsageEvents.push({
1578
1581
  model: ctx.config.model,
1579
1582
  promptTokens: u.prompt,
@@ -1581,12 +1584,16 @@ export async function runQuery(ctx) {
1581
1584
  totalTokens: u.total || u.prompt + u.completion,
1582
1585
  estimatedCostUsd: cost,
1583
1586
  });
1584
- setFooterCost(cost, u.prompt, u.completion);
1587
+ usageRecorded = true;
1588
+ setFooterCost(cost, u.prompt, u.completion, {
1589
+ firstTokenMs: firstTokenLatencyMs,
1590
+ durationMs: turnDurationMs,
1591
+ });
1585
1592
  // Single newline separator if we just streamed text, then the
1586
1593
  // compact telemetry line.
1587
1594
  if (hasOutput && !lastCharWasNewline)
1588
1595
  process.stdout.write('\n');
1589
- printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
1596
+ printCost(u.prompt, u.completion, cost, warning, turnDurationMs);
1590
1597
  }
1591
1598
  try {
1592
1599
  streamAbort.abort();
@@ -1596,6 +1603,28 @@ export async function runQuery(ctx) {
1596
1603
  break;
1597
1604
  }
1598
1605
  }
1606
+ if (!usageRecorded && (hasOutput || (toolCalls && toolCalls.length > 0))) {
1607
+ const promptEstimate = Math.max(1, estimateTokens(apiMessages));
1608
+ const completionEstimate = Math.max(1, Math.ceil(((fullText || '') + (toolCalls ? JSON.stringify(toolCalls) : '')).length / 3.5));
1609
+ const turnDurationMs = Date.now() - turnStart;
1610
+ const { cost } = trackUsage(ctx.sessionId, ctx.config.model, promptEstimate, completionEstimate, {
1611
+ provider: ctx.config.provider,
1612
+ firstTokenMs: firstTokenLatencyMs,
1613
+ durationMs: turnDurationMs,
1614
+ });
1615
+ chainStats.benchmarkUsageEvents.push({
1616
+ model: ctx.config.model,
1617
+ promptTokens: promptEstimate,
1618
+ completionTokens: completionEstimate,
1619
+ totalTokens: promptEstimate + completionEstimate,
1620
+ estimatedCostUsd: cost,
1621
+ });
1622
+ setFooterCost(cost, promptEstimate, completionEstimate, {
1623
+ firstTokenMs: firstTokenLatencyMs,
1624
+ durationMs: turnDurationMs,
1625
+ });
1626
+ usageRecorded = true;
1627
+ }
1599
1628
  clearTimeout(slowTimer);
1600
1629
  if (streamWaitTimer)
1601
1630
  clearTimeout(streamWaitTimer);
@@ -1873,6 +1902,8 @@ export async function runQuery(ctx) {
1873
1902
  }
1874
1903
  // Chain ended; back to idle so F1 reports the correct state.
1875
1904
  setStatus({ state: 'idle' });
1905
+ if (isFooterActive())
1906
+ setFooterActivity('Ready', 0, null);
1876
1907
  // ── Voice: read the assistant's final response ────────────
1877
1908
  // Off the hot path — fire-and-forget so the next prompt appears
1878
1909
  // immediately. The playback runs in background; F2 pauses, F4 skips.
@@ -1972,6 +2003,8 @@ export async function runQuery(ctx) {
1972
2003
  }
1973
2004
  }
1974
2005
  finally {
2006
+ earlyWorkingIndicator?.stop();
2007
+ earlyWorkingIndicator = null;
1975
2008
  // Drain any queued user input typed during streaming. Stash on
1976
2009
  // globalThis for the REPL loop in index.ts to restore into the
1977
2010
  // next editable prompt. Enter typed mid-stream is preserved as
@@ -1985,6 +2018,8 @@ export async function runQuery(ctx) {
1985
2018
  // can't be aborted between turns by Shift+F5 (soft-cancel).
1986
2019
  globalThis.__turnAbortCtl = null;
1987
2020
  globalThis.__turnCancelCurrent = null;
2021
+ if (isFooterActive())
2022
+ setFooterActivity('Ready', 0, null);
1988
2023
  }
1989
2024
  }
1990
2025
  const TOOL_CALL_LOOP_THRESHOLD = 3;