npm - cawdex - Versions diffs - 1.35.74 → 1.35.76 - Mend

cawdex 1.35.74 → 1.35.76

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (87) hide show

package/README.md +5 -5
package/bin/anycode.js +2 -2
package/bin/cawdex.js +408 -408
package/bin/ecc-hooks.cjs +11 -11
package/dist/agents-md.d.ts +31 -0
package/dist/agents-md.js +340 -0
package/dist/agents-md.js.map +1 -0
package/dist/agents.js +1424 -1424
package/dist/api.d.ts +1 -0
package/dist/api.js +19 -14
package/dist/api.js.map +1 -1
package/dist/autonomous-loops.js +287 -287
package/dist/benchmark-repos.d.ts +31 -0
package/dist/benchmark-repos.js +234 -8
package/dist/benchmark-repos.js.map +1 -1
package/dist/command-palette.js +4 -2
package/dist/command-palette.js.map +1 -1
package/dist/compaction.js +8 -8
package/dist/config.js +51 -36
package/dist/config.js.map +1 -1
package/dist/content-engine.js +543 -543
package/dist/context-brief.d.ts +4 -0
package/dist/context-brief.js +230 -0
package/dist/context-brief.js.map +1 -0
package/dist/cost-tracker.d.ts +33 -14
package/dist/cost-tracker.js +81 -19
package/dist/cost-tracker.js.map +1 -1
package/dist/coverage.js +39 -39
package/dist/docs-sync.js +98 -98
package/dist/evaluation.js +452 -452
package/dist/fixed-footer.d.ts +7 -1
package/dist/fixed-footer.js +92 -18
package/dist/fixed-footer.js.map +1 -1
package/dist/git-workflow.js +49 -49
package/dist/index.d.ts +2 -0
package/dist/index.js +197 -65
package/dist/index.js.map +1 -1
package/dist/instant-artifact.d.ts +6 -0
package/dist/instant-artifact.js +397 -0
package/dist/instant-artifact.js.map +1 -0
package/dist/live-queue.js +1 -1
package/dist/live-queue.js.map +1 -1
package/dist/model-aliases.d.ts +37 -0
package/dist/model-aliases.js +203 -0
package/dist/model-aliases.js.map +1 -0
package/dist/orchestration.js +15 -15
package/dist/permissions.d.ts +6 -0
package/dist/permissions.js +53 -0
package/dist/permissions.js.map +1 -1
package/dist/pm2-manager.js +26 -26
package/dist/query.d.ts +0 -1
package/dist/query.js +74 -39
package/dist/query.js.map +1 -1
package/dist/refactor.js +87 -87
package/dist/repo-command.js +7 -1
package/dist/repo-command.js.map +1 -1
package/dist/search-first.js +92 -92
package/dist/skill-create.js +100 -100
package/dist/stitch.js +1 -1
package/dist/system-prompt.d.ts +2 -1
package/dist/system-prompt.js +10 -5
package/dist/system-prompt.js.map +1 -1
package/dist/tools/github-repo-digest.d.ts +1 -1
package/dist/tools/github-repo-digest.js +38 -6
package/dist/tools/github-repo-digest.js.map +1 -1
package/dist/types.d.ts +3 -0
package/dist/types.js.map +1 -1
package/dist/verification.js +55 -55
package/package.json +1 -1
package/resources/__init__.py +1 -1
package/resources/exgentic/cawdex_agent/README.md +114 -114
package/resources/exgentic/cawdex_agent/__init__.py +5 -5
package/resources/exgentic/cawdex_agent/agent.py +605 -605
package/resources/exgentic/cawdex_agent/requirements.txt +2 -2
package/resources/exgentic/cawdex_agent/setup.sh +21 -21
package/resources/exgentic/cawdex_agent/utils.py +1061 -1061
package/resources/hal/cawdex_agent/README.md +24 -24
package/resources/hal/cawdex_agent/__init__.py +1 -1
package/resources/hal/cawdex_agent/main.py +550 -550
package/resources/hal/cawdex_agent/requirements.txt +2 -2
package/resources/kbench/cawdex_agent/README.md +107 -107
package/resources/kbench/cawdex_agent/adapter.manifest.json +19 -19
package/resources/kbench/cawdex_agent/runner.mjs +753 -753
package/resources/open_agent_leaderboard/cawdex-agent-card.md +119 -119
package/resources/terminal_bench/__init__.py +1 -1
package/resources/terminal_bench/cawdex_agent.py +174 -174
package/resources/terminal_bench/setup.sh +121 -121

package/dist/query.js CHANGED Viewed

@@ -9,7 +9,7 @@ import { buildSystemPrompt } from './system-prompt.js';
 import { runHooks } from './hooks.js';
 import { scanToolCall, printSecurityWarning } from './security.js';
 import { trackUsage } from './cost-tracker.js';
-import { shouldCompact, compactMessages, quickCompact, buildCompactionConfig, contextCapTokens, enforceContextCap, inferContextWindowTokens, } from './compaction.js';
+import { shouldCompact, compactMessages, quickCompact, estimateTokens, buildCompactionConfig, contextCapTokens, enforceContextCap, inferContextWindowTokens, } from './compaction.js';
 import { assistantTranscriptPrefix, theme, sym, printToolRun, printToolResult, printThinkingOpen, printThinkingText, printThinkingClose, printCost, printApiError, formatDuration, categorizeApiError } from './theme.js';
 import { isVoiceEnabled, getTtsConfig, getAccessibilityConfig, speakAssistantResponse, speak, speakUserEcho, } from './voice.js';
 import { isLikelyDestructive, describeDestructive, countWords, summarize } from './accessibility.js';
@@ -20,6 +20,7 @@ import * as liveQueue from './live-queue.js';
 import { isFooterActive, setFooterActivity, setFooterCost, writeScrollableLine } from './fixed-footer.js';
 import { applyQueuedInputChunk, drainQueuedInputBytes, queuedInputBytesToText } from './prompt-buffer.js';
 import { emit as dbgEmit } from './debug.js';
+import { applyAgentToolInstructions } from './agents-md.js';
 import { buildBenchmarkCompletionReminder, buildBenchmarkTrajectorySystemBlock, makeBenchmarkInvalidToolActionEvent, makeBenchmarkTraceEvent, writeBenchmarkTrace, } from './benchmark-trace.js';
 import { buildTodoStateBlock } from './tools/todo.js';
 import { buildRuntimeInfoBlock } from './runtime-info.js';
@@ -30,11 +31,11 @@ import { archiveLargeToolOutput } from './tool-output-archive.js';
 // not persisted — restart, see hint again. Keyed by sessionId so different
 // sessions get fresh hints.
 const _thinkingHintShownForSession = new Set();
-const INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 12_000;
+const INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 8_000;
 const INTERACTIVE_FLAKY_FIRST_TOKEN_TIMEOUT_MS = 6_000;
 const NON_INTERACTIVE_FIRST_TOKEN_TIMEOUT_MS = 60_000;
 const NON_INTERACTIVE_FLAKY_FIRST_TOKEN_TIMEOUT_MS = 20_000;
-const FAST_DIRECT_FIRST_TOKEN_TIMEOUT_MS = 8_000;
+const FAST_DIRECT_FIRST_TOKEN_TIMEOUT_MS = 5_000;
 const INTERACTIVE_STREAM_IDLE_TIMEOUT_MS = 45_000;
 const NON_INTERACTIVE_STREAM_IDLE_TIMEOUT_MS = 120_000;
 const FAST_DIRECT_STREAM_IDLE_TIMEOUT_MS = 20_000;
@@ -84,13 +85,6 @@ function fallbackModelForTurn(config, usedFallbackModel) {
         return null;
     return fallback;
 }
-export function fallbackModelForKnownFlakyTurn(config, usedFallbackModel = false) {
-    if (process.env.CAWDEX_ALLOW_FLAKY_MODELS === '1')
-        return null;
-    if (!isKnownFlakyOpenRouterModel(config))
-        return null;
-    return fallbackModelForTurn(config, usedFallbackModel);
-}
 export function isTurnCancelKeySequence(chunk) {
     const seq = chunk.toString('utf8');
     return (seq === '\x1b' ||
@@ -154,9 +148,10 @@ function printInteractiveTurnAccepted(config) {
     }
 }
 export function formatWorkingIndicatorFrame(elapsedMs, frameIndex = 0, message = 'Working') {
-    const frames = ['\u25e6', '\u25c7', '\u25c6', '\u25c7'];
+    void elapsedMs;
+    const frames = ['\u25dc', '\u25dd', '\u25de', '\u25df'];
     const frame = frames[Math.abs(frameIndex) % frames.length];
-    return `  ${frame} ${message} (${formatDuration(elapsedMs)} \u2022 esc to interrupt)`;
+    return `  ${frame} ${message} \u2022 Esc/F5 to interrupt`;
 }
 function startWorkingIndicator(startedAtMs, screenReader, turn = 0) {
     if (screenReader)
@@ -166,11 +161,11 @@ function startWorkingIndicator(startedAtMs, screenReader, turn = 0) {
     if (!process.stdout.isTTY)
         return null;
     const messages = [
-        'Working hard on your request',
-        'Sumi ink settling',
-        'Edo lanterns lit',
-        'Kamon crest aligned',
-        'Neon alley quiet',
+        'Sumi ink moving',
+        'Edo lanterns cycling',
+        'Kamon crest pulsing',
+        'Neon shoji breathing',
+        'Signal blade drawn',
     ];
     let frame = 0;
     let stopped = false;
@@ -1129,18 +1124,6 @@ export async function runQuery(ctx) {
     // user's configured fallbackModel. After we use it, this latches so we
     // don't bounce back and forth between failing models in a single chain.
     let usedFallbackModel = false;
-    const immediateFallback = fallbackModelForKnownFlakyTurn(ctx.config, usedFallbackModel);
-    if (immediateFallback) {
-        usedFallbackModel = true;
-        const failedModel = ctx.config.model;
-        ctx.config.model = immediateFallback;
-        resetClient();
-        console.log(theme.warning(`  ${sym.warn} ${failedModel} is known to stall in interactive OpenRouter sessions; switching this turn to ${immediateFallback}.`));
-        console.log(theme.dim('    Override only if you really want it: CAWDEX_ALLOW_FLAKY_MODELS=1'));
-    }
-    if (!chainFastDirect) {
-        printInteractiveTurnAccepted(ctx.config);
-    }
     // Tracks whether ANY reasoning tokens arrived across the entire chain.
     // Used at chain-end to print a one-time "/thinking is ON but this model
     // doesn't emit reasoning" hint. Hoisted to chain scope (not per-turn)
@@ -1210,8 +1193,11 @@ export async function runQuery(ctx) {
     // as new text, drowning the actual response).
     const isScreenReader = ctx.config.voice?.accessibility?.screenReader === true;
     const inputGuard = startInputSuppression(isScreenReader);
+    let earlyWorkingIndicator = null;
     try {
         if (!chainFastDirect) {
+            printInteractiveTurnAccepted(ctx.config);
+            earlyWorkingIndicator = startWorkingIndicator(chainStart, isScreenReader, 0);
             // Turn-boundary collapse runs BEFORE compaction. Every completed prior
             // turn becomes [user, "<final text>\n[Completed: used X, Y]"] — the
             // model no longer sees stale tool_calls that it might mistake for
@@ -1263,9 +1249,12 @@ export async function runQuery(ctx) {
             if (!fastDirect) {
                 replaceMessagesInPlace(ctx.messages, quickCompact(ctx.messages));
             }
+            const requestTools = fastDirect
+                ? []
+                : applyAgentToolInstructions(ALL_TOOLS, ctx.cwd, ctx.config.model);
             const systemPrompt = fastDirect
                 ? FAST_DIRECT_SYSTEM_PROMPT
-                : buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery);
+                : buildSystemPrompt(ctx.config, ctx.cwd, ctx.mode, userQuery, requestTools);
             let visibleMessages = fastDirect
                 ? (userQuery ? [{ role: 'user', content: userQuery }] : [])
                 : maskOldToolResults(ctx.messages);
@@ -1292,7 +1281,9 @@ export async function runQuery(ctx) {
             // override.
             const stateBlock = fastDirect ? null : buildStateBlock(visibleMessages);
             const runtimeInfoBlock = fastDirect ? null : buildRuntimeInfoBlock(ctx.cwd);
-            const repoMapBlock = fastDirect ? null : buildAutoRepoMapBlock(ctx.cwd, userQuery);
+            const repoMapBlock = fastDirect || ctx.mode === 'design'
+                ? null
+                : buildAutoRepoMapBlock(ctx.cwd, userQuery);
             const globalPlanBlock = fastDirect ? null : buildGlobalPlanBlock(visibleMessages);
             const todoStateBlock = fastDirect ? null : buildTodoStateBlock(ctx.cwd);
             const benchmarkTrajectoryBlock = !fastDirect && ctx.mode === 'benchmark'
@@ -1326,6 +1317,7 @@ export async function runQuery(ctx) {
             let lastCharWasNewline = false; // collapse 3+ consecutive newlines down to 2
             let consecutiveNewlines = 0;
             const turnStart = Date.now();
+            let usageRecorded = false;
             // Loop detection state: a stuck model can stream the SAME N-char
             // window of text 50+ times in a single API call (observed in the
             // wild with openrouter/owl-alpha emitting tool-call JSON as text).
@@ -1446,16 +1438,22 @@ export async function runQuery(ctx) {
             // line and then announce every subsequent token as "after the
             // waiting line", which is noisier than helpful).
             let firstTokenSeen = false;
+            let firstTokenLatencyMs = null;
             // Note: the outer `isScreenReader` declared at the top of runQuery
             // (line ~340) is in scope here via closure — no need for a second
             // declaration. Previously this re-declared inside the while loop
             // and TypeScript tolerated it as a different block scope, but it
             // was confusing and the audit flagged it as bug-bait.
             //
-            // Live waiting indicator on the response line. It keeps elapsed time
-            // and the interrupt hint visible while still clearing itself before
-            // the first model event writes real output.
-            let workingIndicator = startWorkingIndicator(turnStart, isScreenReader, turns);
+            // Live waiting indicator on the response line. It keeps motion and the
+            // interrupt hint visible while still clearing itself before the first
+            // model event writes real output.
+            let workingIndicator = turns === 1 ? earlyWorkingIndicator : null;
+            if (turns === 1)
+                earlyWorkingIndicator = null;
+            if (!workingIndicator) {
+                workingIndicator = startWorkingIndicator(turnStart, isScreenReader, turns);
+            }
             // Slow-model warning and first-token watchdog. The warning is a
             // UX hint; the watchdog is the hard recovery path for providers
             // that accept a request but then never produce a stream event.
@@ -1482,7 +1480,6 @@ export async function runQuery(ctx) {
                 const requestConfig = fastDirect
                     ? { ...ctx.config, maxTokens: Math.min(ctx.config.maxTokens ?? 700, 700) }
                     : ctx.config;
-                const requestTools = fastDirect ? [] : ALL_TOOLS;
                 const stream = streamChat(requestConfig, apiMessages, requestTools, streamAbort.signal);
                 const iterator = stream[Symbol.asyncIterator]();
                 while (true) {
@@ -1529,6 +1526,7 @@ export async function runQuery(ctx) {
                     // model warning timer; subsequent events are normal streaming.
                     if (!firstTokenSeen) {
                         firstTokenSeen = true;
+                        firstTokenLatencyMs = Date.now() - turnStart;
                         clearTimeout(slowTimer);
                         if (streamWaitTimer)
                             clearTimeout(streamWaitTimer);
@@ -1573,7 +1571,12 @@ export async function runQuery(ctx) {
                     else if (event.type === 'done') {
                         if (event.usage) {
                             const u = event.usage;
-                            const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion);
+                            const turnDurationMs = Date.now() - turnStart;
+                            const { cost, warning } = trackUsage(ctx.sessionId, ctx.config.model, u.prompt, u.completion, {
+                                provider: ctx.config.provider,
+                                firstTokenMs: firstTokenLatencyMs,
+                                durationMs: turnDurationMs,
+                            });
                             chainStats.benchmarkUsageEvents.push({
                                 model: ctx.config.model,
                                 promptTokens: u.prompt,
@@ -1581,12 +1584,16 @@ export async function runQuery(ctx) {
                                 totalTokens: u.total || u.prompt + u.completion,
                                 estimatedCostUsd: cost,
                             });
-                            setFooterCost(cost, u.prompt, u.completion);
+                            usageRecorded = true;
+                            setFooterCost(cost, u.prompt, u.completion, {
+                                firstTokenMs: firstTokenLatencyMs,
+                                durationMs: turnDurationMs,
+                            });
                             // Single newline separator if we just streamed text, then the
                             // compact telemetry line.
                             if (hasOutput && !lastCharWasNewline)
                                 process.stdout.write('\n');
-                            printCost(u.prompt, u.completion, cost, warning, Date.now() - turnStart);
+                            printCost(u.prompt, u.completion, cost, warning, turnDurationMs);
                         }
                         try {
                             streamAbort.abort();
@@ -1596,6 +1603,28 @@ export async function runQuery(ctx) {
                         break;
                     }
                 }
+                if (!usageRecorded && (hasOutput || (toolCalls && toolCalls.length > 0))) {
+                    const promptEstimate = Math.max(1, estimateTokens(apiMessages));
+                    const completionEstimate = Math.max(1, Math.ceil(((fullText || '') + (toolCalls ? JSON.stringify(toolCalls) : '')).length / 3.5));
+                    const turnDurationMs = Date.now() - turnStart;
+                    const { cost } = trackUsage(ctx.sessionId, ctx.config.model, promptEstimate, completionEstimate, {
+                        provider: ctx.config.provider,
+                        firstTokenMs: firstTokenLatencyMs,
+                        durationMs: turnDurationMs,
+                    });
+                    chainStats.benchmarkUsageEvents.push({
+                        model: ctx.config.model,
+                        promptTokens: promptEstimate,
+                        completionTokens: completionEstimate,
+                        totalTokens: promptEstimate + completionEstimate,
+                        estimatedCostUsd: cost,
+                    });
+                    setFooterCost(cost, promptEstimate, completionEstimate, {
+                        firstTokenMs: firstTokenLatencyMs,
+                        durationMs: turnDurationMs,
+                    });
+                    usageRecorded = true;
+                }
                 clearTimeout(slowTimer);
                 if (streamWaitTimer)
                     clearTimeout(streamWaitTimer);
@@ -1873,6 +1902,8 @@ export async function runQuery(ctx) {
         }
         // Chain ended; back to idle so F1 reports the correct state.
         setStatus({ state: 'idle' });
+        if (isFooterActive())
+            setFooterActivity('Ready', 0, null);
         // ── Voice: read the assistant's final response ────────────
         // Off the hot path — fire-and-forget so the next prompt appears
         // immediately. The playback runs in background; F2 pauses, F4 skips.
@@ -1972,6 +2003,8 @@ export async function runQuery(ctx) {
         }
     }
     finally {
+        earlyWorkingIndicator?.stop();
+        earlyWorkingIndicator = null;
         // Drain any queued user input typed during streaming. Stash on
         // globalThis for the REPL loop in index.ts to restore into the
         // next editable prompt. Enter typed mid-stream is preserved as
@@ -1985,6 +2018,8 @@ export async function runQuery(ctx) {
         // can't be aborted between turns by Shift+F5 (soft-cancel).
         globalThis.__turnAbortCtl = null;
         globalThis.__turnCancelCurrent = null;
+        if (isFooterActive())
+            setFooterActivity('Ready', 0, null);
     }
 }
 const TOOL_CALL_LOOP_THRESHOLD = 3;