npm - @link-assistant/agent - Versions diffs - 0.16.18 → 0.18.0 - Mend

@link-assistant/agent 0.16.18 → 0.18.0

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (15)

package/package.json +1 -1
package/src/cli/argv.ts +54 -16
package/src/cli/continuous-mode.js +6 -2
package/src/cli/defaults.ts +18 -0
package/src/cli/model-config.js +87 -3
package/src/cli/run-options.js +163 -0
package/src/flag/flag.ts +13 -7
package/src/index.js +31 -150
package/src/provider/provider.ts +21 -16
package/src/session/compaction.ts +164 -5
package/src/session/message-v2.ts +32 -0
package/src/session/processor.ts +18 -0
package/src/session/prompt.ts +45 -2
package/src/session/summary.ts +121 -22
package/src/util/verbose-fetch.ts +5 -5

package/src/index.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { Server } from './server/server.ts';
 import { Instance } from './project/instance.ts';
 import { Log } from './util/log.ts';
 import { parseModelConfig } from './cli/model-config.js';
-import { DEFAULT_MODEL } from './cli/defaults.ts';
+import { buildRunOptions } from './cli/run-options.js';
 // Bus is used via createBusEventSubscription in event-handler.js
 import { Session } from './session/index.ts';
 import { SessionPrompt } from './session/prompt.ts';
@@ -278,7 +278,7 @@ async function runAgentMode(argv, request) {
     fn: async () => {
       // Parse model config inside Instance.provide context
       // This allows parseModelWithResolution to access the provider state
-      const { providerID, modelID } = await parseModelConfig(
+      const { providerID, modelID, compactionModel } = await parseModelConfig(
         argv,
         outputError,
         outputStatus
@@ -293,7 +293,8 @@ async function runAgentMode(argv, request) {
           modelID,
           systemMessage,
           appendSystemMessage,
-          jsonStandard
+          jsonStandard,
+          compactionModel
         );
       } else {
         // DIRECT MODE: Run everything in single process
@@ -304,7 +305,8 @@ async function runAgentMode(argv, request) {
           modelID,
           systemMessage,
           appendSystemMessage,
-          jsonStandard
+          jsonStandard,
+          compactionModel
         );
       }
     },
@@ -363,7 +365,7 @@ async function runContinuousAgentMode(argv) {
     fn: async () => {
       // Parse model config inside Instance.provide context
       // This allows parseModelWithResolution to access the provider state
-      const { providerID, modelID } = await parseModelConfig(
+      const { providerID, modelID, compactionModel } = await parseModelConfig(
         argv,
         outputError,
         outputStatus
@@ -377,7 +379,8 @@ async function runContinuousAgentMode(argv) {
           modelID,
           systemMessage,
           appendSystemMessage,
-          jsonStandard
+          jsonStandard,
+          compactionModel
         );
       } else {
         // DIRECT MODE: Run everything in single process
@@ -387,7 +390,8 @@ async function runContinuousAgentMode(argv) {
           modelID,
           systemMessage,
           appendSystemMessage,
-          jsonStandard
+          jsonStandard,
+          compactionModel
         );
       }
     },
@@ -409,7 +413,8 @@ async function runServerMode(
   modelID,
   systemMessage,
   appendSystemMessage,
-  jsonStandard
+  jsonStandard,
+  compactionModel
 ) {
   const compactJson = argv['compact-json'] === true;
@@ -475,6 +480,7 @@ async function runServerMode(
             providerID,
             modelID,
           },
+          compactionModel,
           system: systemMessage,
           appendSystem: appendSystemMessage,
         }),
@@ -508,7 +514,8 @@ async function runDirectMode(
   modelID,
   systemMessage,
   appendSystemMessage,
-  jsonStandard
+  jsonStandard,
+  compactionModel
 ) {
   const compactJson = argv['compact-json'] === true;
@@ -558,6 +565,7 @@ async function runDirectMode(
         providerID,
         modelID,
       },
+      compactionModel,
       system: systemMessage,
       appendSystem: appendSystemMessage,
     }).catch((error) => {
@@ -596,146 +604,7 @@ async function main() {
       .command({
         command: '$0',
         describe: 'Run agent in interactive or stdin mode (default)',
-        builder: (yargs) =>
-          yargs
-            .option('model', {
-              type: 'string',
-              description: 'Model to use in format providerID/modelID',
-              default: DEFAULT_MODEL,
-            })
-            .option('json-standard', {
-              type: 'string',
-              description:
-                'JSON output format standard: "opencode" (default) or "claude" (experimental)',
-              default: 'opencode',
-              choices: ['opencode', 'claude'],
-            })
-            .option('system-message', {
-              type: 'string',
-              description: 'Full override of the system message',
-            })
-            .option('system-message-file', {
-              type: 'string',
-              description: 'Full override of the system message from file',
-            })
-            .option('append-system-message', {
-              type: 'string',
-              description: 'Append to the default system message',
-            })
-            .option('append-system-message-file', {
-              type: 'string',
-              description: 'Append to the default system message from file',
-            })
-            .option('server', {
-              type: 'boolean',
-              description: 'Run in server mode (default)',
-              default: true,
-            })
-            .option('verbose', {
-              type: 'boolean',
-              description:
-                'Enable verbose mode to debug API requests (shows system prompt, token counts, etc.)',
-              default: false,
-            })
-            .option('dry-run', {
-              type: 'boolean',
-              description:
-                'Simulate operations without making actual API calls or package installations (useful for testing)',
-              default: false,
-            })
-            .option('use-existing-claude-oauth', {
-              type: 'boolean',
-              description:
-                'Use existing Claude OAuth credentials from ~/.claude/.credentials.json (from Claude Code CLI)',
-              default: false,
-            })
-            .option('prompt', {
-              alias: 'p',
-              type: 'string',
-              description:
-                'Prompt message to send directly (bypasses stdin reading)',
-            })
-            .option('disable-stdin', {
-              type: 'boolean',
-              description:
-                'Disable stdin streaming mode (requires --prompt or shows help)',
-              default: false,
-            })
-            .option('stdin-stream-timeout', {
-              type: 'number',
-              description:
-                'Optional timeout in milliseconds for stdin reading (default: no timeout)',
-            })
-            .option('auto-merge-queued-messages', {
-              type: 'boolean',
-              description:
-                'Enable auto-merging of rapidly arriving input lines into single messages (default: true)',
-              default: true,
-            })
-            .option('interactive', {
-              type: 'boolean',
-              description:
-                'Enable interactive mode to accept manual input as plain text strings (default: true). Use --no-interactive to only accept JSON input.',
-              default: true,
-            })
-            .option('always-accept-stdin', {
-              type: 'boolean',
-              description:
-                'Keep accepting stdin input even after the agent finishes work (default: true). Use --no-always-accept-stdin for single-message mode.',
-              default: true,
-            })
-            .option('compact-json', {
-              type: 'boolean',
-              description:
-                'Output compact JSON (single line) instead of pretty-printed JSON (default: false). Useful for program-to-program communication.',
-              default: false,
-            })
-            .option('resume', {
-              alias: 'r',
-              type: 'string',
-              description:
-                'Resume a specific session by ID. By default, forks the session with a new UUID. Use --no-fork to continue in the same session.',
-            })
-            .option('continue', {
-              alias: 'c',
-              type: 'boolean',
-              description:
-                'Continue the most recent session. By default, forks the session with a new UUID. Use --no-fork to continue in the same session.',
-              default: false,
-            })
-            .option('no-fork', {
-              type: 'boolean',
-              description:
-                'When used with --resume or --continue, continue in the same session without forking to a new UUID.',
-              default: false,
-            })
-            .option('generate-title', {
-              type: 'boolean',
-              description:
-                'Generate session titles using AI (default: false). Disabling saves tokens and prevents rate limit issues.',
-              default: false,
-            })
-            .option('retry-timeout', {
-              type: 'number',
-              description:
-                'Maximum total retry time in seconds for rate limit errors (default: 604800 = 7 days)',
-            })
-            .option('retry-on-rate-limits', {
-              type: 'boolean',
-              description:
-                'Retry AI completions API requests when rate limited (HTTP 429). Use --no-retry-on-rate-limits in integration tests to fail fast instead of waiting.',
-              default: true,
-            })
-            .option('output-response-model', {
-              type: 'boolean',
-              description: 'Include model info in step_finish output',
-              default: true,
-            })
-            .option('summarize-session', {
-              type: 'boolean',
-              description: 'Generate AI session summaries',
-              default: false,
-            }),
+        builder: buildRunOptions,
         handler: async (argv) => {
           // Check both CLI flag and environment variable for compact JSON mode
           const compactJson =
@@ -917,7 +786,10 @@ async function main() {
         if (argv['output-response-model'] === false) {
           Flag.setOutputResponseModel(false);
         }
-        if (argv['summarize-session'] === true) {
+        // summarize-session is enabled by default; it is turned off only when explicitly disabled
+        if (argv['summarize-session'] === false) {
+          Flag.setSummarizeSession(false);
+        } else {
           Flag.setSummarizeSession(true);
         }
         // retry-on-rate-limits is enabled by default, only set if explicitly disabled
@@ -929,6 +801,15 @@ async function main() {
           level: Flag.OPENCODE_VERBOSE ? 'DEBUG' : 'INFO',
           compactJson: isCompact,
         });
+        // Monkey-patch globalThis.fetch for raw HTTP logging in --verbose mode.
+        // Catches ALL HTTP calls regardless of AI SDK fetch passthrough. (#217)
+        if (!globalThis.__agentVerboseFetchInstalled) {
+          globalThis.fetch = createVerboseFetch(globalThis.fetch, {
+            caller: 'global',
+          });
+          globalThis.__agentVerboseFetchInstalled = true;
+        }
       })
       .fail((msg, err, yargs) => {
         // Handle errors from command handlers

package/src/provider/provider.ts CHANGED Viewed

@@ -1201,25 +1201,23 @@ export namespace Provider {
         sessionID: provider.id,
       });
-      // Wrap fetch with verbose HTTP logging for debugging provider issues.
-      // IMPORTANT: The verbose check is done at call time (not SDK creation time)
-      // because the SDK is cached and Flag.OPENCODE_VERBOSE may change after creation.
-      // When verbose is disabled, the wrapper is a no-op passthrough with negligible overhead.
-      // See: https://github.com/link-assistant/agent/issues/200
-      // See: https://github.com/link-assistant/agent/issues/206
+      // Verbose HTTP logging is handled by the global fetch monkey-patch
+      // (installed in CLI middleware in index.js). The global patch catches ALL
+      // HTTP calls reliably, regardless of how the AI SDK passes fetch internally.
+      // This provider-level wrapper is kept as a fallback for environments where
+      // the global patch may not be installed (e.g., programmatic use).
+      // See: https://github.com/link-assistant/agent/issues/217
       // See: https://github.com/link-assistant/agent/issues/215
       {
         const innerFetch = options['fetch'];
         let verboseWrapperConfirmed = false;
         let httpCallCount = 0;
-        // Log at SDK creation time that the fetch wrapper is installed.
-        // This runs once per provider SDK creation (not per request).
-        // If verbose is off at creation time, the per-request check still applies.
-        // See: https://github.com/link-assistant/agent/issues/215
-        log.info('verbose HTTP fetch wrapper installed', {
+        log.info('provider SDK fetch chain configured', {
           providerID: provider.id,
           pkg,
+          globalVerboseFetchInstalled:
+            !!globalThis.__agentVerboseFetchInstalled,
           verboseAtCreation: Flag.OPENCODE_VERBOSE,
         });
@@ -1227,8 +1225,15 @@ export namespace Provider {
           input: RequestInfo | URL,
           init?: RequestInit
         ): Promise<Response> => {
-          // Check verbose flag at call time — not at SDK creation time
-          if (!Flag.OPENCODE_VERBOSE) {
+          // Check verbose flag at call time — not at SDK creation time.
+          // When the global fetch monkey-patch is installed, it handles verbose
+          // logging for all calls. The provider wrapper is a fallback for
+          // environments without the global patch.
+          // See: https://github.com/link-assistant/agent/issues/217
+          if (
+            !Flag.OPENCODE_VERBOSE ||
+            globalThis.__agentVerboseFetchInstalled
+          ) {
             return innerFetch(input, init);
           }
@@ -1301,8 +1306,8 @@ export namespace Provider {
                     : undefined;
               if (bodyStr && typeof bodyStr === 'string') {
                 bodyPreview =
-                  bodyStr.length > 2000
-                    ? bodyStr.slice(0, 2000) +
+                  bodyStr.length > 200000
+                    ? bodyStr.slice(0, 200000) +
                       `... [truncated, total ${bodyStr.length} chars]`
                     : bodyStr;
               }
@@ -1362,7 +1367,7 @@ export namespace Provider {
             // still receives the full stream while we asynchronously log a preview.
             // For non-streaming responses, buffer the body and reconstruct the Response.
             // See: https://github.com/link-assistant/agent/issues/204
-            const responseBodyMaxChars = 4000;
+            const responseBodyMaxChars = 200000;
             const contentType = response.headers.get('content-type') ?? '';
             const isStreaming =
               contentType.includes('event-stream') ||

package/src/session/compaction.ts CHANGED Viewed

@@ -28,20 +28,149 @@ export namespace SessionCompaction {
     ),
   };
+  /**
+   * Default safety margin ratio for compaction trigger.
+   * We trigger compaction at 85% of usable context to avoid hitting hard limits.
+   * This means we stop 15% before (context - output) tokens.
+   *
+   * Note that the value is the usable fraction (0.85), not the margin itself
+   * (0.15). It is the ratio returned by computeSafetyMarginRatio when no
+   * compaction model configuration is supplied.
+   * @see https://github.com/link-assistant/agent/issues/217
+   */
+  export const OVERFLOW_SAFETY_MARGIN = 0.85;
+  /**
+   * Compaction model configuration passed from CLI.
+   *
+   * Consumed by computeSafetyMarginRatio (and, through it, by isOverflow and
+   * contextDiagnostics) to decide how much of the base model's usable context
+   * may fill up before compaction is triggered.
+   *
+   * Fields:
+   * - providerID / modelID: identify the compaction model; modelID is reported
+   *   as `compactionModelID` in the overflow-check log entry.
+   * - useSameModel: when true, the configured safety margin is always applied
+   *   and the context-size comparison between the two models is skipped.
+   *   Presumably set when compaction runs on the session's own model —
+   *   confirm against the CLI parser.
+   * - compactionSafetyMarginPercent: safety margin as a percentage; it is
+   *   converted to a ratio as `1 - percent / 100` (15 → 0.85).
+   * @see https://github.com/link-assistant/agent/issues/219
+   */
+  export interface CompactionModelConfig {
+    providerID: string;
+    modelID: string;
+    useSameModel: boolean;
+    compactionSafetyMarginPercent: number;
+  }
+  /**
+   * Compute the effective safety margin ratio.
+   *
+   * The returned ratio is the fraction of the base model's usable context
+   * that may fill up before compaction is triggered.
+   *
+   * When the compaction model has a larger context window than the base model,
+   * the entire base model context can be used (ratio = 1.0, i.e. 0% margin),
+   * because the compaction model can ingest all of it.
+   *
+   * When the compaction model has equal or smaller context, the configured
+   * safety margin applies (default 15% → ratio 0.85).
+   *
+   * A configured margin that is not a finite number in [0, 100) is treated as
+   * a misconfiguration and replaced by OVERFLOW_SAFETY_MARGIN. Without this
+   * guard NaN would make the overflow check never fire, a value of 100 or
+   * more would make it fire on every step, and a negative value would push
+   * the limit beyond the usable context.
+   *
+   * @param input.baseModelContextLimit Context window of the base model.
+   * @param input.compactionModel Compaction model configuration; when omitted
+   *   the default ratio (OVERFLOW_SAFETY_MARGIN) is returned.
+   * @param input.compactionModelContextLimit Context window of the compaction
+   *   model; a missing or zero value is treated as "unknown".
+   * @returns A ratio in the range (0, 1].
+   * @see https://github.com/link-assistant/agent/issues/219
+   */
+  export function computeSafetyMarginRatio(input: {
+    baseModelContextLimit: number;
+    compactionModel?: CompactionModelConfig;
+    compactionModelContextLimit?: number;
+  }): number {
+    const compactionModelConfig = input.compactionModel;
+    if (!compactionModelConfig) return OVERFLOW_SAFETY_MARGIN;
+    const compactionSafetyMarginPercent =
+      compactionModelConfig.compactionSafetyMarginPercent;
+    // The message schema only requires a number, so validate the range here
+    // before turning the percentage into a ratio.
+    const isValidPercent =
+      Number.isFinite(compactionSafetyMarginPercent) &&
+      compactionSafetyMarginPercent >= 0 &&
+      compactionSafetyMarginPercent < 100;
+    if (!isValidPercent) {
+      log.info(() => ({
+        message:
+          'invalid compaction safety margin percent — using default ratio',
+        compactionSafetyMarginPercent,
+        defaultRatio: OVERFLOW_SAFETY_MARGIN,
+      }));
+    }
+    const configuredRatio = isValidPercent
+      ? 1 - compactionSafetyMarginPercent / 100
+      : OVERFLOW_SAFETY_MARGIN;
+    // When using the same model, always apply the configured safety margin
+    if (compactionModelConfig.useSameModel) return configuredRatio;
+    // When compaction model has a larger context, no safety margin needed
+    const compactionContextLimit = input.compactionModelContextLimit ?? 0;
+    if (
+      compactionContextLimit > 0 &&
+      compactionContextLimit > input.baseModelContextLimit
+    ) {
+      log.info(() => ({
+        message:
+          'compaction model has larger context — using full base model context',
+        baseModelContextLimit: input.baseModelContextLimit,
+        compactionModelContextLimit: compactionContextLimit,
+      }));
+      return 1.0;
+    }
+    return configuredRatio;
+  }
   export function isOverflow(input: {
     tokens: MessageV2.Assistant['tokens'];
     model: ModelsDev.Model;
+    compactionModel?: CompactionModelConfig;
+    compactionModelContextLimit?: number;
   }) {
     if (Flag.OPENCODE_DISABLE_AUTOCOMPACT) return false;
-    const context = input.model.limit.context;
-    if (context === 0) return false;
+    const baseModelContextLimit = input.model.limit.context;
+    if (baseModelContextLimit === 0) return false;
     const count =
       input.tokens.input + input.tokens.cache.read + input.tokens.output;
-    const output =
+    const outputTokenLimit =
       Math.min(input.model.limit.output, SessionPrompt.OUTPUT_TOKEN_MAX) ||
       SessionPrompt.OUTPUT_TOKEN_MAX;
-    const usable = context - output;
-    return count > usable;
+    const usableContextWindow = baseModelContextLimit - outputTokenLimit;
+    const safetyMarginRatio = computeSafetyMarginRatio({
+      baseModelContextLimit,
+      compactionModel: input.compactionModel,
+      compactionModelContextLimit: input.compactionModelContextLimit,
+    });
+    const safeLimit = Math.floor(usableContextWindow * safetyMarginRatio);
+    const overflow = count > safeLimit;
+    log.info(() => ({
+      message: 'overflow check',
+      modelID: input.model.id,
+      contextLimit: baseModelContextLimit,
+      outputLimit: outputTokenLimit,
+      usableContextWindow,
+      safeLimit,
+      safetyMarginRatio,
+      compactionModelID: input.compactionModel?.modelID,
+      compactionModelContextLimit: input.compactionModelContextLimit,
+      currentTokens: count,
+      tokensBreakdown: {
+        input: input.tokens.input,
+        cacheRead: input.tokens.cache.read,
+        output: input.tokens.output,
+      },
+      overflow,
+      headroom: safeLimit - count,
+    }));
+    return overflow;
+  }
+  /**
+   * Build a snapshot of context usage for a model and its token counters.
+   * The snapshot is attached to step-finish parts so the JSON output shows
+   * how close a step came to the compaction threshold.
+   *
+   * @param input.tokens Token counters of the step (input, output, cache.read).
+   * @param input.model Model whose context and output limits are evaluated.
+   * @param input.compactionModel Optional compaction model configuration,
+   *   forwarded to computeSafetyMarginRatio.
+   * @param input.compactionModelContextLimit Optional context window of the
+   *   compaction model, forwarded to computeSafetyMarginRatio.
+   * @returns The diagnostics, or undefined when the model reports a context
+   *   limit of 0.
+   * @see https://github.com/link-assistant/agent/issues/217
+   */
+  export function contextDiagnostics(input: {
+    tokens: { input: number; output: number; cache: { read: number } };
+    model: ModelsDev.Model;
+    compactionModel?: CompactionModelConfig;
+    compactionModelContextLimit?: number;
+  }): MessageV2.ContextDiagnostics | undefined {
+    const { tokens, model, compactionModel, compactionModelContextLimit } =
+      input;
+    const { context: contextLimit, output: modelOutputLimit } = model.limit;
+    // A context limit of 0 means there is nothing to measure against.
+    if (contextLimit === 0) return undefined;
+    // Cap the model's output limit; a result of 0 falls back to the cap itself.
+    const cappedOutput = Math.min(
+      modelOutputLimit,
+      SessionPrompt.OUTPUT_TOKEN_MAX
+    );
+    const outputLimit = cappedOutput || SessionPrompt.OUTPUT_TOKEN_MAX;
+    const usableContext = contextLimit - outputLimit;
+    const safetyMargin = computeSafetyMarginRatio({
+      baseModelContextLimit: contextLimit,
+      compactionModel,
+      compactionModelContextLimit,
+    });
+    const safeLimit = Math.floor(usableContext * safetyMargin);
+    const currentTokens = tokens.input + tokens.cache.read + tokens.output;
+    const headroom = safeLimit - currentTokens;
+    return {
+      contextLimit,
+      outputLimit,
+      usableContext,
+      safeLimit,
+      safetyMargin,
+      currentTokens,
+      headroom,
+      overflow: currentTokens > safeLimit,
+    };
   }
   export const PRUNE_MINIMUM = 20_000;
@@ -100,10 +229,27 @@ export namespace SessionCompaction {
     };
     abort: AbortSignal;
   }) {
+    log.info(() => ({
+      message: 'compaction process starting',
+      providerID: input.model.providerID,
+      modelID: input.model.modelID,
+      messageCount: input.messages.length,
+      sessionID: input.sessionID,
+    }));
     const model = await Provider.getModel(
       input.model.providerID,
       input.model.modelID
     );
+    if (Flag.OPENCODE_VERBOSE) {
+      log.info(() => ({
+        message: 'compaction model loaded',
+        providerID: model.providerID,
+        modelID: model.modelID,
+        npm: model.npm,
+        contextLimit: model.info.limit.context,
+        outputLimit: model.info.limit.output,
+      }));
+    }
     const system = [...SystemPrompt.summarize(model.providerID)];
     const msg = (await Session.updateMessage({
       id: Identifier.ascending('message'),
@@ -156,6 +302,19 @@ export namespace SessionCompaction {
     );
     // Defensive check: ensure modelMessages is iterable (AI SDK 6.0.1 compatibility fix)
     const safeModelMessages = Array.isArray(modelMessages) ? modelMessages : [];
+    if (Flag.OPENCODE_VERBOSE) {
+      log.info(() => ({
+        message: 'compaction streamText call',
+        providerID: model.providerID,
+        modelID: model.modelID,
+        systemPromptCount: system.length,
+        modelMessageCount: safeModelMessages.length,
+        filteredMessageCount: input.messages.length - safeModelMessages.length,
+        toolCall: model.info.tool_call,
+      }));
+    }
     const result = await processor.process(() =>
       streamText({
         onError(error) {

package/src/session/message-v2.ts CHANGED Viewed

@@ -240,6 +240,27 @@ export namespace MessageV2 {
     });
   export type ModelInfo = z.infer<typeof ModelInfo>;
+  /**
+   * Context diagnostic info for step-finish parts.
+   * Shows model context limits and current usage to help debug compaction decisions.
+   *
+   * Values are produced by SessionCompaction.contextDiagnostics:
+   * - contextLimit: the model's context limit (`model.limit.context`).
+   * - outputLimit: the model's output limit capped at
+   *   SessionPrompt.OUTPUT_TOKEN_MAX.
+   * - usableContext: contextLimit - outputLimit.
+   * - safeLimit: floor(usableContext * safetyMargin); usage above this value
+   *   counts as overflow.
+   * - safetyMargin: the safety margin *ratio* (e.g. 0.85), i.e. the usable
+   *   fraction rather than the margin percentage.
+   * - currentTokens: input + cache.read + output tokens of the step.
+   * - headroom: safeLimit - currentTokens; negative once the limit is exceeded.
+   * - overflow: true when currentTokens > safeLimit.
+   * @see https://github.com/link-assistant/agent/issues/217
+   */
+  export const ContextDiagnostics = z
+    .object({
+      contextLimit: z.number(),
+      outputLimit: z.number(),
+      usableContext: z.number(),
+      safeLimit: z.number(),
+      safetyMargin: z.number(),
+      currentTokens: z.number(),
+      headroom: z.number(),
+      overflow: z.boolean(),
+    })
+    .meta({
+      ref: 'ContextDiagnostics',
+    });
+  export type ContextDiagnostics = z.infer<typeof ContextDiagnostics>;
   export const StepFinishPart = PartBase.extend({
     type: z.literal('step-finish'),
     reason: z.string(),
@@ -257,6 +278,9 @@ export namespace MessageV2 {
     // Model info included when --output-response-model is enabled
     // @see https://github.com/link-assistant/agent/issues/179
     model: ModelInfo.optional(),
+    // Context diagnostics for debugging compaction decisions
+    // @see https://github.com/link-assistant/agent/issues/217
+    context: ContextDiagnostics.optional(),
   }).meta({
     ref: 'StepFinishPart',
   });
@@ -368,6 +392,14 @@ export namespace MessageV2 {
       providerID: z.string(),
       modelID: z.string(),
     }),
+    compactionModel: z
+      .object({
+        providerID: z.string(),
+        modelID: z.string(),
+        useSameModel: z.boolean(),
+        compactionSafetyMarginPercent: z.number(),
+      })
+      .optional(),
     system: z.string().optional(),
     appendSystem: z.string().optional(),
     tools: z.record(z.string(), z.boolean()).optional(),

package/src/session/processor.ts CHANGED Viewed

@@ -17,6 +17,7 @@ import { Bus } from '../bus';
 import { SessionRetry } from './retry';
 import { SessionStatus } from './status';
 import { Flag } from '../flag/flag';
+import { SessionCompaction } from './compaction';
 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3;
@@ -366,6 +367,22 @@ export namespace SessionProcessor {
                         }
                       : undefined;
+                  // Compute context diagnostics for JSON output
+                  // @see https://github.com/link-assistant/agent/issues/217
+                  const contextDiag = SessionCompaction.contextDiagnostics({
+                    tokens: usage.tokens,
+                    model: input.model,
+                  });
+                  if (Flag.OPENCODE_VERBOSE && contextDiag) {
+                    log.info(() => ({
+                      message: 'step-finish context diagnostics',
+                      providerID: input.providerID,
+                      modelID: input.model.id,
+                      ...contextDiag,
+                    }));
+                  }
                   await Session.updatePart({
                     id: Identifier.ascending('part'),
                     reason: finishReason,
@@ -376,6 +393,7 @@ export namespace SessionProcessor {
                     tokens: usage.tokens,
                     cost: usage.cost,
                     model: modelInfo,
+                    context: contextDiag,
                   });
                   await Session.updateMessage(input.assistantMessage);
                   if (snapshot) {