npm - @ctxprotocol/sdk - Versions diffs - 0.8.3 → 0.8.5 - Mend

@ctxprotocol/sdk 0.8.3 → 0.8.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (14)

package/dist/client/index.d.ts CHANGED

@@ -299,12 +299,14 @@ interface ExecutionResult<T = unknown> {
 }
 /** Supported orchestration depth modes for query execution. */
 type QueryDepth = "fast" | "auto" | "deep";
+type QueryDeepMode = "deep-light" | "deep-heavy";
 /**
  * Options for the agentic query endpoint (pay-per-response).
  *
  * Unlike `execute()` which calls a single tool once, `query()` sends a
- * natural-language question and lets the server handle tool discovery,
- * multi-tool orchestration, self-healing retries, and AI synthesis.
+ * natural-language question and lets the server handle discovery-first
+ * orchestration (`discover/probe -> plan-from-evidence -> execute ->
+ * bounded fallback`) plus synthesis.
  * One flat fee covers up to 100 MCP skill calls per tool.
  */
 interface QueryOptions {
@@ -331,6 +333,12 @@ interface QueryOptions {
      * Useful for large payload workflows where inline JSON is not ideal.
      */
     includeDataUrl?: boolean;
+    /**
+     * Include machine-readable developer trace output for this query response.
+     * When enabled, the server may return summary counters plus diagnostics
+     * for lane selection, scout probe adequacy, and bounded fallback behavior.
+     */
+    includeDeveloperTrace?: boolean;
     /**
      * Query orchestration depth mode:
      * - `fast`: lower-latency path
@@ -338,12 +346,168 @@ interface QueryOptions {
      * - `deep`: full completeness-oriented path
      */
     queryDepth?: QueryDepth;
+    /**
+     * Development/testing only: force the server's internal deep lane.
+     * Ignored by normal production usage and invalid when `queryDepth` is `fast`.
+     */
+    debugScoutDeepMode?: QueryDeepMode;
     /**
      * Optional idempotency key (UUID recommended).
      * Reuse the same key when retrying the same logical request.
      */
     idempotencyKey?: string;
 }
+/**
+ * Tool reference attached to developer trace timeline steps.
+ */
+interface QueryDeveloperTraceToolRef {
+    id?: string;
+    name?: string;
+    method?: string;
+    [key: string]: unknown;
+}
+/**
+ * Loop metadata attached to developer trace timeline steps.
+ */
+interface QueryDeveloperTraceLoopInfo {
+    name?: string;
+    iteration?: number;
+    maxIterations?: number;
+    [key: string]: unknown;
+}
+/**
+ * Tool selection metadata attached to discovery/planning diagnostics.
+ */
+interface QueryDeveloperTraceToolSelection {
+    toolId: string;
+    toolName: string;
+    selectedMethodCount: number;
+    selectedMethods: string[];
+    omittedSelectedMethodCount: number;
+    priceUsd?: string;
+}
+/**
+ * Initial planner diagnostic details.
+ */
+interface QueryPlanningTraceDiagnostic {
+    plannerQuery: string;
+    scoutEvidenceAttached: boolean;
+    scoutEvidencePromptBlock: string | null;
+    allowedModules: string[];
+}
+/**
+ * Rediscovery/fallback diagnostic details.
+ */
+interface QueryRediscoveryTraceDiagnostic {
+    considered: boolean;
+    executed: boolean;
+    skipReason: string | null;
+    missingCapability: string | null;
+    rediscoveryQuery: string | null;
+    capabilityLooksLikeSearchNeed: boolean;
+    allowSearchFallbackOnElapsedCap: boolean;
+    searchFallbackUsed: boolean;
+    preRediscoveryBudgetReasonCode: string | null;
+    candidateSearchResults: QueryDeveloperTraceToolSelection[];
+    selectedAlternatives: QueryDeveloperTraceToolSelection[];
+    mergedTools: QueryDeveloperTraceToolSelection[];
+    usingPaidFallback: boolean;
+    branchPlan: QueryPlanningTraceDiagnostic | null;
+}
+/**
+ * Rich developer-trace diagnostics for discovery-first orchestration internals.
+ */
+interface QueryDeveloperTraceDiagnostics {
+    selection: {
+        selectedDepth: string;
+        deepMode: string | null;
+        debugScoutDeepMode: string | null;
+        plannerReasoningStage: string;
+        scoutEnabled: boolean;
+        preserveFastOneShot: boolean;
+        candidateMethodCount: number;
+        scoutProbeStatus: string;
+        scoutProbeAdequacy: string;
+        scoutProbeConfidence: number;
+        scoutMetadataConfidence: number;
+        scoutProbeShortlistedMethodCount: number;
+        scoutProbeMissingCapability: string | null;
+        scoutPrePlanProbeCalls: number;
+        scoutPrePlanProbeBudgetReasonCode: string | null;
+        scoutChangedInitialPlan: boolean;
+        scoutChangedPlannerReasoningStage: boolean;
+        scoutInitialSelectedDepth: string;
+        scoutInitialDeepMode: string | null;
+        scoutInitialPlannerReasoningStage: string;
+        scoutInitialReasonCode: string;
+        scoutFinalReasonCode: string;
+        scoutEvidenceAttachedToPlanning: boolean;
+        scoutLlmSelectionUsed: boolean;
+        scoutLlmSelectionFallback: boolean;
+        scoutLlmSelectionLatencyMs: number | null;
+        selectedTools: QueryDeveloperTraceToolSelection[];
+    };
+    planning: {
+        initial: QueryPlanningTraceDiagnostic;
+    };
+    cost?: {
+        planningCostUsd: number;
+        initialExecutionCostUsd: number;
+        rediscoveryAdditionalCostUsd: number;
+        synthesisCostUsd: number;
+        totalModelCostUsd: number;
+        toolCostUsd: number;
+        totalChargedUsd: number;
+    };
+    completeness: {
+        evaluations: unknown[];
+        triggerNeedsDifferentTools: boolean;
+        triggerMissingCapability: string | null;
+    };
+    rediscovery: QueryRediscoveryTraceDiagnostic | null;
+    [key: string]: unknown;
+}
+/**
+ * A single developer-trace timeline step.
+ */
+interface QueryDeveloperTraceStep {
+    stepType?: string;
+    event?: string;
+    status?: string;
+    message?: string;
+    timestampMs?: number;
+    tool?: QueryDeveloperTraceToolRef;
+    attempt?: number;
+    loop?: QueryDeveloperTraceLoopInfo;
+    metadata?: Record<string, unknown>;
+    [key: string]: unknown;
+}
+/**
+ * Aggregate counters that summarize developer-trace behavior.
+ */
+interface QueryDeveloperTraceSummary {
+    toolCalls?: number;
+    retryCount?: number;
+    selfHealCount?: number;
+    fallbackCount?: number;
+    failureCount?: number;
+    recoveryCount?: number;
+    completionChecks?: number;
+    loopCount?: number;
+    [key: string]: unknown;
+}
+/**
+ * Developer Mode trace payload returned per query response (opt-in).
+ */
+interface QueryDeveloperTrace {
+    summary?: QueryDeveloperTraceSummary;
+    timeline?: QueryDeveloperTraceStep[];
+    requestId?: string;
+    query?: string;
+    source?: string;
+    diagnostics?: QueryDeveloperTraceDiagnostics;
+    [key: string]: unknown;
+}
 /**
  * Information about a tool that was used during a query response
  */
@@ -367,6 +531,19 @@ interface QueryCost {
     /** Total cost (model + tools) */
     totalCostUsd: string;
 }
+/**
+ * High-level orchestration outcome metrics returned by the query API.
+ */
+interface QueryOrchestrationMetrics {
+    parityStage: string;
+    orchestrationMode: string;
+    /** Whether the first plan path succeeded without fallback. */
+    firstPassSuccess: boolean;
+    /** Whether execution signaled a missing capability on first pass. */
+    capabilityMissSignaled: boolean;
+    /** Whether bounded rediscovery/fallback executed. */
+    rediscoveryExecuted: boolean;
+}
 /**
  * The resolved result of a pay-per-response query
  */
@@ -383,6 +560,10 @@ interface QueryResult {
     data?: unknown;
     /** Optional blob URL for persisted execution data (when includeDataUrl=true) */
     dataUrl?: string;
+    /** Optional machine-readable Developer Mode trace payload */
+    developerTrace?: QueryDeveloperTrace;
+    /** Optional orchestration outcome metrics for benchmarking and rollout analysis */
+    orchestrationMetrics?: QueryOrchestrationMetrics;
 }
 /**
  * Successful response from the /api/v1/query endpoint
@@ -395,6 +576,8 @@ interface QueryApiSuccessResponse {
     durationMs: number;
     data?: unknown;
     dataUrl?: string;
+    developerTrace?: QueryDeveloperTrace;
+    orchestrationMetrics?: QueryOrchestrationMetrics;
 }
 /**
  * Raw API response from the query endpoint
@@ -414,15 +597,28 @@ interface QueryStreamTextDeltaEvent {
     type: "text-delta";
     delta: string;
 }
+/** Emitted when the server streams developer trace updates/chunks */
+interface QueryStreamDeveloperTraceEvent {
+    type: "developer-trace";
+    trace: QueryDeveloperTrace;
+}
 /** Emitted when the full response is complete */
 interface QueryStreamDoneEvent {
     type: "done";
     result: QueryResult;
 }
+/** Emitted when the server reports a recoverable or terminal query error */
+interface QueryStreamErrorEvent {
+    type: "error";
+    error: string;
+    code?: ContextErrorCode | string;
+    scope?: string;
+    reasonCode?: string;
+}
 /**
  * Union of all events emitted during a streaming query
  */
-type QueryStreamEvent = QueryStreamToolStatusEvent | QueryStreamTextDeltaEvent | QueryStreamDoneEvent;
+type QueryStreamEvent = QueryStreamToolStatusEvent | QueryStreamTextDeltaEvent | QueryStreamDeveloperTraceEvent | QueryStreamDoneEvent | QueryStreamErrorEvent;
 /**
  * Specific error codes returned by the Context Protocol API
  */
@@ -524,8 +720,8 @@ declare class Tools {
  *
  * Unlike `tools.execute()` which calls a single tool once (pay-per-request),
  * the Query resource sends a natural-language question and lets the server
- * handle tool discovery, multi-tool orchestration, self-healing retries,
- * completeness checks, and AI synthesis — all for one flat fee.
+ * handle discovery-first orchestration (`discover/probe -> plan-from-evidence ->
+ * execute -> bounded fallback`) plus AI synthesis — all for one flat fee.
  *
  * This is the "prepared meal" vs "raw ingredients" distinction:
  * - `tools.execute()` = raw data, full control, predictable cost
@@ -534,11 +730,15 @@ declare class Tools {
 declare class Query {
     private client;
     constructor(client: ContextClient);
+    private buildSyntheticTraceFromRunResult;
+    private buildSyntheticTraceFromStreamStatus;
+    private mergeDeveloperTrace;
+    private parseStreamEvent;
     /**
      * Run an agentic query and wait for the full response.
      *
      * The server discovers relevant tools (or uses the ones you specify),
-     * executes the full agentic pipeline (up to 100 MCP calls per tool),
+     * executes the discovery-first pipeline (up to 100 MCP calls per tool),
      * and returns an AI-synthesized answer. Payment is settled after
      * successful execution via deferred settlement.
      *
@@ -573,6 +773,8 @@ declare class Query {
      * Event types:
      * - `tool-status` — A tool started executing or changed status
      * - `text-delta` — A chunk of the AI response text
+     * - `developer-trace` — Runtime trace metadata (when includeDeveloperTrace=true)
+     * - `error` — A structured query/runtime error emitted before stream completion
      * - `done` — The full response is complete (includes final `QueryResult`)
      *
      * @param options - Query options or a plain string question
@@ -588,9 +790,15 @@ declare class Query {
      *     case "text-delta":
      *       process.stdout.write(event.delta);
      *       break;
+     *     case "developer-trace":
+     *       console.log("Trace summary:", event.trace.summary);
+     *       break;
      *     case "done":
      *       console.log("\nCost:", event.result.cost.totalCostUsd);
      *       break;
+     *     case "error":
+     *       console.error("Stream error:", event.error);
+     *       break;
      *   }
      * }
      * ```
@@ -666,7 +874,9 @@ declare class ContextClient {
      *
      * @internal
      */
-    _fetch<T>(endpoint: string, options?: RequestInit): Promise<T>;
+    _fetch<T>(endpoint: string, options?: RequestInit, fetchOptions?: {
+        retry?: boolean;
+    }): Promise<T>;
     /**
      * Internal method for making authenticated HTTP requests that returns
      * the raw Response object. Used for streaming endpoints (SSE).
@@ -677,4 +887,4 @@ declare class ContextClient {
     _fetchRaw(endpoint: string, options?: RequestInit): Promise<Response>;
 }
-export { ContextClient, type ContextClientOptions, ContextError, type ContextErrorCode, Discovery, type ExecuteApiErrorResponse, type ExecuteApiResponse, type ExecuteApiSuccessResponse, type ExecuteOptions, type ExecuteSessionApiResponse, type ExecuteSessionApiSuccessResponse, type ExecuteSessionResult, type ExecuteSessionSpend, type ExecuteSessionStartOptions, type ExecuteSessionStatus, type ExecutionResult, type McpTool, type McpToolMeta, type McpToolRateLimitHints, Query, type QueryApiResponse, type QueryApiSuccessResponse, type QueryCost, type QueryOptions, type QueryResult, type QueryStreamDoneEvent, type QueryStreamEvent, type QueryStreamTextDeltaEvent, type QueryStreamToolStatusEvent, type QueryToolUsage, type SearchOptions, type SearchResponse, type Tool, Tools };
+export { ContextClient, type ContextClientOptions, ContextError, type ContextErrorCode, Discovery, type ExecuteApiErrorResponse, type ExecuteApiResponse, type ExecuteApiSuccessResponse, type ExecuteOptions, type ExecuteSessionApiResponse, type ExecuteSessionApiSuccessResponse, type ExecuteSessionResult, type ExecuteSessionSpend, type ExecuteSessionStartOptions, type ExecuteSessionStatus, type ExecutionResult, type McpTool, type McpToolMeta, type McpToolRateLimitHints, Query, type QueryApiResponse, type QueryApiSuccessResponse, type QueryCost, type QueryDeepMode, type QueryDeveloperTrace, type QueryDeveloperTraceLoopInfo, type QueryDeveloperTraceStep, type QueryDeveloperTraceSummary, type QueryDeveloperTraceToolRef, type QueryOptions, type QueryResult, type QueryStreamDeveloperTraceEvent, type QueryStreamDoneEvent, type QueryStreamErrorEvent, type QueryStreamEvent, type QueryStreamTextDeltaEvent, type QueryStreamToolStatusEvent, type QueryToolUsage, type SearchOptions, type SearchResponse, type Tool, Tools };

package/dist/client/index.js CHANGED

@@ -226,11 +226,119 @@ var Query = class {
   constructor(client) {
     this.client = client;
   }
+  buildSyntheticTraceFromRunResult(params) {
+    const timeline = params.toolsUsed.map((tool, index) => ({
+      stepType: "tool-call",
+      event: "tool-call",
+      status: "success",
+      timestampMs: index,
+      tool: {
+        id: tool.id,
+        name: tool.name
+      },
+      metadata: {
+        skillCalls: tool.skillCalls,
+        synthetic: true
+      }
+    }));
+    const toolCalls = params.toolsUsed.reduce(
+      (sum, tool) => sum + Math.max(tool.skillCalls, 0),
+      0
+    );
+    return {
+      summary: {
+        toolCalls,
+        retryCount: 0,
+        selfHealCount: 0,
+        fallbackCount: 0,
+        failureCount: 0,
+        recoveryCount: 0,
+        completionChecks: 0,
+        loopCount: 0
+      },
+      timeline,
+      source: "sdk-fallback",
+      synthetic: true,
+      reason: "backend_trace_missing",
+      durationMs: params.durationMs
+    };
+  }
+  buildSyntheticTraceFromStreamStatus(params) {
+    const timeline = params.statusTimeline.map((entry, index) => ({
+      stepType: "tool-status",
+      event: "tool-status",
+      status: entry.status,
+      timestampMs: index,
+      tool: entry.tool.name || entry.tool.id ? {
+        id: entry.tool.id || void 0,
+        name: entry.tool.name || void 0
+      } : void 0,
+      metadata: { synthetic: true }
+    }));
+    const toolCallsFromUsage = params.toolsUsed.reduce(
+      (sum, tool) => sum + Math.max(tool.skillCalls, 0),
+      0
+    );
+    const toolCallsFromStatus = params.statusTimeline.filter(
+      (entry) => entry.status === "tool-complete"
+    ).length;
+    const toolCalls = toolCallsFromUsage > 0 ? toolCallsFromUsage : toolCallsFromStatus;
+    const retryCount = params.statusTimeline.filter(
+      (entry) => /(retry|fix|reflect|recover)/i.test(entry.status)
+    ).length;
+    const completionChecks = params.statusTimeline.filter(
+      (entry) => /complet/i.test(entry.status)
+    ).length;
+    return {
+      summary: {
+        toolCalls,
+        retryCount,
+        selfHealCount: retryCount,
+        fallbackCount: 0,
+        failureCount: 0,
+        recoveryCount: 0,
+        completionChecks,
+        loopCount: retryCount
+      },
+      timeline,
+      source: "sdk-fallback",
+      synthetic: true,
+      reason: "backend_trace_missing",
+      durationMs: params.durationMs
+    };
+  }
+  mergeDeveloperTrace(first, second) {
+    if (!first) return second;
+    if (!second) return first;
+    const firstTimeline = Array.isArray(first.timeline) ? first.timeline : [];
+    const secondTimeline = Array.isArray(second.timeline) ? second.timeline : [];
+    const mergedTimeline = [...firstTimeline, ...secondTimeline];
+    return {
+      ...first,
+      ...second,
+      summary: {
+        ...typeof first.summary === "object" && first.summary ? first.summary : {},
+        ...typeof second.summary === "object" && second.summary ? second.summary : {}
+      },
+      ...mergedTimeline.length > 0 ? { timeline: mergedTimeline } : {}
+    };
+  }
+  parseStreamEvent(rawData) {
+    const parsed = JSON.parse(rawData);
+    if (!parsed || typeof parsed !== "object") {
+      return void 0;
+    }
+    const event = parsed;
+    if (typeof event.type !== "string") {
+      return void 0;
+    }
+    return event;
+  }
   /**
    * Run an agentic query and wait for the full response.
    *
    * The server discovers relevant tools (or uses the ones you specify),
-   * executes the full agentic pipeline (up to 100 MCP calls per tool),
+   * executes the discovery-first pipeline (up to 100 MCP calls per tool),
    * and returns an AI-synthesized answer. Payment is settled after
    * successful execution via deferred settlement.
    *
@@ -259,42 +367,25 @@ var Query = class {
    */
   async run(options) {
     const opts = typeof options === "string" ? { query: options } : options;
-    const headers = opts.idempotencyKey ? { "Idempotency-Key": opts.idempotencyKey } : void 0;
-    const response = await this.client._fetch(
-      "/api/v1/query",
-      {
-        method: "POST",
-        headers,
-        body: JSON.stringify({
-          query: opts.query,
-          tools: opts.tools,
-          modelId: opts.modelId,
-          includeData: opts.includeData,
-          includeDataUrl: opts.includeDataUrl,
-          queryDepth: opts.queryDepth,
-          stream: false
-        })
+    let terminalError;
+    for await (const event of this.stream(opts)) {
+      if (event.type === "error") {
+        terminalError = {
+          error: event.error,
+          ...event.code ? { code: event.code } : {},
+          ...event.scope ? { scope: event.scope } : {},
+          ...event.reasonCode ? { reasonCode: event.reasonCode } : {}
+        };
+        continue;
+      }
+      if (event.type === "done") {
+        return event.result;
       }
-    );
-    if ("error" in response) {
-      throw new ContextError(
-        response.error,
-        response.code,
-        void 0,
-        response.helpUrl
-      );
     }
-    if (response.success) {
-      return {
-        response: response.response,
-        toolsUsed: response.toolsUsed,
-        cost: response.cost,
-        durationMs: response.durationMs,
-        data: response.data,
-        dataUrl: response.dataUrl
-      };
+    if (terminalError) {
+      throw new ContextError(terminalError.error, terminalError.code);
     }
-    throw new ContextError("Unexpected response format from query API");
+    throw new ContextError("Streaming query ended before done event");
   }
   /**
    * Run an agentic query with streaming. Returns an async iterable that
@@ -303,6 +394,8 @@ var Query = class {
    * Event types:
    * - `tool-status` — A tool started executing or changed status
    * - `text-delta` — A chunk of the AI response text
+   * - `developer-trace` — Runtime trace metadata (when includeDeveloperTrace=true)
+   * - `error` — A structured query/runtime error emitted before stream completion
    * - `done` — The full response is complete (includes final `QueryResult`)
    *
    * @param options - Query options or a plain string question
@@ -318,9 +411,15 @@ var Query = class {
    *     case "text-delta":
    *       process.stdout.write(event.delta);
    *       break;
+   *     case "developer-trace":
+   *       console.log("Trace summary:", event.trace.summary);
+   *       break;
    *     case "done":
    *       console.log("\nCost:", event.result.cost.totalCostUsd);
    *       break;
+   *     case "error":
+   *       console.error("Stream error:", event.error);
+   *       break;
    *   }
    * }
    * ```
@@ -337,7 +436,9 @@ var Query = class {
         modelId: opts.modelId,
         includeData: opts.includeData,
         includeDataUrl: opts.includeDataUrl,
+        includeDeveloperTrace: opts.includeDeveloperTrace,
         queryDepth: opts.queryDepth,
+        debugScoutDeepMode: opts.debugScoutDeepMode,
         stream: true
       })
     });
@@ -348,6 +449,48 @@ var Query = class {
     const reader = body.getReader();
     const decoder = new TextDecoder();
     let buffer = "";
+    let aggregatedTrace;
+    const statusTimeline = [];
+    const parseAndHydrateEvent = (rawData) => {
+      const event = this.parseStreamEvent(rawData);
+      if (!event) {
+        return void 0;
+      }
+      if (event.type === "developer-trace") {
+        aggregatedTrace = this.mergeDeveloperTrace(aggregatedTrace, event.trace);
+        return event;
+      }
+      if (event.type === "tool-status") {
+        statusTimeline.push({
+          status: event.status,
+          tool: {
+            id: event.tool.id,
+            name: event.tool.name
+          }
+        });
+        return event;
+      }
+      if (event.type === "done") {
+        let mergedTrace = this.mergeDeveloperTrace(
+          aggregatedTrace,
+          event.result.developerTrace
+        );
+        if (!mergedTrace && opts.includeDeveloperTrace) {
+          mergedTrace = statusTimeline.length > 0 ? this.buildSyntheticTraceFromStreamStatus({
+            statusTimeline,
+            toolsUsed: event.result.toolsUsed,
+            durationMs: event.result.durationMs
+          }) : this.buildSyntheticTraceFromRunResult({
+            toolsUsed: event.result.toolsUsed,
+            durationMs: event.result.durationMs
+          });
+        }
+        if (mergedTrace) {
+          event.result.developerTrace = mergedTrace;
+        }
+      }
+      return event;
+    };
     try {
       while (true) {
         const { done, value } = await reader.read();
@@ -361,7 +504,10 @@ var Query = class {
             const data = trimmed.slice(6);
             if (data === "[DONE]") return;
             try {
-              yield JSON.parse(data);
+              const event = parseAndHydrateEvent(data);
+              if (event) {
+                yield event;
+              }
             } catch {
             }
           }
@@ -371,7 +517,10 @@ var Query = class {
         const data = buffer.trim().slice(6);
         if (data !== "[DONE]") {
           try {
-            yield JSON.parse(data);
+            const event = parseAndHydrateEvent(data);
+            if (event) {
+              yield event;
+            }
           } catch {
           }
         }
@@ -450,30 +599,34 @@ var ContextClient = class {
    *
    * @internal
    */
-  async _fetch(endpoint, options = {}) {
+  async _fetch(endpoint, options = {}, fetchOptions) {
     if (this._closed) {
       throw new ContextError("Client has been closed");
     }
     const url = `${this.baseUrl}${endpoint}`;
     const maxRetries = 3;
     const timeoutMs = this.requestTimeoutMs;
+    const method = (options.method ?? "GET").toUpperCase();
+    const requestHeaders = new Headers(options.headers);
+    const canRetryRequest = fetchOptions?.retry === false ? false : method === "GET" || method === "HEAD" || method === "OPTIONS" || requestHeaders.has("Idempotency-Key");
     let lastError;
     for (let attempt = 0; attempt <= maxRetries; attempt++) {
       const controller = new AbortController();
       const timeout = setTimeout(() => controller.abort(), timeoutMs);
+      const mergedHeaders = new Headers(requestHeaders);
+      if (!mergedHeaders.has("Content-Type")) {
+        mergedHeaders.set("Content-Type", "application/json");
+      }
+      mergedHeaders.set("Authorization", `Bearer ${this.apiKey}`);
       try {
         const response = await fetch(url, {
           ...options,
           signal: controller.signal,
-          headers: {
-            "Content-Type": "application/json",
-            Authorization: `Bearer ${this.apiKey}`,
-            ...options.headers
-          }
+          headers: mergedHeaders
         });
         clearTimeout(timeout);
         if (!response.ok) {
-          if (response.status >= 500 && attempt < maxRetries) {
+          if (response.status >= 500 && canRetryRequest && attempt < maxRetries) {
             const delay = Math.min(1e3 * 2 ** attempt, 1e4);
             await new Promise((resolve) => setTimeout(resolve, delay));
             continue;
@@ -492,7 +645,16 @@ var ContextClient = class {
           }
           throw new ContextError(errorMessage, errorCode, response.status, helpUrl);
         }
-        return response.json();
+        try {
+          return await response.json();
+        } catch (error) {
+          const parseError = error instanceof Error ? error : new Error(String(error));
+          throw new ContextError(
+            `Failed to parse JSON response: ${parseError.message}`,
+            void 0,
+            response.status
+          );
+        }
       } catch (error) {
         clearTimeout(timeout);
         if (error instanceof ContextError) {
@@ -500,7 +662,7 @@ var ContextClient = class {
         }
         lastError = error instanceof Error ? error : new Error(String(error));
         const isRetryable = lastError.name === "AbortError" || lastError.message.includes("fetch failed") || lastError.message.includes("ECONNRESET") || lastError.message.includes("ETIMEDOUT");
-        if (isRetryable && attempt < maxRetries) {
+        if (isRetryable && canRetryRequest && attempt < maxRetries) {
           const delay = Math.min(1e3 * 2 ** attempt, 1e4);
           await new Promise((resolve) => setTimeout(resolve, delay));
           continue;