npm - shortcutxl - Versions diffs - 0.3.60 → 0.3.61 - Mend

shortcutxl 0.3.60 → 0.3.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (98) hide show

package/BINARY-INVENTORY.json +9 -9
package/CHANGELOG.md +4 -0
package/dist/ai/models.js +2 -1
package/dist/ai/providers/anthropic.js +18 -18
package/dist/ai/providers/openai-completions.js +7 -5
package/dist/ai/providers/openai-responses-shared.d.ts +1 -1
package/dist/ai/providers/openai-responses-shared.js +4 -3
package/dist/ai/providers/openai-responses.d.ts +2 -1
package/dist/ai/providers/openai-responses.js +9 -5
package/dist/ai/providers/simple-options.js +1 -0
package/dist/ai/types.d.ts +8 -0
package/dist/ai/utils/overflow.js +1 -1
package/dist/app/agent-session.js +11 -3
package/dist/app/background/tool-summaries.d.ts +10 -4
package/dist/app/background/tool-summaries.js +21 -36
package/dist/app/extensions/runner.js +10 -0
package/dist/app/extensions/types.d.ts +3 -0
package/dist/app/providers/shortcut-llm-proxy-client.d.ts +61 -0
package/dist/app/providers/shortcut-llm-proxy-client.js +135 -0
package/dist/app/providers/shortcut-stream.js +7 -3
package/dist/app/session/tool-summary-emitter.d.ts +14 -5
package/dist/app/session/tool-summary-emitter.js +33 -19
package/dist/app/tools/llm-analysis.d.ts +1 -1
package/dist/app/tools/llm-analysis.js +20 -38
package/dist/app/tools/take-screenshot.d.ts +3 -3
package/dist/app/tools/take-screenshot.js +21 -41
package/dist/app/tools/task/runner.js +10 -0
package/dist/cli.js +464 -223
package/dist/contracts/agent-api.d.ts +3 -3
package/dist/contracts/agent-session-store.d.ts +1 -0
package/dist/contracts/agent-session-store.js +17 -9
package/dist/contracts/model-stream.d.ts +3 -3
package/dist/core/agent-snapshot-builder.js +6 -1
package/dist/core/core-types.d.ts +2 -2
package/dist/core/session/compaction/compaction.js +1 -1
package/dist/core/session/compaction-bridge.d.ts +4 -1
package/dist/core/session/compaction-bridge.js +7 -2
package/dist/core/session/context-overflow.js +1 -1
package/dist/core/session-schema.d.ts +1 -0
package/dist/core/session-schema.js +7 -1
package/dist/embedded-agent/anthropic-messages-transport.d.ts +3 -1
package/dist/embedded-agent/anthropic-messages-transport.js +20 -9
package/dist/embedded-agent/compaction-wiring.d.ts +4 -1
package/dist/embedded-agent/compaction-wiring.js +8 -4
package/dist/embedded-agent/compose.js +34 -6
package/dist/embedded-agent/host-tools/build-tool-list.js +5 -5
package/dist/embedded-agent/host-tools/index.d.ts +2 -0
package/dist/embedded-agent/host-tools/index.js +2 -0
package/dist/embedded-agent/host-tools/mode-host-tools.js +7 -17
package/dist/embedded-agent/host-tools/modify-skill/contract.d.ts +75 -0
package/dist/embedded-agent/host-tools/modify-skill/contract.js +233 -0
package/dist/embedded-agent/host-tools/modify-skill/index.d.ts +3 -0
package/dist/embedded-agent/host-tools/modify-skill/index.js +3 -0
package/dist/embedded-agent/host-tools/read-skill/contract.d.ts +36 -0
package/dist/embedded-agent/host-tools/read-skill/contract.js +113 -0
package/dist/embedded-agent/host-tools/read-skill/index.d.ts +2 -0
package/dist/embedded-agent/host-tools/read-skill/index.js +2 -0
package/dist/embedded-agent/host-tools/registry.d.ts +5 -3
package/dist/embedded-agent/host-tools/registry.js +7 -3
package/dist/embedded-agent/host-tools/timeouts.js +2 -0
package/dist/embedded-agent/host-tools/tool-names.d.ts +2 -0
package/dist/embedded-agent/host-tools/tool-names.js +2 -0
package/dist/embedded-agent/openai-completions-transport.d.ts +5 -1
package/dist/embedded-agent/openai-completions-transport.js +19 -5
package/dist/embedded-agent/openai-responses-transport.d.ts +2 -1
package/dist/embedded-agent/openai-responses-transport.js +13 -3
package/dist/embedded-agent/run-stats.d.ts +2 -0
package/dist/embedded-agent/run-stats.js +15 -9
package/dist/embedded-agent/session-entry-builder.d.ts +1 -1
package/dist/embedded-agent/session-entry-builder.js +2 -2
package/dist/embedded-agent/session-store.js +22 -5
package/dist/embedded-agent/stream-message-state.js +1 -1
package/dist/embedded-agent/stream.d.ts +2 -0
package/dist/embedded-agent/stream.js +13 -8
package/dist/fast-mode.d.ts +8 -0
package/dist/fast-mode.js +47 -0
package/dist/main.js +3 -3
package/dist/model-ids.js +1 -1
package/dist/shell/interactive/interactive-mode.js +11 -4
package/package.json +9 -1
package/plugins/shortcutxl/SKILL.md +6 -5
package/plugins/shortcutxl/skills/shortcutxl/SKILL.md +6 -5
package/skills/advanced-mog-api/api-reference.json +14808 -14808
package/user-docs/dist/shortcutxl-docs.pdf +0 -0
package/xll/ShortcutXL.xll +0 -0
package/xll/python/Lib/site-packages/httpx-0.28.1.dist-info/RECORD +1 -1
package/xll/python/Lib/site-packages/idna-3.18.dist-info/RECORD +1 -1
package/xll/python/Lib/site-packages/pip-26.1.2.dist-info/RECORD +3 -3
package/xll/python/Lib/site-packages/pywin32-311.dist-info/RECORD +2 -2
package/xll/python/Scripts/httpx.exe +0 -0
package/xll/python/Scripts/idna.exe +0 -0
package/xll/python/Scripts/pip.exe +0 -0
package/xll/python/Scripts/pip3.13.exe +0 -0
package/xll/python/Scripts/pip3.exe +0 -0
package/xll/python/Scripts/pywin32_postinstall.exe +0 -0
package/xll/python/Scripts/pywin32_testall.exe +0 -0
package/dist/app/providers/shortcut-attribution.d.ts +0 -13
package/dist/app/providers/shortcut-attribution.js +0 -24

package/BINARY-INVENTORY.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "schemaVersion": 1,
-  "generatedAt": "2026-06-04T07:10:58.989Z",
+  "generatedAt": "2026-06-05T01:51:32.781Z",
   "package": "shortcutxl",
   "binaryExtensions": [
     ".dll",
@@ -11,7 +11,7 @@
   "files": [
     {
       "path": "xll/ShortcutXL.xll",
-      "sha256": "0f4f7b363b32871ef4afd57fa891e740f91ca5ab60b8d452c36ecf334f443033",
+      "sha256": "7c86275b7bfd0b33b422de02a5d46e11004462b545d811bd53b7590a0b2891f9",
       "source": "ShortcutXL native XLL build",
       "version": "package",
       "builtBy": "shortcut",
@@ -523,7 +523,7 @@
     },
     {
       "path": "xll/python/Scripts/httpx.exe",
-      "sha256": "0ab1e42994f3ac9177d03abcf014f3eff940ca307b923ddf2cb2036b2c9eaadf",
+      "sha256": "6443bf0359e479a00549f26cfaaecebec067687bd721258108880e3e4b8c0cf9",
       "source": "httpx console launcher installed into embedded Python",
       "version": "see packaged httpx distribution",
       "builtBy": "third-party",
@@ -531,7 +531,7 @@
     },
     {
       "path": "xll/python/Scripts/idna.exe",
-      "sha256": "8b8baca0e902b3bd1757d5d14155cb1178e8825c232d67cd7b0c7e120c6fa5ff",
+      "sha256": "eb0bee35c9a602f93ee605c2a6d6df3069be9e2be8a4b5131b4e7599a438d168",
       "source": "Python package console launcher installed into embedded Python",
       "version": "see owning Python package metadata in site-packages",
       "builtBy": "third-party",
@@ -539,7 +539,7 @@
     },
     {
       "path": "xll/python/Scripts/pip.exe",
-      "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
+      "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
       "source": "pip console launcher installed into embedded Python",
       "version": "see packaged pip distribution",
       "builtBy": "third-party",
@@ -547,7 +547,7 @@
     },
     {
       "path": "xll/python/Scripts/pip3.13.exe",
-      "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
+      "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
       "source": "pip console launcher installed into embedded Python",
       "version": "see packaged pip distribution",
       "builtBy": "third-party",
@@ -555,7 +555,7 @@
     },
     {
       "path": "xll/python/Scripts/pip3.exe",
-      "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
+      "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
       "source": "pip console launcher installed into embedded Python",
       "version": "see packaged pip distribution",
       "builtBy": "third-party",
@@ -563,7 +563,7 @@
     },
     {
       "path": "xll/python/Scripts/pywin32_postinstall.exe",
-      "sha256": "cd410066e88131c0f203025dc86306a2f6498ed54abe9ff27ef349f7974c3521",
+      "sha256": "33c9bf469edc79cbdea68966e2b50eb15db5ba63278fec1befd274f00ef26bf4",
       "source": "Python package console launcher installed into embedded Python",
       "version": "see owning Python package metadata in site-packages",
       "builtBy": "third-party",
@@ -571,7 +571,7 @@
     },
     {
       "path": "xll/python/Scripts/pywin32_testall.exe",
-      "sha256": "af93cdcdd7dfeda2a71bedb565329960ca0d8621cb3dd36365ce197a4c44968d",
+      "sha256": "f4ab6b20be97a838b4e2eb369c603136e8bcf7d8b1829dd30e7fd7a681b48e65",
       "source": "Python package console launcher installed into embedded Python",
       "version": "see owning Python package metadata in site-packages",
       "builtBy": "third-party",

package/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,9 @@
 # Changelog
+## [0.3.61]
+- **ShortcutXL plugin guidance** - Refreshed the bundled Claude and Codex ShortcutXL plugin guidance.
 ## [0.3.60]
 - **Safer shell approvals** - High-risk Outlook-style send and delete shell operations now require explicit one-time approval even when runtime shell bypass is enabled.

package/dist/ai/models.js CHANGED Viewed

@@ -22,7 +22,8 @@ export function getModels(provider) {
         : [];
 }
 export function calculateCost(model, usage) {
-    usage.cost.input = (model.cost.input / 1000000) * usage.input;
+    const uncachedInput = Math.max(0, usage.input - usage.cacheRead - usage.cacheWrite);
+    usage.cost.input = (model.cost.input / 1000000) * uncachedInput;
     usage.cost.output = (model.cost.output / 1000000) * usage.output;
     usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;
     usage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;

package/dist/ai/providers/anthropic.js CHANGED Viewed

@@ -113,26 +113,26 @@ export const streamAnthropic = (model, context, options) => {
         };
         try {
             const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? '';
-            const client = createClient(model, apiKey, options?.interleavedThinking ?? true, options?.headers);
+            const client = createClient(model, apiKey, options?.interleavedThinking ?? true, options?.headers, options?.fetchOptions);
             const params = buildParams(model, context, options);
             options?.onPayload?.(params);
             const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
             stream.push({ type: 'start', partial: output });
             const blocks = output.content;
+            let uncachedInputTokens = 0;
             for await (const event of anthropicStream) {
                 if (event.type === 'message_start') {
                     // Capture initial token usage from message_start event
                     // This ensures we have input token counts even if the stream is aborted early
-                    output.usage.input = event.message.usage.input_tokens || 0;
+                    uncachedInputTokens = event.message.usage.input_tokens || 0;
                     output.usage.output = event.message.usage.output_tokens || 0;
                     output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
                     output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
-                    // Anthropic doesn't provide total_tokens, compute from components
-                    output.usage.totalTokens =
-                        output.usage.input +
-                            output.usage.output +
-                            output.usage.cacheRead +
-                            output.usage.cacheWrite;
+                    // Anthropic doesn't provide total_tokens; usage.input is normalized
+                    // to total input, including cache buckets.
+                    output.usage.input =
+                        uncachedInputTokens + output.usage.cacheRead + output.usage.cacheWrite;
+                    output.usage.totalTokens = output.usage.input + output.usage.output;
                     calculateCost(model, output.usage);
                 }
                 else if (event.type === 'content_block_start') {
@@ -284,9 +284,6 @@ export const streamAnthropic = (model, context, options) => {
                     }
                     // Only update usage fields if present (not null).
                     // Preserves input_tokens from message_start when proxies omit it in message_delta.
-                    if (event.usage.input_tokens != null) {
-                        output.usage.input = event.usage.input_tokens;
-                    }
                     if (event.usage.output_tokens != null) {
                         output.usage.output = event.usage.output_tokens;
                     }
@@ -296,12 +293,14 @@ export const streamAnthropic = (model, context, options) => {
                     if (event.usage.cache_creation_input_tokens != null) {
                         output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
                     }
-                    // Anthropic doesn't provide total_tokens, compute from components
-                    output.usage.totalTokens =
-                        output.usage.input +
-                            output.usage.output +
-                            output.usage.cacheRead +
-                            output.usage.cacheWrite;
+                    if (event.usage.input_tokens != null) {
+                        uncachedInputTokens = event.usage.input_tokens;
+                    }
+                    // Anthropic doesn't provide total_tokens; usage.input is normalized
+                    // to total input, including cache buckets.
+                    output.usage.input =
+                        uncachedInputTokens + output.usage.cacheRead + output.usage.cacheWrite;
+                    output.usage.totalTokens = output.usage.input + output.usage.output;
                     calculateCost(model, output.usage);
                 }
             }
@@ -388,7 +387,7 @@ export const streamSimpleAnthropic = (model, context, options) => {
 function isOAuthToken(apiKey) {
     return apiKey.includes('sk-ant-oat');
 }
-function createClient(model, apiKey, interleavedThinking, optionsHeaders) {
+function createClient(model, apiKey, interleavedThinking, optionsHeaders, fetchOptions) {
     // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
     // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
     const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
@@ -403,6 +402,7 @@ function createClient(model, apiKey, interleavedThinking, optionsHeaders) {
     const client = new Anthropic({
         apiKey,
         baseURL: model.baseUrl,
+        fetchOptions: fetchOptions,
         defaultHeaders: mergeHeaders({
             accept: 'application/json',
             'anthropic-beta': betaFeatures.join(',')

package/dist/ai/providers/openai-completions.js CHANGED Viewed

@@ -64,7 +64,7 @@ export const streamOpenAICompletions = (model, context, options) => {
         };
         try {
             const apiKey = options?.apiKey || getEnvApiKey(model.provider) || '';
-            const client = createClient(model, context, apiKey, options?.headers);
+            const client = createClient(model, context, apiKey, options?.headers, options?.fetchOptions);
             const params = buildParams(model, context, options);
             options?.onPayload?.(params);
             const openaiStream = await client.chat.completions.create(params, {
@@ -108,17 +108,18 @@ export const streamOpenAICompletions = (model, context, options) => {
                 if (chunk.usage) {
                     const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
                     const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
-                    const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
+                    const input = chunk.usage.prompt_tokens || 0;
                     const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
                     output.usage = {
-                        // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
+                        // OpenAI includes cached tokens in prompt_tokens; cost calculation
+                        // derives uncached input from input - cacheRead - cacheWrite.
                         input,
                         output: outputTokens,
                         cacheRead: cachedTokens,
                         cacheWrite: 0,
                         // Compute totalTokens ourselves since we add reasoning_tokens to output
                         // and some providers (e.g., Groq) don't include them in total_tokens
-                        totalTokens: input + outputTokens + cachedTokens,
+                        totalTokens: input + outputTokens,
                         cost: {
                             input: 0,
                             output: 0,
@@ -287,7 +288,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
         toolChoice
     });
 };
-function createClient(model, context, apiKey, optionsHeaders) {
+function createClient(model, context, apiKey, optionsHeaders, fetchOptions) {
     if (!apiKey) {
         if (!process.env.OPENAI_API_KEY) {
             throw new Error('OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.');
@@ -302,6 +303,7 @@ function createClient(model, context, apiKey, optionsHeaders) {
     return new OpenAI({
         apiKey,
         baseURL: model.baseUrl,
+        fetchOptions: fetchOptions,
         defaultHeaders: headers
     });
 }

package/dist/ai/providers/openai-responses-shared.d.ts CHANGED Viewed

@@ -3,7 +3,7 @@ import type { Api, AssistantMessage, Context, Model, Tool, Usage } from '../type
 import type { AssistantMessageEventStream } from '../utils/event-stream.js';
 export interface OpenAIResponsesStreamOptions {
     serviceTier?: ResponseCreateParamsStreaming['service_tier'];
-    applyServiceTierPricing?: (usage: Usage, serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined) => void;
+    applyServiceTierPricing?: (usage: Usage, serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined, model: Pick<Model<Api>, 'id'>) => void;
 }
 export interface ConvertResponsesMessagesOptions {
     includeSystemPrompt?: boolean;

package/dist/ai/providers/openai-responses-shared.js CHANGED Viewed

@@ -385,8 +385,9 @@ export async function processResponsesStream(openaiStream, output, stream, model
             if (response?.usage) {
                 const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
                 output.usage = {
-                    // OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
-                    input: (response.usage.input_tokens || 0) - cachedTokens,
+                    // OpenAI includes cached tokens in input_tokens; cost calculation
+                    // derives uncached input from input - cacheRead - cacheWrite.
+                    input: response.usage.input_tokens || 0,
                     output: response.usage.output_tokens || 0,
                     cacheRead: cachedTokens,
                     cacheWrite: 0,
@@ -397,7 +398,7 @@ export async function processResponsesStream(openaiStream, output, stream, model
             calculateCost(model, output.usage);
             if (options?.applyServiceTierPricing) {
                 const serviceTier = response?.service_tier ?? options.serviceTier;
-                options.applyServiceTierPricing(output.usage, serviceTier);
+                options.applyServiceTierPricing(output.usage, serviceTier, model);
             }
             // Map status to stop reason
             output.stopReason = mapStopReason(response?.status);

package/dist/ai/providers/openai-responses.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
 import type { ResponseCreateParamsStreaming } from 'openai/resources/responses/responses.js';
-import type { SimpleStreamOptions, StreamFunction, StreamOptions } from '../types.js';
+import type { Api, Model, SimpleStreamOptions, StreamFunction, StreamOptions } from '../types.js';
 export interface OpenAIResponsesOptions extends StreamOptions {
     reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
     reasoningSummary?: 'auto' | 'detailed' | 'concise' | null;
@@ -10,4 +10,5 @@ export interface OpenAIResponsesOptions extends StreamOptions {
  */
 export declare const streamOpenAIResponses: StreamFunction<'openai-responses', OpenAIResponsesOptions>;
 export declare const streamSimpleOpenAIResponses: StreamFunction<'openai-responses', SimpleStreamOptions>;
+export declare function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined, model: Pick<Model<Api>, 'id'>): number;
 //# sourceMappingURL=openai-responses.d.ts.map

package/dist/ai/providers/openai-responses.js CHANGED Viewed

@@ -1,4 +1,5 @@
 import OpenAI from 'openai';
+import { SHORTCUT_MODEL_ID } from '../../model-ids.js';
 import { getEnvApiKey } from '../env-api-keys.js';
 import { supportsXhigh } from '../models.js';
 import { AssistantMessageEventStream } from '../utils/event-stream.js';
@@ -58,7 +59,7 @@ export const streamOpenAIResponses = (model, context, options) => {
         try {
             // Create OpenAI client
             const apiKey = options?.apiKey || getEnvApiKey(model.provider) || '';
-            const client = createClient(model, context, apiKey, options?.headers);
+            const client = createClient(model, context, apiKey, options?.headers, options?.fetchOptions);
             const params = buildParams(model, context, options);
             options?.onPayload?.(params);
             const openaiStream = await client.responses.create(params, options?.signal ? { signal: options.signal } : undefined);
@@ -101,7 +102,7 @@ export const streamSimpleOpenAIResponses = (model, context, options) => {
         reasoningEffort
     });
 };
-function createClient(model, context, apiKey, optionsHeaders) {
+function createClient(model, context, apiKey, optionsHeaders, fetchOptions) {
     if (!apiKey) {
         if (!process.env.OPENAI_API_KEY) {
             throw new Error('OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.');
@@ -116,6 +117,7 @@ function createClient(model, context, apiKey, optionsHeaders) {
     return new OpenAI({
         apiKey,
         baseURL: model.baseUrl,
+        fetchOptions: fetchOptions,
         defaultHeaders: headers
     });
 }
@@ -167,18 +169,20 @@ function buildParams(model, context, options) {
     }
     return params;
 }
-function getServiceTierCostMultiplier(serviceTier) {
+export function getServiceTierCostMultiplier(serviceTier, model) {
     switch (serviceTier) {
         case 'flex':
             return 0.5;
         case 'priority':
+            if (model.id === SHORTCUT_MODEL_ID.Gpt55)
+                return 2.5;
             return 2;
         default:
             return 1;
     }
 }
-function applyServiceTierPricing(usage, serviceTier) {
-    const multiplier = getServiceTierCostMultiplier(serviceTier);
+function applyServiceTierPricing(usage, serviceTier, model) {
+    const multiplier = getServiceTierCostMultiplier(serviceTier, model);
     if (multiplier === 1)
         return;
     usage.cost.input *= multiplier;

package/dist/ai/providers/simple-options.js CHANGED Viewed

@@ -5,6 +5,7 @@ export function buildBaseOptions(model, options, apiKey) {
         signal: options?.signal,
         apiKey: apiKey || options?.apiKey,
         cacheRetention: options?.cacheRetention,
+        fetchOptions: options?.fetchOptions,
         sessionId: options?.sessionId,
         headers: options?.headers,
         onPayload: options?.onPayload,

package/dist/ai/types.d.ts CHANGED Viewed

@@ -32,6 +32,13 @@ export interface StreamOptions {
      * Optional callback for inspecting provider payloads before sending.
      */
     onPayload?: (payload: unknown) => void;
+    /**
+     * Optional fetch settings for provider SDK calls.
+     *
+     * This is primarily used by first-party gateway transports that need browser
+     * cookies for auth; direct provider calls should normally leave it unset.
+     */
+    fetchOptions?: RequestInit;
     /**
      * Optional custom HTTP headers to include in API requests.
      * Merged with provider defaults; can override default headers.
@@ -95,6 +102,7 @@ export interface ToolCall {
     thoughtSignature?: string;
 }
 export interface Usage {
+    /** Total input tokens, including cache read/write buckets when present. */
     input: number;
     output: number;
     cacheRead: number;

package/dist/ai/utils/overflow.js CHANGED Viewed

@@ -98,7 +98,7 @@ export function isContextOverflow(message, contextWindow) {
     }
     // Case 2: Silent overflow (z.ai style) - successful but usage exceeds context
     if (contextWindow && message.stopReason === 'stop') {
-        const inputTokens = message.usage.input + message.usage.cacheRead;
+        const inputTokens = message.usage.input;
         if (inputTokens > contextWindow) {
             return true;
         }

package/dist/app/agent-session.js CHANGED Viewed

@@ -17,9 +17,10 @@ import { createCompactionActions, createErrorRecoveryActions, triggerCompactionI
 import { isContextOverflow } from '../core/session/context-overflow.js';
 import { SessionCompaction } from '../core/session/session-compaction.js';
 import { classifyError, SessionErrorRecovery } from '../core/session/session-error-recovery.js';
+import { getActiveUserMessageId } from '../core/user-message-id.js';
 import { isShortcutFastModeModel } from '../model-ids.js';
 import { formatFileUploadsContext, mergeFileUploads } from './file-uploads.js';
-import { getShortcutLlmProxyAttributionFromMessages } from './providers/shortcut-attribution.js';
+import { createShortcutLlmProxyClient } from './providers/shortcut-llm-proxy-client.js';
 import { expandPromptTemplate } from './resources/prompt-template-expansion.js';
 import { parseSkillBlock } from './resources/skill-block.js';
 import { AUTONOMOUS_STATE_CUSTOM_TYPE, buildAutonomousRefreshPrompt, extractProgressFolder, inferAutonomousStateFromEntries } from './session/autonomous-workflow.js';
@@ -137,11 +138,18 @@ export class AgentSession {
         this._fastModeDisabledReason = this._fastModeAllowed
             ? undefined
             : (config.fastModeDisabledReason ?? 'team');
+        const summaryShortcutLlmProxy = createShortcutLlmProxyClient({
+            modelRegistry: this._modelRegistry,
+            getAttribution: () => ({
+                sessionId: this.sessionManager.getSessionId(),
+                userMessageId: getActiveUserMessageId(this.messages)
+            })
+        });
         this._summaryEmitter = new ToolSummaryEmitter({
             enabled: config.enableToolSummaries ?? true,
             isFeatureEnabled: () => this.settingsManager.getToolSummariesEnabled(),
             isEligibleTool: isToolSummaryEligible,
-            getAttribution: () => getShortcutLlmProxyAttributionFromMessages(this.messages, this.sessionManager.getSessionId()),
+            getShortcutLlmProxyClient: () => summaryShortcutLlmProxy,
             onDiagnostic: (diagnostic) => {
                 this._toolSummaryDiagnostics.push(diagnostic);
                 if (this._toolSummaryDiagnostics.length > 200) {
@@ -388,7 +396,7 @@ export class AgentSession {
             this._summaryEmitter.captureArgs(event.toolCallId, event.args);
         }
         if (event.type === 'tool_execution_end') {
-            this._summaryEmitter.maybeGenerate(event.toolCallId, event.toolName, event.result, this._modelRegistry, (e) => this._emit(e));
+            this._summaryEmitter.maybeGenerate(event.toolCallId, event.toolName, event.result, (e) => this._emit(e));
         }
         // --- Step 3c: Budget token accumulation (sync) ---
         if (event.type === 'message_end' && event.message.role === 'assistant') {

package/dist/app/background/tool-summaries.d.ts CHANGED Viewed

@@ -4,7 +4,7 @@
  * Summaries are ephemeral UX — displayed to the user but never injected
  * into the LLM conversation context.
  */
-import { type ShortcutLlmProxyAttribution } from '../providers/shortcut-attribution.js';
+import { type ShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
 export declare const TOOL_SUMMARY_MODEL = "claude-haiku-4-5-20251001";
 interface ToolSummaryInput {
     toolName: string;
@@ -13,7 +13,7 @@ interface ToolSummaryInput {
 }
 export interface ToolSummaryGenerationResult {
     summary?: string;
-    failureReason?: 'aborted' | 'http-error' | 'invalid-json' | 'missing-text' | 'network-error';
+    failureReason?: 'aborted' | 'http-error' | 'invalid-json' | 'missing-auth' | 'missing-base-url' | 'missing-text' | 'network-error';
     httpStatus?: number;
 }
 /**
@@ -22,7 +22,13 @@ export interface ToolSummaryGenerationResult {
  * Returns the summary string, or undefined if the call fails or is aborted.
  * Never throws — errors are silently swallowed since summaries are best-effort UX.
  */
-export declare function generateToolSummaryDetailed(input: ToolSummaryInput, baseUrl: string, apiKey: string, signal?: AbortSignal, maxOutputTokens?: number, attribution?: ShortcutLlmProxyAttribution): Promise<ToolSummaryGenerationResult>;
-export declare function generateToolSummary(input: ToolSummaryInput, baseUrl: string, apiKey: string, signal?: AbortSignal, maxOutputTokens?: number, attribution?: ShortcutLlmProxyAttribution): Promise<string | undefined>;
+export declare function generateToolSummaryDetailed(input: ToolSummaryInput, shortcutLlmProxy: ShortcutLlmProxyClient, options?: {
+    signal?: AbortSignal;
+    maxOutputTokens?: number;
+}): Promise<ToolSummaryGenerationResult>;
+export declare function generateToolSummary(input: ToolSummaryInput, shortcutLlmProxy: ShortcutLlmProxyClient, options?: {
+    signal?: AbortSignal;
+    maxOutputTokens?: number;
+}): Promise<string | undefined>;
 export {};
 //# sourceMappingURL=tool-summaries.d.ts.map

package/dist/app/background/tool-summaries.js CHANGED Viewed

@@ -4,9 +4,7 @@
  * Summaries are ephemeral UX — displayed to the user but never injected
  * into the LLM conversation context.
  */
-import { APP_NAME } from '../../config.js';
-import { SHORTCUT_LLM_PROXY_ENDPOINTS } from '../../endpoints.js';
-import { buildShortcutLlmProxyBodyFields, buildShortcutLlmProxyHeaders } from '../providers/shortcut-attribution.js';
+import { ShortcutLlmProxyError } from '../providers/shortcut-llm-proxy-client.js';
 import { TOOL_SUMMARY_MAX_OUTPUT_TOKENS } from '../session/tool-summary-policy.js';
 export const TOOL_SUMMARY_MODEL = 'claude-haiku-4-5-20251001';
 const MAX_ARGS_CHARS = 500;
@@ -22,45 +20,26 @@ function truncate(value, maxChars) {
  * Returns the summary string, or undefined if the call fails or is aborted.
  * Never throws — errors are silently swallowed since summaries are best-effort UX.
  */
-export async function generateToolSummaryDetailed(input, baseUrl, apiKey, signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS, attribution) {
+export async function generateToolSummaryDetailed(input, shortcutLlmProxy, options = {}) {
+    const { signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS } = options;
     const userContent = [
         `Tool: ${input.toolName}`,
         `Input: ${truncate(input.args, MAX_ARGS_CHARS)}`,
         `Output: ${truncate(input.result, MAX_RESULT_CHARS)}`
     ].join('\n');
-    const body = {
-        model: TOOL_SUMMARY_MODEL,
-        messages: [
-            { role: 'system', content: SUMMARY_PROMPT },
-            { role: 'human', content: userContent }
-        ],
-        max_output_tokens: maxOutputTokens,
-        effort_level: 'low',
-        thinking_type: 'off',
-        ...buildShortcutLlmProxyBodyFields(attribution)
-    };
     try {
-        const response = await fetch(`${baseUrl}${SHORTCUT_LLM_PROXY_ENDPOINTS.invoke}`, {
-            method: 'POST',
-            headers: {
-                'Content-Type': 'application/json',
-                ...buildShortcutLlmProxyHeaders(attribution),
-                Authorization: `Bearer ${apiKey}`,
-                'User-AgentController': `Mozilla/5.0 (compatible; ${APP_NAME}-coding-agent/1.0)`
-            },
-            body: JSON.stringify(body),
+        const data = await shortcutLlmProxy.invoke({
+            model: TOOL_SUMMARY_MODEL,
+            messages: [
+                { role: 'system', content: SUMMARY_PROMPT },
+                { role: 'human', content: userContent }
+            ],
+            max_output_tokens: maxOutputTokens,
+            effort_level: 'low',
+            thinking_type: 'off'
+        }, {
             signal
         });
-        if (!response.ok) {
-            return { failureReason: 'http-error', httpStatus: response.status };
-        }
-        let data;
-        try {
-            data = await response.json();
-        }
-        catch {
-            return { failureReason: 'invalid-json' };
-        }
         const summary = data.text?.trim();
         if (!summary) {
             return { failureReason: 'missing-text' };
@@ -71,11 +50,17 @@ export async function generateToolSummaryDetailed(input, baseUrl, apiKey, signal
         if (signal?.aborted || (error instanceof DOMException && error.name === 'AbortError')) {
             return { failureReason: 'aborted' };
         }
+        if (error instanceof ShortcutLlmProxyError) {
+            return {
+                failureReason: error.code,
+                httpStatus: error.status
+            };
+        }
         return { failureReason: 'network-error' };
     }
 }
-export async function generateToolSummary(input, baseUrl, apiKey, signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS, attribution) {
-    const result = await generateToolSummaryDetailed(input, baseUrl, apiKey, signal, maxOutputTokens, attribution);
+export async function generateToolSummary(input, shortcutLlmProxy, options = {}) {
+    const result = await generateToolSummaryDetailed(input, shortcutLlmProxy, options);
     return result.summary;
 }
 //# sourceMappingURL=tool-summaries.js.map

package/dist/app/extensions/runner.js CHANGED Viewed

@@ -1,7 +1,9 @@
 /**
  * Extension runner - executes extensions and manages their lifecycle.
  */
+import { getActiveUserMessageId } from '../../core/user-message-id.js';
 import { createDefaultPermissionPolicy } from '../permissions/policy.js';
+import { createShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
 import { runPostTurnHandlers as runPostTurnQueue } from './post-turn-queue.js';
 const noOpUIContext = {
     select: async () => undefined,
@@ -245,12 +247,20 @@ export class ExtensionRunner {
      */
     createContext() {
         const getModel = this.getModel;
+        const shortcutLlmProxy = createShortcutLlmProxyClient({
+            modelRegistry: this.modelRegistry,
+            getAttribution: () => ({
+                sessionId: this.sessionManager.getSessionId(),
+                userMessageId: getActiveUserMessageId(this.getMessagesFn())
+            })
+        });
         return {
             ui: this.uiContext,
             hasUI: this.hasUI(),
             cwd: this.cwd,
             sessionManager: this.sessionManager,
             modelRegistry: this.modelRegistry,
+            shortcutLlmProxy,
             get model() {
                 return getModel();
             },

package/dist/app/extensions/types.d.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import type { RuntimeExtensionRuntime } from '../../contracts/extension-runtime.
 import type { AgentMessage, AgentState, AgentToolResult, AgentToolUpdateCallback, RuntimePermissionPolicy, ToolDefinition, ToolExecutionContext } from '../../core/core-types.js';
 import type { CompactionPreparation, CompactionResult } from '../../core/session/compaction/index.js';
 import type { CustomMessage } from '../messages.js';
+import type { ShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
 import type { BashOperations, BashResult } from '../tools/bash-types.js';
 export type { AgentToolResult, AgentToolUpdateCallback, ToolDefinition };
 /**
@@ -175,6 +176,8 @@ export interface ExtensionContext extends ToolExecutionContext {
     sessionManager: ExtensionSessionView;
     /** Model registry for API key resolution */
     modelRegistry: AgentModelRegistry;
+    /** Shortcut LLM proxy client for direct helper model calls */
+    shortcutLlmProxy: ShortcutLlmProxyClient;
     /** Current model (may be undefined) */
     model: Model<any> | undefined;
     /** Whether the agent is idle (not streaming) */