shortcutxl 0.3.60 → 0.3.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/BINARY-INVENTORY.json +9 -9
  2. package/CHANGELOG.md +4 -0
  3. package/dist/ai/models.js +2 -1
  4. package/dist/ai/providers/anthropic.js +18 -18
  5. package/dist/ai/providers/openai-completions.js +7 -5
  6. package/dist/ai/providers/openai-responses-shared.d.ts +1 -1
  7. package/dist/ai/providers/openai-responses-shared.js +4 -3
  8. package/dist/ai/providers/openai-responses.d.ts +2 -1
  9. package/dist/ai/providers/openai-responses.js +9 -5
  10. package/dist/ai/providers/simple-options.js +1 -0
  11. package/dist/ai/types.d.ts +8 -0
  12. package/dist/ai/utils/overflow.js +1 -1
  13. package/dist/app/agent-session.js +11 -3
  14. package/dist/app/background/tool-summaries.d.ts +10 -4
  15. package/dist/app/background/tool-summaries.js +21 -36
  16. package/dist/app/extensions/runner.js +10 -0
  17. package/dist/app/extensions/types.d.ts +3 -0
  18. package/dist/app/providers/shortcut-llm-proxy-client.d.ts +61 -0
  19. package/dist/app/providers/shortcut-llm-proxy-client.js +135 -0
  20. package/dist/app/providers/shortcut-stream.js +7 -3
  21. package/dist/app/session/tool-summary-emitter.d.ts +14 -5
  22. package/dist/app/session/tool-summary-emitter.js +33 -19
  23. package/dist/app/tools/llm-analysis.d.ts +1 -1
  24. package/dist/app/tools/llm-analysis.js +20 -38
  25. package/dist/app/tools/take-screenshot.d.ts +3 -3
  26. package/dist/app/tools/take-screenshot.js +21 -41
  27. package/dist/app/tools/task/runner.js +10 -0
  28. package/dist/cli.js +464 -223
  29. package/dist/contracts/agent-api.d.ts +3 -3
  30. package/dist/contracts/agent-session-store.d.ts +1 -0
  31. package/dist/contracts/agent-session-store.js +17 -9
  32. package/dist/contracts/model-stream.d.ts +3 -3
  33. package/dist/core/agent-snapshot-builder.js +6 -1
  34. package/dist/core/core-types.d.ts +2 -2
  35. package/dist/core/session/compaction/compaction.js +1 -1
  36. package/dist/core/session/compaction-bridge.d.ts +4 -1
  37. package/dist/core/session/compaction-bridge.js +7 -2
  38. package/dist/core/session/context-overflow.js +1 -1
  39. package/dist/core/session-schema.d.ts +1 -0
  40. package/dist/core/session-schema.js +7 -1
  41. package/dist/embedded-agent/anthropic-messages-transport.d.ts +3 -1
  42. package/dist/embedded-agent/anthropic-messages-transport.js +20 -9
  43. package/dist/embedded-agent/compaction-wiring.d.ts +4 -1
  44. package/dist/embedded-agent/compaction-wiring.js +8 -4
  45. package/dist/embedded-agent/compose.js +34 -6
  46. package/dist/embedded-agent/host-tools/build-tool-list.js +5 -5
  47. package/dist/embedded-agent/host-tools/index.d.ts +2 -0
  48. package/dist/embedded-agent/host-tools/index.js +2 -0
  49. package/dist/embedded-agent/host-tools/mode-host-tools.js +7 -17
  50. package/dist/embedded-agent/host-tools/modify-skill/contract.d.ts +75 -0
  51. package/dist/embedded-agent/host-tools/modify-skill/contract.js +233 -0
  52. package/dist/embedded-agent/host-tools/modify-skill/index.d.ts +3 -0
  53. package/dist/embedded-agent/host-tools/modify-skill/index.js +3 -0
  54. package/dist/embedded-agent/host-tools/read-skill/contract.d.ts +36 -0
  55. package/dist/embedded-agent/host-tools/read-skill/contract.js +113 -0
  56. package/dist/embedded-agent/host-tools/read-skill/index.d.ts +2 -0
  57. package/dist/embedded-agent/host-tools/read-skill/index.js +2 -0
  58. package/dist/embedded-agent/host-tools/registry.d.ts +5 -3
  59. package/dist/embedded-agent/host-tools/registry.js +7 -3
  60. package/dist/embedded-agent/host-tools/timeouts.js +2 -0
  61. package/dist/embedded-agent/host-tools/tool-names.d.ts +2 -0
  62. package/dist/embedded-agent/host-tools/tool-names.js +2 -0
  63. package/dist/embedded-agent/openai-completions-transport.d.ts +5 -1
  64. package/dist/embedded-agent/openai-completions-transport.js +19 -5
  65. package/dist/embedded-agent/openai-responses-transport.d.ts +2 -1
  66. package/dist/embedded-agent/openai-responses-transport.js +13 -3
  67. package/dist/embedded-agent/run-stats.d.ts +2 -0
  68. package/dist/embedded-agent/run-stats.js +15 -9
  69. package/dist/embedded-agent/session-entry-builder.d.ts +1 -1
  70. package/dist/embedded-agent/session-entry-builder.js +2 -2
  71. package/dist/embedded-agent/session-store.js +22 -5
  72. package/dist/embedded-agent/stream-message-state.js +1 -1
  73. package/dist/embedded-agent/stream.d.ts +2 -0
  74. package/dist/embedded-agent/stream.js +13 -8
  75. package/dist/fast-mode.d.ts +8 -0
  76. package/dist/fast-mode.js +47 -0
  77. package/dist/main.js +3 -3
  78. package/dist/model-ids.js +1 -1
  79. package/dist/shell/interactive/interactive-mode.js +11 -4
  80. package/package.json +9 -1
  81. package/plugins/shortcutxl/SKILL.md +6 -5
  82. package/plugins/shortcutxl/skills/shortcutxl/SKILL.md +6 -5
  83. package/skills/advanced-mog-api/api-reference.json +14808 -14808
  84. package/user-docs/dist/shortcutxl-docs.pdf +0 -0
  85. package/xll/ShortcutXL.xll +0 -0
  86. package/xll/python/Lib/site-packages/httpx-0.28.1.dist-info/RECORD +1 -1
  87. package/xll/python/Lib/site-packages/idna-3.18.dist-info/RECORD +1 -1
  88. package/xll/python/Lib/site-packages/pip-26.1.2.dist-info/RECORD +3 -3
  89. package/xll/python/Lib/site-packages/pywin32-311.dist-info/RECORD +2 -2
  90. package/xll/python/Scripts/httpx.exe +0 -0
  91. package/xll/python/Scripts/idna.exe +0 -0
  92. package/xll/python/Scripts/pip.exe +0 -0
  93. package/xll/python/Scripts/pip3.13.exe +0 -0
  94. package/xll/python/Scripts/pip3.exe +0 -0
  95. package/xll/python/Scripts/pywin32_postinstall.exe +0 -0
  96. package/xll/python/Scripts/pywin32_testall.exe +0 -0
  97. package/dist/app/providers/shortcut-attribution.d.ts +0 -13
  98. package/dist/app/providers/shortcut-attribution.js +0 -24
package/BINARY-INVENTORY.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "schemaVersion": 1,
3
- "generatedAt": "2026-06-04T07:10:58.989Z",
3
+ "generatedAt": "2026-06-05T01:51:32.781Z",
4
4
  "package": "shortcutxl",
5
5
  "binaryExtensions": [
6
6
  ".dll",
@@ -11,7 +11,7 @@
11
11
  "files": [
12
12
  {
13
13
  "path": "xll/ShortcutXL.xll",
14
- "sha256": "0f4f7b363b32871ef4afd57fa891e740f91ca5ab60b8d452c36ecf334f443033",
14
+ "sha256": "7c86275b7bfd0b33b422de02a5d46e11004462b545d811bd53b7590a0b2891f9",
15
15
  "source": "ShortcutXL native XLL build",
16
16
  "version": "package",
17
17
  "builtBy": "shortcut",
@@ -523,7 +523,7 @@
523
523
  },
524
524
  {
525
525
  "path": "xll/python/Scripts/httpx.exe",
526
- "sha256": "0ab1e42994f3ac9177d03abcf014f3eff940ca307b923ddf2cb2036b2c9eaadf",
526
+ "sha256": "6443bf0359e479a00549f26cfaaecebec067687bd721258108880e3e4b8c0cf9",
527
527
  "source": "httpx console launcher installed into embedded Python",
528
528
  "version": "see packaged httpx distribution",
529
529
  "builtBy": "third-party",
@@ -531,7 +531,7 @@
531
531
  },
532
532
  {
533
533
  "path": "xll/python/Scripts/idna.exe",
534
- "sha256": "8b8baca0e902b3bd1757d5d14155cb1178e8825c232d67cd7b0c7e120c6fa5ff",
534
+ "sha256": "eb0bee35c9a602f93ee605c2a6d6df3069be9e2be8a4b5131b4e7599a438d168",
535
535
  "source": "Python package console launcher installed into embedded Python",
536
536
  "version": "see owning Python package metadata in site-packages",
537
537
  "builtBy": "third-party",
@@ -539,7 +539,7 @@
539
539
  },
540
540
  {
541
541
  "path": "xll/python/Scripts/pip.exe",
542
- "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
542
+ "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
543
543
  "source": "pip console launcher installed into embedded Python",
544
544
  "version": "see packaged pip distribution",
545
545
  "builtBy": "third-party",
@@ -547,7 +547,7 @@
547
547
  },
548
548
  {
549
549
  "path": "xll/python/Scripts/pip3.13.exe",
550
- "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
550
+ "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
551
551
  "source": "pip console launcher installed into embedded Python",
552
552
  "version": "see packaged pip distribution",
553
553
  "builtBy": "third-party",
@@ -555,7 +555,7 @@
555
555
  },
556
556
  {
557
557
  "path": "xll/python/Scripts/pip3.exe",
558
- "sha256": "928216be655a94bb860d00a6a4b420d26f37fa5daa13f6d80a09aa90d87ba5f1",
558
+ "sha256": "1c0d88a63c83e6bb9ce9480ab1e5b1240cf391656806130748441871958fe337",
559
559
  "source": "pip console launcher installed into embedded Python",
560
560
  "version": "see packaged pip distribution",
561
561
  "builtBy": "third-party",
@@ -563,7 +563,7 @@
563
563
  },
564
564
  {
565
565
  "path": "xll/python/Scripts/pywin32_postinstall.exe",
566
- "sha256": "cd410066e88131c0f203025dc86306a2f6498ed54abe9ff27ef349f7974c3521",
566
+ "sha256": "33c9bf469edc79cbdea68966e2b50eb15db5ba63278fec1befd274f00ef26bf4",
567
567
  "source": "Python package console launcher installed into embedded Python",
568
568
  "version": "see owning Python package metadata in site-packages",
569
569
  "builtBy": "third-party",
@@ -571,7 +571,7 @@
571
571
  },
572
572
  {
573
573
  "path": "xll/python/Scripts/pywin32_testall.exe",
574
- "sha256": "af93cdcdd7dfeda2a71bedb565329960ca0d8621cb3dd36365ce197a4c44968d",
574
+ "sha256": "f4ab6b20be97a838b4e2eb369c603136e8bcf7d8b1829dd30e7fd7a681b48e65",
575
575
  "source": "Python package console launcher installed into embedded Python",
576
576
  "version": "see owning Python package metadata in site-packages",
577
577
  "builtBy": "third-party",
package/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.3.61]
4
+
5
+ - **ShortcutXL plugin guidance** - Refreshed the bundled Claude and Codex ShortcutXL plugin guidance.
6
+
3
7
  ## [0.3.60]
4
8
 
5
9
  - **Safer shell approvals** - High-risk Outlook-style send and delete shell operations now require explicit one-time approval even when runtime shell bypass is enabled.
package/dist/ai/models.js CHANGED
@@ -22,7 +22,8 @@ export function getModels(provider) {
22
22
  : [];
23
23
  }
24
24
  export function calculateCost(model, usage) {
25
- usage.cost.input = (model.cost.input / 1000000) * usage.input;
25
+ const uncachedInput = Math.max(0, usage.input - usage.cacheRead - usage.cacheWrite);
26
+ usage.cost.input = (model.cost.input / 1000000) * uncachedInput;
26
27
  usage.cost.output = (model.cost.output / 1000000) * usage.output;
27
28
  usage.cost.cacheRead = (model.cost.cacheRead / 1000000) * usage.cacheRead;
28
29
  usage.cost.cacheWrite = (model.cost.cacheWrite / 1000000) * usage.cacheWrite;
package/dist/ai/providers/anthropic.js CHANGED
@@ -113,26 +113,26 @@ export const streamAnthropic = (model, context, options) => {
113
113
  };
114
114
  try {
115
115
  const apiKey = options?.apiKey ?? getEnvApiKey(model.provider) ?? '';
116
- const client = createClient(model, apiKey, options?.interleavedThinking ?? true, options?.headers);
116
+ const client = createClient(model, apiKey, options?.interleavedThinking ?? true, options?.headers, options?.fetchOptions);
117
117
  const params = buildParams(model, context, options);
118
118
  options?.onPayload?.(params);
119
119
  const anthropicStream = client.messages.stream({ ...params, stream: true }, { signal: options?.signal });
120
120
  stream.push({ type: 'start', partial: output });
121
121
  const blocks = output.content;
122
+ let uncachedInputTokens = 0;
122
123
  for await (const event of anthropicStream) {
123
124
  if (event.type === 'message_start') {
124
125
  // Capture initial token usage from message_start event
125
126
  // This ensures we have input token counts even if the stream is aborted early
126
- output.usage.input = event.message.usage.input_tokens || 0;
127
+ uncachedInputTokens = event.message.usage.input_tokens || 0;
127
128
  output.usage.output = event.message.usage.output_tokens || 0;
128
129
  output.usage.cacheRead = event.message.usage.cache_read_input_tokens || 0;
129
130
  output.usage.cacheWrite = event.message.usage.cache_creation_input_tokens || 0;
130
- // Anthropic doesn't provide total_tokens, compute from components
131
- output.usage.totalTokens =
132
- output.usage.input +
133
- output.usage.output +
134
- output.usage.cacheRead +
135
- output.usage.cacheWrite;
131
+ // Anthropic doesn't provide total_tokens; usage.input is normalized
132
+ // to total input, including cache buckets.
133
+ output.usage.input =
134
+ uncachedInputTokens + output.usage.cacheRead + output.usage.cacheWrite;
135
+ output.usage.totalTokens = output.usage.input + output.usage.output;
136
136
  calculateCost(model, output.usage);
137
137
  }
138
138
  else if (event.type === 'content_block_start') {
@@ -284,9 +284,6 @@ export const streamAnthropic = (model, context, options) => {
284
284
  }
285
285
  // Only update usage fields if present (not null).
286
286
  // Preserves input_tokens from message_start when proxies omit it in message_delta.
287
- if (event.usage.input_tokens != null) {
288
- output.usage.input = event.usage.input_tokens;
289
- }
290
287
  if (event.usage.output_tokens != null) {
291
288
  output.usage.output = event.usage.output_tokens;
292
289
  }
@@ -296,12 +293,14 @@ export const streamAnthropic = (model, context, options) => {
296
293
  if (event.usage.cache_creation_input_tokens != null) {
297
294
  output.usage.cacheWrite = event.usage.cache_creation_input_tokens;
298
295
  }
299
- // Anthropic doesn't provide total_tokens, compute from components
300
- output.usage.totalTokens =
301
- output.usage.input +
302
- output.usage.output +
303
- output.usage.cacheRead +
304
- output.usage.cacheWrite;
296
+ if (event.usage.input_tokens != null) {
297
+ uncachedInputTokens = event.usage.input_tokens;
298
+ }
299
+ // Anthropic doesn't provide total_tokens; usage.input is normalized
300
+ // to total input, including cache buckets.
301
+ output.usage.input =
302
+ uncachedInputTokens + output.usage.cacheRead + output.usage.cacheWrite;
303
+ output.usage.totalTokens = output.usage.input + output.usage.output;
305
304
  calculateCost(model, output.usage);
306
305
  }
307
306
  }
@@ -388,7 +387,7 @@ export const streamSimpleAnthropic = (model, context, options) => {
388
387
  function isOAuthToken(apiKey) {
389
388
  return apiKey.includes('sk-ant-oat');
390
389
  }
391
- function createClient(model, apiKey, interleavedThinking, optionsHeaders) {
390
+ function createClient(model, apiKey, interleavedThinking, optionsHeaders, fetchOptions) {
392
391
  // Adaptive thinking models (Opus 4.6, Sonnet 4.6) have interleaved thinking built-in.
393
392
  // The beta header is deprecated on Opus 4.6 and redundant on Sonnet 4.6, so skip it.
394
393
  const needsInterleavedBeta = interleavedThinking && !supportsAdaptiveThinking(model.id);
@@ -403,6 +402,7 @@ function createClient(model, apiKey, interleavedThinking, optionsHeaders) {
403
402
  const client = new Anthropic({
404
403
  apiKey,
405
404
  baseURL: model.baseUrl,
405
+ fetchOptions: fetchOptions,
406
406
  defaultHeaders: mergeHeaders({
407
407
  accept: 'application/json',
408
408
  'anthropic-beta': betaFeatures.join(',')
package/dist/ai/providers/openai-completions.js CHANGED
@@ -64,7 +64,7 @@ export const streamOpenAICompletions = (model, context, options) => {
64
64
  };
65
65
  try {
66
66
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || '';
67
- const client = createClient(model, context, apiKey, options?.headers);
67
+ const client = createClient(model, context, apiKey, options?.headers, options?.fetchOptions);
68
68
  const params = buildParams(model, context, options);
69
69
  options?.onPayload?.(params);
70
70
  const openaiStream = await client.chat.completions.create(params, {
@@ -108,17 +108,18 @@ export const streamOpenAICompletions = (model, context, options) => {
108
108
  if (chunk.usage) {
109
109
  const cachedTokens = chunk.usage.prompt_tokens_details?.cached_tokens || 0;
110
110
  const reasoningTokens = chunk.usage.completion_tokens_details?.reasoning_tokens || 0;
111
- const input = (chunk.usage.prompt_tokens || 0) - cachedTokens;
111
+ const input = chunk.usage.prompt_tokens || 0;
112
112
  const outputTokens = (chunk.usage.completion_tokens || 0) + reasoningTokens;
113
113
  output.usage = {
114
- // OpenAI includes cached tokens in prompt_tokens, so subtract to get non-cached input
114
+ // OpenAI includes cached tokens in prompt_tokens; cost calculation
115
+ // derives uncached input from input - cacheRead - cacheWrite.
115
116
  input,
116
117
  output: outputTokens,
117
118
  cacheRead: cachedTokens,
118
119
  cacheWrite: 0,
119
120
  // Compute totalTokens ourselves since we add reasoning_tokens to output
120
121
  // and some providers (e.g., Groq) don't include them in total_tokens
121
- totalTokens: input + outputTokens + cachedTokens,
122
+ totalTokens: input + outputTokens,
122
123
  cost: {
123
124
  input: 0,
124
125
  output: 0,
@@ -287,7 +288,7 @@ export const streamSimpleOpenAICompletions = (model, context, options) => {
287
288
  toolChoice
288
289
  });
289
290
  };
290
- function createClient(model, context, apiKey, optionsHeaders) {
291
+ function createClient(model, context, apiKey, optionsHeaders, fetchOptions) {
291
292
  if (!apiKey) {
292
293
  if (!process.env.OPENAI_API_KEY) {
293
294
  throw new Error('OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.');
@@ -302,6 +303,7 @@ function createClient(model, context, apiKey, optionsHeaders) {
302
303
  return new OpenAI({
303
304
  apiKey,
304
305
  baseURL: model.baseUrl,
306
+ fetchOptions: fetchOptions,
305
307
  defaultHeaders: headers
306
308
  });
307
309
  }
package/dist/ai/providers/openai-responses-shared.d.ts CHANGED
@@ -3,7 +3,7 @@ import type { Api, AssistantMessage, Context, Model, Tool, Usage } from '../type
3
3
  import type { AssistantMessageEventStream } from '../utils/event-stream.js';
4
4
  export interface OpenAIResponsesStreamOptions {
5
5
  serviceTier?: ResponseCreateParamsStreaming['service_tier'];
6
- applyServiceTierPricing?: (usage: Usage, serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined) => void;
6
+ applyServiceTierPricing?: (usage: Usage, serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined, model: Pick<Model<Api>, 'id'>) => void;
7
7
  }
8
8
  export interface ConvertResponsesMessagesOptions {
9
9
  includeSystemPrompt?: boolean;
package/dist/ai/providers/openai-responses-shared.js CHANGED
@@ -385,8 +385,9 @@ export async function processResponsesStream(openaiStream, output, stream, model
385
385
  if (response?.usage) {
386
386
  const cachedTokens = response.usage.input_tokens_details?.cached_tokens || 0;
387
387
  output.usage = {
388
- // OpenAI includes cached tokens in input_tokens, so subtract to get non-cached input
389
- input: (response.usage.input_tokens || 0) - cachedTokens,
388
+ // OpenAI includes cached tokens in input_tokens; cost calculation
389
+ // derives uncached input from input - cacheRead - cacheWrite.
390
+ input: response.usage.input_tokens || 0,
390
391
  output: response.usage.output_tokens || 0,
391
392
  cacheRead: cachedTokens,
392
393
  cacheWrite: 0,
@@ -397,7 +398,7 @@ export async function processResponsesStream(openaiStream, output, stream, model
397
398
  calculateCost(model, output.usage);
398
399
  if (options?.applyServiceTierPricing) {
399
400
  const serviceTier = response?.service_tier ?? options.serviceTier;
400
- options.applyServiceTierPricing(output.usage, serviceTier);
401
+ options.applyServiceTierPricing(output.usage, serviceTier, model);
401
402
  }
402
403
  // Map status to stop reason
403
404
  output.stopReason = mapStopReason(response?.status);
package/dist/ai/providers/openai-responses.d.ts CHANGED
@@ -1,5 +1,5 @@
1
1
  import type { ResponseCreateParamsStreaming } from 'openai/resources/responses/responses.js';
2
- import type { SimpleStreamOptions, StreamFunction, StreamOptions } from '../types.js';
2
+ import type { Api, Model, SimpleStreamOptions, StreamFunction, StreamOptions } from '../types.js';
3
3
  export interface OpenAIResponsesOptions extends StreamOptions {
4
4
  reasoningEffort?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh';
5
5
  reasoningSummary?: 'auto' | 'detailed' | 'concise' | null;
@@ -10,4 +10,5 @@ export interface OpenAIResponsesOptions extends StreamOptions {
10
10
  */
11
11
  export declare const streamOpenAIResponses: StreamFunction<'openai-responses', OpenAIResponsesOptions>;
12
12
  export declare const streamSimpleOpenAIResponses: StreamFunction<'openai-responses', SimpleStreamOptions>;
13
+ export declare function getServiceTierCostMultiplier(serviceTier: ResponseCreateParamsStreaming['service_tier'] | undefined, model: Pick<Model<Api>, 'id'>): number;
13
14
  //# sourceMappingURL=openai-responses.d.ts.map
package/dist/ai/providers/openai-responses.js CHANGED
@@ -1,4 +1,5 @@
1
1
  import OpenAI from 'openai';
2
+ import { SHORTCUT_MODEL_ID } from '../../model-ids.js';
2
3
  import { getEnvApiKey } from '../env-api-keys.js';
3
4
  import { supportsXhigh } from '../models.js';
4
5
  import { AssistantMessageEventStream } from '../utils/event-stream.js';
@@ -58,7 +59,7 @@ export const streamOpenAIResponses = (model, context, options) => {
58
59
  try {
59
60
  // Create OpenAI client
60
61
  const apiKey = options?.apiKey || getEnvApiKey(model.provider) || '';
61
- const client = createClient(model, context, apiKey, options?.headers);
62
+ const client = createClient(model, context, apiKey, options?.headers, options?.fetchOptions);
62
63
  const params = buildParams(model, context, options);
63
64
  options?.onPayload?.(params);
64
65
  const openaiStream = await client.responses.create(params, options?.signal ? { signal: options.signal } : undefined);
@@ -101,7 +102,7 @@ export const streamSimpleOpenAIResponses = (model, context, options) => {
101
102
  reasoningEffort
102
103
  });
103
104
  };
104
- function createClient(model, context, apiKey, optionsHeaders) {
105
+ function createClient(model, context, apiKey, optionsHeaders, fetchOptions) {
105
106
  if (!apiKey) {
106
107
  if (!process.env.OPENAI_API_KEY) {
107
108
  throw new Error('OpenAI API key is required. Set OPENAI_API_KEY environment variable or pass it as an argument.');
@@ -116,6 +117,7 @@ function createClient(model, context, apiKey, optionsHeaders) {
116
117
  return new OpenAI({
117
118
  apiKey,
118
119
  baseURL: model.baseUrl,
120
+ fetchOptions: fetchOptions,
119
121
  defaultHeaders: headers
120
122
  });
121
123
  }
@@ -167,18 +169,20 @@ function buildParams(model, context, options) {
167
169
  }
168
170
  return params;
169
171
  }
170
- function getServiceTierCostMultiplier(serviceTier) {
172
+ export function getServiceTierCostMultiplier(serviceTier, model) {
171
173
  switch (serviceTier) {
172
174
  case 'flex':
173
175
  return 0.5;
174
176
  case 'priority':
177
+ if (model.id === SHORTCUT_MODEL_ID.Gpt55)
178
+ return 2.5;
175
179
  return 2;
176
180
  default:
177
181
  return 1;
178
182
  }
179
183
  }
180
- function applyServiceTierPricing(usage, serviceTier) {
181
- const multiplier = getServiceTierCostMultiplier(serviceTier);
184
+ function applyServiceTierPricing(usage, serviceTier, model) {
185
+ const multiplier = getServiceTierCostMultiplier(serviceTier, model);
182
186
  if (multiplier === 1)
183
187
  return;
184
188
  usage.cost.input *= multiplier;
package/dist/ai/providers/simple-options.js CHANGED
@@ -5,6 +5,7 @@ export function buildBaseOptions(model, options, apiKey) {
5
5
  signal: options?.signal,
6
6
  apiKey: apiKey || options?.apiKey,
7
7
  cacheRetention: options?.cacheRetention,
8
+ fetchOptions: options?.fetchOptions,
8
9
  sessionId: options?.sessionId,
9
10
  headers: options?.headers,
10
11
  onPayload: options?.onPayload,
package/dist/ai/types.d.ts CHANGED
@@ -32,6 +32,13 @@ export interface StreamOptions {
32
32
  * Optional callback for inspecting provider payloads before sending.
33
33
  */
34
34
  onPayload?: (payload: unknown) => void;
35
+ /**
36
+ * Optional fetch settings for provider SDK calls.
37
+ *
38
+ * This is primarily used by first-party gateway transports that need browser
39
+ * cookies for auth; direct provider calls should normally leave it unset.
40
+ */
41
+ fetchOptions?: RequestInit;
35
42
  /**
36
43
  * Optional custom HTTP headers to include in API requests.
37
44
  * Merged with provider defaults; can override default headers.
@@ -95,6 +102,7 @@ export interface ToolCall {
95
102
  thoughtSignature?: string;
96
103
  }
97
104
  export interface Usage {
105
+ /** Total input tokens, including cache read/write buckets when present. */
98
106
  input: number;
99
107
  output: number;
100
108
  cacheRead: number;
package/dist/ai/utils/overflow.js CHANGED
@@ -98,7 +98,7 @@ export function isContextOverflow(message, contextWindow) {
98
98
  }
99
99
  // Case 2: Silent overflow (z.ai style) - successful but usage exceeds context
100
100
  if (contextWindow && message.stopReason === 'stop') {
101
- const inputTokens = message.usage.input + message.usage.cacheRead;
101
+ const inputTokens = message.usage.input;
102
102
  if (inputTokens > contextWindow) {
103
103
  return true;
104
104
  }
package/dist/app/agent-session.js CHANGED
@@ -17,9 +17,10 @@ import { createCompactionActions, createErrorRecoveryActions, triggerCompactionI
17
17
  import { isContextOverflow } from '../core/session/context-overflow.js';
18
18
  import { SessionCompaction } from '../core/session/session-compaction.js';
19
19
  import { classifyError, SessionErrorRecovery } from '../core/session/session-error-recovery.js';
20
+ import { getActiveUserMessageId } from '../core/user-message-id.js';
20
21
  import { isShortcutFastModeModel } from '../model-ids.js';
21
22
  import { formatFileUploadsContext, mergeFileUploads } from './file-uploads.js';
22
- import { getShortcutLlmProxyAttributionFromMessages } from './providers/shortcut-attribution.js';
23
+ import { createShortcutLlmProxyClient } from './providers/shortcut-llm-proxy-client.js';
23
24
  import { expandPromptTemplate } from './resources/prompt-template-expansion.js';
24
25
  import { parseSkillBlock } from './resources/skill-block.js';
25
26
  import { AUTONOMOUS_STATE_CUSTOM_TYPE, buildAutonomousRefreshPrompt, extractProgressFolder, inferAutonomousStateFromEntries } from './session/autonomous-workflow.js';
@@ -137,11 +138,18 @@ export class AgentSession {
137
138
  this._fastModeDisabledReason = this._fastModeAllowed
138
139
  ? undefined
139
140
  : (config.fastModeDisabledReason ?? 'team');
141
+ const summaryShortcutLlmProxy = createShortcutLlmProxyClient({
142
+ modelRegistry: this._modelRegistry,
143
+ getAttribution: () => ({
144
+ sessionId: this.sessionManager.getSessionId(),
145
+ userMessageId: getActiveUserMessageId(this.messages)
146
+ })
147
+ });
140
148
  this._summaryEmitter = new ToolSummaryEmitter({
141
149
  enabled: config.enableToolSummaries ?? true,
142
150
  isFeatureEnabled: () => this.settingsManager.getToolSummariesEnabled(),
143
151
  isEligibleTool: isToolSummaryEligible,
144
- getAttribution: () => getShortcutLlmProxyAttributionFromMessages(this.messages, this.sessionManager.getSessionId()),
152
+ getShortcutLlmProxyClient: () => summaryShortcutLlmProxy,
145
153
  onDiagnostic: (diagnostic) => {
146
154
  this._toolSummaryDiagnostics.push(diagnostic);
147
155
  if (this._toolSummaryDiagnostics.length > 200) {
@@ -388,7 +396,7 @@ export class AgentSession {
388
396
  this._summaryEmitter.captureArgs(event.toolCallId, event.args);
389
397
  }
390
398
  if (event.type === 'tool_execution_end') {
391
- this._summaryEmitter.maybeGenerate(event.toolCallId, event.toolName, event.result, this._modelRegistry, (e) => this._emit(e));
399
+ this._summaryEmitter.maybeGenerate(event.toolCallId, event.toolName, event.result, (e) => this._emit(e));
392
400
  }
393
401
  // --- Step 3c: Budget token accumulation (sync) ---
394
402
  if (event.type === 'message_end' && event.message.role === 'assistant') {
package/dist/app/background/tool-summaries.d.ts CHANGED
@@ -4,7 +4,7 @@
4
4
  * Summaries are ephemeral UX — displayed to the user but never injected
5
5
  * into the LLM conversation context.
6
6
  */
7
- import { type ShortcutLlmProxyAttribution } from '../providers/shortcut-attribution.js';
7
+ import { type ShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
8
8
  export declare const TOOL_SUMMARY_MODEL = "claude-haiku-4-5-20251001";
9
9
  interface ToolSummaryInput {
10
10
  toolName: string;
@@ -13,7 +13,7 @@ interface ToolSummaryInput {
13
13
  }
14
14
  export interface ToolSummaryGenerationResult {
15
15
  summary?: string;
16
- failureReason?: 'aborted' | 'http-error' | 'invalid-json' | 'missing-text' | 'network-error';
16
+ failureReason?: 'aborted' | 'http-error' | 'invalid-json' | 'missing-auth' | 'missing-base-url' | 'missing-text' | 'network-error';
17
17
  httpStatus?: number;
18
18
  }
19
19
  /**
@@ -22,7 +22,13 @@ export interface ToolSummaryGenerationResult {
22
22
  * Returns the summary string, or undefined if the call fails or is aborted.
23
23
  * Never throws — errors are silently swallowed since summaries are best-effort UX.
24
24
  */
25
- export declare function generateToolSummaryDetailed(input: ToolSummaryInput, baseUrl: string, apiKey: string, signal?: AbortSignal, maxOutputTokens?: number, attribution?: ShortcutLlmProxyAttribution): Promise<ToolSummaryGenerationResult>;
26
- export declare function generateToolSummary(input: ToolSummaryInput, baseUrl: string, apiKey: string, signal?: AbortSignal, maxOutputTokens?: number, attribution?: ShortcutLlmProxyAttribution): Promise<string | undefined>;
25
+ export declare function generateToolSummaryDetailed(input: ToolSummaryInput, shortcutLlmProxy: ShortcutLlmProxyClient, options?: {
26
+ signal?: AbortSignal;
27
+ maxOutputTokens?: number;
28
+ }): Promise<ToolSummaryGenerationResult>;
29
+ export declare function generateToolSummary(input: ToolSummaryInput, shortcutLlmProxy: ShortcutLlmProxyClient, options?: {
30
+ signal?: AbortSignal;
31
+ maxOutputTokens?: number;
32
+ }): Promise<string | undefined>;
27
33
  export {};
28
34
  //# sourceMappingURL=tool-summaries.d.ts.map
package/dist/app/background/tool-summaries.js CHANGED
@@ -4,9 +4,7 @@
4
4
  * Summaries are ephemeral UX — displayed to the user but never injected
5
5
  * into the LLM conversation context.
6
6
  */
7
- import { APP_NAME } from '../../config.js';
8
- import { SHORTCUT_LLM_PROXY_ENDPOINTS } from '../../endpoints.js';
9
- import { buildShortcutLlmProxyBodyFields, buildShortcutLlmProxyHeaders } from '../providers/shortcut-attribution.js';
7
+ import { ShortcutLlmProxyError } from '../providers/shortcut-llm-proxy-client.js';
10
8
  import { TOOL_SUMMARY_MAX_OUTPUT_TOKENS } from '../session/tool-summary-policy.js';
11
9
  export const TOOL_SUMMARY_MODEL = 'claude-haiku-4-5-20251001';
12
10
  const MAX_ARGS_CHARS = 500;
@@ -22,45 +20,26 @@ function truncate(value, maxChars) {
22
20
  * Returns the summary string, or undefined if the call fails or is aborted.
23
21
  * Never throws — errors are silently swallowed since summaries are best-effort UX.
24
22
  */
25
- export async function generateToolSummaryDetailed(input, baseUrl, apiKey, signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS, attribution) {
23
+ export async function generateToolSummaryDetailed(input, shortcutLlmProxy, options = {}) {
24
+ const { signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS } = options;
26
25
  const userContent = [
27
26
  `Tool: ${input.toolName}`,
28
27
  `Input: ${truncate(input.args, MAX_ARGS_CHARS)}`,
29
28
  `Output: ${truncate(input.result, MAX_RESULT_CHARS)}`
30
29
  ].join('\n');
31
- const body = {
32
- model: TOOL_SUMMARY_MODEL,
33
- messages: [
34
- { role: 'system', content: SUMMARY_PROMPT },
35
- { role: 'human', content: userContent }
36
- ],
37
- max_output_tokens: maxOutputTokens,
38
- effort_level: 'low',
39
- thinking_type: 'off',
40
- ...buildShortcutLlmProxyBodyFields(attribution)
41
- };
42
30
  try {
43
- const response = await fetch(`${baseUrl}${SHORTCUT_LLM_PROXY_ENDPOINTS.invoke}`, {
44
- method: 'POST',
45
- headers: {
46
- 'Content-Type': 'application/json',
47
- ...buildShortcutLlmProxyHeaders(attribution),
48
- Authorization: `Bearer ${apiKey}`,
49
- 'User-AgentController': `Mozilla/5.0 (compatible; ${APP_NAME}-coding-agent/1.0)`
50
- },
51
- body: JSON.stringify(body),
31
+ const data = await shortcutLlmProxy.invoke({
32
+ model: TOOL_SUMMARY_MODEL,
33
+ messages: [
34
+ { role: 'system', content: SUMMARY_PROMPT },
35
+ { role: 'human', content: userContent }
36
+ ],
37
+ max_output_tokens: maxOutputTokens,
38
+ effort_level: 'low',
39
+ thinking_type: 'off'
40
+ }, {
52
41
  signal
53
42
  });
54
- if (!response.ok) {
55
- return { failureReason: 'http-error', httpStatus: response.status };
56
- }
57
- let data;
58
- try {
59
- data = await response.json();
60
- }
61
- catch {
62
- return { failureReason: 'invalid-json' };
63
- }
64
43
  const summary = data.text?.trim();
65
44
  if (!summary) {
66
45
  return { failureReason: 'missing-text' };
@@ -71,11 +50,17 @@ export async function generateToolSummaryDetailed(input, baseUrl, apiKey, signal
71
50
  if (signal?.aborted || (error instanceof DOMException && error.name === 'AbortError')) {
72
51
  return { failureReason: 'aborted' };
73
52
  }
53
+ if (error instanceof ShortcutLlmProxyError) {
54
+ return {
55
+ failureReason: error.code,
56
+ httpStatus: error.status
57
+ };
58
+ }
74
59
  return { failureReason: 'network-error' };
75
60
  }
76
61
  }
77
- export async function generateToolSummary(input, baseUrl, apiKey, signal, maxOutputTokens = TOOL_SUMMARY_MAX_OUTPUT_TOKENS, attribution) {
78
- const result = await generateToolSummaryDetailed(input, baseUrl, apiKey, signal, maxOutputTokens, attribution);
62
+ export async function generateToolSummary(input, shortcutLlmProxy, options = {}) {
63
+ const result = await generateToolSummaryDetailed(input, shortcutLlmProxy, options);
79
64
  return result.summary;
80
65
  }
81
66
  //# sourceMappingURL=tool-summaries.js.map
@@ -1,7 +1,9 @@
1
1
  /**
2
2
  * Extension runner - executes extensions and manages their lifecycle.
3
3
  */
4
+ import { getActiveUserMessageId } from '../../core/user-message-id.js';
4
5
  import { createDefaultPermissionPolicy } from '../permissions/policy.js';
6
+ import { createShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
5
7
  import { runPostTurnHandlers as runPostTurnQueue } from './post-turn-queue.js';
6
8
  const noOpUIContext = {
7
9
  select: async () => undefined,
@@ -245,12 +247,20 @@ export class ExtensionRunner {
245
247
  */
246
248
  createContext() {
247
249
  const getModel = this.getModel;
250
+ const shortcutLlmProxy = createShortcutLlmProxyClient({
251
+ modelRegistry: this.modelRegistry,
252
+ getAttribution: () => ({
253
+ sessionId: this.sessionManager.getSessionId(),
254
+ userMessageId: getActiveUserMessageId(this.getMessagesFn())
255
+ })
256
+ });
248
257
  return {
249
258
  ui: this.uiContext,
250
259
  hasUI: this.hasUI(),
251
260
  cwd: this.cwd,
252
261
  sessionManager: this.sessionManager,
253
262
  modelRegistry: this.modelRegistry,
263
+ shortcutLlmProxy,
254
264
  get model() {
255
265
  return getModel();
256
266
  },
@@ -14,6 +14,7 @@ import type { RuntimeExtensionRuntime } from '../../contracts/extension-runtime.
14
14
  import type { AgentMessage, AgentState, AgentToolResult, AgentToolUpdateCallback, RuntimePermissionPolicy, ToolDefinition, ToolExecutionContext } from '../../core/core-types.js';
15
15
  import type { CompactionPreparation, CompactionResult } from '../../core/session/compaction/index.js';
16
16
  import type { CustomMessage } from '../messages.js';
17
+ import type { ShortcutLlmProxyClient } from '../providers/shortcut-llm-proxy-client.js';
17
18
  import type { BashOperations, BashResult } from '../tools/bash-types.js';
18
19
  export type { AgentToolResult, AgentToolUpdateCallback, ToolDefinition };
19
20
  /**
@@ -175,6 +176,8 @@ export interface ExtensionContext extends ToolExecutionContext {
175
176
  sessionManager: ExtensionSessionView;
176
177
  /** Model registry for API key resolution */
177
178
  modelRegistry: AgentModelRegistry;
179
+ /** Shortcut LLM proxy client for direct helper model calls */
180
+ shortcutLlmProxy: ShortcutLlmProxyClient;
178
181
  /** Current model (may be undefined) */
179
182
  model: Model<any> | undefined;
180
183
  /** Whether the agent is idle (not streaming) */