@ljoukov/llm 7.0.1 → 7.0.3

This diff shows the changes between publicly released versions of this package, exactly as they appear in their respective supported public registries. It is provided for informational purposes only.
package/README.md CHANGED
@@ -491,6 +491,18 @@ const { value } = await generateJson({
491
491
  console.log(value.ok, value.message);
492
492
  ```
493
493
 
494
+ For Gemini calls that need a specific thinking budget, pass `thinkingBudget`. When set, it takes precedence over
495
+ both the model default and `thinkingLevel` for Gemini requests:
496
+
497
+ ```ts
498
+ const { value } = await generateJson({
499
+ model: "gemini-flash-latest",
500
+ input: "Return a JSON object with ok=true and message='hello'.",
501
+ schema,
502
+ thinkingBudget: 0,
503
+ });
504
+ ```
505
+
494
506
  ### Streaming JSON outputs
495
507
 
496
508
  Use `streamJson()` to stream thought deltas and get best-effort partial JSON snapshots while the model is still
package/dist/index.cjs CHANGED
@@ -35,6 +35,7 @@ __export(index_exports, {
35
35
  CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION: () => CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION,
36
36
  CODEX_APPLY_PATCH_LARK_GRAMMAR: () => CODEX_APPLY_PATCH_LARK_GRAMMAR,
37
37
  DEFAULT_FILE_TTL_SECONDS: () => DEFAULT_FILE_TTL_SECONDS,
38
+ DEFAULT_SIGNED_URL_TTL_SECONDS: () => DEFAULT_SIGNED_URL_TTL_SECONDS,
38
39
  FIREWORKS_DEFAULT_GLM_MODEL: () => FIREWORKS_DEFAULT_GLM_MODEL,
39
40
  FIREWORKS_DEFAULT_GPT_OSS_120B_MODEL: () => FIREWORKS_DEFAULT_GPT_OSS_120B_MODEL,
40
41
  FIREWORKS_DEFAULT_KIMI_MODEL: () => FIREWORKS_DEFAULT_KIMI_MODEL,
@@ -3373,6 +3374,7 @@ var import_promises3 = require("stream/promises");
3373
3374
  var import_storage = require("@google-cloud/storage");
3374
3375
  var import_mime = __toESM(require("mime"), 1);
3375
3376
  var DEFAULT_FILE_TTL_SECONDS = 48 * 60 * 60;
3377
+ var DEFAULT_SIGNED_URL_TTL_SECONDS = 6 * 60 * 60;
3376
3378
  var GEMINI_FILE_POLL_INTERVAL_MS = 1e3;
3377
3379
  var GEMINI_FILE_POLL_TIMEOUT_MS = 6e4;
3378
3380
  var FILES_TEMP_ROOT = import_node_path4.default.join(import_node_os3.default.tmpdir(), "ljoukov-llm-files");
@@ -4353,7 +4355,7 @@ async function getCanonicalFileSignedUrl(options) {
4353
4355
  const [signedUrl] = await getStorageClient().bucket(metadata.bucketName).file(metadata.objectName).getSignedUrl({
4354
4356
  version: "v4",
4355
4357
  action: "read",
4356
- expires: Date.now() + (options.expiresAfterSeconds ?? 15 * 60) * 1e3,
4358
+ expires: Date.now() + (options.expiresAfterSeconds ?? DEFAULT_SIGNED_URL_TTL_SECONDS) * 1e3,
4357
4359
  responseType: resolveCanonicalStorageContentType(metadata.filename, metadata.mimeType)
4358
4360
  });
4359
4361
  return signedUrl;
@@ -5099,7 +5101,7 @@ function isRetryableChatGptTransportError(error) {
5099
5101
  return false;
5100
5102
  }
5101
5103
  const message = error.message.toLowerCase();
5102
- return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("network") || message.includes("responses websocket");
5104
+ return message === "terminated" || message.includes("socket hang up") || message.includes("fetch failed") || message.includes("failed to download file from") || message.includes("network") || message.includes("responses websocket");
5103
5105
  }
5104
5106
  async function collectChatGptCodexResponseWithRetry(options, maxAttempts = 3) {
5105
5107
  let attempt = 1;
@@ -7256,10 +7258,14 @@ function toGemini25ProThinkingBudget(thinkingLevel) {
7256
7258
  return 32768;
7257
7259
  }
7258
7260
  }
7259
- function resolveGeminiThinkingConfig(modelId, thinkingLevel) {
7261
+ function resolveGeminiThinkingConfig(modelId, thinkingLevel, thinkingBudget) {
7260
7262
  if (isGeminiImageModelId(modelId) || modelId === "gemini-flash-lite-latest") {
7261
7263
  return void 0;
7262
7264
  }
7265
+ if (thinkingBudget !== void 0) {
7266
+ const normalizedBudget = Math.max(0, Math.floor(thinkingBudget));
7267
+ return normalizedBudget === 0 ? { thinkingBudget: 0 } : { includeThoughts: true, thinkingBudget: normalizedBudget };
7268
+ }
7263
7269
  if (thinkingLevel) {
7264
7270
  if (modelId === "gemini-2.5-pro") {
7265
7271
  return {
@@ -7635,6 +7641,7 @@ function startLlmCallLoggerFromContents(options) {
7635
7641
  ...options.request.imageAspectRatio ? { imageAspectRatio: options.request.imageAspectRatio } : {},
7636
7642
  ...options.request.imageSize ? { imageSize: options.request.imageSize } : {},
7637
7643
  ...options.request.thinkingLevel ? { thinkingLevel: options.request.thinkingLevel } : {},
7644
+ ...options.request.thinkingBudget !== void 0 ? { thinkingBudget: options.request.thinkingBudget } : {},
7638
7645
  ...options.request.mediaResolution ? { mediaResolution: options.request.mediaResolution } : {},
7639
7646
  ...options.request.openAiTextFormat ? { openAiTextFormat: sanitiseLogValue(options.request.openAiTextFormat) } : {},
7640
7647
  ...getCurrentToolCallContext() ? { toolContext: getCurrentToolCallContext() } : {}
@@ -7936,7 +7943,11 @@ async function runTextCall(params) {
7936
7943
  })
7937
7944
  )
7938
7945
  );
7939
- const thinkingConfig = resolveGeminiThinkingConfig(modelForProvider, request.thinkingLevel);
7946
+ const thinkingConfig = resolveGeminiThinkingConfig(
7947
+ modelForProvider,
7948
+ request.thinkingLevel,
7949
+ request.thinkingBudget
7950
+ );
7940
7951
  const mediaResolution = toGeminiMediaResolution(request.mediaResolution);
7941
7952
  const config = {
7942
7953
  maxOutputTokens: 32e3,
@@ -8243,6 +8254,7 @@ function startJsonStream(request, operation) {
8243
8254
  responseMimeType: request.responseMimeType ?? "application/json",
8244
8255
  responseJsonSchema,
8245
8256
  thinkingLevel: request.thinkingLevel,
8257
+ thinkingBudget: request.thinkingBudget,
8246
8258
  ...openAiTextFormatForAttempt ? { openAiTextFormat: openAiTextFormatForAttempt } : {},
8247
8259
  telemetry: false,
8248
8260
  signal
@@ -9647,7 +9659,11 @@ async function runToolLoop(request) {
9647
9659
  firstModelEventAtMs = Date.now();
9648
9660
  }
9649
9661
  };
9650
- const thinkingConfig = resolveGeminiThinkingConfig(request.model, request.thinkingLevel);
9662
+ const thinkingConfig = resolveGeminiThinkingConfig(
9663
+ request.model,
9664
+ request.thinkingLevel,
9665
+ request.thinkingBudget
9666
+ );
9651
9667
  const mediaResolution = toGeminiMediaResolution(request.mediaResolution);
9652
9668
  const config = {
9653
9669
  maxOutputTokens: 32e3,
@@ -14098,6 +14114,7 @@ async function runCandidateEvolution(options) {
14098
14114
  CODEX_APPLY_PATCH_JSON_TOOL_DESCRIPTION,
14099
14115
  CODEX_APPLY_PATCH_LARK_GRAMMAR,
14100
14116
  DEFAULT_FILE_TTL_SECONDS,
14117
+ DEFAULT_SIGNED_URL_TTL_SECONDS,
14101
14118
  FIREWORKS_DEFAULT_GLM_MODEL,
14102
14119
  FIREWORKS_DEFAULT_GPT_OSS_120B_MODEL,
14103
14120
  FIREWORKS_DEFAULT_KIMI_MODEL,