@j0hanz/code-review-analyst-mcp 1.5.0 → 1.5.2

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -371,17 +371,21 @@ Create a test plan covering the changes in the diff using the Flash model with t
371
371
 
372
372
  ### Environment Variables
373
373
 
374
- | Variable | Description | Default | Required |
375
- | ------------------------------ | ---------------------------------------------------- | ------------ | -------- |
376
- | `GEMINI_API_KEY` | Gemini API key | — | Yes |
377
- | `GOOGLE_API_KEY` | Alternative API key (if `GEMINI_API_KEY` not set) | — | No |
378
- | `GEMINI_MODEL` | Override default model selection | — | No |
379
- | `GEMINI_HARM_BLOCK_THRESHOLD` | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No |
380
- | `MAX_DIFF_CHARS` | Max chars for diff input | `120000` | No |
381
- | `MAX_CONTEXT_CHARS` | Max combined context for inspection | `500000` | No |
382
- | `MAX_CONCURRENT_CALLS` | Max concurrent Gemini requests | `10` | No |
383
- | `MAX_CONCURRENT_CALLS_WAIT_MS` | Max wait time for a free Gemini slot | `2000` | No |
384
- | `MAX_CONCURRENT_CALLS_POLL_MS` | Poll interval while waiting for a free slot | `25` | No |
374
+ | Variable | Description | Default | Required |
375
+ | ------------------------------- | ---------------------------------------------------- | ------------ | -------- |
376
+ | `GEMINI_API_KEY` | Gemini API key | — | Yes |
377
+ | `GOOGLE_API_KEY` | Alternative API key (if `GEMINI_API_KEY` not set) | — | No |
378
+ | `GEMINI_MODEL` | Override default model selection | — | No |
379
+ | `GEMINI_HARM_BLOCK_THRESHOLD` | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No |
380
+ | `MAX_DIFF_CHARS` | Max chars for diff input | `120000` | No |
381
+ | `MAX_CONTEXT_CHARS` | Max combined context for inspection | `500000` | No |
382
+ | `MAX_CONCURRENT_CALLS` | Max concurrent Gemini requests | `10` | No |
383
+ | `MAX_CONCURRENT_BATCH_CALLS` | Max concurrent inline batch requests | `2` | No |
384
+ | `MAX_CONCURRENT_CALLS_WAIT_MS` | Max wait time for a free Gemini slot | `2000` | No |
385
+ | `MAX_SCHEMA_RETRY_ERROR_CHARS` | Max chars from schema error injected into retry text | `1500` | No |
386
+ | `GEMINI_BATCH_MODE` | Request mode for Gemini calls (`off`, `inline`) | `off` | No |
387
+ | `GEMINI_BATCH_POLL_INTERVAL_MS` | Poll interval for batch job status | `2000` | No |
388
+ | `GEMINI_BATCH_TIMEOUT_MS` | Max wait for batch completion | `120000` | No |
385
389
 
386
390
  ### Models
387
391
 
@@ -32,24 +32,35 @@ function sortPaths(paths) {
32
32
  }
33
33
  return Array.from(paths).sort(PATH_SORTER);
34
34
  }
35
- function buildDiffComputation(files) {
35
+ function buildDiffComputation(files, options) {
36
36
  let added = 0;
37
37
  let deleted = 0;
38
- const paths = new Set();
39
- const summaries = new Array(files.length);
38
+ const paths = options.needPaths ? new Set() : undefined;
39
+ const summaries = options.needSummaries
40
+ ? new Array(files.length)
41
+ : undefined;
40
42
  let index = 0;
41
43
  for (const file of files) {
42
44
  added += file.additions;
43
45
  deleted += file.deletions;
44
- const path = resolveChangedPath(file);
45
- if (path) {
46
- paths.add(path);
46
+ if (options.needPaths || options.needSummaries) {
47
+ const path = resolveChangedPath(file);
48
+ if (paths && path) {
49
+ paths.add(path);
50
+ }
51
+ if (summaries) {
52
+ summaries[index] =
53
+ `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
54
+ }
47
55
  }
48
- summaries[index] =
49
- `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
50
56
  index += 1;
51
57
  }
52
- return { added, deleted, paths, summaries };
58
+ return {
59
+ added,
60
+ deleted,
61
+ paths: paths ?? new Set(),
62
+ summaries: summaries ?? [],
63
+ };
53
64
  }
54
65
  function buildStats(filesCount, added, deleted) {
55
66
  return { files: filesCount, added, deleted };
@@ -61,7 +72,10 @@ export function computeDiffStatsAndSummaryFromFiles(files) {
61
72
  summary: NO_FILES_CHANGED,
62
73
  };
63
74
  }
64
- const computed = buildDiffComputation(files);
75
+ const computed = buildDiffComputation(files, {
76
+ needPaths: false,
77
+ needSummaries: true,
78
+ });
65
79
  const stats = buildStats(files.length, computed.added, computed.deleted);
66
80
  return {
67
81
  stats,
@@ -75,7 +89,10 @@ export function computeDiffStatsAndPathsFromFiles(files) {
75
89
  paths: EMPTY_PATHS,
76
90
  };
77
91
  }
78
- const computed = buildDiffComputation(files);
92
+ const computed = buildDiffComputation(files, {
93
+ needPaths: true,
94
+ needSummaries: false,
95
+ });
79
96
  return {
80
97
  stats: buildStats(files.length, computed.added, computed.deleted),
81
98
  paths: sortPaths(computed.paths),
@@ -86,7 +103,7 @@ export function extractChangedPathsFromFiles(files) {
86
103
  if (files.length === 0) {
87
104
  return EMPTY_PATHS;
88
105
  }
89
- return sortPaths(buildDiffComputation(files).paths);
106
+ return sortPaths(buildDiffComputation(files, { needPaths: true, needSummaries: false }).paths);
90
107
  }
91
108
  /** Extract all unique changed file paths (renamed: returns new path). */
92
109
  export function extractChangedPaths(diff) {
@@ -96,7 +113,10 @@ export function computeDiffStatsFromFiles(files) {
96
113
  if (files.length === 0) {
97
114
  return EMPTY_STATS;
98
115
  }
99
- const computed = buildDiffComputation(files);
116
+ const computed = buildDiffComputation(files, {
117
+ needPaths: false,
118
+ needSummaries: false,
119
+ });
100
120
  return buildStats(files.length, computed.added, computed.deleted);
101
121
  }
102
122
  /** Count changed files, added lines, and deleted lines. */
@@ -1,4 +1,5 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { ParsedFile } from './diff-parser.js';
2
3
  import { createErrorToolResponse } from './tool-response.js';
3
4
  export declare const DIFF_RESOURCE_URI = "diff://current";
4
5
  export interface DiffStats {
@@ -8,15 +9,16 @@ export interface DiffStats {
8
9
  }
9
10
  export interface DiffSlot {
10
11
  diff: string;
12
+ parsedFiles: readonly ParsedFile[];
11
13
  stats: DiffStats;
12
14
  generatedAt: string;
13
15
  mode: string;
14
16
  }
15
17
  /** Call once during server setup so the store can emit resource-updated notifications. */
16
18
  export declare function initDiffStore(server: McpServer): void;
17
- export declare function storeDiff(data: DiffSlot): void;
18
- export declare function getDiff(): DiffSlot | undefined;
19
- export declare function hasDiff(): boolean;
19
+ export declare function storeDiff(data: DiffSlot, key?: string): void;
20
+ export declare function getDiff(key?: string): DiffSlot | undefined;
21
+ export declare function hasDiff(key?: string): boolean;
20
22
  /** Test-only: directly set or clear the diff slot without emitting resource-updated. */
21
- export declare function setDiffForTesting(data: DiffSlot | undefined): void;
23
+ export declare function setDiffForTesting(data: DiffSlot | undefined, key?: string): void;
22
24
  export declare function createNoDiffError(): ReturnType<typeof createErrorToolResponse>;
@@ -1,27 +1,32 @@
1
1
  import { createErrorToolResponse } from './tool-response.js';
2
2
  export const DIFF_RESOURCE_URI = 'diff://current';
3
- let slot;
3
+ const diffSlots = new Map();
4
4
  let sendResourceUpdated;
5
5
  /** Call once during server setup so the store can emit resource-updated notifications. */
6
6
  export function initDiffStore(server) {
7
7
  const inner = server.server;
8
8
  sendResourceUpdated = inner.sendResourceUpdated.bind(inner);
9
9
  }
10
- export function storeDiff(data) {
11
- slot = data;
10
+ export function storeDiff(data, key = process.cwd()) {
11
+ diffSlots.set(key, data);
12
12
  void sendResourceUpdated?.({ uri: DIFF_RESOURCE_URI }).catch(() => {
13
- // Notification is best-effort; never block the tool response.
13
+ // Ignore errors sending resource-updated, which can happen if the server is not fully initialized yet.
14
14
  });
15
15
  }
16
- export function getDiff() {
17
- return slot;
16
+ export function getDiff(key = process.cwd()) {
17
+ return diffSlots.get(key);
18
18
  }
19
- export function hasDiff() {
20
- return slot !== undefined;
19
+ export function hasDiff(key = process.cwd()) {
20
+ return diffSlots.has(key);
21
21
  }
22
22
  /** Test-only: directly set or clear the diff slot without emitting resource-updated. */
23
- export function setDiffForTesting(data) {
24
- slot = data;
23
+ export function setDiffForTesting(data, key = process.cwd()) {
24
+ if (data) {
25
+ diffSlots.set(key, data);
26
+ }
27
+ else {
28
+ diffSlots.delete(key);
29
+ }
25
30
  }
26
31
  export function createNoDiffError() {
27
32
  return createErrorToolResponse('E_NO_DIFF', 'No diff cached. You must call the generate_diff tool before using any review tool. Run generate_diff with mode="unstaged" or mode="staged" to capture the current branch changes, then retry this tool.', undefined, { retryable: false, kind: 'validation' });
@@ -4,4 +4,8 @@ import type { GeminiStructuredRequest } from './types.js';
4
4
  export declare const geminiEvents: EventEmitter<[never]>;
5
5
  export declare function getCurrentRequestId(): string;
6
6
  export declare function setClientForTesting(client: GoogleGenAI): void;
7
+ export declare function getGeminiQueueSnapshot(): {
8
+ activeCalls: number;
9
+ waitingCalls: number;
10
+ };
7
11
  export declare function generateStructuredJson(request: GeminiStructuredRequest): Promise<unknown>;
@@ -13,6 +13,7 @@ const DEFAULT_MODEL = 'gemini-3-flash-preview';
13
13
  const GEMINI_MODEL_ENV_VAR = 'GEMINI_MODEL';
14
14
  const GEMINI_HARM_BLOCK_THRESHOLD_ENV_VAR = 'GEMINI_HARM_BLOCK_THRESHOLD';
15
15
  const GEMINI_INCLUDE_THOUGHTS_ENV_VAR = 'GEMINI_INCLUDE_THOUGHTS';
16
+ const GEMINI_BATCH_MODE_ENV_VAR = 'GEMINI_BATCH_MODE';
16
17
  const GEMINI_API_KEY_ENV_VAR = 'GEMINI_API_KEY';
17
18
  const GOOGLE_API_KEY_ENV_VAR = 'GOOGLE_API_KEY';
18
19
  function getDefaultModel() {
@@ -30,14 +31,20 @@ const RETRY_DELAY_MAX_MS = 5_000;
30
31
  const RETRY_JITTER_RATIO = 0.2;
31
32
  const DEFAULT_SAFETY_THRESHOLD = HarmBlockThreshold.BLOCK_NONE;
32
33
  const DEFAULT_INCLUDE_THOUGHTS = false;
34
+ const DEFAULT_BATCH_MODE = 'off';
33
35
  const UNKNOWN_REQUEST_CONTEXT_VALUE = 'unknown';
34
36
  const RETRYABLE_NUMERIC_CODES = new Set([429, 500, 502, 503, 504]);
35
37
  const DIGITS_ONLY_PATTERN = /^\d+$/;
36
38
  const SLEEP_UNREF_OPTIONS = { ref: false };
37
39
  const maxConcurrentCallsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS', 10);
40
+ const maxConcurrentBatchCallsConfig = createCachedEnvInt('MAX_CONCURRENT_BATCH_CALLS', 2);
38
41
  const concurrencyWaitMsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS_WAIT_MS', 2_000);
42
+ const batchPollIntervalMsConfig = createCachedEnvInt('GEMINI_BATCH_POLL_INTERVAL_MS', 2_000);
43
+ const batchTimeoutMsConfig = createCachedEnvInt('GEMINI_BATCH_TIMEOUT_MS', 120_000);
39
44
  let activeCalls = 0;
45
+ let activeBatchCalls = 0;
40
46
  const slotWaiters = [];
47
+ const batchSlotWaiters = [];
41
48
  const RETRYABLE_TRANSIENT_CODES = new Set([
42
49
  'RESOURCE_EXHAUSTED',
43
50
  'UNAVAILABLE',
@@ -149,6 +156,25 @@ function getDefaultIncludeThoughts() {
149
156
  cachedIncludeThoughts = parseBooleanEnv(value) ?? DEFAULT_INCLUDE_THOUGHTS;
150
157
  return cachedIncludeThoughts;
151
158
  }
159
+ function getDefaultBatchMode() {
160
+ const value = process.env[GEMINI_BATCH_MODE_ENV_VAR]?.trim().toLowerCase();
161
+ if (value === 'inline') {
162
+ return 'inline';
163
+ }
164
+ return DEFAULT_BATCH_MODE;
165
+ }
166
+ function applyResponseKeyOrdering(responseSchema, responseKeyOrdering) {
167
+ if (!responseKeyOrdering || responseKeyOrdering.length === 0) {
168
+ return responseSchema;
169
+ }
170
+ return {
171
+ ...responseSchema,
172
+ propertyOrdering: [...responseKeyOrdering],
173
+ };
174
+ }
175
+ function getPromptWithFunctionCallingContext(request) {
176
+ return request.prompt;
177
+ }
152
178
  function getSafetySettings(threshold) {
153
179
  const cached = safetySettingsCache.get(threshold);
154
180
  if (cached) {
@@ -281,19 +307,21 @@ function findFirstStringCode(record, keys) {
281
307
  }
282
308
  return undefined;
283
309
  }
310
+ const NUMERIC_ERROR_KEYS = ['status', 'statusCode', 'code'];
284
311
  function getNumericErrorCode(error) {
285
312
  const record = getNestedError(error);
286
313
  if (!record) {
287
314
  return undefined;
288
315
  }
289
- return findFirstNumericCode(record, ['status', 'statusCode', 'code']);
316
+ return findFirstNumericCode(record, NUMERIC_ERROR_KEYS);
290
317
  }
318
+ const TRANSIENT_ERROR_KEYS = ['code', 'status', 'statusText'];
291
319
  function getTransientErrorCode(error) {
292
320
  const record = getNestedError(error);
293
321
  if (!record) {
294
322
  return undefined;
295
323
  }
296
- return findFirstStringCode(record, ['code', 'status', 'statusText']);
324
+ return findFirstStringCode(record, TRANSIENT_ERROR_KEYS);
297
325
  }
298
326
  function shouldRetry(error) {
299
327
  const numericCode = getNumericErrorCode(error);
@@ -322,10 +350,8 @@ function buildGenerationConfig(request, abortSignal) {
322
350
  temperature: request.temperature ?? 1.0,
323
351
  maxOutputTokens: request.maxOutputTokens ?? DEFAULT_MAX_OUTPUT_TOKENS,
324
352
  responseMimeType: 'application/json',
325
- responseSchema: request.responseSchema,
353
+ responseSchema: applyResponseKeyOrdering(request.responseSchema, request.responseKeyOrdering),
326
354
  safetySettings: getSafetySettings(getSafetyThreshold()),
327
- topP: 0.95,
328
- topK: 40,
329
355
  abortSignal,
330
356
  };
331
357
  if (request.systemInstruction) {
@@ -366,12 +392,12 @@ async function generateContentWithTimeout(request, model, timeoutMs) {
366
392
  try {
367
393
  return await getClient().models.generateContent({
368
394
  model,
369
- contents: request.prompt,
395
+ contents: getPromptWithFunctionCallingContext(request),
370
396
  config: buildGenerationConfig(request, signal),
371
397
  });
372
398
  }
373
399
  catch (error) {
374
- if (request.signal?.aborted) {
400
+ if (request.signal?.aborted === true) {
375
401
  throw new Error('Gemini request was cancelled.');
376
402
  }
377
403
  if (controller.signal.aborted) {
@@ -388,6 +414,14 @@ async function executeAttempt(request, model, timeoutMs, attempt, onLog) {
388
414
  const response = await generateContentWithTimeout(request, model, timeoutMs);
389
415
  const latencyMs = Math.round(performance.now() - startedAt);
390
416
  const finishReason = response.candidates?.[0]?.finishReason;
417
+ let thoughts;
418
+ const parts = response.candidates?.[0]?.content?.parts;
419
+ if (Array.isArray(parts)) {
420
+ const thoughtParts = parts.filter((p) => p.thought === true && typeof p.text === 'string');
421
+ if (thoughtParts.length > 0) {
422
+ thoughts = thoughtParts.map((p) => p.text).join('\n\n');
423
+ }
424
+ }
391
425
  await emitGeminiLog(onLog, 'info', {
392
426
  event: 'gemini_call',
393
427
  details: {
@@ -395,6 +429,7 @@ async function executeAttempt(request, model, timeoutMs, attempt, onLog) {
395
429
  latencyMs,
396
430
  finishReason: finishReason ?? null,
397
431
  usageMetadata: response.usageMetadata ?? null,
432
+ ...(thoughts ? { thoughts } : {}),
398
433
  },
399
434
  });
400
435
  if (finishReason === FinishReason.MAX_TOKENS) {
@@ -429,33 +464,34 @@ async function waitBeforeRetry(attempt, error, onLog, requestSignal) {
429
464
  throw sleepError;
430
465
  }
431
466
  }
432
- async function throwGeminiFailure(maxRetries, lastError, onLog) {
433
- const attempts = maxRetries + 1;
467
+ async function throwGeminiFailure(attemptsMade, lastError, onLog) {
434
468
  const message = getErrorMessage(lastError);
435
469
  await emitGeminiLog(onLog, 'error', {
436
470
  event: 'gemini_failure',
437
471
  details: {
438
472
  error: message,
439
- attempts,
473
+ attempts: attemptsMade,
440
474
  },
441
475
  });
442
- throw new Error(`Gemini request failed after ${attempts} attempts: ${message}`, { cause: lastError });
476
+ throw new Error(`Gemini request failed after ${attemptsMade} attempts: ${message}`, { cause: lastError });
443
477
  }
444
478
  async function runWithRetries(request, model, timeoutMs, maxRetries, onLog) {
445
479
  let lastError;
446
- for (let attempt = 0; attempt <= maxRetries; attempt += 1) {
480
+ let attempt = 0;
481
+ for (; attempt <= maxRetries; attempt += 1) {
447
482
  try {
448
483
  return await executeAttempt(request, model, timeoutMs, attempt, onLog);
449
484
  }
450
485
  catch (error) {
451
486
  lastError = error;
452
487
  if (!canRetryAttempt(attempt, maxRetries, error)) {
488
+ attempt += 1; // Count this attempt before breaking
453
489
  break;
454
490
  }
455
491
  await waitBeforeRetry(attempt, error, onLog, request.signal);
456
492
  }
457
493
  }
458
- return throwGeminiFailure(maxRetries, lastError, onLog);
494
+ return throwGeminiFailure(attempt, lastError, onLog);
459
495
  }
460
496
  function canRetryAttempt(attempt, maxRetries, error) {
461
497
  return attempt < maxRetries && shouldRetry(error);
@@ -466,8 +502,9 @@ function tryWakeNextWaiter() {
466
502
  next();
467
503
  }
468
504
  }
469
- async function waitForConcurrencySlot(limit, requestSignal) {
470
- if (activeCalls < limit) {
505
+ async function waitForSlot(limit, getActiveCount, acquireSlot, waiters, requestSignal) {
506
+ if (waiters.length === 0 && getActiveCount() < limit) {
507
+ acquireSlot();
471
508
  return;
472
509
  }
473
510
  if (requestSignal?.aborted) {
@@ -484,16 +521,17 @@ async function waitForConcurrencySlot(limit, requestSignal) {
484
521
  if (requestSignal) {
485
522
  requestSignal.removeEventListener('abort', onAbort);
486
523
  }
524
+ acquireSlot();
487
525
  resolve();
488
526
  };
489
- slotWaiters.push(waiter);
527
+ waiters.push(waiter);
490
528
  const deadlineTimer = setTimeout(() => {
491
529
  if (settled)
492
530
  return;
493
531
  settled = true;
494
- const idx = slotWaiters.indexOf(waiter);
532
+ const idx = waiters.indexOf(waiter);
495
533
  if (idx !== -1) {
496
- slotWaiters.splice(idx, 1);
534
+ waiters.splice(idx, 1);
497
535
  }
498
536
  if (requestSignal) {
499
537
  requestSignal.removeEventListener('abort', onAbort);
@@ -505,9 +543,9 @@ async function waitForConcurrencySlot(limit, requestSignal) {
505
543
  if (settled)
506
544
  return;
507
545
  settled = true;
508
- const idx = slotWaiters.indexOf(waiter);
546
+ const idx = waiters.indexOf(waiter);
509
547
  if (idx !== -1) {
510
- slotWaiters.splice(idx, 1);
548
+ waiters.splice(idx, 1);
511
549
  }
512
550
  clearTimeout(deadlineTimer);
513
551
  reject(new Error('Gemini request was cancelled.'));
@@ -517,19 +555,254 @@ async function waitForConcurrencySlot(limit, requestSignal) {
517
555
  }
518
556
  });
519
557
  }
558
+ async function waitForConcurrencySlot(limit, requestSignal) {
559
+ return waitForSlot(limit, () => activeCalls, () => {
560
+ activeCalls += 1;
561
+ }, slotWaiters, requestSignal);
562
+ }
563
+ function tryWakeNextBatchWaiter() {
564
+ const next = batchSlotWaiters.shift();
565
+ if (next !== undefined) {
566
+ next();
567
+ }
568
+ }
569
+ async function waitForBatchConcurrencySlot(limit, requestSignal) {
570
+ return waitForSlot(limit, () => activeBatchCalls, () => {
571
+ activeBatchCalls += 1;
572
+ }, batchSlotWaiters, requestSignal);
573
+ }
574
+ function getBatchState(payload) {
575
+ const record = asRecord(payload);
576
+ if (!record) {
577
+ return undefined;
578
+ }
579
+ const directState = toUpperStringCode(record.state);
580
+ if (directState) {
581
+ return directState;
582
+ }
583
+ const metadata = asRecord(record.metadata);
584
+ if (!metadata) {
585
+ return undefined;
586
+ }
587
+ return toUpperStringCode(metadata.state);
588
+ }
589
+ function extractBatchResponseText(payload) {
590
+ const record = asRecord(payload);
591
+ if (!record) {
592
+ return undefined;
593
+ }
594
+ const inlineResponse = asRecord(record.inlineResponse);
595
+ const inlineText = typeof inlineResponse?.text === 'string' ? inlineResponse.text : undefined;
596
+ if (inlineText) {
597
+ return inlineText;
598
+ }
599
+ const response = asRecord(record.response);
600
+ if (!response) {
601
+ return undefined;
602
+ }
603
+ const responseText = typeof response.text === 'string' ? response.text : undefined;
604
+ if (responseText) {
605
+ return responseText;
606
+ }
607
+ const { inlineResponses } = response;
608
+ if (!Array.isArray(inlineResponses) || inlineResponses.length === 0) {
609
+ return undefined;
610
+ }
611
+ const firstInline = asRecord(inlineResponses[0]);
612
+ return typeof firstInline?.text === 'string' ? firstInline.text : undefined;
613
+ }
614
+ function extractBatchErrorDetail(payload) {
615
+ const record = asRecord(payload);
616
+ if (!record) {
617
+ return undefined;
618
+ }
619
+ const directError = asRecord(record.error);
620
+ const directMessage = typeof directError?.message === 'string' ? directError.message : undefined;
621
+ if (directMessage) {
622
+ return directMessage;
623
+ }
624
+ const metadata = asRecord(record.metadata);
625
+ const metadataError = asRecord(metadata?.error);
626
+ const metadataMessage = typeof metadataError?.message === 'string'
627
+ ? metadataError.message
628
+ : undefined;
629
+ if (metadataMessage) {
630
+ return metadataMessage;
631
+ }
632
+ const response = asRecord(record.response);
633
+ const responseError = asRecord(response?.error);
634
+ return typeof responseError?.message === 'string'
635
+ ? responseError.message
636
+ : undefined;
637
+ }
638
+ function getBatchSuccessResponseText(polled) {
639
+ const responseText = extractBatchResponseText(polled);
640
+ if (!responseText) {
641
+ const errorDetail = extractBatchErrorDetail(polled);
642
+ throw new Error(errorDetail
643
+ ? `Gemini batch request succeeded but returned no response text: ${errorDetail}`
644
+ : 'Gemini batch request succeeded but returned no response text.');
645
+ }
646
+ return responseText;
647
+ }
648
+ function handleBatchTerminalState(state, payload) {
649
+ if (state === 'JOB_STATE_FAILED' || state === 'JOB_STATE_CANCELLED') {
650
+ const errorDetail = extractBatchErrorDetail(payload);
651
+ throw new Error(errorDetail
652
+ ? `Gemini batch request ended with state ${state}: ${errorDetail}`
653
+ : `Gemini batch request ended with state ${state}.`);
654
+ }
655
+ }
656
+ async function pollBatchStatusWithRetries(batches, batchName, onLog, requestSignal) {
657
+ const maxPollRetries = 2;
658
+ for (let attempt = 0; attempt <= maxPollRetries; attempt += 1) {
659
+ try {
660
+ return await batches.get({ name: batchName });
661
+ }
662
+ catch (error) {
663
+ if (!canRetryAttempt(attempt, maxPollRetries, error)) {
664
+ throw error;
665
+ }
666
+ await waitBeforeRetry(attempt, error, onLog, requestSignal);
667
+ }
668
+ }
669
+ throw new Error('Batch polling retries exhausted unexpectedly.');
670
+ }
671
+ async function cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut) {
672
+ const aborted = request.signal?.aborted === true;
673
+ if (completed || (!aborted && !timedOut) || !batchName) {
674
+ return;
675
+ }
676
+ if (batches.cancel === undefined) {
677
+ return;
678
+ }
679
+ try {
680
+ await batches.cancel({ name: batchName });
681
+ await emitGeminiLog(onLog, 'info', {
682
+ event: 'gemini_batch_cancelled',
683
+ details: {
684
+ batchName,
685
+ reason: timedOut ? 'timeout' : 'aborted',
686
+ },
687
+ });
688
+ }
689
+ catch (error) {
690
+ await emitGeminiLog(onLog, 'warning', {
691
+ event: 'gemini_batch_cancel_failed',
692
+ details: {
693
+ batchName,
694
+ reason: timedOut ? 'timeout' : 'aborted',
695
+ error: getErrorMessage(error),
696
+ },
697
+ });
698
+ }
699
+ }
700
+ async function runInlineBatchWithPolling(request, model, onLog) {
701
+ const client = getClient();
702
+ const { batches } = client;
703
+ if (batches === undefined) {
704
+ throw new Error('Batch mode requires SDK batch support, but batches API is unavailable.');
705
+ }
706
+ let batchName;
707
+ let completed = false;
708
+ let timedOut = false;
709
+ try {
710
+ const createPayload = {
711
+ model,
712
+ src: [
713
+ {
714
+ contents: [{ role: 'user', parts: [{ text: request.prompt }] }],
715
+ config: buildGenerationConfig(request, new AbortController().signal),
716
+ },
717
+ ],
718
+ };
719
+ const createdJob = await batches.create(createPayload);
720
+ const createdRecord = asRecord(createdJob);
721
+ batchName =
722
+ typeof createdRecord?.name === 'string' ? createdRecord.name : undefined;
723
+ if (!batchName) {
724
+ throw new Error('Batch mode failed to return a job name.');
725
+ }
726
+ const pollStart = performance.now();
727
+ const timeoutMs = batchTimeoutMsConfig.get();
728
+ const pollIntervalMs = batchPollIntervalMsConfig.get();
729
+ await emitGeminiLog(onLog, 'info', {
730
+ event: 'gemini_batch_created',
731
+ details: { batchName },
732
+ });
733
+ for (;;) {
734
+ if (request.signal?.aborted === true) {
735
+ throw new Error('Gemini request was cancelled.');
736
+ }
737
+ const elapsedMs = Math.round(performance.now() - pollStart);
738
+ if (elapsedMs > timeoutMs) {
739
+ timedOut = true;
740
+ throw new Error(`Gemini batch request timed out after ${formatNumber(timeoutMs)}ms.`);
741
+ }
742
+ const polled = await pollBatchStatusWithRetries(batches, batchName, onLog, request.signal);
743
+ const state = getBatchState(polled);
744
+ if (state === 'JOB_STATE_SUCCEEDED') {
745
+ const responseText = getBatchSuccessResponseText(polled);
746
+ completed = true;
747
+ return parseStructuredResponse(responseText);
748
+ }
749
+ handleBatchTerminalState(state, polled);
750
+ await sleep(pollIntervalMs, undefined, request.signal
751
+ ? { ...SLEEP_UNREF_OPTIONS, signal: request.signal }
752
+ : SLEEP_UNREF_OPTIONS);
753
+ }
754
+ }
755
+ finally {
756
+ await cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut);
757
+ }
758
+ }
759
+ export function getGeminiQueueSnapshot() {
760
+ return {
761
+ activeCalls,
762
+ waitingCalls: slotWaiters.length,
763
+ };
764
+ }
520
765
  export async function generateStructuredJson(request) {
521
766
  const model = request.model ?? getDefaultModel();
522
767
  const timeoutMs = request.timeoutMs ?? DEFAULT_TIMEOUT_MS;
523
768
  const maxRetries = request.maxRetries ?? DEFAULT_MAX_RETRIES;
769
+ const batchMode = request.batchMode ?? getDefaultBatchMode();
524
770
  const { onLog } = request;
525
- const limit = maxConcurrentCallsConfig.get();
526
- await waitForConcurrencySlot(limit, request.signal);
527
- activeCalls += 1;
771
+ const limit = batchMode === 'inline'
772
+ ? maxConcurrentBatchCallsConfig.get()
773
+ : maxConcurrentCallsConfig.get();
774
+ const queueWaitStartedAt = performance.now();
775
+ if (batchMode === 'inline') {
776
+ await waitForBatchConcurrencySlot(limit, request.signal);
777
+ }
778
+ else {
779
+ await waitForConcurrencySlot(limit, request.signal);
780
+ }
781
+ const queueWaitMs = Math.round(performance.now() - queueWaitStartedAt);
782
+ await safeCallOnLog(onLog, 'info', {
783
+ event: 'gemini_queue_acquired',
784
+ queueWaitMs,
785
+ waitingCalls: batchMode === 'inline' ? batchSlotWaiters.length : slotWaiters.length,
786
+ activeCalls,
787
+ activeBatchCalls,
788
+ mode: batchMode,
789
+ });
528
790
  try {
529
- return await geminiContext.run({ requestId: nextRequestId(), model }, () => runWithRetries(request, model, timeoutMs, maxRetries, onLog));
791
+ return await geminiContext.run({ requestId: nextRequestId(), model }, () => {
792
+ if (batchMode === 'inline') {
793
+ return runInlineBatchWithPolling(request, model, onLog);
794
+ }
795
+ return runWithRetries(request, model, timeoutMs, maxRetries, onLog);
796
+ });
530
797
  }
531
798
  finally {
532
- activeCalls -= 1;
533
- tryWakeNextWaiter();
799
+ if (batchMode === 'inline') {
800
+ activeBatchCalls -= 1;
801
+ tryWakeNextBatchWaiter();
802
+ }
803
+ else {
804
+ activeCalls -= 1;
805
+ tryWakeNextWaiter();
806
+ }
534
807
  }
535
808
  }