@j0hanz/code-review-analyst-mcp 1.4.4 → 1.5.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -18,7 +18,7 @@ This server accepts unified diffs and returns structured JSON results — findin
18
18
 
19
19
  - **Impact Analysis** — Objective severity scoring, breaking change detection, and rollback complexity assessment.
20
20
  - **Review Summary** — Concise PR digest with merge recommendation and change statistics.
21
- - **Deep Code Inspection** — Pro model with 16K thinking budget for context-aware analysis using full file contents.
21
+ - **Deep Code Inspection** — Pro model with high thinking level for context-aware analysis using full file contents.
22
22
  - **Search & Replace Fixes** — Verbatim, copy-paste-ready code fixes tied to specific findings.
23
23
  - **Test Plan Generation** — Systematic test case generation with priority ranking and pseudocode.
24
24
  - **Async Task Support** — All tools support MCP task lifecycle with progress notifications.
@@ -371,27 +371,31 @@ Create a test plan covering the changes in the diff using the Flash model with t
371
371
 
372
372
  ### Environment Variables
373
373
 
374
- | Variable | Description | Default | Required |
375
- | ------------------------------ | ---------------------------------------------------- | ------------ | -------- |
376
- | `GEMINI_API_KEY` | Gemini API key | — | Yes |
377
- | `GOOGLE_API_KEY` | Alternative API key (if `GEMINI_API_KEY` not set) | — | No |
378
- | `GEMINI_MODEL` | Override default model selection | — | No |
379
- | `GEMINI_HARM_BLOCK_THRESHOLD` | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No |
380
- | `MAX_DIFF_CHARS` | Max chars for diff input | `120000` | No |
381
- | `MAX_CONTEXT_CHARS` | Max combined context for inspection | `500000` | No |
382
- | `MAX_CONCURRENT_CALLS` | Max concurrent Gemini requests | `10` | No |
383
- | `MAX_CONCURRENT_CALLS_WAIT_MS` | Max wait time for a free Gemini slot | `2000` | No |
384
- | `MAX_CONCURRENT_CALLS_POLL_MS` | Poll interval while waiting for a free slot | `25` | No |
374
+ | Variable | Description | Default | Required |
375
+ | ------------------------------- | ---------------------------------------------------- | ------------ | -------- |
376
+ | `GEMINI_API_KEY` | Gemini API key | — | Yes |
377
+ | `GOOGLE_API_KEY` | Alternative API key (if `GEMINI_API_KEY` not set) | — | No |
378
+ | `GEMINI_MODEL` | Override default model selection | — | No |
379
+ | `GEMINI_HARM_BLOCK_THRESHOLD` | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No |
380
+ | `MAX_DIFF_CHARS` | Max chars for diff input | `120000` | No |
381
+ | `MAX_CONTEXT_CHARS` | Max combined context for inspection | `500000` | No |
382
+ | `MAX_CONCURRENT_CALLS` | Max concurrent Gemini requests | `10` | No |
383
+ | `MAX_CONCURRENT_BATCH_CALLS` | Max concurrent inline batch requests | `2` | No |
384
+ | `MAX_CONCURRENT_CALLS_WAIT_MS` | Max wait time for a free Gemini slot | `2000` | No |
385
+ | `MAX_SCHEMA_RETRY_ERROR_CHARS` | Max chars from schema error injected into retry text | `1500` | No |
386
+ | `GEMINI_BATCH_MODE` | Request mode for Gemini calls (`off`, `inline`) | `off` | No |
387
+ | `GEMINI_BATCH_POLL_INTERVAL_MS` | Poll interval for batch job status | `2000` | No |
388
+ | `GEMINI_BATCH_TIMEOUT_MS` | Max wait for batch completion | `120000` | No |
385
389
 
386
390
  ### Models
387
391
 
388
- | Tool | Model | Thinking Budget |
389
- | ------------------------- | ------------------ | --------------- |
390
- | `analyze_pr_impact` | `gemini-2.5-flash` | |
391
- | `generate_review_summary` | `gemini-2.5-flash` | |
392
- | `inspect_code_quality` | `gemini-2.5-pro` | 16,384 tokens |
393
- | `suggest_search_replace` | `gemini-2.5-pro` | 16,384 tokens |
394
- | `generate_test_plan` | `gemini-2.5-flash` | 8,192 tokens |
392
+ | Tool | Model | Thinking Level |
393
+ | ------------------------- | ------------------------ | -------------- |
394
+ | `analyze_pr_impact` | `gemini-3-flash-preview` | `minimal` |
395
+ | `generate_review_summary` | `gemini-3-flash-preview` | `minimal` |
396
+ | `inspect_code_quality` | `gemini-3-pro-preview` | `high` |
397
+ | `suggest_search_replace` | `gemini-3-pro-preview` | `high` |
398
+ | `generate_test_plan` | `gemini-3-flash-preview` | `medium` |
395
399
 
396
400
  ## Workflows
397
401
 
@@ -32,24 +32,35 @@ function sortPaths(paths) {
32
32
  }
33
33
  return Array.from(paths).sort(PATH_SORTER);
34
34
  }
35
/**
 * Walks the parsed diff files once and accumulates line totals, plus the
 * changed-path set and per-file summary strings when requested.
 *
 * The added (+) version takes an `options` object with `needPaths` and
 * `needSummaries` flags; the removed (-) version always built both.
 * `options` is dereferenced unconditionally, so callers must pass it.
 *
 * @param files   Iterable of parsed files exposing `additions` and `deletions`;
 *                `files.length` is read when summaries are requested.
 * @param options `{ needPaths, needSummaries }` flags.
 * @returns `{ added, deleted, paths, summaries }`; `paths` is an empty Set and
 *          `summaries` an empty array when the corresponding flag is off.
 */
- function buildDiffComputation(files) {
35
+ function buildDiffComputation(files, options) {
36
36
  let added = 0;
37
37
  let deleted = 0;
38
- const paths = new Set();
39
- const summaries = new Array(files.length);
38
+ const paths = options.needPaths ? new Set() : undefined;
39
+ const summaries = options.needSummaries
40
+ ? new Array(files.length)
41
+ : undefined;
40
42
  let index = 0;
41
43
  for (const file of files) {
42
44
  added += file.additions;
43
45
  deleted += file.deletions;
44
- const path = resolveChangedPath(file);
45
- if (path) {
46
- paths.add(path);
// New version: the path is only resolved when at least one consumer needs it.
46
+ if (options.needPaths || options.needSummaries) {
47
+ const path = resolveChangedPath(file);
48
+ if (paths && path) {
49
+ paths.add(path);
50
+ }
51
+ if (summaries) {
52
+ summaries[index] =
53
+ `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
54
+ }
47
55
  }
48
- summaries[index] =
49
- `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
50
56
  index += 1;
51
57
  }
52
- return { added, deleted, paths, summaries };
// New version: substitute empty containers so the return shape stays the same.
58
+ return {
59
+ added,
60
+ deleted,
61
+ paths: paths ?? new Set(),
62
+ summaries: summaries ?? [],
63
+ };
53
64
  }
54
65
  function buildStats(filesCount, added, deleted) {
55
66
  return { files: filesCount, added, deleted };
@@ -61,7 +72,10 @@ export function computeDiffStatsAndSummaryFromFiles(files) {
61
72
  summary: NO_FILES_CHANGED,
62
73
  };
63
74
  }
64
- const computed = buildDiffComputation(files);
75
+ const computed = buildDiffComputation(files, {
76
+ needPaths: false,
77
+ needSummaries: true,
78
+ });
65
79
  const stats = buildStats(files.length, computed.added, computed.deleted);
66
80
  return {
67
81
  stats,
@@ -75,7 +89,10 @@ export function computeDiffStatsAndPathsFromFiles(files) {
75
89
  paths: EMPTY_PATHS,
76
90
  };
77
91
  }
78
- const computed = buildDiffComputation(files);
92
+ const computed = buildDiffComputation(files, {
93
+ needPaths: true,
94
+ needSummaries: false,
95
+ });
79
96
  return {
80
97
  stats: buildStats(files.length, computed.added, computed.deleted),
81
98
  paths: sortPaths(computed.paths),
@@ -86,7 +103,7 @@ export function extractChangedPathsFromFiles(files) {
86
103
  if (files.length === 0) {
87
104
  return EMPTY_PATHS;
88
105
  }
89
- return sortPaths(buildDiffComputation(files).paths);
106
+ return sortPaths(buildDiffComputation(files, { needPaths: true, needSummaries: false }).paths);
90
107
  }
91
108
  /** Extract all unique changed file paths (renamed: returns new path). */
92
109
  export function extractChangedPaths(diff) {
@@ -96,7 +113,10 @@ export function computeDiffStatsFromFiles(files) {
96
113
  if (files.length === 0) {
97
114
  return EMPTY_STATS;
98
115
  }
99
- const computed = buildDiffComputation(files);
116
+ const computed = buildDiffComputation(files, {
117
+ needPaths: false,
118
+ needSummaries: false,
119
+ });
100
120
  return buildStats(files.length, computed.added, computed.deleted);
101
121
  }
102
122
  /** Count changed files, added lines, and deleted lines. */
@@ -1,4 +1,5 @@
1
1
  import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
2
+ import type { ParsedFile } from './diff-parser.js';
2
3
  import { createErrorToolResponse } from './tool-response.js';
3
4
  export declare const DIFF_RESOURCE_URI = "diff://current";
4
5
  export interface DiffStats {
@@ -8,6 +9,7 @@ export interface DiffStats {
8
9
  }
9
10
  export interface DiffSlot {
10
11
  diff: string;
12
+ parsedFiles: readonly ParsedFile[];
11
13
  stats: DiffStats;
12
14
  generatedAt: string;
13
15
  mode: string;
@@ -4,4 +4,8 @@ import type { GeminiStructuredRequest } from './types.js';
4
4
  export declare const geminiEvents: EventEmitter<[never]>;
5
5
  export declare function getCurrentRequestId(): string;
6
6
  export declare function setClientForTesting(client: GoogleGenAI): void;
7
/**
 * Type declaration for the queue snapshot accessor: returns the current
 * `activeCalls` and `waitingCalls` counts as numbers.
 */
+ export declare function getGeminiQueueSnapshot(): {
8
+ activeCalls: number;
9
+ waitingCalls: number;
10
+ };
7
11
  export declare function generateStructuredJson(request: GeminiStructuredRequest): Promise<unknown>;
@@ -4,15 +4,16 @@ import { EventEmitter } from 'node:events';
4
4
  import { performance } from 'node:perf_hooks';
5
5
  import { setTimeout as sleep } from 'node:timers/promises';
6
6
  import { debuglog } from 'node:util';
7
- import { FinishReason, GoogleGenAI, HarmBlockThreshold, HarmCategory, } from '@google/genai';
7
+ import { FinishReason, GoogleGenAI, HarmBlockThreshold, HarmCategory, ThinkingLevel, } from '@google/genai';
8
8
  import { createCachedEnvInt } from './env-config.js';
9
9
  import { getErrorMessage, RETRYABLE_UPSTREAM_ERROR_PATTERN } from './errors.js';
10
10
  // Lazy-cached: first call happens after parseCommandLineArgs() sets GEMINI_MODEL.
11
11
  let _defaultModel;
12
- const DEFAULT_MODEL = 'gemini-2.5-flash';
12
+ const DEFAULT_MODEL = 'gemini-3-flash-preview';
13
13
  const GEMINI_MODEL_ENV_VAR = 'GEMINI_MODEL';
14
14
  const GEMINI_HARM_BLOCK_THRESHOLD_ENV_VAR = 'GEMINI_HARM_BLOCK_THRESHOLD';
15
15
  const GEMINI_INCLUDE_THOUGHTS_ENV_VAR = 'GEMINI_INCLUDE_THOUGHTS';
16
+ const GEMINI_BATCH_MODE_ENV_VAR = 'GEMINI_BATCH_MODE';
16
17
  const GEMINI_API_KEY_ENV_VAR = 'GEMINI_API_KEY';
17
18
  const GOOGLE_API_KEY_ENV_VAR = 'GOOGLE_API_KEY';
18
19
  function getDefaultModel() {
@@ -30,14 +31,20 @@ const RETRY_DELAY_MAX_MS = 5_000;
30
31
  const RETRY_JITTER_RATIO = 0.2;
31
32
  const DEFAULT_SAFETY_THRESHOLD = HarmBlockThreshold.BLOCK_NONE;
32
33
  const DEFAULT_INCLUDE_THOUGHTS = false;
34
+ const DEFAULT_BATCH_MODE = 'off';
33
35
  const UNKNOWN_REQUEST_CONTEXT_VALUE = 'unknown';
34
36
  const RETRYABLE_NUMERIC_CODES = new Set([429, 500, 502, 503, 504]);
35
37
  const DIGITS_ONLY_PATTERN = /^\d+$/;
36
38
  const SLEEP_UNREF_OPTIONS = { ref: false };
37
39
  const maxConcurrentCallsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS', 10);
40
+ const maxConcurrentBatchCallsConfig = createCachedEnvInt('MAX_CONCURRENT_BATCH_CALLS', 2);
38
41
  const concurrencyWaitMsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS_WAIT_MS', 2_000);
42
+ const batchPollIntervalMsConfig = createCachedEnvInt('GEMINI_BATCH_POLL_INTERVAL_MS', 2_000);
43
+ const batchTimeoutMsConfig = createCachedEnvInt('GEMINI_BATCH_TIMEOUT_MS', 120_000);
39
44
  let activeCalls = 0;
45
+ let activeBatchCalls = 0;
40
46
  const slotWaiters = [];
47
+ const batchSlotWaiters = [];
41
48
  const RETRYABLE_TRANSIENT_CODES = new Set([
42
49
  'RESOURCE_EXHAUSTED',
43
50
  'UNAVAILABLE',
@@ -91,14 +98,31 @@ function parseSafetyThreshold(threshold) {
91
98
  }
92
99
  return SAFETY_THRESHOLD_BY_NAME[normalizedThreshold];
93
100
  }
94
/**
 * Builds the thinking configuration object for a generation request.
 *
 * Removed (-) version: keyed on a numeric `thinkingBudget`.
 * Added (+) version: maps a string `thinkingLevel` ('minimal' | 'low' |
 * 'medium' | 'high') onto the SDK `ThinkingLevel` enum.
 *
 * @param thinkingLevel   Requested level, or undefined.
 * @param includeThoughts When truthy, sets `includeThoughts: true`.
 * @returns undefined when no level is given and thoughts are not requested;
 *          otherwise a config object.
 *
 * NOTE(review): the switch has no default case, so an unrecognized level
 * string leaves `thinkingLevel` unset and can return an empty object
 * rather than undefined. Confirm callers validate the level.
 */
- function getThinkingConfig(thinkingBudget, includeThoughts) {
95
- if (thinkingBudget === undefined) {
101
+ function getThinkingConfig(thinkingLevel, includeThoughts) {
102
+ if (thinkingLevel === undefined && !includeThoughts) {
96
103
  return undefined;
97
104
  }
105
+ const config = {};
106
+ if (thinkingLevel !== undefined) {
107
+ switch (thinkingLevel) {
108
+ case 'minimal':
109
+ config.thinkingLevel = ThinkingLevel.MINIMAL;
110
+ break;
111
+ case 'low':
112
+ config.thinkingLevel = ThinkingLevel.LOW;
113
+ break;
114
+ case 'medium':
115
+ config.thinkingLevel = ThinkingLevel.MEDIUM;
116
+ break;
117
+ case 'high':
118
+ config.thinkingLevel = ThinkingLevel.HIGH;
119
+ break;
120
+ }
121
+ }
98
122
  if (includeThoughts) {
99
- return { includeThoughts: true, thinkingBudget };
123
+ config.includeThoughts = true;
100
124
  }
101
- return { thinkingBudget };
125
+ return config;
102
126
  }
103
127
  function parseBooleanEnv(value) {
104
128
  const normalized = value.trim().toLowerCase();
@@ -132,6 +156,25 @@ function getDefaultIncludeThoughts() {
132
156
  cachedIncludeThoughts = parseBooleanEnv(value) ?? DEFAULT_INCLUDE_THOUGHTS;
133
157
  return cachedIncludeThoughts;
134
158
  }
159
/**
 * Resolves the default request mode from the batch-mode environment variable.
 * The value is trimmed and lower-cased; only 'inline' is recognized, and any
 * other value (or an unset variable) yields DEFAULT_BATCH_MODE.
 * The environment is read on every call; nothing is cached here.
 *
 * @returns 'inline' or DEFAULT_BATCH_MODE.
 */
+ function getDefaultBatchMode() {
160
+ const value = process.env[GEMINI_BATCH_MODE_ENV_VAR]?.trim().toLowerCase();
161
+ if (value === 'inline') {
162
+ return 'inline';
163
+ }
164
+ return DEFAULT_BATCH_MODE;
165
+ }
166
/**
 * Attaches a `propertyOrdering` list to a response schema.
 *
 * @param responseSchema      Schema object; never mutated.
 * @param responseKeyOrdering Optional array of key names.
 * @returns The same schema object when the ordering is missing or empty;
 *          otherwise a shallow copy with `propertyOrdering` set to a copy of
 *          the ordering array (replacing any existing `propertyOrdering`).
 */
+ function applyResponseKeyOrdering(responseSchema, responseKeyOrdering) {
167
+ if (!responseKeyOrdering || responseKeyOrdering.length === 0) {
168
+ return responseSchema;
169
+ }
170
+ return {
171
+ ...responseSchema,
172
+ propertyOrdering: [...responseKeyOrdering],
173
+ };
174
+ }
175
/**
 * Returns the prompt to send for a request. As written this is a pure
 * pass-through of `request.prompt`; no extra context is added.
 */
+ function getPromptWithFunctionCallingContext(request) {
176
+ return request.prompt;
177
+ }
135
178
  function getSafetySettings(threshold) {
136
179
  const cached = safetySettingsCache.get(threshold);
137
180
  if (cached) {
@@ -264,19 +307,21 @@ function findFirstStringCode(record, keys) {
264
307
  }
265
308
  return undefined;
266
309
  }
310
+ const NUMERIC_ERROR_KEYS = ['status', 'statusCode', 'code'];
267
311
  /**
   * Extracts a numeric error code from an error value.
   * Unwraps the error with getNestedError, then searches the record with
   * findFirstNumericCode. The added (+) line passes the hoisted
   * NUMERIC_ERROR_KEYS constant instead of an inline array literal.
   *
   * @returns The code found, or undefined when there is no error record.
   */
  function getNumericErrorCode(error) {
268
312
  const record = getNestedError(error);
269
313
  if (!record) {
270
314
  return undefined;
271
315
  }
272
- return findFirstNumericCode(record, ['status', 'statusCode', 'code']);
316
+ return findFirstNumericCode(record, NUMERIC_ERROR_KEYS);
273
317
  }
318
+ const TRANSIENT_ERROR_KEYS = ['code', 'status', 'statusText'];
274
319
  /**
   * Extracts a string error code from an error value.
   * Unwraps the error with getNestedError, then searches the record with
   * findFirstStringCode. The added (+) line passes the hoisted
   * TRANSIENT_ERROR_KEYS constant instead of an inline array literal.
   *
   * @returns The code found, or undefined when there is no error record.
   */
  function getTransientErrorCode(error) {
275
320
  const record = getNestedError(error);
276
321
  if (!record) {
277
322
  return undefined;
278
323
  }
279
- return findFirstStringCode(record, ['code', 'status', 'statusText']);
324
+ return findFirstStringCode(record, TRANSIENT_ERROR_KEYS);
280
325
  }
281
326
  function shouldRetry(error) {
282
327
  const numericCode = getNumericErrorCode(error);
@@ -300,12 +345,12 @@ function getRetryDelayMs(attempt) {
300
345
  }
301
346
  function buildGenerationConfig(request, abortSignal) {
302
347
  const includeThoughts = request.includeThoughts ?? getDefaultIncludeThoughts();
303
- const thinkingConfig = getThinkingConfig(request.thinkingBudget, includeThoughts);
348
+ const thinkingConfig = getThinkingConfig(request.thinkingLevel, includeThoughts);
304
349
  const config = {
305
- temperature: request.temperature ?? 0.2,
350
+ temperature: request.temperature ?? 1.0,
306
351
  maxOutputTokens: request.maxOutputTokens ?? DEFAULT_MAX_OUTPUT_TOKENS,
307
352
  responseMimeType: 'application/json',
308
- responseSchema: request.responseSchema,
353
+ responseSchema: applyResponseKeyOrdering(request.responseSchema, request.responseKeyOrdering),
309
354
  safetySettings: getSafetySettings(getSafetyThreshold()),
310
355
  topP: 0.95,
311
356
  topK: 40,
@@ -349,12 +394,12 @@ async function generateContentWithTimeout(request, model, timeoutMs) {
349
394
  try {
350
395
  return await getClient().models.generateContent({
351
396
  model,
352
- contents: request.prompt,
397
+ contents: getPromptWithFunctionCallingContext(request),
353
398
  config: buildGenerationConfig(request, signal),
354
399
  });
355
400
  }
356
401
  catch (error) {
357
- if (request.signal?.aborted) {
402
+ if (request.signal?.aborted === true) {
358
403
  throw new Error('Gemini request was cancelled.');
359
404
  }
360
405
  if (controller.signal.aborted) {
@@ -449,8 +494,9 @@ function tryWakeNextWaiter() {
449
494
  next();
450
495
  }
451
496
  }
452
- async function waitForConcurrencySlot(limit, requestSignal) {
453
- if (activeCalls < limit) {
497
+ async function waitForSlot(limit, getActiveCount, acquireSlot, waiters, requestSignal) {
498
+ if (waiters.length === 0 && getActiveCount() < limit) {
499
+ acquireSlot();
454
500
  return;
455
501
  }
456
502
  if (requestSignal?.aborted) {
@@ -467,16 +513,17 @@ async function waitForConcurrencySlot(limit, requestSignal) {
467
513
  if (requestSignal) {
468
514
  requestSignal.removeEventListener('abort', onAbort);
469
515
  }
516
+ acquireSlot();
470
517
  resolve();
471
518
  };
472
- slotWaiters.push(waiter);
519
+ waiters.push(waiter);
473
520
  const deadlineTimer = setTimeout(() => {
474
521
  if (settled)
475
522
  return;
476
523
  settled = true;
477
- const idx = slotWaiters.indexOf(waiter);
524
+ const idx = waiters.indexOf(waiter);
478
525
  if (idx !== -1) {
479
- slotWaiters.splice(idx, 1);
526
+ waiters.splice(idx, 1);
480
527
  }
481
528
  if (requestSignal) {
482
529
  requestSignal.removeEventListener('abort', onAbort);
@@ -488,9 +535,9 @@ async function waitForConcurrencySlot(limit, requestSignal) {
488
535
  if (settled)
489
536
  return;
490
537
  settled = true;
491
- const idx = slotWaiters.indexOf(waiter);
538
+ const idx = waiters.indexOf(waiter);
492
539
  if (idx !== -1) {
493
- slotWaiters.splice(idx, 1);
540
+ waiters.splice(idx, 1);
494
541
  }
495
542
  clearTimeout(deadlineTimer);
496
543
  reject(new Error('Gemini request was cancelled.'));
@@ -500,19 +547,254 @@ async function waitForConcurrencySlot(limit, requestSignal) {
500
547
  }
501
548
  });
502
549
  }
550
/**
 * Waits for a slot in the regular (non-batch) call pool.
 * Delegates to waitForSlot with the `activeCalls` counter and the
 * `slotWaiters` queue. The acquire callback increments `activeCalls`, so the
 * slot is already counted when the returned promise resolves.
 *
 * @param limit         Maximum concurrent regular calls.
 * @param requestSignal Optional abort signal forwarded to waitForSlot.
 */
+ async function waitForConcurrencySlot(limit, requestSignal) {
551
+ return waitForSlot(limit, () => activeCalls, () => {
552
+ activeCalls += 1;
553
+ }, slotWaiters, requestSignal);
554
+ }
555
/**
 * Removes the first queued waiter from `batchSlotWaiters` and invokes it.
 * Does nothing when the queue is empty.
 */
+ function tryWakeNextBatchWaiter() {
556
+ const next = batchSlotWaiters.shift();
557
+ if (next !== undefined) {
558
+ next();
559
+ }
560
+ }
561
/**
 * Waits for a slot in the batch call pool.
 * Delegates to waitForSlot with the `activeBatchCalls` counter and the
 * `batchSlotWaiters` queue. The acquire callback increments
 * `activeBatchCalls`.
 *
 * @param limit         Maximum concurrent batch calls.
 * @param requestSignal Optional abort signal forwarded to waitForSlot.
 */
+ async function waitForBatchConcurrencySlot(limit, requestSignal) {
562
+ return waitForSlot(limit, () => activeBatchCalls, () => {
563
+ activeBatchCalls += 1;
564
+ }, batchSlotWaiters, requestSignal);
565
+ }
566
/**
 * Reads the job state from a batch payload.
 * Checks `payload.state` first and falls back to `payload.metadata.state`.
 * Values pass through toUpperStringCode (defined elsewhere; presumably it
 * upper-cases string codes, verify against its definition).
 *
 * @returns The state code, or undefined when the payload or its metadata is
 *          not a record or carries no usable state.
 */
+ function getBatchState(payload) {
567
+ const record = asRecord(payload);
568
+ if (!record) {
569
+ return undefined;
570
+ }
571
+ const directState = toUpperStringCode(record.state);
572
+ if (directState) {
573
+ return directState;
574
+ }
575
+ const metadata = asRecord(record.metadata);
576
+ if (!metadata) {
577
+ return undefined;
578
+ }
579
+ return toUpperStringCode(metadata.state);
580
+ }
581
/**
 * Pulls the response text out of a batch payload, trying in order:
 *   1. `payload.inlineResponse.text`
 *   2. `payload.response.text`
 *   3. `payload.response.inlineResponses[0].text`
 * For the first two locations an empty string counts as missing (truthiness
 * check); the third is returned whenever it is a string.
 *
 * @returns The text, or undefined when none of the locations holds a string.
 */
+ function extractBatchResponseText(payload) {
582
+ const record = asRecord(payload);
583
+ if (!record) {
584
+ return undefined;
585
+ }
586
+ const inlineResponse = asRecord(record.inlineResponse);
587
+ const inlineText = typeof inlineResponse?.text === 'string' ? inlineResponse.text : undefined;
588
+ if (inlineText) {
589
+ return inlineText;
590
+ }
591
+ const response = asRecord(record.response);
592
+ if (!response) {
593
+ return undefined;
594
+ }
595
+ const responseText = typeof response.text === 'string' ? response.text : undefined;
596
+ if (responseText) {
597
+ return responseText;
598
+ }
599
+ const { inlineResponses } = response;
600
+ if (!Array.isArray(inlineResponses) || inlineResponses.length === 0) {
601
+ return undefined;
602
+ }
// Only the first inline response is consulted.
603
+ const firstInline = asRecord(inlineResponses[0]);
604
+ return typeof firstInline?.text === 'string' ? firstInline.text : undefined;
605
+ }
606
/**
 * Finds a human-readable error message in a batch payload, trying in order:
 *   1. `payload.error.message`
 *   2. `payload.metadata.error.message`
 *   3. `payload.response.error.message`
 * For the first two locations an empty string counts as missing.
 *
 * @returns The message string, or undefined when none is present.
 */
+ function extractBatchErrorDetail(payload) {
607
+ const record = asRecord(payload);
608
+ if (!record) {
609
+ return undefined;
610
+ }
611
+ const directError = asRecord(record.error);
612
+ const directMessage = typeof directError?.message === 'string' ? directError.message : undefined;
613
+ if (directMessage) {
614
+ return directMessage;
615
+ }
616
+ const metadata = asRecord(record.metadata);
617
+ const metadataError = asRecord(metadata?.error);
618
+ const metadataMessage = typeof metadataError?.message === 'string'
619
+ ? metadataError.message
620
+ : undefined;
621
+ if (metadataMessage) {
622
+ return metadataMessage;
623
+ }
624
+ const response = asRecord(record.response);
625
+ const responseError = asRecord(response?.error);
626
+ return typeof responseError?.message === 'string'
627
+ ? responseError.message
628
+ : undefined;
629
+ }
630
/**
 * Returns the response text of a batch job that reported success.
 *
 * @param polled Payload returned by the status poll.
 * @returns The non-empty response text.
 * @throws Error when no (or empty) text is found; the message includes the
 *         payload's error detail when one can be extracted.
 */
+ function getBatchSuccessResponseText(polled) {
631
+ const responseText = extractBatchResponseText(polled);
632
+ if (!responseText) {
633
+ const errorDetail = extractBatchErrorDetail(polled);
634
+ throw new Error(errorDetail
635
+ ? `Gemini batch request succeeded but returned no response text: ${errorDetail}`
636
+ : 'Gemini batch request succeeded but returned no response text.');
637
+ }
638
+ return responseText;
639
+ }
640
/**
 * Throws when the batch job ended unsuccessfully.
 * Only 'JOB_STATE_FAILED' and 'JOB_STATE_CANCELLED' are treated as failures;
 * any other state (including undefined) returns normally.
 *
 * @param state   State code for the job.
 * @param payload Payload used to extract an optional error detail.
 * @throws Error naming the state and, when available, the error detail.
 */
+ function handleBatchTerminalState(state, payload) {
641
+ if (state === 'JOB_STATE_FAILED' || state === 'JOB_STATE_CANCELLED') {
642
+ const errorDetail = extractBatchErrorDetail(payload);
643
+ throw new Error(errorDetail
644
+ ? `Gemini batch request ended with state ${state}: ${errorDetail}`
645
+ : `Gemini batch request ended with state ${state}.`);
646
+ }
647
+ }
648
/**
 * Fetches the batch job status once, retrying on failure.
 * Makes at most three `batches.get` attempts (the first plus two retries).
 * Whether an error is retried is decided by canRetryAttempt, and the delay is
 * handled by waitBeforeRetry; both are defined elsewhere.
 *
 * @param batches       Batches API object exposing `get({ name })`.
 * @param batchName     Name of the job to poll.
 * @param onLog         Log callback forwarded to waitBeforeRetry.
 * @param requestSignal Abort signal forwarded to waitBeforeRetry.
 * @returns The payload returned by `batches.get`.
 * @throws The last error when canRetryAttempt declines a retry.
 */
+ async function pollBatchStatusWithRetries(batches, batchName, onLog, requestSignal) {
649
+ const maxPollRetries = 2;
650
+ for (let attempt = 0; attempt <= maxPollRetries; attempt += 1) {
651
+ try {
652
+ return await batches.get({ name: batchName });
653
+ }
654
+ catch (error) {
655
+ if (!canRetryAttempt(attempt, maxPollRetries, error)) {
656
+ throw error;
657
+ }
658
+ await waitBeforeRetry(attempt, error, onLog, requestSignal);
659
+ }
660
+ }
// Presumably unreachable if canRetryAttempt declines on the final attempt
// (its definition is not visible here); kept as a guard.
661
+ throw new Error('Batch polling retries exhausted unexpectedly.');
662
+ }
663
/**
 * Best-effort cancellation of a batch job that will no longer be awaited.
 * Cancels only when the job did not complete, a job name is known, and the
 * request was either aborted or timed out. It is a no-op when the batches
 * object has no `cancel` method.
 * A failing cancel call is logged as a warning and not rethrown.
 *
 * @param request   Original request; `request.signal?.aborted` is inspected.
 * @param batches   Batches API object, optionally exposing `cancel({ name })`.
 * @param batchName Job name, possibly undefined if creation never finished.
 * @param onLog     Log callback passed to emitGeminiLog.
 * @param completed True when the job already returned a result.
 * @param timedOut  True when the polling loop hit its deadline.
 */
+ async function cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut) {
664
+ const aborted = request.signal?.aborted === true;
665
+ if (completed || (!aborted && !timedOut) || !batchName) {
666
+ return;
667
+ }
668
+ if (batches.cancel === undefined) {
669
+ return;
670
+ }
671
+ try {
672
+ await batches.cancel({ name: batchName });
673
+ await emitGeminiLog(onLog, 'info', {
674
+ event: 'gemini_batch_cancelled',
675
+ details: {
676
+ batchName,
677
+ reason: timedOut ? 'timeout' : 'aborted',
678
+ },
679
+ });
680
+ }
681
+ catch (error) {
682
+ await emitGeminiLog(onLog, 'warning', {
683
+ event: 'gemini_batch_cancel_failed',
684
+ details: {
685
+ batchName,
686
+ reason: timedOut ? 'timeout' : 'aborted',
687
+ error: getErrorMessage(error),
688
+ },
689
+ });
690
+ }
691
+ }
692
/**
 * Runs one request as an inline batch job and polls until it finishes.
 *
 * Flow: create a batch holding a single inline request, log its name, then
 * loop. Each pass checks for abort, checks the deadline, polls the status,
 * returns the parsed response on 'JOB_STATE_SUCCEEDED', throws on a
 * failed/cancelled state, and otherwise sleeps for the poll interval. The
 * `finally` block calls cancelBatchIfNeeded, which only cancels after an
 * abort or a timeout.
 *
 * @param request Structured request; `prompt` and `signal` are read here.
 * @param model   Model name placed in the create payload.
 * @param onLog   Log callback.
 * @returns The result of parseStructuredResponse on the job's response text.
 * @throws Error when the batches API is missing, no job name is returned, the
 *         request is aborted, the deadline passes, or the job ends
 *         unsuccessfully.
 *
 * NOTE(review): the payload reads `request.prompt` directly instead of going
 * through getPromptWithFunctionCallingContext, which the non-batch path uses.
 * NOTE(review): a poll error that exhausts its retries exits without
 * cancelling, because neither `timedOut` nor an abort is set. Confirm that
 * leaving the remote job running is acceptable.
 */
+ async function runInlineBatchWithPolling(request, model, onLog) {
693
+ const client = getClient();
694
+ const { batches } = client;
695
+ if (batches === undefined) {
696
+ throw new Error('Batch mode requires SDK batch support, but batches API is unavailable.');
697
+ }
698
+ let batchName;
699
+ let completed = false;
700
+ let timedOut = false;
701
+ try {
702
+ const createPayload = {
703
+ model,
704
+ src: [
705
+ {
706
+ contents: [{ role: 'user', parts: [{ text: request.prompt }] }],
// The signal comes from a throwaway controller, so nothing ever aborts it.
707
+ config: buildGenerationConfig(request, new AbortController().signal),
708
+ },
709
+ ],
710
+ };
711
+ const createdJob = await batches.create(createPayload);
712
+ const createdRecord = asRecord(createdJob);
713
+ batchName =
714
+ typeof createdRecord?.name === 'string' ? createdRecord.name : undefined;
715
+ if (!batchName) {
716
+ throw new Error('Batch mode failed to return a job name.');
717
+ }
718
+ const pollStart = performance.now();
719
+ const timeoutMs = batchTimeoutMsConfig.get();
720
+ const pollIntervalMs = batchPollIntervalMsConfig.get();
721
+ await emitGeminiLog(onLog, 'info', {
722
+ event: 'gemini_batch_created',
723
+ details: { batchName },
724
+ });
725
+ for (;;) {
726
+ if (request.signal?.aborted === true) {
727
+ throw new Error('Gemini request was cancelled.');
728
+ }
729
+ const elapsedMs = Math.round(performance.now() - pollStart);
730
+ if (elapsedMs > timeoutMs) {
// Set before throwing so the finally block knows to cancel the job.
731
+ timedOut = true;
732
+ throw new Error(`Gemini batch request timed out after ${formatNumber(timeoutMs)}ms.`);
733
+ }
734
+ const polled = await pollBatchStatusWithRetries(batches, batchName, onLog, request.signal);
735
+ const state = getBatchState(polled);
736
+ if (state === 'JOB_STATE_SUCCEEDED') {
737
+ const responseText = getBatchSuccessResponseText(polled);
738
+ completed = true;
739
+ return parseStructuredResponse(responseText);
740
+ }
741
+ handleBatchTerminalState(state, polled);
742
+ await sleep(pollIntervalMs, undefined, request.signal
743
+ ? { ...SLEEP_UNREF_OPTIONS, signal: request.signal }
744
+ : SLEEP_UNREF_OPTIONS);
745
+ }
746
+ }
747
+ finally {
748
+ await cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut);
749
+ }
750
+ }
751
/**
 * Returns a point-in-time view of the regular call queue: the `activeCalls`
 * counter and the number of entries in `slotWaiters`.
 * Batch counters (`activeBatchCalls`, `batchSlotWaiters`) are not included.
 */
+ export function getGeminiQueueSnapshot() {
752
+ return {
753
+ activeCalls,
754
+ waitingCalls: slotWaiters.length,
755
+ };
756
+ }
503
757
  /**
   * Entry point for a structured-JSON Gemini request.
   *
   * Resolves the model, timeout, retry count and batch mode from the request
   * (with defaults), waits for a concurrency slot, logs the queue wait, runs
   * the request inside `geminiContext`, and releases the slot in `finally`.
   *
   * Added (+) lines: 'inline' batch mode uses its own limit, counter and
   * waiter queue and runs through runInlineBatchWithPolling. The slot counter
   * is now incremented inside the wait helper instead of after it, as the
   * removed (-) line `activeCalls += 1` did.
   *
   * @param request Structured request; reads `model`, `timeoutMs`,
   *                `maxRetries`, `batchMode`, `onLog` and `signal`.
   * @returns Promise of the parsed structured response.
   *
   * NOTE(review): the slot is already held while `safeCallOnLog` is awaited,
   * which happens before the `try` begins. If that call can reject, the slot
   * is never released. Confirm it swallows callback errors.
   */
  export async function generateStructuredJson(request) {
504
758
  const model = request.model ?? getDefaultModel();
505
759
  const timeoutMs = request.timeoutMs ?? DEFAULT_TIMEOUT_MS;
506
760
  const maxRetries = request.maxRetries ?? DEFAULT_MAX_RETRIES;
761
+ const batchMode = request.batchMode ?? getDefaultBatchMode();
507
762
  const { onLog } = request;
508
- const limit = maxConcurrentCallsConfig.get();
509
- await waitForConcurrencySlot(limit, request.signal);
510
- activeCalls += 1;
763
+ const limit = batchMode === 'inline'
764
+ ? maxConcurrentBatchCallsConfig.get()
765
+ : maxConcurrentCallsConfig.get();
766
+ const queueWaitStartedAt = performance.now();
767
+ if (batchMode === 'inline') {
768
+ await waitForBatchConcurrencySlot(limit, request.signal);
769
+ }
770
+ else {
771
+ await waitForConcurrencySlot(limit, request.signal);
772
+ }
773
+ const queueWaitMs = Math.round(performance.now() - queueWaitStartedAt);
774
+ await safeCallOnLog(onLog, 'info', {
775
+ event: 'gemini_queue_acquired',
776
+ queueWaitMs,
777
+ waitingCalls: batchMode === 'inline' ? batchSlotWaiters.length : slotWaiters.length,
778
+ activeCalls,
779
+ activeBatchCalls,
780
+ mode: batchMode,
781
+ });
511
782
  try {
512
- return await geminiContext.run({ requestId: nextRequestId(), model }, () => runWithRetries(request, model, timeoutMs, maxRetries, onLog));
783
+ return await geminiContext.run({ requestId: nextRequestId(), model }, () => {
784
+ if (batchMode === 'inline') {
785
+ return runInlineBatchWithPolling(request, model, onLog);
786
+ }
787
+ return runWithRetries(request, model, timeoutMs, maxRetries, onLog);
788
+ });
513
789
  }
514
790
  finally {
515
- activeCalls -= 1;
516
- tryWakeNextWaiter();
  // Release from the same pool that was acquired, then wake that pool's next waiter.
791
+ if (batchMode === 'inline') {
792
+ activeBatchCalls -= 1;
793
+ tryWakeNextBatchWaiter();
794
+ }
795
+ else {
796
+ activeCalls -= 1;
797
+ tryWakeNextWaiter();
798
+ }
517
799
  }
518
800
  }