npm - @j0hanz/code-review-analyst-mcp - Versions diffs - 1.4.4 → 1.5.1 - Mend

@j0hanz/code-review-analyst-mcp 1.4.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +23 -19
package/dist/lib/diff-parser.js +33 -13
package/dist/lib/diff-store.d.ts +2 -0
package/dist/lib/gemini.d.ts +4 -0
package/dist/lib/gemini.js +308 -26
package/dist/lib/model-config.d.ts +33 -38
package/dist/lib/model-config.js +55 -88
package/dist/lib/tool-contracts.d.ts +31 -22
package/dist/lib/tool-contracts.js +15 -8
package/dist/lib/tool-factory.d.ts +8 -3
package/dist/lib/tool-factory.js +53 -5
package/dist/lib/types.d.ts +7 -1
package/dist/prompts/index.js +3 -3
package/dist/resources/instructions.js +3 -3
package/dist/resources/server-config.js +19 -4
package/dist/resources/tool-info.js +4 -4
package/dist/schemas/outputs.d.ts +7 -7
package/dist/tools/analyze-complexity.js +6 -3
package/dist/tools/analyze-pr-impact.js +9 -4
package/dist/tools/detect-api-breaking.js +6 -3
package/dist/tools/generate-diff.js +1 -1
package/dist/tools/generate-review-summary.js +17 -6
package/dist/tools/generate-test-plan.js +9 -4
package/dist/tools/inspect-code-quality.js +9 -4
package/dist/tools/suggest-search-replace.js +9 -4
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -18,7 +18,7 @@ This server accepts unified diffs and returns structured JSON results — findin
 - **Impact Analysis** — Objective severity scoring, breaking change detection, and rollback complexity assessment.
 - **Review Summary** — Concise PR digest with merge recommendation and change statistics.
-- **Deep Code Inspection** — Pro model with 16K thinking budget for context-aware analysis using full file contents.
+- **Deep Code Inspection** — Pro model with high thinking level for context-aware analysis using full file contents.
 - **Search & Replace Fixes** — Verbatim, copy-paste-ready code fixes tied to specific findings.
 - **Test Plan Generation** — Systematic test case generation with priority ranking and pseudocode.
 - **Async Task Support** — All tools support MCP task lifecycle with progress notifications.
@@ -371,27 +371,31 @@ Create a test plan covering the changes in the diff using the Flash model with t
 ### Environment Variables
-| Variable                       | Description                                          | Default      | Required |
-| ------------------------------ | ---------------------------------------------------- | ------------ | -------- |
-| `GEMINI_API_KEY`               | Gemini API key                                       | —            | Yes      |
-| `GOOGLE_API_KEY`               | Alternative API key (if `GEMINI_API_KEY` not set)    | —            | No       |
-| `GEMINI_MODEL`                 | Override default model selection                     | —            | No       |
-| `GEMINI_HARM_BLOCK_THRESHOLD`  | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No       |
-| `MAX_DIFF_CHARS`               | Max chars for diff input                             | `120000`     | No       |
-| `MAX_CONTEXT_CHARS`            | Max combined context for inspection                  | `500000`     | No       |
-| `MAX_CONCURRENT_CALLS`         | Max concurrent Gemini requests                       | `10`         | No       |
-| `MAX_CONCURRENT_CALLS_WAIT_MS` | Max wait time for a free Gemini slot                 | `2000`       | No       |
-| `MAX_CONCURRENT_CALLS_POLL_MS` | Poll interval while waiting for a free slot          | `25`         | No       |
+| Variable                        | Description                                          | Default      | Required |
+| ------------------------------- | ---------------------------------------------------- | ------------ | -------- |
+| `GEMINI_API_KEY`                | Gemini API key                                       | —            | Yes      |
+| `GOOGLE_API_KEY`                | Alternative API key (if `GEMINI_API_KEY` not set)    | —            | No       |
+| `GEMINI_MODEL`                  | Override default model selection                     | —            | No       |
+| `GEMINI_HARM_BLOCK_THRESHOLD`   | Safety threshold (BLOCK_NONE, BLOCK_ONLY_HIGH, etc.) | `BLOCK_NONE` | No       |
+| `MAX_DIFF_CHARS`                | Max chars for diff input                             | `120000`     | No       |
+| `MAX_CONTEXT_CHARS`             | Max combined context for inspection                  | `500000`     | No       |
+| `MAX_CONCURRENT_CALLS`          | Max concurrent Gemini requests                       | `10`         | No       |
+| `MAX_CONCURRENT_BATCH_CALLS`    | Max concurrent inline batch requests                 | `2`          | No       |
+| `MAX_CONCURRENT_CALLS_WAIT_MS`  | Max wait time for a free Gemini slot                 | `2000`       | No       |
+| `MAX_SCHEMA_RETRY_ERROR_CHARS`  | Max chars from schema error injected into retry text | `1500`       | No       |
+| `GEMINI_BATCH_MODE`             | Request mode for Gemini calls (`off`, `inline`)      | `off`        | No       |
+| `GEMINI_BATCH_POLL_INTERVAL_MS` | Poll interval for batch job status                   | `2000`       | No       |
+| `GEMINI_BATCH_TIMEOUT_MS`       | Max wait for batch completion                        | `120000`     | No       |
 ### Models
-| Tool                      | Model              | Thinking Budget |
-| ------------------------- | ------------------ | --------------- |
-| `analyze_pr_impact`       | `gemini-2.5-flash` | —               |
-| `generate_review_summary` | `gemini-2.5-flash` | —               |
-| `inspect_code_quality`    | `gemini-2.5-pro`   | 16,384 tokens   |
-| `suggest_search_replace`  | `gemini-2.5-pro`   | 16,384 tokens   |
-| `generate_test_plan`      | `gemini-2.5-flash` | 8,192 tokens    |
+| Tool                      | Model                    | Thinking Level |
+| ------------------------- | ------------------------ | -------------- |
+| `analyze_pr_impact`       | `gemini-3-flash-preview` | `minimal`      |
+| `generate_review_summary` | `gemini-3-flash-preview` | `minimal`      |
+| `inspect_code_quality`    | `gemini-3-pro-preview`   | `high`         |
+| `suggest_search_replace`  | `gemini-3-pro-preview`   | `high`         |
+| `generate_test_plan`      | `gemini-3-flash-preview` | `medium`       |
 ## Workflows

package/dist/lib/diff-parser.js CHANGED Viewed

@@ -32,24 +32,35 @@ function sortPaths(paths) {
     }
     return Array.from(paths).sort(PATH_SORTER);
 }
-function buildDiffComputation(files) {
+function buildDiffComputation(files, options) {
     let added = 0;
     let deleted = 0;
-    const paths = new Set();
-    const summaries = new Array(files.length);
+    const paths = options.needPaths ? new Set() : undefined;
+    const summaries = options.needSummaries
+        ? new Array(files.length)
+        : undefined;
     let index = 0;
     for (const file of files) {
         added += file.additions;
         deleted += file.deletions;
-        const path = resolveChangedPath(file);
-        if (path) {
-            paths.add(path);
+        if (options.needPaths || options.needSummaries) {
+            const path = resolveChangedPath(file);
+            if (paths && path) {
+                paths.add(path);
+            }
+            if (summaries) {
+                summaries[index] =
+                    `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
+            }
         }
-        summaries[index] =
-            `${path ?? UNKNOWN_PATH} (+${file.additions} -${file.deletions})`;
         index += 1;
     }
-    return { added, deleted, paths, summaries };
+    return {
+        added,
+        deleted,
+        paths: paths ?? new Set(),
+        summaries: summaries ?? [],
+    };
 }
 function buildStats(filesCount, added, deleted) {
     return { files: filesCount, added, deleted };
@@ -61,7 +72,10 @@ export function computeDiffStatsAndSummaryFromFiles(files) {
             summary: NO_FILES_CHANGED,
         };
     }
-    const computed = buildDiffComputation(files);
+    const computed = buildDiffComputation(files, {
+        needPaths: false,
+        needSummaries: true,
+    });
     const stats = buildStats(files.length, computed.added, computed.deleted);
     return {
         stats,
@@ -75,7 +89,10 @@ export function computeDiffStatsAndPathsFromFiles(files) {
             paths: EMPTY_PATHS,
         };
     }
-    const computed = buildDiffComputation(files);
+    const computed = buildDiffComputation(files, {
+        needPaths: true,
+        needSummaries: false,
+    });
     return {
         stats: buildStats(files.length, computed.added, computed.deleted),
         paths: sortPaths(computed.paths),
@@ -86,7 +103,7 @@ export function extractChangedPathsFromFiles(files) {
     if (files.length === 0) {
         return EMPTY_PATHS;
     }
-    return sortPaths(buildDiffComputation(files).paths);
+    return sortPaths(buildDiffComputation(files, { needPaths: true, needSummaries: false }).paths);
 }
 /** Extract all unique changed file paths (renamed: returns new path). */
 export function extractChangedPaths(diff) {
@@ -96,7 +113,10 @@ export function computeDiffStatsFromFiles(files) {
     if (files.length === 0) {
         return EMPTY_STATS;
     }
-    const computed = buildDiffComputation(files);
+    const computed = buildDiffComputation(files, {
+        needPaths: false,
+        needSummaries: false,
+    });
     return buildStats(files.length, computed.added, computed.deleted);
 }
 /** Count changed files, added lines, and deleted lines. */

package/dist/lib/diff-store.d.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import type { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
+import type { ParsedFile } from './diff-parser.js';
 import { createErrorToolResponse } from './tool-response.js';
 export declare const DIFF_RESOURCE_URI = "diff://current";
 export interface DiffStats {
@@ -8,6 +9,7 @@ export interface DiffStats {
 }
 export interface DiffSlot {
     diff: string;
+    parsedFiles: readonly ParsedFile[];
     stats: DiffStats;
     generatedAt: string;
     mode: string;

package/dist/lib/gemini.d.ts CHANGED Viewed

@@ -4,4 +4,8 @@ import type { GeminiStructuredRequest } from './types.js';
 export declare const geminiEvents: EventEmitter<[never]>;
 export declare function getCurrentRequestId(): string;
 export declare function setClientForTesting(client: GoogleGenAI): void;
+export declare function getGeminiQueueSnapshot(): {
+    activeCalls: number;
+    waitingCalls: number;
+};
 export declare function generateStructuredJson(request: GeminiStructuredRequest): Promise<unknown>;

package/dist/lib/gemini.js CHANGED Viewed

@@ -4,15 +4,16 @@ import { EventEmitter } from 'node:events';
 import { performance } from 'node:perf_hooks';
 import { setTimeout as sleep } from 'node:timers/promises';
 import { debuglog } from 'node:util';
-import { FinishReason, GoogleGenAI, HarmBlockThreshold, HarmCategory, } from '@google/genai';
+import { FinishReason, GoogleGenAI, HarmBlockThreshold, HarmCategory, ThinkingLevel, } from '@google/genai';
 import { createCachedEnvInt } from './env-config.js';
 import { getErrorMessage, RETRYABLE_UPSTREAM_ERROR_PATTERN } from './errors.js';
 // Lazy-cached: first call happens after parseCommandLineArgs() sets GEMINI_MODEL.
 let _defaultModel;
-const DEFAULT_MODEL = 'gemini-2.5-flash';
+const DEFAULT_MODEL = 'gemini-3-flash-preview';
 const GEMINI_MODEL_ENV_VAR = 'GEMINI_MODEL';
 const GEMINI_HARM_BLOCK_THRESHOLD_ENV_VAR = 'GEMINI_HARM_BLOCK_THRESHOLD';
 const GEMINI_INCLUDE_THOUGHTS_ENV_VAR = 'GEMINI_INCLUDE_THOUGHTS';
+const GEMINI_BATCH_MODE_ENV_VAR = 'GEMINI_BATCH_MODE';
 const GEMINI_API_KEY_ENV_VAR = 'GEMINI_API_KEY';
 const GOOGLE_API_KEY_ENV_VAR = 'GOOGLE_API_KEY';
 function getDefaultModel() {
@@ -30,14 +31,20 @@ const RETRY_DELAY_MAX_MS = 5_000;
 const RETRY_JITTER_RATIO = 0.2;
 const DEFAULT_SAFETY_THRESHOLD = HarmBlockThreshold.BLOCK_NONE;
 const DEFAULT_INCLUDE_THOUGHTS = false;
+const DEFAULT_BATCH_MODE = 'off';
 const UNKNOWN_REQUEST_CONTEXT_VALUE = 'unknown';
 const RETRYABLE_NUMERIC_CODES = new Set([429, 500, 502, 503, 504]);
 const DIGITS_ONLY_PATTERN = /^\d+$/;
 const SLEEP_UNREF_OPTIONS = { ref: false };
 const maxConcurrentCallsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS', 10);
+const maxConcurrentBatchCallsConfig = createCachedEnvInt('MAX_CONCURRENT_BATCH_CALLS', 2);
 const concurrencyWaitMsConfig = createCachedEnvInt('MAX_CONCURRENT_CALLS_WAIT_MS', 2_000);
+const batchPollIntervalMsConfig = createCachedEnvInt('GEMINI_BATCH_POLL_INTERVAL_MS', 2_000);
+const batchTimeoutMsConfig = createCachedEnvInt('GEMINI_BATCH_TIMEOUT_MS', 120_000);
 let activeCalls = 0;
+let activeBatchCalls = 0;
 const slotWaiters = [];
+const batchSlotWaiters = [];
 const RETRYABLE_TRANSIENT_CODES = new Set([
     'RESOURCE_EXHAUSTED',
     'UNAVAILABLE',
@@ -91,14 +98,31 @@ function parseSafetyThreshold(threshold) {
     }
     return SAFETY_THRESHOLD_BY_NAME[normalizedThreshold];
 }
-function getThinkingConfig(thinkingBudget, includeThoughts) {
-    if (thinkingBudget === undefined) {
+function getThinkingConfig(thinkingLevel, includeThoughts) {
+    if (thinkingLevel === undefined && !includeThoughts) {
         return undefined;
     }
+    const config = {};
+    if (thinkingLevel !== undefined) {
+        switch (thinkingLevel) {
+            case 'minimal':
+                config.thinkingLevel = ThinkingLevel.MINIMAL;
+                break;
+            case 'low':
+                config.thinkingLevel = ThinkingLevel.LOW;
+                break;
+            case 'medium':
+                config.thinkingLevel = ThinkingLevel.MEDIUM;
+                break;
+            case 'high':
+                config.thinkingLevel = ThinkingLevel.HIGH;
+                break;
+        }
+    }
     if (includeThoughts) {
-        return { includeThoughts: true, thinkingBudget };
+        config.includeThoughts = true;
     }
-    return { thinkingBudget };
+    return config;
 }
 function parseBooleanEnv(value) {
     const normalized = value.trim().toLowerCase();
@@ -132,6 +156,25 @@ function getDefaultIncludeThoughts() {
     cachedIncludeThoughts = parseBooleanEnv(value) ?? DEFAULT_INCLUDE_THOUGHTS;
     return cachedIncludeThoughts;
 }
+function getDefaultBatchMode() {
+    const value = process.env[GEMINI_BATCH_MODE_ENV_VAR]?.trim().toLowerCase();
+    if (value === 'inline') {
+        return 'inline';
+    }
+    return DEFAULT_BATCH_MODE;
+}
+function applyResponseKeyOrdering(responseSchema, responseKeyOrdering) {
+    if (!responseKeyOrdering || responseKeyOrdering.length === 0) {
+        return responseSchema;
+    }
+    return {
+        ...responseSchema,
+        propertyOrdering: [...responseKeyOrdering],
+    };
+}
+function getPromptWithFunctionCallingContext(request) {
+    return request.prompt;
+}
 function getSafetySettings(threshold) {
     const cached = safetySettingsCache.get(threshold);
     if (cached) {
@@ -264,19 +307,21 @@ function findFirstStringCode(record, keys) {
     }
     return undefined;
 }
+const NUMERIC_ERROR_KEYS = ['status', 'statusCode', 'code'];
 function getNumericErrorCode(error) {
     const record = getNestedError(error);
     if (!record) {
         return undefined;
     }
-    return findFirstNumericCode(record, ['status', 'statusCode', 'code']);
+    return findFirstNumericCode(record, NUMERIC_ERROR_KEYS);
 }
+const TRANSIENT_ERROR_KEYS = ['code', 'status', 'statusText'];
 function getTransientErrorCode(error) {
     const record = getNestedError(error);
     if (!record) {
         return undefined;
     }
-    return findFirstStringCode(record, ['code', 'status', 'statusText']);
+    return findFirstStringCode(record, TRANSIENT_ERROR_KEYS);
 }
 function shouldRetry(error) {
     const numericCode = getNumericErrorCode(error);
@@ -300,12 +345,12 @@ function getRetryDelayMs(attempt) {
 }
 function buildGenerationConfig(request, abortSignal) {
     const includeThoughts = request.includeThoughts ?? getDefaultIncludeThoughts();
-    const thinkingConfig = getThinkingConfig(request.thinkingBudget, includeThoughts);
+    const thinkingConfig = getThinkingConfig(request.thinkingLevel, includeThoughts);
     const config = {
-        temperature: request.temperature ?? 0.2,
+        temperature: request.temperature ?? 1.0,
         maxOutputTokens: request.maxOutputTokens ?? DEFAULT_MAX_OUTPUT_TOKENS,
         responseMimeType: 'application/json',
-        responseSchema: request.responseSchema,
+        responseSchema: applyResponseKeyOrdering(request.responseSchema, request.responseKeyOrdering),
         safetySettings: getSafetySettings(getSafetyThreshold()),
         topP: 0.95,
         topK: 40,
@@ -349,12 +394,12 @@ async function generateContentWithTimeout(request, model, timeoutMs) {
     try {
         return await getClient().models.generateContent({
             model,
-            contents: request.prompt,
+            contents: getPromptWithFunctionCallingContext(request),
             config: buildGenerationConfig(request, signal),
         });
     }
     catch (error) {
-        if (request.signal?.aborted) {
+        if (request.signal?.aborted === true) {
             throw new Error('Gemini request was cancelled.');
         }
         if (controller.signal.aborted) {
@@ -449,8 +494,9 @@ function tryWakeNextWaiter() {
         next();
     }
 }
-async function waitForConcurrencySlot(limit, requestSignal) {
-    if (activeCalls < limit) {
+async function waitForSlot(limit, getActiveCount, acquireSlot, waiters, requestSignal) {
+    if (waiters.length === 0 && getActiveCount() < limit) {
+        acquireSlot();
         return;
     }
     if (requestSignal?.aborted) {
@@ -467,16 +513,17 @@ async function waitForConcurrencySlot(limit, requestSignal) {
             if (requestSignal) {
                 requestSignal.removeEventListener('abort', onAbort);
             }
+            acquireSlot();
             resolve();
         };
-        slotWaiters.push(waiter);
+        waiters.push(waiter);
         const deadlineTimer = setTimeout(() => {
             if (settled)
                 return;
             settled = true;
-            const idx = slotWaiters.indexOf(waiter);
+            const idx = waiters.indexOf(waiter);
             if (idx !== -1) {
-                slotWaiters.splice(idx, 1);
+                waiters.splice(idx, 1);
             }
             if (requestSignal) {
                 requestSignal.removeEventListener('abort', onAbort);
@@ -488,9 +535,9 @@ async function waitForConcurrencySlot(limit, requestSignal) {
             if (settled)
                 return;
             settled = true;
-            const idx = slotWaiters.indexOf(waiter);
+            const idx = waiters.indexOf(waiter);
             if (idx !== -1) {
-                slotWaiters.splice(idx, 1);
+                waiters.splice(idx, 1);
             }
             clearTimeout(deadlineTimer);
             reject(new Error('Gemini request was cancelled.'));
@@ -500,19 +547,254 @@ async function waitForConcurrencySlot(limit, requestSignal) {
         }
     });
 }
+async function waitForConcurrencySlot(limit, requestSignal) {
+    return waitForSlot(limit, () => activeCalls, () => {
+        activeCalls += 1;
+    }, slotWaiters, requestSignal);
+}
+function tryWakeNextBatchWaiter() {
+    const next = batchSlotWaiters.shift();
+    if (next !== undefined) {
+        next();
+    }
+}
+async function waitForBatchConcurrencySlot(limit, requestSignal) {
+    return waitForSlot(limit, () => activeBatchCalls, () => {
+        activeBatchCalls += 1;
+    }, batchSlotWaiters, requestSignal);
+}
+function getBatchState(payload) {
+    const record = asRecord(payload);
+    if (!record) {
+        return undefined;
+    }
+    const directState = toUpperStringCode(record.state);
+    if (directState) {
+        return directState;
+    }
+    const metadata = asRecord(record.metadata);
+    if (!metadata) {
+        return undefined;
+    }
+    return toUpperStringCode(metadata.state);
+}
+function extractBatchResponseText(payload) {
+    const record = asRecord(payload);
+    if (!record) {
+        return undefined;
+    }
+    const inlineResponse = asRecord(record.inlineResponse);
+    const inlineText = typeof inlineResponse?.text === 'string' ? inlineResponse.text : undefined;
+    if (inlineText) {
+        return inlineText;
+    }
+    const response = asRecord(record.response);
+    if (!response) {
+        return undefined;
+    }
+    const responseText = typeof response.text === 'string' ? response.text : undefined;
+    if (responseText) {
+        return responseText;
+    }
+    const { inlineResponses } = response;
+    if (!Array.isArray(inlineResponses) || inlineResponses.length === 0) {
+        return undefined;
+    }
+    const firstInline = asRecord(inlineResponses[0]);
+    return typeof firstInline?.text === 'string' ? firstInline.text : undefined;
+}
+function extractBatchErrorDetail(payload) {
+    const record = asRecord(payload);
+    if (!record) {
+        return undefined;
+    }
+    const directError = asRecord(record.error);
+    const directMessage = typeof directError?.message === 'string' ? directError.message : undefined;
+    if (directMessage) {
+        return directMessage;
+    }
+    const metadata = asRecord(record.metadata);
+    const metadataError = asRecord(metadata?.error);
+    const metadataMessage = typeof metadataError?.message === 'string'
+        ? metadataError.message
+        : undefined;
+    if (metadataMessage) {
+        return metadataMessage;
+    }
+    const response = asRecord(record.response);
+    const responseError = asRecord(response?.error);
+    return typeof responseError?.message === 'string'
+        ? responseError.message
+        : undefined;
+}
+function getBatchSuccessResponseText(polled) {
+    const responseText = extractBatchResponseText(polled);
+    if (!responseText) {
+        const errorDetail = extractBatchErrorDetail(polled);
+        throw new Error(errorDetail
+            ? `Gemini batch request succeeded but returned no response text: ${errorDetail}`
+            : 'Gemini batch request succeeded but returned no response text.');
+    }
+    return responseText;
+}
+function handleBatchTerminalState(state, payload) {
+    if (state === 'JOB_STATE_FAILED' || state === 'JOB_STATE_CANCELLED') {
+        const errorDetail = extractBatchErrorDetail(payload);
+        throw new Error(errorDetail
+            ? `Gemini batch request ended with state ${state}: ${errorDetail}`
+            : `Gemini batch request ended with state ${state}.`);
+    }
+}
+async function pollBatchStatusWithRetries(batches, batchName, onLog, requestSignal) {
+    const maxPollRetries = 2;
+    for (let attempt = 0; attempt <= maxPollRetries; attempt += 1) {
+        try {
+            return await batches.get({ name: batchName });
+        }
+        catch (error) {
+            if (!canRetryAttempt(attempt, maxPollRetries, error)) {
+                throw error;
+            }
+            await waitBeforeRetry(attempt, error, onLog, requestSignal);
+        }
+    }
+    throw new Error('Batch polling retries exhausted unexpectedly.');
+}
+async function cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut) {
+    const aborted = request.signal?.aborted === true;
+    if (completed || (!aborted && !timedOut) || !batchName) {
+        return;
+    }
+    if (batches.cancel === undefined) {
+        return;
+    }
+    try {
+        await batches.cancel({ name: batchName });
+        await emitGeminiLog(onLog, 'info', {
+            event: 'gemini_batch_cancelled',
+            details: {
+                batchName,
+                reason: timedOut ? 'timeout' : 'aborted',
+            },
+        });
+    }
+    catch (error) {
+        await emitGeminiLog(onLog, 'warning', {
+            event: 'gemini_batch_cancel_failed',
+            details: {
+                batchName,
+                reason: timedOut ? 'timeout' : 'aborted',
+                error: getErrorMessage(error),
+            },
+        });
+    }
+}
+async function runInlineBatchWithPolling(request, model, onLog) {
+    const client = getClient();
+    const { batches } = client;
+    if (batches === undefined) {
+        throw new Error('Batch mode requires SDK batch support, but batches API is unavailable.');
+    }
+    let batchName;
+    let completed = false;
+    let timedOut = false;
+    try {
+        const createPayload = {
+            model,
+            src: [
+                {
+                    contents: [{ role: 'user', parts: [{ text: request.prompt }] }],
+                    config: buildGenerationConfig(request, new AbortController().signal),
+                },
+            ],
+        };
+        const createdJob = await batches.create(createPayload);
+        const createdRecord = asRecord(createdJob);
+        batchName =
+            typeof createdRecord?.name === 'string' ? createdRecord.name : undefined;
+        if (!batchName) {
+            throw new Error('Batch mode failed to return a job name.');
+        }
+        const pollStart = performance.now();
+        const timeoutMs = batchTimeoutMsConfig.get();
+        const pollIntervalMs = batchPollIntervalMsConfig.get();
+        await emitGeminiLog(onLog, 'info', {
+            event: 'gemini_batch_created',
+            details: { batchName },
+        });
+        for (;;) {
+            if (request.signal?.aborted === true) {
+                throw new Error('Gemini request was cancelled.');
+            }
+            const elapsedMs = Math.round(performance.now() - pollStart);
+            if (elapsedMs > timeoutMs) {
+                timedOut = true;
+                throw new Error(`Gemini batch request timed out after ${formatNumber(timeoutMs)}ms.`);
+            }
+            const polled = await pollBatchStatusWithRetries(batches, batchName, onLog, request.signal);
+            const state = getBatchState(polled);
+            if (state === 'JOB_STATE_SUCCEEDED') {
+                const responseText = getBatchSuccessResponseText(polled);
+                completed = true;
+                return parseStructuredResponse(responseText);
+            }
+            handleBatchTerminalState(state, polled);
+            await sleep(pollIntervalMs, undefined, request.signal
+                ? { ...SLEEP_UNREF_OPTIONS, signal: request.signal }
+                : SLEEP_UNREF_OPTIONS);
+        }
+    }
+    finally {
+        await cancelBatchIfNeeded(request, batches, batchName, onLog, completed, timedOut);
+    }
+}
+export function getGeminiQueueSnapshot() {
+    return {
+        activeCalls,
+        waitingCalls: slotWaiters.length,
+    };
+}
 export async function generateStructuredJson(request) {
     const model = request.model ?? getDefaultModel();
     const timeoutMs = request.timeoutMs ?? DEFAULT_TIMEOUT_MS;
     const maxRetries = request.maxRetries ?? DEFAULT_MAX_RETRIES;
+    const batchMode = request.batchMode ?? getDefaultBatchMode();
     const { onLog } = request;
-    const limit = maxConcurrentCallsConfig.get();
-    await waitForConcurrencySlot(limit, request.signal);
-    activeCalls += 1;
+    const limit = batchMode === 'inline'
+        ? maxConcurrentBatchCallsConfig.get()
+        : maxConcurrentCallsConfig.get();
+    const queueWaitStartedAt = performance.now();
+    if (batchMode === 'inline') {
+        await waitForBatchConcurrencySlot(limit, request.signal);
+    }
+    else {
+        await waitForConcurrencySlot(limit, request.signal);
+    }
+    const queueWaitMs = Math.round(performance.now() - queueWaitStartedAt);
+    await safeCallOnLog(onLog, 'info', {
+        event: 'gemini_queue_acquired',
+        queueWaitMs,
+        waitingCalls: batchMode === 'inline' ? batchSlotWaiters.length : slotWaiters.length,
+        activeCalls,
+        activeBatchCalls,
+        mode: batchMode,
+    });
     try {
-        return await geminiContext.run({ requestId: nextRequestId(), model }, () => runWithRetries(request, model, timeoutMs, maxRetries, onLog));
+        return await geminiContext.run({ requestId: nextRequestId(), model }, () => {
+            if (batchMode === 'inline') {
+                return runInlineBatchWithPolling(request, model, onLog);
+            }
+            return runWithRetries(request, model, timeoutMs, maxRetries, onLog);
+        });
     }
     finally {
-        activeCalls -= 1;
-        tryWakeNextWaiter();
+        if (batchMode === 'inline') {
+            activeBatchCalls -= 1;
+            tryWakeNextBatchWaiter();
+        }
+        else {
+            activeCalls -= 1;
+            tryWakeNextWaiter();
+        }
     }
 }