npm - page-analyzer - Versions diffs - 1.2.1 → 1.2.3 - Mend

page-analyzer 1.2.1 → 1.2.3

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (14) hide show

package/.env.example +36 -0
package/README.md +29 -3
package/extractors/block-assigner.js +1 -1
package/index.d.ts +318 -0
package/index.js +192 -36
package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +19 -0
package/llm/providers/claude-cli-provider.js +137 -0
package/llm/providers/cli-runner.js +129 -0
package/llm/providers/codex-cli-provider.js +154 -0
package/llm/providers/index.js +61 -0
package/package.json +6 -1
package/page-extractor.js +210 -17
package/scripts/analyze.js +10 -5
package/test/smoke.test.js +151 -12

package/index.js CHANGED Viewed

@@ -1,17 +1,30 @@
 /**
  * page-analyzer — Standalone module
  *
- * Simplest usage — just pass a URL:
+ * Three LLM backends are supported via `llm.type`:
  *
- *   import { analyzeUrl } from './page-analyzer/index.js';
- *
- *   const result = await analyzeUrl('https://example.com', {
+ *   // 1. OpenAI-compatible HTTP API (default, backward compatible)
+ *   await analyzeUrl('https://example.com', {
  *     llm: {
+ *       type: 'openai',           // optional; default
  *       apiKey: 'sk-...',
  *       apiEndpoint: 'https://api.openai.com/v1/chat/completions',
- *       model: 'gpt-4',
- *     },
- *     showEvents: true
+ *       model: 'gpt-4'
+ *     }
+ *   });
+ *
+ *   // 2. Local Codex CLI (`codex exec`) — uses your local auth
+ *   await analyzeUrl('https://example.com', {
+ *     llm: {
+ *       type: 'codex',
+ *       model: 'gpt-5.5'          // model === 'gpt-5.5' auto-enables fast mode
+ *       // fast: true             // optional explicit override
+ *     }
+ *   });
+ *
+ *   // 3. Local Claude CLI (`claude -p`) — uses your local auth
+ *   await analyzeUrl('https://example.com', {
+ *     llm: { type: 'claude', model: 'sonnet' }
  *   });
  *
  * Or step-by-step with analyzePageEvents() for pre-fetched data.
@@ -20,12 +33,22 @@
  */
 import { HtmlParser } from './html-parser.js';
-import { assignBlocksToElements } from './extractors/block-assigner.js';
+import { assignBlocksToElements, mapRectToBlock } from './extractors/block-assigner.js';
 import { CsvExporter } from './csv-exporter.js';
-import { OpenAiProvider } from './llm/providers/openai-provider.js';
+import { createLlmProvider } from './llm/providers/index.js';
 import { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
 import { PageExtractor } from './page-extractor.js';
+/**
+ * Check that an LLM config carries the fields its backend needs.
+ *
+ * The backend is read from `llmConfig.type` (case-insensitive) and falls back
+ * to 'openai' when that field is missing or falsy.
+ *
+ * @param {Object} llmConfig - LLM settings supplied by the caller
+ * @param {string} scope - Prefix used in error messages (e.g. 'options.llm')
+ * @throws {Error} When `model` is missing, or when the backend is 'openai'
+ *   and `apiKey` or `apiEndpoint` is missing
+ */
+function validateLlmConfig(llmConfig, scope) {
+  const backend = String(llmConfig?.type || 'openai').toLowerCase();
+  const hasModel = Boolean(llmConfig?.model);
+  if (!hasModel) {
+    throw new Error(`${scope}.model is required`);
+  }
+  // Only the HTTP backend needs credentials; every other type stops here.
+  if (backend !== 'openai') {
+    return;
+  }
+  const { apiKey, apiEndpoint } = llmConfig;
+  if (!(apiKey && apiEndpoint)) {
+    throw new Error(`${scope}.apiKey and ${scope}.apiEndpoint are required when ${scope}.type='openai'`);
+  }
+}
 /**
  * Truthy only for non-null, non-array objects. Falsy inputs are handed back
  * unchanged, so the result is meant for truthiness checks, not `=== true`.
  */
 function isObject(value) {
   if (!value) {
     return value;
   }
   return typeof value === 'object' && !Array.isArray(value);
 }
@@ -131,6 +154,92 @@ function mergeScreenshots(primary, secondary) {
   return hasScreenshots(merged) ? merged : null;
 }
+/**
+ * Reduce a sized-element record to the fixed set of fields exposed in results.
+ *
+ * Text fields fall back to '' and geometry fields to 0 when missing, falsy, or
+ * not numeric; `interactive` is true only for a strict `true`. A non-object
+ * input yields a record made entirely of those fallbacks.
+ *
+ * @param {Object} element - Raw sized-element record
+ * @returns {Object} Trimmed copy with a stable key order
+ */
+function pickSizedElementFields(element) {
+  const record = isObject(element) ? element : {};
+  const textOf = (key) => record[key] || '';
+  const numberOf = (key) => Number(record[key]) || 0;
+  return {
+    tag: textOf('tag'),
+    text: textOf('text'),
+    href: textOf('href'),
+    src: textOf('src'),
+    width: numberOf('width'),
+    height: numberOf('height'),
+    top: numberOf('top'),
+    left: numberOf('left'),
+    cssSelector: textOf('cssSelector'),
+    id: textOf('id'),
+    class: textOf('class'),
+    role: textOf('role'),
+    ariaLabel: textOf('ariaLabel'),
+    imageAlt: textOf('imageAlt'),
+    interactive: record.interactive === true
+  };
+}
+/**
+ * Group sized DOM elements under the logical (output) block that owns them.
+ *
+ * Each element's rect is resolved to a physical block index with
+ * mapRectToBlock, and that index is looked up among the physical indices
+ * parsed from every logical block's `blockIdxs`. When several logical blocks
+ * list the same physical index, the first one in `analysisBlocks` wins.
+ * Every object entry of `analysisBlocks` is mutated in place: its `elements`
+ * array is reset and then filled, ordered by `top` and then `left`. This has
+ * to run BEFORE compaction, while the blocks still carry `blockIdxs`.
+ *
+ * @param {Array} analysisBlocks - Raw block_analysis.blocks (carry `blockIdxs`)
+ * @param {Array} sizedElements - Records from PageExtractor.collectSizedElements
+ * @param {Array} physicalBlocks - Visual blocks from extraction (bundle.blocks)
+ * @returns {Array} Trimmed elements whose physical index belongs to no logical
+ *   block; always empty when `analysisBlocks` or `sizedElements` is missing or empty
+ */
+function attachSizedElementsToBlocks(analysisBlocks, sizedElements, physicalBlocks) {
+  const hasBlocks = Array.isArray(analysisBlocks) && analysisBlocks.length > 0;
+  const candidates = Array.isArray(sizedElements) ? sizedElements : [];
+  if (!hasBlocks || candidates.length === 0) {
+    return [];
+  }
+  const visualBlocks = Array.isArray(physicalBlocks) ? physicalBlocks : [];
+  const logicalBlocks = analysisBlocks.filter((block) => isObject(block));
+  // The first logical block to claim a physical index keeps it.
+  const ownerByPhysicalIdx = new Map();
+  logicalBlocks.forEach((block) => {
+    block.elements = [];
+    for (const idx of parseBlockIdxs(block.blockIdxs)) {
+      if (!ownerByPhysicalIdx.has(idx)) {
+        ownerByPhysicalIdx.set(idx, block);
+      }
+    }
+  });
+  const toNumber = (value) => Number(value) || 0;
+  const orphans = [];
+  for (let i = 0; i < candidates.length; i += 1) {
+    const element = candidates[i];
+    const rect = {
+      top: toNumber(element?.top),
+      left: toNumber(element?.left),
+      width: toNumber(element?.width),
+      height: toNumber(element?.height)
+    };
+    const owner = ownerByPhysicalIdx.get(mapRectToBlock(rect, visualBlocks));
+    const bucket = owner ? owner.elements : orphans;
+    bucket.push(pickSizedElementFields(element));
+  }
+  // Reading order: top to bottom, then left to right.
+  const byTopThenLeft = (a, b) => (a.top - b.top) || (a.left - b.left);
+  for (const block of logicalBlocks) {
+    if (block.elements.length > 1) {
+      block.elements.sort(byTopThenLeft);
+    }
+  }
+  return orphans;
+}
 function attachLogicalBlockScreenshotPaths(result, screenshots) {
   const blocks = result?.analysis?.block_analysis?.blocks;
   if (!Array.isArray(blocks) || blocks.length === 0) {
@@ -170,7 +279,16 @@ function compactBlockAnalysisBlock(block, displayOptions) {
     blockDescription: source.blockDescription,
     blockSemantics: Array.isArray(source.blockSemantics) ? source.blockSemantics : [],
     blockCssPath: source.blockCssPath,
-    blockPosition: source.blockPosition
+    blockPosition: source.blockPosition,
+    fixed: source.fixed === true,
+    tag: source.tag || '',
+    branchPath: source.branchPath || '',
+    depth: Number.isInteger(source.depth) ? source.depth : 0,
+    domOrder: Number.isInteger(source.domOrder) ? source.domOrder : 0,
+    textPreview: source.textPreview || '',
+    childInteractiveCount: Number.isInteger(source.childInteractiveCount)
+      ? source.childInteractiveCount
+      : 0
   };
   if (displayOptions.showBlockIdx) {
@@ -192,6 +310,10 @@ function compactBlockAnalysisBlock(block, displayOptions) {
     out.blockScreenshotPaths = source.blockScreenshotPaths;
   }
+  if (Array.isArray(source.elements)) {
+    out.elements = source.elements;
+  }
   return out;
 }
@@ -210,6 +332,10 @@ function compactBlockAnalysis(blockAnalysis, displayOptions) {
       : [];
   }
+  if (Array.isArray(source.unassignedElements) && source.unassignedElements.length > 0) {
+    out.unassignedElements = source.unassignedElements;
+  }
   if (isObject(source.stats)) {
     out.stats = displayOptions.showEvents
       ? source.stats
@@ -282,13 +408,23 @@ function buildPageAnalysisResult({
  *
  * @param {string} url - URL to analyze
  * @param {Object} options
- * @param {Object} options.llm - { apiKey, apiEndpoint, model, ... }
+ * @param {Object} options.llm - { type, model, apiKey, apiEndpoint, cliPath, cwd, fast, ... }
+ *   - `type`: 'openai' (default) | 'codex' | 'claude'
+ *   - `model`: required for all types
+ *   - `apiKey` / `apiEndpoint`: required only for type='openai'
+ *   - `cliPath` / `cwd`: optional for type='codex' | 'claude' (override CLI binary / cwd)
+ *   - `fast`: codex only; auto-enabled when model === 'gpt-5.5'
  * @param {Array}  [options.knownEventTypes] - Accumulated event types for consistency
  * @param {Object} [options.parserConfig] - HtmlParser config overrides
  * @param {Object} [options.extractorConfig] - PageExtractor config overrides
  * @param {boolean} [options.showEvents=false] - Include event arrays and full event-related metadata.
  *   Also enables node-level event classification.
  * @param {boolean} [options.showBlockIdx=false] - Include CSV/block index alignment fields.
+ * @param {boolean} [options.showElement=false] - Collect all visible DOM elements with
+ *   width or height > `elementSize` and nest them under each block as `elements`.
+ *   When false (default), the sized-element collection is skipped (previous behavior).
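+ * @param {number} [options.elementSize=24] - Min size (px) threshold for `showElement`
+ *   (an element qualifies when width > elementSize OR height > elementSize).
+ *   Must be an integer; negative values are clamped to 0, and any other value falls
+ *   back to `extractorConfig.sizedElementMinSize`, then to 24.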
  * @param {boolean} [options.fullPageScreenshot=false] - Save a full-page screenshot to snapshots/ and return its path.
  * @param {boolean} [options.blockScreenshots=false] - Save one screenshot per merged logical block to snapshots/ and return their paths.
  * @param {boolean} [options.waitForImagesLoaded=false] - Wait for page images before extracting and screenshotting.
@@ -303,18 +439,22 @@ export async function analyzeUrl(url, options = {}) {
     extractorConfig,
     showEvents,
     showBlockIdx,
+    showElement,
+    elementSize,
     fullPageScreenshot,
     blockScreenshots,
     waitForImagesLoaded
   } = options;
   if (!url) throw new Error('url is required');
-  if (!llmConfig?.apiKey || !llmConfig?.apiEndpoint || !llmConfig?.model) {
-    throw new Error('options.llm.apiKey, apiEndpoint, and model are required');
-  }
+  validateLlmConfig(llmConfig, 'options.llm');
   const shouldCaptureFullPage = fullPageScreenshot ?? extractorConfig?.fullPageScreenshot;
   const shouldCaptureBlocks = blockScreenshots ?? extractorConfig?.blockScreenshots;
+  const sizedElementsEnabled = showElement === true;
+  const sizedElementMinSize = Number.isInteger(elementSize)
+    ? Math.max(0, elementSize)
+    : (extractorConfig?.sizedElementMinSize ?? 24);
   // Step 0: Playwright extraction
   console.log(`[page-analyzer] Extracting ${url} ...`);
@@ -322,12 +462,14 @@ export async function analyzeUrl(url, options = {}) {
     ...extractorConfig,
     fullPageScreenshot: shouldCaptureFullPage,
     blockScreenshots: false,
-    waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded
+    waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded,
+    sizedElementsEnabled,
+    sizedElementMinSize
   });
   return await extractor.withPreparedPage(url, async (page, targetUrl) => {
     const bundle = await extractor.extractPreparedPage(page, targetUrl);
-    console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries`);
+    console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries, ${Array.isArray(bundle.sizedElements) ? bundle.sizedElements.length : 0} sized elements`);
     // Derive domain from URL
     let domain = '';
@@ -338,6 +480,7 @@ export async function analyzeUrl(url, options = {}) {
       url: targetUrl,
       blocks: bundle.blocks,
       elementGeometries: bundle.elementGeometries,
+      sizedElements: bundle.sizedElements,
       llm: llmConfig,
       knownEventTypes,
       parserConfig,
@@ -378,13 +521,19 @@ export async function analyzeUrl(url, options = {}) {
  * @param {string} input.url - Page URL
  * @param {Array}  input.blocks - Visual blocks from Playwright extraction
  * @param {Array}  input.elementGeometries - Element geometry records
+ * @param {Array}  [input.sizedElements] - Visible DOM elements already filtered by size
+ *   (from PageExtractor.collectSizedElements; analyzeUrl keeps those with width or height
+ *   above `elementSize`, 24 by default); nested under each block as `elements`.
  * @param {string} [input.markdown] - Markdown content (reserved for future use)
  * @param {Object} input.llm - LLM provider config
- * @param {string} input.llm.apiKey - API key
- * @param {string} input.llm.apiEndpoint - API endpoint URL
- * @param {string} input.llm.model - Model name
- * @param {number} [input.llm.maxTokens] - Max tokens
- * @param {number} [input.llm.temperature] - Temperature
+ * @param {('openai'|'codex'|'claude')} [input.llm.type='openai'] - Backend type
+ * @param {string} input.llm.model - Model name (required for all types)
+ * @param {string} [input.llm.apiKey] - API key (required when type='openai')
+ * @param {string} [input.llm.apiEndpoint] - API endpoint URL (required when type='openai')
+ * @param {string} [input.llm.cliPath] - Override CLI binary path (codex/claude)
+ * @param {string} [input.llm.cwd] - Working directory for the CLI child process
+ * @param {boolean} [input.llm.fast] - Codex only; auto-enabled when model === 'gpt-5.5'
+ * @param {number} [input.llm.maxTokens] - Max tokens (openai only)
+ * @param {number} [input.llm.temperature] - Temperature (openai only)
  * @param {number} [input.llm.timeout] - Request timeout ms
  * @param {number} [input.llm.maxRetries] - Max retries
  * @param {Array}  [input.llm.knownEventTypes] - Pre-configured known event types
@@ -405,6 +554,7 @@ export async function analyzePageEvents(input) {
     url,
     blocks = [],
     elementGeometries = [],
+    sizedElements = [],
     markdown = '',
     llm: llmConfig,
     knownEventTypes = [],
@@ -423,9 +573,7 @@ export async function analyzePageEvents(input) {
   if (!url) {
     throw new Error('url is required');
   }
-  if (!llmConfig?.apiKey || !llmConfig?.apiEndpoint || !llmConfig?.model) {
-    throw new Error('llm.apiKey, llm.apiEndpoint, and llm.model are required');
-  }
+  validateLlmConfig(llmConfig, 'llm');
   // Step 1: Parse HTML → elements
   const htmlParser = new HtmlParser(parserConfig);
@@ -440,16 +588,7 @@ export async function analyzePageEvents(input) {
   const csvContent = csvExporter.buildCsvContent(nodeId || 'page', elements);
   // Step 4: LLM event analysis
-  const provider = new OpenAiProvider({
-    apiKey: llmConfig.apiKey,
-    apiEndpoint: llmConfig.apiEndpoint,
-    model: llmConfig.model,
-    maxTokens: llmConfig.maxTokens,
-    temperature: llmConfig.temperature,
-    timeout: llmConfig.timeout,
-    maxRetries: llmConfig.maxRetries,
-    interactionLogger: llmConfig.interactionLogger
-  });
+  const provider = createLlmProvider(llmConfig);
   const eventAnalyzer = new EventAnalyzer(provider, llmConfig, {
     domain,
@@ -467,6 +606,17 @@ export async function analyzePageEvents(input) {
     }
   );
+  // Step 5: Nest sized DOM elements under each block (before compaction, while
+  // raw analysis blocks still carry blockIdxs for physical→logical mapping).
+  const unassignedSizedElements = attachSizedElementsToBlocks(
+    analysis?.block_analysis?.blocks,
+    sizedElements,
+    blocks
+  );
+  if (unassignedSizedElements.length > 0 && isObject(analysis?.block_analysis)) {
+    analysis.block_analysis.unassignedElements = unassignedSizedElements;
+  }
   return buildPageAnalysisResult({
     elements,
     csvContent,
@@ -481,7 +631,13 @@ export async function analyzePageEvents(input) {
 export { HtmlParser } from './html-parser.js';
 export { assignBlocksToElements } from './extractors/block-assigner.js';
 export { CsvExporter } from './csv-exporter.js';
-export { OpenAiProvider } from './llm/providers/openai-provider.js';
-export { BaseLlmProvider } from './llm/providers/base-provider.js';
+export {
+  createLlmProvider,
+  OpenAiProvider,
+  CodexCliProvider,
+  ClaudeCliProvider,
+  BaseLlmProvider,
+  LLM_PROVIDER_TYPES
+} from './llm/providers/index.js';
 export { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
 export { PageExtractor } from './page-extractor.js';

package/llm/analyzers/event-analyzer/event-analyzer-blocks.js CHANGED Viewed

@@ -534,6 +534,15 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
       possibleEventTypes.push(eventType);
     }
+    const sourceBlocksArr = Array.isArray(context.sourceBlocks) ? context.sourceBlocks : [];
+    const firstSource = sourceBlocksArr[0] || {};
+    const depthValues = sourceBlocksArr
+      .map((b) => (Number.isInteger(b?.depth) ? b.depth : null))
+      .filter((v) => v !== null);
+    const domOrderValues = sourceBlocksArr
+      .map((b) => (Number.isInteger(b?.domOrder) ? b.domOrder : null))
+      .filter((v) => v !== null);
     blocks.push({
       blockIdxs: context.blockIdxKey,
       blockName: context.blockName,
@@ -543,6 +552,16 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
       blockDescription: context.blockDescription,
       blockCssPath: resolveLogicalBlockCssPath(context.sourceBlocks),
       blockPosition: buildLogicalBlockPosition(context.sourceBlocks),
+      fixed: sourceBlocksArr.some((b) => b?.fixed === true),
+      tag: firstSource.tag || '',
+      branchPath: firstSource.branchPath || '',
+      depth: depthValues.length ? Math.min(...depthValues) : 0,
+      domOrder: domOrderValues.length ? Math.min(...domOrderValues) : 0,
+      textPreview: firstSource.textPreview || '',
+      childInteractiveCount: sourceBlocksArr.reduce(
+        (sum, b) => sum + (Number(b?.childInteractiveCount) || 0),
+        0
+      ),
       rowCount: context.rows.length,
       mode
     });

package/llm/providers/claude-cli-provider.js ADDED Viewed

@@ -0,0 +1,137 @@
+import { BaseLlmProvider } from './base-provider.js';
+import { runCli } from './cli-runner.js';
+/**
+ * Assemble the record handed to emitInteractionLog for one `claude -p` call.
+ *
+ * Usage and cost fields are always null: nothing in the CLI result is read for
+ * them here. `responsePayload` is null when no CLI result is available.
+ *
+ * @param {Object} input
+ * @param {Object} input.context - Fields from resolveInteractionContext
+ * @param {string} input.model - Model name
+ * @param {string} input.prompt - Text that was piped to the CLI
+ * @param {string|null} input.outputText - Normalized stdout, or null
+ * @param {Object} input.requestPayload - Argv/model summary of the call
+ * @param {Object|null} input.result - runCli result, or null when it never returned
+ * @returns {Object} Interaction-log record
+ */
+function buildClaudeInteractionEntry({ context, model, prompt, outputText, requestPayload, result }) {
+  return {
+    ...context,
+    provider: 'Claude',
+    model,
+    requestId: null,
+    inputText: prompt,
+    outputText,
+    requestPayload,
+    responsePayload: result
+      ? { stdout: result.stdout, stderr: result.stderr, code: result.code, signal: result.signal }
+      : null,
+    usagePromptTokens: null,
+    usageCompletionTokens: null,
+    usageReasoningTokens: null,
+    usageCost: null
+  };
+}
+/**
+ * Claude CLI provider — invokes the locally installed `claude -p` command.
+ *
+ * Auth is whatever the user's local claude install already has; no API key needed.
+ * In `--output-format text` print mode, stdout is exactly the model's final text
+ * response, so we use it directly.
+ */
+export class ClaudeCliProvider extends BaseLlmProvider {
+  /**
+   * @param {Object} [config] - Provider settings, forwarded to BaseLlmProvider
+   * @param {string} [config.cliPath='claude'] - CLI binary to run
+   * @param {string} [config.cwd] - Working directory for the child process
+   * @throws {Error} When no model is set after the base constructor ran
+   */
+  constructor(config = {}) {
+    super(config);
+    if (!this.model) {
+      throw new Error('ClaudeCliProvider: model is required');
+    }
+    this.cliPath = config.cliPath || 'claude';
+    this.cwd = config.cwd || null;
+  }
+  /**
+   * Argv passed to the CLI (the prompt itself travels over stdin).
+   *
+   * @returns {string[]}
+   */
+  buildArgs() {
+    return [
+      '-p',
+      '--model', this.model,
+      '--output-format', 'text',
+      '--bare'
+    ];
+  }
+  /**
+   * Derive the logging context from request metadata.
+   *
+   * @param {Object} metadata - Caller-supplied metadata
+   * @returns {Object|null} Context, or null unless both `domain` and `nodeId`
+   *   are non-empty after trimming (which disables interaction logging)
+   */
+  resolveInteractionContext(metadata) {
+    const context = metadata && typeof metadata === 'object' ? metadata : {};
+    const domain = String(context.domain || '').trim();
+    const nodeId = String(context.nodeId || '').trim();
+    if (!domain || !nodeId) {
+      return null;
+    }
+    return {
+      domain,
+      nodeId,
+      operation: String(context.operation || 'analysis').trim() || 'analysis',
+      chunkLabel: String(context.chunkLabel || '').trim() || null
+    };
+  }
+  /**
+   * Run one `claude -p` call and return its text output.
+   *
+   * @param {string} prompt - Text piped to the CLI's stdin
+   * @param {Object} [options]
+   * @param {Object} [options.metadata] - Source of the interaction-log context
+   * @returns {Promise<string>} stdout with CRLF normalized and trailing newlines removed
+   * @throws {Error} When the CLI exits non-zero, prints nothing, or runCli rejects
+   */
+  async makeRequest(prompt, options = {}) {
+    const metadata = options && typeof options.metadata === 'object' ? options.metadata : {};
+    const interactionContext = this.resolveInteractionContext(metadata);
+    const args = this.buildArgs();
+    const requestPayload = { argv: [this.cliPath, ...args], model: this.model };
+    // Emits one log record when a context exists; resolves to whether it did.
+    const logInteraction = async (outputText, result) => {
+      if (!interactionContext) {
+        return false;
+      }
+      await this.emitInteractionLog(buildClaudeInteractionEntry({
+        context: interactionContext,
+        model: this.model,
+        prompt,
+        outputText,
+        requestPayload,
+        result
+      }));
+      return true;
+    };
+    let failureLogged = false;
+    try {
+      const result = await runCli({
+        command: this.cliPath,
+        args,
+        prompt,
+        timeoutMs: this.timeout,
+        cwd: this.cwd || undefined
+      });
+      const outputText = String(result.stdout ?? '').replace(/\r\n/g, '\n').replace(/\n+$/, '');
+      if (result.code !== 0 || !outputText) {
+        // Log with the full CLI result here; the catch below must not log it again.
+        failureLogged = await logInteraction(outputText || null, result);
+        const reason = result.code !== 0
+          ? `exited with code ${result.code}${result.signal ? ` (signal ${result.signal})` : ''}`
+          : 'produced empty stdout';
+        const stderrTail = String(result.stderr || '').slice(-500);
+        throw new Error(`claude -p ${reason}${stderrTail ? `: ${stderrTail}` : ''}`);
+      }
+      await logInteraction(outputText, result);
+      return outputText;
+    } catch (error) {
+      // Failures that produced no CLI result (spawn error, timeout) are logged without a payload.
+      if (!failureLogged) {
+        await logInteraction(null, null);
+      }
+      throw error;
+    }
+  }
+  /**
+   * Send content to the CLI through the retry wrapper (makeRequestWithRetry is
+   * not defined in this class).
+   *
+   * @param {*} content - Prompt; stringified, with null/undefined becoming ''
+   * @param {Object} [options] - Request options; `parseJson` is removed before forwarding
+   * @returns {Promise<string>} Raw text returned by makeRequest
+   */
+  async analyze(content, options = {}) {
+    const requestOptions = { ...options };
+    delete requestOptions.parseJson;
+    return this.makeRequestWithRetry(() => this.makeRequest(String(content ?? ''), requestOptions));
+  }
+  /**
+   * @throws {Error} When no model is configured
+   */
+  validateConfig() {
+    if (!this.model) {
+      throw new Error('Model is required');
+    }
+  }
+}

package/llm/providers/cli-runner.js ADDED Viewed

@@ -0,0 +1,129 @@
+import { spawn } from 'node:child_process';
+import { mkdtemp, readFile, rm } from 'node:fs/promises';
+import os from 'node:os';
+import path from 'node:path';
+const DEFAULT_KILL_GRACE_MS = 10_000;
+/**
+ * Spawn a local CLI tool, write prompt to stdin, collect stdout/stderr, and
+ * optionally read a designated output file. Used by codex-cli-provider and
+ * claude-cli-provider.
+ *
+ * A non-zero exit code is not an error here: it is returned as `code` for the
+ * caller to interpret.
+ *
+ * @param {Object}   opts
+ * @param {string}   opts.command   - Executable name or absolute path (e.g. 'codex')
+ * @param {string[]} opts.args      - Argv (excluding the command itself)
+ * @param {string}   opts.prompt    - Text piped to the child's stdin
+ * @param {number}   opts.timeoutMs - Hard timeout; SIGTERM then SIGKILL after grace
+ * @param {string=}  opts.outFile   - If set, the file is read after the child exits and returned as outFileContent
+ * @param {string=}  opts.cwd       - Optional working directory for the child
+ * @param {Object=}  opts.env       - Optional env overrides; merged onto process.env
+ * @returns {Promise<{ stdout: string, stderr: string, code: number|null, signal: NodeJS.Signals|null, outFileContent: string|null }>}
+ * @throws {Error} When `command` is missing; when the child emits 'error' (for
+ *   example the executable cannot be spawned); when writing the prompt fails
+ *   synchronously with anything other than EPIPE; or, with `code` set to
+ *   'CLI_TIMEOUT' and the captured `stdout`/`stderr` attached, when the timeout fires
+ */
+export async function runCli(opts) {
+  const {
+    command,
+    args = [],
+    prompt = '',
+    timeoutMs = 600_000,
+    outFile = null,
+    cwd,
+    env
+  } = opts || {};
+  if (!command) {
+    throw new Error('runCli: command is required');
+  }
+  const child = spawn(command, args, {
+    stdio: ['pipe', 'pipe', 'pipe'],
+    cwd: cwd || undefined,
+    env: env ? { ...process.env, ...env } : process.env
+  });
+  const stdoutChunks = [];
+  const stderrChunks = [];
+  child.stdout.on('data', (chunk) => { stdoutChunks.push(chunk); });
+  child.stderr.on('data', (chunk) => { stderrChunks.push(chunk); });
+  // Registered before any write so an asynchronous stdin failure always has a handler.
+  child.stdin.on('error', (err) => {
+    if (err && err.code !== 'EPIPE') {
+      // Surface non-EPIPE stdin errors via stderr buffer
+      stderrChunks.push(Buffer.from(`\n[stdin error] ${err.message}\n`));
+    }
+  });
+  let timedOut = false;
+  const timer = setTimeout(() => {
+    timedOut = true;
+    child.kill('SIGTERM');
+    setTimeout(() => {
+      if (child.exitCode === null && child.signalCode === null) {
+        try { child.kill('SIGKILL'); } catch { /* ignore */ }
+      }
+    }, DEFAULT_KILL_GRACE_MS).unref();
+  }, timeoutMs);
+  if (typeof timer.unref === 'function') timer.unref();
+  // Pipe prompt to stdin; tolerate EPIPE if the child closes stdin early.
+  try {
+    if (prompt) {
+      child.stdin.write(prompt);
+    }
+    child.stdin.end();
+  } catch (err) {
+    if (err && err.code !== 'EPIPE') {
+      clearTimeout(timer);
+      // Nobody will wait on this child once we rethrow: swallow a late spawn
+      // 'error' (it would otherwise be an unhandled event) and stop the process
+      // so it is not left running with stdin still open.
+      child.on('error', () => {});
+      try { child.kill(); } catch { /* ignore */ }
+      throw err;
+    }
+  }
+  const { code, signal } = await new Promise((resolve, reject) => {
+    child.on('error', (err) => {
+      clearTimeout(timer);
+      reject(err);
+    });
+    child.on('close', (exitCode, exitSignal) => {
+      clearTimeout(timer);
+      resolve({ code: exitCode, signal: exitSignal });
+    });
+  });
+  const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
+  const stderr = Buffer.concat(stderrChunks).toString('utf-8');
+  let outFileContent = null;
+  if (outFile) {
+    // Best effort: a missing or unreadable output file is reported as null.
+    try {
+      outFileContent = await readFile(outFile, 'utf-8');
+    } catch {
+      outFileContent = null;
+    }
+  }
+  if (timedOut) {
+    const err = new Error(`CLI '${command}' timed out after ${timeoutMs}ms`);
+    err.code = 'CLI_TIMEOUT';
+    err.stdout = stdout;
+    err.stderr = stderr;
+    throw err;
+  }
+  return { stdout, stderr, code, signal, outFileContent };
+}
+/**
+ * Reserve an output file path inside a freshly created, uniquely named
+ * directory under the OS temp dir. Only the directory is created; the file
+ * itself is not.
+ *
+ * @param {string} prefix - Leading part of the temp directory name
+ * @param {string} fileName - File name to place inside that directory
+ * @returns {Promise<{ filePath: string, cleanup: () => Promise<void> }>}
+ *   `cleanup` removes the whole directory and never rejects
+ */
+export async function makeTmpOutFile(prefix, fileName) {
+  const dirTemplate = path.join(os.tmpdir(), `${prefix}-`);
+  const dir = await mkdtemp(dirTemplate);
+  return {
+    filePath: path.join(dir, fileName),
+    async cleanup() {
+      try {
+        await rm(dir, { recursive: true, force: true });
+      } catch {
+        /* ignore */
+      }
+    }
+  };
+}