page-analyzer 1.2.1 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -1,17 +1,30 @@
1
1
  /**
2
2
  * page-analyzer — Standalone module
3
3
  *
4
- * Simplest usage just pass a URL:
4
+ * Three LLM backends are supported via `llm.type`:
5
5
  *
6
- * import { analyzeUrl } from './page-analyzer/index.js';
7
- *
8
- * const result = await analyzeUrl('https://example.com', {
6
+ * // 1. OpenAI-compatible HTTP API (default, backward compatible)
7
+ * await analyzeUrl('https://example.com', {
9
8
  * llm: {
9
+ * type: 'openai', // optional; default
10
10
  * apiKey: 'sk-...',
11
11
  * apiEndpoint: 'https://api.openai.com/v1/chat/completions',
12
- * model: 'gpt-4',
13
- * },
14
- * showEvents: true
12
+ * model: 'gpt-4'
13
+ * }
14
+ * });
15
+ *
16
+ * // 2. Local Codex CLI (`codex exec`) — uses your local auth
17
+ * await analyzeUrl('https://example.com', {
18
+ * llm: {
19
+ * type: 'codex',
20
+ * model: 'gpt-5.5' // model === 'gpt-5.5' auto-enables fast mode
21
+ * // fast: true // optional explicit override
22
+ * }
23
+ * });
24
+ *
25
+ * // 3. Local Claude CLI (`claude -p`) — uses your local auth
26
+ * await analyzeUrl('https://example.com', {
27
+ * llm: { type: 'claude', model: 'sonnet' }
15
28
  * });
16
29
  *
17
30
  * Or step-by-step with analyzePageEvents() for pre-fetched data.
@@ -20,12 +33,22 @@
20
33
  */
21
34
 
22
35
  import { HtmlParser } from './html-parser.js';
23
- import { assignBlocksToElements } from './extractors/block-assigner.js';
36
+ import { assignBlocksToElements, mapRectToBlock } from './extractors/block-assigner.js';
24
37
  import { CsvExporter } from './csv-exporter.js';
25
- import { OpenAiProvider } from './llm/providers/openai-provider.js';
38
+ import { createLlmProvider } from './llm/providers/index.js';
26
39
  import { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
27
40
  import { PageExtractor } from './page-extractor.js';
28
41
 
42
/**
 * Check that an LLM config carries the fields its backend needs.
 *
 * The backend defaults to 'openai' when `type` is missing or falsy and is
 * compared case-insensitively. Every backend needs `model`; only the 'openai'
 * backend additionally needs both `apiKey` and `apiEndpoint`.
 *
 * @param {Object} llmConfig - LLM provider config supplied by the caller (may be nullish)
 * @param {string} scope - Dotted prefix used in error messages (e.g. 'options.llm')
 * @returns {void}
 * @throws {Error} When `model` is missing, or when the openai backend lacks
 *   `apiKey` or `apiEndpoint`
 */
function validateLlmConfig(llmConfig, scope) {
  const backend = String(llmConfig?.type || 'openai').toLowerCase();

  const hasModel = Boolean(llmConfig?.model);
  if (!hasModel) {
    throw new Error(`${scope}.model is required`);
  }

  // CLI-backed providers need nothing beyond the model name here.
  if (backend !== 'openai') {
    return;
  }

  const hasHttpCredentials = Boolean(llmConfig.apiKey) && Boolean(llmConfig.apiEndpoint);
  if (!hasHttpCredentials) {
    throw new Error(`${scope}.apiKey and ${scope}.apiEndpoint are required when ${scope}.type='openai'`);
  }
}
51
+
29
52
  function isObject(value) {
30
53
  return value && typeof value === 'object' && !Array.isArray(value);
31
54
  }
@@ -131,6 +154,92 @@ function mergeScreenshots(primary, secondary) {
131
154
  return hasScreenshots(merged) ? merged : null;
132
155
  }
133
156
 
157
/**
 * Reduce a sized-element record to a fixed set of output fields.
 *
 * Non-object input is treated as an empty record. Text-like fields fall back
 * to '' when falsy, geometry fields are coerced with Number() and fall back to
 * 0 when the result is falsy (NaN or 0), and `interactive` is true only when
 * the source value is strictly `true`. Any other property is dropped.
 *
 * @param {*} element - Raw sized-element record
 * @returns {Object} Trimmed record with a stable key order
 */
function pickSizedElementFields(element) {
  const record = isObject(element) ? element : {};
  const textOf = (key) => record[key] || '';
  const numberOf = (key) => Number(record[key]) || 0;

  return {
    tag: textOf('tag'),
    text: textOf('text'),
    href: textOf('href'),
    src: textOf('src'),
    width: numberOf('width'),
    height: numberOf('height'),
    top: numberOf('top'),
    left: numberOf('left'),
    cssSelector: textOf('cssSelector'),
    id: textOf('id'),
    class: textOf('class'),
    role: textOf('role'),
    ariaLabel: textOf('ariaLabel'),
    imageAlt: textOf('imageAlt'),
    interactive: record.interactive === true
  };
}
177
+
178
/**
 * Nest sized DOM elements under the logical (output) block that owns them.
 *
 * Each element's rect is resolved to a physical block index with
 * mapRectToBlock(rect, physicalBlocks); that index is then looked up in a
 * physical → logical table built from every analysis block's `blockIdxs`
 * (parsed with parseBlockIdxs). When several logical blocks list the same
 * physical index, the first one in `analysisBlocks` order owns it.
 *
 * Side effects: every object entry in `analysisBlocks` gets a fresh
 * `elements` array (sorted by top, then left). Nothing is touched when either
 * `analysisBlocks` or `sizedElements` is empty or not an array. Must run while
 * the analysis blocks still carry `blockIdxs`, i.e. before compaction.
 *
 * @param {Array} analysisBlocks - Raw block_analysis.blocks (carry `blockIdxs`)
 * @param {Array} sizedElements - Records from PageExtractor.collectSizedElements
 * @param {Array} physicalBlocks - Visual blocks from extraction (bundle.blocks)
 * @returns {Array} Trimmed elements whose physical index has no owning logical block
 */
function attachSizedElementsToBlocks(analysisBlocks, sizedElements, physicalBlocks) {
  const hasLogicalBlocks = Array.isArray(analysisBlocks) && analysisBlocks.length > 0;
  if (!hasLogicalBlocks) {
    return [];
  }
  const hasElements = Array.isArray(sizedElements) && sizedElements.length > 0;
  if (!hasElements) {
    return [];
  }
  const visualBlocks = Array.isArray(physicalBlocks) ? physicalBlocks : [];

  // Pass 1: reset each logical block and record which one owns each physical index.
  const ownerByPhysicalIdx = new Map();
  for (const logicalBlock of analysisBlocks) {
    if (isObject(logicalBlock)) {
      logicalBlock.elements = [];
      for (const idx of parseBlockIdxs(logicalBlock.blockIdxs)) {
        if (ownerByPhysicalIdx.has(idx)) {
          continue; // first logical block to claim a physical index keeps it
        }
        ownerByPhysicalIdx.set(idx, logicalBlock);
      }
    }
  }

  // Pass 2: route every element to its owner, or to the orphan list.
  const orphans = [];
  for (const element of sizedElements) {
    const geometry = {
      top: Number(element?.top) || 0,
      left: Number(element?.left) || 0,
      width: Number(element?.width) || 0,
      height: Number(element?.height) || 0
    };
    const owner = ownerByPhysicalIdx.get(mapRectToBlock(geometry, visualBlocks));
    const trimmed = pickSizedElementFields(element);
    const bucket = owner ? owner.elements : orphans;
    bucket.push(trimmed);
  }

  // Pass 3: order each block's elements top-to-bottom, then left-to-right.
  const byTopThenLeft = (a, b) => (a.top - b.top) || (a.left - b.left);
  for (const logicalBlock of analysisBlocks) {
    const needsSort = isObject(logicalBlock)
      && Array.isArray(logicalBlock.elements)
      && logicalBlock.elements.length > 1;
    if (needsSort) {
      logicalBlock.elements.sort(byTopThenLeft);
    }
  }

  return orphans;
}
242
+
134
243
  function attachLogicalBlockScreenshotPaths(result, screenshots) {
135
244
  const blocks = result?.analysis?.block_analysis?.blocks;
136
245
  if (!Array.isArray(blocks) || blocks.length === 0) {
@@ -170,7 +279,16 @@ function compactBlockAnalysisBlock(block, displayOptions) {
170
279
  blockDescription: source.blockDescription,
171
280
  blockSemantics: Array.isArray(source.blockSemantics) ? source.blockSemantics : [],
172
281
  blockCssPath: source.blockCssPath,
173
- blockPosition: source.blockPosition
282
+ blockPosition: source.blockPosition,
283
+ fixed: source.fixed === true,
284
+ tag: source.tag || '',
285
+ branchPath: source.branchPath || '',
286
+ depth: Number.isInteger(source.depth) ? source.depth : 0,
287
+ domOrder: Number.isInteger(source.domOrder) ? source.domOrder : 0,
288
+ textPreview: source.textPreview || '',
289
+ childInteractiveCount: Number.isInteger(source.childInteractiveCount)
290
+ ? source.childInteractiveCount
291
+ : 0
174
292
  };
175
293
 
176
294
  if (displayOptions.showBlockIdx) {
@@ -192,6 +310,10 @@ function compactBlockAnalysisBlock(block, displayOptions) {
192
310
  out.blockScreenshotPaths = source.blockScreenshotPaths;
193
311
  }
194
312
 
313
+ if (Array.isArray(source.elements)) {
314
+ out.elements = source.elements;
315
+ }
316
+
195
317
  return out;
196
318
  }
197
319
 
@@ -210,6 +332,10 @@ function compactBlockAnalysis(blockAnalysis, displayOptions) {
210
332
  : [];
211
333
  }
212
334
 
335
+ if (Array.isArray(source.unassignedElements) && source.unassignedElements.length > 0) {
336
+ out.unassignedElements = source.unassignedElements;
337
+ }
338
+
213
339
  if (isObject(source.stats)) {
214
340
  out.stats = displayOptions.showEvents
215
341
  ? source.stats
@@ -282,13 +408,23 @@ function buildPageAnalysisResult({
282
408
  *
283
409
  * @param {string} url - URL to analyze
284
410
  * @param {Object} options
285
- * @param {Object} options.llm - { apiKey, apiEndpoint, model, ... }
411
+ * @param {Object} options.llm - { type, model, apiKey, apiEndpoint, cliPath, cwd, fast, ... }
412
+ * - `type`: 'openai' (default) | 'codex' | 'claude'
413
+ * - `model`: required for all types
414
+ * - `apiKey` / `apiEndpoint`: required only for type='openai'
415
+ * - `cliPath` / `cwd`: optional for type='codex' | 'claude' (override CLI binary / cwd)
416
+ * - `fast`: codex only; auto-enabled when model === 'gpt-5.5'
286
417
  * @param {Array} [options.knownEventTypes] - Accumulated event types for consistency
287
418
  * @param {Object} [options.parserConfig] - HtmlParser config overrides
288
419
  * @param {Object} [options.extractorConfig] - PageExtractor config overrides
289
420
  * @param {boolean} [options.showEvents=false] - Include event arrays and full event-related metadata.
290
421
  * Also enables node-level event classification.
291
422
  * @param {boolean} [options.showBlockIdx=false] - Include CSV/block index alignment fields.
423
+ * @param {boolean} [options.showElement=false] - Collect all visible DOM elements with
424
+ * width or height > `elementSize` and nest them under each block as `elements`.
425
+ * When false (default), the sized-element collection is skipped (previous behavior).
426
+ * @param {number} [options.elementSize=24] - Min size (px) threshold for `showElement`
427
+ * (an element qualifies when width > elementSize OR height > elementSize).
292
428
  * @param {boolean} [options.fullPageScreenshot=false] - Save a full-page screenshot to snapshots/ and return its path.
293
429
  * @param {boolean} [options.blockScreenshots=false] - Save one screenshot per merged logical block to snapshots/ and return their paths.
294
430
  * @param {boolean} [options.waitForImagesLoaded=false] - Wait for page images before extracting and screenshotting.
@@ -303,18 +439,22 @@ export async function analyzeUrl(url, options = {}) {
303
439
  extractorConfig,
304
440
  showEvents,
305
441
  showBlockIdx,
442
+ showElement,
443
+ elementSize,
306
444
  fullPageScreenshot,
307
445
  blockScreenshots,
308
446
  waitForImagesLoaded
309
447
  } = options;
310
448
 
311
449
  if (!url) throw new Error('url is required');
312
- if (!llmConfig?.apiKey || !llmConfig?.apiEndpoint || !llmConfig?.model) {
313
- throw new Error('options.llm.apiKey, apiEndpoint, and model are required');
314
- }
450
+ validateLlmConfig(llmConfig, 'options.llm');
315
451
 
316
452
  const shouldCaptureFullPage = fullPageScreenshot ?? extractorConfig?.fullPageScreenshot;
317
453
  const shouldCaptureBlocks = blockScreenshots ?? extractorConfig?.blockScreenshots;
454
+ const sizedElementsEnabled = showElement === true;
455
+ const sizedElementMinSize = Number.isInteger(elementSize)
456
+ ? Math.max(0, elementSize)
457
+ : (extractorConfig?.sizedElementMinSize ?? 24);
318
458
 
319
459
  // Step 0: Playwright extraction
320
460
  console.log(`[page-analyzer] Extracting ${url} ...`);
@@ -322,12 +462,14 @@ export async function analyzeUrl(url, options = {}) {
322
462
  ...extractorConfig,
323
463
  fullPageScreenshot: shouldCaptureFullPage,
324
464
  blockScreenshots: false,
325
- waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded
465
+ waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded,
466
+ sizedElementsEnabled,
467
+ sizedElementMinSize
326
468
  });
327
469
 
328
470
  return await extractor.withPreparedPage(url, async (page, targetUrl) => {
329
471
  const bundle = await extractor.extractPreparedPage(page, targetUrl);
330
- console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries`);
472
+ console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries, ${Array.isArray(bundle.sizedElements) ? bundle.sizedElements.length : 0} sized elements`);
331
473
 
332
474
  // Derive domain from URL
333
475
  let domain = '';
@@ -338,6 +480,7 @@ export async function analyzeUrl(url, options = {}) {
338
480
  url: targetUrl,
339
481
  blocks: bundle.blocks,
340
482
  elementGeometries: bundle.elementGeometries,
483
+ sizedElements: bundle.sizedElements,
341
484
  llm: llmConfig,
342
485
  knownEventTypes,
343
486
  parserConfig,
@@ -378,13 +521,19 @@ export async function analyzeUrl(url, options = {}) {
378
521
  * @param {string} input.url - Page URL
379
522
  * @param {Array} input.blocks - Visual blocks from Playwright extraction
380
523
  * @param {Array} input.elementGeometries - Element geometry records
524
+ * @param {Array} [input.sizedElements] - Visible DOM elements whose width or height exceeds the size threshold (24px by default)
525
+ * (from PageExtractor.collectSizedElements); nested under each block as `elements`.
381
526
  * @param {string} [input.markdown] - Markdown content (reserved for future use)
382
527
  * @param {Object} input.llm - LLM provider config
383
- * @param {string} input.llm.apiKey - API key
384
- * @param {string} input.llm.apiEndpoint - API endpoint URL
385
- * @param {string} input.llm.model - Model name
386
- * @param {number} [input.llm.maxTokens] - Max tokens
387
- * @param {number} [input.llm.temperature] - Temperature
528
+ * @param {('openai'|'codex'|'claude')} [input.llm.type='openai'] - Backend type
529
+ * @param {string} input.llm.model - Model name (required for all types)
530
+ * @param {string} [input.llm.apiKey] - API key (required when type='openai')
531
+ * @param {string} [input.llm.apiEndpoint] - API endpoint URL (required when type='openai')
532
+ * @param {string} [input.llm.cliPath] - Override CLI binary path (codex/claude)
533
+ * @param {string} [input.llm.cwd] - Working directory for the CLI child process
534
+ * @param {boolean} [input.llm.fast] - Codex only; auto-enabled when model === 'gpt-5.5'
535
+ * @param {number} [input.llm.maxTokens] - Max tokens (openai only)
536
+ * @param {number} [input.llm.temperature] - Temperature (openai only)
388
537
  * @param {number} [input.llm.timeout] - Request timeout ms
389
538
  * @param {number} [input.llm.maxRetries] - Max retries
390
539
  * @param {Array} [input.llm.knownEventTypes] - Pre-configured known event types
@@ -405,6 +554,7 @@ export async function analyzePageEvents(input) {
405
554
  url,
406
555
  blocks = [],
407
556
  elementGeometries = [],
557
+ sizedElements = [],
408
558
  markdown = '',
409
559
  llm: llmConfig,
410
560
  knownEventTypes = [],
@@ -423,9 +573,7 @@ export async function analyzePageEvents(input) {
423
573
  if (!url) {
424
574
  throw new Error('url is required');
425
575
  }
426
- if (!llmConfig?.apiKey || !llmConfig?.apiEndpoint || !llmConfig?.model) {
427
- throw new Error('llm.apiKey, llm.apiEndpoint, and llm.model are required');
428
- }
576
+ validateLlmConfig(llmConfig, 'llm');
429
577
 
430
578
  // Step 1: Parse HTML → elements
431
579
  const htmlParser = new HtmlParser(parserConfig);
@@ -440,16 +588,7 @@ export async function analyzePageEvents(input) {
440
588
  const csvContent = csvExporter.buildCsvContent(nodeId || 'page', elements);
441
589
 
442
590
  // Step 4: LLM event analysis
443
- const provider = new OpenAiProvider({
444
- apiKey: llmConfig.apiKey,
445
- apiEndpoint: llmConfig.apiEndpoint,
446
- model: llmConfig.model,
447
- maxTokens: llmConfig.maxTokens,
448
- temperature: llmConfig.temperature,
449
- timeout: llmConfig.timeout,
450
- maxRetries: llmConfig.maxRetries,
451
- interactionLogger: llmConfig.interactionLogger
452
- });
591
+ const provider = createLlmProvider(llmConfig);
453
592
 
454
593
  const eventAnalyzer = new EventAnalyzer(provider, llmConfig, {
455
594
  domain,
@@ -467,6 +606,17 @@ export async function analyzePageEvents(input) {
467
606
  }
468
607
  );
469
608
 
609
+ // Step 5: Nest sized DOM elements under each block (before compaction, while
610
+ // raw analysis blocks still carry blockIdxs for physical→logical mapping).
611
+ const unassignedSizedElements = attachSizedElementsToBlocks(
612
+ analysis?.block_analysis?.blocks,
613
+ sizedElements,
614
+ blocks
615
+ );
616
+ if (unassignedSizedElements.length > 0 && isObject(analysis?.block_analysis)) {
617
+ analysis.block_analysis.unassignedElements = unassignedSizedElements;
618
+ }
619
+
470
620
  return buildPageAnalysisResult({
471
621
  elements,
472
622
  csvContent,
@@ -481,7 +631,13 @@ export async function analyzePageEvents(input) {
481
631
  export { HtmlParser } from './html-parser.js';
482
632
  export { assignBlocksToElements } from './extractors/block-assigner.js';
483
633
  export { CsvExporter } from './csv-exporter.js';
484
- export { OpenAiProvider } from './llm/providers/openai-provider.js';
485
- export { BaseLlmProvider } from './llm/providers/base-provider.js';
634
+ export {
635
+ createLlmProvider,
636
+ OpenAiProvider,
637
+ CodexCliProvider,
638
+ ClaudeCliProvider,
639
+ BaseLlmProvider,
640
+ LLM_PROVIDER_TYPES
641
+ } from './llm/providers/index.js';
486
642
  export { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
487
643
  export { PageExtractor } from './page-extractor.js';
@@ -534,6 +534,15 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
534
534
  possibleEventTypes.push(eventType);
535
535
  }
536
536
 
537
+ const sourceBlocksArr = Array.isArray(context.sourceBlocks) ? context.sourceBlocks : [];
538
+ const firstSource = sourceBlocksArr[0] || {};
539
+ const depthValues = sourceBlocksArr
540
+ .map((b) => (Number.isInteger(b?.depth) ? b.depth : null))
541
+ .filter((v) => v !== null);
542
+ const domOrderValues = sourceBlocksArr
543
+ .map((b) => (Number.isInteger(b?.domOrder) ? b.domOrder : null))
544
+ .filter((v) => v !== null);
545
+
537
546
  blocks.push({
538
547
  blockIdxs: context.blockIdxKey,
539
548
  blockName: context.blockName,
@@ -543,6 +552,16 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
543
552
  blockDescription: context.blockDescription,
544
553
  blockCssPath: resolveLogicalBlockCssPath(context.sourceBlocks),
545
554
  blockPosition: buildLogicalBlockPosition(context.sourceBlocks),
555
+ fixed: sourceBlocksArr.some((b) => b?.fixed === true),
556
+ tag: firstSource.tag || '',
557
+ branchPath: firstSource.branchPath || '',
558
+ depth: depthValues.length ? Math.min(...depthValues) : 0,
559
+ domOrder: domOrderValues.length ? Math.min(...domOrderValues) : 0,
560
+ textPreview: firstSource.textPreview || '',
561
+ childInteractiveCount: sourceBlocksArr.reduce(
562
+ (sum, b) => sum + (Number(b?.childInteractiveCount) || 0),
563
+ 0
564
+ ),
546
565
  rowCount: context.rows.length,
547
566
  mode
548
567
  });
@@ -0,0 +1,137 @@
1
+ import { BaseLlmProvider } from './base-provider.js';
2
+ import { runCli } from './cli-runner.js';
3
+
4
/**
 * LLM provider that shells out to a locally installed Claude CLI (`claude -p`).
 *
 * No API key is passed by this class; authentication is left to the CLI
 * itself. The CLI is run with `--output-format text`, and its stdout (with
 * CRLF normalised to LF and trailing newlines removed) is returned as the
 * model response.
 *
 * NOTE(review): `--bare` is always passed — confirm the targeted claude CLI
 * versions accept it and that it is compatible with relying on local auth.
 */
export class ClaudeCliProvider extends BaseLlmProvider {
  /**
   * @param {Object} [config]
   * @param {string} config.model - Model name; required (checked after the base constructor runs)
   * @param {string} [config.cliPath='claude'] - CLI executable name or path
   * @param {string} [config.cwd] - Working directory for the CLI child process
   * @throws {Error} When no model is set on the instance
   */
  constructor(config = {}) {
    super(config);
    if (!this.model) {
      throw new Error('ClaudeCliProvider: model is required');
    }
    this.cliPath = config.cliPath || 'claude';
    this.cwd = config.cwd || null;
  }

  /**
   * Build the CLI argv (without the executable itself).
   *
   * @returns {string[]} Arguments for print mode with text output
   */
  buildArgs() {
    return ['-p', '--model', this.model, '--output-format', 'text', '--bare'];
  }

  /**
   * Derive the interaction-log context from request metadata.
   *
   * @param {*} metadata - Request metadata; non-objects are treated as empty
   * @returns {{domain: string, nodeId: string, operation: string, chunkLabel: (string|null)}|null}
   *   Null when either `domain` or `nodeId` is blank, which disables logging
   */
  resolveInteractionContext(metadata) {
    const source = metadata && typeof metadata === 'object' ? metadata : {};
    const domain = String(source.domain || '').trim();
    const nodeId = String(source.nodeId || '').trim();
    if (!(domain && nodeId)) {
      return null;
    }

    const operation = String(source.operation || 'analysis').trim() || 'analysis';
    const chunkLabel = String(source.chunkLabel || '').trim() || null;
    return { domain, nodeId, operation, chunkLabel };
  }

  /**
   * Run the CLI once with `prompt` on stdin and return its text output.
   *
   * An interaction log entry is emitted (only when a context could be
   * resolved) for success, for a non-zero exit / empty output, and for any
   * other failure that was not already logged.
   *
   * @param {string} prompt - Text piped to the CLI
   * @param {Object} [options]
   * @param {Object} [options.metadata] - Source of the interaction-log context
   * @returns {Promise<string>} Normalised stdout of the CLI
   * @throws {Error} When the CLI exits non-zero, prints nothing, or runCli rejects
   */
  async makeRequest(prompt, options = {}) {
    const metadata = options && typeof options.metadata === 'object' ? options.metadata : {};
    const interactionContext = this.resolveInteractionContext(metadata);
    const args = this.buildArgs();
    const requestPayload = { argv: [this.cliPath, ...args], model: this.model };

    // Single place that shapes the log record; token usage is never known for CLI runs.
    const logInteraction = (outputText, responsePayload) => this.emitInteractionLog({
      ...interactionContext,
      provider: 'Claude',
      model: this.model,
      requestId: null,
      inputText: prompt,
      outputText,
      requestPayload,
      responsePayload,
      usagePromptTokens: null,
      usageCompletionTokens: null,
      usageReasoningTokens: null,
      usageCost: null
    });
    const summarize = (result) => ({
      stdout: result.stdout,
      stderr: result.stderr,
      code: result.code,
      signal: result.signal
    });

    let failureLogged = false;
    try {
      const result = await runCli({
        command: this.cliPath,
        args,
        prompt,
        timeoutMs: this.timeout,
        cwd: this.cwd || undefined
      });

      const outputText = String(result.stdout ?? '').replace(/\r\n/g, '\n').replace(/\n+$/, '');
      const succeeded = result.code === 0 && Boolean(outputText);

      if (succeeded) {
        if (interactionContext) {
          await logInteraction(outputText, summarize(result));
        }
        return outputText;
      }

      if (interactionContext) {
        await logInteraction(outputText || null, summarize(result));
        // Set only after the log call resolved so the catch below does not log twice.
        failureLogged = true;
      }

      let reason = 'produced empty stdout';
      if (result.code !== 0) {
        const signalNote = result.signal ? ` (signal ${result.signal})` : '';
        reason = `exited with code ${result.code}${signalNote}`;
      }
      const stderrTail = String(result.stderr || '').slice(-500);
      throw new Error(`claude -p ${reason}${stderrTail ? `: ${stderrTail}` : ''}`);
    } catch (error) {
      if (interactionContext && !failureLogged) {
        await logInteraction(null, null);
      }
      throw error;
    }
  }

  /**
   * Send `content` to the CLI through the base class retry wrapper.
   *
   * @param {*} content - Prompt; nullish becomes '' and everything else is stringified
   * @param {Object} [options] - Request options; `parseJson` is stripped before the request
   * @returns {Promise<string>} Whatever makeRequestWithRetry resolves to
   */
  async analyze(content, options = {}) {
    const { parseJson, ...requestOptions } = options ?? {};
    const prompt = String(content ?? '');
    return this.makeRequestWithRetry(() => this.makeRequest(prompt, requestOptions));
  }

  /**
   * @throws {Error} When no model is configured
   */
  validateConfig() {
    if (!this.model) {
      throw new Error('Model is required');
    }
  }
}
@@ -0,0 +1,129 @@
1
+ import { spawn } from 'node:child_process';
2
+ import { mkdtemp, readFile, rm } from 'node:fs/promises';
3
+ import os from 'node:os';
4
+ import path from 'node:path';
5
+
6
const DEFAULT_KILL_GRACE_MS = 10_000;
const DEFAULT_TIMEOUT_MS = 600_000;
// Largest delay setTimeout honours; Node coerces anything larger (or < 1, or NaN) to 1ms.
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Turn a caller-supplied timeout into a delay that setTimeout will honour.
 *
 * @param {*} timeoutMs - Requested timeout in milliseconds
 * @returns {number} The default for NaN / values below 1 (e.g. 0 or null);
 *   otherwise the requested value capped at the maximum timer delay
 */
function resolveTimeoutMs(timeoutMs) {
  const requested = Number(timeoutMs);
  if (Number.isNaN(requested) || requested < 1) {
    return DEFAULT_TIMEOUT_MS;
  }
  return Math.min(requested, MAX_TIMER_DELAY_MS);
}

/**
 * Spawn a local CLI tool, write prompt to stdin, collect stdout/stderr, and
 * optionally read a designated output file. Used by codex-cli-provider and
 * claude-cli-provider.
 *
 * @param {Object} opts
 * @param {string} opts.command - Executable name or absolute path (e.g. 'codex')
 * @param {string[]} opts.args - Argv (excluding the command itself)
 * @param {string} opts.prompt - Text piped to the child's stdin
 * @param {number} opts.timeoutMs - Hard timeout; SIGTERM then SIGKILL after grace.
 *   Values that are NaN or below 1 (including 0 and null) fall back to the
 *   600000ms default instead of killing the child immediately; oversized
 *   values are capped at the maximum timer delay.
 * @param {string=} opts.outFile - If set, the file is read after the child exits and returned as outFileContent
 * @param {string=} opts.cwd - Optional working directory for the child
 * @param {Object=} opts.env - Optional env overrides; merged onto process.env
 * @returns {Promise<{ stdout: string, stderr: string, code: number|null, signal: NodeJS.Signals|null, outFileContent: string|null }>}
 * @throws {Error} When `command` is missing, when the child emits 'error'
 *   (e.g. spawn failure), when writing the prompt fails with anything other
 *   than EPIPE (the child is killed first), or with `code === 'CLI_TIMEOUT'`
 *   (plus `stdout`/`stderr`) when the timeout fired
 */
export async function runCli(opts) {
  const {
    command,
    args = [],
    prompt = '',
    timeoutMs = DEFAULT_TIMEOUT_MS,
    outFile = null,
    cwd,
    env
  } = opts || {};

  if (!command) {
    throw new Error('runCli: command is required');
  }

  const effectiveTimeoutMs = resolveTimeoutMs(timeoutMs);

  const child = spawn(command, args, {
    stdio: ['pipe', 'pipe', 'pipe'],
    cwd: cwd || undefined,
    env: env ? { ...process.env, ...env } : process.env
  });

  const stdoutChunks = [];
  const stderrChunks = [];
  child.stdout.on('data', (chunk) => { stdoutChunks.push(chunk); });
  child.stderr.on('data', (chunk) => { stderrChunks.push(chunk); });

  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    child.kill('SIGTERM');
    // Escalate if the child ignores SIGTERM; unref so this never holds the process open.
    setTimeout(() => {
      if (child.exitCode === null && child.signalCode === null) {
        try { child.kill('SIGKILL'); } catch { /* ignore */ }
      }
    }, DEFAULT_KILL_GRACE_MS).unref();
  }, effectiveTimeoutMs);
  if (typeof timer.unref === 'function') timer.unref();

  // Listen for completion BEFORE touching stdin so a spawn 'error' always has
  // a listener, even if the stdin write below throws synchronously.
  const exited = new Promise((resolve, reject) => {
    child.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });
    child.on('close', (exitCode, exitSignal) => {
      clearTimeout(timer);
      resolve({ code: exitCode, signal: exitSignal });
    });
  });

  child.stdin.on('error', (err) => {
    if (err && err.code !== 'EPIPE') {
      // Surface non-EPIPE stdin errors via stderr buffer
      stderrChunks.push(Buffer.from(`\n[stdin error] ${err.message}\n`));
    }
  });

  // Pipe prompt to stdin; tolerate EPIPE if the child closes stdin early.
  try {
    if (prompt) {
      child.stdin.write(prompt);
    }
    child.stdin.end();
  } catch (err) {
    if (err && err.code !== 'EPIPE') {
      clearTimeout(timer);
      // We stop waiting for the child: swallow its eventual outcome and make
      // sure it does not linger with an open stdin pipe.
      exited.catch(() => {});
      try { child.kill('SIGKILL'); } catch { /* ignore */ }
      throw err;
    }
  }

  const { code, signal } = await exited;

  const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
  const stderr = Buffer.concat(stderrChunks).toString('utf-8');

  let outFileContent = null;
  if (outFile) {
    try {
      outFileContent = await readFile(outFile, 'utf-8');
    } catch {
      // Best effort: a missing or unreadable output file is reported as null.
      outFileContent = null;
    }
  }

  if (timedOut) {
    const err = new Error(`CLI '${command}' timed out after ${effectiveTimeoutMs}ms`);
    err.code = 'CLI_TIMEOUT';
    err.stdout = stdout;
    err.stderr = stderr;
    throw err;
  }

  return { stdout, stderr, code, signal, outFileContent };
}
113
+
114
/**
 * Reserve a file path inside a freshly created, uniquely named temp directory.
 *
 * Only the directory is created (under os.tmpdir(), named `<prefix>-<random>`);
 * the file itself is not written. The returned `cleanup` removes the whole
 * directory recursively and never rejects.
 *
 * @param {string} prefix - Leading part of the temp directory name
 * @param {string} fileName - File name to place inside the directory
 * @returns {Promise<{ filePath: string, cleanup: () => Promise<void> }>}
 */
export async function makeTmpOutFile(prefix, fileName) {
  const dirTemplate = path.join(os.tmpdir(), `${prefix}-`);
  const isolatedDir = await mkdtemp(dirTemplate);
  const filePath = path.join(isolatedDir, fileName);

  async function cleanup() {
    try {
      await rm(isolatedDir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
  }

  return { filePath, cleanup };
}
+ }