page-analyzer 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +36 -0
- package/README.md +27 -1
- package/extractors/block-assigner.js +1 -1
- package/index.d.ts +318 -0
- package/index.js +192 -36
- package/llm/analyzers/event-analyzer/event-analyzer-blocks.js +19 -0
- package/llm/providers/claude-cli-provider.js +137 -0
- package/llm/providers/cli-runner.js +129 -0
- package/llm/providers/codex-cli-provider.js +154 -0
- package/llm/providers/index.js +61 -0
- package/package.json +6 -1
- package/page-extractor.js +153 -5
- package/scripts/analyze.js +10 -5
- package/test/smoke.test.js +82 -1
package/index.js
CHANGED
|
@@ -1,17 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* page-analyzer — Standalone module
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* Three LLM backends are supported via `llm.type`:
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
* const result = await analyzeUrl('https://example.com', {
|
|
6
|
+
* // 1. OpenAI-compatible HTTP API (default, backward compatible)
|
|
7
|
+
* await analyzeUrl('https://example.com', {
|
|
9
8
|
* llm: {
|
|
9
|
+
* type: 'openai', // optional; default
|
|
10
10
|
* apiKey: 'sk-...',
|
|
11
11
|
* apiEndpoint: 'https://api.openai.com/v1/chat/completions',
|
|
12
|
-
* model: 'gpt-4'
|
|
13
|
-
* }
|
|
14
|
-
*
|
|
12
|
+
* model: 'gpt-4'
|
|
13
|
+
* }
|
|
14
|
+
* });
|
|
15
|
+
*
|
|
16
|
+
* // 2. Local Codex CLI (`codex exec`) — uses your local auth
|
|
17
|
+
* await analyzeUrl('https://example.com', {
|
|
18
|
+
* llm: {
|
|
19
|
+
* type: 'codex',
|
|
20
|
+
* model: 'gpt-5.5' // model === 'gpt-5.5' auto-enables fast mode
|
|
21
|
+
* // fast: true // optional explicit override
|
|
22
|
+
* }
|
|
23
|
+
* });
|
|
24
|
+
*
|
|
25
|
+
* // 3. Local Claude CLI (`claude -p`) — uses your local auth
|
|
26
|
+
* await analyzeUrl('https://example.com', {
|
|
27
|
+
* llm: { type: 'claude', model: 'sonnet' }
|
|
15
28
|
* });
|
|
16
29
|
*
|
|
17
30
|
* Or step-by-step with analyzePageEvents() for pre-fetched data.
|
|
@@ -20,12 +33,22 @@
|
|
|
20
33
|
*/
|
|
21
34
|
|
|
22
35
|
import { HtmlParser } from './html-parser.js';
|
|
23
|
-
import { assignBlocksToElements } from './extractors/block-assigner.js';
|
|
36
|
+
import { assignBlocksToElements, mapRectToBlock } from './extractors/block-assigner.js';
|
|
24
37
|
import { CsvExporter } from './csv-exporter.js';
|
|
25
|
-
import {
|
|
38
|
+
import { createLlmProvider } from './llm/providers/index.js';
|
|
26
39
|
import { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
|
|
27
40
|
import { PageExtractor } from './page-extractor.js';
|
|
28
41
|
|
|
42
|
+
/**
 * Check that an LLM config carries the fields its backend needs.
 *
 * `type` is lower-cased and defaults to 'openai' when absent. Every backend
 * needs `model`; only the 'openai' backend additionally needs `apiKey` and
 * `apiEndpoint`.
 *
 * @param {Object} llmConfig - LLM config object (may be null/undefined)
 * @param {string} scope - Prefix used in error messages (e.g. 'options.llm')
 * @throws {Error} When a required field is missing
 */
function validateLlmConfig(llmConfig, scope) {
  // Resolve the backend first so its coercion happens before any field check.
  const backend = String(llmConfig?.type || 'openai').toLowerCase();

  const hasModel = Boolean(llmConfig?.model);
  if (!hasModel) {
    throw new Error(`${scope}.model is required`);
  }

  if (backend !== 'openai') {
    return;
  }

  const hasCredentials = Boolean(llmConfig.apiKey) && Boolean(llmConfig.apiEndpoint);
  if (hasCredentials) {
    return;
  }
  throw new Error(`${scope}.apiKey and ${scope}.apiEndpoint are required when ${scope}.type='openai'`);
}
|
|
51
|
+
|
|
29
52
|
/**
 * Report whether `value` is a non-null, non-array object.
 *
 * Always returns a strict boolean, so the result is safe to store or compare
 * as well as to use in a condition.
 *
 * @param {*} value - Value to test
 * @returns {boolean} true for plain objects and other non-array objects
 */
function isObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}
|
|
@@ -131,6 +154,92 @@ function mergeScreenshots(primary, secondary) {
|
|
|
131
154
|
return hasScreenshots(merged) ? merged : null;
|
|
132
155
|
}
|
|
133
156
|
|
|
157
|
+
/**
 * Reduce a sized-element record to the fixed set of fields exposed in results.
 *
 * Text-like fields fall back to '' and geometry fields to 0 when missing or
 * falsy; `interactive` is true only when the source value is exactly `true`.
 * A non-object input yields a record made entirely of those defaults.
 *
 * @param {*} element - Raw sized-element record
 * @returns {Object} Trimmed record with a stable key order
 */
function pickSizedElementFields(element) {
  const record = isObject(element) ? element : {};
  const textOf = (key) => record[key] || '';
  const numberOf = (key) => Number(record[key]) || 0;

  return {
    tag: textOf('tag'),
    text: textOf('text'),
    href: textOf('href'),
    src: textOf('src'),
    width: numberOf('width'),
    height: numberOf('height'),
    top: numberOf('top'),
    left: numberOf('left'),
    cssSelector: textOf('cssSelector'),
    id: textOf('id'),
    class: textOf('class'),
    role: textOf('role'),
    ariaLabel: textOf('ariaLabel'),
    imageAlt: textOf('imageAlt'),
    interactive: record.interactive === true
  };
}
|
|
177
|
+
|
|
178
|
+
/**
 * Nest sized DOM elements under the logical (output) block that owns them.
 *
 * Each element's rect is resolved to a physical block index with
 * mapRectToBlock, and that index is looked up in a table built from every
 * logical block's `blockIdxs` (first logical block to claim an index wins).
 * Matching elements are pushed, trimmed, onto that block's `elements` array;
 * the arrays are then sorted top-to-bottom, left-to-right.
 *
 * The raw analysis blocks are mutated in place, so this must run while they
 * still carry `blockIdxs` (i.e. before compaction). When either input list is
 * empty nothing is mutated and `[]` is returned.
 *
 * @param {Array} analysisBlocks - Raw block_analysis.blocks (carry `blockIdxs`)
 * @param {Array} sizedElements - Records from PageExtractor.collectSizedElements
 * @param {Array} physicalBlocks - Visual blocks from extraction (bundle.blocks)
 * @returns {Array} Trimmed elements whose physical index matched no logical block
 */
function attachSizedElementsToBlocks(analysisBlocks, sizedElements, physicalBlocks) {
  const hasLogicalBlocks = Array.isArray(analysisBlocks) && analysisBlocks.length > 0;
  if (!hasLogicalBlocks) {
    return [];
  }
  const candidates = Array.isArray(sizedElements) ? sizedElements : [];
  if (candidates.length === 0) {
    return [];
  }
  const visualBlocks = Array.isArray(physicalBlocks) ? physicalBlocks : [];

  // Only object entries can own elements; anything else is ignored throughout.
  const logicalBlocks = analysisBlocks.filter((entry) => isObject(entry));

  const ownerByPhysicalIdx = new Map();
  logicalBlocks.forEach((owner) => {
    owner.elements = [];
    for (const idx of parseBlockIdxs(owner.blockIdxs)) {
      if (ownerByPhysicalIdx.has(idx)) {
        continue;
      }
      ownerByPhysicalIdx.set(idx, owner);
    }
  });

  const leftovers = [];
  for (const candidate of candidates) {
    const physicalIdx = mapRectToBlock(
      {
        top: Number(candidate?.top) || 0,
        left: Number(candidate?.left) || 0,
        width: Number(candidate?.width) || 0,
        height: Number(candidate?.height) || 0
      },
      visualBlocks
    );
    const owner = ownerByPhysicalIdx.get(physicalIdx);
    const record = pickSizedElementFields(candidate);
    const bucket = owner ? owner.elements : leftovers;
    bucket.push(record);
  }

  // Reading order: by top edge, ties broken by left edge.
  const byReadingOrder = (a, b) => (a.top - b.top) || (a.left - b.left);
  for (const owner of logicalBlocks) {
    if (Array.isArray(owner.elements) && owner.elements.length > 1) {
      owner.elements.sort(byReadingOrder);
    }
  }

  return leftovers;
}
|
|
242
|
+
|
|
134
243
|
function attachLogicalBlockScreenshotPaths(result, screenshots) {
|
|
135
244
|
const blocks = result?.analysis?.block_analysis?.blocks;
|
|
136
245
|
if (!Array.isArray(blocks) || blocks.length === 0) {
|
|
@@ -170,7 +279,16 @@ function compactBlockAnalysisBlock(block, displayOptions) {
|
|
|
170
279
|
blockDescription: source.blockDescription,
|
|
171
280
|
blockSemantics: Array.isArray(source.blockSemantics) ? source.blockSemantics : [],
|
|
172
281
|
blockCssPath: source.blockCssPath,
|
|
173
|
-
blockPosition: source.blockPosition
|
|
282
|
+
blockPosition: source.blockPosition,
|
|
283
|
+
fixed: source.fixed === true,
|
|
284
|
+
tag: source.tag || '',
|
|
285
|
+
branchPath: source.branchPath || '',
|
|
286
|
+
depth: Number.isInteger(source.depth) ? source.depth : 0,
|
|
287
|
+
domOrder: Number.isInteger(source.domOrder) ? source.domOrder : 0,
|
|
288
|
+
textPreview: source.textPreview || '',
|
|
289
|
+
childInteractiveCount: Number.isInteger(source.childInteractiveCount)
|
|
290
|
+
? source.childInteractiveCount
|
|
291
|
+
: 0
|
|
174
292
|
};
|
|
175
293
|
|
|
176
294
|
if (displayOptions.showBlockIdx) {
|
|
@@ -192,6 +310,10 @@ function compactBlockAnalysisBlock(block, displayOptions) {
|
|
|
192
310
|
out.blockScreenshotPaths = source.blockScreenshotPaths;
|
|
193
311
|
}
|
|
194
312
|
|
|
313
|
+
if (Array.isArray(source.elements)) {
|
|
314
|
+
out.elements = source.elements;
|
|
315
|
+
}
|
|
316
|
+
|
|
195
317
|
return out;
|
|
196
318
|
}
|
|
197
319
|
|
|
@@ -210,6 +332,10 @@ function compactBlockAnalysis(blockAnalysis, displayOptions) {
|
|
|
210
332
|
: [];
|
|
211
333
|
}
|
|
212
334
|
|
|
335
|
+
if (Array.isArray(source.unassignedElements) && source.unassignedElements.length > 0) {
|
|
336
|
+
out.unassignedElements = source.unassignedElements;
|
|
337
|
+
}
|
|
338
|
+
|
|
213
339
|
if (isObject(source.stats)) {
|
|
214
340
|
out.stats = displayOptions.showEvents
|
|
215
341
|
? source.stats
|
|
@@ -282,13 +408,23 @@ function buildPageAnalysisResult({
|
|
|
282
408
|
*
|
|
283
409
|
* @param {string} url - URL to analyze
|
|
284
410
|
* @param {Object} options
|
|
285
|
-
* @param {Object} options.llm - { apiKey, apiEndpoint,
|
|
411
|
+
* @param {Object} options.llm - { type, model, apiKey, apiEndpoint, cliPath, cwd, fast, ... }
|
|
412
|
+
* - `type`: 'openai' (default) | 'codex' | 'claude'
|
|
413
|
+
* - `model`: required for all types
|
|
414
|
+
* - `apiKey` / `apiEndpoint`: required only for type='openai'
|
|
415
|
+
* - `cliPath` / `cwd`: optional for type='codex' | 'claude' (override CLI binary / cwd)
|
|
416
|
+
* - `fast`: codex only; auto-enabled when model === 'gpt-5.5'
|
|
286
417
|
* @param {Array} [options.knownEventTypes] - Accumulated event types for consistency
|
|
287
418
|
* @param {Object} [options.parserConfig] - HtmlParser config overrides
|
|
288
419
|
* @param {Object} [options.extractorConfig] - PageExtractor config overrides
|
|
289
420
|
* @param {boolean} [options.showEvents=false] - Include event arrays and full event-related metadata.
|
|
290
421
|
* Also enables node-level event classification.
|
|
291
422
|
* @param {boolean} [options.showBlockIdx=false] - Include CSV/block index alignment fields.
|
|
423
|
+
* @param {boolean} [options.showElement=false] - Collect all visible DOM elements with
|
|
424
|
+
* width or height > `elementSize` and nest them under each block as `elements`.
|
|
425
|
+
* When false (default), the sized-element collection is skipped (previous behavior).
|
|
426
|
+
* @param {number} [options.elementSize=24] - Min size (px) threshold for `showElement`
|
|
427
|
+
* (an element qualifies when width > elementSize OR height > elementSize).
|
|
292
428
|
* @param {boolean} [options.fullPageScreenshot=false] - Save a full-page screenshot to snapshots/ and return its path.
|
|
293
429
|
* @param {boolean} [options.blockScreenshots=false] - Save one screenshot per merged logical block to snapshots/ and return their paths.
|
|
294
430
|
* @param {boolean} [options.waitForImagesLoaded=false] - Wait for page images before extracting and screenshotting.
|
|
@@ -303,18 +439,22 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
303
439
|
extractorConfig,
|
|
304
440
|
showEvents,
|
|
305
441
|
showBlockIdx,
|
|
442
|
+
showElement,
|
|
443
|
+
elementSize,
|
|
306
444
|
fullPageScreenshot,
|
|
307
445
|
blockScreenshots,
|
|
308
446
|
waitForImagesLoaded
|
|
309
447
|
} = options;
|
|
310
448
|
|
|
311
449
|
if (!url) throw new Error('url is required');
|
|
312
|
-
|
|
313
|
-
throw new Error('options.llm.apiKey, apiEndpoint, and model are required');
|
|
314
|
-
}
|
|
450
|
+
validateLlmConfig(llmConfig, 'options.llm');
|
|
315
451
|
|
|
316
452
|
const shouldCaptureFullPage = fullPageScreenshot ?? extractorConfig?.fullPageScreenshot;
|
|
317
453
|
const shouldCaptureBlocks = blockScreenshots ?? extractorConfig?.blockScreenshots;
|
|
454
|
+
const sizedElementsEnabled = showElement === true;
|
|
455
|
+
const sizedElementMinSize = Number.isInteger(elementSize)
|
|
456
|
+
? Math.max(0, elementSize)
|
|
457
|
+
: (extractorConfig?.sizedElementMinSize ?? 24);
|
|
318
458
|
|
|
319
459
|
// Step 0: Playwright extraction
|
|
320
460
|
console.log(`[page-analyzer] Extracting ${url} ...`);
|
|
@@ -322,12 +462,14 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
322
462
|
...extractorConfig,
|
|
323
463
|
fullPageScreenshot: shouldCaptureFullPage,
|
|
324
464
|
blockScreenshots: false,
|
|
325
|
-
waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded
|
|
465
|
+
waitForImagesLoaded: waitForImagesLoaded ?? extractorConfig?.waitForImagesLoaded,
|
|
466
|
+
sizedElementsEnabled,
|
|
467
|
+
sizedElementMinSize
|
|
326
468
|
});
|
|
327
469
|
|
|
328
470
|
return await extractor.withPreparedPage(url, async (page, targetUrl) => {
|
|
329
471
|
const bundle = await extractor.extractPreparedPage(page, targetUrl);
|
|
330
|
-
console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries`);
|
|
472
|
+
console.log(`[page-analyzer] Extracted: ${bundle.blocks.length} blocks, ${bundle.elementGeometries.length} geometries, ${Array.isArray(bundle.sizedElements) ? bundle.sizedElements.length : 0} sized elements`);
|
|
331
473
|
|
|
332
474
|
// Derive domain from URL
|
|
333
475
|
let domain = '';
|
|
@@ -338,6 +480,7 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
338
480
|
url: targetUrl,
|
|
339
481
|
blocks: bundle.blocks,
|
|
340
482
|
elementGeometries: bundle.elementGeometries,
|
|
483
|
+
sizedElements: bundle.sizedElements,
|
|
341
484
|
llm: llmConfig,
|
|
342
485
|
knownEventTypes,
|
|
343
486
|
parserConfig,
|
|
@@ -378,13 +521,19 @@ export async function analyzeUrl(url, options = {}) {
|
|
|
378
521
|
* @param {string} input.url - Page URL
|
|
379
522
|
* @param {Array} input.blocks - Visual blocks from Playwright extraction
|
|
380
523
|
* @param {Array} input.elementGeometries - Element geometry records
|
|
524
|
+
* @param {Array} [input.sizedElements] - Visible DOM elements whose width or height exceeds the size threshold used during extraction (24px by default)
|
|
525
|
+
* (from PageExtractor.collectSizedElements); nested under each block as `elements`.
|
|
381
526
|
* @param {string} [input.markdown] - Markdown content (reserved for future use)
|
|
382
527
|
* @param {Object} input.llm - LLM provider config
|
|
383
|
-
* @param {
|
|
384
|
-
* @param {string} input.llm.
|
|
385
|
-
* @param {string} input.llm.
|
|
386
|
-
* @param {
|
|
387
|
-
* @param {
|
|
528
|
+
* @param {('openai'|'codex'|'claude')} [input.llm.type='openai'] - Backend type
|
|
529
|
+
* @param {string} input.llm.model - Model name (required for all types)
|
|
530
|
+
* @param {string} [input.llm.apiKey] - API key (required when type='openai')
|
|
531
|
+
* @param {string} [input.llm.apiEndpoint] - API endpoint URL (required when type='openai')
|
|
532
|
+
* @param {string} [input.llm.cliPath] - Override CLI binary path (codex/claude)
|
|
533
|
+
* @param {string} [input.llm.cwd] - Working directory for the CLI child process
|
|
534
|
+
* @param {boolean} [input.llm.fast] - Codex only; auto-enabled when model === 'gpt-5.5'
|
|
535
|
+
* @param {number} [input.llm.maxTokens] - Max tokens (openai only)
|
|
536
|
+
* @param {number} [input.llm.temperature] - Temperature (openai only)
|
|
388
537
|
* @param {number} [input.llm.timeout] - Request timeout ms
|
|
389
538
|
* @param {number} [input.llm.maxRetries] - Max retries
|
|
390
539
|
* @param {Array} [input.llm.knownEventTypes] - Pre-configured known event types
|
|
@@ -405,6 +554,7 @@ export async function analyzePageEvents(input) {
|
|
|
405
554
|
url,
|
|
406
555
|
blocks = [],
|
|
407
556
|
elementGeometries = [],
|
|
557
|
+
sizedElements = [],
|
|
408
558
|
markdown = '',
|
|
409
559
|
llm: llmConfig,
|
|
410
560
|
knownEventTypes = [],
|
|
@@ -423,9 +573,7 @@ export async function analyzePageEvents(input) {
|
|
|
423
573
|
if (!url) {
|
|
424
574
|
throw new Error('url is required');
|
|
425
575
|
}
|
|
426
|
-
|
|
427
|
-
throw new Error('llm.apiKey, llm.apiEndpoint, and llm.model are required');
|
|
428
|
-
}
|
|
576
|
+
validateLlmConfig(llmConfig, 'llm');
|
|
429
577
|
|
|
430
578
|
// Step 1: Parse HTML → elements
|
|
431
579
|
const htmlParser = new HtmlParser(parserConfig);
|
|
@@ -440,16 +588,7 @@ export async function analyzePageEvents(input) {
|
|
|
440
588
|
const csvContent = csvExporter.buildCsvContent(nodeId || 'page', elements);
|
|
441
589
|
|
|
442
590
|
// Step 4: LLM event analysis
|
|
443
|
-
const provider =
|
|
444
|
-
apiKey: llmConfig.apiKey,
|
|
445
|
-
apiEndpoint: llmConfig.apiEndpoint,
|
|
446
|
-
model: llmConfig.model,
|
|
447
|
-
maxTokens: llmConfig.maxTokens,
|
|
448
|
-
temperature: llmConfig.temperature,
|
|
449
|
-
timeout: llmConfig.timeout,
|
|
450
|
-
maxRetries: llmConfig.maxRetries,
|
|
451
|
-
interactionLogger: llmConfig.interactionLogger
|
|
452
|
-
});
|
|
591
|
+
const provider = createLlmProvider(llmConfig);
|
|
453
592
|
|
|
454
593
|
const eventAnalyzer = new EventAnalyzer(provider, llmConfig, {
|
|
455
594
|
domain,
|
|
@@ -467,6 +606,17 @@ export async function analyzePageEvents(input) {
|
|
|
467
606
|
}
|
|
468
607
|
);
|
|
469
608
|
|
|
609
|
+
// Step 5: Nest sized DOM elements under each block (before compaction, while
|
|
610
|
+
// raw analysis blocks still carry blockIdxs for physical→logical mapping).
|
|
611
|
+
const unassignedSizedElements = attachSizedElementsToBlocks(
|
|
612
|
+
analysis?.block_analysis?.blocks,
|
|
613
|
+
sizedElements,
|
|
614
|
+
blocks
|
|
615
|
+
);
|
|
616
|
+
if (unassignedSizedElements.length > 0 && isObject(analysis?.block_analysis)) {
|
|
617
|
+
analysis.block_analysis.unassignedElements = unassignedSizedElements;
|
|
618
|
+
}
|
|
619
|
+
|
|
470
620
|
return buildPageAnalysisResult({
|
|
471
621
|
elements,
|
|
472
622
|
csvContent,
|
|
@@ -481,7 +631,13 @@ export async function analyzePageEvents(input) {
|
|
|
481
631
|
export { HtmlParser } from './html-parser.js';
|
|
482
632
|
export { assignBlocksToElements } from './extractors/block-assigner.js';
|
|
483
633
|
export { CsvExporter } from './csv-exporter.js';
|
|
484
|
-
export {
|
|
485
|
-
|
|
634
|
+
export {
|
|
635
|
+
createLlmProvider,
|
|
636
|
+
OpenAiProvider,
|
|
637
|
+
CodexCliProvider,
|
|
638
|
+
ClaudeCliProvider,
|
|
639
|
+
BaseLlmProvider,
|
|
640
|
+
LLM_PROVIDER_TYPES
|
|
641
|
+
} from './llm/providers/index.js';
|
|
486
642
|
export { EventAnalyzer } from './llm/analyzers/event-analyzer/event-analyzer.js';
|
|
487
643
|
export { PageExtractor } from './page-extractor.js';
|
|
@@ -534,6 +534,15 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
|
|
|
534
534
|
possibleEventTypes.push(eventType);
|
|
535
535
|
}
|
|
536
536
|
|
|
537
|
+
const sourceBlocksArr = Array.isArray(context.sourceBlocks) ? context.sourceBlocks : [];
|
|
538
|
+
const firstSource = sourceBlocksArr[0] || {};
|
|
539
|
+
const depthValues = sourceBlocksArr
|
|
540
|
+
.map((b) => (Number.isInteger(b?.depth) ? b.depth : null))
|
|
541
|
+
.filter((v) => v !== null);
|
|
542
|
+
const domOrderValues = sourceBlocksArr
|
|
543
|
+
.map((b) => (Number.isInteger(b?.domOrder) ? b.domOrder : null))
|
|
544
|
+
.filter((v) => v !== null);
|
|
545
|
+
|
|
537
546
|
blocks.push({
|
|
538
547
|
blockIdxs: context.blockIdxKey,
|
|
539
548
|
blockName: context.blockName,
|
|
@@ -543,6 +552,16 @@ function buildBlockAnalysisArtifact(siteSummary, blockContexts = [], llmGroups =
|
|
|
543
552
|
blockDescription: context.blockDescription,
|
|
544
553
|
blockCssPath: resolveLogicalBlockCssPath(context.sourceBlocks),
|
|
545
554
|
blockPosition: buildLogicalBlockPosition(context.sourceBlocks),
|
|
555
|
+
fixed: sourceBlocksArr.some((b) => b?.fixed === true),
|
|
556
|
+
tag: firstSource.tag || '',
|
|
557
|
+
branchPath: firstSource.branchPath || '',
|
|
558
|
+
depth: depthValues.length ? Math.min(...depthValues) : 0,
|
|
559
|
+
domOrder: domOrderValues.length ? Math.min(...domOrderValues) : 0,
|
|
560
|
+
textPreview: firstSource.textPreview || '',
|
|
561
|
+
childInteractiveCount: sourceBlocksArr.reduce(
|
|
562
|
+
(sum, b) => sum + (Number(b?.childInteractiveCount) || 0),
|
|
563
|
+
0
|
|
564
|
+
),
|
|
546
565
|
rowCount: context.rows.length,
|
|
547
566
|
mode
|
|
548
567
|
});
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { BaseLlmProvider } from './base-provider.js';
|
|
2
|
+
import { runCli } from './cli-runner.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Build one interaction-log entry for a `claude -p` invocation.
 *
 * The CLI reports no request id, token usage or cost, so those fields are
 * always null. `result` is the runCli result, or null when the CLI never
 * produced one (spawn failure, timeout, or any other thrown error).
 *
 * @param {Object} interactionContext - { domain, nodeId, operation, chunkLabel }
 * @param {Object} details
 * @param {string} details.model - Model name passed to the CLI
 * @param {string} details.prompt - Prompt text sent on stdin
 * @param {string|null} details.outputText - Normalized stdout, or null
 * @param {Object} details.requestPayload - { argv, model }
 * @param {Object|null} details.result - runCli result, or null
 * @returns {Object} Entry suitable for emitInteractionLog
 */
function buildClaudeInteractionLogEntry(interactionContext, { model, prompt, outputText, requestPayload, result }) {
  return {
    ...interactionContext,
    provider: 'Claude',
    model,
    requestId: null,
    inputText: prompt,
    outputText,
    requestPayload,
    responsePayload: result
      ? { stdout: result.stdout, stderr: result.stderr, code: result.code, signal: result.signal }
      : null,
    usagePromptTokens: null,
    usageCompletionTokens: null,
    usageReasoningTokens: null,
    usageCost: null
  };
}

/**
 * Claude CLI provider — invokes the locally installed `claude -p` command.
 *
 * Auth is whatever the user's local claude install already has; no API key needed.
 * In `--output-format text` print mode, stdout is exactly the model's final text
 * response, so we use it directly.
 *
 * `this.model`, `this.timeout`, `emitInteractionLog` and `makeRequestWithRetry`
 * are not defined here — presumably provided by BaseLlmProvider (confirm there).
 */
export class ClaudeCliProvider extends BaseLlmProvider {
  /**
   * @param {Object} [config]
   * @param {string} [config.cliPath='claude'] - CLI binary name or path
   * @param {string} [config.cwd] - Working directory for the child process
   * @throws {Error} When no model is configured
   */
  constructor(config = {}) {
    super(config);
    if (!this.model) {
      throw new Error('ClaudeCliProvider: model is required');
    }
    this.cliPath = config.cliPath || 'claude';
    this.cwd = config.cwd || null;
  }

  /**
   * @returns {string[]} Argv for the CLI (excluding the binary itself)
   */
  buildArgs() {
    return [
      '-p',
      '--model', this.model,
      '--output-format', 'text',
      '--bare'
    ];
  }

  /**
   * Normalize request metadata into a logging context.
   *
   * @param {Object} metadata - Caller-supplied metadata
   * @returns {Object|null} Context, or null when domain or nodeId is missing
   *   (in which case no interaction log is emitted for the request)
   */
  resolveInteractionContext(metadata) {
    const context = metadata && typeof metadata === 'object' ? metadata : {};
    const domain = String(context.domain || '').trim();
    const nodeId = String(context.nodeId || '').trim();
    if (!domain || !nodeId) {
      return null;
    }
    return {
      domain,
      nodeId,
      operation: String(context.operation || 'analysis').trim() || 'analysis',
      chunkLabel: String(context.chunkLabel || '').trim() || null
    };
  }

  /**
   * Run the CLI once with `prompt` on stdin and return its text output.
   *
   * @param {string} prompt - Prompt text
   * @param {Object} [options]
   * @param {Object} [options.metadata] - Logging metadata (domain, nodeId, ...)
   * @returns {Promise<string>} stdout with CRLF normalized and trailing newlines removed
   * @throws {Error} When the CLI exits non-zero, prints nothing, or runCli fails
   */
  async makeRequest(prompt, options = {}) {
    const metadata = options && typeof options.metadata === 'object' ? options.metadata : {};
    const interactionContext = this.resolveInteractionContext(metadata);
    const args = this.buildArgs();
    const requestPayload = { argv: [this.cliPath, ...args], model: this.model };

    // Emits one log entry; a no-op when the request carries no logging context.
    const logInteraction = async (outputText, result) => {
      if (!interactionContext) {
        return;
      }
      await this.emitInteractionLog(
        buildClaudeInteractionLogEntry(interactionContext, {
          model: this.model,
          prompt,
          outputText,
          requestPayload,
          result
        })
      );
    };

    // Set once the failure branch has logged, so the catch below does not log twice.
    let failureLogged = false;
    try {
      const result = await runCli({
        command: this.cliPath,
        args,
        prompt,
        timeoutMs: this.timeout,
        cwd: this.cwd || undefined
      });

      const outputText = String(result.stdout ?? '').replace(/\r\n/g, '\n').replace(/\n+$/, '');
      if (result.code !== 0 || !outputText) {
        await logInteraction(outputText || null, result);
        failureLogged = true;

        const reason = result.code !== 0
          ? `exited with code ${result.code}${result.signal ? ` (signal ${result.signal})` : ''}`
          : 'produced empty stdout';
        const stderrTail = String(result.stderr || '').slice(-500);
        throw new Error(`claude -p ${reason}${stderrTail ? `: ${stderrTail}` : ''}`);
      }

      await logInteraction(outputText, result);
      return outputText;
    } catch (error) {
      if (!failureLogged) {
        await logInteraction(null, null);
      }
      throw error;
    }
  }

  /**
   * Analyze `content` through the retry wrapper.
   *
   * @param {*} content - Prompt content; coerced to a string (null/undefined become '')
   * @param {Object} [options] - Request options; `parseJson` is dropped before the request
   * @returns {Promise<string>} Model text output
   */
  async analyze(content, options = {}) {
    const requestOptions = { ...options };
    delete requestOptions.parseJson;
    return this.makeRequestWithRetry(() => this.makeRequest(String(content ?? ''), requestOptions));
  }

  /**
   * @throws {Error} When no model is configured
   */
  validateConfig() {
    if (!this.model) {
      throw new Error('Model is required');
    }
  }
}
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { spawn } from 'node:child_process';
|
|
2
|
+
import { mkdtemp, readFile, rm } from 'node:fs/promises';
|
|
3
|
+
import os from 'node:os';
|
|
4
|
+
import path from 'node:path';
|
|
5
|
+
|
|
6
|
+
const DEFAULT_KILL_GRACE_MS = 10_000;
const DEFAULT_TIMEOUT_MS = 600_000;
// Largest delay setTimeout honours; Node coerces anything outside 1..2^31-1 to 1ms.
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Spawn a local CLI tool, write prompt to stdin, collect stdout/stderr, and
 * optionally read a designated output file. Used by codex-cli-provider and
 * claude-cli-provider.
 *
 * @param {Object} opts
 * @param {string} opts.command - Executable name or absolute path (e.g. 'codex')
 * @param {string[]} opts.args - Argv (excluding the command itself)
 * @param {string} opts.prompt - Text piped to the child's stdin
 * @param {number} opts.timeoutMs - Hard timeout; SIGTERM then SIGKILL after grace.
 *   Missing, null, non-numeric or non-positive values fall back to 600000 ms;
 *   values above the timer maximum are clamped to it.
 * @param {string=} opts.outFile - If set, the file is read after the child exits and returned as outFileContent
 * @param {string=} opts.cwd - Optional working directory for the child
 * @param {Object=} opts.env - Optional env overrides; merged onto process.env
 * @returns {Promise<{ stdout: string, stderr: string, code: number|null, signal: NodeJS.Signals|null, outFileContent: string|null }>}
 * @throws {Error} When `command` is missing, the child cannot be spawned, stdin
 *   cannot be written, or the timeout elapses (`err.code === 'CLI_TIMEOUT'`,
 *   with partial `err.stdout` / `err.stderr`)
 */
export async function runCli(opts) {
  const {
    command,
    args = [],
    prompt = '',
    timeoutMs = DEFAULT_TIMEOUT_MS,
    outFile = null,
    cwd,
    env
  } = opts || {};

  if (!command) {
    throw new Error('runCli: command is required');
  }

  // setTimeout silently turns null/0/NaN/too-large delays into 1ms, which would
  // kill the child immediately; normalize before arming the timer.
  const requestedTimeoutMs = Number(timeoutMs);
  const effectiveTimeoutMs = requestedTimeoutMs > 0
    ? Math.min(requestedTimeoutMs, MAX_TIMER_DELAY_MS)
    : DEFAULT_TIMEOUT_MS;

  const child = spawn(command, args, {
    stdio: ['pipe', 'pipe', 'pipe'],
    cwd: cwd || undefined,
    env: env ? { ...process.env, ...env } : process.env
  });

  // Register exit listeners before touching the stdio streams: a failed spawn
  // emits 'error' on the next tick and must always find a handler.
  const exited = new Promise((resolve, reject) => {
    child.on('error', reject);
    child.on('close', (exitCode, exitSignal) => {
      resolve({ code: exitCode, signal: exitSignal });
    });
  });

  // The stdio streams can be missing when the child could not be spawned.
  const stdoutChunks = [];
  const stderrChunks = [];
  child.stdout?.on('data', (chunk) => { stdoutChunks.push(chunk); });
  child.stderr?.on('data', (chunk) => { stderrChunks.push(chunk); });

  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    child.kill('SIGTERM');
    setTimeout(() => {
      if (child.exitCode === null && child.signalCode === null) {
        try { child.kill('SIGKILL'); } catch { /* ignore */ }
      }
    }, DEFAULT_KILL_GRACE_MS).unref();
  }, effectiveTimeoutMs);
  if (typeof timer.unref === 'function') timer.unref();

  if (child.stdin) {
    child.stdin.on('error', (err) => {
      if (err && err.code !== 'EPIPE') {
        // Surface non-EPIPE stdin errors via stderr buffer
        stderrChunks.push(Buffer.from(`\n[stdin error] ${err.message}\n`));
      }
    });

    // Pipe prompt to stdin; tolerate EPIPE if the child closes stdin early.
    try {
      if (prompt) {
        child.stdin.write(prompt);
      }
      child.stdin.end();
    } catch (err) {
      if (err && err.code !== 'EPIPE') {
        clearTimeout(timer);
        // We are abandoning the child: stop it (it would otherwise wait on
        // stdin forever) and mark the exit promise as handled.
        exited.catch(() => {});
        try { child.kill(); } catch { /* ignore */ }
        throw err;
      }
    }
  }

  const { code, signal } = await exited.finally(() => clearTimeout(timer));

  const stdout = Buffer.concat(stdoutChunks).toString('utf-8');
  const stderr = Buffer.concat(stderrChunks).toString('utf-8');

  let outFileContent = null;
  if (outFile) {
    try {
      outFileContent = await readFile(outFile, 'utf-8');
    } catch {
      // Best effort: a missing or unreadable output file is reported as null.
      outFileContent = null;
    }
  }

  if (timedOut) {
    const err = new Error(`CLI '${command}' timed out after ${effectiveTimeoutMs}ms`);
    err.code = 'CLI_TIMEOUT';
    err.stdout = stdout;
    err.stderr = stderr;
    throw err;
  }

  return { stdout, stderr, code, signal, outFileContent };
}
|
|
113
|
+
|
|
114
|
+
/**
 * Reserve a file path inside a freshly created, uniquely named temp directory.
 *
 * Only the directory is created; the file itself is not. The returned
 * `cleanup` removes the whole directory recursively and never rejects.
 *
 * @param {string} prefix - Directory name prefix (a '-' and random suffix are appended)
 * @param {string} fileName - File name to place inside the new directory
 * @returns {Promise<{ filePath: string, cleanup: () => Promise<void> }>}
 */
export async function makeTmpOutFile(prefix, fileName) {
  const dirPrefix = path.join(os.tmpdir(), `${prefix}-`);
  const scratchDir = await mkdtemp(dirPrefix);

  const cleanup = async () => {
    // Best effort: a failed removal is deliberately ignored.
    await rm(scratchDir, { recursive: true, force: true }).catch(() => {});
  };

  return {
    filePath: path.join(scratchDir, fileName),
    cleanup
  };
}
|