browserwire 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +113 -0
- package/cli/api/bridge.js +64 -0
- package/cli/api/openapi.js +175 -0
- package/cli/api/router.js +280 -0
- package/cli/api/swagger-ui.js +26 -0
- package/cli/discovery/classify.js +304 -0
- package/cli/discovery/compile.js +392 -0
- package/cli/discovery/enrich.js +376 -0
- package/cli/discovery/entities.js +356 -0
- package/cli/discovery/llm-client.js +352 -0
- package/cli/discovery/locators.js +326 -0
- package/cli/discovery/perceive.js +476 -0
- package/cli/discovery/session.js +930 -0
- package/cli/discovery/synthesize-workflows.js +295 -0
- package/cli/index.js +63 -0
- package/cli/manifest-store.js +140 -0
- package/cli/server.js +539 -0
- package/extension/background.js +1512 -0
- package/extension/content-script.js +491 -0
- package/extension/discovery.js +495 -0
- package/extension/executor.js +392 -0
- package/extension/icons/icon-128.png +0 -0
- package/extension/icons/icon-16.png +0 -0
- package/extension/icons/icon-48.png +0 -0
- package/extension/manifest.json +33 -0
- package/extension/shared/protocol.js +50 -0
- package/extension/sidepanel.html +277 -0
- package/extension/sidepanel.js +211 -0
- package/extension/vendor/LICENSE +22 -0
- package/extension/vendor/rrweb-record.min.js +84 -0
- package/package.json +49 -0
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* entities.js — Stage 4: Entity Grouping
|
|
3
|
+
*
|
|
4
|
+
* Runs on the CLI server. Takes ScannedElement[] + A11yInfo[] + InteractableElement[]
|
|
5
|
+
* and clusters related elements into semantic EntityCandidates.
|
|
6
|
+
*
|
|
7
|
+
* Grouping heuristics (in priority order):
|
|
8
|
+
* 1. data-testid boundaries
|
|
9
|
+
* 2. Landmark roles (form, navigation, main, dialog, etc.)
|
|
10
|
+
* 3. Semantic containers (article, section, fieldset, li, tr, etc.)
|
|
11
|
+
* 4. Repeated structure (parent with multiple children of same tag)
|
|
12
|
+
*
|
|
13
|
+
* @typedef {{ candidateId: string, name: string, source: string, rootScanId: number, memberScanIds: number[], signals: Array<{kind: string, value: string, weight: number}>, interactableScanIds: number[] }} EntityCandidate
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// Element tags treated as natural entity boundaries (Pass 3 of grouping).
const SEMANTIC_CONTAINER_TAGS = new Set([
  "article",
  "section",
  "fieldset",
  "li",
  "tr",
  "details",
  "dialog",
  "figure",
  "blockquote"
]);

// ARIA roles treated as natural entity boundaries (Pass 2 of grouping).
const LANDMARK_ROLES = new Set([
  "form",
  "navigation",
  "main",
  "region",
  "complementary",
  "dialog",
  "banner",
  "contentinfo",
  "search",
  "alertdialog",
  "group"
]);

// Tags that already qualify as a landmark, mapped to the role they stand for
// (lets a landmark tag be detected once, even without an explicit role).
const LANDMARK_TAG_ROLES = new Map(
  Object.entries({
    nav: "navigation",
    main: "main",
    form: "form",
    aside: "complementary",
    header: "banner",
    footer: "contentinfo",
    dialog: "dialog"
  })
);

// Heading tags h1..h6, consulted when deriving an entity name.
const HEADING_TAGS = new Set(Array.from({ length: 6 }, (_, level) => `h${level + 1}`));
|
|
40
|
+
|
|
41
|
+
/**
 * Collect a scanId together with all of its descendants, in breadth-first order.
 *
 * @param {number} rootScanId - scanId of the subtree root; always the first entry of the result.
 * @param {Map<number, {childScanIds?: number[]}>} elementMap - scanId → element lookup.
 *   Ids missing from the map are still reported but contribute no children.
 * @returns {number[]} The root followed by every reachable descendant scanId, each listed once.
 */
const collectDescendants = (rootScanId, elementMap) => {
  const result = [rootScanId];
  const seen = new Set(result);

  // `result` doubles as the BFS queue: a read cursor walks it while children are
  // appended, which avoids the O(n) cost of Array#shift on every step.
  for (let cursor = 0; cursor < result.length; cursor++) {
    const children = elementMap.get(result[cursor])?.childScanIds;
    if (!children) continue;

    for (const childId of children) {
      // A malformed scan with cyclic or duplicated child references must not
      // loop forever or report the same member twice.
      if (seen.has(childId)) continue;
      seen.add(childId);
      result.push(childId);
    }
  }

  return result;
};
|
|
59
|
+
|
|
60
|
+
/**
 * Derive a human-readable name for an entity rooted at the given element.
 *
 * Priority:
 * 1. aria-label on the root (trimmed, capped at 80 chars)
 * 1b. accessible name of the root, if it is at most 80 chars
 * 2. First heading (h1–h6) among the members
 * 3. First <legend> among the members (for fieldsets)
 * 4. data-testid value (titlecased, capped at 80 chars)
 * 5. Tag + role fallback
 *
 * @param {{tagName: string, attributes?: Record<string, string>}} rootElement - entity root.
 * @param {{role?: string, name?: string}|null} rootA11y - a11y info for the root, if any.
 * @param {number[]} memberScanIds - scanIds belonging to the entity, searched in order.
 * @param {Map<number, object>} elementMap - scanId → element lookup.
 * @param {Map<number, object>} a11yMap - scanId → a11y info lookup.
 * @returns {string} The derived name.
 */
const deriveName = (rootElement, rootA11y, memberScanIds, elementMap, a11yMap) => {
  // Elements without an attribute bag are treated as having no attributes.
  const attributes = rootElement.attributes ?? {};

  // 1. aria-label
  const ariaLabel = attributes["aria-label"]?.trim();
  if (ariaLabel) {
    return ariaLabel.slice(0, 80);
  }

  // 1b. a11y name from the root — only when short enough to be a label
  // rather than the container's full text content.
  const rootName = rootA11y?.name?.trim();
  if (rootName && rootName.length <= 80) {
    return rootName;
  }

  // 2. First heading with usable text. A blank a11y name falls back to the
  // heading's own text content instead of discarding the heading.
  for (const sid of memberScanIds) {
    const el = elementMap.get(sid);
    if (!el || !HEADING_TAGS.has(el.tagName)) continue;

    const text = a11yMap.get(sid)?.name?.trim() || el.textContent?.trim() || "";
    if (text) {
      return text.slice(0, 80);
    }
  }

  // 3. First <legend> with usable text
  for (const sid of memberScanIds) {
    const el = elementMap.get(sid);
    if (!el || el.tagName !== "legend") continue;

    const text = el.textContent?.trim() || "";
    if (text) {
      return text.slice(0, 80);
    }
  }

  // 4. data-testid, e.g. "user_profile-card" → "User Profile Card".
  // A testid made only of separators yields nothing and falls through.
  const testid = attributes["data-testid"];
  if (testid) {
    const titled = testid
      .replace(/[-_]/g, " ")
      .replace(/\b\w/g, (c) => c.toUpperCase())
      .trim()
      .slice(0, 80);
    if (titled) {
      return titled;
    }
  }

  // 5. Fallback: tag, qualified by the role when it is meaningful
  const role = rootA11y?.role || "";
  const tag = rootElement.tagName;
  return role && role !== "none" ? `${tag} (${role})` : tag;
};
|
|
126
|
+
|
|
127
|
+
/**
 * Build the list of signals describing an entity root.
 *
 * Emits, in order and only when the underlying value is present:
 * a role signal (0.8), a text signal from the a11y name (0.6, names of at most
 * 100 chars), a data-testid attribute signal (0.9) and an aria-label attribute
 * signal (0.7).
 *
 * @param {{attributes?: Record<string, string>}} rootElement - entity root element.
 * @param {{role?: string, name?: string}|null} rootA11y - a11y info for the root, if any.
 * @returns {Array<{kind: string, value: string, weight: number}>}
 */
const buildSignals = (rootElement, rootA11y) => {
  // Elements without an attribute bag are treated as having no attributes.
  const attributes = rootElement.attributes ?? {};
  const signals = [];

  // Role signal
  if (rootA11y?.role) {
    signals.push({ kind: "role", value: rootA11y.role, weight: 0.8 });
  }

  // Text signal (from a11y name). A whitespace-only name trims to "" and is
  // skipped, so no empty text signal is emitted.
  const a11yName = rootA11y?.name?.trim();
  if (a11yName && a11yName.length <= 100) {
    signals.push({ kind: "text", value: a11yName, weight: 0.6 });
  }

  // data-testid as attribute signal
  const testid = attributes["data-testid"];
  if (testid) {
    signals.push({ kind: "attribute", value: `data-testid:${testid}`, weight: 0.9 });
  }

  // aria-label as attribute signal
  const ariaLabel = attributes["aria-label"];
  if (ariaLabel) {
    signals.push({ kind: "attribute", value: `aria-label:${ariaLabel}`, weight: 0.7 });
  }

  return signals;
};
|
|
157
|
+
|
|
158
|
+
/**
 * Detect repeated structure: a parent that has at least two unclaimed children
 * with the same tag name, suggesting a list-like pattern. Each such child
 * becomes its own candidate and its whole subtree is marked as claimed.
 *
 * Repeating <div>/<span> children are only accepted when at least two of
 * those repeating children carry a meaningful a11y role.
 *
 * @param {Array<{scanId: number, tagName: string, childScanIds?: number[]}>} elements
 * @param {Map<number, object>} elementMap - scanId → element lookup.
 * @param {Map<number, {role?: string}>} a11yMap - scanId → a11y info lookup.
 * @param {Set<number>} claimed - scanIds already owned by an entity; mutated in place.
 * @returns {Array<{rootScanId: number, source: string, memberScanIds: number[], name: string, signals: Array}>}
 */
const findRepeatedStructures = (elements, elementMap, a11yMap, claimed) => {
  const candidates = [];

  for (const el of elements) {
    if (claimed.has(el.scanId)) continue;

    // Leaf elements may come without a childScanIds array.
    const childIds = el.childScanIds ?? [];
    if (childIds.length < 2) continue;

    // Group the still-unclaimed children by tag, keeping document order.
    const childrenByTag = new Map();
    for (const childId of childIds) {
      const child = elementMap.get(childId);
      if (!child || claimed.has(childId)) continue;

      const group = childrenByTag.get(child.tagName);
      if (group) {
        group.push(childId);
      } else {
        childrenByTag.set(child.tagName, [childId]);
      }
    }

    for (const [tag, groupIds] of childrenByTag) {
      // Only tags that repeat at least twice form a pattern.
      if (groupIds.length < 2) continue;

      // Generic divs/spans carry no semantic meaning of their own: require
      // roles on the repeating children themselves, not on unrelated siblings.
      if (tag === "div" || tag === "span") {
        const withRoles = groupIds.filter((cid) => {
          const role = a11yMap.get(cid)?.role;
          return role && role !== "none";
        });
        if (withRoles.length < 2) continue;
      }

      // Each repeating child becomes its own entity.
      for (const childId of groupIds) {
        // Re-check: an earlier sibling's subtree may have claimed this id.
        if (claimed.has(childId)) continue;

        const child = elementMap.get(childId);
        const memberIds = collectDescendants(childId, elementMap);
        const childA11y = a11yMap.get(childId) || null;

        candidates.push({
          rootScanId: childId,
          source: "repeated_structure",
          memberScanIds: memberIds,
          name: deriveName(child, childA11y, memberIds, elementMap, a11yMap),
          signals: buildSignals(child, childA11y)
        });

        // Claim all members
        for (const mid of memberIds) {
          claimed.add(mid);
        }
      }
    }
  }

  return candidates;
};
|
|
218
|
+
|
|
219
|
+
/**
 * Group all elements into entity candidates.
 *
 * Runs four passes in priority order — data-testid roots, landmark tags/roles,
 * semantic container tags, repeated structures. Once an element is claimed by
 * an entity (as root or as descendant) later passes skip it.
 *
 * @param {Array} elements - ScannedElement[]
 * @param {Array} a11yEntries - A11yInfo[]
 * @param {Array} interactables - InteractableElement[]
 * @returns {{ entities: EntityCandidate[], stats: { total: number, entityCount: number, bySource: Record<string, number>, unclaimedElements: number } }}
 */
export const groupEntities = (elements, a11yEntries, interactables) => {
  // Build lookup maps
  const elementMap = new Map(elements.map((el) => [el.scanId, el]));
  const a11yMap = new Map(a11yEntries.map((entry) => [entry.scanId, entry]));
  const interactableSet = new Set(interactables.map((item) => item.scanId));

  // Track which scanIds have been claimed by an entity
  const claimed = new Set();
  const rawCandidates = [];

  // Record `el` as an entity root of the given source and claim its subtree.
  const claimSubtree = (el, source) => {
    const memberIds = collectDescendants(el.scanId, elementMap);
    const a11y = a11yMap.get(el.scanId) || null;

    rawCandidates.push({
      rootScanId: el.scanId,
      source,
      memberScanIds: memberIds,
      name: deriveName(el, a11y, memberIds, elementMap, a11yMap),
      signals: buildSignals(el, a11y)
    });

    for (const mid of memberIds) {
      claimed.add(mid);
    }
  };

  // --- Pass 1: data-testid boundaries (highest priority) ---
  for (const el of elements) {
    if (claimed.has(el.scanId)) continue;
    if (el.attributes?.["data-testid"]) {
      claimSubtree(el, "testid");
    }
  }

  // --- Pass 2: Landmark roles (by tag or by explicit role) ---
  for (const el of elements) {
    if (claimed.has(el.scanId)) continue;

    const role = a11yMap.get(el.scanId)?.role || "";
    if (LANDMARK_TAG_ROLES.has(el.tagName) || LANDMARK_ROLES.has(role)) {
      claimSubtree(el, "landmark");
    }
  }

  // --- Pass 3: Semantic containers ---
  for (const el of elements) {
    if (claimed.has(el.scanId)) continue;
    if (SEMANTIC_CONTAINER_TAGS.has(el.tagName)) {
      claimSubtree(el, "semantic_container");
    }
  }

  // --- Pass 4: Repeated structure ---
  rawCandidates.push(...findRepeatedStructures(elements, elementMap, a11yMap, claimed));

  // --- Assign IDs and calculate interactable membership ---
  // Every raw candidate is kept, including ones without interactable members.
  const bySource = {};
  const entities = rawCandidates.map((raw, entityIndex) => {
    bySource[raw.source] = (bySource[raw.source] || 0) + 1;

    return {
      candidateId: `entity_${entityIndex}`,
      name: raw.name,
      source: raw.source,
      rootScanId: raw.rootScanId,
      memberScanIds: raw.memberScanIds,
      signals: raw.signals,
      interactableScanIds: raw.memberScanIds.filter((id) => interactableSet.has(id))
    };
  });

  // Count scanned elements directly: `claimed` may hold ids that never appear
  // in `elements` (dangling child references), so a size subtraction can go negative.
  const unclaimedElements = elements.filter((el) => !claimed.has(el.scanId)).length;

  return {
    entities,
    stats: {
      total: elements.length,
      entityCount: entities.length,
      bySource,
      unclaimedElements
    }
  };
};
|
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llm-client.js — Thin LLM provider abstraction for Stage 7.
|
|
3
|
+
*
|
|
4
|
+
* Supports OpenAI-compatible, Anthropic, and Ollama endpoints via
|
|
5
|
+
* environment variables:
|
|
6
|
+
*
|
|
7
|
+
* BROWSERWIRE_LLM_PROVIDER = openai | gemini | anthropic | ollama
|
|
8
|
+
* BROWSERWIRE_LLM_MODEL = model name (default varies by provider)
|
|
9
|
+
* BROWSERWIRE_LLM_API_KEY = API key (not needed for ollama)
|
|
10
|
+
* BROWSERWIRE_LLM_BASE_URL = custom endpoint (default varies by provider)
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// Maximum number of retries after a 429 (rate limited) response.
const MAX_RETRIES = 3;
// Base delay for the exponential backoff used when the server gives no retry hint.
const BASE_RETRY_DELAY_MS = 5000;

// Per-provider defaults: base URL, default model and request path.
const PROVIDER_DEFAULTS = {
  openai: {
    baseUrl: "https://api.openai.com/v1",
    model: "gpt-4o",
    path: "/chat/completions"
  },
  gemini: {
    baseUrl: "https://generativelanguage.googleapis.com/v1beta/openai",
    model: "gemini-2.5-flash",
    path: "/chat/completions"
  },
  anthropic: {
    baseUrl: "https://api.anthropic.com",
    model: "claude-sonnet-4-20250514",
    path: "/v1/messages"
  },
  ollama: {
    baseUrl: "http://localhost:11434",
    model: "llama3",
    // Ollama requests are sent in OpenAI chat-completions shape and the reply is
    // read from `choices[0].message.content`, so the default must be Ollama's
    // OpenAI-compatible endpoint rather than its native `/api/chat`.
    path: "/v1/chat/completions"
  }
};
|
|
38
|
+
|
|
39
|
+
/**
 * Read LLM configuration from environment variables.
 *
 * The provider name is matched case-insensitively and ignoring surrounding
 * whitespace. Model and base URL fall back to the provider defaults; the API
 * key falls back to an empty string.
 *
 * @returns {{provider: string, model: string, apiKey: string, baseUrl: string, path: string}|null}
 *   The resolved configuration, or null if no (or an unknown) provider is configured.
 */
export const getLLMConfig = () => {
  const rawProvider = process.env.BROWSERWIRE_LLM_PROVIDER;
  const provider = rawProvider?.trim().toLowerCase();
  if (!provider) return null;

  // Own-property check: a plain lookup would accept inherited keys such as
  // "constructor" or "toString" as if they were providers.
  if (!Object.prototype.hasOwnProperty.call(PROVIDER_DEFAULTS, provider)) {
    console.warn(`[browserwire-cli] unknown LLM provider: ${rawProvider}`);
    return null;
  }

  const defaults = PROVIDER_DEFAULTS[provider];
  const baseUrl = process.env.BROWSERWIRE_LLM_BASE_URL || defaults.baseUrl;

  return {
    provider,
    model: process.env.BROWSERWIRE_LLM_MODEL || defaults.model,
    apiKey: process.env.BROWSERWIRE_LLM_API_KEY || "",
    // Callers join baseUrl and path by plain concatenation; strip trailing
    // slashes so a user-supplied "http://host/v1/" does not produce "//".
    baseUrl: baseUrl.replace(/\/+$/, ""),
    path: defaults.path
  };
};
|
|
61
|
+
|
|
62
|
+
/**
 * Send a vision prompt (screenshot + text) to the configured LLM.
 *
 * Logs an approximate token count, then dispatches to the Anthropic vision
 * call when the provider is "anthropic" and to the OpenAI vision call for
 * every other provider.
 *
 * @param {string} systemPrompt
 * @param {string} screenshotBase64 - base64-encoded JPEG
 * @param {string} textContent - HTML skeleton + page context string
 * @param {object} config - from getLLMConfig()
 * @returns {Promise<string>}
 * @throws {Error} "LLM not configured" when config is missing.
 */
export const callLLMWithVision = async (systemPrompt, screenshotBase64, textContent, config) => {
  if (!config) {
    throw new Error("LLM not configured");
  }

  // Rough estimate: four characters per token of text, plus a flat 85
  // (presumably an allowance for the image — confirm).
  const textChars = systemPrompt.length + textContent.length;
  const approxTokens = Math.round(textChars / 4) + 85;
  console.log(
    `[browserwire-cli] vision LLM call → ${config.provider}/${config.model} (~${approxTokens} tokens total)`
  );

  const endpoint = `${config.baseUrl}${config.path}`;
  const send = config.provider === "anthropic" ? callAnthropicVision : callOpenAIVision;

  return send(endpoint, systemPrompt, screenshotBase64, textContent, config);
};
|
|
92
|
+
|
|
93
|
+
/**
 * OpenAI-style vision request: system prompt plus a user message carrying the
 * screenshot (as a JPEG data URL) followed by the text content.
 *
 * A 429 response is retried up to MAX_RETRIES times, waiting for the delay
 * reported by getRetryDelay; any other non-OK response throws.
 *
 * @param {string} url - full endpoint URL.
 * @param {string} systemPrompt
 * @param {string} screenshotBase64 - base64-encoded JPEG
 * @param {string} textContent
 * @param {object} config - provides `model` and optional `apiKey`.
 * @returns {Promise<string>} Content of the first choice, or "" when absent.
 * @throws {Error} "LLM API error <status>: <body>" on a non-OK response.
 */
const callOpenAIVision = async (url, systemPrompt, screenshotBase64, textContent, config) => {
  const headers = { "Content-Type": "application/json" };
  // The Authorization header is only sent when a key is configured.
  if (config.apiKey) {
    headers["Authorization"] = `Bearer ${config.apiKey}`;
  }

  const userParts = [
    {
      type: "image_url",
      image_url: { url: `data:image/jpeg;base64,${screenshotBase64}` }
    },
    { type: "text", text: textContent }
  ];

  const payload = JSON.stringify({
    model: config.model,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userParts }
    ],
    temperature: 0.2,
    response_format: { type: "json_object" }
  });

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, { method: "POST", headers, body: payload });

    const rateLimited = response.status === 429;
    if (rateLimited && attempt < MAX_RETRIES) {
      const delay = await getRetryDelay(response, attempt);
      console.log(`[browserwire-cli] rate limited, retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(delay);
      continue;
    }

    if (!response.ok) {
      const errorText = await response.text().catch(() => "unknown");
      throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }

    const data = await response.json();
    return data.choices?.[0]?.message?.content || "";
  }
};
|
|
144
|
+
|
|
145
|
+
/**
 * Anthropic Messages vision request: the system prompt goes in `system`, the
 * user message carries the screenshot as a base64 JPEG image block followed
 * by the text content.
 *
 * A 429 response is retried up to MAX_RETRIES times, waiting for the delay
 * reported by getRetryDelay; any other non-OK response throws.
 *
 * @param {string} url - full endpoint URL.
 * @param {string} systemPrompt
 * @param {string} screenshotBase64 - base64-encoded JPEG
 * @param {string} textContent
 * @param {object} config - provides `model` and `apiKey`.
 * @returns {Promise<string>} Text of the first text block in the reply, or "" when absent.
 * @throws {Error} "Anthropic API error <status>: <body>" on a non-OK response.
 */
const callAnthropicVision = async (url, systemPrompt, screenshotBase64, textContent, config) => {
  const headers = {
    "Content-Type": "application/json",
    "x-api-key": config.apiKey,
    "anthropic-version": "2023-06-01"
  };

  const screenshotBlock = {
    type: "image",
    source: {
      type: "base64",
      media_type: "image/jpeg",
      data: screenshotBase64
    }
  };

  const payload = JSON.stringify({
    model: config.model,
    max_tokens: 4096,
    system: systemPrompt,
    messages: [
      {
        role: "user",
        content: [screenshotBlock, { type: "text", text: textContent }]
      }
    ]
  });

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, { method: "POST", headers, body: payload });

    const rateLimited = response.status === 429;
    if (rateLimited && attempt < MAX_RETRIES) {
      const delay = await getRetryDelay(response, attempt);
      console.log(`[browserwire-cli] rate limited, retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(delay);
      continue;
    }

    if (!response.ok) {
      const errorText = await response.text().catch(() => "unknown");
      throw new Error(`Anthropic API error ${response.status}: ${errorText}`);
    }

    const data = await response.json();
    const firstText = data.content?.find((block) => block.type === "text");
    return firstText?.text || "";
  }
};
|
|
201
|
+
|
|
202
|
+
/**
 * Send a system prompt and a user message to the configured LLM and return
 * the raw text response.
 *
 * Logs an approximate input size, then dispatches to the Anthropic call when
 * the provider is "anthropic" and to the OpenAI-compatible call for every
 * other provider (including Ollama).
 *
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @param {object} config - from getLLMConfig()
 * @returns {Promise<string>}
 * @throws {Error} "LLM not configured" when config is missing.
 */
export const callLLM = async (systemPrompt, userMessage, config) => {
  if (!config) {
    throw new Error("LLM not configured");
  }

  // Rough estimate: four characters per token.
  const inputChars = systemPrompt.length + userMessage.length;
  const approxTokens = Math.round(inputChars / 4);
  const inputKb = Math.round(inputChars / 1024);
  console.log(
    `[browserwire-cli] LLM call → ${config.provider}/${config.model} (~${approxTokens} tokens, ${inputKb}KB input)`
  );

  const endpoint = `${config.baseUrl}${config.path}`;
  const send = config.provider === "anthropic" ? callAnthropic : callOpenAICompatible;

  return send(endpoint, systemPrompt, userMessage, config);
};
|
|
232
|
+
|
|
233
|
+
/**
 * Return a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - delay handed to setTimeout.
 * @returns {Promise<void>}
 */
const sleep = (ms) =>
  new Promise((wake) => {
    setTimeout(wake, ms);
  });
|
|
237
|
+
|
|
238
|
+
/**
 * Work out how long to wait before retrying after a 429 response.
 *
 * Sources, in priority order:
 * 1. Retry-After header — either a number of seconds or an HTTP-date.
 * 2. A "try again in <N>s" / "try again in <N>ms" hint in the response body.
 * 3. Exponential backoff: BASE_RETRY_DELAY_MS * 2^attempt.
 *
 * Hints that are missing, unparsable or not strictly positive are ignored and
 * the next source is tried.
 *
 * @param {Response} response - the 429 response; its body is read from a clone.
 * @param {number} attempt - zero-based attempt index, used for the backoff fallback.
 * @returns {Promise<number>} Delay in milliseconds.
 */
const getRetryDelay = async (response, attempt) => {
  // Check Retry-After header
  const retryAfter = response.headers.get("retry-after");
  if (retryAfter) {
    const seconds = Number.parseFloat(retryAfter);
    if (Number.isFinite(seconds)) {
      if (seconds > 0) {
        return Math.ceil(seconds * 1000);
      }
    } else {
      // Retry-After may also carry an HTTP-date naming when to retry.
      const waitMs = Date.parse(retryAfter) - Date.now();
      if (Number.isFinite(waitMs) && waitMs > 0) {
        return Math.ceil(waitMs);
      }
    }
  }

  // Try to parse delay hint from error body (OpenAI includes "Please try again in Xs")
  try {
    // Read from a clone so the original body stays unread.
    const text = await response.clone().text();
    const match = text.match(/try again in ([\d.]+)(ms|s)/i);
    if (match) {
      const unitMs = match[2].toLowerCase() === "ms" ? 1 : 1000;
      const hintedMs = Number.parseFloat(match[1]) * unitMs;
      // A malformed number such as "." must not turn into a NaN delay.
      if (Number.isFinite(hintedMs) && hintedMs > 0) {
        return Math.ceil(hintedMs);
      }
    }
  } catch {
    // Best effort: an unreadable body just means no hint; use the backoff below.
  }

  // Exponential backoff fallback
  return BASE_RETRY_DELAY_MS * 2 ** attempt;
};
|
|
264
|
+
|
|
265
|
+
/**
 * OpenAI-compatible chat completion request (also used for Ollama).
 *
 * Sends the system prompt and user message, requesting a JSON-object reply.
 * A 429 response is retried up to MAX_RETRIES times, waiting for the delay
 * reported by getRetryDelay; any other non-OK response throws.
 *
 * @param {string} url - full endpoint URL.
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @param {object} config - provides `model` and optional `apiKey`.
 * @returns {Promise<string>} Content of the first choice, or "" when absent.
 * @throws {Error} "LLM API error <status>: <body>" on a non-OK response.
 */
const callOpenAICompatible = async (url, systemPrompt, userMessage, config) => {
  const headers = { "Content-Type": "application/json" };
  // The Authorization header is only sent when a key is configured.
  if (config.apiKey) {
    headers["Authorization"] = `Bearer ${config.apiKey}`;
  }

  const payload = JSON.stringify({
    model: config.model,
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: userMessage }
    ],
    temperature: 0.2,
    response_format: { type: "json_object" }
  });

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, { method: "POST", headers, body: payload });

    const rateLimited = response.status === 429;
    if (rateLimited && attempt < MAX_RETRIES) {
      const delay = await getRetryDelay(response, attempt);
      console.log(`[browserwire-cli] rate limited, retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(delay);
      continue;
    }

    if (!response.ok) {
      const errorText = await response.text().catch(() => "unknown");
      throw new Error(`LLM API error ${response.status}: ${errorText}`);
    }

    const data = await response.json();
    return data.choices?.[0]?.message?.content || "";
  }
};
|
|
309
|
+
|
|
310
|
+
/**
 * Anthropic Messages API request with a system prompt and a single user message.
 *
 * A 429 response is retried up to MAX_RETRIES times, waiting for the delay
 * reported by getRetryDelay; any other non-OK response throws.
 *
 * @param {string} url - full endpoint URL.
 * @param {string} systemPrompt
 * @param {string} userMessage
 * @param {object} config - provides `model` and `apiKey`.
 * @returns {Promise<string>} Text of the first text block in the reply, or "" when absent.
 * @throws {Error} "Anthropic API error <status>: <body>" on a non-OK response.
 */
const callAnthropic = async (url, systemPrompt, userMessage, config) => {
  const headers = {
    "Content-Type": "application/json",
    "x-api-key": config.apiKey,
    "anthropic-version": "2023-06-01"
  };

  const payload = JSON.stringify({
    model: config.model,
    max_tokens: 4096,
    system: systemPrompt,
    messages: [{ role: "user", content: userMessage }]
  });

  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
    const response = await fetch(url, { method: "POST", headers, body: payload });

    const rateLimited = response.status === 429;
    if (rateLimited && attempt < MAX_RETRIES) {
      const delay = await getRetryDelay(response, attempt);
      console.log(`[browserwire-cli] rate limited, retrying in ${Math.round(delay / 1000)}s (attempt ${attempt + 1}/${MAX_RETRIES})`);
      await sleep(delay);
      continue;
    }

    if (!response.ok) {
      const errorText = await response.text().catch(() => "unknown");
      throw new Error(`Anthropic API error ${response.status}: ${errorText}`);
    }

    const data = await response.json();
    const firstText = data.content?.find((block) => block.type === "text");
    return firstText?.text || "";
  }
};
|