@awi-protocol/sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +195 -0
- package/dist/index.d.mts +382 -0
- package/dist/index.d.ts +382 -0
- package/dist/index.js +766 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +733 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +72 -0
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,733 @@
|
|
|
1
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
+
}) : x)(function(x) {
|
|
4
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
+
});
|
|
7
|
+
|
|
8
|
+
// src/client.ts
|
|
9
|
+
import fetch from "cross-fetch";
|
|
10
|
+
/**
 * Error raised by the SDK for failed AWI requests.
 *
 * Carries, in addition to the standard `message`:
 *   - `code`:       machine-readable error code string
 *   - `statusCode`: HTTP status associated with the failure
 *   - `details`:    optional extra payload supplied by the thrower
 */
class AWIError extends Error {
  code;
  statusCode;
  details;

  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {number} statusCode - HTTP status associated with the failure.
   * @param {*} [details] - Optional extra payload.
   */
  constructor(code, message, statusCode, details) {
    super(message);
    const fields = { code, statusCode, details };
    for (const key of Object.keys(fields)) {
      this[key] = fields[key];
    }
    this.name = "AWIError";
  }
}
|
|
22
|
+
/**
 * HTTP client for an AWI server.
 *
 * Every request except `health()` goes through `_request`, which sends JSON,
 * attaches the agent certificate header, applies a per-attempt timeout and
 * retries failures other than 4xx responses with exponential backoff.
 */
class AWIClient {
  endpoint;
  certificate;
  timeout;
  retries;

  /**
   * @param {object} options
   * @param {string} options.endpoint - Base URL of the server; trailing slashes are removed.
   * @param {string} [options.certificate] - Value sent as `AWI-Agent-Certificate`.
   * @param {number} [options.timeout] - Per-attempt timeout in ms (falsy -> 30000).
   * @param {number} [options.retries] - Maximum attempts per request (falsy -> 3).
   */
  constructor(options) {
    // Strip every trailing slash so `${endpoint}${path}` never contains "//".
    this.endpoint = options.endpoint.replace(/\/+$/, "");
    this.certificate = options.certificate;
    this.timeout = options.timeout || 3e4;
    this.retries = options.retries || 3;
  }

  /**
   * Execute a recipe in proxy mode.
   * The server runs the browser and returns structured data.
   * `mode` is always forced to "proxy", overriding any value in `request`.
   */
  async execute(request) {
    return this._request("/v1/execute", {
      ...request,
      mode: "proxy"
    });
  }

  /**
   * Get advisory blueprint for agent-side execution.
   * Returns the recipe structure without running the browser.
   */
  async getAdvisory(target) {
    return this._request("/v1/advisory", { target });
  }

  /**
   * Execute in advisory mode - get blueprint then run locally.
   * This is useful when you want to control the browser yourself.
   *
   * @param {object} request - Must carry `target`; `params` is forwarded to the executor.
   * @param {Function} localExecutor - Called as `localExecutor(blueprint, request.params)`.
   * @returns {Promise<object>} The failed advisory response with `data: null` when no
   *   blueprint is available; otherwise a response built from the executor result.
   *   Executor exceptions are reported as a `LOCAL_EXECUTION_ERROR` entry, not thrown.
   */
  async executeAdvisory(request, localExecutor) {
    const advisory = await this.getAdvisory(request.target);
    if (!advisory.success || !advisory.data) {
      return {
        ...advisory,
        data: null
      };
    }
    const startTime = Date.now();
    try {
      const data = await localExecutor(advisory.data, request.params);
      const latency = Date.now() - startTime;
      return {
        success: true,
        data,
        errors: [],
        metadata: {
          ...advisory.metadata,
          latency_ms: latency,
          mode: "advisory-local"
        },
        execution_path: ["advisory", "local-execution"],
        axir_intent: advisory.axir_intent
      };
    } catch (error) {
      return {
        success: false,
        data: null,
        errors: [{
          code: "LOCAL_EXECUTION_ERROR",
          message: error instanceof Error ? error.message : "Unknown error"
        }],
        metadata: advisory.metadata,
        execution_path: ["advisory", "local-execution-failed"]
      };
    }
  }

  /**
   * Submit feedback on execution quality.
   */
  async feedback(request) {
    return this._request("/v1/feedback", request);
  }

  /**
   * Explore an unknown domain and generate a recipe.
   * Builds the target `awi://<domain>/<resource || action>/<action>/v1`.
   */
  async explore(domain, action, resource) {
    const target = `awi://${domain}/${resource || action}/${action}/v1`;
    return this._request("/v1/execute", {
      target,
      params: {},
      mode: "proxy",
      options: { explore: true }
    });
  }

  /**
   * List supported sites in the registry.
   *
   * @param {object} [options]
   * @param {string} [options.category]
   * @param {boolean} [options.certifiedOnly]
   * @param {number} [options.minConfidence] - `0` is sent as-is.
   * @param {string} [options.search]
   * @param {number} [options.limit] - `0` is sent as-is.
   */
  async listRegistry(options) {
    const params = new URLSearchParams();
    if (options?.category) params.set("category", options.category);
    if (options?.certifiedOnly) params.set("certified_only", "true");
    // Numeric filters: only "not provided" is skipped, so an explicit 0 survives.
    if (options?.minConfidence != null) params.set("min_confidence", String(options.minConfidence));
    if (options?.search) params.set("search", options.search);
    if (options?.limit != null) params.set("limit", String(options.limit));
    const query = params.toString();
    // Avoid a dangling "?" when no filter was supplied.
    return this._request(query ? `/v1/registry?${query}` : "/v1/registry", void 0, "GET");
  }

  /**
   * Search registry.
   *
   * @param {string} query - Sent as the `q` parameter.
   * @param {number} [limit] - Sent when provided (including 0).
   */
  async searchRegistry(query, limit) {
    const params = new URLSearchParams();
    params.set("q", query);
    if (limit != null) params.set("limit", String(limit));
    return this._request(`/v1/registry/search?${params.toString()}`, void 0, "GET");
  }

  /**
   * Delegate execution to another agent.
   */
  async delegate(request) {
    return this._request("/v1/delegate", request);
  }

  /**
   * Join a multi-agent session.
   */
  async joinSession(sessionId) {
    return this._request("/v1/session/join", { session_id: sessionId });
  }

  /**
   * Check server health.
   * A single attempt (no retries, no certificate header), bounded by `this.timeout`.
   *
   * @throws {AWIError} `HEALTH_CHECK_FAILED` when the response status is not OK.
   */
  async health() {
    const response = await this._fetchWithTimeout(`${this.endpoint}/health`, {
      method: "GET",
      headers: {
        "Accept": "application/json"
      }
    });
    if (!response.ok) {
      throw new AWIError("HEALTH_CHECK_FAILED", "Server health check failed", response.status);
    }
    return response.json();
  }

  /**
   * Get execution metrics from a response.
   * Missing or falsy metadata values fall back to 0 / [] / "miss".
   */
  getMetrics(response) {
    const metadata = response.metadata;
    return {
      latency_ms: metadata?.latency_ms || 0,
      fallback_count: metadata?.fallback_count || 0,
      selectors_used: metadata?.selectors_used || [],
      cache_status: metadata?.cache_status || "miss"
    };
  }

  /**
   * Run `fetch` with an abort signal that fires after `this.timeout` ms.
   * The timer is cleared whether the call resolves or rejects, so a failed
   * attempt never leaves a pending timer behind. The timeout covers the time
   * until `fetch` settles; reading the body afterwards is not bounded.
   */
  async _fetchWithTimeout(url, init) {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      return await fetch(url, { ...init, signal: controller.signal });
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Send a JSON request and return the parsed JSON response.
   *
   * @param {string} path2 - Path (and query) appended to the endpoint.
   * @param {*} [body] - JSON-serialized when truthy; otherwise no body is sent.
   * @param {string} [method="POST"]
   * @throws {AWIError} For non-OK responses. 4xx responses are thrown immediately;
   *   other failures are retried up to `this.retries` attempts with 1s, 2s, 4s...
   *   backoff, after which the last error is thrown.
   */
  async _request(path2, body, method = "POST") {
    const url = `${this.endpoint}${path2}`;
    const headers = {
      "Content-Type": "application/json",
      "Accept": "application/json"
    };
    // Without this guard a missing certificate is sent as the string "undefined".
    if (this.certificate != null) {
      headers["AWI-Agent-Certificate"] = this.certificate;
    }
    // Serialize once, outside the retry loop: an unserializable body is a caller
    // error and must not be retried with backoff.
    const payload = body ? JSON.stringify(body) : void 0;
    let lastError;
    for (let attempt = 0; attempt < this.retries; attempt++) {
      try {
        const response = await this._fetchWithTimeout(url, { method, headers, body: payload });
        if (!response.ok) {
          const errorData = await response.json().catch(() => ({}));
          const firstError = errorData?.errors?.[0];
          throw new AWIError(
            firstError?.code || "REQUEST_FAILED",
            firstError?.message || `HTTP ${response.status}`,
            response.status,
            errorData
          );
        }
        // Deliberately not awaited: a malformed success body rejects to the
        // caller instead of triggering a retry.
        return response.json();
      } catch (error) {
        lastError = error instanceof Error ? error : new Error(String(error));
        // Client errors will not succeed on retry.
        if (error instanceof AWIError && error.statusCode >= 400 && error.statusCode < 500) {
          throw error;
        }
        if (attempt < this.retries - 1) {
          await new Promise((resolve) => setTimeout(resolve, Math.pow(2, attempt) * 1e3));
        }
      }
    }
    throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
  }
}
var client_default = AWIClient;
|
|
213
|
+
|
|
214
|
+
// src/advisory-executor.ts
|
|
215
|
+
/**
 * Runs an advisory recipe blueprint against a caller-supplied browsing context.
 *
 * `context` must expose `document` (used for `querySelector` / `evaluate`) and
 * `window` (used for `location.href`, `scrollBy` and, when present, `Event` and
 * `XPathResult`).
 *
 * `metrics` describes the most recent `execute()` call.
 */
class AdvisoryExecutor {
  context;
  metrics;

  constructor(context) {
    this.context = context;
    this.metrics = this._newMetrics();
  }

  /**
   * Execute a recipe blueprint locally.
   *
   * Steps run in order and stop at the first step that reports an error. When
   * all steps succeed and the recipe has an `extraction` section, data is
   * extracted from the document.
   *
   * @param {object} recipe - Blueprint with `steps`, `selectors` and optional `extraction`.
   * @param {object} [params] - Values substituted into `{name}` placeholders.
   * @returns {Promise<object>} `{ success, data, errors, metrics, execution_path }`.
   *   Never rejects: unexpected exceptions become an `EXECUTION_ERROR` entry.
   *   `execution_path` lists the completed steps as `"<type>:<name|unnamed>"`.
   */
  async execute(recipe, params) {
    const startTime = Date.now();
    const errors = [];
    const executionPath = [];
    // Start from clean counters so results of earlier runs are not mixed in.
    this.metrics = this._newMetrics();
    const finish = (success, data, errorList) => {
      // Latency is recorded on every exit path, including the exception path.
      this.metrics.latency_ms = Date.now() - startTime;
      return {
        success,
        data,
        errors: errorList,
        // Copy the array too, so later runs cannot mutate a returned result.
        metrics: { ...this.metrics, selectors_used: [...this.metrics.selectors_used] },
        execution_path: executionPath
      };
    };
    try {
      for (const step of recipe.steps) {
        const stepResult = await this._executeStep(step, params, recipe);
        if (stepResult.error) {
          errors.push(stepResult.error);
          break;
        }
        executionPath.push(`${step.type}:${step.name || "unnamed"}`);
      }
      let data = null;
      if (errors.length === 0 && recipe.extraction) {
        data = await this._extractData(recipe);
      }
      return finish(errors.length === 0, data, errors);
    } catch (error) {
      return finish(false, null, [{
        code: "EXECUTION_ERROR",
        message: error instanceof Error ? error.message : "Unknown error"
      }]);
    }
  }

  /** Build a zeroed metrics record. */
  _newMetrics() {
    return {
      latency_ms: 0,
      fallback_count: 0,
      selectors_used: [],
      cache_status: "bypass"
    };
  }

  /**
   * Run one recipe step.
   *
   * @returns {Promise<object>} `{}` on success or `{ error: { code, message } }`.
   *   Exceptions are converted to a `STEP_ERROR`. Unknown step types and the
   *   `extract_list` / `extract_one` types are no-ops here.
   */
  async _executeStep(step, params, recipe) {
    try {
      switch (step.type) {
        case "navigate": {
          if (step.url) {
            this.context.window.location.href = this._interpolate(step.url, params);
          }
          break;
        }
        case "wait": {
          if (step.selector) {
            const selectorSet = recipe.selectors[step.selector];
            // A selector name missing from the recipe is skipped, as before.
            if (selectorSet) {
              await this._waitForSelector(selectorSet);
            }
          } else {
            await this._sleep(1e3);
          }
          break;
        }
        case "click": {
          if (step.selector) {
            const element = this._resolveSelector(recipe.selectors[step.selector]);
            if (!element) {
              return { error: { code: "CLICK_FAILED", message: `Selector not found: ${step.selector}` } };
            }
            element.click();
          }
          break;
        }
        case "type": {
          if (step.selector && step.value) {
            const element = this._resolveSelector(recipe.selectors[step.selector]);
            if (!element) {
              return { error: { code: "TYPE_FAILED", message: `Selector not found: ${step.selector}` } };
            }
            element.value = this._interpolate(step.value, params);
            // Prefer the Event constructor of the target window; fall back to
            // the global one when the context does not provide it.
            const EventCtor = this.context.window?.Event ?? Event;
            element.dispatchEvent(new EventCtor("input", { bubbles: true }));
          }
          break;
        }
        case "scroll": {
          this.context.window.scrollBy(0, step.amount || 500);
          break;
        }
        case "extract_list":
        case "extract_one":
          break;
      }
      return {};
    } catch (error) {
      return {
        error: {
          code: "STEP_ERROR",
          message: error instanceof Error ? error.message : "Unknown step error"
        }
      };
    }
  }

  /**
   * Extract records described by `recipe.extraction`.
   *
   * In "list" mode every child of the container yields one record; otherwise
   * the container itself yields the single record. A field whose element is
   * not found is set to `null`; a field whose selector name is not defined in
   * `recipe.selectors` is left out of the record. Transformed values are
   * stored as strings.
   *
   * @returns {Promise<*>} `null` when there is nothing to extract from, the
   *   first record in "one" mode, otherwise the array of records.
   */
  async _extractData(recipe) {
    const extraction = recipe.extraction;
    if (!extraction) return null;
    const containerSet = recipe.selectors[extraction.container];
    if (!containerSet) return null;
    const container = this._resolveSelector(containerSet);
    if (!container) return null;
    const items = extraction.mode === "list" ? Array.from(container.children) : [container];
    const results = [];
    for (const item of items) {
      const record = {};
      for (const field of extraction.fields) {
        const fieldSet = recipe.selectors[field.selector];
        if (!fieldSet) continue;
        const fieldElement = this._resolveSelectorWithin(item, fieldSet);
        if (!fieldElement) {
          record[field.name] = null;
          continue;
        }
        let value = fieldElement.textContent || "";
        if (field.transform) {
          value = String(this._applyTransform(value, field.transform));
        }
        record[field.name] = value;
      }
      results.push(record);
    }
    return extraction.mode === "one" ? results[0] : results;
  }

  /**
   * Try each selector of the set in order against the document.
   *
   * @returns {{element: *, misses: number}} The first match (or null) and how
   *   many selectors failed before it. Does not touch `metrics`.
   */
  _locate(selectorSet) {
    let misses = 0;
    for (const selector of selectorSet?.selectors ?? []) {
      const element = this._trySelector(selector.type, selector.value);
      if (element) {
        return { element, misses };
      }
      misses++;
    }
    return { element: null, misses };
  }

  /**
   * Resolve a selector set against the document and record the outcome:
   * each failed selector increments `fallback_count`, a hit appends the set
   * name to `selectors_used`. A missing selector set resolves to null.
   */
  _resolveSelector(selectorSet) {
    const { element, misses } = this._locate(selectorSet);
    this.metrics.fallback_count += misses;
    if (element) {
      this.metrics.selectors_used.push(selectorSet.name);
    }
    return element;
  }

  /**
   * Resolve a selector set relative to `parent`, honouring each selector's
   * `type` the same way document-level resolution does. Selectors without a
   * recognised type are treated as CSS. Selectors that throw (for example
   * invalid syntax) are skipped so the next candidate can be tried.
   */
  _resolveSelectorWithin(parent, selectorSet) {
    const known = ["css", "semantic", "text", "attribute"];
    for (const selector of selectorSet.selectors) {
      const type = known.includes(selector.type) ? selector.type : "css";
      let element = null;
      try {
        element = this._queryByType(parent, type, selector.value);
      } catch {
        // Best effort: an unusable selector just falls through to the next one.
        element = null;
      }
      if (element) {
        this.metrics.selectors_used.push(selectorSet.name);
        return element;
      }
    }
    return null;
  }

  /**
   * Look up one selector under `root` (the document or an element).
   *
   *   css       -> `root.querySelector(value)`
   *   semantic  -> element whose `data-testid` equals `value`
   *   text      -> first element with a text node containing `value` (XPath)
   *   attribute -> `root.querySelector("[" + value + "]")`
   *
   * May throw on invalid selectors; callers decide how to handle that.
   */
  _queryByType(root, type, value) {
    switch (type) {
      case "css":
        return root.querySelector(value);
      case "semantic": {
        // Escape quotes/backslashes so the value cannot break out of the string.
        const escaped = String(value).replace(/["\\]/g, "\\$&");
        return root.querySelector(`[data-testid="${escaped}"]`);
      }
      case "text": {
        const doc = this.context.document;
        const prefix = root === doc ? "" : ".";
        const xpath = `${prefix}//*[contains(text(), ${this._xpathLiteral(String(value))})]`;
        // FIRST_ORDERED_NODE_TYPE is 9 in the DOM XPath specification; read it
        // from the context window when available instead of relying on a global.
        const resultType = this.context.window?.XPathResult?.FIRST_ORDERED_NODE_TYPE ?? 9;
        return doc.evaluate(xpath, root, null, resultType, null).singleNodeValue;
      }
      case "attribute":
        return root.querySelector(`[${value}]`);
      default:
        return null;
    }
  }

  /** Quote `text` as an XPath 1.0 string literal, whatever quotes it contains. */
  _xpathLiteral(text) {
    if (!text.includes('"')) return `"${text}"`;
    if (!text.includes("'")) return `'${text}'`;
    return `concat(${text.split('"').map((part) => `"${part}"`).join(`, '"', `)})`;
  }

  /**
   * Look up one selector in the document. Unknown types and selectors that
   * throw both yield null.
   */
  _trySelector(type, value) {
    try {
      return this._queryByType(this.context.document, type, value);
    } catch {
      return null;
    }
  }

  /**
   * Poll until the selector set resolves or `timeout` ms elapse.
   *
   * Metrics are updated once for the whole wait (from the final poll), not
   * once per poll.
   *
   * @param {object} selectorSet
   * @param {number} [timeout=10000] - Maximum wait in ms.
   * @param {number} [pollInterval=100] - Delay between polls in ms.
   * @throws {Error} `Timeout waiting for selector: <name>` when time runs out.
   */
  async _waitForSelector(selectorSet, timeout = 1e4, pollInterval = 100) {
    const start = Date.now();
    let lastMisses = 0;
    while (Date.now() - start < timeout) {
      const { element, misses } = this._locate(selectorSet);
      if (element) {
        this.metrics.fallback_count += misses;
        this.metrics.selectors_used.push(selectorSet.name);
        return;
      }
      lastMisses = misses;
      await this._sleep(pollInterval);
    }
    this.metrics.fallback_count += lastMisses;
    throw new Error(`Timeout waiting for selector: ${selectorSet.name}`);
  }

  /**
   * Replace `{key}` placeholders with `params[key]`. Placeholders without an
   * own, defined value in `params` (or when `params` is missing) are left
   * untouched.
   */
  _interpolate(template, params) {
    const values = params ?? {};
    return template.replace(/\{([^}]+)\}/g, (match, key) => {
      // Own properties only, so "{constructor}" and similar never resolve to
      // something inherited from Object.prototype.
      if (!Object.prototype.hasOwnProperty.call(values, key)) return match;
      const value = values[key];
      return value !== void 0 ? String(value) : match;
    });
  }

  /**
   * Apply a named transform to an extracted string.
   *
   *   strip          -> trim whitespace
   *   strip_currency -> remove "$", "€", "," and whitespace
   *   number         -> keep digits and "."; parse as float when a "." is
   *                     present, otherwise as base-10 integer (sign is dropped)
   *   lower / upper  -> case conversion
   *
   * Unknown transforms return the value unchanged.
   */
  _applyTransform(value, transform) {
    switch (transform) {
      case "strip":
        return value.trim();
      case "strip_currency":
        return value.replace(/[$€,\s]/g, "").trim();
      case "number": {
        const digits = value.replace(/[^0-9.]/g, "");
        return digits.includes(".") ? parseFloat(digits) : parseInt(digits, 10);
      }
      case "lower":
        return value.toLowerCase();
      case "upper":
        return value.toUpperCase();
      default:
        return value;
    }
  }

  /** Resolve after `ms` milliseconds. */
  _sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
|
|
442
|
+
|
|
443
|
+
// src/compiler/local-axir.ts
|
|
444
|
+
import * as fs from "fs";
|
|
445
|
+
import * as path from "path";
|
|
446
|
+
import * as os from "os";
|
|
447
|
+
import * as https from "https";
|
|
448
|
+
import * as http from "http";
|
|
449
|
+
|
|
450
|
+
// src/compiler/grammar/axir-schema.gbnf
|
|
451
|
+
var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" 
string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
|
|
452
|
+
|
|
453
|
+
// src/compiler/local-axir.ts
|
|
454
|
+
// Probe for the optional native dependency. `getLlama` stays undefined and
// `nativeAvailable` is false whenever loading "node-llama-cpp" through
// `__require` throws for any reason.
// NOTE(review): `__require` throws when no `require` binding exists, which
// looks like it would always be the case when this file is loaded as an ES
// module — confirm the intended loading strategy.
let nativeAvailable;
let getLlama;
try {
  ({ getLlama } = __require("node-llama-cpp"));
  nativeAvailable = true;
} catch {
  nativeAvailable = false;
}

// Model fetched on first use when the caller does not supply its own URL/path.
const DEFAULT_MODEL_URL = "https://huggingface.co/bartowski/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
const DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
|
|
465
|
+
/**
 * Compiles AXIR documents and repairs selectors with a locally running model.
 *
 * The model file is downloaded on first use (resumable) into
 * `~/.awi/models/` unless `options.modelPath` points elsewhere.
 *
 * NOTE(review): the calls made on the object returned by `getLlama()`
 * (`new llama.LlamaModel`, `new llama.LlamaGrammar`, `sequence.evaluate(prompt)`,
 * `sequence.generateResponse`) are kept exactly as they were; verify them
 * against the node-llama-cpp version this package depends on.
 */
class LocalAXIRCompiler {
  modelPath;
  modelUrl;
  contextSize;
  gpuLayers;
  onDownloadProgress;
  onStatus;
  model = null;
  context = null;
  grammar = null;
  ready = false;

  /**
   * @param {object} [options]
   * @param {string} [options.modelPath] - Where the model file lives / is stored.
   * @param {string} [options.modelUrl] - Download URL for the model file.
   * @param {number} [options.contextSize] - Context size (falsy -> 32768).
   * @param {number} [options.gpuLayers] - GPU layers; when unset, `AWI_GPU_LAYERS` or 0.
   * @param {Function} [options.onDownloadProgress] - Called as `(downloadedBytes, totalBytes)`.
   * @param {Function} [options.onStatus] - Called with human-readable status messages.
   * @throws {Error} When node-llama-cpp could not be loaded.
   */
  constructor(options = {}) {
    if (!nativeAvailable) {
      throw new Error(
        "Local AXIR compilation requires node-llama-cpp.\nInstall it: npm install node-llama-cpp\nNote: this package contains native C++ bindings and needs build tools.\n * macOS: Xcode Command Line Tools (xcode-select --install)\n * Linux: build-essential / gcc-c++ / python3\n * Windows: Visual Studio Build Tools or windows-build-tools npm package\nDocs: https://github.com/withcatai/node-llama-cpp#installation"
      );
    }
    const cacheDir = path.join(os.homedir(), ".awi", "models");
    this.modelPath = options.modelPath || path.join(cacheDir, DEFAULT_MODEL_FILENAME);
    this.modelUrl = options.modelUrl || DEFAULT_MODEL_URL;
    this.contextSize = options.contextSize || 32768;
    this.gpuLayers = options.gpuLayers;
    this.onDownloadProgress = options.onDownloadProgress;
    this.onStatus = options.onStatus;
  }

  // -------------------------------------------------------------------------
  // Public API
  // -------------------------------------------------------------------------

  /**
   * Compile an AXIR document from a page snapshot.
   *
   * Generation is constrained by the AXIR grammar. The parsed result is
   * annotated with `model_used`, `tokens_used` (a character-based estimate)
   * and `compilation_time_ms`.
   *
   * @throws {SyntaxError} When the model output is not valid JSON.
   */
  async compile(domHTML, a11yTree, intent, params) {
    await this._ensureModel();
    await this._ensureGrammar();
    const prompt = this._buildCompilePrompt(domHTML, a11yTree, intent, params);
    const start = Date.now();
    this._status("Compiling AXIR locally...");
    const resultText = await this._complete(prompt, 4096, 0.3, this.grammar);
    const parsed = JSON.parse(resultText);
    parsed.model_used = "phi-3-mini-128k-q4-local";
    parsed.tokens_used = this._estimateTokens(prompt, resultText);
    parsed.compilation_time_ms = Date.now() - start;
    this._status(`AXIR compiled in ${parsed.compilation_time_ms}ms`);
    return parsed;
  }

  /**
   * Ask the model for a replacement for a broken selector.
   *
   * Generation is NOT constrained by the AXIR grammar: that grammar describes
   * a full AXIR document, not the small `{selector, confidence, reasoning}`
   * answer requested here.
   *
   * @returns {Promise<{selector: string, confidence: number, reasoning: *}>}
   *   `selector` is always a string. `confidence` is clamped to [0, 1]; it is
   *   0.85 when the model did not supply a usable number and 0 when the model
   *   answered with JSON that carries no selector string.
   */
  async heal(domHTML, brokenSelector, semanticIntent) {
    await this._ensureModel();
    const prompt = this._buildHealPrompt(domHTML, brokenSelector, semanticIntent);
    const start = Date.now();
    this._status("Healing selector locally...");
    const resultText = await this._complete(prompt, 256, 0.1, null);
    const rawSelector = resultText.trim().replace(/^["']|["']$/g, "");
    let selector = rawSelector;
    let confidence = 0.85;
    let reasoning;
    let parsed;
    let isJson = true;
    try {
      parsed = JSON.parse(resultText);
    } catch {
      // Plain-text answer: the raw text (minus surrounding quotes) is the selector.
      isJson = false;
    }
    if (isJson) {
      if (typeof parsed === "string") {
        selector = parsed;
      } else if (parsed !== null && typeof parsed === "object" && typeof parsed.selector === "string") {
        selector = parsed.selector;
        const reported = Number(parsed.confidence);
        if (parsed.confidence != null && Number.isFinite(reported)) {
          confidence = Math.min(1, Math.max(0, reported));
        }
        reasoning = parsed.reasoning;
      } else {
        // Valid JSON without a selector string: nothing trustworthy to return.
        confidence = 0;
      }
    }
    this._status(`Selector healed in ${Date.now() - start}ms`);
    return { selector, confidence, reasoning };
  }

  /** Whether the model file already exists at `modelPath`. */
  isModelCached() {
    return fs.existsSync(this.modelPath);
  }

  /** Delete the cached model file, if present, and drop the loaded model state. */
  clearCache() {
    if (fs.existsSync(this.modelPath)) {
      fs.unlinkSync(this.modelPath);
      this.model = null;
      this.context = null;
      this.grammar = null;
      this.ready = false;
    }
  }

  // -------------------------------------------------------------------------
  // Model lifecycle
  // -------------------------------------------------------------------------

  /** Download (if needed) and load the model; no-op once `ready` is set. */
  async _ensureModel() {
    if (this.ready) return;
    if (!fs.existsSync(this.modelPath)) {
      await this._downloadModel();
    }
    this._status("Loading local model...");
    const llama = await getLlama();
    const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
    this.model = new llama.LlamaModel({
      modelPath: this.modelPath,
      gpuLayers
    });
    this.context = await this.model.createContext({
      contextSize: this.contextSize
    });
    this.ready = true;
    this._status("Local model ready.");
  }

  /**
   * Build the AXIR grammar object once.
   * NOTE(review): the bundled grammar text uses `[ ... ]` around optional
   * groups; llama.cpp's GBNF reads square brackets as character classes —
   * confirm the grammar parses as intended.
   */
  async _ensureGrammar() {
    if (this.grammar) return;
    const llama = await getLlama();
    this.grammar = new llama.LlamaGrammar({
      grammar: axir_schema_default
    });
  }

  /**
   * GPU layer count from the `AWI_GPU_LAYERS` environment variable.
   * Returns 0 when the variable is unset or is not a non-negative integer.
   */
  _autoDetectGPULayers() {
    const raw = process.env.AWI_GPU_LAYERS;
    if (!raw) return 0;
    const layers = parseInt(raw, 10);
    return Number.isFinite(layers) && layers >= 0 ? layers : 0;
  }

  // -------------------------------------------------------------------------
  // Resumable model download
  // -------------------------------------------------------------------------

  /**
   * Download the model to `<modelPath>.tmp`, then rename it into place.
   *
   * An existing `.tmp` file is resumed with a `Range` request. Handles:
   *   - 301/302/303/307/308 redirects (relative or absolute), up to `redirectsLeft`
   *   - servers that ignore `Range` and answer 200 (the file is rewritten)
   *   - 416 for a partial file (finalised when already complete, else restarted)
   *   - responses that end early (rejects; the partial file is kept for resume)
   *
   * @param {string} [url=this.modelUrl] - URL to fetch.
   * @param {number} [redirectsLeft=5] - Remaining redirects to follow.
   * @returns {Promise<void>}
   */
  async _downloadModel(url = this.modelUrl, redirectsLeft = 5) {
    const dir = path.dirname(this.modelPath);
    if (!fs.existsSync(dir)) {
      fs.mkdirSync(dir, { recursive: true });
    }
    const tempPath = `${this.modelPath}.tmp`;
    const urlObj = new URL(url);
    const protocol = urlObj.protocol === "https:" ? https : http;
    let startByte = 0;
    if (fs.existsSync(tempPath)) {
      startByte = fs.statSync(tempPath).size;
      this._status(`Resuming download from ${startByte} bytes...`);
    } else {
      this._status("Downloading local compiler model (2.7GB, one-time)...");
    }
    const redirectCodes = [301, 302, 303, 307, 308];
    return new Promise((resolve, reject) => {
      const headers = {
        "User-Agent": "AWI-SDK/1.0"
      };
      if (startByte > 0) {
        headers["Range"] = `bytes=${startByte}-`;
      }
      const restart = (nextUrl, remaining) => {
        this._downloadModel(nextUrl, remaining).then(resolve, reject);
      };
      const finalize = () => {
        try {
          fs.renameSync(tempPath, this.modelPath);
        } catch (err) {
          reject(err);
          return;
        }
        this._status("Model download complete.");
        resolve();
      };
      const handleResponse = (response) => {
        const status = response.statusCode;
        const location = response.headers.location;
        if (redirectCodes.includes(status) && location) {
          // Drain the redirect body so the socket is released.
          response.resume();
          if (redirectsLeft <= 0) {
            reject(new Error("Model download failed: too many redirects"));
            return;
          }
          // Location may be relative; resolve it against the current URL.
          restart(new URL(location, urlObj).toString(), redirectsLeft - 1);
          return;
        }
        if (status === 416 && startByte > 0) {
          response.resume();
          const sizeMatch = /\/(\d+)\s*$/.exec(response.headers["content-range"] || "");
          if (sizeMatch && Number(sizeMatch[1]) === startByte) {
            // The partial file is already the complete file.
            finalize();
            return;
          }
          // The partial file cannot be resumed; discard it and start over.
          fs.unlinkSync(tempPath);
          restart(url, redirectsLeft);
          return;
        }
        if (status !== 200 && status !== 206) {
          response.resume();
          reject(new Error(`Model download failed: HTTP ${status}`));
          return;
        }
        // A 200 answer to a Range request means the server sent the whole file.
        const append = startByte > 0 && status === 206;
        const offset = append ? startByte : 0;
        const contentLength = parseInt(response.headers["content-length"] || "0", 10) || 0;
        const total = contentLength + offset;
        const file = fs.createWriteStream(tempPath, { flags: append ? "a" : "w" });
        let downloaded = offset;
        let failed = false;
        const fail = (err) => {
          if (failed) return;
          failed = true;
          // Flush what was received so a later call can resume from it.
          file.end();
          reject(err);
        };
        response.on("data", (chunk) => {
          downloaded += chunk.length;
          this.onDownloadProgress?.(downloaded, total);
        });
        response.on("error", fail);
        response.on("close", () => {
          // Without this, a connection dropped mid-stream never ends the file
          // and the promise would stay pending forever.
          if (!response.complete) {
            fail(new Error("Model download failed: connection closed before completion"));
          }
        });
        response.pipe(file);
        file.on("finish", () => {
          if (failed) return;
          // Rename only after the descriptor is closed.
          file.close(() => {
            if (contentLength > 0 && downloaded !== total) {
              reject(new Error(`Model download incomplete: received ${downloaded} of ${total} bytes`));
              return;
            }
            finalize();
          });
        });
        file.on("error", (err) => {
          failed = true;
          try {
            fs.unlinkSync(tempPath);
          } catch {
            // Best-effort cleanup: the write error below is what matters.
          }
          reject(err);
        });
      };
      const request = protocol.get(urlObj, { headers }, (response) => {
        try {
          handleResponse(response);
        } catch (err) {
          response.resume();
          reject(err);
        }
      });
      request.on("error", reject);
      request.setTimeout(3e5, () => {
        request.destroy();
        reject(new Error("Model download timed out after 5 minutes"));
      });
    });
  }

  // -------------------------------------------------------------------------
  // Inference
  // -------------------------------------------------------------------------

  /**
   * Run one completion and return the concatenated output.
   *
   * @param {string} prompt
   * @param {number} maxTokens
   * @param {number} temperature
   * @param {*} [grammar=this.grammar] - Grammar passed to generation; pass
   *   `null` for unconstrained output.
   * @throws {Error} `Model not loaded` when no context exists.
   */
  async _complete(prompt, maxTokens, temperature, grammar = this.grammar) {
    if (!this.context) throw new Error("Model not loaded");
    const sequence = this.context.getSequence();
    await sequence.evaluate(prompt);
    const response = await sequence.generateResponse(maxTokens, {
      temperature,
      grammar
    });
    const pieces = [];
    for await (const token of response) {
      pieces.push(token);
    }
    return pieces.join("");
  }

  // -------------------------------------------------------------------------
  // Prompt builders
  // -------------------------------------------------------------------------

  /** Build the compile prompt; DOM is capped at 40000 chars, a11y tree at 8000. */
  _buildCompilePrompt(domHTML, a11yTree, intent, params) {
    const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
    return [
      `<|system|>`,
      `You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.`,
      ``,
      `Output MUST be valid JSON matching this schema:`,
      `- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}`,
      `- workflow.edges: list of {from_node, to_node, action, condition?, probability}`,
      `- workflow.entry_points: list of starting node_ids`,
      `- workflow.exit_points: list of terminal node_ids`,
      `- workflow.domain: the domain name`,
      `- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown`,
      `- intents: list of {intent, action, parameters[], context}`,
      `- selectors: map of selector_name -> list of {type, value, priority}`,
      `- fields: list of {name, selector, transform?, required}`,
      `- container?: string (optional container selector name)`,
      ``,
      `Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.`,
      `Selector types: css, semantic, text, attribute.`,
      `<|user|>`,
      `Intent: ${intent}`,
      `Parameters: ${paramsJson}`,
      ``,
      `Simplified DOM:`,
      `${this._truncate(domHTML, 4e4)}`,
      ``,
      `Accessibility Tree:`,
      `${this._truncate(a11yTree, 8e3)}`,
      ``,
      `Compile AXIR:`,
      `<|assistant|>`
    ].join("\n");
  }

  /** Build the selector-repair prompt; DOM is capped at 20000 chars. */
  _buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
    return [
      `<|system|>`,
      `You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.`,
      ``,
      `Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}`,
      `<|user|>`,
      `Broken selector: ${brokenSelector}`,
      `Semantic intent: ${semanticIntent}`,
      ``,
      `Current DOM (truncated):`,
      `${this._truncate(domHTML, 2e4)}`,
      ``,
      `New selector:`,
      `<|assistant|>`
    ].join("\n");
  }

  /**
   * Cap `text` at `maxChars`, appending a truncation marker when cut.
   * A missing (null/undefined) input is treated as an empty string.
   */
  _truncate(text, maxChars) {
    if (text == null) return "";
    if (text.length <= maxChars) return text;
    return text.slice(0, maxChars) + "\n[...truncated...]";
  }

  /** Rough token estimate: one token per four characters of prompt + response. */
  _estimateTokens(prompt, response) {
    return Math.ceil((prompt.length + response.length) / 4);
  }

  /** Forward a status message to the optional `onStatus` callback. */
  _status(message) {
    this.onStatus?.(message);
  }
}
|
|
726
|
+
export {
|
|
727
|
+
AWIClient,
|
|
728
|
+
AWIError,
|
|
729
|
+
AdvisoryExecutor,
|
|
730
|
+
LocalAXIRCompiler,
|
|
731
|
+
client_default as default
|
|
732
|
+
};
|
|
733
|
+
//# sourceMappingURL=index.mjs.map
|