@awi-protocol/sdk 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +37 -9
- package/dist/index.d.ts +37 -9
- package/dist/index.js +364 -89
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +363 -95
- package/dist/index.mjs.map +1 -1
- package/package.json +29 -50
- package/src/compiler/grammar/axir-schema.gbnf +97 -0
package/dist/index.mjs
CHANGED
|
@@ -1,10 +1,3 @@
|
|
|
1
|
-
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
2
|
-
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
3
|
-
}) : x)(function(x) {
|
|
4
|
-
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
5
|
-
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
6
|
-
});
|
|
7
|
-
|
|
8
1
|
// src/client.ts
|
|
9
2
|
import fetch from "cross-fetch";
|
|
10
3
|
var AWIError = class extends Error {
|
|
@@ -209,7 +202,6 @@ var AWIClient = class {
|
|
|
209
202
|
throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
|
|
210
203
|
}
|
|
211
204
|
};
|
|
212
|
-
var client_default = AWIClient;
|
|
213
205
|
|
|
214
206
|
// src/advisory-executor.ts
|
|
215
207
|
var AdvisoryExecutor = class {
|
|
@@ -440,27 +432,312 @@ var AdvisoryExecutor = class {
|
|
|
440
432
|
}
|
|
441
433
|
};
|
|
442
434
|
|
|
435
|
+
// src/compiler/axir-compiler.ts
|
|
436
|
+
import * as cheerio from "cheerio";
|
|
437
|
+
/**
 * Deterministic (model-free) AXIR compiler.
 *
 * Loads an HTML document with cheerio, strips non-content markup, classifies
 * page regions (search, navigation, listing, form, pagination, detail), picks
 * the region that best matches the requested intent, and emits a workflow
 * graph together with selector candidates and field descriptors.
 *
 * Relies on the module-level `cheerio` import.
 */
var AXIRCompiler = class {
  $;
  intent;
  params;
  domain;

  /**
   * @param {string} html - Raw HTML of the page to analyse.
   * @param {{intent: string, params?: object, domain?: string}} options -
   *   `intent` selects the routing table entry; `params` defaults to `{}` and
   *   `domain` to `"unknown"`.
   */
  constructor(html, options) {
    this.$ = cheerio.load(html);
    this.intent = options.intent;
    this.params = options.params || {};
    this.domain = options.domain || "unknown";
  }

  /**
   * Runs the full pipeline and returns the compiled AXIR object.
   *
   * @returns {object} `{ workflow, intents, selectors, fields, container,
   *   model_used, tokens_used, compilation_time_ms }`.
   */
  compile() {
    const start = Date.now();
    this.simplifyDOM();
    const regions = this.identifyRegions();
    const target = this.routeIntent(regions);
    return {
      workflow: this.buildWorkflow(target, regions),
      intents: this.mapIntents(),
      selectors: this.generateSelectors(target),
      fields: this.generateFields(target),
      container: target.container,
      model_used: "axir-deterministic-v1",
      tokens_used: 0,
      compilation_time_ms: Date.now() - start
    };
  }

  /**
   * Removes markup that carries no extractable content: scripts/styles/media,
   * inline-hidden elements, and empty `div`/`span` wrappers. Mutates `this.$`.
   */
  simplifyDOM() {
    this.$("script, style, svg, noscript, iframe, canvas, video, audio").remove();
    // Hidden nodes go first so that wrappers they leave empty are pruned below.
    this.$('[style*="display:none"], [style*="display: none"], [hidden], [aria-hidden="true"]').remove();
    // Reverse document order visits children before their ancestors, so a
    // wrapper that contained only empty wrappers collapses as well.
    for (const el of this.$("div, span").toArray().reverse()) {
      const $el = this.$(el);
      if ($el.children().length === 0 && $el.text().trim() === "") $el.remove();
    }
  }

  /**
   * Scans the document for candidate regions.
   *
   * NOTE(review): any form containing a text input or a button is classified
   * as "search" here, so the later "form" pass only picks up forms without
   * either; intents routed solely to "form" (e.g. login) may therefore fall
   * back to the largest region — confirm this is intended.
   *
   * @returns {Array<{type: string, element: object, confidence: number}>}
   */
  identifyRegions() {
    const regions = [];
    for (const el of this.$('form, [role="search"], input[type="search"]').toArray()) {
      const $el = this.$(el);
      // A search input nested in a form/search landmark is already represented
      // by that container; only standalone inputs become their own region.
      if ($el.is("input") && $el.parents('form, [role="search"]').length > 0) continue;
      const r = this.analyzeSearchRegion($el);
      if (r) regions.push(r);
    }
    for (const el of this.$('nav, [role="navigation"], header, .nav, .navbar, .menu').toArray()) {
      regions.push({ type: "navigation", element: this.$(el), confidence: 0.9 });
    }
    for (const el of this.$('ul, ol, [role="list"], .list, .results, .items, table, [role="grid"]').toArray()) {
      const $el = this.$(el);
      // Require at least two item-like descendants to count as a listing.
      if ($el.find('li, tr, .item, [role="listitem"]').length > 1) {
        regions.push({ type: "listing", element: $el, confidence: 0.85 });
      }
    }
    for (const el of this.$("form").toArray()) {
      const $el = this.$(el);
      // Skip forms that an earlier pass already registered.
      if (!regions.some((r) => r.element.is($el))) regions.push({ type: "form", element: $el, confidence: 0.9 });
    }
    for (const el of this.$(".pagination, .pager, .pages").toArray()) {
      if (this.isPagination(this.$(el))) regions.push({ type: "pagination", element: this.$(el), confidence: 0.8 });
    }
    for (const el of this.$('article, [role="article"], .content, .main, main, .detail').toArray()) {
      regions.push({ type: "detail", element: this.$(el), confidence: 0.75 });
    }
    return regions;
  }

  /**
   * Classifies an element as a search region when it is, or contains, a text
   * input or a submit control.
   *
   * @param {object} $el - cheerio selection to inspect.
   * @returns {{type: string, element: object, confidence: number}|null}
   *   Confidence is 0.95 with both an input and a button, otherwise 0.7.
   */
  analyzeSearchRegion($el) {
    const inputSelector = 'input[type="text"], input[type="search"], input:not([type])';
    // `.find()` only sees descendants, so also test the element itself;
    // otherwise a bare <input type="search"> could never match.
    const hasInput = $el.is(inputSelector) || $el.find(inputSelector).length > 0;
    const hasButton = $el.find('button, input[type="submit"]').length > 0;
    if (hasInput || hasButton) return { type: "search", element: $el, confidence: hasInput && hasButton ? 0.95 : 0.7 };
    return null;
  }

  /**
   * Heuristic: the element's text contains a digit and a next/previous marker.
   *
   * @param {object} $el - cheerio selection.
   * @returns {boolean}
   */
  isPagination($el) {
    const text = $el.text().toLowerCase();
    return /\d+/.test(text) && (/next|>|\u203a|\u2192|\u00bb/.test(text) || /prev|previous|<|\u2039|\u2190|\u00ab/.test(text));
  }

  /**
   * Picks the region that best matches the current intent.
   *
   * Each intent maps to an ordered list of preferred region types; a region's
   * score is `(listLength - positionInList) * confidence`. With no positive
   * score the largest region (or `body`) is used.
   *
   * @param {Array<object>} regions - Output of `identifyRegions()`.
   * @returns {{region: object, container: string}}
   */
  routeIntent(regions) {
    const intentMap = {
      search: ["search", "form"],
      search_jobs: ["search", "listing", "form"],
      extract_list: ["listing", "search", "detail"],
      extract_detail: ["detail", "listing"],
      fill_form: ["form", "search"],
      navigate: ["navigation", "listing"],
      login: ["form"],
      filter: ["search", "listing"],
      sort: ["listing", "search"],
      scrape: ["listing", "detail", "search"]
    };
    // Own-property lookup: an intent such as "constructor" must not resolve
    // to an inherited Object.prototype member.
    const targetTypes = this.ownValue(intentMap, this.intentKey()) || ["search", "listing", "form"];
    let best = null;
    let bestScore = 0;
    for (const r of regions) {
      const match = targetTypes.indexOf(r.type);
      const score = match >= 0 ? (targetTypes.length - match) * r.confidence : 0;
      if (score > bestScore) {
        bestScore = score;
        best = r;
      }
    }
    if (!best) best = this.findLargestRegion(regions);
    return { region: best, container: this.generateContainerSelector(best.element) };
  }

  /**
   * Returns the region with the most descendant elements, or a synthetic
   * `body` region (type "unknown", confidence 0.5) when there are none.
   *
   * @param {Array<object>} regions
   * @returns {object}
   */
  findLargestRegion(regions) {
    if (regions.length === 0) return { type: "unknown", element: this.$("body"), confidence: 0.5 };
    return regions.reduce((l, c) => c.element.find("*").length > l.element.find("*").length ? c : l);
  }

  /**
   * Builds the workflow graph: one node per region (id `<type>_<index>`),
   * edges inferred between every ordered pair of distinct regions.
   *
   * @param {object} _target - Unused; kept for interface compatibility.
   * @param {Array<object>} all - All identified regions.
   * @returns {{nodes: object, edges: Array<object>, entry_points: string[],
   *   exit_points: string[], domain: string, page_type: string}}
   */
  buildWorkflow(_target, all) {
    const nodes = {};
    const edges = [];
    const entry = [];
    const exit = [];
    all.forEach((r, i) => {
      const id = `${r.type}_${i}`;
      const raw = r.element[0];
      nodes[id] = {
        node_id: id,
        element_type: this.mapType(r.type),
        semantic_role: r.type,
        intent: this.intent,
        tag: raw?.tagName?.toLowerCase(),
        selector_candidates: this.buildCandidates(r.element),
        confidence: r.confidence
      };
      if (r.type === "navigation") entry.push(id);
      if (r.type === "listing" || r.type === "detail") exit.push(id);
    });
    all.forEach((f, fi) => all.forEach((t, ti) => {
      if (fi !== ti) {
        const e = this.inferEdge(f, t, fi, ti);
        if (e) edges.push(e);
      }
    }));
    // Without a navigation region, fall back to the first node as entry point.
    if (entry.length === 0 && Object.keys(nodes).length > 0) entry.push(Object.keys(nodes)[0]);
    return { nodes, edges, entry_points: entry, exit_points: exit, domain: this.domain, page_type: this.inferPageType(all) };
  }

  /**
   * Maps an internal region type to the workflow `element_type`.
   *
   * @param {string} t - Region type.
   * @returns {string} The mapped type, or `"unknown"`.
   */
  mapType(t) {
    const m = {
      search: "search",
      navigation: "navigation",
      listing: "list",
      form: "form",
      pagination: "pagination",
      detail: "container"
    };
    return this.ownValue(m, t) || "unknown";
  }

  /**
   * Infers a directed edge between two regions from their types.
   *
   * @param {object} f - Source region.
   * @param {object} t - Target region.
   * @param {number} fi - Index of the source region (used in the node id).
   * @param {number} ti - Index of the target region (used in the node id).
   * @returns {{from_node: string, to_node: string, action: string,
   *   probability: number}|null} `null` when no rule applies.
   */
  inferEdge(f, t, fi, ti) {
    if (f.type === "search" && t.type === "listing") return { from_node: `search_${fi}`, to_node: `listing_${ti}`, action: "submit_search", probability: 0.9 };
    if (f.type === "navigation" && t.type === "search") return { from_node: `navigation_${fi}`, to_node: `search_${ti}`, action: "navigate_to_search", probability: 0.7 };
    if (f.type === "listing" && t.type === "pagination") return { from_node: `listing_${fi}`, to_node: `pagination_${ti}`, action: "next_page", probability: 0.8 };
    if (f.type === "pagination" && t.type === "listing") return { from_node: `pagination_${fi}`, to_node: `listing_${ti}`, action: "load_results", probability: 0.95 };
    return null;
  }

  /**
   * Derives a coarse page type from the set of region types present.
   *
   * @param {Array<{type: string}>} regions
   * @returns {string} One of "search", "listing", "form", "landing", "unknown".
   */
  inferPageType(regions) {
    const t = regions.map((r) => r.type);
    if (t.includes("search") && t.includes("listing")) return "search";
    if (t.includes("listing")) return "listing";
    if (t.includes("form")) return "form";
    if (t.includes("search")) return "search";
    if (t.includes("navigation")) return "landing";
    return "unknown";
  }

  /**
   * Builds named selector-candidate lists for the target region: the
   * container itself, each named form control, each button (`btn_<label>`)
   * and each link with text shorter than 50 characters (`link_<text>`).
   * Later elements overwrite earlier ones that produce the same key.
   *
   * @param {{region: {element: object}}} target
   * @returns {Object<string, Array<object>>}
   */
  generateSelectors(target) {
    const s = {};
    const $el = target.region.element;
    s.container = this.buildCandidates($el);
    for (const el of $el.find("input, textarea, select").toArray()) {
      const n = this.inferFieldName(this.$(el));
      if (n) s[n] = this.buildCandidates(this.$(el));
    }
    for (const el of $el.find('button, input[type="submit"], input[type="button"]').toArray()) {
      const $btn = this.$(el);
      const label = $btn.text().trim() || String($btn.val() || "button");
      s[`btn_${this.slugify(label)}`] = this.buildCandidates($btn);
    }
    for (const el of $el.find("a").toArray()) {
      const $a = this.$(el);
      const t = $a.text().trim();
      if (t && t.length < 50) s[`link_${this.slugify(t)}`] = this.buildCandidates($a);
    }
    return s;
  }

  /**
   * Produces selector candidates for an element in fixed priority order:
   * id, class list, name, type, placeholder, role, aria-label, text.
   *
   * @param {object} $el - cheerio selection; only its first element is used.
   * @returns {Array<{type: string, value: string, priority: number,
   *   confidence: number}>} Empty when the selection is empty.
   */
  buildCandidates($el) {
    const c = [];
    const el = $el[0];
    if (!el) return c;
    const id = $el.attr("id");
    // Ids that start with a digit are not offered as candidates.
    if (id && !id.match(/^\d/)) c.push({ type: "css", value: `#${this.escape(id)}`, priority: 1, confidence: 0.99 });
    // Drop classes with js-/ng-/vue-/data- prefixes before building the selector.
    const classes = ($el.attr("class") || "").split(/\s+/).filter((x) => x && !x.match(/^js-|^ng-|^vue-|^data-/));
    if (classes.length) c.push({ type: "css", value: `.${classes.map((x) => this.escape(x)).join(".")}`, priority: 2, confidence: 0.85 });
    const tag = el.tagName?.toLowerCase() || "";
    const name = $el.attr("name");
    const type = $el.attr("type");
    const placeholder = $el.attr("placeholder");
    // Every attribute value goes through q() so quotes/backslashes in page
    // content cannot break out of the quoted attribute selector.
    if (name) c.push({ type: "css", value: `${tag}[name="${this.q(name)}"]`, priority: 3, confidence: 0.9 });
    if (type) c.push({ type: "css", value: `${tag}[type="${this.q(type)}"]`, priority: 4, confidence: 0.8 });
    if (placeholder) c.push({ type: "css", value: `${tag}[placeholder="${this.q(placeholder)}"]`, priority: 5, confidence: 0.75 });
    const role = $el.attr("role");
    if (role) c.push({ type: "semantic", value: `[role="${this.q(role)}"]`, priority: 6, confidence: 0.9 });
    const al = $el.attr("aria-label");
    if (al) c.push({ type: "semantic", value: `[aria-label="${this.q(al)}"]`, priority: 7, confidence: 0.85 });
    const text = $el.text().trim();
    if (text && text.length < 100) c.push({ type: "text", value: text, priority: 8, confidence: 0.7 });
    return c;
  }

  /**
   * Describes each nameable form control inside the target region.
   *
   * @param {{region: {element: object}}} target
   * @returns {Array<{name: string, selector: string,
   *   transform: (string|undefined), required: boolean}>}
   */
  generateFields(target) {
    const f = [];
    for (const el of target.region.element.find("input, textarea, select").toArray()) {
      const $el = this.$(el);
      const name = this.inferFieldName($el);
      if (!name) continue;
      f.push({
        name,
        selector: this.bestSelector($el),
        transform: this.inferTransform($el),
        required: $el.attr("required") !== void 0
      });
    }
    return f;
  }

  /**
   * Derives a slug name for a form control, trying in order: the text of a
   * `<label for=id>`, the placeholder, the aria-label, the name attribute.
   * A source whose slug is empty is skipped in favour of the next one.
   *
   * @param {object} $el - cheerio selection for the control.
   * @returns {string|null} `null` when no source yields a non-empty slug.
   */
  inferFieldName($el) {
    const id = $el.attr("id");
    if (id) {
      // Quote-escape the id: it is page content and may contain `"` or `\`.
      const $l = this.$(`label[for="${this.q(id)}"]`);
      if ($l.length) {
        const fromLabel = this.slugify($l.text());
        if (fromLabel) return fromLabel;
      }
    }
    for (const attr of ["placeholder", "aria-label", "name"]) {
      const value = $el.attr(attr);
      if (!value) continue;
      const slug = this.slugify(value);
      if (slug) return slug;
    }
    return null;
  }

  /**
   * Suggests a value transform from the control's `type` (or `select` tag).
   *
   * @param {object} $el - cheerio selection for the control.
   * @returns {string|undefined}
   */
  inferTransform($el) {
    const t = $el.attr("type");
    if (t === "number") return "number";
    if (t === "email") return "email";
    if (t === "date") return "date";
    if (t === "checkbox") return "boolean";
    if ($el.is("select")) return "select";
    return void 0;
  }

  /**
   * Returns the highest-priority candidate selector, falling back to the tag
   * name and finally `"*"`.
   *
   * @param {object} $el - cheerio selection.
   * @returns {string}
   */
  bestSelector($el) {
    const c = this.buildCandidates($el);
    if (c.length) return c[0].value;
    const raw = $el[0];
    return raw?.tagName?.toLowerCase() || "*";
  }

  /**
   * Returns the highest-priority candidate selector for a container, or
   * `"body"` when the element yields no candidates.
   *
   * @param {object} $el - cheerio selection.
   * @returns {string}
   */
  generateContainerSelector($el) {
    const c = this.buildCandidates($el);
    return c.length ? c[0].value : "body";
  }

  /**
   * Returns the intent descriptor for the current intent as a one-element
   * list; unknown intents get a generic "interact" descriptor whose
   * parameters are the keys of `this.params`.
   *
   * @returns {Array<{intent: string, action: string, parameters: string[],
   *   context: string}>}
   */
  mapIntents() {
    const m = {
      search: { intent: "search", action: "fill_and_submit", parameters: ["query", "location", "filters"], context: "Enter search terms and submit form" },
      search_jobs: { intent: "search_jobs", action: "fill_and_submit", parameters: ["query", "location", "experience_level", "job_type"], context: "Search for job listings with optional filters" },
      extract_list: { intent: "extract_list", action: "extract_fields", parameters: ["items", "title", "url", "metadata"], context: "Extract structured data from list items" },
      extract_detail: { intent: "extract_detail", action: "extract_fields", parameters: ["title", "description", "metadata", "links"], context: "Extract structured data from detail page" },
      fill_form: { intent: "fill_form", action: "fill_and_submit", parameters: Object.keys(this.params), context: "Fill form fields with provided parameters" },
      navigate: { intent: "navigate", action: "click", parameters: ["target_url", "link_text"], context: "Click navigation link to target page" },
      login: { intent: "login", action: "fill_and_submit", parameters: ["username", "password"], context: "Enter credentials and submit login form" },
      scrape: { intent: "scrape", action: "extract_fields", parameters: ["all_visible_text", "links", "images", "structured_data"], context: "Extract all visible content from the page" }
    };
    // Own-property lookup so that e.g. intent "constructor" falls through to
    // the generic descriptor instead of returning Object.prototype.constructor.
    const mapped = this.ownValue(m, this.intentKey());
    if (mapped) return [mapped];
    return [{ intent: this.intent, action: "interact", parameters: Object.keys(this.params), context: `Perform ${this.intent} on the page` }];
  }

  /**
   * Lower-cased intent used as a lookup key; a non-string intent yields `""`
   * so routing falls back to its defaults instead of throwing.
   *
   * @returns {string}
   */
  intentKey() {
    return typeof this.intent === "string" ? this.intent.toLowerCase() : "";
  }

  /**
   * Reads `table[key]` only when `key` is an own property of `table`.
   *
   * @param {object} table - Plain lookup object.
   * @param {string} key
   * @returns {*} The own value, or `undefined`.
   */
  ownValue(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
  }

  /**
   * Converts free text to a lower-case, underscore-separated slug of at most
   * 50 characters.
   *
   * @param {string} t
   * @returns {string}
   */
  slugify(t) {
    return t.toLowerCase().replace(/[^\w\s-]/g, "").replace(/[\s_-]+/g, "_").replace(/^_|_$/g, "").substring(0, 50);
  }

  /**
   * Escapes a string for use as a CSS identifier (id or class name).
   *
   * Every ASCII character that is not a letter, digit, `_` or `-` is
   * backslash-escaped (control characters as hex escapes), and a leading
   * digit — optionally after one hyphen — is hex-escaped, since an
   * identifier may not start that way.
   *
   * @param {string} s
   * @returns {string}
   */
  escape(s) {
    if (s === "-") return "\\-";
    const escaped = s.replace(/[^a-zA-Z0-9_\u0080-\uFFFF-]/g, (ch) => {
      const code = ch.charCodeAt(0);
      // Control characters cannot be escaped literally; use `\<hex> `.
      if (code < 0x20 || code === 0x7f) return `\\${code.toString(16)} `;
      return `\\${ch}`;
    });
    // Digits are U+0030..U+0039, hence the `\3<digit> ` hex escape.
    return escaped.replace(/^(-?)(\d)/, (_m, dash, digit) => `${dash}\\3${digit} `);
  }

  /**
   * Escapes a value for use inside a double-quoted CSS attribute selector.
   * Backslashes are escaped first so the quote escapes are not doubled.
   *
   * @param {string} s
   * @returns {string}
   */
  q(s) {
    return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  }
};
|
|
713
|
+
|
|
443
714
|
// src/compiler/local-axir.ts
|
|
444
715
|
import * as fs from "fs";
|
|
445
716
|
import * as path from "path";
|
|
446
717
|
import * as os from "os";
|
|
447
718
|
import * as https from "https";
|
|
448
719
|
import * as http from "http";
|
|
449
|
-
|
|
450
|
-
// src/compiler/grammar/axir-schema.gbnf
|
|
451
|
-
var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" 
string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
|
|
452
|
-
|
|
453
|
-
// src/compiler/local-axir.ts
|
|
720
|
+
import { URL } from "url";
|
|
454
721
|
var nativeAvailable = false;
|
|
455
722
|
var getLlama;
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
723
|
+
var LlamaModel;
|
|
724
|
+
var LlamaContext;
|
|
725
|
+
var LlamaGrammar;
|
|
726
|
+
var llamaPromise = import("node-llama-cpp").then((m) => {
|
|
459
727
|
nativeAvailable = true;
|
|
460
|
-
|
|
728
|
+
getLlama = m.getLlama;
|
|
729
|
+
LlamaModel = m.LlamaModel;
|
|
730
|
+
LlamaContext = m.LlamaContext;
|
|
731
|
+
LlamaGrammar = m.LlamaGrammar;
|
|
732
|
+
return m;
|
|
733
|
+
}).catch((err) => {
|
|
461
734
|
nativeAvailable = false;
|
|
462
|
-
|
|
463
|
-
|
|
735
|
+
if (process.env.AWI_DEBUG) {
|
|
736
|
+
console.error("[AWI] node-llama-cpp load error:", err.message);
|
|
737
|
+
}
|
|
738
|
+
return null;
|
|
739
|
+
});
|
|
740
|
+
var DEFAULT_MODEL_URL = "https://github.com/RayAKaan/AWI/releases/download/v0.0.0-models/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
|
|
464
741
|
var DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
|
|
465
742
|
var LocalAXIRCompiler = class {
|
|
466
743
|
modelPath;
|
|
@@ -487,9 +764,6 @@ var LocalAXIRCompiler = class {
|
|
|
487
764
|
this.onDownloadProgress = options.onDownloadProgress;
|
|
488
765
|
this.onStatus = options.onStatus;
|
|
489
766
|
}
|
|
490
|
-
// -------------------------------------------------------------------------
|
|
491
|
-
// Public API
|
|
492
|
-
// -------------------------------------------------------------------------
|
|
493
767
|
async compile(domHTML, a11yTree, intent, params) {
|
|
494
768
|
await this._ensureModel();
|
|
495
769
|
await this._ensureGrammar();
|
|
@@ -536,22 +810,26 @@ var LocalAXIRCompiler = class {
|
|
|
536
810
|
this.ready = false;
|
|
537
811
|
}
|
|
538
812
|
}
|
|
539
|
-
// -------------------------------------------------------------------------
|
|
540
|
-
// Model lifecycle
|
|
541
|
-
// -------------------------------------------------------------------------
|
|
542
813
|
async _ensureModel() {
|
|
543
814
|
if (this.ready) return;
|
|
815
|
+
await llamaPromise;
|
|
816
|
+
if (!getLlama) {
|
|
817
|
+
throw new Error("node-llama-cpp failed to load. Is it installed?");
|
|
818
|
+
}
|
|
544
819
|
if (!fs.existsSync(this.modelPath)) {
|
|
545
820
|
await this._downloadModel();
|
|
546
821
|
}
|
|
547
822
|
this._status("Loading local model...");
|
|
548
823
|
const llama = await getLlama();
|
|
549
824
|
const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
|
|
550
|
-
this.model = new
|
|
825
|
+
this.model = new LlamaModel({
|
|
826
|
+
llama,
|
|
551
827
|
modelPath: this.modelPath,
|
|
552
828
|
gpuLayers
|
|
553
829
|
});
|
|
554
|
-
this.context =
|
|
830
|
+
this.context = new LlamaContext({
|
|
831
|
+
llama,
|
|
832
|
+
model: this.model,
|
|
555
833
|
contextSize: this.contextSize
|
|
556
834
|
});
|
|
557
835
|
this.ready = true;
|
|
@@ -559,9 +837,10 @@ var LocalAXIRCompiler = class {
|
|
|
559
837
|
}
|
|
560
838
|
async _ensureGrammar() {
|
|
561
839
|
if (this.grammar) return;
|
|
562
|
-
const
|
|
563
|
-
this.grammar = new
|
|
564
|
-
|
|
840
|
+
const grammarPath = path.join(__dirname, "grammar", "axir-schema.gbnf");
|
|
841
|
+
this.grammar = new LlamaGrammar({
|
|
842
|
+
llama: await getLlama(),
|
|
843
|
+
grammar: fs.readFileSync(grammarPath, "utf-8")
|
|
565
844
|
});
|
|
566
845
|
}
|
|
567
846
|
_autoDetectGPULayers() {
|
|
@@ -570,17 +849,14 @@ var LocalAXIRCompiler = class {
|
|
|
570
849
|
}
|
|
571
850
|
return 0;
|
|
572
851
|
}
|
|
573
|
-
// -------------------------------------------------------------------------
|
|
574
|
-
// Resumable model download
|
|
575
|
-
// -------------------------------------------------------------------------
|
|
576
852
|
async _downloadModel() {
|
|
577
853
|
const dir = path.dirname(this.modelPath);
|
|
578
854
|
if (!fs.existsSync(dir)) {
|
|
579
855
|
fs.mkdirSync(dir, { recursive: true });
|
|
580
856
|
}
|
|
581
857
|
const tempPath = `${this.modelPath}.tmp`;
|
|
582
|
-
const
|
|
583
|
-
const protocol =
|
|
858
|
+
const url = new URL(this.modelUrl);
|
|
859
|
+
const protocol = url.protocol === "https:" ? https : http;
|
|
584
860
|
let startByte = 0;
|
|
585
861
|
if (fs.existsSync(tempPath)) {
|
|
586
862
|
startByte = fs.statSync(tempPath).size;
|
|
@@ -596,10 +872,10 @@ var LocalAXIRCompiler = class {
|
|
|
596
872
|
headers["Range"] = `bytes=${startByte}-`;
|
|
597
873
|
}
|
|
598
874
|
const request = protocol.get(
|
|
599
|
-
|
|
875
|
+
url,
|
|
600
876
|
{ headers },
|
|
601
877
|
(response) => {
|
|
602
|
-
if (response.statusCode ===
|
|
878
|
+
if (response.statusCode === 302 || response.statusCode === 301) {
|
|
603
879
|
if (response.headers.location) {
|
|
604
880
|
this.modelUrl = response.headers.location;
|
|
605
881
|
return this._downloadModel().then(resolve).catch(reject);
|
|
@@ -607,7 +883,7 @@ var LocalAXIRCompiler = class {
|
|
|
607
883
|
}
|
|
608
884
|
if (response.statusCode !== 200 && response.statusCode !== 206) {
|
|
609
885
|
return reject(
|
|
610
|
-
new Error(`
|
|
886
|
+
new Error(`Download failed: HTTP ${response.statusCode}`)
|
|
611
887
|
);
|
|
612
888
|
}
|
|
613
889
|
const total = parseInt(
|
|
@@ -641,76 +917,69 @@ var LocalAXIRCompiler = class {
|
|
|
641
917
|
});
|
|
642
918
|
});
|
|
643
919
|
}
|
|
644
|
-
// -------------------------------------------------------------------------
|
|
645
|
-
// Inference
|
|
646
|
-
// -------------------------------------------------------------------------
|
|
647
920
|
async _complete(prompt, maxTokens, temperature) {
|
|
648
921
|
if (!this.context) throw new Error("Model not loaded");
|
|
649
922
|
const sequence = this.context.getSequence();
|
|
650
923
|
await sequence.evaluate(prompt);
|
|
651
|
-
const
|
|
924
|
+
const result = await sequence.generateResponse(maxTokens, {
|
|
652
925
|
temperature,
|
|
653
926
|
grammar: this.grammar
|
|
654
927
|
});
|
|
655
928
|
let text = "";
|
|
656
|
-
for await (const token of
|
|
929
|
+
for await (const token of result) {
|
|
657
930
|
text += token;
|
|
658
931
|
}
|
|
659
932
|
return text;
|
|
660
933
|
}
|
|
661
|
-
// -------------------------------------------------------------------------
|
|
662
|
-
// Prompt builders
|
|
663
|
-
// -------------------------------------------------------------------------
|
|
664
934
|
_buildCompilePrompt(domHTML, a11yTree, intent, params) {
|
|
665
935
|
const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
936
|
+
const a11y = a11yTree || "No accessibility tree available.";
|
|
937
|
+
return `|<|system|>
|
|
938
|
+
You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.
|
|
939
|
+
|
|
940
|
+
Output MUST be valid JSON matching this schema:
|
|
941
|
+
- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}
|
|
942
|
+
- workflow.edges: list of {from_node, to_node, action, condition?, probability}
|
|
943
|
+
- workflow.entry_points: list of starting node_ids
|
|
944
|
+
- workflow.exit_points: list of terminal node_ids
|
|
945
|
+
- workflow.domain: the domain name
|
|
946
|
+
- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown
|
|
947
|
+
- intents: list of {intent, action, parameters[], context}
|
|
948
|
+
- selectors: map of selector_name -> list of {type, value, priority}
|
|
949
|
+
- fields: list of {name, selector, transform?, required}
|
|
950
|
+
- container?: string (optional container selector name)
|
|
951
|
+
|
|
952
|
+
Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.
|
|
953
|
+
Selector types: css, semantic, text, attribute.
|
|
954
|
+
|<|user|>
|
|
955
|
+
Intent: ${intent}
|
|
956
|
+
Parameters: ${paramsJson}
|
|
957
|
+
|
|
958
|
+
Simplified DOM:
|
|
959
|
+
${this._truncate(domHTML, 4e4)}
|
|
960
|
+
|
|
961
|
+
Accessibility Tree:
|
|
962
|
+
${this._truncate(a11y, 8e3)}
|
|
963
|
+
|
|
964
|
+
Compile AXIR:
|
|
965
|
+
|<|assistant|>
|
|
966
|
+
`;
|
|
697
967
|
}
|
|
698
968
|
_buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
|
|
699
|
-
return
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
].join("\n");
|
|
969
|
+
return `|<|system|>
|
|
970
|
+
You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.
|
|
971
|
+
|
|
972
|
+
Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}
|
|
973
|
+
|<|user|>
|
|
974
|
+
Broken selector: ${brokenSelector}
|
|
975
|
+
Semantic intent: ${semanticIntent}
|
|
976
|
+
|
|
977
|
+
Current DOM (truncated):
|
|
978
|
+
${this._truncate(domHTML, 2e4)}
|
|
979
|
+
|
|
980
|
+
New selector:
|
|
981
|
+
|<|assistant|>
|
|
982
|
+
`;
|
|
714
983
|
}
|
|
715
984
|
_truncate(text, maxChars) {
|
|
716
985
|
if (text.length <= maxChars) return text;
|
|
@@ -725,9 +994,8 @@ var LocalAXIRCompiler = class {
|
|
|
725
994
|
};
|
|
726
995
|
export {
|
|
727
996
|
AWIClient,
|
|
728
|
-
|
|
997
|
+
AXIRCompiler,
|
|
729
998
|
AdvisoryExecutor,
|
|
730
|
-
LocalAXIRCompiler
|
|
731
|
-
client_default as default
|
|
999
|
+
LocalAXIRCompiler
|
|
732
1000
|
};
|
|
733
1001
|
//# sourceMappingURL=index.mjs.map
|