@awi-protocol/sdk 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -31,10 +31,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
33
  AWIClient: () => AWIClient,
34
- AWIError: () => AWIError,
34
+ AXIRCompiler: () => AXIRCompiler,
35
35
  AdvisoryExecutor: () => AdvisoryExecutor,
36
- LocalAXIRCompiler: () => LocalAXIRCompiler,
37
- default: () => client_default
36
+ LocalAXIRCompiler: () => LocalAXIRCompiler
38
37
  });
39
38
  module.exports = __toCommonJS(index_exports);
40
39
 
@@ -242,7 +241,6 @@ var AWIClient = class {
242
241
  throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
243
242
  }
244
243
  };
245
- var client_default = AWIClient;
246
244
 
247
245
  // src/advisory-executor.ts
248
246
  var AdvisoryExecutor = class {
@@ -473,27 +471,312 @@ var AdvisoryExecutor = class {
473
471
  }
474
472
  };
475
473
 
474
+ // src/compiler/axir-compiler.ts
475
+ var cheerio = __toESM(require("cheerio"));
476
/**
 * Deterministic (non-LLM) compiler that turns a page's HTML into an AXIR-style
 * description: detected regions, a workflow graph, selector candidates and
 * form fields for a given intent.
 *
 * The HTML is parsed with cheerio in the constructor; `compile()` mutates that
 * parsed document (it strips noise elements) before analysing it.
 */
var AXIRCompiler = class {
  /** Cheerio root function for the parsed document. */
  $;
  /** Intent name supplied by the caller (e.g. "search", "login"). */
  intent;
  /** Caller-supplied parameters; only their keys are used by this class. */
  params;
  /** Domain label copied into the workflow; defaults to "unknown". */
  domain;

  /**
   * @param {string} html - Markup to analyse.
   * @param {{intent?: string, params?: object, domain?: string}} [options]
   */
  constructor(html, options = {}) {
    this.$ = cheerio.load(html);
    this.intent = options.intent;
    this.params = options.params || {};
    this.domain = options.domain || "unknown";
  }

  /**
   * Runs the full pipeline: simplify the DOM, detect regions, pick the region
   * that best matches the intent, then build the result object.
   *
   * @returns {object} `{ workflow, intents, selectors, fields, container,
   *   model_used, tokens_used, compilation_time_ms }`.
   */
  compile() {
    const start = Date.now();
    this.simplifyDOM();
    const regions = this.identifyRegions();
    const target = this.routeIntent(regions);
    return {
      workflow: this.buildWorkflow(target, regions),
      intents: this.mapIntents(),
      selectors: this.generateSelectors(target),
      fields: this.generateFields(target),
      container: target.container,
      model_used: "axir-deterministic-v1",
      tokens_used: 0,
      compilation_time_ms: Date.now() - start
    };
  }

  /**
   * Removes non-content elements, hidden elements, and empty div/span wrappers
   * from the parsed document (in place).
   */
  simplifyDOM() {
    this.$("script, style, svg, noscript, iframe, canvas, video, audio").remove();
    // Hidden elements go first so wrappers that only held hidden content are
    // seen as empty by the pruning pass below.
    this.$('[style*="display:none"], [style*="display: none"], [hidden], [aria-hidden="true"]').remove();
    // Walk the selection back-to-front so descendants are pruned before their
    // ancestors; this lets chains of nested empty wrappers collapse in one pass.
    // NOTE(review): relies on the selection being in document order.
    for (const el of this.$("div, span").toArray().reverse()) {
      const $el = this.$(el);
      if ($el.children().length === 0 && $el.text().trim() === "") $el.remove();
    }
  }

  /**
   * Scans the document for search, navigation, listing, form, pagination and
   * detail regions.
   *
   * @returns {Array<{type: string, element: object, confidence: number}>}
   */
  identifyRegions() {
    const regions = [];
    for (const el of this.$('form, [role="search"], input[type="search"]').toArray()) {
      const r = this.analyzeSearchRegion(this.$(el));
      if (r) regions.push(r);
    }
    for (const el of this.$('nav, [role="navigation"], header, .nav, .navbar, .menu').toArray()) {
      regions.push({ type: "navigation", element: this.$(el), confidence: 0.9 });
    }
    for (const el of this.$('ul, ol, [role="list"], .list, .results, .items, table, [role="grid"]').toArray()) {
      const $el = this.$(el);
      // Only containers with at least two item-like descendants count as lists.
      if ($el.find('li, tr, .item, [role="listitem"]').length > 1) {
        regions.push({ type: "listing", element: $el, confidence: 0.85 });
      }
    }
    for (const el of this.$("form").toArray()) {
      const $el = this.$(el);
      // Forms already claimed as a search region are not reported twice.
      if (!regions.some((r) => r.element.is($el))) regions.push({ type: "form", element: $el, confidence: 0.9 });
    }
    for (const el of this.$(".pagination, .pager, .pages").toArray()) {
      if (this.isPagination(this.$(el))) regions.push({ type: "pagination", element: this.$(el), confidence: 0.8 });
    }
    for (const el of this.$('article, [role="article"], .content, .main, main, .detail').toArray()) {
      regions.push({ type: "detail", element: this.$(el), confidence: 0.75 });
    }
    return regions;
  }

  /**
   * Decides whether an element is a search region.
   *
   * @param {object} $el - Cheerio selection for a candidate element.
   * @returns {{type: "search", element: object, confidence: number} | null}
   *   Confidence is 0.95 when both a text-like input and a button are present,
   *   0.7 when only one of them is; `null` when neither is, or when the element
   *   contains a password input.
   */
  analyzeSearchRegion($el) {
    const inputSelector = 'input[type="text"], input[type="search"], input:not([type])';
    // A form with a password field is a credentials form, not a search box.
    // Returning null lets identifyRegions() report it as a "form" region.
    if ($el.find('input[type="password"]').length > 0) return null;
    // `.find()` only looks at descendants, so the element itself must be
    // tested too; otherwise a bare <input type="search"> is never detected.
    const hasInput = $el.is(inputSelector) || $el.find(inputSelector).length > 0;
    const hasButton = $el.find('button, input[type="submit"]').length > 0;
    if (hasInput || hasButton) return { type: "search", element: $el, confidence: hasInput && hasButton ? 0.95 : 0.7 };
    return null;
  }

  /**
   * @param {object} $el - Cheerio selection.
   * @returns {boolean} True when the element's text contains a digit and a
   *   next/previous word or arrow glyph.
   */
  isPagination($el) {
    const text = $el.text().toLowerCase();
    return /\d+/.test(text) && (/next|>|\u203a|\u2192|\u00bb/.test(text) || /prev|previous|<|\u2039|\u2190|\u00ab/.test(text));
  }

  /**
   * Lower-cased intent used as a lookup key; a missing intent becomes "".
   *
   * @returns {string}
   */
  intentKey() {
    return String(this.intent ?? "").toLowerCase();
  }

  /**
   * Picks the region that best matches the intent. Each intent has an ordered
   * list of preferred region types; a region scores
   * `(listLength - positionInList) * confidence`. When nothing scores above
   * zero, the region with the most descendants is used instead.
   *
   * @param {Array<object>} regions - Output of `identifyRegions()`.
   * @returns {{region: object, container: string}}
   */
  routeIntent(regions) {
    const intentMap = {
      search: ["search", "form"],
      search_jobs: ["search", "listing", "form"],
      extract_list: ["listing", "search", "detail"],
      extract_detail: ["detail", "listing"],
      fill_form: ["form", "search"],
      navigate: ["navigation", "listing"],
      login: ["form"],
      filter: ["search", "listing"],
      sort: ["listing", "search"],
      scrape: ["listing", "detail", "search"]
    };
    const key = this.intentKey();
    // Own-property check: intents such as "constructor" or "__proto__" must
    // not resolve to members inherited from Object.prototype.
    const targetTypes = Object.prototype.hasOwnProperty.call(intentMap, key) ? intentMap[key] : ["search", "listing", "form"];
    let best = null;
    let bestScore = 0;
    for (const r of regions) {
      const match = targetTypes.indexOf(r.type);
      const score = match >= 0 ? (targetTypes.length - match) * r.confidence : 0;
      if (score > bestScore) {
        bestScore = score;
        best = r;
      }
    }
    if (!best) best = this.findLargestRegion(regions);
    return { region: best, container: this.generateContainerSelector(best.element) };
  }

  /**
   * @param {Array<object>} regions
   * @returns {object} The region with the most descendant elements, or a
   *   synthetic "unknown" region wrapping `<body>` when the list is empty.
   */
  findLargestRegion(regions) {
    if (regions.length === 0) return { type: "unknown", element: this.$("body"), confidence: 0.5 };
    return regions.reduce((l, c) => c.element.find("*").length > l.element.find("*").length ? c : l);
  }

  /**
   * Builds the workflow graph: one node per region (id `<type>_<index>`) and
   * an edge for every ordered pair of regions that `inferEdge()` recognises.
   *
   * @param {object} _target - Unused; kept for signature compatibility.
   * @param {Array<object>} all - All detected regions.
   * @returns {{nodes: object, edges: Array<object>, entry_points: string[],
   *   exit_points: string[], domain: string, page_type: string}}
   */
  buildWorkflow(_target, all) {
    const nodes = {};
    const edges = [];
    const entry = [];
    const exit = [];
    all.forEach((r, i) => {
      const id = `${r.type}_${i}`;
      const raw = r.element[0];
      nodes[id] = {
        node_id: id,
        element_type: this.mapType(r.type),
        semantic_role: r.type,
        intent: this.intent,
        tag: raw?.tagName?.toLowerCase(),
        selector_candidates: this.buildCandidates(r.element),
        confidence: r.confidence
      };
      if (r.type === "navigation") entry.push(id);
      if (r.type === "listing" || r.type === "detail") exit.push(id);
    });
    all.forEach((f, fi) => all.forEach((t, ti) => {
      if (fi !== ti) {
        const e = this.inferEdge(f, t, fi, ti);
        if (e) edges.push(e);
      }
    }));
    // With no navigation region, fall back to the first node as entry point.
    if (entry.length === 0 && Object.keys(nodes).length > 0) entry.push(Object.keys(nodes)[0]);
    return { nodes, edges, entry_points: entry, exit_points: exit, domain: this.domain, page_type: this.inferPageType(all) };
  }

  /**
   * @param {string} t - Region type.
   * @returns {string} Workflow element type, or "unknown" for unmapped types.
   */
  mapType(t) {
    const m = {
      search: "search",
      navigation: "navigation",
      listing: "list",
      form: "form",
      pagination: "pagination",
      detail: "container"
    };
    return m[t] || "unknown";
  }

  /**
   * Returns the edge implied by a (from, to) region pair, if any. Node ids use
   * the same `<type>_<index>` scheme as `buildWorkflow()`.
   *
   * @param {object} f - Source region.
   * @param {object} t - Destination region.
   * @param {number} fi - Index of `f` in the region list.
   * @param {number} ti - Index of `t` in the region list.
   * @returns {{from_node: string, to_node: string, action: string,
   *   probability: number} | null}
   */
  inferEdge(f, t, fi, ti) {
    if (f.type === "search" && t.type === "listing") return { from_node: `search_${fi}`, to_node: `listing_${ti}`, action: "submit_search", probability: 0.9 };
    if (f.type === "navigation" && t.type === "search") return { from_node: `navigation_${fi}`, to_node: `search_${ti}`, action: "navigate_to_search", probability: 0.7 };
    if (f.type === "listing" && t.type === "pagination") return { from_node: `listing_${fi}`, to_node: `pagination_${ti}`, action: "next_page", probability: 0.8 };
    if (f.type === "pagination" && t.type === "listing") return { from_node: `pagination_${fi}`, to_node: `listing_${ti}`, action: "load_results", probability: 0.95 };
    return null;
  }

  /**
   * Classifies the page from the set of region types present.
   *
   * @param {Array<{type: string}>} regions
   * @returns {string} "search", "listing", "form", "landing", "detail" or
   *   "unknown".
   */
  inferPageType(regions) {
    const t = regions.map((r) => r.type);
    if (t.includes("search") && t.includes("listing")) return "search";
    if (t.includes("listing")) return "listing";
    if (t.includes("form")) return "form";
    if (t.includes("search")) return "search";
    if (t.includes("navigation")) return "landing";
    // Checked last so pages that also have navigation keep their old label.
    if (t.includes("detail")) return "detail";
    return "unknown";
  }

  /**
   * Collects selector candidates for the target region and for the inputs,
   * buttons and short-text links inside it.
   *
   * @param {{region: {element: object}}} target
   * @returns {Object<string, Array<object>>} Map of name to candidate list.
   *   Later elements with the same derived name overwrite earlier ones.
   */
  generateSelectors(target) {
    const s = {};
    const $el = target.region.element;
    s.container = this.buildCandidates($el);
    for (const el of $el.find("input, textarea, select").toArray()) {
      const n = this.inferFieldName(this.$(el));
      if (n) s[n] = this.buildCandidates(this.$(el));
    }
    for (const el of $el.find('button, input[type="submit"], input[type="button"]').toArray()) {
      const $btn = this.$(el);
      const label = $btn.text().trim() || String($btn.val() || "button");
      s[`btn_${this.slugify(label)}`] = this.buildCandidates($btn);
    }
    for (const el of $el.find("a").toArray()) {
      const $a = this.$(el);
      const t = $a.text().trim();
      if (t && t.length < 50) s[`link_${this.slugify(t)}`] = this.buildCandidates($a);
    }
    return s;
  }

  /**
   * Builds selector candidates for one element, in priority order: id, class
   * list, name, type, placeholder, role, aria-label, text content.
   *
   * @param {object} $el - Cheerio selection; only its first element is used.
   * @returns {Array<{type: string, value: string, priority: number,
   *   confidence: number}>} Empty when the selection is empty.
   */
  buildCandidates($el) {
    const c = [];
    const el = $el[0];
    if (!el) return c;
    const id = $el.attr("id");
    // Ids starting with a digit are skipped, as in the original heuristic.
    if (id && !id.match(/^\d/)) c.push({ type: "css", value: `#${this.escape(id)}`, priority: 1, confidence: 0.99 });
    // Framework/behaviour hook classes are excluded from the class selector.
    const classes = ($el.attr("class") || "").split(/\s+/).filter((x) => x && !x.match(/^js-|^ng-|^vue-|^data-/));
    if (classes.length) c.push({ type: "css", value: `.${classes.map((x) => this.escape(x)).join(".")}`, priority: 2, confidence: 0.85 });
    const tag = el.tagName?.toLowerCase() || "";
    const name = $el.attr("name");
    const type = $el.attr("type");
    const placeholder = $el.attr("placeholder");
    if (name) c.push({ type: "css", value: `${tag}[name="${this.q(name)}"]`, priority: 3, confidence: 0.9 });
    if (type) c.push({ type: "css", value: `${tag}[type="${this.q(type)}"]`, priority: 4, confidence: 0.8 });
    if (placeholder) c.push({ type: "css", value: `${tag}[placeholder="${this.q(placeholder)}"]`, priority: 5, confidence: 0.75 });
    const role = $el.attr("role");
    if (role) c.push({ type: "semantic", value: `[role="${this.q(role)}"]`, priority: 6, confidence: 0.9 });
    const al = $el.attr("aria-label");
    if (al) c.push({ type: "semantic", value: `[aria-label="${this.q(al)}"]`, priority: 7, confidence: 0.85 });
    const text = $el.text().trim();
    if (text && text.length < 100) c.push({ type: "text", value: text, priority: 8, confidence: 0.7 });
    return c;
  }

  /**
   * Describes every nameable input, textarea and select in the target region.
   *
   * @param {{region: {element: object}}} target
   * @returns {Array<{name: string, selector: string,
   *   transform: (string|undefined), required: boolean}>}
   */
  generateFields(target) {
    const f = [];
    for (const el of target.region.element.find("input, textarea, select").toArray()) {
      const $el = this.$(el);
      const name = this.inferFieldName($el);
      if (!name) continue;
      f.push({
        name,
        selector: this.bestSelector($el),
        transform: this.inferTransform($el),
        required: $el.attr("required") !== void 0
      });
    }
    return f;
  }

  /**
   * Derives a slug name for a form control from, in order: its associated
   * `<label for>` text, placeholder, aria-label, and name attribute. A source
   * whose slug is empty is skipped so the next source can be tried.
   *
   * @param {object} $el - Cheerio selection for the control.
   * @returns {string | null} The first non-empty slug, or null.
   */
  inferFieldName($el) {
    const sources = [];
    const id = $el.attr("id");
    if (id) {
      // The id is quoted-string escaped so ids containing `"` or `\` cannot
      // break the attribute selector.
      const $label = this.$(`label[for="${this.q(id)}"]`);
      if ($label.length) sources.push($label.text());
    }
    sources.push($el.attr("placeholder"), $el.attr("aria-label"), $el.attr("name"));
    for (const raw of sources) {
      if (!raw) continue;
      const slug = this.slugify(raw);
      if (slug) return slug;
    }
    return null;
  }

  /**
   * @param {object} $el - Cheerio selection for a form control.
   * @returns {string | undefined} "number", "email", "date", "boolean" (for
   *   checkboxes), "select", or undefined when no transform applies.
   */
  inferTransform($el) {
    const t = $el.attr("type");
    if (t === "number") return "number";
    if (t === "email") return "email";
    if (t === "date") return "date";
    if (t === "checkbox") return "boolean";
    if ($el.is("select")) return "select";
    return void 0;
  }

  /**
   * @param {object} $el - Cheerio selection.
   * @returns {string} Highest-priority candidate value, else the lower-cased
   *   tag name, else "*".
   */
  bestSelector($el) {
    const c = this.buildCandidates($el);
    if (c.length) return c[0].value;
    const raw = $el[0];
    return raw?.tagName?.toLowerCase() || "*";
  }

  /**
   * @param {object} $el - Cheerio selection.
   * @returns {string} Highest-priority candidate value, or "body" when the
   *   element yields no candidates.
   */
  generateContainerSelector($el) {
    const c = this.buildCandidates($el);
    return c.length ? c[0].value : "body";
  }

  /**
   * Describes the action plan for the configured intent.
   *
   * @returns {Array<{intent: string, action: string, parameters: string[],
   *   context: string}>} A single-element list; unknown intents get a generic
   *   "interact" entry whose parameters are the keys of `params`.
   */
  mapIntents() {
    const m = {
      search: { intent: "search", action: "fill_and_submit", parameters: ["query", "location", "filters"], context: "Enter search terms and submit form" },
      search_jobs: { intent: "search_jobs", action: "fill_and_submit", parameters: ["query", "location", "experience_level", "job_type"], context: "Search for job listings with optional filters" },
      extract_list: { intent: "extract_list", action: "extract_fields", parameters: ["items", "title", "url", "metadata"], context: "Extract structured data from list items" },
      extract_detail: { intent: "extract_detail", action: "extract_fields", parameters: ["title", "description", "metadata", "links"], context: "Extract structured data from detail page" },
      fill_form: { intent: "fill_form", action: "fill_and_submit", parameters: Object.keys(this.params), context: "Fill form fields with provided parameters" },
      navigate: { intent: "navigate", action: "click", parameters: ["target_url", "link_text"], context: "Click navigation link to target page" },
      login: { intent: "login", action: "fill_and_submit", parameters: ["username", "password"], context: "Enter credentials and submit login form" },
      scrape: { intent: "scrape", action: "extract_fields", parameters: ["all_visible_text", "links", "images", "structured_data"], context: "Extract all visible content from the page" }
    };
    const key = this.intentKey();
    // Own-property check, for the same reason as in routeIntent().
    if (Object.prototype.hasOwnProperty.call(m, key)) return [m[key]];
    return [{ intent: this.intent, action: "interact", parameters: Object.keys(this.params), context: `Perform ${this.intent} on the page` }];
  }

  /**
   * Converts free text to a lower-case, underscore-separated key of at most
   * 50 characters with no leading or trailing underscore.
   *
   * @param {string} t
   * @returns {string} May be empty when `t` has no ASCII word characters.
   */
  slugify(t) {
    return String(t ?? "")
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      .replace(/[\s_-]+/g, "_")
      .replace(/^_|_$/g, "")
      .substring(0, 50)
      // Truncation can cut right after a separator; trim it again.
      .replace(/_$/, "");
  }

  /**
   * Escapes a string for use as a CSS identifier (id or class name),
   * following the CSSOM "serialize an identifier" rules: control characters
   * and a leading digit become hex escapes, and any other ASCII character
   * that is not a letter, digit, `-` or `_` is backslash-escaped.
   *
   * @param {string} s
   * @returns {string}
   */
  escape(s) {
    const str = String(s);
    if (str === "-") return "\\-";
    const first = str.charCodeAt(0);
    let out = "";
    for (let i = 0; i < str.length; i++) {
      const code = str.charCodeAt(i);
      if (code === 0) {
        out += "\uFFFD";
        continue;
      }
      const isDigit = code >= 0x30 && code <= 0x39;
      const needsHexEscape =
        (code >= 0x01 && code <= 0x1f) ||
        code === 0x7f ||
        (i === 0 && isDigit) ||
        (i === 1 && isDigit && first === 0x2d);
      if (needsHexEscape) {
        // The trailing space terminates the hex escape.
        out += `\\${code.toString(16)} `;
        continue;
      }
      const isPlain =
        code >= 0x80 ||
        code === 0x2d ||
        code === 0x5f ||
        isDigit ||
        (code >= 0x41 && code <= 0x5a) ||
        (code >= 0x61 && code <= 0x7a);
      out += isPlain ? str.charAt(i) : `\\${str.charAt(i)}`;
    }
    return out;
  }

  /**
   * Escapes a value for use inside a double-quoted CSS attribute string.
   * Backslashes are escaped as well as quotes, so an existing backslash
   * cannot combine with the following character into an escape sequence.
   *
   * @param {string} s
   * @returns {string}
   */
  q(s) {
    return String(s).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  }
};
752
+
476
753
  // src/compiler/local-axir.ts
477
754
  var fs = __toESM(require("fs"));
478
755
  var path = __toESM(require("path"));
479
756
  var os = __toESM(require("os"));
480
757
  var https = __toESM(require("https"));
481
758
  var http = __toESM(require("http"));
482
-
483
- // src/compiler/grammar/axir-schema.gbnf
484
- var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" 
string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
485
-
486
- // src/compiler/local-axir.ts
759
+ var import_url = require("url");
487
760
  var nativeAvailable = false;
488
761
  var getLlama;
489
- try {
490
- const llama = require("node-llama-cpp");
491
- getLlama = llama.getLlama;
762
+ var LlamaModel;
763
+ var LlamaContext;
764
+ var LlamaGrammar;
765
+ var llamaPromise = import("node-llama-cpp").then((m) => {
492
766
  nativeAvailable = true;
493
- } catch {
767
+ getLlama = m.getLlama;
768
+ LlamaModel = m.LlamaModel;
769
+ LlamaContext = m.LlamaContext;
770
+ LlamaGrammar = m.LlamaGrammar;
771
+ return m;
772
+ }).catch((err) => {
494
773
  nativeAvailable = false;
495
- }
496
- var DEFAULT_MODEL_URL = "https://huggingface.co/bartowski/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
774
+ if (process.env.AWI_DEBUG) {
775
+ console.error("[AWI] node-llama-cpp load error:", err.message);
776
+ }
777
+ return null;
778
+ });
779
+ var DEFAULT_MODEL_URL = "https://github.com/RayAKaan/AWI/releases/download/v0.0.0-models/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
497
780
  var DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
498
781
  var LocalAXIRCompiler = class {
499
782
  modelPath;
@@ -520,14 +803,10 @@ var LocalAXIRCompiler = class {
520
803
  this.onDownloadProgress = options.onDownloadProgress;
521
804
  this.onStatus = options.onStatus;
522
805
  }
523
- // -------------------------------------------------------------------------
524
- // Public API
525
- // -------------------------------------------------------------------------
526
806
  async compile(domHTML, a11yTree, intent, params) {
527
807
  await this._ensureModel();
528
808
  await this._ensureGrammar();
529
- const a11y = a11yTree ?? "No accessibility tree available.";
530
- const prompt = this._buildCompilePrompt(domHTML, a11y, intent, params);
809
+ const prompt = this._buildCompilePrompt(domHTML, a11yTree, intent, params);
531
810
  const start = Date.now();
532
811
  this._status("Compiling AXIR locally...");
533
812
  const resultText = await this._complete(prompt, 4096, 0.3);
@@ -570,22 +849,26 @@ var LocalAXIRCompiler = class {
570
849
  this.ready = false;
571
850
  }
572
851
  }
573
- // -------------------------------------------------------------------------
574
- // Model lifecycle
575
- // -------------------------------------------------------------------------
576
852
  async _ensureModel() {
577
853
  if (this.ready) return;
854
+ await llamaPromise;
855
+ if (!getLlama) {
856
+ throw new Error("node-llama-cpp failed to load. Is it installed?");
857
+ }
578
858
  if (!fs.existsSync(this.modelPath)) {
579
859
  await this._downloadModel();
580
860
  }
581
861
  this._status("Loading local model...");
582
862
  const llama = await getLlama();
583
863
  const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
584
- this.model = new llama.LlamaModel({
864
+ this.model = new LlamaModel({
865
+ llama,
585
866
  modelPath: this.modelPath,
586
867
  gpuLayers
587
868
  });
588
- this.context = await this.model.createContext({
869
+ this.context = new LlamaContext({
870
+ llama,
871
+ model: this.model,
589
872
  contextSize: this.contextSize
590
873
  });
591
874
  this.ready = true;
@@ -593,9 +876,10 @@ var LocalAXIRCompiler = class {
593
876
  }
594
877
  async _ensureGrammar() {
595
878
  if (this.grammar) return;
596
- const llama = await getLlama();
597
- this.grammar = new llama.LlamaGrammar({
598
- grammar: axir_schema_default
879
+ const grammarPath = path.join(__dirname, "grammar", "axir-schema.gbnf");
880
+ this.grammar = new LlamaGrammar({
881
+ llama: await getLlama(),
882
+ grammar: fs.readFileSync(grammarPath, "utf-8")
599
883
  });
600
884
  }
601
885
  _autoDetectGPULayers() {
@@ -604,17 +888,14 @@ var LocalAXIRCompiler = class {
604
888
  }
605
889
  return 0;
606
890
  }
607
- // -------------------------------------------------------------------------
608
- // Resumable model download
609
- // -------------------------------------------------------------------------
610
891
  async _downloadModel() {
611
892
  const dir = path.dirname(this.modelPath);
612
893
  if (!fs.existsSync(dir)) {
613
894
  fs.mkdirSync(dir, { recursive: true });
614
895
  }
615
896
  const tempPath = `${this.modelPath}.tmp`;
616
- const urlObj = new URL(this.modelUrl);
617
- const protocol = urlObj.protocol === "https:" ? https : http;
897
+ const url = new import_url.URL(this.modelUrl);
898
+ const protocol = url.protocol === "https:" ? https : http;
618
899
  let startByte = 0;
619
900
  if (fs.existsSync(tempPath)) {
620
901
  startByte = fs.statSync(tempPath).size;
@@ -630,10 +911,10 @@ var LocalAXIRCompiler = class {
630
911
  headers["Range"] = `bytes=${startByte}-`;
631
912
  }
632
913
  const request = protocol.get(
633
- urlObj,
914
+ url,
634
915
  { headers },
635
916
  (response) => {
636
- if (response.statusCode === 301 || response.statusCode === 302) {
917
+ if (response.statusCode === 302 || response.statusCode === 301) {
637
918
  if (response.headers.location) {
638
919
  this.modelUrl = response.headers.location;
639
920
  return this._downloadModel().then(resolve).catch(reject);
@@ -641,7 +922,7 @@ var LocalAXIRCompiler = class {
641
922
  }
642
923
  if (response.statusCode !== 200 && response.statusCode !== 206) {
643
924
  return reject(
644
- new Error(`Model download failed: HTTP ${response.statusCode}`)
925
+ new Error(`Download failed: HTTP ${response.statusCode}`)
645
926
  );
646
927
  }
647
928
  const total = parseInt(
@@ -675,76 +956,69 @@ var LocalAXIRCompiler = class {
675
956
  });
676
957
  });
677
958
  }
678
- // -------------------------------------------------------------------------
679
- // Inference
680
- // -------------------------------------------------------------------------
681
959
  async _complete(prompt, maxTokens, temperature) {
682
960
  if (!this.context) throw new Error("Model not loaded");
683
961
  const sequence = this.context.getSequence();
684
962
  await sequence.evaluate(prompt);
685
- const response = await sequence.generateResponse(maxTokens, {
963
+ const result = await sequence.generateResponse(maxTokens, {
686
964
  temperature,
687
965
  grammar: this.grammar
688
966
  });
689
967
  let text = "";
690
- for await (const token of response) {
968
+ for await (const token of result) {
691
969
  text += token;
692
970
  }
693
971
  return text;
694
972
  }
695
- // -------------------------------------------------------------------------
696
- // Prompt builders
697
- // -------------------------------------------------------------------------
698
973
  _buildCompilePrompt(domHTML, a11yTree, intent, params) {
699
974
  const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
700
- return [
701
- `<|system|>`,
702
- `You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.`,
703
- ``,
704
- `Output MUST be valid JSON matching this schema:`,
705
- `- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}`,
706
- `- workflow.edges: list of {from_node, to_node, action, condition?, probability}`,
707
- `- workflow.entry_points: list of starting node_ids`,
708
- `- workflow.exit_points: list of terminal node_ids`,
709
- `- workflow.domain: the domain name`,
710
- `- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown`,
711
- `- intents: list of {intent, action, parameters[], context}`,
712
- `- selectors: map of selector_name -> list of {type, value, priority}`,
713
- `- fields: list of {name, selector, transform?, required}`,
714
- `- container?: string (optional container selector name)`,
715
- ``,
716
- `Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.`,
717
- `Selector types: css, semantic, text, attribute.`,
718
- `<|user|>`,
719
- `Intent: ${intent}`,
720
- `Parameters: ${paramsJson}`,
721
- ``,
722
- `Simplified DOM:`,
723
- `${this._truncate(domHTML, 4e4)}`,
724
- ``,
725
- `Accessibility Tree:`,
726
- `${this._truncate(a11yTree, 8e3)}`,
727
- ``,
728
- `Compile AXIR:`,
729
- `<|assistant|>`
730
- ].join("\n");
975
+ const a11y = a11yTree || "No accessibility tree available.";
976
+ return `|<|system|>
977
+ You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.
978
+
979
+ Output MUST be valid JSON matching this schema:
980
+ - workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}
981
+ - workflow.edges: list of {from_node, to_node, action, condition?, probability}
982
+ - workflow.entry_points: list of starting node_ids
983
+ - workflow.exit_points: list of terminal node_ids
984
+ - workflow.domain: the domain name
985
+ - workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown
986
+ - intents: list of {intent, action, parameters[], context}
987
+ - selectors: map of selector_name -> list of {type, value, priority}
988
+ - fields: list of {name, selector, transform?, required}
989
+ - container?: string (optional container selector name)
990
+
991
+ Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.
992
+ Selector types: css, semantic, text, attribute.
993
+ |<|user|>
994
+ Intent: ${intent}
995
+ Parameters: ${paramsJson}
996
+
997
+ Simplified DOM:
998
+ ${this._truncate(domHTML, 4e4)}
999
+
1000
+ Accessibility Tree:
1001
+ ${this._truncate(a11y, 8e3)}
1002
+
1003
+ Compile AXIR:
1004
+ |<|assistant|>
1005
+ `;
731
1006
  }
732
1007
  _buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
733
- return [
734
- `<|system|>`,
735
- `You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.`,
736
- ``,
737
- `Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}`,
738
- `<|user|>`,
739
- `Broken selector: ${brokenSelector}`,
740
- `Semantic intent: ${semanticIntent}`,
741
- ``,
742
- `Current DOM (truncated):`,
743
- `${this._truncate(domHTML, 2e4)}`,
744
- ``,
745
- `New selector:`,
746
- `<|assistant|>`
747
- ].join("\n");
1008
+ return `|<|system|>
1009
+ You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.
1010
+
1011
+ Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}
1012
+ |<|user|>
1013
+ Broken selector: ${brokenSelector}
1014
+ Semantic intent: ${semanticIntent}
1015
+
1016
+ Current DOM (truncated):
1017
+ ${this._truncate(domHTML, 2e4)}
1018
+
1019
+ New selector:
1020
+ |<|assistant|>
1021
+ `;
748
1022
  }
749
1023
  _truncate(text, maxChars) {
750
1024
  if (text.length <= maxChars) return text;
@@ -760,7 +1034,7 @@ var LocalAXIRCompiler = class {
760
1034
  // Annotate the CommonJS export names for ESM import in node:
761
1035
  0 && (module.exports = {
762
1036
  AWIClient,
763
- AWIError,
1037
+ AXIRCompiler,
764
1038
  AdvisoryExecutor,
765
1039
  LocalAXIRCompiler
766
1040
  });