@awi-protocol/sdk 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,10 +1,3 @@
1
- var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
- get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
- }) : x)(function(x) {
4
- if (typeof require !== "undefined") return require.apply(this, arguments);
5
- throw Error('Dynamic require of "' + x + '" is not supported');
6
- });
7
-
8
1
  // src/client.ts
9
2
  import fetch from "cross-fetch";
10
3
  var AWIError = class extends Error {
@@ -209,7 +202,6 @@ var AWIClient = class {
209
202
  throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
210
203
  }
211
204
  };
212
- var client_default = AWIClient;
213
205
 
214
206
  // src/advisory-executor.ts
215
207
  var AdvisoryExecutor = class {
@@ -440,27 +432,312 @@ var AdvisoryExecutor = class {
440
432
  }
441
433
  };
442
434
 
435
+ // src/compiler/axir-compiler.ts
436
+ import * as cheerio from "cheerio";
437
+ var AXIRCompiler = class {
438
+ $;
439
+ intent;
440
+ params;
441
+ domain;
442
+ constructor(html, options) {
443
+ this.$ = cheerio.load(html);
444
+ this.intent = options.intent;
445
+ this.params = options.params || {};
446
+ this.domain = options.domain || "unknown";
447
+ }
448
+ compile() {
449
+ const start = Date.now();
450
+ this.simplifyDOM();
451
+ const regions = this.identifyRegions();
452
+ const target = this.routeIntent(regions);
453
+ return {
454
+ workflow: this.buildWorkflow(target, regions),
455
+ intents: this.mapIntents(),
456
+ selectors: this.generateSelectors(target),
457
+ fields: this.generateFields(target),
458
+ container: target.container,
459
+ model_used: "axir-deterministic-v1",
460
+ tokens_used: 0,
461
+ compilation_time_ms: Date.now() - start
462
+ };
463
+ }
464
+ simplifyDOM() {
465
+ this.$("script, style, svg, noscript, iframe, canvas, video, audio").remove();
466
+ for (const el of this.$("div, span").toArray()) {
467
+ const $el = this.$(el);
468
+ if ($el.children().length === 0 && $el.text().trim() === "") $el.remove();
469
+ }
470
+ this.$('[style*="display:none"], [style*="display: none"], [hidden], [aria-hidden="true"]').remove();
471
+ }
472
+ identifyRegions() {
473
+ const regions = [];
474
+ for (const el of this.$('form, [role="search"], input[type="search"]').toArray()) {
475
+ const r = this.analyzeSearchRegion(this.$(el));
476
+ if (r) regions.push(r);
477
+ }
478
+ for (const el of this.$('nav, [role="navigation"], header, .nav, .navbar, .menu').toArray()) {
479
+ regions.push({ type: "navigation", element: this.$(el), confidence: 0.9 });
480
+ }
481
+ for (const el of this.$('ul, ol, [role="list"], .list, .results, .items, table, [role="grid"]').toArray()) {
482
+ const $el = this.$(el);
483
+ if ($el.find('li, tr, .item, [role="listitem"]').length > 1) {
484
+ regions.push({ type: "listing", element: $el, confidence: 0.85 });
485
+ }
486
+ }
487
+ for (const el of this.$("form").toArray()) {
488
+ const $el = this.$(el);
489
+ if (!regions.some((r) => r.element.is($el))) regions.push({ type: "form", element: $el, confidence: 0.9 });
490
+ }
491
+ for (const el of this.$(".pagination, .pager, .pages").toArray()) {
492
+ if (this.isPagination(this.$(el))) regions.push({ type: "pagination", element: this.$(el), confidence: 0.8 });
493
+ }
494
+ for (const el of this.$('article, [role="article"], .content, .main, main, .detail').toArray()) {
495
+ regions.push({ type: "detail", element: this.$(el), confidence: 0.75 });
496
+ }
497
+ return regions;
498
+ }
499
+ analyzeSearchRegion($el) {
500
+ const hasInput = $el.find('input[type="text"], input[type="search"], input:not([type])').length > 0;
501
+ const hasButton = $el.find('button, input[type="submit"]').length > 0;
502
+ if (hasInput || hasButton) return { type: "search", element: $el, confidence: hasInput && hasButton ? 0.95 : 0.7 };
503
+ return null;
504
+ }
505
+ isPagination($el) {
506
+ const text = $el.text().toLowerCase();
507
+ return /\d+/.test(text) && (/next|>|\u203a|\u2192|\u00bb/.test(text) || /prev|previous|<|\u2039|\u2190|\u00ab/.test(text));
508
+ }
509
+ routeIntent(regions) {
510
+ const intentMap = {
511
+ search: ["search", "form"],
512
+ search_jobs: ["search", "listing", "form"],
513
+ extract_list: ["listing", "search", "detail"],
514
+ extract_detail: ["detail", "listing"],
515
+ fill_form: ["form", "search"],
516
+ navigate: ["navigation", "listing"],
517
+ login: ["form"],
518
+ filter: ["search", "listing"],
519
+ sort: ["listing", "search"],
520
+ scrape: ["listing", "detail", "search"]
521
+ };
522
+ const targetTypes = intentMap[this.intent.toLowerCase()] || ["search", "listing", "form"];
523
+ let best = null;
524
+ let bestScore = 0;
525
+ for (const r of regions) {
526
+ const match = targetTypes.indexOf(r.type);
527
+ const score = match >= 0 ? (targetTypes.length - match) * r.confidence : 0;
528
+ if (score > bestScore) {
529
+ bestScore = score;
530
+ best = r;
531
+ }
532
+ }
533
+ if (!best) best = this.findLargestRegion(regions);
534
+ return { region: best, container: this.generateContainerSelector(best.element) };
535
+ }
536
+ findLargestRegion(regions) {
537
+ if (regions.length === 0) return { type: "unknown", element: this.$("body"), confidence: 0.5 };
538
+ return regions.reduce((l, c) => c.element.find("*").length > l.element.find("*").length ? c : l);
539
+ }
540
+ buildWorkflow(_target, all) {
541
+ const nodes = {};
542
+ const edges = [];
543
+ const entry = [];
544
+ const exit = [];
545
+ all.forEach((r, i) => {
546
+ const id = `${r.type}_${i}`;
547
+ const raw = r.element[0];
548
+ nodes[id] = {
549
+ node_id: id,
550
+ element_type: this.mapType(r.type),
551
+ semantic_role: r.type,
552
+ intent: this.intent,
553
+ tag: raw?.tagName?.toLowerCase(),
554
+ selector_candidates: this.buildCandidates(r.element),
555
+ confidence: r.confidence
556
+ };
557
+ if (r.type === "navigation") entry.push(id);
558
+ if (r.type === "listing" || r.type === "detail") exit.push(id);
559
+ });
560
+ all.forEach((f, fi) => all.forEach((t, ti) => {
561
+ if (fi !== ti) {
562
+ const e = this.inferEdge(f, t, fi, ti);
563
+ if (e) edges.push(e);
564
+ }
565
+ }));
566
+ if (entry.length === 0 && Object.keys(nodes).length > 0) entry.push(Object.keys(nodes)[0]);
567
+ return { nodes, edges, entry_points: entry, exit_points: exit, domain: this.domain, page_type: this.inferPageType(all) };
568
+ }
569
+ mapType(t) {
570
+ const m = {
571
+ search: "search",
572
+ navigation: "navigation",
573
+ listing: "list",
574
+ form: "form",
575
+ pagination: "pagination",
576
+ detail: "container"
577
+ };
578
+ return m[t] || "unknown";
579
+ }
580
+ inferEdge(f, t, fi, ti) {
581
+ if (f.type === "search" && t.type === "listing") return { from_node: `search_${fi}`, to_node: `listing_${ti}`, action: "submit_search", probability: 0.9 };
582
+ if (f.type === "navigation" && t.type === "search") return { from_node: `navigation_${fi}`, to_node: `search_${ti}`, action: "navigate_to_search", probability: 0.7 };
583
+ if (f.type === "listing" && t.type === "pagination") return { from_node: `listing_${fi}`, to_node: `pagination_${ti}`, action: "next_page", probability: 0.8 };
584
+ if (f.type === "pagination" && t.type === "listing") return { from_node: `pagination_${fi}`, to_node: `listing_${ti}`, action: "load_results", probability: 0.95 };
585
+ return null;
586
+ }
587
+ inferPageType(regions) {
588
+ const t = regions.map((r) => r.type);
589
+ if (t.includes("search") && t.includes("listing")) return "search";
590
+ if (t.includes("listing")) return "listing";
591
+ if (t.includes("form")) return "form";
592
+ if (t.includes("search")) return "search";
593
+ if (t.includes("navigation")) return "landing";
594
+ return "unknown";
595
+ }
596
+ generateSelectors(target) {
597
+ const s = {};
598
+ const $el = target.region.element;
599
+ s.container = this.buildCandidates($el);
600
+ for (const el of $el.find("input, textarea, select").toArray()) {
601
+ const n = this.inferFieldName(this.$(el));
602
+ if (n) s[n] = this.buildCandidates(this.$(el));
603
+ }
604
+ for (const el of $el.find('button, input[type="submit"], input[type="button"]').toArray()) {
605
+ const $btn = this.$(el);
606
+ const label = $btn.text().trim() || String($btn.val() || "button");
607
+ s[`btn_${this.slugify(label)}`] = this.buildCandidates($btn);
608
+ }
609
+ for (const el of $el.find("a").toArray()) {
610
+ const $a = this.$(el);
611
+ const t = $a.text().trim();
612
+ if (t && t.length < 50) s[`link_${this.slugify(t)}`] = this.buildCandidates($a);
613
+ }
614
+ return s;
615
+ }
616
+ buildCandidates($el) {
617
+ const c = [];
618
+ const el = $el[0];
619
+ if (!el) return c;
620
+ const id = $el.attr("id");
621
+ if (id && !id.match(/^\d/)) c.push({ type: "css", value: `#${this.escape(id)}`, priority: 1, confidence: 0.99 });
622
+ const classes = ($el.attr("class") || "").split(/\s+/).filter((x) => x && !x.match(/^js-|^ng-|^vue-|^data-/));
623
+ if (classes.length) c.push({ type: "css", value: `.${classes.map((x) => this.escape(x)).join(".")}`, priority: 2, confidence: 0.85 });
624
+ const raw = el;
625
+ const tag = raw.tagName?.toLowerCase() || "";
626
+ const name = $el.attr("name");
627
+ const type = $el.attr("type");
628
+ const placeholder = $el.attr("placeholder");
629
+ if (name) c.push({ type: "css", value: `${tag}[name="${this.q(name)}"]`, priority: 3, confidence: 0.9 });
630
+ if (type) c.push({ type: "css", value: `${tag}[type="${type}"]`, priority: 4, confidence: 0.8 });
631
+ if (placeholder) c.push({ type: "css", value: `${tag}[placeholder="${this.q(placeholder)}"]`, priority: 5, confidence: 0.75 });
632
+ const role = $el.attr("role");
633
+ if (role) c.push({ type: "semantic", value: `[role="${role}"]`, priority: 6, confidence: 0.9 });
634
+ const al = $el.attr("aria-label");
635
+ if (al) c.push({ type: "semantic", value: `[aria-label="${this.q(al)}"]`, priority: 7, confidence: 0.85 });
636
+ const text = $el.text().trim();
637
+ if (text && text.length < 100) c.push({ type: "text", value: text, priority: 8, confidence: 0.7 });
638
+ return c;
639
+ }
640
+ generateFields(target) {
641
+ const f = [];
642
+ for (const el of target.region.element.find("input, textarea, select").toArray()) {
643
+ const $el = this.$(el);
644
+ const name = this.inferFieldName($el);
645
+ if (!name) continue;
646
+ f.push({
647
+ name,
648
+ selector: this.bestSelector($el),
649
+ transform: this.inferTransform($el),
650
+ required: $el.attr("required") !== void 0
651
+ });
652
+ }
653
+ return f;
654
+ }
655
+ inferFieldName($el) {
656
+ const id = $el.attr("id");
657
+ if (id) {
658
+ const $l = this.$(`label[for="${id}"]`);
659
+ if ($l.length) return this.slugify($l.text());
660
+ }
661
+ const ph = $el.attr("placeholder");
662
+ if (ph) return this.slugify(ph);
663
+ const al = $el.attr("aria-label");
664
+ if (al) return this.slugify(al);
665
+ const n = $el.attr("name");
666
+ if (n) return this.slugify(n);
667
+ return null;
668
+ }
669
+ inferTransform($el) {
670
+ const t = $el.attr("type");
671
+ if (t === "number") return "number";
672
+ if (t === "email") return "email";
673
+ if (t === "date") return "date";
674
+ if (t === "checkbox") return "boolean";
675
+ if ($el.is("select")) return "select";
676
+ return void 0;
677
+ }
678
+ bestSelector($el) {
679
+ const c = this.buildCandidates($el);
680
+ if (c.length) return c[0].value;
681
+ const raw = $el[0];
682
+ return raw?.tagName?.toLowerCase() || "*";
683
+ }
684
+ generateContainerSelector($el) {
685
+ const c = this.buildCandidates($el);
686
+ return c.length ? c[0].value : "body";
687
+ }
688
+ mapIntents() {
689
+ const m = {
690
+ search: { intent: "search", action: "fill_and_submit", parameters: ["query", "location", "filters"], context: "Enter search terms and submit form" },
691
+ search_jobs: { intent: "search_jobs", action: "fill_and_submit", parameters: ["query", "location", "experience_level", "job_type"], context: "Search for job listings with optional filters" },
692
+ extract_list: { intent: "extract_list", action: "extract_fields", parameters: ["items", "title", "url", "metadata"], context: "Extract structured data from list items" },
693
+ extract_detail: { intent: "extract_detail", action: "extract_fields", parameters: ["title", "description", "metadata", "links"], context: "Extract structured data from detail page" },
694
+ fill_form: { intent: "fill_form", action: "fill_and_submit", parameters: Object.keys(this.params), context: "Fill form fields with provided parameters" },
695
+ navigate: { intent: "navigate", action: "click", parameters: ["target_url", "link_text"], context: "Click navigation link to target page" },
696
+ login: { intent: "login", action: "fill_and_submit", parameters: ["username", "password"], context: "Enter credentials and submit login form" },
697
+ scrape: { intent: "scrape", action: "extract_fields", parameters: ["all_visible_text", "links", "images", "structured_data"], context: "Extract all visible content from the page" }
698
+ };
699
+ const mapped = m[this.intent.toLowerCase()];
700
+ if (mapped) return [mapped];
701
+ return [{ intent: this.intent, action: "interact", parameters: Object.keys(this.params), context: `Perform ${this.intent} on the page` }];
702
+ }
703
+ slugify(t) {
704
+ return t.toLowerCase().replace(/[^\w\s-]/g, "").replace(/[\s_-]+/g, "_").replace(/^_|_$/g, "").substring(0, 50);
705
+ }
706
+ escape(s) {
707
+ return s.replace(/([:.])/g, "\\$1");
708
+ }
709
+ q(s) {
710
+ return s.replace(/"/g, '\\"');
711
+ }
712
+ };
713
+
443
714
  // src/compiler/local-axir.ts
444
715
  import * as fs from "fs";
445
716
  import * as path from "path";
446
717
  import * as os from "os";
447
718
  import * as https from "https";
448
719
  import * as http from "http";
449
-
450
- // src/compiler/grammar/axir-schema.gbnf
451
- var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
452
-
453
- // src/compiler/local-axir.ts
720
+ import { URL } from "url";
454
721
  var nativeAvailable = false;
455
722
  var getLlama;
456
- try {
457
- const llama = __require("node-llama-cpp");
458
- getLlama = llama.getLlama;
723
+ var LlamaModel;
724
+ var LlamaContext;
725
+ var LlamaGrammar;
726
+ var llamaPromise = import("node-llama-cpp").then((m) => {
459
727
  nativeAvailable = true;
460
- } catch {
728
+ getLlama = m.getLlama;
729
+ LlamaModel = m.LlamaModel;
730
+ LlamaContext = m.LlamaContext;
731
+ LlamaGrammar = m.LlamaGrammar;
732
+ return m;
733
+ }).catch((err) => {
461
734
  nativeAvailable = false;
462
- }
463
- var DEFAULT_MODEL_URL = "https://huggingface.co/bartowski/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
735
+ if (process.env.AWI_DEBUG) {
736
+ console.error("[AWI] node-llama-cpp load error:", err.message);
737
+ }
738
+ return null;
739
+ });
740
+ var DEFAULT_MODEL_URL = "https://github.com/RayAKaan/AWI/releases/download/v0.0.0-models/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
464
741
  var DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
465
742
  var LocalAXIRCompiler = class {
466
743
  modelPath;
@@ -487,9 +764,6 @@ var LocalAXIRCompiler = class {
487
764
  this.onDownloadProgress = options.onDownloadProgress;
488
765
  this.onStatus = options.onStatus;
489
766
  }
490
- // -------------------------------------------------------------------------
491
- // Public API
492
- // -------------------------------------------------------------------------
493
767
  async compile(domHTML, a11yTree, intent, params) {
494
768
  await this._ensureModel();
495
769
  await this._ensureGrammar();
@@ -536,22 +810,26 @@ var LocalAXIRCompiler = class {
536
810
  this.ready = false;
537
811
  }
538
812
  }
539
- // -------------------------------------------------------------------------
540
- // Model lifecycle
541
- // -------------------------------------------------------------------------
542
813
  async _ensureModel() {
543
814
  if (this.ready) return;
815
+ await llamaPromise;
816
+ if (!getLlama) {
817
+ throw new Error("node-llama-cpp failed to load. Is it installed?");
818
+ }
544
819
  if (!fs.existsSync(this.modelPath)) {
545
820
  await this._downloadModel();
546
821
  }
547
822
  this._status("Loading local model...");
548
823
  const llama = await getLlama();
549
824
  const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
550
- this.model = new llama.LlamaModel({
825
+ this.model = new LlamaModel({
826
+ llama,
551
827
  modelPath: this.modelPath,
552
828
  gpuLayers
553
829
  });
554
- this.context = await this.model.createContext({
830
+ this.context = new LlamaContext({
831
+ llama,
832
+ model: this.model,
555
833
  contextSize: this.contextSize
556
834
  });
557
835
  this.ready = true;
@@ -559,9 +837,10 @@ var LocalAXIRCompiler = class {
559
837
  }
560
838
  async _ensureGrammar() {
561
839
  if (this.grammar) return;
562
- const llama = await getLlama();
563
- this.grammar = new llama.LlamaGrammar({
564
- grammar: axir_schema_default
840
+ const grammarPath = path.join(__dirname, "grammar", "axir-schema.gbnf");
841
+ this.grammar = new LlamaGrammar({
842
+ llama: await getLlama(),
843
+ grammar: fs.readFileSync(grammarPath, "utf-8")
565
844
  });
566
845
  }
567
846
  _autoDetectGPULayers() {
@@ -570,17 +849,14 @@ var LocalAXIRCompiler = class {
570
849
  }
571
850
  return 0;
572
851
  }
573
- // -------------------------------------------------------------------------
574
- // Resumable model download
575
- // -------------------------------------------------------------------------
576
852
  async _downloadModel() {
577
853
  const dir = path.dirname(this.modelPath);
578
854
  if (!fs.existsSync(dir)) {
579
855
  fs.mkdirSync(dir, { recursive: true });
580
856
  }
581
857
  const tempPath = `${this.modelPath}.tmp`;
582
- const urlObj = new URL(this.modelUrl);
583
- const protocol = urlObj.protocol === "https:" ? https : http;
858
+ const url = new URL(this.modelUrl);
859
+ const protocol = url.protocol === "https:" ? https : http;
584
860
  let startByte = 0;
585
861
  if (fs.existsSync(tempPath)) {
586
862
  startByte = fs.statSync(tempPath).size;
@@ -596,10 +872,10 @@ var LocalAXIRCompiler = class {
596
872
  headers["Range"] = `bytes=${startByte}-`;
597
873
  }
598
874
  const request = protocol.get(
599
- urlObj,
875
+ url,
600
876
  { headers },
601
877
  (response) => {
602
- if (response.statusCode === 301 || response.statusCode === 302) {
878
+ if (response.statusCode === 302 || response.statusCode === 301) {
603
879
  if (response.headers.location) {
604
880
  this.modelUrl = response.headers.location;
605
881
  return this._downloadModel().then(resolve).catch(reject);
@@ -607,7 +883,7 @@ var LocalAXIRCompiler = class {
607
883
  }
608
884
  if (response.statusCode !== 200 && response.statusCode !== 206) {
609
885
  return reject(
610
- new Error(`Model download failed: HTTP ${response.statusCode}`)
886
+ new Error(`Download failed: HTTP ${response.statusCode}`)
611
887
  );
612
888
  }
613
889
  const total = parseInt(
@@ -641,76 +917,69 @@ var LocalAXIRCompiler = class {
641
917
  });
642
918
  });
643
919
  }
644
- // -------------------------------------------------------------------------
645
- // Inference
646
- // -------------------------------------------------------------------------
647
920
  async _complete(prompt, maxTokens, temperature) {
648
921
  if (!this.context) throw new Error("Model not loaded");
649
922
  const sequence = this.context.getSequence();
650
923
  await sequence.evaluate(prompt);
651
- const response = await sequence.generateResponse(maxTokens, {
924
+ const result = await sequence.generateResponse(maxTokens, {
652
925
  temperature,
653
926
  grammar: this.grammar
654
927
  });
655
928
  let text = "";
656
- for await (const token of response) {
929
+ for await (const token of result) {
657
930
  text += token;
658
931
  }
659
932
  return text;
660
933
  }
661
- // -------------------------------------------------------------------------
662
- // Prompt builders
663
- // -------------------------------------------------------------------------
664
934
  _buildCompilePrompt(domHTML, a11yTree, intent, params) {
665
935
  const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
666
- return [
667
- `<|system|>`,
668
- `You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.`,
669
- ``,
670
- `Output MUST be valid JSON matching this schema:`,
671
- `- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}`,
672
- `- workflow.edges: list of {from_node, to_node, action, condition?, probability}`,
673
- `- workflow.entry_points: list of starting node_ids`,
674
- `- workflow.exit_points: list of terminal node_ids`,
675
- `- workflow.domain: the domain name`,
676
- `- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown`,
677
- `- intents: list of {intent, action, parameters[], context}`,
678
- `- selectors: map of selector_name -> list of {type, value, priority}`,
679
- `- fields: list of {name, selector, transform?, required}`,
680
- `- container?: string (optional container selector name)`,
681
- ``,
682
- `Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.`,
683
- `Selector types: css, semantic, text, attribute.`,
684
- `<|user|>`,
685
- `Intent: ${intent}`,
686
- `Parameters: ${paramsJson}`,
687
- ``,
688
- `Simplified DOM:`,
689
- `${this._truncate(domHTML, 4e4)}`,
690
- ``,
691
- `Accessibility Tree:`,
692
- `${this._truncate(a11yTree, 8e3)}`,
693
- ``,
694
- `Compile AXIR:`,
695
- `<|assistant|>`
696
- ].join("\n");
936
+ const a11y = a11yTree || "No accessibility tree available.";
937
+ return `|<|system|>
938
+ You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.
939
+
940
+ Output MUST be valid JSON matching this schema:
941
+ - workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}
942
+ - workflow.edges: list of {from_node, to_node, action, condition?, probability}
943
+ - workflow.entry_points: list of starting node_ids
944
+ - workflow.exit_points: list of terminal node_ids
945
+ - workflow.domain: the domain name
946
+ - workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown
947
+ - intents: list of {intent, action, parameters[], context}
948
+ - selectors: map of selector_name -> list of {type, value, priority}
949
+ - fields: list of {name, selector, transform?, required}
950
+ - container?: string (optional container selector name)
951
+
952
+ Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.
953
+ Selector types: css, semantic, text, attribute.
954
+ |<|user|>
955
+ Intent: ${intent}
956
+ Parameters: ${paramsJson}
957
+
958
+ Simplified DOM:
959
+ ${this._truncate(domHTML, 4e4)}
960
+
961
+ Accessibility Tree:
962
+ ${this._truncate(a11y, 8e3)}
963
+
964
+ Compile AXIR:
965
+ |<|assistant|>
966
+ `;
697
967
  }
698
968
  _buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
699
- return [
700
- `<|system|>`,
701
- `You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.`,
702
- ``,
703
- `Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}`,
704
- `<|user|>`,
705
- `Broken selector: ${brokenSelector}`,
706
- `Semantic intent: ${semanticIntent}`,
707
- ``,
708
- `Current DOM (truncated):`,
709
- `${this._truncate(domHTML, 2e4)}`,
710
- ``,
711
- `New selector:`,
712
- `<|assistant|>`
713
- ].join("\n");
969
+ return `|<|system|>
970
+ You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.
971
+
972
+ Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}
973
+ |<|user|>
974
+ Broken selector: ${brokenSelector}
975
+ Semantic intent: ${semanticIntent}
976
+
977
+ Current DOM (truncated):
978
+ ${this._truncate(domHTML, 2e4)}
979
+
980
+ New selector:
981
+ |<|assistant|>
982
+ `;
714
983
  }
715
984
  _truncate(text, maxChars) {
716
985
  if (text.length <= maxChars) return text;
@@ -725,9 +994,8 @@ var LocalAXIRCompiler = class {
725
994
  };
726
995
  export {
727
996
  AWIClient,
728
- AWIError,
997
+ AXIRCompiler,
729
998
  AdvisoryExecutor,
730
- LocalAXIRCompiler,
731
- client_default as default
999
+ LocalAXIRCompiler
732
1000
  };
733
1001
  //# sourceMappingURL=index.mjs.map