@awi-protocol/sdk 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -31,10 +31,9 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
33
  AWIClient: () => AWIClient,
34
- AWIError: () => AWIError,
34
+ AXIRCompiler: () => AXIRCompiler,
35
35
  AdvisoryExecutor: () => AdvisoryExecutor,
36
- LocalAXIRCompiler: () => LocalAXIRCompiler,
37
- default: () => client_default
36
+ LocalAXIRCompiler: () => LocalAXIRCompiler
38
37
  });
39
38
  module.exports = __toCommonJS(index_exports);
40
39
 
@@ -242,7 +241,6 @@ var AWIClient = class {
242
241
  throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
243
242
  }
244
243
  };
245
- var client_default = AWIClient;
246
244
 
247
245
  // src/advisory-executor.ts
248
246
  var AdvisoryExecutor = class {
@@ -473,27 +471,312 @@ var AdvisoryExecutor = class {
473
471
  }
474
472
  };
475
473
 
474
+ // src/compiler/axir-compiler.ts
475
+ var cheerio = __toESM(require("cheerio"));
476
/**
 * Deterministic (model-free) AXIR compiler.
 *
 * Parses an HTML document with cheerio, strips non-content markup, classifies
 * page regions (search, navigation, listing, form, pagination, detail), picks
 * the region that best matches the requested intent, and emits a workflow
 * graph plus selector candidates and field descriptors for that region.
 */
var AXIRCompiler = class {
  /** Cheerio root for the parsed document; mutated in place by simplifyDOM(). */
  $;
  /** Intent name supplied by the caller (e.g. "search", "login"). */
  intent;
  /** Caller-supplied parameters; only the keys are used (see mapIntents). */
  params;
  /** Domain label copied verbatim into the workflow. */
  domain;

  /**
   * @param {string} html - Raw page HTML.
   * @param {{intent?: string, params?: object, domain?: string}} [options]
   */
  constructor(html, options) {
    // Tolerate a missing options bag instead of throwing on property access.
    const opts = options || {};
    this.$ = cheerio.load(html);
    this.intent = opts.intent;
    this.params = opts.params || {};
    this.domain = opts.domain || "unknown";
  }

  /**
   * Runs the full pipeline and returns the AXIR result object.
   * @returns {object} workflow, intents, selectors, fields, container and timing metadata.
   */
  compile() {
    const start = Date.now();
    this.simplifyDOM();
    const regions = this.identifyRegions();
    const target = this.routeIntent(regions);
    return {
      workflow: this.buildWorkflow(target, regions),
      intents: this.mapIntents(),
      selectors: this.generateSelectors(target),
      fields: this.generateFields(target),
      container: target.container,
      model_used: "axir-deterministic-v1",
      tokens_used: 0,
      compilation_time_ms: Date.now() - start
    };
  }

  /** Removes scripts/media, childless empty div/span elements, and hidden nodes. */
  simplifyDOM() {
    this.$("script, style, svg, noscript, iframe, canvas, video, audio").remove();
    for (const el of this.$("div, span").toArray()) {
      const $el = this.$(el);
      if ($el.children().length === 0 && $el.text().trim() === "") $el.remove();
    }
    this.$('[style*="display:none"], [style*="display: none"], [hidden], [aria-hidden="true"]').remove();
  }

  /**
   * Scans the document and returns every recognised region, in the order
   * search, navigation, listing, form, pagination, detail.
   * @returns {Array<{type: string, element: object, confidence: number}>}
   */
  identifyRegions() {
    const regions = [];
    for (const el of this.$('form, [role="search"], input[type="search"]').toArray()) {
      const r = this.analyzeSearchRegion(this.$(el));
      if (r) regions.push(r);
    }
    for (const el of this.$('nav, [role="navigation"], header, .nav, .navbar, .menu').toArray()) {
      regions.push({ type: "navigation", element: this.$(el), confidence: 0.9 });
    }
    for (const el of this.$('ul, ol, [role="list"], .list, .results, .items, table, [role="grid"]').toArray()) {
      const $el = this.$(el);
      if ($el.find('li, tr, .item, [role="listitem"]').length > 1) {
        regions.push({ type: "listing", element: $el, confidence: 0.85 });
      }
    }
    // Forms already registered by the search pass above (any form containing a
    // text input or a button) are skipped here, so only the remaining forms
    // are typed "form".
    for (const el of this.$("form").toArray()) {
      const $el = this.$(el);
      if (!regions.some((r) => r.element.is($el))) regions.push({ type: "form", element: $el, confidence: 0.9 });
    }
    for (const el of this.$(".pagination, .pager, .pages").toArray()) {
      if (this.isPagination(this.$(el))) regions.push({ type: "pagination", element: this.$(el), confidence: 0.8 });
    }
    for (const el of this.$('article, [role="article"], .content, .main, main, .detail').toArray()) {
      regions.push({ type: "detail", element: this.$(el), confidence: 0.75 });
    }
    return regions;
  }

  /**
   * Returns a "search" region when the element contains a text-like input or a
   * button (0.95 confidence when it has both, 0.7 otherwise), else null.
   */
  analyzeSearchRegion($el) {
    const hasInput = $el.find('input[type="text"], input[type="search"], input:not([type])').length > 0;
    const hasButton = $el.find('button, input[type="submit"]').length > 0;
    if (hasInput || hasButton) return { type: "search", element: $el, confidence: hasInput && hasButton ? 0.95 : 0.7 };
    return null;
  }

  /** True when the element text has a digit and a next/previous word or arrow glyph. */
  isPagination($el) {
    const text = $el.text().toLowerCase();
    return /\d+/.test(text) && (/next|>|\u203a|\u2192|\u00bb/.test(text) || /prev|previous|<|\u2039|\u2190|\u00ab/.test(text));
  }

  /**
   * Picks the region that best serves the intent. Each intent lists region
   * types in preference order; a region scores
   * (listLength - positionInList) * confidence, and the highest score wins.
   * Falls back to the largest region when nothing matches.
   * @returns {{region: object, container: string}}
   */
  routeIntent(regions) {
    const intentMap = {
      search: ["search", "form"],
      search_jobs: ["search", "listing", "form"],
      extract_list: ["listing", "search", "detail"],
      extract_detail: ["detail", "listing"],
      fill_form: ["form", "search"],
      navigate: ["navigation", "listing"],
      login: ["form"],
      filter: ["search", "listing"],
      sort: ["listing", "search"],
      scrape: ["listing", "detail", "search"]
    };
    // Own-property lookup: an intent such as "constructor" must not resolve to
    // something inherited from Object.prototype.
    const targetTypes = this._lookup(intentMap, this._intentKey()) || ["search", "listing", "form"];
    let best = null;
    let bestScore = 0;
    for (const r of regions) {
      const match = targetTypes.indexOf(r.type);
      const score = match >= 0 ? (targetTypes.length - match) * r.confidence : 0;
      if (score > bestScore) {
        bestScore = score;
        best = r;
      }
    }
    if (!best) best = this.findLargestRegion(regions);
    return { region: best, container: this.generateContainerSelector(best.element) };
  }

  /** Region with the most descendant elements, or a synthetic <body> region when the list is empty. */
  findLargestRegion(regions) {
    if (regions.length === 0) return { type: "unknown", element: this.$("body"), confidence: 0.5 };
    return regions.reduce((l, c) => c.element.find("*").length > l.element.find("*").length ? c : l);
  }

  /**
   * Builds the workflow graph: one node per region (id `${type}_${index}`),
   * edges inferred pairwise by inferEdge, navigation regions as entry points
   * and listing/detail regions as exit points.
   */
  buildWorkflow(_target, all) {
    const nodes = {};
    const edges = [];
    const entry = [];
    const exit = [];
    all.forEach((r, i) => {
      const id = `${r.type}_${i}`;
      const raw = r.element[0];
      nodes[id] = {
        node_id: id,
        element_type: this.mapType(r.type),
        semantic_role: r.type,
        intent: this.intent,
        tag: raw?.tagName?.toLowerCase(),
        selector_candidates: this.buildCandidates(r.element),
        confidence: r.confidence
      };
      if (r.type === "navigation") entry.push(id);
      if (r.type === "listing" || r.type === "detail") exit.push(id);
    });
    all.forEach((f, fi) => all.forEach((t, ti) => {
      if (fi !== ti) {
        const e = this.inferEdge(f, t, fi, ti);
        if (e) edges.push(e);
      }
    }));
    // With no navigation region, the first node doubles as the entry point.
    if (entry.length === 0 && Object.keys(nodes).length > 0) entry.push(Object.keys(nodes)[0]);
    return { nodes, edges, entry_points: entry, exit_points: exit, domain: this.domain, page_type: this.inferPageType(all) };
  }

  /** Maps an internal region type to the AXIR element_type ("unknown" if unmapped). */
  mapType(t) {
    const m = {
      search: "search",
      navigation: "navigation",
      listing: "list",
      form: "form",
      pagination: "pagination",
      detail: "container"
    };
    return this._lookup(m, t) || "unknown";
  }

  /** Returns the edge implied by an ordered pair of region types, or null. */
  inferEdge(f, t, fi, ti) {
    if (f.type === "search" && t.type === "listing") return { from_node: `search_${fi}`, to_node: `listing_${ti}`, action: "submit_search", probability: 0.9 };
    if (f.type === "navigation" && t.type === "search") return { from_node: `navigation_${fi}`, to_node: `search_${ti}`, action: "navigate_to_search", probability: 0.7 };
    if (f.type === "listing" && t.type === "pagination") return { from_node: `listing_${fi}`, to_node: `pagination_${ti}`, action: "next_page", probability: 0.8 };
    if (f.type === "pagination" && t.type === "listing") return { from_node: `pagination_${fi}`, to_node: `listing_${ti}`, action: "load_results", probability: 0.95 };
    return null;
  }

  /** Derives the page_type label from the set of region types present. */
  inferPageType(regions) {
    const t = regions.map((r) => r.type);
    if (t.includes("search") && t.includes("listing")) return "search";
    if (t.includes("listing")) return "listing";
    if (t.includes("form")) return "form";
    if (t.includes("search")) return "search";
    if (t.includes("navigation")) return "landing";
    return "unknown";
  }

  /**
   * Collects selector candidates for the target container, its form controls
   * (keyed by inferred field name), its buttons (`btn_<label>`) and its short
   * links (`link_<text>`). Later entries overwrite earlier ones with the same key.
   */
  generateSelectors(target) {
    const s = {};
    const $el = target.region.element;
    s.container = this.buildCandidates($el);
    for (const el of $el.find("input, textarea, select").toArray()) {
      const n = this.inferFieldName(this.$(el));
      if (n) s[n] = this.buildCandidates(this.$(el));
    }
    for (const el of $el.find('button, input[type="submit"], input[type="button"]').toArray()) {
      const $btn = this.$(el);
      const label = $btn.text().trim() || String($btn.val() || "button");
      s[`btn_${this.slugify(label)}`] = this.buildCandidates($btn);
    }
    for (const el of $el.find("a").toArray()) {
      const $a = this.$(el);
      const t = $a.text().trim();
      if (t && t.length < 50) s[`link_${this.slugify(t)}`] = this.buildCandidates($a);
    }
    return s;
  }

  /**
   * Builds the prioritised selector candidates for one element: id, class list,
   * name/type/placeholder attributes, role, aria-label, then visible text.
   * Identifiers go through escape() and attribute values through q(), so page
   * content cannot produce a malformed selector.
   * @returns {Array<{type: string, value: string, priority: number, confidence: number}>}
   */
  buildCandidates($el) {
    const c = [];
    const el = $el[0];
    if (!el) return c;
    const id = $el.attr("id");
    if (id && !id.match(/^\d/)) c.push({ type: "css", value: `#${this.escape(id)}`, priority: 1, confidence: 0.99 });
    // Framework/behaviour hook classes are skipped.
    const classes = ($el.attr("class") || "").split(/\s+/).filter((x) => x && !x.match(/^js-|^ng-|^vue-|^data-/));
    if (classes.length) c.push({ type: "css", value: `.${classes.map((x) => this.escape(x)).join(".")}`, priority: 2, confidence: 0.85 });
    const tag = el.tagName?.toLowerCase() || "";
    const name = $el.attr("name");
    const type = $el.attr("type");
    const placeholder = $el.attr("placeholder");
    if (name) c.push({ type: "css", value: `${tag}[name="${this.q(name)}"]`, priority: 3, confidence: 0.9 });
    if (type) c.push({ type: "css", value: `${tag}[type="${this.q(type)}"]`, priority: 4, confidence: 0.8 });
    if (placeholder) c.push({ type: "css", value: `${tag}[placeholder="${this.q(placeholder)}"]`, priority: 5, confidence: 0.75 });
    const role = $el.attr("role");
    if (role) c.push({ type: "semantic", value: `[role="${this.q(role)}"]`, priority: 6, confidence: 0.9 });
    const al = $el.attr("aria-label");
    if (al) c.push({ type: "semantic", value: `[aria-label="${this.q(al)}"]`, priority: 7, confidence: 0.85 });
    const text = $el.text().trim();
    if (text && text.length < 100) c.push({ type: "text", value: text, priority: 8, confidence: 0.7 });
    return c;
  }

  /** Describes every nameable form control inside the target region. */
  generateFields(target) {
    const f = [];
    for (const el of target.region.element.find("input, textarea, select").toArray()) {
      const $el = this.$(el);
      const name = this.inferFieldName($el);
      if (!name) continue;
      f.push({
        name,
        selector: this.bestSelector($el),
        transform: this.inferTransform($el),
        required: $el.attr("required") !== void 0
      });
    }
    return f;
  }

  /**
   * Derives a slug name for a form control from, in order: its <label for>,
   * placeholder, aria-label, name. A hint that slugifies to an empty string
   * (e.g. a label of only punctuation) is skipped so the next hint is tried.
   * @returns {string|null} The first non-empty slug, or null when none exists.
   */
  inferFieldName($el) {
    const id = $el.attr("id");
    if (id) {
      // The id is page-controlled, so it is quoted before going into a selector.
      const $label = this.$(`label[for="${this.q(id)}"]`);
      if ($label.length) {
        const fromLabel = this.slugify($label.text());
        if (fromLabel) return fromLabel;
      }
    }
    for (const attr of ["placeholder", "aria-label", "name"]) {
      const value = $el.attr(attr);
      if (!value) continue;
      const slug = this.slugify(value);
      if (slug) return slug;
    }
    return null;
  }

  /** Maps an input's type (or a <select>) to a transform label; undefined otherwise. */
  inferTransform($el) {
    const t = $el.attr("type");
    if (t === "number") return "number";
    if (t === "email") return "email";
    if (t === "date") return "date";
    if (t === "checkbox") return "boolean";
    if ($el.is("select")) return "select";
    return void 0;
  }

  /** Highest-priority candidate value, else the lowercased tag name, else "*". */
  bestSelector($el) {
    const c = this.buildCandidates($el);
    if (c.length) return c[0].value;
    const raw = $el[0];
    return raw?.tagName?.toLowerCase() || "*";
  }

  /** Highest-priority candidate value for a container, else "body". */
  generateContainerSelector($el) {
    const c = this.buildCandidates($el);
    return c.length ? c[0].value : "body";
  }

  /**
   * Returns a one-element list describing the action for the current intent;
   * unknown intents get a generic "interact" descriptor.
   */
  mapIntents() {
    const m = {
      search: { intent: "search", action: "fill_and_submit", parameters: ["query", "location", "filters"], context: "Enter search terms and submit form" },
      search_jobs: { intent: "search_jobs", action: "fill_and_submit", parameters: ["query", "location", "experience_level", "job_type"], context: "Search for job listings with optional filters" },
      extract_list: { intent: "extract_list", action: "extract_fields", parameters: ["items", "title", "url", "metadata"], context: "Extract structured data from list items" },
      extract_detail: { intent: "extract_detail", action: "extract_fields", parameters: ["title", "description", "metadata", "links"], context: "Extract structured data from detail page" },
      fill_form: { intent: "fill_form", action: "fill_and_submit", parameters: Object.keys(this.params), context: "Fill form fields with provided parameters" },
      navigate: { intent: "navigate", action: "click", parameters: ["target_url", "link_text"], context: "Click navigation link to target page" },
      login: { intent: "login", action: "fill_and_submit", parameters: ["username", "password"], context: "Enter credentials and submit login form" },
      scrape: { intent: "scrape", action: "extract_fields", parameters: ["all_visible_text", "links", "images", "structured_data"], context: "Extract all visible content from the page" }
    };
    const mapped = this._lookup(m, this._intentKey());
    if (mapped) return [mapped];
    return [{ intent: this.intent, action: "interact", parameters: Object.keys(this.params), context: `Perform ${this.intent} on the page` }];
  }

  /** Lowercases, drops punctuation, joins words with "_", trims edge "_" and caps at 50 chars. */
  slugify(t) {
    return t.toLowerCase().replace(/[^\w\s-]/g, "").replace(/[\s_-]+/g, "_").replace(/^_|_$/g, "").substring(0, 50);
  }

  /**
   * Escapes a string for use as a CSS identifier (id or class name), following
   * the CSSOM "serialize an identifier" rules: control characters and a leading
   * digit become hex escapes, any other non-identifier ASCII character is
   * backslash-escaped, and everything else passes through unchanged.
   */
  escape(s) {
    const str = String(s);
    let out = "";
    for (let i = 0; i < str.length; i++) {
      const ch = str[i];
      const code = str.charCodeAt(i);
      if (code === 0) {
        out += "\uFFFD";
        continue;
      }
      const isDigit = code >= 48 && code <= 57;
      const isControl = (code >= 1 && code <= 31) || code === 127;
      const isLeadingDigit = isDigit && (i === 0 || (i === 1 && str[0] === "-"));
      if (isControl || isLeadingDigit) {
        // Hex escapes need the trailing space to terminate the code point.
        out += `\\${code.toString(16)} `;
        continue;
      }
      if (i === 0 && str.length === 1 && ch === "-") {
        out += "\\-";
        continue;
      }
      const isIdentChar = code >= 128 || ch === "-" || ch === "_" || isDigit || (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
      out += isIdentChar ? ch : `\\${ch}`;
    }
    return out;
  }

  /**
   * Escapes a value for use inside a double-quoted CSS attribute string:
   * backslashes and double quotes are backslash-escaped, and line breaks
   * (not allowed raw inside a CSS string) become hex escapes.
   */
  q(s) {
    return String(s)
      .replace(/["\\]/g, "\\$&")
      .replace(/[\n\r\f]/g, (c) => `\\${c.charCodeAt(0).toString(16)} `);
  }

  /** Intent normalised for table lookups; a missing intent becomes "". */
  _intentKey() {
    return String(this.intent ?? "").toLowerCase();
  }

  /** Own-property table lookup, so keys inherited from Object.prototype never match. */
  _lookup(table, key) {
    return Object.prototype.hasOwnProperty.call(table, key) ? table[key] : void 0;
  }
};
752
+
476
753
  // src/compiler/local-axir.ts
477
754
  var fs = __toESM(require("fs"));
478
755
  var path = __toESM(require("path"));
479
756
  var os = __toESM(require("os"));
480
757
  var https = __toESM(require("https"));
481
758
  var http = __toESM(require("http"));
482
-
483
- // src/compiler/grammar/axir-schema.gbnf
484
- var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" 
string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
485
-
486
- // src/compiler/local-axir.ts
759
+ var import_url = require("url");
487
760
  var nativeAvailable = false;
488
761
  var getLlama;
489
- try {
490
- const llama = require("node-llama-cpp");
491
- getLlama = llama.getLlama;
762
+ var LlamaModel;
763
+ var LlamaContext;
764
+ var LlamaGrammar;
765
+ var llamaPromise = import("node-llama-cpp").then((m) => {
492
766
  nativeAvailable = true;
493
- } catch {
767
+ getLlama = m.getLlama;
768
+ LlamaModel = m.LlamaModel;
769
+ LlamaContext = m.LlamaContext;
770
+ LlamaGrammar = m.LlamaGrammar;
771
+ return m;
772
+ }).catch((err) => {
494
773
  nativeAvailable = false;
495
- }
496
- var DEFAULT_MODEL_URL = "https://huggingface.co/bartowski/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
774
+ if (process.env.AWI_DEBUG) {
775
+ console.error("[AWI] node-llama-cpp load error:", err.message);
776
+ }
777
+ return null;
778
+ });
779
+ var DEFAULT_MODEL_URL = "https://github.com/RayAKaan/AWI/releases/download/v0.0.0-models/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
497
780
  var DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
498
781
  var LocalAXIRCompiler = class {
499
782
  modelPath;
@@ -520,9 +803,6 @@ var LocalAXIRCompiler = class {
520
803
  this.onDownloadProgress = options.onDownloadProgress;
521
804
  this.onStatus = options.onStatus;
522
805
  }
523
- // -------------------------------------------------------------------------
524
- // Public API
525
- // -------------------------------------------------------------------------
526
806
  async compile(domHTML, a11yTree, intent, params) {
527
807
  await this._ensureModel();
528
808
  await this._ensureGrammar();
@@ -569,22 +849,26 @@ var LocalAXIRCompiler = class {
569
849
  this.ready = false;
570
850
  }
571
851
  }
572
- // -------------------------------------------------------------------------
573
- // Model lifecycle
574
- // -------------------------------------------------------------------------
575
852
  async _ensureModel() {
576
853
  if (this.ready) return;
854
+ await llamaPromise;
855
+ if (!getLlama) {
856
+ throw new Error("node-llama-cpp failed to load. Is it installed?");
857
+ }
577
858
  if (!fs.existsSync(this.modelPath)) {
578
859
  await this._downloadModel();
579
860
  }
580
861
  this._status("Loading local model...");
581
862
  const llama = await getLlama();
582
863
  const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
583
- this.model = new llama.LlamaModel({
864
+ this.model = new LlamaModel({
865
+ llama,
584
866
  modelPath: this.modelPath,
585
867
  gpuLayers
586
868
  });
587
- this.context = await this.model.createContext({
869
+ this.context = new LlamaContext({
870
+ llama,
871
+ model: this.model,
588
872
  contextSize: this.contextSize
589
873
  });
590
874
  this.ready = true;
@@ -592,9 +876,10 @@ var LocalAXIRCompiler = class {
592
876
  }
593
877
  async _ensureGrammar() {
594
878
  if (this.grammar) return;
595
- const llama = await getLlama();
596
- this.grammar = new llama.LlamaGrammar({
597
- grammar: axir_schema_default
879
+ const grammarPath = path.join(__dirname, "grammar", "axir-schema.gbnf");
880
+ this.grammar = new LlamaGrammar({
881
+ llama: await getLlama(),
882
+ grammar: fs.readFileSync(grammarPath, "utf-8")
598
883
  });
599
884
  }
600
885
  _autoDetectGPULayers() {
@@ -603,17 +888,14 @@ var LocalAXIRCompiler = class {
603
888
  }
604
889
  return 0;
605
890
  }
606
- // -------------------------------------------------------------------------
607
- // Resumable model download
608
- // -------------------------------------------------------------------------
609
891
  async _downloadModel() {
610
892
  const dir = path.dirname(this.modelPath);
611
893
  if (!fs.existsSync(dir)) {
612
894
  fs.mkdirSync(dir, { recursive: true });
613
895
  }
614
896
  const tempPath = `${this.modelPath}.tmp`;
615
- const urlObj = new URL(this.modelUrl);
616
- const protocol = urlObj.protocol === "https:" ? https : http;
897
+ const url = new import_url.URL(this.modelUrl);
898
+ const protocol = url.protocol === "https:" ? https : http;
617
899
  let startByte = 0;
618
900
  if (fs.existsSync(tempPath)) {
619
901
  startByte = fs.statSync(tempPath).size;
@@ -629,10 +911,10 @@ var LocalAXIRCompiler = class {
629
911
  headers["Range"] = `bytes=${startByte}-`;
630
912
  }
631
913
  const request = protocol.get(
632
- urlObj,
914
+ url,
633
915
  { headers },
634
916
  (response) => {
635
- if (response.statusCode === 301 || response.statusCode === 302) {
917
+ if (response.statusCode === 302 || response.statusCode === 301) {
636
918
  if (response.headers.location) {
637
919
  this.modelUrl = response.headers.location;
638
920
  return this._downloadModel().then(resolve).catch(reject);
@@ -640,7 +922,7 @@ var LocalAXIRCompiler = class {
640
922
  }
641
923
  if (response.statusCode !== 200 && response.statusCode !== 206) {
642
924
  return reject(
643
- new Error(`Model download failed: HTTP ${response.statusCode}`)
925
+ new Error(`Download failed: HTTP ${response.statusCode}`)
644
926
  );
645
927
  }
646
928
  const total = parseInt(
@@ -674,76 +956,69 @@ var LocalAXIRCompiler = class {
674
956
  });
675
957
  });
676
958
  }
677
- // -------------------------------------------------------------------------
678
- // Inference
679
- // -------------------------------------------------------------------------
680
959
  async _complete(prompt, maxTokens, temperature) {
681
960
  if (!this.context) throw new Error("Model not loaded");
682
961
  const sequence = this.context.getSequence();
683
962
  await sequence.evaluate(prompt);
684
- const response = await sequence.generateResponse(maxTokens, {
963
+ const result = await sequence.generateResponse(maxTokens, {
685
964
  temperature,
686
965
  grammar: this.grammar
687
966
  });
688
967
  let text = "";
689
- for await (const token of response) {
968
+ for await (const token of result) {
690
969
  text += token;
691
970
  }
692
971
  return text;
693
972
  }
694
- // -------------------------------------------------------------------------
695
- // Prompt builders
696
- // -------------------------------------------------------------------------
697
973
  _buildCompilePrompt(domHTML, a11yTree, intent, params) {
698
974
  const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
699
- return [
700
- `<|system|>`,
701
- `You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.`,
702
- ``,
703
- `Output MUST be valid JSON matching this schema:`,
704
- `- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}`,
705
- `- workflow.edges: list of {from_node, to_node, action, condition?, probability}`,
706
- `- workflow.entry_points: list of starting node_ids`,
707
- `- workflow.exit_points: list of terminal node_ids`,
708
- `- workflow.domain: the domain name`,
709
- `- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown`,
710
- `- intents: list of {intent, action, parameters[], context}`,
711
- `- selectors: map of selector_name -> list of {type, value, priority}`,
712
- `- fields: list of {name, selector, transform?, required}`,
713
- `- container?: string (optional container selector name)`,
714
- ``,
715
- `Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.`,
716
- `Selector types: css, semantic, text, attribute.`,
717
- `<|user|>`,
718
- `Intent: ${intent}`,
719
- `Parameters: ${paramsJson}`,
720
- ``,
721
- `Simplified DOM:`,
722
- `${this._truncate(domHTML, 4e4)}`,
723
- ``,
724
- `Accessibility Tree:`,
725
- `${this._truncate(a11yTree, 8e3)}`,
726
- ``,
727
- `Compile AXIR:`,
728
- `<|assistant|>`
729
- ].join("\n");
975
+ const a11y = a11yTree || "No accessibility tree available.";
976
+ return `|<|system|>
977
+ You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.
978
+
979
+ Output MUST be valid JSON matching this schema:
980
+ - workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}
981
+ - workflow.edges: list of {from_node, to_node, action, condition?, probability}
982
+ - workflow.entry_points: list of starting node_ids
983
+ - workflow.exit_points: list of terminal node_ids
984
+ - workflow.domain: the domain name
985
+ - workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown
986
+ - intents: list of {intent, action, parameters[], context}
987
+ - selectors: map of selector_name -> list of {type, value, priority}
988
+ - fields: list of {name, selector, transform?, required}
989
+ - container?: string (optional container selector name)
990
+
991
+ Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.
992
+ Selector types: css, semantic, text, attribute.
993
+ |<|user|>
994
+ Intent: ${intent}
995
+ Parameters: ${paramsJson}
996
+
997
+ Simplified DOM:
998
+ ${this._truncate(domHTML, 4e4)}
999
+
1000
+ Accessibility Tree:
1001
+ ${this._truncate(a11y, 8e3)}
1002
+
1003
+ Compile AXIR:
1004
+ |<|assistant|>
1005
+ `;
730
1006
  }
731
1007
  _buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
732
- return [
733
- `<|system|>`,
734
- `You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.`,
735
- ``,
736
- `Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}`,
737
- `<|user|>`,
738
- `Broken selector: ${brokenSelector}`,
739
- `Semantic intent: ${semanticIntent}`,
740
- ``,
741
- `Current DOM (truncated):`,
742
- `${this._truncate(domHTML, 2e4)}`,
743
- ``,
744
- `New selector:`,
745
- `<|assistant|>`
746
- ].join("\n");
1008
+ return `|<|system|>
1009
+ You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.
1010
+
1011
+ Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}
1012
+ |<|user|>
1013
+ Broken selector: ${brokenSelector}
1014
+ Semantic intent: ${semanticIntent}
1015
+
1016
+ Current DOM (truncated):
1017
+ ${this._truncate(domHTML, 2e4)}
1018
+
1019
+ New selector:
1020
+ |<|assistant|>
1021
+ `;
747
1022
  }
748
1023
  _truncate(text, maxChars) {
749
1024
  if (text.length <= maxChars) return text;
@@ -759,7 +1034,7 @@ var LocalAXIRCompiler = class {
759
1034
  // Annotate the CommonJS export names for ESM import in node:
760
1035
  0 && (module.exports = {
761
1036
  AWIClient,
762
- AWIError,
1037
+ AXIRCompiler,
763
1038
  AdvisoryExecutor,
764
1039
  LocalAXIRCompiler
765
1040
  });