@awi-protocol/sdk 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,10 +1,3 @@
1
- var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
2
- get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
3
- }) : x)(function(x) {
4
- if (typeof require !== "undefined") return require.apply(this, arguments);
5
- throw Error('Dynamic require of "' + x + '" is not supported');
6
- });
7
-
8
1
  // src/client.ts
9
2
  import fetch from "cross-fetch";
10
3
  var AWIError = class extends Error {
@@ -209,7 +202,6 @@ var AWIClient = class {
209
202
  throw lastError || new AWIError("MAX_RETRIES", "Max retries exceeded", 502);
210
203
  }
211
204
  };
212
- var client_default = AWIClient;
213
205
 
214
206
  // src/advisory-executor.ts
215
207
  var AdvisoryExecutor = class {
@@ -440,27 +432,312 @@ var AdvisoryExecutor = class {
440
432
  }
441
433
  };
442
434
 
435
+ // src/compiler/axir-compiler.ts
436
+ import * as cheerio from "cheerio";
437
/**
 * Deterministic (model-free) AXIR compiler.
 *
 * Parses an HTML document with cheerio, strips non-content nodes, classifies
 * parts of the page into coarse "regions" (search, navigation, listing, form,
 * pagination, detail), picks the region that best fits the requested intent,
 * and emits a workflow graph plus selector candidates and form fields for it.
 *
 * A region is a plain object: `{ type, element, confidence }` where `element`
 * is a cheerio selection.
 */
var AXIRCompiler = class {
  /** Cheerio root bound to the parsed document. */
  $;
  /** Intent name supplied by the caller (matched case-insensitively). */
  intent;
  /** Caller-supplied parameters; only their keys are used by this class. */
  params;
  /** Domain label copied into the workflow; "unknown" when not supplied. */
  domain;

  /**
   * @param html    Markup to analyse.
   * @param options `{ intent, params?, domain? }`.
   */
  constructor(html, options) {
    this.$ = cheerio.load(html);
    this.intent = options.intent;
    this.params = options.params || {};
    this.domain = options.domain || "unknown";
  }

  /**
   * Runs the whole pipeline and returns the AXIR result object.
   * Mutates the parsed document (see `simplifyDOM`), so call it once per instance.
   */
  compile() {
    const start = Date.now();
    this.simplifyDOM();
    const regions = this.identifyRegions();
    const target = this.routeIntent(regions);
    return {
      workflow: this.buildWorkflow(target, regions),
      intents: this.mapIntents(),
      selectors: this.generateSelectors(target),
      fields: this.generateFields(target),
      container: target.container,
      model_used: "axir-deterministic-v1",
      tokens_used: 0,
      compilation_time_ms: Date.now() - start
    };
  }

  /**
   * Removes nodes that carry no usable structure: embedded/media elements,
   * hidden elements, and empty `div`/`span` wrappers.
   */
  simplifyDOM() {
    this.$("script, style, svg, noscript, iframe, canvas, video, audio").remove();
    // Hidden nodes go first so that wrappers left empty by their removal are pruned below.
    this.$('[style*="display:none"], [style*="display: none"], [hidden], [aria-hidden="true"]').remove();
    // Reverse document order visits descendants before ancestors, so a wrapper
    // that only contained empty wrappers is itself seen as empty and removed.
    const wrappers = this.$("div, span").toArray().reverse();
    for (const el of wrappers) {
      const $el = this.$(el);
      if ($el.children().length === 0 && $el.text().trim() === "") $el.remove();
    }
  }

  /**
   * Scans the document and returns every detected region, in the order:
   * search, navigation, listing, form, pagination, detail.
   */
  identifyRegions() {
    const regions = [];
    for (const el of this.$('form, [role="search"], input[type="search"]').toArray()) {
      const $el = this.$(el);
      // An input nested in a form / search landmark is already represented by
      // that ancestor's region; only standalone inputs are analysed on their own.
      if ($el.is("input") && $el.parents('form, [role="search"]').length > 0) continue;
      const r = this.analyzeSearchRegion($el);
      if (r) regions.push(r);
    }
    for (const el of this.$('nav, [role="navigation"], header, .nav, .navbar, .menu').toArray()) {
      regions.push({ type: "navigation", element: this.$(el), confidence: 0.9 });
    }
    for (const el of this.$('ul, ol, [role="list"], .list, .results, .items, table, [role="grid"]').toArray()) {
      const $el = this.$(el);
      // Require at least two item-like descendants before calling it a listing.
      if ($el.find('li, tr, .item, [role="listitem"]').length > 1) {
        regions.push({ type: "listing", element: $el, confidence: 0.85 });
      }
    }
    for (const el of this.$("form").toArray()) {
      const $el = this.$(el);
      // Skip forms that were already recorded (e.g. as a search region).
      if (!regions.some((r) => r.element.is($el))) regions.push({ type: "form", element: $el, confidence: 0.9 });
    }
    for (const el of this.$(".pagination, .pager, .pages").toArray()) {
      const $el = this.$(el);
      if (this.isPagination($el)) regions.push({ type: "pagination", element: $el, confidence: 0.8 });
    }
    for (const el of this.$('article, [role="article"], .content, .main, main, .detail').toArray()) {
      regions.push({ type: "detail", element: this.$(el), confidence: 0.75 });
    }
    return regions;
  }

  /**
   * Decides whether `$el` looks like a search control.
   * The element itself counts as the text input when it is one, so a bare
   * `<input type="search">` is recognised (with the lower confidence, since it
   * has no button).
   *
   * @returns a "search" region (0.95 with input and button, otherwise 0.7), or
   *          null when neither a text input nor a button is present.
   */
  analyzeSearchRegion($el) {
    const textInputs = 'input[type="text"], input[type="search"], input:not([type])';
    const hasInput = $el.is(textInputs) || $el.find(textInputs).length > 0;
    const hasButton = $el.find('button, input[type="submit"]').length > 0;
    if (!hasInput && !hasButton) return null;
    return { type: "search", element: $el, confidence: hasInput && hasButton ? 0.95 : 0.7 };
  }

  /** True when the element's text has a digit and a next/previous marker. */
  isPagination($el) {
    const text = $el.text().toLowerCase();
    return /\d+/.test(text) && (/next|>|\u203a|\u2192|\u00bb/.test(text) || /prev|previous|<|\u2039|\u2190|\u00ab/.test(text));
  }

  /**
   * Picks the region that best serves the current intent.
   * Each intent lists region types in preference order; a region scores
   * `(rank from the end) * confidence`, and the first highest score wins.
   * Falls back to the largest region (or `body`) when nothing matches.
   *
   * @returns `{ region, container }` where `container` is a selector string.
   */
  routeIntent(regions) {
    const intentMap = {
      search: ["search", "form"],
      search_jobs: ["search", "listing", "form"],
      extract_list: ["listing", "search", "detail"],
      extract_detail: ["detail", "listing"],
      fill_form: ["form", "search"],
      navigate: ["navigation", "listing"],
      login: ["form"],
      filter: ["search", "listing"],
      sort: ["listing", "search"],
      scrape: ["listing", "detail", "search"]
    };
    const key = this.intent.toLowerCase();
    // Own-property check: intents such as "constructor" must not resolve to
    // members inherited from Object.prototype.
    const targetTypes = Object.prototype.hasOwnProperty.call(intentMap, key) ? intentMap[key] : ["search", "listing", "form"];
    let best = null;
    let bestScore = 0;
    for (const r of regions) {
      const match = targetTypes.indexOf(r.type);
      const score = match >= 0 ? (targetTypes.length - match) * r.confidence : 0;
      if (score > bestScore) {
        bestScore = score;
        best = r;
      }
    }
    if (!best) best = this.findLargestRegion(regions);
    return { region: best, container: this.generateContainerSelector(best.element) };
  }

  /**
   * Returns the region with the most descendant elements, or a synthetic
   * "unknown" region wrapping `body` when there are no regions at all.
   */
  findLargestRegion(regions) {
    if (regions.length === 0) return { type: "unknown", element: this.$("body"), confidence: 0.5 };
    return regions.reduce((l, c) => c.element.find("*").length > l.element.find("*").length ? c : l);
  }

  /**
   * Builds the workflow graph: one node per region (id `<type>_<index>`),
   * edges inferred for every ordered pair of distinct regions.
   * Navigation regions are entry points; listing/detail regions are exit
   * points. When no navigation exists the first node becomes the entry point.
   *
   * @param _target unused; kept for signature compatibility.
   * @param all     every region returned by `identifyRegions`.
   */
  buildWorkflow(_target, all) {
    const nodes = {};
    const edges = [];
    const entry = [];
    const exit = [];
    all.forEach((r, i) => {
      const id = `${r.type}_${i}`;
      const raw = r.element[0];
      nodes[id] = {
        node_id: id,
        element_type: this.mapType(r.type),
        semantic_role: r.type,
        intent: this.intent,
        tag: raw?.tagName?.toLowerCase(),
        selector_candidates: this.buildCandidates(r.element),
        confidence: r.confidence
      };
      if (r.type === "navigation") entry.push(id);
      if (r.type === "listing" || r.type === "detail") exit.push(id);
    });
    all.forEach((f, fi) => all.forEach((t, ti) => {
      if (fi !== ti) {
        const e = this.inferEdge(f, t, fi, ti);
        if (e) edges.push(e);
      }
    }));
    const ids = Object.keys(nodes);
    if (entry.length === 0 && ids.length > 0) entry.push(ids[0]);
    return { nodes, edges, entry_points: entry, exit_points: exit, domain: this.domain, page_type: this.inferPageType(all) };
  }

  /** Maps a region type to its AXIR element type; "unknown" for anything else. */
  mapType(t) {
    const m = {
      search: "search",
      navigation: "navigation",
      listing: "list",
      form: "form",
      pagination: "pagination",
      detail: "container"
    };
    return Object.prototype.hasOwnProperty.call(m, t) ? m[t] : "unknown";
  }

  /**
   * Returns the edge implied by going from region `f` (index `fi`) to region
   * `t` (index `ti`), or null when the pair has no known transition.
   * Node ids follow the `<type>_<index>` scheme used by `buildWorkflow`.
   */
  inferEdge(f, t, fi, ti) {
    if (f.type === "search" && t.type === "listing") return { from_node: `search_${fi}`, to_node: `listing_${ti}`, action: "submit_search", probability: 0.9 };
    if (f.type === "navigation" && t.type === "search") return { from_node: `navigation_${fi}`, to_node: `search_${ti}`, action: "navigate_to_search", probability: 0.7 };
    if (f.type === "listing" && t.type === "pagination") return { from_node: `listing_${fi}`, to_node: `pagination_${ti}`, action: "next_page", probability: 0.8 };
    if (f.type === "pagination" && t.type === "listing") return { from_node: `pagination_${fi}`, to_node: `listing_${ti}`, action: "load_results", probability: 0.95 };
    return null;
  }

  /** Derives a coarse page type from the set of region types present. */
  inferPageType(regions) {
    const t = regions.map((r) => r.type);
    if (t.includes("search") && t.includes("listing")) return "search";
    if (t.includes("listing")) return "listing";
    if (t.includes("form")) return "form";
    if (t.includes("search")) return "search";
    if (t.includes("navigation")) return "landing";
    return "unknown";
  }

  /**
   * Form controls belonging to a region: the region element itself when it is
   * a control (a standalone search input), followed by its descendant controls.
   */
  collectFieldElements($root) {
    const controls = "input, textarea, select";
    return [...$root.filter(controls).toArray(), ...$root.find(controls).toArray()];
  }

  /**
   * Builds the named selector map for the target region: `container`, one
   * entry per nameable field, `btn_<label>` per button and `link_<text>` per
   * short link. Later entries with the same key replace earlier ones.
   */
  generateSelectors(target) {
    const s = {};
    const $el = target.region.element;
    s.container = this.buildCandidates($el);
    for (const el of this.collectFieldElements($el)) {
      const $field = this.$(el);
      const n = this.inferFieldName($field);
      if (n) s[n] = this.buildCandidates($field);
    }
    for (const el of $el.find('button, input[type="submit"], input[type="button"]').toArray()) {
      const $btn = this.$(el);
      const label = $btn.text().trim() || String($btn.val() || "button");
      s[`btn_${this.slugify(label)}`] = this.buildCandidates($btn);
    }
    for (const el of $el.find("a").toArray()) {
      const $a = this.$(el);
      const t = $a.text().trim();
      if (t && t.length < 50) s[`link_${this.slugify(t)}`] = this.buildCandidates($a);
    }
    return s;
  }

  /**
   * Lists selector candidates for the first element of `$el`, ordered by
   * priority: id, classes, name, type, placeholder, role, aria-label, text.
   * Ids starting with a digit and framework-prefixed classes (js-, ng-, vue-,
   * data-) are skipped. Returns an empty array for an empty selection.
   */
  buildCandidates($el) {
    const c = [];
    const el = $el[0];
    if (!el) return c;
    const id = $el.attr("id");
    if (id && !id.match(/^\d/)) c.push({ type: "css", value: `#${this.escape(id)}`, priority: 1, confidence: 0.99 });
    const classes = ($el.attr("class") || "").split(/\s+/).filter((x) => x && !x.match(/^js-|^ng-|^vue-|^data-/));
    if (classes.length) c.push({ type: "css", value: `.${classes.map((x) => this.escape(x)).join(".")}`, priority: 2, confidence: 0.85 });
    const tag = el.tagName?.toLowerCase() || "";
    const name = $el.attr("name");
    const type = $el.attr("type");
    const placeholder = $el.attr("placeholder");
    // Every attribute value goes through q() so quotes/backslashes cannot break the selector.
    if (name) c.push({ type: "css", value: `${tag}[name="${this.q(name)}"]`, priority: 3, confidence: 0.9 });
    if (type) c.push({ type: "css", value: `${tag}[type="${this.q(type)}"]`, priority: 4, confidence: 0.8 });
    if (placeholder) c.push({ type: "css", value: `${tag}[placeholder="${this.q(placeholder)}"]`, priority: 5, confidence: 0.75 });
    const role = $el.attr("role");
    if (role) c.push({ type: "semantic", value: `[role="${this.q(role)}"]`, priority: 6, confidence: 0.9 });
    const al = $el.attr("aria-label");
    if (al) c.push({ type: "semantic", value: `[aria-label="${this.q(al)}"]`, priority: 7, confidence: 0.85 });
    const text = $el.text().trim();
    if (text && text.length < 100) c.push({ type: "text", value: text, priority: 8, confidence: 0.7 });
    return c;
  }

  /**
   * Describes each nameable form control of the target region as
   * `{ name, selector, transform, required }`.
   */
  generateFields(target) {
    const f = [];
    for (const el of this.collectFieldElements(target.region.element)) {
      const $el = this.$(el);
      const name = this.inferFieldName($el);
      if (!name) continue;
      f.push({
        name,
        selector: this.bestSelector($el),
        transform: this.inferTransform($el),
        required: $el.attr("required") !== void 0
      });
    }
    return f;
  }

  /**
   * Derives a slug name for a form control from, in order: the text of a
   * `<label for>` pointing at its id, its placeholder, its aria-label, its
   * name attribute. A source whose slug is empty is skipped.
   *
   * @returns the slug, or null when no source yields one.
   */
  inferFieldName($el) {
    const sources = [];
    const id = $el.attr("id");
    // The id is escaped before interpolation: a raw quote would make the selector engine throw.
    if (id) sources.push(this.$(`label[for="${this.q(id)}"]`).text());
    sources.push($el.attr("placeholder"), $el.attr("aria-label"), $el.attr("name"));
    for (const source of sources) {
      if (!source) continue;
      const slug = this.slugify(source);
      if (slug) return slug;
    }
    return null;
  }

  /** Maps an input's `type` (or a `<select>`) to a value transform name, else undefined. */
  inferTransform($el) {
    const t = $el.attr("type");
    if (t === "number") return "number";
    if (t === "email") return "email";
    if (t === "date") return "date";
    if (t === "checkbox") return "boolean";
    if ($el.is("select")) return "select";
    return void 0;
  }

  /** Highest-priority candidate value, else the tag name, else "*". */
  bestSelector($el) {
    const c = this.buildCandidates($el);
    if (c.length) return c[0].value;
    const raw = $el[0];
    return raw?.tagName?.toLowerCase() || "*";
  }

  /** Highest-priority candidate value for a region container, else "body". */
  generateContainerSelector($el) {
    const c = this.buildCandidates($el);
    return c.length ? c[0].value : "body";
  }

  /**
   * Returns a single-element list describing how to act on the current
   * intent. Unknown intents get a generic "interact" descriptor whose
   * parameters are the keys of `params`.
   */
  mapIntents() {
    const m = {
      search: { intent: "search", action: "fill_and_submit", parameters: ["query", "location", "filters"], context: "Enter search terms and submit form" },
      search_jobs: { intent: "search_jobs", action: "fill_and_submit", parameters: ["query", "location", "experience_level", "job_type"], context: "Search for job listings with optional filters" },
      extract_list: { intent: "extract_list", action: "extract_fields", parameters: ["items", "title", "url", "metadata"], context: "Extract structured data from list items" },
      extract_detail: { intent: "extract_detail", action: "extract_fields", parameters: ["title", "description", "metadata", "links"], context: "Extract structured data from detail page" },
      fill_form: { intent: "fill_form", action: "fill_and_submit", parameters: Object.keys(this.params), context: "Fill form fields with provided parameters" },
      navigate: { intent: "navigate", action: "click", parameters: ["target_url", "link_text"], context: "Click navigation link to target page" },
      login: { intent: "login", action: "fill_and_submit", parameters: ["username", "password"], context: "Enter credentials and submit login form" },
      scrape: { intent: "scrape", action: "extract_fields", parameters: ["all_visible_text", "links", "images", "structured_data"], context: "Extract all visible content from the page" }
    };
    const key = this.intent.toLowerCase();
    // Own-property check so inherited Object.prototype members are never returned as a mapping.
    if (Object.prototype.hasOwnProperty.call(m, key)) return [m[key]];
    return [{ intent: this.intent, action: "interact", parameters: Object.keys(this.params), context: `Perform ${this.intent} on the page` }];
  }

  /**
   * Lower-cases `t`, drops everything except word characters, whitespace and
   * hyphens, collapses separator runs to "_", trims edge underscores and caps
   * the result at 50 characters (without leaving a trailing "_" at the cut).
   */
  slugify(t) {
    return t
      .toLowerCase()
      .replace(/[^\w\s-]/g, "")
      .replace(/[\s_-]+/g, "_")
      .replace(/^_|_$/g, "")
      .substring(0, 50)
      .replace(/_$/, "");
  }

  /**
   * Escapes `s` for use as a CSS identifier (id or class name), following the
   * CSSOM "serialize an identifier" rules: control characters and a leading
   * digit become hex escapes, a lone "-" is escaped, letters/digits/"-"/"_"
   * and non-ASCII characters pass through, everything else is backslash-escaped.
   */
  escape(s) {
    let out = "";
    for (let i = 0; i < s.length; i++) {
      const ch = s.charAt(i);
      const code = s.charCodeAt(i);
      if (code === 0) {
        out += "\uFFFD";
        continue;
      }
      const isDigit = code >= 48 && code <= 57;
      const isControl = (code >= 1 && code <= 31) || code === 127;
      const digitNeedsEscape = isDigit && (i === 0 || (i === 1 && s.charAt(0) === "-"));
      if (isControl || digitNeedsEscape) {
        // Hex escape; the trailing space terminates the escape sequence.
        out += `\\${code.toString(16)} `;
        continue;
      }
      if (ch === "-" && s.length === 1) {
        out += "\\-";
        continue;
      }
      const isLetter = (code >= 65 && code <= 90) || (code >= 97 && code <= 122);
      if (code >= 128 || ch === "-" || ch === "_" || isDigit || isLetter) {
        out += ch;
        continue;
      }
      out += `\\${ch}`;
    }
    return out;
  }

  /**
   * Escapes `s` for use inside a double-quoted CSS attribute value.
   * Backslashes are escaped first so the escapes added for quotes stay intact.
   */
  q(s) {
    return s.replace(/\\/g, "\\\\").replace(/"/g, '\\"');
  }
};
713
+
443
714
  // src/compiler/local-axir.ts
444
715
  import * as fs from "fs";
445
716
  import * as path from "path";
446
717
  import * as os from "os";
447
718
  import * as https from "https";
448
719
  import * as http from "http";
449
-
450
- // src/compiler/grammar/axir-schema.gbnf
451
- var axir_schema_default = 'root ::= "{" ws axir-fields ws "}"\n\naxir-fields ::=\n "\\"workflow\\"" ":" workflow ws ","\n ws "\\"intents\\"" ":" intents ws ","\n ws "\\"selectors\\"" ":" selectors ws ","\n ws "\\"fields\\"" ":" fields\n ["," ws "\\"container\\"" ":" string]\n ["," ws "\\"model_used\\"" ":" string]\n ["," ws "\\"tokens_used\\"" ":" number]\n ["," ws "\\"compilation_time_ms\\"" ":" number]\n\nworkflow ::=\n "{" ws\n "\\"nodes\\"" ":" "{" ws node-list ws "}" ws ","\n ws "\\"edges\\"" ":" "[" ws edge-list ws "]" ws ","\n ws "\\"entry_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"exit_points\\"" ":" "[" ws string-list ws "]" ws ","\n ws "\\"domain\\"" ":" string ws ","\n ws "\\"page_type\\"" ":" page-type\n ["," ws "\\"structure_hash\\"" ":" string]\n ws "}"\n\nnode-list ::= [node-pair ("," ws node-pair)*]\nnode-pair ::= string ":" "{" ws\n "\\"element_type\\"" ":" element-type ws ","\n ws "\\"semantic_role\\"" ":" string ws ","\n ws "\\"intent\\"" ":" string ws ","\n ws "\\"tag\\"" ":" string ws ","\n ws "\\"selector_candidates\\"" ":" "[" ws selector-list ws "]"\n ["," ws "\\"parent_id\\"" ":" string]\n ["," ws "\\"children_ids\\"" ":" "[" ws string-list ws "]"]\n ["," ws "\\"aria_label\\"" ":" string]\n ["," ws "\\"aria_role\\"" ":" string]\n ["," ws "\\"text_content\\"" ":" string]\n "," ws "\\"confidence\\"" ":" number\n ["," ws "\\"reasoning\\"" ":" string]\nws "}"\n\nelement-type ::=\n "\\"button\\"" | "\\"link\\"" | "\\"input\\"" | "\\"form\\"" |\n "\\"navigation\\"" | "\\"search\\"" | "\\"filter\\"" | "\\"sort\\"" |\n "\\"pagination\\"" | "\\"container\\"" | "\\"list\\"" | "\\"item\\"" |\n "\\"heading\\"" | "\\"text\\"" | "\\"image\\"" | "\\"unknown\\""\n\npage-type ::=\n "\\"landing\\"" | "\\"search\\"" | "\\"listing\\"" | "\\"detail\\"" |\n "\\"form\\"" | "\\"checkout\\"" | "\\"dashboard\\"" | "\\"unknown\\""\n\nedge-list ::= [edge ("," ws edge)*]\nedge ::= "{" ws\n "\\"from_node\\"" ":" string ws ","\n ws "\\"to_node\\"" ":" 
string ws ","\n ws "\\"action\\"" ":" string\n ["," ws "\\"condition\\"" ":" string]\n ["," ws "\\"probability\\"" ":" number]\nws "}"\n\nselector-list ::= [selector ("," ws selector)*]\nselector ::= "{" ws\n "\\"type\\"" ":" selector-type ws ","\n ws "\\"value\\"" ":" string ws ","\n ws "\\"priority\\"" ":" number\n ["," ws "\\"confidence\\"" ":" number]\nws "}"\n\nselector-type ::= "\\"css\\"" | "\\"semantic\\"" | "\\"text\\"" | "\\"attribute\\""\n\nintents ::= "[" ws [intent ("," ws intent)*] ws "]"\nintent ::= "{" ws\n "\\"intent\\"" ":" string ws ","\n ws "\\"action\\"" ":" string ws ","\n ws "\\"parameters\\"" ":" "[" ws [string ("," ws string)*] ws "]" ws ","\n ws "\\"context\\"" ":" string\nws "}"\n\nfields ::= "[" ws [field ("," ws field)*] ws "]"\nfield ::= "{" ws\n "\\"name\\"" ":" string ws ","\n ws "\\"selector\\"" ":" string\n ["," ws "\\"transform\\"" ":" string]\n ["," ws "\\"required\\"" ":" boolean]\nws "}"\n\nselectors ::= "{" ws [selector-pair ("," ws selector-pair)*] ws "}"\nselector-pair ::= string ":" "[" ws selector-list ws "]"\n\nstring-list ::= [string ("," ws string)*]\n\nstring ::= "\\"" char* "\\""\nchar ::= [^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})\n\nnumber ::= ["-"]? ("0" | [1-9] [0-9]*) ([.] [0-9]+)? ([eE] ["-"]? [0-9]+)?\n\nboolean ::= "true" | "false"\n\nws ::= [ \\t\\n\\r]*\n';
452
-
453
- // src/compiler/local-axir.ts
720
+ import { URL } from "url";
454
721
  var nativeAvailable = false;
455
722
  var getLlama;
456
- try {
457
- const llama = __require("node-llama-cpp");
458
- getLlama = llama.getLlama;
723
+ var LlamaModel;
724
+ var LlamaContext;
725
+ var LlamaGrammar;
726
+ var llamaPromise = import("node-llama-cpp").then((m) => {
459
727
  nativeAvailable = true;
460
- } catch {
728
+ getLlama = m.getLlama;
729
+ LlamaModel = m.LlamaModel;
730
+ LlamaContext = m.LlamaContext;
731
+ LlamaGrammar = m.LlamaGrammar;
732
+ return m;
733
+ }).catch((err) => {
461
734
  nativeAvailable = false;
462
- }
463
- var DEFAULT_MODEL_URL = "https://huggingface.co/bartowski/Phi-3-mini-128k-instruct-GGUF/resolve/main/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
735
+ if (process.env.AWI_DEBUG) {
736
+ console.error("[AWI] node-llama-cpp load error:", err.message);
737
+ }
738
+ return null;
739
+ });
740
+ var DEFAULT_MODEL_URL = "https://github.com/RayAKaan/AWI/releases/download/v0.0.0-models/Phi-3-mini-128k-instruct-Q4_K_M.gguf";
464
741
  var DEFAULT_MODEL_FILENAME = "phi3-128k-q4.gguf";
465
742
  var LocalAXIRCompiler = class {
466
743
  modelPath;
@@ -487,14 +764,10 @@ var LocalAXIRCompiler = class {
487
764
  this.onDownloadProgress = options.onDownloadProgress;
488
765
  this.onStatus = options.onStatus;
489
766
  }
490
- // -------------------------------------------------------------------------
491
- // Public API
492
- // -------------------------------------------------------------------------
493
767
  async compile(domHTML, a11yTree, intent, params) {
494
768
  await this._ensureModel();
495
769
  await this._ensureGrammar();
496
- const a11y = a11yTree ?? "No accessibility tree available.";
497
- const prompt = this._buildCompilePrompt(domHTML, a11y, intent, params);
770
+ const prompt = this._buildCompilePrompt(domHTML, a11yTree, intent, params);
498
771
  const start = Date.now();
499
772
  this._status("Compiling AXIR locally...");
500
773
  const resultText = await this._complete(prompt, 4096, 0.3);
@@ -537,22 +810,26 @@ var LocalAXIRCompiler = class {
537
810
  this.ready = false;
538
811
  }
539
812
  }
540
- // -------------------------------------------------------------------------
541
- // Model lifecycle
542
- // -------------------------------------------------------------------------
543
813
  async _ensureModel() {
544
814
  if (this.ready) return;
815
+ await llamaPromise;
816
+ if (!getLlama) {
817
+ throw new Error("node-llama-cpp failed to load. Is it installed?");
818
+ }
545
819
  if (!fs.existsSync(this.modelPath)) {
546
820
  await this._downloadModel();
547
821
  }
548
822
  this._status("Loading local model...");
549
823
  const llama = await getLlama();
550
824
  const gpuLayers = this.gpuLayers ?? this._autoDetectGPULayers();
551
- this.model = new llama.LlamaModel({
825
+ this.model = new LlamaModel({
826
+ llama,
552
827
  modelPath: this.modelPath,
553
828
  gpuLayers
554
829
  });
555
- this.context = await this.model.createContext({
830
+ this.context = new LlamaContext({
831
+ llama,
832
+ model: this.model,
556
833
  contextSize: this.contextSize
557
834
  });
558
835
  this.ready = true;
@@ -560,9 +837,10 @@ var LocalAXIRCompiler = class {
560
837
  }
561
838
  async _ensureGrammar() {
562
839
  if (this.grammar) return;
563
- const llama = await getLlama();
564
- this.grammar = new llama.LlamaGrammar({
565
- grammar: axir_schema_default
840
+ const grammarPath = path.join(__dirname, "grammar", "axir-schema.gbnf");
841
+ this.grammar = new LlamaGrammar({
842
+ llama: await getLlama(),
843
+ grammar: fs.readFileSync(grammarPath, "utf-8")
566
844
  });
567
845
  }
568
846
  _autoDetectGPULayers() {
@@ -571,17 +849,14 @@ var LocalAXIRCompiler = class {
571
849
  }
572
850
  return 0;
573
851
  }
574
- // -------------------------------------------------------------------------
575
- // Resumable model download
576
- // -------------------------------------------------------------------------
577
852
  async _downloadModel() {
578
853
  const dir = path.dirname(this.modelPath);
579
854
  if (!fs.existsSync(dir)) {
580
855
  fs.mkdirSync(dir, { recursive: true });
581
856
  }
582
857
  const tempPath = `${this.modelPath}.tmp`;
583
- const urlObj = new URL(this.modelUrl);
584
- const protocol = urlObj.protocol === "https:" ? https : http;
858
+ const url = new URL(this.modelUrl);
859
+ const protocol = url.protocol === "https:" ? https : http;
585
860
  let startByte = 0;
586
861
  if (fs.existsSync(tempPath)) {
587
862
  startByte = fs.statSync(tempPath).size;
@@ -597,10 +872,10 @@ var LocalAXIRCompiler = class {
597
872
  headers["Range"] = `bytes=${startByte}-`;
598
873
  }
599
874
  const request = protocol.get(
600
- urlObj,
875
+ url,
601
876
  { headers },
602
877
  (response) => {
603
- if (response.statusCode === 301 || response.statusCode === 302) {
878
+ if (response.statusCode === 302 || response.statusCode === 301) {
604
879
  if (response.headers.location) {
605
880
  this.modelUrl = response.headers.location;
606
881
  return this._downloadModel().then(resolve).catch(reject);
@@ -608,7 +883,7 @@ var LocalAXIRCompiler = class {
608
883
  }
609
884
  if (response.statusCode !== 200 && response.statusCode !== 206) {
610
885
  return reject(
611
- new Error(`Model download failed: HTTP ${response.statusCode}`)
886
+ new Error(`Download failed: HTTP ${response.statusCode}`)
612
887
  );
613
888
  }
614
889
  const total = parseInt(
@@ -642,76 +917,69 @@ var LocalAXIRCompiler = class {
642
917
  });
643
918
  });
644
919
  }
645
- // -------------------------------------------------------------------------
646
- // Inference
647
- // -------------------------------------------------------------------------
648
920
  async _complete(prompt, maxTokens, temperature) {
649
921
  if (!this.context) throw new Error("Model not loaded");
650
922
  const sequence = this.context.getSequence();
651
923
  await sequence.evaluate(prompt);
652
- const response = await sequence.generateResponse(maxTokens, {
924
+ const result = await sequence.generateResponse(maxTokens, {
653
925
  temperature,
654
926
  grammar: this.grammar
655
927
  });
656
928
  let text = "";
657
- for await (const token of response) {
929
+ for await (const token of result) {
658
930
  text += token;
659
931
  }
660
932
  return text;
661
933
  }
662
- // -------------------------------------------------------------------------
663
- // Prompt builders
664
- // -------------------------------------------------------------------------
665
934
  _buildCompilePrompt(domHTML, a11yTree, intent, params) {
666
935
  const paramsJson = params ? JSON.stringify(params, null, 2) : "{}";
667
- return [
668
- `<|system|>`,
669
- `You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.`,
670
- ``,
671
- `Output MUST be valid JSON matching this schema:`,
672
- `- workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}`,
673
- `- workflow.edges: list of {from_node, to_node, action, condition?, probability}`,
674
- `- workflow.entry_points: list of starting node_ids`,
675
- `- workflow.exit_points: list of terminal node_ids`,
676
- `- workflow.domain: the domain name`,
677
- `- workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown`,
678
- `- intents: list of {intent, action, parameters[], context}`,
679
- `- selectors: map of selector_name -> list of {type, value, priority}`,
680
- `- fields: list of {name, selector, transform?, required}`,
681
- `- container?: string (optional container selector name)`,
682
- ``,
683
- `Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.`,
684
- `Selector types: css, semantic, text, attribute.`,
685
- `<|user|>`,
686
- `Intent: ${intent}`,
687
- `Parameters: ${paramsJson}`,
688
- ``,
689
- `Simplified DOM:`,
690
- `${this._truncate(domHTML, 4e4)}`,
691
- ``,
692
- `Accessibility Tree:`,
693
- `${this._truncate(a11yTree, 8e3)}`,
694
- ``,
695
- `Compile AXIR:`,
696
- `<|assistant|>`
697
- ].join("\n");
936
+ const a11y = a11yTree || "No accessibility tree available.";
937
+ return `|<|system|>
938
+ You are an expert web-scraping analyst. Your job is to read a simplified DOM and accessibility tree, then output a structured JSON object describing the page layout, interactive elements, and data extraction plan.
939
+
940
+ Output MUST be valid JSON matching this schema:
941
+ - workflow.nodes: map of node_id -> {element_type, semantic_role, intent, tag, selector_candidates[], parent_id?, children_ids?, aria_label?, aria_role?, text_content?, confidence, reasoning?}
942
+ - workflow.edges: list of {from_node, to_node, action, condition?, probability}
943
+ - workflow.entry_points: list of starting node_ids
944
+ - workflow.exit_points: list of terminal node_ids
945
+ - workflow.domain: the domain name
946
+ - workflow.page_type: one of landing|search|listing|detail|form|checkout|dashboard|unknown
947
+ - intents: list of {intent, action, parameters[], context}
948
+ - selectors: map of selector_name -> list of {type, value, priority}
949
+ - fields: list of {name, selector, transform?, required}
950
+ - container?: string (optional container selector name)
951
+
952
+ Element types: button, link, input, form, navigation, search, filter, sort, pagination, container, list, item, heading, text, image, unknown.
953
+ Selector types: css, semantic, text, attribute.
954
+ |<|user|>
955
+ Intent: ${intent}
956
+ Parameters: ${paramsJson}
957
+
958
+ Simplified DOM:
959
+ ${this._truncate(domHTML, 4e4)}
960
+
961
+ Accessibility Tree:
962
+ ${this._truncate(a11y, 8e3)}
963
+
964
+ Compile AXIR:
965
+ |<|assistant|>
966
+ `;
698
967
  }
699
968
  _buildHealPrompt(domHTML, brokenSelector, semanticIntent) {
700
- return [
701
- `<|system|>`,
702
- `You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.`,
703
- ``,
704
- `Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}`,
705
- `<|user|>`,
706
- `Broken selector: ${brokenSelector}`,
707
- `Semantic intent: ${semanticIntent}`,
708
- ``,
709
- `Current DOM (truncated):`,
710
- `${this._truncate(domHTML, 2e4)}`,
711
- ``,
712
- `New selector:`,
713
- `<|assistant|>`
714
- ].join("\n");
969
+ return `|<|system|>
970
+ You are a CSS selector repair tool. Given a broken selector and the current DOM, output the new CSS selector that targets the same semantic element.
971
+
972
+ Output JSON: {"selector": "...", "confidence": 0.0-1.0, "reasoning": "..."}
973
+ |<|user|>
974
+ Broken selector: ${brokenSelector}
975
+ Semantic intent: ${semanticIntent}
976
+
977
+ Current DOM (truncated):
978
+ ${this._truncate(domHTML, 2e4)}
979
+
980
+ New selector:
981
+ |<|assistant|>
982
+ `;
715
983
  }
716
984
  _truncate(text, maxChars) {
717
985
  if (text.length <= maxChars) return text;
@@ -726,9 +994,8 @@ var LocalAXIRCompiler = class {
726
994
  };
727
995
  export {
728
996
  AWIClient,
729
- AWIError,
997
+ AXIRCompiler,
730
998
  AdvisoryExecutor,
731
- LocalAXIRCompiler,
732
- client_default as default
999
+ LocalAXIRCompiler
733
1000
  };
734
1001
  //# sourceMappingURL=index.mjs.map