@modelrelay/sdk 1.28.0 → 1.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/node.js CHANGED
@@ -1,8 +1,9 @@
1
1
  import {
2
2
  PathEscapeError,
3
3
  ToolArgumentError,
4
- ToolRegistry
5
- } from "./chunk-G5H7EY4F.js";
4
+ ToolRegistry,
5
+ ToolTypes
6
+ } from "./chunk-CV3DTA6P.js";
6
7
 
7
8
  // src/tools_local_fs.ts
8
9
  import { promises as fs } from "fs";
@@ -601,11 +602,589 @@ function createLocalFSToolPack(options) {
601
602
  function createLocalFSTools(options) {
602
603
  return createLocalFSToolPack(options).toRegistry();
603
604
  }
605
+
606
+ // src/tools_browser.ts
607
/**
 * Canonical names of the browser tools.
 *
 * The same table is used both for the tool definitions advertised to the model
 * and for registering the handlers, so the object is frozen: an accidental
 * write would make the advertised names and the registered names diverge.
 */
var BrowserToolNames = Object.freeze({
  /** Navigate to a URL and return accessibility tree */
  NAVIGATE: "browser.navigate",
  /** Click an element by accessible name/role */
  CLICK: "browser.click",
  /** Type text into an input field */
  TYPE: "browser.type",
  /** Get current accessibility tree */
  SNAPSHOT: "browser.snapshot",
  /** Scroll the page */
  SCROLL: "browser.scroll",
  /** Capture a screenshot */
  SCREENSHOT: "browser.screenshot",
  /** Extract data using CSS selectors */
  EXTRACT: "browser.extract"
});
623
/**
 * Fallback limits used by the browser tool pack when the caller does not
 * override them.
 */
var BrowserDefaults = {
  // How long a page navigation may take, in milliseconds.
  NAVIGATION_TIMEOUT_MS: 30000,
  // How long a single action (click, fill, ...) may take, in milliseconds.
  ACTION_TIMEOUT_MS: 5000,
  // Upper bound on the number of nodes rendered from the accessibility tree.
  MAX_SNAPSHOT_NODES: 500,
  // Largest screenshot, in bytes, that is accepted.
  MAX_SCREENSHOT_BYTES: 5000000
};
633
/**
 * Tool pack that exposes a Playwright page to an LLM as a set of
 * `browser.*` tools (navigate, click, type, snapshot, scroll, screenshot,
 * extract).
 *
 * Lifecycle: construct, `await initialize()`, use the registered handlers,
 * then `await close()`. A caller-supplied `browser` or `context` is used as-is
 * and is never closed by this class; only resources created here are.
 */
var BrowserToolPack = class {
  /**
   * @param {object} [options]
   * @param {string[]} [options.allowedDomains] - If non-empty, only these
   *   domains (and their subdomains) may be visited.
   * @param {string[]} [options.blockedDomains] - Domains (and their
   *   subdomains) that may never be visited.
   * @param {number} [options.navigationTimeoutMs]
   * @param {number} [options.actionTimeoutMs]
   * @param {number} [options.maxSnapshotNodes]
   * @param {boolean} [options.headless] - Default: true.
   * @param {object} [options.browser] - Existing browser to reuse.
   * @param {object} [options.context] - Existing browser context to reuse.
   */
  constructor(options = {}) {
    this.browser = null;
    this.context = null;
    this.page = null;
    this.cdpSession = null;
    // Ownership flags only flip to true for resources created in initialize().
    this.ownsBrowser = false;
    this.ownsContext = false;
    this.cfg = {
      allowedDomains: options.allowedDomains ?? [],
      blockedDomains: options.blockedDomains ?? [],
      navigationTimeoutMs: options.navigationTimeoutMs ?? BrowserDefaults.NAVIGATION_TIMEOUT_MS,
      actionTimeoutMs: options.actionTimeoutMs ?? BrowserDefaults.ACTION_TIMEOUT_MS,
      maxSnapshotNodes: options.maxSnapshotNodes ?? BrowserDefaults.MAX_SNAPSHOT_NODES,
      headless: options.headless ?? true
    };
    if (options.browser) {
      this.browser = options.browser;
    }
    if (options.context) {
      this.context = options.context;
    }
  }

  /**
   * Initialize the browser. Must be called before using any tools.
   *
   * A browser is launched only when neither a context nor a browser was
   * supplied; `playwright` is imported only in that case, so callers that
   * inject their own context do not spawn an unused Chromium process.
   */
  async initialize() {
    if (this.page) {
      return;
    }
    if (!this.context) {
      if (!this.browser) {
        const { chromium } = await import("playwright");
        this.browser = await chromium.launch({
          headless: this.cfg.headless
        });
        this.ownsBrowser = true;
      }
      this.context = await this.browser.newContext();
      this.ownsContext = true;
    }
    this.page = await this.context.newPage();
  }

  /**
   * Close the browser and clean up resources.
   *
   * Teardown is deliberately best-effort: a failure to detach/close one
   * resource must not prevent the remaining ones from being released.
   */
  async close() {
    if (this.cdpSession) {
      await this.cdpSession.detach().catch(() => {
      });
      this.cdpSession = null;
    }
    if (this.page) {
      await this.page.close().catch(() => {
      });
      this.page = null;
    }
    if (this.ownsContext && this.context) {
      await this.context.close().catch(() => {
      });
      this.context = null;
    }
    if (this.ownsBrowser && this.browser) {
      await this.browser.close().catch(() => {
      });
      this.browser = null;
    }
  }

  /**
   * Get tool definitions for use with LLM APIs.
   *
   * @returns {object[]} One function-tool definition per browser tool.
   */
  getToolDefinitions() {
    return [
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.NAVIGATE,
          description: "Navigate to a URL and return the page's accessibility tree. The tree shows interactive elements (buttons, links, inputs) with their accessible names.",
          parameters: {
            type: "object",
            properties: {
              url: {
                type: "string",
                description: "The URL to navigate to (must be http/https)"
              },
              waitUntil: {
                type: "string",
                enum: ["load", "domcontentloaded", "networkidle"],
                description: "When to consider navigation complete. Default: domcontentloaded"
              }
            },
            required: ["url"]
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.CLICK,
          description: "Click an element by its accessible name. Returns updated accessibility tree.",
          parameters: {
            type: "object",
            properties: {
              name: {
                type: "string",
                description: "The accessible name of the element (from button text, aria-label, etc.)"
              },
              role: {
                type: "string",
                enum: [
                  "button",
                  "link",
                  "menuitem",
                  "checkbox",
                  "radio",
                  "tab"
                ],
                description: "ARIA role to match. If omitted, searches buttons, links, and menuitems."
              }
            },
            required: ["name"]
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.TYPE,
          description: "Type text into an input field identified by accessible name.",
          parameters: {
            type: "object",
            properties: {
              name: {
                type: "string",
                description: "The accessible name of the input (from label, aria-label, placeholder)"
              },
              text: {
                type: "string",
                description: "The text to type"
              },
              role: {
                type: "string",
                enum: ["textbox", "searchbox", "combobox"],
                description: "ARIA role. Default: textbox"
              }
            },
            required: ["name", "text"]
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.SNAPSHOT,
          description: "Get the current page's accessibility tree without navigating.",
          parameters: {
            type: "object",
            properties: {}
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.SCROLL,
          description: "Scroll the page in a given direction.",
          parameters: {
            type: "object",
            properties: {
              direction: {
                type: "string",
                enum: ["up", "down"],
                description: "Scroll direction"
              },
              amount: {
                type: "string",
                enum: ["page", "half", "toTop", "toBottom"],
                description: "How much to scroll. Default: page"
              }
            },
            required: ["direction"]
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.SCREENSHOT,
          description: "Capture a PNG screenshot of the current page. Use sparingly - prefer accessibility tree for decisions.",
          parameters: {
            type: "object",
            properties: {
              fullPage: {
                type: "boolean",
                description: "Capture full scrollable page. Default: false (viewport only)"
              }
            }
          }
        }
      },
      {
        type: ToolTypes.Function,
        function: {
          name: BrowserToolNames.EXTRACT,
          description: "Extract structured data from the page using CSS selectors.",
          parameters: {
            type: "object",
            properties: {
              selector: {
                type: "string",
                description: "CSS selector for elements to extract"
              },
              attribute: {
                type: "string",
                description: "Attribute to extract (textContent, href, src, etc.). Default: textContent"
              },
              multiple: {
                type: "boolean",
                description: "Return all matches as JSON array. Default: false (first match only)"
              }
            },
            required: ["selector"]
          }
        }
      }
    ];
  }

  /**
   * Register tool handlers into an existing registry.
   *
   * @param {object} registry - Object exposing `register(name, handler)`.
   * @returns {object} The same registry, for chaining.
   */
  registerInto(registry) {
    registry.register(BrowserToolNames.NAVIGATE, this.navigate.bind(this));
    registry.register(BrowserToolNames.CLICK, this.click.bind(this));
    registry.register(BrowserToolNames.TYPE, this.type.bind(this));
    registry.register(BrowserToolNames.SNAPSHOT, this.snapshot.bind(this));
    registry.register(BrowserToolNames.SCROLL, this.scroll.bind(this));
    registry.register(BrowserToolNames.SCREENSHOT, this.screenshot.bind(this));
    registry.register(BrowserToolNames.EXTRACT, this.extract.bind(this));
    return registry;
  }

  /**
   * Create a new registry with just this pack's tools.
   */
  toRegistry() {
    return this.registerInto(new ToolRegistry());
  }

  // ========================================================================
  // Private: Helpers
  // ========================================================================

  /**
   * @throws {Error} If `initialize()` has not produced a page yet.
   */
  ensureInitialized() {
    if (!this.page) {
      throw new Error(
        "BrowserToolPack not initialized. Call initialize() first."
      );
    }
  }

  /**
   * Decode and validate the JSON arguments of a tool call.
   *
   * @param {object} call - Tool call carrying `id` and `function.{name,arguments}`.
   * @param {string[]} required - Keys that must be present and non-empty.
   * @returns {object} The parsed arguments object.
   * @throws {ToolArgumentError} On a missing function, malformed JSON, a
   *   non-object payload (including arrays), or a missing/empty required key.
   */
  parseArgs(call, required) {
    const func = call.function;
    if (!func) {
      throw new ToolArgumentError({
        message: "tool call missing function",
        toolCallId: call.id,
        toolName: "",
        rawArguments: ""
      });
    }
    const rawArgs = func.arguments || "{}";
    const fail = (message) => new ToolArgumentError({
      message,
      toolCallId: call.id,
      toolName: func.name,
      rawArguments: rawArgs
    });
    let parsed;
    try {
      parsed = JSON.parse(rawArgs);
    } catch (err) {
      throw fail(`invalid JSON arguments: ${err.message}`);
    }
    // typeof [] is "object" too, so arrays need their own check.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw fail("arguments must be an object");
    }
    for (const key of required) {
      const value = parsed[key];
      if (value === void 0 || value === null || value === "") {
        throw fail(`${key} is required`);
      }
    }
    return parsed;
  }

  /**
   * Check a hostname against the configured blocklist and allowlist.
   *
   * An entry covers a host when the host equals it or is a subdomain of it
   * (match on a label boundary, so "example.com" does not cover
   * "evilexample.com"). An entry written with a leading dot (".example.com")
   * covers subdomains only. Matching is case-insensitive, ignores a trailing
   * root dot, and skips empty entries instead of letting them match every
   * host; an allowlist with no usable entries is treated as absent.
   *
   * @param {string} hostname - Hostname as reported by `URL#hostname`.
   * @returns {"blocked"|"not-allowed"|null} The violated rule, or null if
   *   the host is permitted.
   */
  domainPolicyViolation(hostname) {
    const host = hostname.toLowerCase().replace(/\.$/, "");
    const normalize = (entries) => entries
      .map((entry) => String(entry).trim().toLowerCase().replace(/\.$/, ""))
      .filter((rule) => rule !== "");
    const covers = (rule) => rule.startsWith(".")
      ? host.endsWith(rule)
      : host === rule || host.endsWith(`.${rule}`);
    if (normalize(this.cfg.blockedDomains).some(covers)) {
      return "blocked";
    }
    const allowRules = normalize(this.cfg.allowedDomains);
    if (allowRules.length > 0 && !allowRules.some(covers)) {
      return "not-allowed";
    }
    return null;
  }

  /**
   * Validate a model-supplied URL before navigating to it.
   *
   * @param {string} url - Candidate URL.
   * @param {object} call - Originating tool call (used for error context).
   * @throws {ToolArgumentError} If the URL is unparsable, not http/https,
   *   blocked, or outside a non-empty allowlist.
   */
  validateUrl(url, call) {
    const fail = (message) => new ToolArgumentError({
      message,
      toolCallId: call.id,
      toolName: call.function?.name ?? "",
      rawArguments: call.function?.arguments ?? ""
    });
    let parsed;
    try {
      parsed = new URL(url);
    } catch {
      throw fail(`Invalid URL: ${url}`);
    }
    if (!["http:", "https:"].includes(parsed.protocol)) {
      throw fail(`Invalid protocol: ${parsed.protocol}. Only http/https allowed.`);
    }
    const domain = parsed.hostname;
    const violation = this.domainPolicyViolation(domain);
    if (violation === "blocked") {
      throw fail(`Domain blocked: ${domain}`);
    }
    if (violation === "not-allowed") {
      throw fail(`Domain not in allowlist: ${domain}`);
    }
  }

  /**
   * Validates the current page URL against allowlist/blocklist.
   * Called after navigation and around actions to catch redirects
   * and in-session navigation to blocked domains.
   *
   * Does nothing when there is no page or the page is still `about:blank`.
   *
   * @throws {Error} If the current URL is unparsable, not http/https,
   *   blocked, or outside a non-empty allowlist.
   */
  ensureCurrentUrlAllowed() {
    if (!this.page) return;
    const currentUrl = this.page.url();
    if (currentUrl === "about:blank") return;
    let parsed;
    try {
      parsed = new URL(currentUrl);
    } catch {
      throw new Error(`Current page has invalid URL: ${currentUrl}`);
    }
    if (!["http:", "https:"].includes(parsed.protocol)) {
      throw new Error(
        `Current page protocol not allowed: ${parsed.protocol}. Only http/https allowed.`
      );
    }
    const domain = parsed.hostname;
    const violation = this.domainPolicyViolation(domain);
    if (violation === "blocked") {
      throw new Error(`Current page domain is blocked: ${domain}`);
    }
    if (violation === "not-allowed") {
      throw new Error(`Current page domain not in allowlist: ${domain}`);
    }
  }

  /**
   * Fetch the page's full accessibility tree over a CDP session.
   * The session is created on first use, enabled for the Accessibility
   * domain, and then reused.
   *
   * @returns {Promise<object[]>} The `nodes` array of `Accessibility.getFullAXTree`.
   */
  async getAccessibilityTree() {
    this.ensureInitialized();
    if (!this.cdpSession) {
      this.cdpSession = await this.page.context().newCDPSession(this.page);
      await this.cdpSession.send("Accessibility.enable");
    }
    const response = await this.cdpSession.send(
      "Accessibility.getFullAXTree"
    );
    return response.nodes;
  }

  /**
   * Render accessibility nodes as one `[role "name" state...]` line each.
   *
   * Ignored nodes and unnamed generic/none/text nodes are dropped. At most
   * `cfg.maxSnapshotNodes` lines are produced; the truncation marker is added
   * only when a further displayable node actually had to be left out.
   *
   * @param {object[]} nodes - Nodes as returned by `getAccessibilityTree()`.
   * @returns {string} Newline-joined rendering.
   */
  formatAXTree(nodes) {
    const reportedStates = ["focused", "checked", "disabled", "expanded", "selected"];
    const lines = [];
    let count = 0;
    for (const node of nodes) {
      if (node.ignored) {
        continue;
      }
      const role = node.role?.value || "unknown";
      const name = node.name?.value || "";
      if (!name && ["generic", "none", "text"].includes(role)) {
        continue;
      }
      // Checked after filtering so skipped nodes never trigger the marker.
      if (count >= this.cfg.maxSnapshotNodes) {
        lines.push(`[truncated at ${this.cfg.maxSnapshotNodes} nodes]`);
        break;
      }
      const states = (node.properties ?? [])
        .filter((prop) => prop.value?.value === true && reportedStates.includes(prop.name))
        .map((prop) => prop.name);
      const stateStr = states.length ? " " + states.join(" ") : "";
      const nameStr = name ? ` "${name}"` : "";
      lines.push(`[${role}${nameStr}${stateStr}]`);
      count++;
    }
    return lines.join("\n");
  }

  // ========================================================================
  // Private: Tool Handlers
  // ========================================================================

  /**
   * `browser.navigate`: validate the URL, load it, re-validate the URL the
   * page ended up on (redirects), and return the accessibility tree.
   */
  async navigate(_args, call) {
    const args = this.parseArgs(call, ["url"]);
    this.validateUrl(args.url, call);
    this.ensureInitialized();
    const waitUntil = args.waitUntil ?? "domcontentloaded";
    await this.page.goto(args.url, {
      timeout: this.cfg.navigationTimeoutMs,
      waitUntil
    });
    this.ensureCurrentUrlAllowed();
    const tree = await this.getAccessibilityTree();
    return this.formatAXTree(tree);
  }

  /**
   * `browser.click`: click an element located by role and accessible name
   * and return the updated accessibility tree. Without a role, buttons,
   * links and menuitems are searched.
   */
  async click(_args, call) {
    const args = this.parseArgs(call, ["name"]);
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    let locator;
    if (args.role) {
      locator = this.page.getByRole(args.role, {
        name: args.name
      });
    } else {
      locator = this.page.getByRole("button", { name: args.name }).or(this.page.getByRole("link", { name: args.name })).or(this.page.getByRole("menuitem", { name: args.name }));
    }
    await locator.click({ timeout: this.cfg.actionTimeoutMs });
    // A click can navigate; do not hand back the tree of a disallowed page.
    this.ensureCurrentUrlAllowed();
    const tree = await this.getAccessibilityTree();
    return this.formatAXTree(tree);
  }

  /**
   * `browser.type`: fill an input located by role (default "textbox") and
   * accessible name. `text` is a required argument, and parseArgs treats an
   * empty string as missing, so this tool cannot be used to clear a field.
   */
  async type(_args, call) {
    const args = this.parseArgs(call, ["name", "text"]);
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    const role = args.role ?? "textbox";
    const locator = this.page.getByRole(role, { name: args.name });
    await locator.fill(args.text, { timeout: this.cfg.actionTimeoutMs });
    return `Typed "${args.text}" into ${role} "${args.name}"`;
  }

  /**
   * `browser.snapshot`: return the current page's accessibility tree.
   */
  async snapshot(_args, _call) {
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    const tree = await this.getAccessibilityTree();
    return this.formatAXTree(tree);
  }

  /**
   * `browser.scroll`: scroll to the top/bottom, or by a full/half viewport
   * height ("down" scrolls forward, anything else backward), then return the
   * accessibility tree. Falls back to 800px when no viewport size is known.
   */
  async scroll(_args, call) {
    const args = this.parseArgs(call, ["direction"]);
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    const amount = args.amount ?? "page";
    if (amount === "toTop") {
      await this.page.evaluate(() => window.scrollTo(0, 0));
    } else if (amount === "toBottom") {
      await this.page.evaluate(
        () => window.scrollTo(0, document.body.scrollHeight)
      );
    } else {
      const viewport = this.page.viewportSize();
      const height = viewport?.height ?? 800;
      const scrollAmount = amount === "half" ? height / 2 : height;
      const delta = args.direction === "down" ? scrollAmount : -scrollAmount;
      await this.page.evaluate((d) => window.scrollBy(0, d), delta);
    }
    const tree = await this.getAccessibilityTree();
    return this.formatAXTree(tree);
  }

  /**
   * `browser.screenshot`: capture a PNG and return it as a data URL.
   *
   * @throws {Error} If the image exceeds `BrowserDefaults.MAX_SCREENSHOT_BYTES`.
   */
  async screenshot(_args, call) {
    const args = this.parseArgs(call, []);
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    const buffer = await this.page.screenshot({
      fullPage: args.fullPage ?? false,
      type: "png"
    });
    if (buffer.length > BrowserDefaults.MAX_SCREENSHOT_BYTES) {
      throw new Error(
        `Screenshot size (${buffer.length} bytes) exceeds maximum allowed (${BrowserDefaults.MAX_SCREENSHOT_BYTES} bytes). Try capturing viewport only.`
      );
    }
    const base64 = buffer.toString("base64");
    return `data:image/png;base64,${base64}`;
  }

  /**
   * `browser.extract`: read `textContent` (default) or a named attribute
   * from the element(s) matching a CSS selector. Returns a JSON array string
   * when `multiple` is set (null values skipped), otherwise the trimmed value
   * of the first match or "" when it has none.
   */
  async extract(_args, call) {
    const args = this.parseArgs(call, ["selector"]);
    this.ensureInitialized();
    this.ensureCurrentUrlAllowed();
    const attribute = args.attribute ?? "textContent";
    const multiple = args.multiple ?? false;
    const read = (el) => attribute === "textContent" ? el.textContent() : el.getAttribute(attribute);
    if (multiple) {
      const elements = this.page.locator(args.selector);
      const count = await elements.count();
      const results = [];
      // Sequential on purpose: each read is a round trip to the same page.
      for (let i = 0; i < count; i++) {
        const value = await read(elements.nth(i));
        if (value !== null) {
          results.push(value.trim());
        }
      }
      return JSON.stringify(results);
    }
    const value = await read(this.page.locator(args.selector).first());
    return value?.trim() ?? "";
  }
};
1171
/**
 * Factory counterpart of `new BrowserToolPack(options)`.
 *
 * @param {object} [options] - Passed straight to the BrowserToolPack constructor.
 * @returns {BrowserToolPack} The newly constructed pack.
 */
function createBrowserToolPack(options = {}) {
  const pack = new BrowserToolPack(options);
  return pack;
}
1174
/**
 * Builds a BrowserToolPack and a registry populated from it in one call.
 *
 * @param {object} [options] - Passed straight to the BrowserToolPack constructor.
 * @returns {{pack: BrowserToolPack, registry: *}} The pack together with the
 *   result of its `toRegistry()` call.
 */
function createBrowserTools(options = {}) {
  const pack = new BrowserToolPack(options);
  const registry = pack.toRegistry();
  return { pack, registry };
}
604
1178
  export {
1179
+ BrowserDefaults,
1180
+ BrowserToolNames,
1181
+ BrowserToolPack,
605
1182
  DEFAULT_IGNORE_DIRS,
606
1183
  FSDefaults,
607
1184
  ToolNames as FSToolNames,
608
1185
  LocalFSToolPack,
1186
+ createBrowserToolPack,
1187
+ createBrowserTools,
609
1188
  createLocalFSToolPack,
610
1189
  createLocalFSTools
611
1190
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@modelrelay/sdk",
3
- "version": "1.28.0",
3
+ "version": "1.29.0",
4
4
  "description": "TypeScript SDK for the ModelRelay API",
5
5
  "type": "module",
6
6
  "main": "dist/index.cjs",