@humanjs/mcp 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/index.cjs +149 -1
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +149 -1
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/README.md
CHANGED
|
@@ -71,6 +71,7 @@ The `config` payload is base64 of `{"command":"npx","args":["-y","@humanjs/mcp"]
|
|
|
71
71
|
| `HUMANJS_SPEED` | `human` \| `fast` \| `instant` | `human` | Humanization pace. `human` = full realistic motion; `fast` = humanized but quick; `instant` = no humanized motion. Changes how long each action *executes*, not the wait between actions. |
|
|
72
72
|
| `HUMANJS_HEADLESS` | `true` \| `false` | `false` | Headless browser. Default is visible — the point of the MCP. |
|
|
73
73
|
| `HUMANJS_OUTPUT_DIR` | path | server's CWD | Where screenshots and recordings are written. |
|
|
74
|
+
| `HUMANJS_UPLOAD_DIR` | path | server's CWD | Folder `human_upload` reads files from (basename only — can't escape it). |
|
|
74
75
|
| `HUMANJS_VIEWPORT` | `WIDTHxHEIGHT` | `1440x900` | Default viewport for new sessions. Bump to `1920x1080` for crisper recordings. |
|
|
75
76
|
| `HUMANJS_AUTO_INSTALL` | `true` \| `false` | `true` | Auto-download the Chromium binary on first launch if missing. Set `false` to require a manual `npx playwright install chromium`. |
|
|
76
77
|
| `HUMANJS_PERSIST` | `true` \| `false` | `false` | Persist a profile across runs (logins/cookies survive). Uses `~/.humanjs/profile` unless `HUMANJS_USER_DATA_DIR` is set. See [Browser modes](#browser-modes). |
|
|
@@ -232,6 +233,7 @@ The server ships **built-in guidance** (sent to the agent on connect via MCP `in
|
|
|
232
233
|
|
|
233
234
|
- **No arbitrary-JS `evaluate` tool.** Executing page-supplied JavaScript is a prompt-injection cliff — a malicious page could trick the agent into running code that exfiltrates data. The read-only inspection tools cover the legitimate "what's on the page" need.
|
|
234
235
|
- **File-path safety.** Tools that write files accept a basename only; path components (`../`, absolute paths) are rejected, so a prompt-injected filename can't escape `HUMANJS_OUTPUT_DIR`.
|
|
236
|
+
- **Upload path safety.** `human_upload` can attach a local file to a web form — a potential exfiltration path if a page prompt-injects the agent. So it reads files by **basename only** from `HUMANJS_UPLOAD_DIR` (default: the server's working dir); subdirectories, `../`, and absolute paths are rejected, so the agent can't reach (and send) files outside that folder. Point `HUMANJS_UPLOAD_DIR` at where your upload fixtures live.
|
|
235
237
|
- **No credentials handling.** The server drives the browser; it doesn't manage logins, payment details, or secrets on your behalf.
|
|
236
238
|
- **Attaching to your real browser (CDP) is opt-in and env-only.** When you point `HUMANJS_CDP_URL` at your running browser, the agent acts with *your* live sessions — a bigger blast radius if a page tries to manipulate it. That's why it's a deliberate config choice you make up front, never something a tool can switch on.
|
|
237
239
|
|
package/dist/index.cjs
CHANGED
|
@@ -23,6 +23,7 @@ function readEnv() {
|
|
|
23
23
|
speed: parseSpeed(process.env.HUMANJS_SPEED),
|
|
24
24
|
headless: parseBool(process.env.HUMANJS_HEADLESS, false),
|
|
25
25
|
outputDir: process.env.HUMANJS_OUTPUT_DIR ?? process.cwd(),
|
|
26
|
+
uploadDir: process.env.HUMANJS_UPLOAD_DIR ?? process.cwd(),
|
|
26
27
|
viewport: parseViewport(process.env.HUMANJS_VIEWPORT),
|
|
27
28
|
autoInstall: parseBool(process.env.HUMANJS_AUTO_INSTALL, true),
|
|
28
29
|
browser: resolveBrowserConfig(),
|
|
@@ -592,6 +593,15 @@ function resolveOutputPath(outputDir, filename) {
|
|
|
592
593
|
}
|
|
593
594
|
return path.join(outputDir, base);
|
|
594
595
|
}
|
|
596
|
+
/**
 * Resolve an upload filename to a path directly inside `uploadDir`.
 *
 * Only a plain file name is accepted. Anything `path.basename` would change
 * (directory components, absolute paths) is rejected, and so are the special
 * names "." and "..": `path.basename` returns those unchanged, so without an
 * explicit check they would resolve to `uploadDir` itself and to its parent.
 *
 * NOTE: this is a purely lexical check; symlinks inside `uploadDir` are not
 * resolved here.
 *
 * @param {string} uploadDir - Directory uploads are read from.
 * @param {string} filename - Caller-supplied name; must be a bare file name.
 * @returns {string} `uploadDir` joined with the validated name.
 * @throws {Error} If `filename` is empty, ".", "..", or has path components.
 */
function resolveUploadPath(uploadDir, filename) {
  const base = path.basename(filename);
  const isPlainName =
    base === filename && base.length > 0 && base !== "." && base !== "..";
  if (!isPlainName) {
    throw new Error(
      `upload filename must be a plain name with no path components, got "${filename}". Files are read from HUMANJS_UPLOAD_DIR \u2014 place the file there (or point HUMANJS_UPLOAD_DIR at its folder) and pass just the name.`
    );
  }
  return path.join(uploadDir, base);
}
|
|
595
605
|
function resolveRecordingFormat(filename) {
|
|
596
606
|
const lower = filename.toLowerCase();
|
|
597
607
|
if (lower.endsWith(".mp4") || lower.endsWith(".webm")) return "video";
|
|
@@ -646,6 +656,22 @@ function registerInspectionTools(server, ctx) {
|
|
|
646
656
|
return { content: [{ type: "text", text }] };
|
|
647
657
|
}
|
|
648
658
|
);
|
|
659
|
+
// Tool "human_outline": forwards the optional region selector to
// `human.outline` and returns the resulting string as a single text item.
server.registerTool(
  "human_outline",
  {
    title: "Page outline (accessibility tree)",
    description: 'Returns a compact accessibility-tree outline of the page (or a region) \u2014 every interactive element and landmark by its ARIA role + accessible name, as YAML (e.g. `- button "Sign in"`, `- textbox "Email"`). The most token-efficient way to see what is actionable and pick a selector: the names map directly to getByRole / accessible-name selectors. Prefer this over human_get_html for "what can I click or fill"; use human_screenshot when you need the visual layout.',
    inputSchema: {
      selector: zod.z
        .string()
        .optional()
        .describe("Optional region selector to scope the outline. Omit for the whole page."),
      session: sessionArg,
    },
  },
  async (args) => {
    const { selector, session } = args;
    const { human } = await ctx.sessions.get(session);
    const outlineText = await human.outline(selector);
    return { content: [{ type: "text", text: outlineText }] };
  },
);
|
|
649
675
|
server.registerTool(
|
|
650
676
|
"human_get_text",
|
|
651
677
|
{
|
|
@@ -724,7 +750,7 @@ function resolveTarget(input) {
|
|
|
724
750
|
var sessionArg2 = zod.z.string().optional().describe(
|
|
725
751
|
"Session ID to act on. Omit to use the default session (created lazily on first call). Use human_create_session for parallel browsers."
|
|
726
752
|
);
|
|
727
|
-
function registerPrimitiveTools(server, { sessions }) {
|
|
753
|
+
function registerPrimitiveTools(server, { sessions, env }) {
|
|
728
754
|
server.registerTool(
|
|
729
755
|
"human_goto",
|
|
730
756
|
{
|
|
@@ -771,6 +797,22 @@ function registerPrimitiveTools(server, { sessions }) {
|
|
|
771
797
|
};
|
|
772
798
|
}
|
|
773
799
|
);
|
|
800
|
+
// Tool "human_doubleClick": resolves the selector-or-coordinates target and
// passes it to `human.doubleClick`, then reports what was double-clicked.
server.registerTool(
  "human_doubleClick",
  {
    title: "Double-click (humanized)",
    description: "Double-clicks the target \u2014 same humanized motion as human_click, but two presses within the OS double-click window. Use for things that open/activate on double-click (list rows, file items, editable cells). Target is a selector OR x/y coordinates.",
    inputSchema: { ...targetFields, session: sessionArg2 },
  },
  async (args) => {
    const { selector, x, y, session } = args;
    // Session lookup comes first, before the target is validated.
    const { human } = await sessions.get(session);
    const where = resolveTarget({ selector, x, y });
    await human.doubleClick(where);
    const summary = `double-clicked ${describeTarget(selector, x, y)}`;
    return { content: [{ type: "text", text: summary }] };
  },
);
|
|
774
816
|
server.registerTool(
|
|
775
817
|
"human_hover",
|
|
776
818
|
{
|
|
@@ -865,6 +907,112 @@ function registerPrimitiveTools(server, { sessions }) {
|
|
|
865
907
|
return { content: [{ type: "text", text: `pasted ${value.length} chars into ${selector}` }] };
|
|
866
908
|
}
|
|
867
909
|
);
|
|
910
|
+
// Tool "human_clear": delegates to `human.clear(selector)` and echoes the
// selector back in the confirmation text.
server.registerTool(
  "human_clear",
  {
    title: "Clear a field (humanized)",
    description: "Clears a text field (input/textarea/contenteditable) with a real keyboard gesture \u2014 click to focus, select-all, then delete \u2014 firing the input events the page expects. Use before human_type when you need to replace an existing value rather than append to it.",
    inputSchema: {
      selector: zod.z.string().describe("Selector of the field to clear."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, session } = args;
    const { human } = await sessions.get(session);
    await human.clear(selector);
    const summary = `cleared ${selector}`;
    return { content: [{ type: "text", text: summary }] };
  },
);
|
|
926
|
+
// Tool "human_selectText": calls `human.selectText` with an options object
// only when a substring was supplied; otherwise passes `undefined`.
server.registerTool(
  "human_selectText",
  {
    title: "Select an element\u2019s text (humanized)",
    description: "Selects (highlights) text inside an element \u2014 moves the cursor to it, then selects. By default selects all of the element\u2019s text; pass `text` to select just that substring (found inside the element, whitespace-tolerant, first match; falls back to the whole element if not found). Use before copying, replacing, or triggering a highlight menu.",
    inputSchema: {
      selector: zod.z.string().describe("Selector of the element whose text to select."),
      text: zod.z
        .string()
        .optional()
        .describe("Optional substring to select instead of the element\u2019s whole text."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, text, session } = args;
    const { human } = await sessions.get(session);
    const hasSubstring = text !== undefined;
    await human.selectText(selector, hasSubstring ? { text } : undefined);
    // The confirmation quotes the substring when one was given, else says "text".
    let what;
    if (hasSubstring) {
      what = `"${text}"`;
    } else {
      what = "text";
    }
    return { content: [{ type: "text", text: `selected ${what} in ${selector}` }] };
  },
);
|
|
944
|
+
// Tool "human_check": delegates to `human.check(selector)` and confirms with
// the selector that was passed in.
server.registerTool(
  "human_check",
  {
    title: "Check a box (humanized)",
    description: "Ticks a checkbox or radio \u2014 moves the cursor to it and clicks, but only if it is not already checked (a real user does not re-click a ticked box). Verifies the resulting state. Pass the checkbox/radio input itself (or a [role=checkbox]) \u2014 not a wrapping <label> \u2014 so the current state can be read and the click stays idempotent.",
    inputSchema: {
      selector: zod.z.string().describe("Selector of the checkbox/radio input."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, session } = args;
    const { human } = await sessions.get(session);
    await human.check(selector);
    const summary = `checked ${selector}`;
    return { content: [{ type: "text", text: summary }] };
  },
);
|
|
960
|
+
// Tool "human_uncheck": delegates to `human.uncheck(selector)` and confirms
// with the selector that was passed in.
server.registerTool(
  "human_uncheck",
  {
    title: "Uncheck a box (humanized)",
    description: "Unticks a checkbox \u2014 humanized click only if currently checked. Radios cannot be unchecked by clicking (select a different option instead). Pass the checkbox input itself (or a [role=checkbox]) \u2014 not a wrapping <label> \u2014 so its state can be read and the click stays idempotent.",
    inputSchema: {
      selector: zod.z.string().describe("Selector of the checkbox input."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, session } = args;
    const { human } = await sessions.get(session);
    await human.uncheck(selector);
    const summary = `unchecked ${selector}`;
    return { content: [{ type: "text", text: summary }] };
  },
);
|
|
976
|
+
// Tool "human_selectOption": passes the value (or array of values) through to
// `human.selectOption` and lists what it returned, comma-separated.
server.registerTool(
  "human_selectOption",
  {
    title: "Select dropdown option (humanized)",
    description: "Chooses option(s) in a native <select> \u2014 moves the cursor to the dropdown, then sets the value (native selects open an OS menu automation can't drive, so the value is set programmatically, firing change/input). For custom DOM dropdowns, use human_click on the rendered options instead. Match by value(s); pass one string or an array for multi-selects.",
    inputSchema: {
      selector: zod.z.string().describe("Selector of the <select> element."),
      values: zod.z
        .union([zod.z.string(), zod.z.array(zod.z.string())])
        .describe("Option value, or array of values for a multi-select."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, values, session } = args;
    const { human } = await sessions.get(session);
    const selected = await human.selectOption(selector, values);
    // NOTE(review): assumes `human.selectOption` resolves to an array — confirm.
    const chosen = selected.join(", ");
    return { content: [{ type: "text", text: `selected ${chosen} in ${selector}` }] };
  },
);
|
|
995
|
+
// Tool "human_upload": normalizes `files` to an array, maps every name through
// `resolveUploadPath` against `env.uploadDir` (which throws on names with path
// components), then hands the resulting paths to `human.upload`.
server.registerTool(
  "human_upload",
  {
    title: "Upload file(s) (humanized)",
    description: `Attaches file(s) to a file input \u2014 moves the cursor to the control, then sets the files (never opens the OS dialog, which would hang). For safety, files are read by basename from HUMANJS_UPLOAD_DIR (default: the server working dir) \u2014 subdirectories, "../", and absolute paths are rejected, so the agent can't read and exfiltrate arbitrary local files. Pass the <input type="file"> selector and the filename(s).`,
    inputSchema: {
      selector: zod.z.string().describe("Selector of the file input."),
      files: zod.z
        .union([zod.z.string(), zod.z.array(zod.z.string())])
        .describe("Filename(s) inside HUMANJS_UPLOAD_DIR \u2014 a basename only, no path components."),
      session: sessionArg2,
    },
  },
  async (args) => {
    const { selector, files, session } = args;
    const { human } = await sessions.get(session);
    // A single filename is wrapped so both input shapes take the same path.
    let names;
    if (Array.isArray(files)) {
      names = files;
    } else {
      names = [files];
    }
    const paths = names.map((name) => resolveUploadPath(env.uploadDir, name));
    await human.upload(selector, paths);
    const summary = `uploaded ${paths.length} file(s) to ${selector}`;
    return { content: [{ type: "text", text: summary }] };
  },
);
|
|
868
1016
|
server.registerTool(
|
|
869
1017
|
"human_press",
|
|
870
1018
|
{
|