@humanjs/mcp 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -2
- package/dist/index.cjs +161 -15
- package/dist/index.cjs.map +1 -1
- package/dist/index.js +162 -16
- package/dist/index.js.map +1 -1
- package/package.json +10 -5
package/dist/index.js
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
3
3
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
4
4
|
import { homedir } from 'os';
|
|
5
|
-
import { join,
|
|
5
|
+
import { join, basename, dirname } from 'path';
|
|
6
6
|
import { spawn } from 'child_process';
|
|
7
7
|
import { readFileSync } from 'fs';
|
|
8
8
|
import { createRequire } from 'module';
|
|
@@ -20,6 +20,7 @@ function readEnv() {
|
|
|
20
20
|
speed: parseSpeed(process.env.HUMANJS_SPEED),
|
|
21
21
|
headless: parseBool(process.env.HUMANJS_HEADLESS, false),
|
|
22
22
|
outputDir: process.env.HUMANJS_OUTPUT_DIR ?? process.cwd(),
|
|
23
|
+
uploadDir: process.env.HUMANJS_UPLOAD_DIR ?? process.cwd(),
|
|
23
24
|
viewport: parseViewport(process.env.HUMANJS_VIEWPORT),
|
|
24
25
|
autoInstall: parseBool(process.env.HUMANJS_AUTO_INSTALL, true),
|
|
25
26
|
browser: resolveBrowserConfig(),
|
|
@@ -189,7 +190,10 @@ var SessionManager = class {
|
|
|
189
190
|
stop = resolve;
|
|
190
191
|
});
|
|
191
192
|
const video = options.video ?? true;
|
|
192
|
-
const done = session.human.record(
|
|
193
|
+
const done = session.human.record(
|
|
194
|
+
{ name: options.name, video, quality: options.quality ?? "high" },
|
|
195
|
+
() => signal
|
|
196
|
+
);
|
|
193
197
|
session.recording = {
|
|
194
198
|
name: options.name ?? "recording",
|
|
195
199
|
startedAt: Date.now(),
|
|
@@ -586,6 +590,24 @@ function resolveOutputPath(outputDir, filename) {
|
|
|
586
590
|
}
|
|
587
591
|
return join(outputDir, base);
|
|
588
592
|
}
|
|
593
|
+
/**
 * Resolves an upload filename to a path directly inside the upload directory.
 *
 * Only a plain file name is accepted: anything that `basename` would alter
 * (subdirectories, "../x", absolute paths), the empty string, and the bare
 * directory references "." and ".." are rejected.
 *
 * @param {string} uploadDir - Directory the file is read from.
 * @param {string} filename - Caller-supplied name; must have no path components.
 * @returns {string} `uploadDir` joined with the validated file name.
 * @throws {Error} When `filename` is not a plain file name.
 */
function resolveUploadPath(uploadDir, filename) {
  const base = basename(filename);
  // basename(".") === "." and basename("..") === "..", so the equality check
  // alone lets both through; join(uploadDir, "..") would then resolve to the
  // parent of the upload directory instead of a file inside it.
  const isDirectoryReference = base === "." || base === "..";
  if (base !== filename || base.length === 0 || isDirectoryReference) {
    throw new Error(
      `upload filename must be a plain name with no path components, got "${filename}". Files are read from HUMANJS_UPLOAD_DIR \u2014 place the file there (or point HUMANJS_UPLOAD_DIR at its folder) and pass just the name.`
    );
  }
  return join(uploadDir, base);
}
|
|
602
|
+
/**
 * Picks the recording export format from a filename's extension,
 * compared case-insensitively.
 *
 * @param {string} filename - Output filename supplied by the caller.
 * @returns {"video" | "gif" | "timeline" | "playwright" | "humanjs" | null}
 *   The format key, or `null` when no known suffix matches.
 */
function resolveRecordingFormat(filename) {
  const name = filename.toLowerCase();
  // Order matters: the .spec.ts / .test.ts suffixes must be tried before the
  // bare .ts suffix, which would otherwise claim them.
  const suffixFormats = [
    [".mp4", "video"],
    [".webm", "video"],
    [".gif", "gif"],
    [".json", "timeline"],
    [".spec.ts", "playwright"],
    [".test.ts", "playwright"],
    [".ts", "humanjs"],
  ];
  for (const [suffix, format] of suffixFormats) {
    if (name.endsWith(suffix)) {
      return format;
    }
  }
  return null;
}
|
|
589
611
|
|
|
590
612
|
// src/tools/inspection.ts
|
|
591
613
|
var sessionArg = z.string().optional().describe("Session ID to act on. Omit to use the default session.");
|
|
@@ -631,6 +653,22 @@ function registerInspectionTools(server, ctx) {
|
|
|
631
653
|
return { content: [{ type: "text", text }] };
|
|
632
654
|
}
|
|
633
655
|
);
|
|
656
|
+
server.registerTool(
|
|
657
|
+
"human_outline",
|
|
658
|
+
{
|
|
659
|
+
title: "Page outline (accessibility tree)",
|
|
660
|
+
description: 'Returns a compact accessibility-tree outline of the page (or a region) \u2014 every interactive element and landmark by its ARIA role + accessible name, as YAML (e.g. `- button "Sign in"`, `- textbox "Email"`). The most token-efficient way to see what is actionable and pick a selector: the names map directly to getByRole / accessible-name selectors. Prefer this over human_get_html for "what can I click or fill"; use human_screenshot when you need the visual layout.',
|
|
661
|
+
inputSchema: {
|
|
662
|
+
selector: z.string().optional().describe("Optional region selector to scope the outline. Omit for the whole page."),
|
|
663
|
+
session: sessionArg
|
|
664
|
+
}
|
|
665
|
+
},
|
|
666
|
+
async ({ selector, session }) => {
|
|
667
|
+
const { human } = await ctx.sessions.get(session);
|
|
668
|
+
const text = await human.outline(selector);
|
|
669
|
+
return { content: [{ type: "text", text }] };
|
|
670
|
+
}
|
|
671
|
+
);
|
|
634
672
|
server.registerTool(
|
|
635
673
|
"human_get_text",
|
|
636
674
|
{
|
|
@@ -709,7 +747,7 @@ function resolveTarget(input) {
|
|
|
709
747
|
var sessionArg2 = z.string().optional().describe(
|
|
710
748
|
"Session ID to act on. Omit to use the default session (created lazily on first call). Use human_create_session for parallel browsers."
|
|
711
749
|
);
|
|
712
|
-
function registerPrimitiveTools(server, { sessions }) {
|
|
750
|
+
function registerPrimitiveTools(server, { sessions, env }) {
|
|
713
751
|
server.registerTool(
|
|
714
752
|
"human_goto",
|
|
715
753
|
{
|
|
@@ -756,6 +794,22 @@ function registerPrimitiveTools(server, { sessions }) {
|
|
|
756
794
|
};
|
|
757
795
|
}
|
|
758
796
|
);
|
|
797
|
+
server.registerTool(
|
|
798
|
+
"human_doubleClick",
|
|
799
|
+
{
|
|
800
|
+
title: "Double-click (humanized)",
|
|
801
|
+
description: "Double-clicks the target \u2014 same humanized motion as human_click, but two presses within the OS double-click window. Use for things that open/activate on double-click (list rows, file items, editable cells). Target is a selector OR x/y coordinates.",
|
|
802
|
+
inputSchema: { ...targetFields, session: sessionArg2 }
|
|
803
|
+
},
|
|
804
|
+
async ({ selector, x, y, session }) => {
|
|
805
|
+
const { human } = await sessions.get(session);
|
|
806
|
+
const target = resolveTarget({ selector, x, y });
|
|
807
|
+
await human.doubleClick(target);
|
|
808
|
+
return {
|
|
809
|
+
content: [{ type: "text", text: `double-clicked ${describeTarget(selector, x, y)}` }]
|
|
810
|
+
};
|
|
811
|
+
}
|
|
812
|
+
);
|
|
759
813
|
server.registerTool(
|
|
760
814
|
"human_hover",
|
|
761
815
|
{
|
|
@@ -850,6 +904,94 @@ function registerPrimitiveTools(server, { sessions }) {
|
|
|
850
904
|
return { content: [{ type: "text", text: `pasted ${value.length} chars into ${selector}` }] };
|
|
851
905
|
}
|
|
852
906
|
);
|
|
907
|
+
server.registerTool(
|
|
908
|
+
"human_clear",
|
|
909
|
+
{
|
|
910
|
+
title: "Clear a field (humanized)",
|
|
911
|
+
description: "Clears a text field (input/textarea/contenteditable) with a real keyboard gesture \u2014 click to focus, select-all, then delete \u2014 firing the input events the page expects. Use before human_type when you need to replace an existing value rather than append to it.",
|
|
912
|
+
inputSchema: {
|
|
913
|
+
selector: z.string().describe("Selector of the field to clear."),
|
|
914
|
+
session: sessionArg2
|
|
915
|
+
}
|
|
916
|
+
},
|
|
917
|
+
async ({ selector, session }) => {
|
|
918
|
+
const { human } = await sessions.get(session);
|
|
919
|
+
await human.clear(selector);
|
|
920
|
+
return { content: [{ type: "text", text: `cleared ${selector}` }] };
|
|
921
|
+
}
|
|
922
|
+
);
|
|
923
|
+
server.registerTool(
|
|
924
|
+
"human_check",
|
|
925
|
+
{
|
|
926
|
+
title: "Check a box (humanized)",
|
|
927
|
+
description: "Ticks a checkbox or radio \u2014 moves the cursor to it and clicks, but only if it is not already checked (a real user does not re-click a ticked box). Verifies the resulting state. Pass the checkbox/radio input itself (or a [role=checkbox]) \u2014 not a wrapping <label> \u2014 so the current state can be read and the click stays idempotent.",
|
|
928
|
+
inputSchema: {
|
|
929
|
+
selector: z.string().describe("Selector of the checkbox/radio input."),
|
|
930
|
+
session: sessionArg2
|
|
931
|
+
}
|
|
932
|
+
},
|
|
933
|
+
async ({ selector, session }) => {
|
|
934
|
+
const { human } = await sessions.get(session);
|
|
935
|
+
await human.check(selector);
|
|
936
|
+
return { content: [{ type: "text", text: `checked ${selector}` }] };
|
|
937
|
+
}
|
|
938
|
+
);
|
|
939
|
+
server.registerTool(
|
|
940
|
+
"human_uncheck",
|
|
941
|
+
{
|
|
942
|
+
title: "Uncheck a box (humanized)",
|
|
943
|
+
description: "Unticks a checkbox \u2014 humanized click only if currently checked. Radios cannot be unchecked by clicking (select a different option instead). Pass the checkbox input itself (or a [role=checkbox]) \u2014 not a wrapping <label> \u2014 so its state can be read and the click stays idempotent.",
|
|
944
|
+
inputSchema: {
|
|
945
|
+
selector: z.string().describe("Selector of the checkbox input."),
|
|
946
|
+
session: sessionArg2
|
|
947
|
+
}
|
|
948
|
+
},
|
|
949
|
+
async ({ selector, session }) => {
|
|
950
|
+
const { human } = await sessions.get(session);
|
|
951
|
+
await human.uncheck(selector);
|
|
952
|
+
return { content: [{ type: "text", text: `unchecked ${selector}` }] };
|
|
953
|
+
}
|
|
954
|
+
);
|
|
955
|
+
server.registerTool(
|
|
956
|
+
"human_selectOption",
|
|
957
|
+
{
|
|
958
|
+
title: "Select dropdown option (humanized)",
|
|
959
|
+
description: "Chooses option(s) in a native <select> \u2014 moves the cursor to the dropdown, then sets the value (native selects open an OS menu automation can't drive, so the value is set programmatically, firing change/input). For custom DOM dropdowns, use human_click on the rendered options instead. Match by value(s); pass one string or an array for multi-selects.",
|
|
960
|
+
inputSchema: {
|
|
961
|
+
selector: z.string().describe("Selector of the <select> element."),
|
|
962
|
+
values: z.union([z.string(), z.array(z.string())]).describe("Option value, or array of values for a multi-select."),
|
|
963
|
+
session: sessionArg2
|
|
964
|
+
}
|
|
965
|
+
},
|
|
966
|
+
async ({ selector, values, session }) => {
|
|
967
|
+
const { human } = await sessions.get(session);
|
|
968
|
+
const selected = await human.selectOption(selector, values);
|
|
969
|
+
return {
|
|
970
|
+
content: [{ type: "text", text: `selected ${selected.join(", ")} in ${selector}` }]
|
|
971
|
+
};
|
|
972
|
+
}
|
|
973
|
+
);
|
|
974
|
+
server.registerTool(
|
|
975
|
+
"human_upload",
|
|
976
|
+
{
|
|
977
|
+
title: "Upload file(s) (humanized)",
|
|
978
|
+
description: `Attaches file(s) to a file input \u2014 moves the cursor to the control, then sets the files (never opens the OS dialog, which would hang). For safety, files are read by basename from HUMANJS_UPLOAD_DIR (default: the server working dir) \u2014 subdirectories, "../", and absolute paths are rejected, so the agent can't read and exfiltrate arbitrary local files. Pass the <input type="file"> selector and the filename(s).`,
|
|
979
|
+
inputSchema: {
|
|
980
|
+
selector: z.string().describe("Selector of the file input."),
|
|
981
|
+
files: z.union([z.string(), z.array(z.string())]).describe("Filename(s) inside HUMANJS_UPLOAD_DIR \u2014 a basename only, no path components."),
|
|
982
|
+
session: sessionArg2
|
|
983
|
+
}
|
|
984
|
+
},
|
|
985
|
+
async ({ selector, files, session }) => {
|
|
986
|
+
const { human } = await sessions.get(session);
|
|
987
|
+
const names = Array.isArray(files) ? files : [files];
|
|
988
|
+
const paths = names.map((name) => resolveUploadPath(env.uploadDir, name));
|
|
989
|
+
await human.upload(selector, paths);
|
|
990
|
+
return {
|
|
991
|
+
content: [{ type: "text", text: `uploaded ${paths.length} file(s) to ${selector}` }]
|
|
992
|
+
};
|
|
993
|
+
}
|
|
994
|
+
);
|
|
853
995
|
server.registerTool(
|
|
854
996
|
"human_press",
|
|
855
997
|
{
|
|
@@ -952,32 +1094,32 @@ function registerRecordingTools(server, { sessions, env }) {
|
|
|
952
1094
|
"human_stop_recording",
|
|
953
1095
|
{
|
|
954
1096
|
title: "Stop recording and save",
|
|
955
|
-
description: `Stops the active recording and writes it to one or more files in HUMANJS_OUTPUT_DIR. Each filename's extension picks its format: .mp4/.webm = video, .gif = animated gif, .json = action timeline. Pass several to export the same recording multiple ways, e.g. ["demo.mp4", "
|
|
1097
|
+
description: `Stops the active recording and writes it to one or more files in HUMANJS_OUTPUT_DIR. Each filename's extension picks its format: .mp4/.webm = video, .gif = animated gif, .json = action timeline, .ts = runnable HumanJS script, .spec.ts/.test.ts = @playwright/test spec (humanized, with derived assertions). Pass several to export the same recording multiple ways, e.g. ["demo.mp4", "checkout.spec.ts"] for a video plus a ready-to-commit test. Path components are rejected for safety.`,
|
|
956
1098
|
inputSchema: {
|
|
957
1099
|
filenames: z.array(z.string()).min(1).describe(
|
|
958
|
-
'One or more output filenames. The recording is saved to each, format chosen by extension. e.g. ["demo.mp4"] or ["demo.mp4", "demo.
|
|
1100
|
+
'One or more output filenames. The recording is saved to each, format chosen by extension. e.g. ["demo.mp4"], ["checkout.spec.ts"], or ["demo.mp4", "demo.json", "demo.ts"].'
|
|
959
1101
|
),
|
|
960
1102
|
session: z.string().optional().describe("Session ID. Omit for the default session.")
|
|
961
1103
|
}
|
|
962
1104
|
},
|
|
963
1105
|
async ({ filenames, session }) => {
|
|
964
|
-
const targets = filenames.map((filename) =>
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
}));
|
|
968
|
-
for (const { ext } of targets) {
|
|
969
|
-
if (ext !== ".mp4" && ext !== ".webm" && ext !== ".gif" && ext !== ".json") {
|
|
1106
|
+
const targets = filenames.map((filename) => {
|
|
1107
|
+
const format = resolveRecordingFormat(filename);
|
|
1108
|
+
if (format === null) {
|
|
970
1109
|
throw new Error(
|
|
971
|
-
`Unsupported output extension "${
|
|
1110
|
+
`Unsupported output extension for "${filename}". Use .mp4/.webm (video), .gif, .json (timeline), .ts (HumanJS script), or .spec.ts/.test.ts (Playwright test).`
|
|
972
1111
|
);
|
|
973
1112
|
}
|
|
974
|
-
|
|
1113
|
+
return { path: resolveOutputPath(env.outputDir, filename), format };
|
|
1114
|
+
});
|
|
975
1115
|
const recording = await sessions.stopRecording(session);
|
|
976
1116
|
try {
|
|
977
1117
|
const saved = [];
|
|
978
|
-
for (const { path,
|
|
979
|
-
if (
|
|
980
|
-
else if (
|
|
1118
|
+
for (const { path, format } of targets) {
|
|
1119
|
+
if (format === "gif") saved.push(await recording.toGif(path));
|
|
1120
|
+
else if (format === "timeline") saved.push(await recording.toTimeline(path));
|
|
1121
|
+
else if (format === "humanjs") saved.push(await recording.toHumanJS(path));
|
|
1122
|
+
else if (format === "playwright") saved.push(await recording.toPlaywright(path));
|
|
981
1123
|
else saved.push(await recording.toVideo(path));
|
|
982
1124
|
}
|
|
983
1125
|
return { content: [{ type: "text", text: `saved recording to:
|
|
@@ -1062,6 +1204,10 @@ Recording a flow (the natural-looking way):
|
|
|
1062
1204
|
1. EXPLORE FIRST (un-recorded). Navigate the flow once to discover correct, unambiguous selectors (human_screenshot / human_get_html / human_get_attribute). Do this by default whenever the selectors aren't already known \u2014 no need for the user to ask. Skip it only if the selectors are already known or the user tells you not to explore.
|
|
1063
1205
|
2. THEN RECORD ONE CLEAN RUN AS A SINGLE BATCH: human_start_recording + every action + human_stop_recording, all emitted in one turn. Keep selector-guessing and fumbles out of the take.
|
|
1064
1206
|
|
|
1207
|
+
Export as a test: human_stop_recording picks format by extension. A .spec.ts (or .test.ts) filename writes a ready-to-commit @playwright/test with derived assertions; a .ts writes a standalone HumanJS script; .mp4/.webm/.gif/.json are video/timeline. So "record this flow and save it as a test" = run the clean pass, then stop into e.g. "checkout.spec.ts".
|
|
1208
|
+
|
|
1209
|
+
Captured input + passwords: typed/pasted text IS recorded into the timeline and code exports, so generated scripts/tests are runnable \u2014 EXCEPT password fields, which are always masked (emitted as an empty string with a "fill in" comment). This is intentional, not a bug; don't work around it by hand-editing the secret back in. If the user explicitly wants the flow to log in, edit the exported file to read the credential from an env var (e.g. process.env.APP_PASSWORD) and tell them to set it \u2014 never hardcode a real password into a file that may be committed.
|
|
1210
|
+
|
|
1065
1211
|
Dynamic UI: prefer specific selectors (role, aria-label) over text \u2014 the same visible text often matches several cards before a filter, or the wrong one after. If a click reports multiple matches, narrow the selector.
|
|
1066
1212
|
|
|
1067
1213
|
Browser state: by default each run is a fresh, signed-out browser. If a flow needs a login, tell the user to enable persistence (human_enable_persistence or HUMANJS_PERSIST) or CDP attach \u2014 see human_browser_info.`;
|