@humanjs/mcp 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1099 @@
1
+ #!/usr/bin/env node
2
+ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
3
+ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
4
+ import { homedir } from 'os';
5
+ import { join, extname, basename, dirname } from 'path';
6
+ import { spawn } from 'child_process';
7
+ import { readFileSync } from 'fs';
8
+ import { createRequire } from 'module';
9
+ import { createHuman, installMouseHelper } from '@humanjs/playwright';
10
+ import { chromium } from 'playwright';
11
+ import { z } from 'zod';
12
+ import { blend } from '@humanjs/core';
13
+ import { writeFile } from 'fs/promises';
14
+
15
// Personality preset names that parsePersonality() accepts for HUMANJS_PERSONALITY.
var VALID_PRESETS = [
  "careful",
  "fast",
  "distracted",
  "precise"
];
// Pacing names that parseSpeed() accepts for HUMANJS_SPEED.
var VALID_SPEEDS = [
  "human",
  "fast",
  "instant"
];
17
/**
 * Builds the server configuration object from the `HUMANJS_*` environment
 * variables, applying the default for each unset variable.
 *
 * Properties are evaluated in declaration order, so the first invalid
 * variable is the one whose parser error propagates.
 *
 * @returns {object} `{ personality, speed, headless, outputDir, viewport,
 *   autoInstall, browser, channel }`.
 * @throws {Error} When one of the parsers rejects its variable's value.
 */
function readEnv() {
  const { env } = process;
  return {
    personality: parsePersonality(env.HUMANJS_PERSONALITY),
    speed: parseSpeed(env.HUMANJS_SPEED),
    headless: parseBool(env.HUMANJS_HEADLESS, false),
    // Output files land in the working directory unless a directory is configured.
    outputDir: env.HUMANJS_OUTPUT_DIR ?? process.cwd(),
    viewport: parseViewport(env.HUMANJS_VIEWPORT),
    autoInstall: parseBool(env.HUMANJS_AUTO_INSTALL, true),
    browser: resolveBrowserConfig(),
    // A blank channel counts as "not set".
    channel: env.HUMANJS_CHANNEL?.trim() || void 0
  };
}
29
// Profile directory used for persistent mode when no explicit directory is given.
var DEFAULT_PERSIST_DIR = join(homedir(), ".humanjs", "profile");
/**
 * Decides how the browser is obtained, from the environment.
 *
 * Precedence: `HUMANJS_CDP_URL` (attach over CDP), then
 * `HUMANJS_USER_DATA_DIR` (persistent profile at that directory), then
 * `HUMANJS_PERSIST` (persistent profile at the default directory),
 * otherwise an ephemeral browser.
 *
 * @returns {{mode: "cdp", cdpUrl: string} | {mode: "persistent", userDataDir: string} | {mode: "ephemeral"}}
 * @throws {Error} When `HUMANJS_PERSIST` is set to a non-boolean value.
 */
function resolveBrowserConfig() {
  // Blank / whitespace-only values are treated the same as unset.
  const readTrimmed = (name) => process.env[name]?.trim() || void 0;

  const cdpUrl = readTrimmed("HUMANJS_CDP_URL");
  if (cdpUrl) {
    return { mode: "cdp", cdpUrl };
  }

  const userDataDir = readTrimmed("HUMANJS_USER_DATA_DIR");
  if (userDataDir) {
    return { mode: "persistent", userDataDir };
  }

  const persist = parseBool(process.env.HUMANJS_PERSIST, false);
  return persist ? { mode: "persistent", userDataDir: DEFAULT_PERSIST_DIR } : { mode: "ephemeral" };
}
40
/**
 * Parses the `HUMANJS_PERSONALITY` value into a preset name.
 *
 * Matching is case-insensitive and ignores surrounding whitespace, so a
 * value such as `"Fast "` copied into a config file is still accepted.
 *
 * @param {string | undefined} raw Raw environment value.
 * @returns {string} One of `VALID_PRESETS`; `"careful"` when unset or blank.
 * @throws {Error} When the value is not a known preset.
 */
function parsePersonality(raw) {
  const value = raw?.trim().toLowerCase();
  if (!value) return "careful";
  if (VALID_PRESETS.includes(value)) return value;
  throw new Error(
    `HUMANJS_PERSONALITY="${raw}" is not a known preset. Expected one of: ${VALID_PRESETS.join(", ")}.`
  );
}
48
/**
 * Parses the `HUMANJS_SPEED` value into a pacing name.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 *
 * @param {string | undefined} raw Raw environment value.
 * @returns {string} One of `VALID_SPEEDS`; `"human"` when unset or blank.
 * @throws {Error} When the value is not a valid speed.
 */
function parseSpeed(raw) {
  const value = raw?.trim().toLowerCase();
  if (!value) return "human";
  if (VALID_SPEEDS.includes(value)) return value;
  throw new Error(
    `HUMANJS_SPEED="${raw}" is not valid. Expected one of: ${VALID_SPEEDS.join(", ")}.`
  );
}
56
/**
 * Parses a boolean-like environment value.
 *
 * Accepts `true`/`1`/`yes` and `false`/`0`/`no`, case-insensitively and
 * ignoring surrounding whitespace. An unset, empty, or whitespace-only value
 * yields `fallback`, matching how the other env parsers treat a blank
 * variable as "not set".
 *
 * @param {string | undefined} raw Raw environment value.
 * @param {boolean} fallback Value to use when `raw` is unset or blank.
 * @returns {boolean}
 * @throws {Error} When the value is present but not a recognized boolean.
 */
function parseBool(raw, fallback) {
  const value = raw?.trim().toLowerCase();
  if (!value) return fallback;
  if (["true", "1", "yes"].includes(value)) return true;
  if (["false", "0", "no"].includes(value)) return false;
  throw new Error(`Expected a boolean ("true"/"false"), got "${raw}".`);
}
63
/**
 * Parses `HUMANJS_VIEWPORT` (`"WIDTHxHEIGHT"`, with `x` or `×` as the
 * separator) into a viewport size.
 *
 * @param {string | undefined} raw Raw environment value.
 * @returns {{width: number, height: number}} Parsed size; 1440×900 when the
 *   value is unset or blank.
 * @throws {Error} When the value is malformed, or either dimension is zero
 *   or too large to be represented exactly.
 */
function parseViewport(raw) {
  if (!raw?.trim()) return { width: 1440, height: 900 };
  const match = /^\s*(\d+)\s*[x×]\s*(\d+)\s*$/i.exec(raw);
  if (!match) {
    throw new Error(
      `HUMANJS_VIEWPORT="${raw}" is invalid. Expected "WIDTHxHEIGHT", e.g. "1920x1080".`
    );
  }
  const width = Number(match[1]);
  const height = Number(match[2]);
  // The regex only guarantees digits; "0x0" or a 30-digit number would still
  // match, so reject anything that is not a usable positive integer.
  const isUsable = (value) => Number.isSafeInteger(value) && value > 0;
  if (!isUsable(width) || !isUsable(height)) {
    throw new Error(
      `HUMANJS_VIEWPORT="${raw}" is invalid. Width and height must be positive integers.`
    );
  }
  return { width, height };
}
73
+ var DEFAULT_SESSION_ID = "default";
74
+ var SessionManager = class {
75
+ /** Backing browser for `ephemeral` and `cdp` modes. */
76
+ browser = null;
77
+ /** Backing context for `persistent` mode (launchPersistentContext returns a context, no Browser). */
78
+ persistentContext = null;
79
+ /** True when `browser` was obtained via connectOverCDP — must NOT be closed on teardown. */
80
+ cdpConnected = false;
81
+ /** Runtime persistence toggle (from human_enable_persistence); overrides env mode on next launch. */
82
+ persistOverride = null;
83
+ sessions = /* @__PURE__ */ new Map();
84
+ env;
85
+ constructor(env) {
86
+ this.env = env;
87
+ }
88
+ /** Effective browser config, honoring the runtime persistence override. */
89
+ effectiveConfig() {
90
+ if (this.persistOverride) {
91
+ return { mode: "persistent", userDataDir: this.persistOverride.userDataDir };
92
+ }
93
+ return this.env.browser;
94
+ }
95
+ /**
96
+ * Resolves the session named by `id`, creating the default session
97
+ * lazily if `id` is omitted or `'default'` and the session doesn't
98
+ * exist yet. Throws if an explicit non-default session ID hasn't been
99
+ * created — that case is a caller bug, not a missing-default UX
100
+ * problem, and a clear error helps the AI agent recover.
101
+ */
102
+ async get(id = DEFAULT_SESSION_ID) {
103
+ const existing = this.sessions.get(id);
104
+ if (existing) return existing;
105
+ if (id === DEFAULT_SESSION_ID) return this.create(DEFAULT_SESSION_ID, {});
106
+ throw new Error(
107
+ `Session "${id}" does not exist. Use human_create_session to create it first, or omit the session argument to use the default session.`
108
+ );
109
+ }
110
+ /**
111
+ * Creates a new named session. Throws if the ID is already in use —
112
+ * the caller (an AI agent) should close the old one first if they
113
+ * want to recreate.
114
+ */
115
+ async create(id, options) {
116
+ if (this.sessions.has(id)) {
117
+ throw new Error(
118
+ `Session "${id}" already exists. Close it first with human_close_session if you want to recreate it.`
119
+ );
120
+ }
121
+ const config = this.effectiveConfig();
122
+ if (config.mode !== "ephemeral" && id !== DEFAULT_SESSION_ID) {
123
+ throw new Error(
124
+ `In ${config.mode} mode HumanJS drives a single shared browser, so named/parallel sessions aren't available. Omit the session argument to use the default session.`
125
+ );
126
+ }
127
+ const viewport = options.viewport ?? this.env.viewport;
128
+ const { context, page } = await this.acquireContext(config, viewport);
129
+ const personality = options.personality ?? this.env.personality;
130
+ const speed = options.speed ?? this.env.speed;
131
+ const human = await createHuman(page, { personality, speed });
132
+ const session = {
133
+ id,
134
+ context,
135
+ page,
136
+ human,
137
+ personality,
138
+ speed,
139
+ mode: config.mode,
140
+ recording: null,
141
+ createdAt: Date.now()
142
+ };
143
+ this.sessions.set(id, session);
144
+ return session;
145
+ }
146
+ /**
147
+ * Obtains a `{ context, page }` for the given mode:
148
+ *
149
+ * - `cdp` — reuse the attached browser's existing context + page (the
150
+ * user's real session); only make a new one if there's none.
151
+ * - `persistent` — the single persistent context; reuse its page.
152
+ * - `ephemeral` — a fresh isolated context + page per session.
153
+ *
154
+ * The visible cursor overlay is installed on the context in every mode.
155
+ */
156
+ async acquireContext(config, viewport) {
157
+ if (config.mode === "cdp") {
158
+ const browser2 = await this.ensureCdpBrowser(config.cdpUrl);
159
+ const context2 = browser2.contexts()[0] ?? await browser2.newContext();
160
+ await installMouseHelper(context2);
161
+ const page2 = context2.pages()[0] ?? await context2.newPage();
162
+ return { context: context2, page: page2 };
163
+ }
164
+ if (config.mode === "persistent") {
165
+ const context2 = await this.ensurePersistentContext(config.userDataDir, viewport);
166
+ const page2 = context2.pages()[0] ?? await context2.newPage();
167
+ return { context: context2, page: page2 };
168
+ }
169
+ const browser = await this.ensureEphemeralBrowser();
170
+ const context = await browser.newContext({ viewport });
171
+ await installMouseHelper(context);
172
+ const page = await context.newPage();
173
+ return { context, page };
174
+ }
175
+ /**
176
+ * Starts a recording on a session. Holds `human.record()` open across
177
+ * tool calls by awaiting an internal stop-signal — capture (frames +
178
+ * action timeline) runs until {@link stopRecording} fires it.
179
+ */
180
+ async startRecording(id, options) {
181
+ const session = await this.get(id);
182
+ if (session.recording) {
183
+ throw new Error(
184
+ `Session "${session.id}" is already recording. Stop it first with human_stop_recording.`
185
+ );
186
+ }
187
+ let stop;
188
+ const signal = new Promise((resolve) => {
189
+ stop = resolve;
190
+ });
191
+ const video = options.video ?? true;
192
+ const done = session.human.record({ video, quality: options.quality ?? "high" }, () => signal);
193
+ session.recording = {
194
+ name: options.name ?? "recording",
195
+ startedAt: Date.now(),
196
+ video,
197
+ stop,
198
+ done
199
+ };
200
+ }
201
+ /**
202
+ * Stops the active recording, returns the finished {@link Recording} for
203
+ * export, and recreates the session's `Human` so it can record again
204
+ * (`human.record()` is single-use per instance; page/context/cookies are
205
+ * preserved).
206
+ */
207
+ async stopRecording(id) {
208
+ const session = await this.get(id);
209
+ const rec = session.recording;
210
+ if (!rec) {
211
+ throw new Error(
212
+ `Session "${session.id}" is not recording. Start one with human_start_recording first.`
213
+ );
214
+ }
215
+ rec.stop();
216
+ const recording = await rec.done;
217
+ session.recording = null;
218
+ session.human = await createHuman(session.page, {
219
+ personality: session.personality,
220
+ speed: session.speed
221
+ });
222
+ return recording;
223
+ }
224
+ /**
225
+ * Replaces the `Human` instance on an existing session with one bound
226
+ * to a new personality. Browser context, page, cookies, and scroll
227
+ * position are preserved — only the humanization profile changes.
228
+ */
229
+ async setPersonality(id = DEFAULT_SESSION_ID, personality) {
230
+ const session = await this.get(id);
231
+ assertNotRecording(session, "change personality");
232
+ session.human = await createHuman(session.page, { personality, speed: session.speed });
233
+ session.personality = session.human.personality.name ?? "careful";
234
+ return toSessionInfo(session);
235
+ }
236
+ /**
237
+ * Changes the humanization pace for a session at runtime. Recreates the
238
+ * `Human` (speed is fixed at creation); browser context, page, cookies,
239
+ * and scroll position are preserved.
240
+ */
241
+ async setSpeed(id = DEFAULT_SESSION_ID, speed) {
242
+ const session = await this.get(id);
243
+ assertNotRecording(session, "change speed");
244
+ session.speed = speed;
245
+ session.human = await createHuman(session.page, {
246
+ personality: session.personality,
247
+ speed
248
+ });
249
+ return toSessionInfo(session);
250
+ }
251
+ /** Lists all currently-open sessions, including the default if active. */
252
+ list() {
253
+ return [...this.sessions.values()].map(toSessionInfo);
254
+ }
255
+ /** Closes a single session and frees its browser context. */
256
+ async close(id) {
257
+ const session = this.sessions.get(id);
258
+ if (!session) return;
259
+ if (session.recording) {
260
+ const rec = session.recording;
261
+ session.recording = null;
262
+ try {
263
+ rec.stop();
264
+ const recording = await rec.done;
265
+ const ext = rec.video ? ".mp4" : ".json";
266
+ const path = join(this.env.outputDir, basename(`${rec.name}-${rec.startedAt}${ext}`));
267
+ if (rec.video) await recording.toVideo(path);
268
+ else await recording.toTimeline(path);
269
+ } catch {
270
+ }
271
+ }
272
+ this.sessions.delete(id);
273
+ if (session.mode === "ephemeral") {
274
+ await session.context.close();
275
+ } else if (session.mode === "persistent") {
276
+ await this.persistentContext?.close();
277
+ this.persistentContext = null;
278
+ }
279
+ }
280
+ /**
281
+ * Tears down every session and the backing browser. Called from the bin
282
+ * entry's shutdown handlers (SIGINT / SIGTERM) so we don't leak chrome
283
+ * processes — and by {@link restartBrowser}. A CDP-attached browser is
284
+ * never closed (it's the user's), only disconnected by dropping the ref.
285
+ */
286
+ async closeAll() {
287
+ for (const id of [...this.sessions.keys()]) {
288
+ await this.close(id);
289
+ }
290
+ if (this.browser && !this.cdpConnected) {
291
+ await this.browser.close();
292
+ }
293
+ this.browser = null;
294
+ this.cdpConnected = false;
295
+ this.persistentContext = null;
296
+ }
297
+ /**
298
+ * Tears the browser down so the next action relaunches it in the current
299
+ * (possibly newly-toggled) mode. Backs `human_restart_browser` — the way
300
+ * to apply a persistence change without restarting the whole MCP server.
301
+ * Discards open pages/tabs.
302
+ */
303
+ async restartBrowser() {
304
+ await this.closeAll();
305
+ }
306
+ /**
307
+ * Turns on a persistent profile for subsequent browser starts (backs
308
+ * `human_enable_persistence`). Takes effect on the next browser launch —
309
+ * call {@link restartBrowser} to apply it to an already-running browser.
310
+ */
311
+ setPersistOverride(userDataDir) {
312
+ this.persistOverride = { userDataDir: userDataDir ?? DEFAULT_PERSIST_DIR };
313
+ }
314
+ /** Read-only snapshot of the browser configuration (backs `human_browser_info`). */
315
+ browserInfo() {
316
+ const config = this.effectiveConfig();
317
+ const running = this.browserRunning();
318
+ return {
319
+ mode: config.mode,
320
+ userDataDir: config.mode === "persistent" ? config.userDataDir : null,
321
+ cdpUrl: config.mode === "cdp" ? config.cdpUrl : null,
322
+ channel: this.env.channel ?? null,
323
+ // A toggle is "pending restart" only when a browser is already up:
324
+ // before any browser exists, the new mode just applies on next start.
325
+ persistPendingRestart: this.persistOverride !== null && running,
326
+ browserRunning: running
327
+ };
328
+ }
329
+ browserRunning() {
330
+ return this.browser !== null || this.persistentContext !== null;
331
+ }
332
+ async ensureEphemeralBrowser() {
333
+ if (this.browser) return this.browser;
334
+ this.browser = await this.withBrowserInstall(
335
+ () => chromium.launch({ headless: this.env.headless, channel: this.env.channel })
336
+ );
337
+ return this.browser;
338
+ }
339
+ async ensureCdpBrowser(url) {
340
+ if (this.browser) return this.browser;
341
+ try {
342
+ this.browser = await chromium.connectOverCDP(url);
343
+ } catch (error) {
344
+ const message = error instanceof Error ? error.message : String(error);
345
+ throw new Error(
346
+ `Could not attach to a browser at ${url} (HUMANJS_CDP_URL). Start your browser with --remote-debugging-port and a matching URL, then retry. (${message})`
347
+ );
348
+ }
349
+ this.cdpConnected = true;
350
+ return this.browser;
351
+ }
352
+ async ensurePersistentContext(userDataDir, viewport) {
353
+ if (this.persistentContext) return this.persistentContext;
354
+ this.persistentContext = await this.withBrowserInstall(
355
+ () => chromium.launchPersistentContext(userDataDir, {
356
+ headless: this.env.headless,
357
+ channel: this.env.channel,
358
+ viewport
359
+ })
360
+ );
361
+ await installMouseHelper(this.persistentContext);
362
+ return this.persistentContext;
363
+ }
364
+ /**
365
+ * Runs a browser-launch thunk, auto-installing Chromium once and retrying
366
+ * if the binary is missing (the common first-run failure — binaries can't
367
+ * ship via npm). Honors `HUMANJS_AUTO_INSTALL=false`. CDP attach doesn't
368
+ * need a local binary, so it doesn't go through here.
369
+ */
370
+ async withBrowserInstall(launch) {
371
+ try {
372
+ return await launch();
373
+ } catch (error) {
374
+ const message = error instanceof Error ? error.message : String(error);
375
+ if (!/executable doesn't exist|playwright install/i.test(message)) throw error;
376
+ if (!this.env.autoInstall) {
377
+ throw new Error(
378
+ "Chromium isn't installed and HUMANJS_AUTO_INSTALL is off. Run `npx playwright install chromium` once, then retry."
379
+ );
380
+ }
381
+ await installChromium();
382
+ try {
383
+ return await launch();
384
+ } catch (retryError) {
385
+ const retryMessage = retryError instanceof Error ? retryError.message : String(retryError);
386
+ throw new Error(
387
+ `Auto-install of Chromium ran but the browser still failed to launch. Try \`npx playwright install chromium\` manually. (Original error: ${retryMessage})`
388
+ );
389
+ }
390
+ }
391
+ }
392
+ };
393
/**
 * Installs Chromium by running the Playwright CLI (`install chromium`) with
 * the current Node executable, resolving once the child exits with code 0.
 * Progress messages are written to stderr.
 *
 * @returns {Promise<void>}
 * @throws {Error} When Playwright's CLI entry cannot be located, the child
 *   cannot be spawned, it exits non-zero, or it is terminated by a signal.
 */
async function installChromium() {
  process.stderr.write(
    "[humanjs-mcp] Chromium not found \u2014 installing once (~150MB, may take a minute)\u2026\n"
  );
  const require2 = createRequire(import.meta.url);
  const pkgPath = require2.resolve("playwright/package.json");
  const pkg = JSON.parse(readFileSync(pkgPath, "utf8"));
  // package.json "bin" may be a single path string or a { name: path } map.
  const binRel = typeof pkg.bin === "string" ? pkg.bin : pkg.bin?.playwright;
  if (!binRel) {
    throw new Error("Could not locate Playwright's CLI to install Chromium.");
  }
  const cli = join(dirname(pkgPath), binRel);
  await new Promise((resolve, reject) => {
    const child = spawn(process.execPath, [cli, "install", "chromium"], {
      // The child's stdout and stderr are both routed to this process's
      // stderr (fd 2) — presumably to keep stdout free for the MCP stdio
      // transport; confirm against the server entry point.
      stdio: ["ignore", 2, 2]
    });
    child.on("error", reject);
    child.on("exit", (code, signal) => {
      if (code === 0) {
        resolve();
      } else if (signal) {
        // `code` is null when the child was killed by a signal.
        reject(new Error(`playwright install was terminated by signal ${signal}.`));
      } else {
        reject(new Error(`playwright install exited with code ${code}.`));
      }
    });
  });
  process.stderr.write("[humanjs-mcp] Chromium installed.\n");
}
417
/**
 * Projects a session record down to its serializable summary, leaving out
 * the live browser objects.
 *
 * @param {object} session Session record.
 * @returns {{id: *, personality: *, speed: *, createdAt: *}}
 */
function toSessionInfo(session) {
  const { id, personality, speed, createdAt } = session;
  return { id, personality, speed, createdAt };
}
425
/**
 * Guard for operations that are not allowed while a session is recording.
 *
 * @param {object} session Session record; only `recording` and `id` are read.
 * @param {string} action Verb phrase inserted into the error message.
 * @throws {Error} When `session.recording` is set.
 */
function assertNotRecording(session, action) {
  if (!session.recording) return;
  const message = `Cannot ${action} while session "${session.id}" is recording. Stop the recording first with human_stop_recording.`;
  throw new Error(message);
}
432
/**
 * Registers the browser-lifecycle tools on the MCP server:
 * `human_browser_info`, `human_enable_persistence`, and
 * `human_restart_browser`.
 *
 * @param {object} server MCP server exposing `registerTool`.
 * @param {{sessions: object}} deps `sessions` provides `browserInfo`,
 *   `setPersistOverride`, and `restartBrowser`.
 */
function registerBrowserTools(server, { sessions }) {
  // Wraps a string in the single-text-item tool result shape.
  const asText = (text) => ({ content: [{ type: "text", text }] });

  server.registerTool(
    "human_browser_info",
    {
      title: "Browser configuration",
      description: "Reports how the browser is obtained (ephemeral fresh profile, persistent profile, or attached over CDP), the channel, and whether logins persist. Use it to explain to the user why they are or are not signed in, and how to change it.",
      inputSchema: {}
    },
    async () => {
      const info = sessions.browserInfo();
      let lines;
      switch (info.mode) {
        case "cdp":
          lines = [
            `Mode: attached to your running browser over CDP (${info.cdpUrl}).`,
            "Uses that browser's existing logins, tabs, and extensions."
          ];
          break;
        case "persistent":
          lines = [
            `Mode: persistent profile at ${info.userDataDir}.`,
            "Logins persist across runs (sign in once; it sticks)."
          ];
          break;
        default:
          lines = [
            "Mode: ephemeral \u2014 a fresh, empty profile each run (no saved logins).",
            "To keep logins across runs: call human_enable_persistence, or set HUMANJS_PERSIST=true in the MCP config for a permanent default."
          ];
      }
      lines.push(
        `Channel: ${info.channel ?? "bundled Chromium"}.`,
        `Browser running: ${info.browserRunning ? "yes" : "no"}.`
      );
      if (info.persistPendingRestart) {
        lines.push(
          "A persistence change is set but not yet applied \u2014 call human_restart_browser to apply it now."
        );
      }
      return asText(lines.join("\n"));
    }
  );

  server.registerTool(
    "human_enable_persistence",
    {
      title: "Enable a persistent profile",
      description: "Switches HumanJS to a persistent browser profile so logins/cookies survive across runs. Takes effect on the next browser start; pass restartNow:true to apply immediately (restarting discards the current page \u2014 re-navigate after). For a permanent default, set HUMANJS_PERSIST=true in the MCP config instead. Note: this does NOT use your real Chrome profile (that stays an env-only, opt-in setting).",
      inputSchema: {
        userDataDir: z.string().optional().describe("Optional profile directory. Defaults to ~/.humanjs/profile."),
        restartNow: z.boolean().optional().describe(
          "Restart the browser immediately to apply (discards the current page). Default false."
        )
      }
    },
    async ({ userDataDir, restartNow }) => {
      sessions.setPersistOverride(userDataDir);
      const info = sessions.browserInfo();
      const parts = [`Persistence enabled (profile: ${info.userDataDir}).`];
      if (!info.persistPendingRestart) {
        // No browser is up yet, so the next launch simply picks the profile up.
        parts.push(" It will apply on the next action.");
      } else if (restartNow) {
        await sessions.restartBrowser();
        parts.push(" Browser restarted \u2014 re-navigate to your page; logins will now persist.");
      } else {
        parts.push(" Active on the next browser start. Call human_restart_browser to apply now (discards the current page).");
      }
      parts.push(" For a permanent default, set HUMANJS_PERSIST=true in your MCP config.");
      return asText(parts.join(""));
    }
  );

  server.registerTool(
    "human_restart_browser",
    {
      title: "Restart the browser",
      description: "Closes the browser and all sessions; the next action launches a fresh one in the current mode. Use to apply a persistence change, or to recover a wedged browser. Discards open pages/tabs \u2014 re-navigate afterward. (Does not affect a CDP-attached browser beyond disconnecting.)",
      inputSchema: {}
    },
    async () => {
      await sessions.restartBrowser();
      return asText(
        "Browser restarted. The next action launches a fresh browser in the current mode \u2014 re-navigate to your page."
      );
    }
  );
}
515
// Schema for a personality preset name. Built from VALID_PRESETS so the tool
// schema and the HUMANJS_PERSONALITY env parser cannot drift apart.
var preset = z.enum(VALID_PRESETS);
/**
 * Registers the runtime-configuration tools on the MCP server:
 * `human_set_personality`, `human_set_speed`, and `human_set_viewport`.
 *
 * @param {object} server MCP server exposing `registerTool`.
 * @param {{sessions: object}} deps `sessions` provides `setPersonality`,
 *   `setSpeed`, and `get`.
 */
function registerConfigTools(server, { sessions }) {
  server.registerTool(
    "human_set_personality",
    {
      title: "Set session personality",
      description: "Changes the humanization personality for a session at runtime. Pass a preset, or a blend of two presets (e.g. mostly careful with a touch of distracted). The browser, cookies, and scroll position are preserved \u2014 only the motion/typing/reading profile changes.",
      inputSchema: {
        personality: preset.optional().describe("A preset to apply. Provide this OR `blend`, not both."),
        blend: z.object({
          a: preset.describe("First personality."),
          b: preset.describe("Second personality."),
          ratio: z.number().min(0).max(1).describe("Weight toward `b` (0 = all a, 1 = all b). e.g. 0.3 = mostly a.")
        }).optional().describe("Blend two presets. Provide this OR `personality`, not both."),
        session: z.string().optional().describe("Session ID. Omit for the default session.")
      }
    },
    async ({ personality, blend: blendArg, session }) => {
      // Exactly one of `personality` / `blend` must be supplied.
      if (personality && blendArg) {
        throw new Error("Provide either `personality` or `blend`, not both.");
      }
      if (!personality && !blendArg) {
        throw new Error("Provide a `personality` preset or a `blend`.");
      }
      const config = blendArg ? blend(blendArg.a, blendArg.b, blendArg.ratio) : personality;
      const info = await sessions.setPersonality(session, config);
      const label = blendArg ? `blend(${blendArg.a}, ${blendArg.b}, ${blendArg.ratio})` : personality;
      return {
        content: [{ type: "text", text: `set "${info.id}" personality to ${label}` }]
      };
    }
  );
  server.registerTool(
    "human_set_speed",
    {
      title: "Set humanization speed",
      description: `Changes a session's humanization pace at runtime. "human" = full realistic motion (best for recordings); "fast" = humanized but quicker; "instant" = no humanized motion (straight Playwright). Note: this changes how long each action takes to execute, not the wait between actions. Cannot change while recording.`,
      inputSchema: {
        // Same list the HUMANJS_SPEED env parser validates against.
        speed: z.enum(VALID_SPEEDS).describe("The pace to switch to."),
        session: z.string().optional().describe("Session ID. Omit for the default session.")
      }
    },
    async ({ speed, session }) => {
      const info = await sessions.setSpeed(session, speed);
      return { content: [{ type: "text", text: `set "${info.id}" speed to ${speed}` }] };
    }
  );
  server.registerTool(
    "human_set_viewport",
    {
      title: "Resize the viewport",
      description: "Resizes a session's browser viewport at runtime. Use for a bigger/crisper recording or to test responsive layouts. The default size for new sessions is set by HUMANJS_VIEWPORT (default 1440\xD7900).",
      inputSchema: {
        width: z.number().int().positive().describe("Viewport width in CSS px."),
        height: z.number().int().positive().describe("Viewport height in CSS px."),
        session: z.string().optional().describe("Session ID. Omit for the default session.")
      }
    },
    async ({ width, height, session }) => {
      const { human } = await sessions.get(session);
      await human.setViewportSize({ width, height });
      return { content: [{ type: "text", text: `viewport set to ${width}\xD7${height}` }] };
    }
  );
}
580
/**
 * Resolves a caller-supplied filename to a path inside `outputDir`.
 *
 * Only a plain file name is accepted: anything with a directory component,
 * an empty name, or the special entries `.` / `..` is rejected, so the
 * result can never point at or above `outputDir` itself.
 *
 * @param {string} outputDir Directory files are written to.
 * @param {string} filename Caller-supplied file name.
 * @returns {string} `outputDir` joined with the validated name.
 * @throws {Error} When `filename` is not a plain file name.
 */
function resolveOutputPath(outputDir, filename) {
  const base = basename(filename);
  // basename(".") and basename("..") return their input unchanged, so they
  // need an explicit check on top of the "no path components" comparison.
  const isPlainName = base === filename && base.length > 0 && base !== "." && base !== "..";
  if (!isPlainName) {
    throw new Error(
      `filename must be a plain name with no path components, got "${filename}". Files are always written to HUMANJS_OUTPUT_DIR.`
    );
  }
  return join(outputDir, base);
}
589
+
590
+ // src/tools/inspection.ts
591
// Shared `session` argument schema for the inspection tools.
var sessionArg = z
  .string()
  .optional()
  .describe("Session ID to act on. Omit to use the default session.");
/**
 * Registers the read-only page inspection tools on the MCP server:
 * `human_screenshot`, `human_page_text`, `human_get_text`,
 * `human_get_attribute`, and `human_get_html`.
 *
 * @param {object} server MCP server exposing `registerTool`.
 * @param {{sessions: object, env: object}} ctx `sessions.get` resolves the
 *   session; `env.outputDir` is where saved screenshots go.
 */
function registerInspectionTools(server, ctx) {
  // Wraps a string in the single-text-item tool result shape.
  const asText = (text) => ({ content: [{ type: "text", text }] });

  server.registerTool(
    "human_screenshot",
    {
      title: "Screenshot the current page",
      description: "Captures the current page (or a specific element if `selector` is given) as a PNG and returns it as image content the AI can view directly. Pass `filename` to also save it to disk (HUMANJS_OUTPUT_DIR); omit it for an ephemeral look-at-the-page capture.",
      inputSchema: {
        selector: z.string().optional().describe("Optional selector. If omitted, captures the entire viewport."),
        fullPage: z.boolean().optional().describe(
          "Capture the full scrollable page instead of just the viewport. Ignored if `selector` is set."
        ),
        filename: z.string().optional().describe(
          'Optional plain filename (e.g. "homepage.png"). When set, the screenshot is saved to HUMANJS_OUTPUT_DIR. Path components are rejected for safety.'
        ),
        session: sessionArg
      }
    },
    async ({ selector, fullPage, filename, session }) => {
      const { human, page } = await ctx.sessions.get(session);
      let buffer;
      if (selector) {
        // Element capture goes through the page locator; `fullPage` does not apply.
        buffer = await page.locator(selector).screenshot();
      } else {
        buffer = await human.screenshot({ fullPage: fullPage ?? false });
      }
      const content = [{ type: "image", data: buffer.toString("base64"), mimeType: "image/png" }];
      if (filename) {
        const path = resolveOutputPath(ctx.env.outputDir, filename);
        await writeFile(path, buffer);
        content.push({ type: "text", text: `saved screenshot to ${path}` });
      }
      return { content };
    }
  );

  server.registerTool(
    "human_page_text",
    {
      title: "Get visible page text",
      description: "Returns the page's visible text (document.body.innerText). The fastest way to understand what's on screen without parsing HTML \u2014 prefer this over human_get_html unless you need element structure or attributes.",
      inputSchema: { session: sessionArg }
    },
    async ({ session }) => {
      const { human } = await ctx.sessions.get(session);
      return asText(await human.pageText());
    }
  );

  server.registerTool(
    "human_get_text",
    {
      title: "Get an element's text",
      description: "Returns the visible innerText of the first element matching `selector`. Use to read a specific label, price, status, or message.",
      inputSchema: {
        selector: z.string().describe("Selector of the element to read."),
        session: sessionArg
      }
    },
    async ({ selector, session }) => {
      const { page } = await ctx.sessions.get(session);
      return asText(await page.locator(selector).innerText());
    }
  );

  server.registerTool(
    "human_get_attribute",
    {
      title: "Get an element's attribute",
      description: "Returns the value of an attribute on the first element matching `selector` (or reports it is absent). Handy for reading aria-label, data-*, href, value, disabled state, etc. \u2014 often how you confirm an icon-only button's purpose.",
      inputSchema: {
        selector: z.string().describe("Selector of the element."),
        attribute: z.string().describe('Attribute name, e.g. "aria-label", "href", "data-state".'),
        session: sessionArg
      }
    },
    async ({ selector, attribute, session }) => {
      const { page } = await ctx.sessions.get(session);
      const value = await page.locator(selector).getAttribute(attribute);
      // A null value is reported as "absent" rather than as an empty value.
      if (value === null) {
        return asText(`${selector} has no attribute "${attribute}"`);
      }
      return asText(`${attribute}="${value}"`);
    }
  );

  server.registerTool(
    "human_get_html",
    {
      title: "Get an element's HTML",
      description: "Returns the outerHTML of the first element matching `selector` \u2014 the element plus its children, including its own attributes (class, aria-label, etc.). The go-to tool for discovering the real selector of a control with no obvious text. Target a specific region; full-page HTML is large.",
      inputSchema: {
        selector: z.string().describe("Selector of the region to dump. Target narrowly."),
        session: sessionArg
      }
    },
    async ({ selector, session }) => {
      const { page } = await ctx.sessions.get(session);
      const html = await page.locator(selector).evaluate((el) => el.outerHTML);
      return asText(html);
    }
  );
}
685
// Input-schema fields shared by tools that accept a target: either a
// `selector`, or an `x`/`y` coordinate pair.
var targetFields = {
  selector: z
    .string()
    .optional()
    .describe(
      "Playwright-compatible selector. Provide this OR x/y \u2014 not both. Prefer role/text selectors over brittle CSS."
    ),
  x: z
    .number()
    .optional()
    .describe(
      "X coordinate (CSS px from viewport left). Use x+y when there is no clean selector \u2014 e.g. an icon-only button you can see in a screenshot. Requires y."
    ),
  y: z
    .number()
    .optional()
    .describe("Y coordinate (CSS px from viewport top). Requires x.")
};
694
/**
 * Turns a tool's target arguments into a single target value: the selector
 * string, or an `{ x, y }` point.
 *
 * A selector and coordinates are mutually exclusive. Supplying a selector
 * together with even one coordinate is rejected rather than silently
 * dropping the coordinate, so an ambiguous call never acts on a target the
 * caller may not have intended.
 *
 * @param {{selector?: string, x?: number, y?: number}} input Target arguments.
 * @returns {string | {x: number, y: number}} The resolved target.
 * @throws {Error} When both kinds of target are given, only one coordinate
 *   is given, or no target is given at all.
 */
function resolveTarget(input) {
  const hasSelector = input.selector !== void 0;
  const hasX = input.x !== void 0;
  const hasY = input.y !== void 0;
  if (hasSelector && (hasX || hasY)) {
    throw new Error("Provide either a selector or x/y coordinates, not both.");
  }
  if (hasSelector) return input.selector;
  if (hasX && hasY) return { x: input.x, y: input.y };
  if (hasX || hasY) {
    throw new Error("Coordinate targets need both x and y.");
  }
  throw new Error("Provide a selector or x/y coordinates.");
}
707
+
708
+ // src/tools/primitives.ts
709
// Shared `session` argument schema for the primitive (action) tools.
var sessionArg2 = z
  .string()
  .optional()
  .describe(
    "Session ID to act on. Omit to use the default session (created lazily on first call). Use human_create_session for parallel browsers."
  );
712
/**
 * Registers the primitive action tools (navigate, click, right-click, hover,
 * move, drag, type, paste, press, scroll, read) on the MCP server.
 *
 * Every handler validates its arguments BEFORE asking the session manager for
 * a session. The `session` argument's own description says the default session
 * is created lazily on first call, so a malformed request should fail without
 * acquiring one.
 *
 * @param {object} server - MCP server exposing `registerTool(name, config, handler)`.
 * @param {{ sessions: object }} ctx - Context; `sessions.get(id)` must resolve to
 *   an object carrying the `human` driver for that session.
 * @returns {void}
 */
function registerPrimitiveTools(server, { sessions }) {
  /** Wraps a message in the single-text-item result shape every tool returns. */
  const reply = (text) => ({ content: [{ type: "text", text }] });

  /**
   * Registers a tool whose only inputs are a selector-or-point target plus the
   * session, and whose action is `human[method](target)`.
   */
  const registerTargetTool = ({ name, title, description, method, verb }) => {
    server.registerTool(
      name,
      {
        title,
        description,
        inputSchema: { ...targetFields, session: sessionArg2 }
      },
      async ({ selector, x, y, session }) => {
        // Validate first so bad input never touches the session manager.
        const target = resolveTarget({ selector, x, y });
        const { human } = await sessions.get(session);
        await human[method](target);
        return reply(`${verb} ${describeTarget(selector, x, y)}`);
      }
    );
  };

  server.registerTool(
    "human_goto",
    {
      title: "Navigate to URL",
      description: `Navigates the session's page to a URL. Plugins observe a "goto" action. Equivalent to a user typing a URL in the address bar.`,
      inputSchema: {
        url: z.string().url().describe("Absolute URL to navigate to."),
        session: sessionArg2
      }
    },
    async ({ url, session }) => {
      const { human } = await sessions.get(session);
      await human.goto(url);
      return reply(`navigated to ${url}`);
    }
  );

  registerTargetTool({
    name: "human_click",
    title: "Click (humanized)",
    description:
      "Moves the cursor to the target along a humanized Bezier path and clicks. Target is a selector OR x/y coordinates \u2014 use coordinates for icon-only buttons or anything with no clean selector that you can see in a screenshot.",
    method: "click",
    verb: "clicked"
  });

  registerTargetTool({
    name: "human_rightClick",
    title: "Right-click (humanized)",
    description:
      "Right-clicks the target to open a context menu. Same motion as human_click; only the dispatched button differs. Target is a selector OR x/y coordinates.",
    method: "rightClick",
    verb: "right-clicked"
  });

  server.registerTool(
    "human_hover",
    {
      title: "Hover an element (humanized)",
      description:
        "Moves the cursor to an element and settles on it (no click), letting hover-triggered UI fire \u2014 tooltips, dropdowns. Element-bound only; for positioning the cursor at coordinates without an element, use human_move.",
      inputSchema: {
        selector: z.string().describe("Selector of the element to hover."),
        session: sessionArg2
      }
    },
    async ({ selector, session }) => {
      const { human } = await sessions.get(session);
      await human.hover(selector);
      return reply(`hovered ${selector}`);
    }
  );

  registerTargetTool({
    name: "human_move",
    title: "Move the cursor (humanized)",
    description:
      "Moves the cursor to a target along a Bezier path with no click and no settle dwell \u2014 pure positioning. Useful before a keyboard action, for canvas work, or cinematic beats. Target is a selector OR x/y coordinates.",
    method: "move",
    verb: "moved to"
  });

  server.registerTool(
    "human_drag",
    {
      title: "Drag (humanized)",
      description:
        "Drags from one location to another \u2014 cursor \u2192 source, mousedown, source \u2192 destination, mouseup, all humanized. Each endpoint is a selector OR x/y coordinates (use coordinates for sliders, canvas, SVG handles).",
      inputSchema: {
        fromSelector: z.string().optional().describe("Source selector. Provide this OR fromX/fromY."),
        fromX: z.number().optional().describe("Source X coordinate. Requires fromY."),
        fromY: z.number().optional().describe("Source Y coordinate. Requires fromX."),
        toSelector: z.string().optional().describe("Destination selector. Provide this OR toX/toY."),
        toX: z.number().optional().describe("Destination X coordinate. Requires toY."),
        toY: z.number().optional().describe("Destination Y coordinate. Requires toX."),
        session: sessionArg2
      }
    },
    async ({ fromSelector, fromX, fromY, toSelector, toX, toY, session }) => {
      // Both endpoints are checked before a session is requested.
      const from = resolveTarget({ selector: fromSelector, x: fromX, y: fromY });
      const to = resolveTarget({ selector: toSelector, x: toX, y: toY });
      const { human } = await sessions.get(session);
      await human.drag(from, to);
      return reply(
        `dragged ${describeTarget(fromSelector, fromX, fromY)} \u2192 ${describeTarget(toSelector, toX, toY)}`
      );
    }
  );

  server.registerTool(
    "human_type",
    {
      title: "Type text (humanized)",
      description:
        "Clicks the element to focus it, then types with humanized per-key rhythm. The current personality controls speed, typo probability, and corrections (HUMANJS_PERSONALITY / human_set_personality). If this types into a search/filter, the results re-render (often debounced) \u2014 use a specific selector for the result, as the list shifts as it filters.",
      inputSchema: {
        selector: z.string().describe("Selector of the input/textarea/contenteditable."),
        value: z.string().describe("Text to type. May contain newlines."),
        session: sessionArg2
      }
    },
    async ({ selector, value, session }) => {
      const { human } = await sessions.get(session);
      await human.type(selector, value);
      return reply(`typed ${value.length} chars into ${selector}`);
    }
  );

  server.registerTool(
    "human_paste",
    {
      title: "Paste text (one shot)",
      description:
        "Inserts text in one shot (the Cmd-V semantic) \u2014 focuses the field, then sets the whole value via insertText with no per-key timing. Use for long strings where humanized typing would be slow. Does not fire the page paste event.",
      inputSchema: {
        selector: z.string().describe("Selector of the field to paste into."),
        value: z.string().describe("Text to insert."),
        session: sessionArg2
      }
    },
    async ({ selector, value, session }) => {
      const { human } = await sessions.get(session);
      await human.paste(selector, value);
      return reply(`pasted ${value.length} chars into ${selector}`);
    }
  );

  server.registerTool(
    "human_press",
    {
      title: "Press a key or chord",
      description:
        'Presses a single key (Enter, Tab, Escape, ArrowDown, \u2026) or a chord (Mod+S, Cmd+Shift+P, Ctrl+C). "Mod" maps to Meta on Mac and Control elsewhere. Dispatches against focus \u2014 does not move the cursor; compose with human_click/human_move when you need both.',
      inputSchema: {
        key: z.string().describe('Key or chord, e.g. "Enter", "Tab", "Mod+S", "Ctrl+Shift+K".'),
        session: sessionArg2
      }
    },
    async ({ key, session }) => {
      const { human } = await sessions.get(session);
      await human.press(key);
      return reply(`pressed ${key}`);
    }
  );

  server.registerTool(
    "human_scroll",
    {
      title: "Scroll (humanized)",
      description:
        'Scrolls the page or a container with a natural velocity profile. Default scrolls one viewport down. Use `target` for presets ("natural"/"end"/"top") or an element selector to scroll into view; `by` for a relative pixel delta; `to` for an absolute position.',
      inputSchema: {
        target: z.string().optional().describe(
          'One of "natural" (one viewport), "end", "top", or an element selector to scroll until visible. Defaults to "natural". Ignored if `by` or `to` is set.'
        ),
        by: z.number().optional().describe("Relative pixel delta (negative = up/left)."),
        to: z.number().optional().describe("Absolute scroll position on the chosen axis."),
        axis: z.enum(["x", "y"]).optional().describe('Axis to scroll. Defaults to "y".'),
        within: z.string().optional().describe("Selector of a scrollable container to scope the scroll to."),
        session: sessionArg2
      }
    },
    async ({ target, by, to, axis, within, session }) => {
      // Precedence: relative delta, then absolute position, then preset/selector.
      let scrollTarget;
      if (by !== undefined) {
        scrollTarget = { by };
      } else if (to !== undefined) {
        scrollTarget = { to };
      } else {
        scrollTarget = target ?? "natural";
      }
      const { human } = await sessions.get(session);
      const result = await human.scroll(scrollTarget, { axis, within });
      return reply(`scrolled ${result.from} \u2192 ${result.to} (${result.distance}px)`);
    }
  );

  server.registerTool(
    "human_read",
    {
      title: "Read dwell (humanized)",
      description: `Dwells as if reading the target \u2014 pause time derived from word count and the personality's reading speed, with a visible cursor scan across the text. Models the "user pauses to read" beat. Provide a selector OR literal text.`,
      inputSchema: {
        selector: z.string().optional().describe("Selector of the text to read. Provide this OR text."),
        text: z.string().optional().describe('Literal text to "read" (no DOM lookup). Provide this OR selector.'),
        kind: z.enum(["prose", "code", "scan"]).optional().describe('Reading style. Auto-detected as "code" for <pre>/<code> when omitted.'),
        session: sessionArg2
      }
    },
    async ({ selector, text, kind, session }) => {
      if (selector === undefined && text === undefined) {
        throw new Error("Provide a selector or text to read.");
      }
      // Literal text takes precedence when both are supplied.
      const readTarget = text !== undefined ? { text } : selector;
      const { human } = await sessions.get(session);
      const result = await human.read(readTarget, { kind });
      return reply(`read ${result.words} words (${result.kind}) over ${result.durationMs}ms`);
    }
  );
}
926
/**
 * Builds the human-readable label for a target used in tool result messages.
 *
 * @param {string | undefined} selector - Selector, if one was supplied.
 * @param {number | undefined} x - X coordinate, if supplied.
 * @param {number | undefined} y - Y coordinate, if supplied.
 * @returns {string} The selector itself, else `(x, y)` when both coordinates
 *   are present, else the generic word "target".
 */
function describeTarget(selector, x, y) {
  if (selector === void 0) {
    const pointGiven = !(x === void 0 || y === void 0);
    return pointGiven ? `(${x}, ${y})` : "target";
  }
  return selector;
}
931
/**
 * Registers the recording tools: `human_start_recording` and
 * `human_stop_recording`.
 *
 * @param {object} server - MCP server exposing `registerTool(name, config, handler)`.
 * @param {{ sessions: object, env: object }} ctx - `sessions` starts/stops
 *   recordings; `env.outputDir` is the directory output filenames resolve against.
 * @returns {void}
 */
function registerRecordingTools(server, { sessions, env }) {
  const startConfig = {
    title: "Start recording",
    description:
      "Begins recording the session. Every humanized action until human_stop_recording is captured (frames + action timeline). The visible cursor is in the video. One recording per session at a time. For a natural-looking take: explore the flow first to find correct selectors, then dispatch the whole run \u2014 start_recording + every action + stop_recording \u2014 in a SINGLE turn (one batch of tool calls), so there are no model-thinking pauses between actions to leave dead air in the video.",
    inputSchema: {
      name: z.string().optional().describe("Label for the recording (used in the timeline + the fallback filename)."),
      video: z.boolean().optional().describe("Capture video frames. Default true. Set false for a timeline-only recording."),
      quality: z.enum(["fast", "standard", "high", "lossless"]).optional().describe('Capture/encode quality. Default "high" (1080p, visually lossless).'),
      session: z.string().optional().describe("Session ID. Omit for the default session.")
    }
  };

  server.registerTool("human_start_recording", startConfig, async ({ name, video, quality, session }) => {
    await sessions.startRecording(session, { name, video, quality });
    const label = name ? ` ("${name}")` : "";
    return {
      content: [{ type: "text", text: `recording started${label}` }]
    };
  });

  const stopConfig = {
    title: "Stop recording and save",
    description: `Stops the active recording and writes it to one or more files in HUMANJS_OUTPUT_DIR. Each filename's extension picks its format: .mp4/.webm = video, .gif = animated gif, .json = action timeline. Pass several to export the same recording multiple ways, e.g. ["demo.mp4", "demo.json"] for video + timeline. Path components are rejected for safety.`,
    inputSchema: {
      filenames: z.array(z.string()).min(1).describe(
        'One or more output filenames. The recording is saved to each, format chosen by extension. e.g. ["demo.mp4"] or ["demo.mp4", "demo.gif", "demo.json"].'
      ),
      session: z.string().optional().describe("Session ID. Omit for the default session.")
    }
  };

  server.registerTool("human_stop_recording", stopConfig, async ({ filenames, session }) => {
    // Pass 1: resolve every output path up front.
    const targets = [];
    for (const filename of filenames) {
      const path = resolveOutputPath(env.outputDir, filename);
      targets.push({ path, ext: extname(filename).toLowerCase() });
    }

    // Pass 2: reject unknown formats while the recording is still running.
    const supported = [".mp4", ".webm", ".gif", ".json"];
    const rejected = targets.find(({ ext }) => !supported.includes(ext));
    if (rejected !== void 0) {
      throw new Error(
        `Unsupported output extension "${rejected.ext}". Use .mp4, .webm, .gif, or .json.`
      );
    }

    const recording = await sessions.stopRecording(session);
    try {
      const saved = [];
      // Exports run one after another, in the order the filenames were given.
      for (const { path, ext } of targets) {
        let written;
        switch (ext) {
          case ".gif":
            written = await recording.toGif(path);
            break;
          case ".json":
            written = await recording.toTimeline(path);
            break;
          default:
            written = await recording.toVideo(path);
        }
        saved.push(written);
      }
      return {
        content: [{ type: "text", text: `saved recording to:\n${saved.join("\n")}` }]
      };
    } finally {
      // Dispose the recording whether or not every export succeeded.
      await recording.dispose();
    }
  });
}
991
/** Optional per-session personality preset accepted by the session tools. */
var personalityArg = z
  .enum(["careful", "fast", "distracted", "precise"])
  .optional()
  .describe("Personality preset for this session. Defaults to HUMANJS_PERSONALITY.");

/** Optional per-session humanization pace accepted by the session tools. */
var speedArg = z
  .enum(["human", "fast", "instant"])
  .optional()
  .describe(
    'Humanization pace. "human" (default) = full realistic motion; "fast" = humanized but quick; "instant" = no humanized motion. Defaults to HUMANJS_SPEED.'
  );
995
/**
 * Registers the session lifecycle tools: `human_create_session`,
 * `human_close_session`, and `human_list_sessions`.
 *
 * @param {object} server - MCP server exposing `registerTool(name, config, handler)`.
 * @param {{ sessions: object }} ctx - `sessions` provides `create`, `close`, and `list`.
 * @returns {void}
 */
function registerSessionTools(server, { sessions }) {
  /** Formats one session as `id (personality: ..., speed: ...)`. */
  const summarize = (s) => `${s.id} (personality: ${s.personality}, speed: ${s.speed})`;

  const createConfig = {
    title: "Create a browser session",
    description:
      "Opens a new isolated session (its own browser context, cookies, viewport) under the given ID. Only needed for parallel browsers \u2014 for a single browser, just omit the session arg on other tools and the default session is used.",
    inputSchema: {
      id: z.string().describe('Unique session ID, e.g. "buyer", "seller".'),
      personality: personalityArg,
      speed: speedArg,
      width: z.number().int().positive().optional().describe("Viewport width in CSS px. Defaults to HUMANJS_VIEWPORT. Requires height."),
      height: z.number().int().positive().optional().describe("Viewport height in CSS px. Requires width.")
    }
  };

  server.registerTool("human_create_session", createConfig, async ({ id, personality, speed, width, height }) => {
    const widthGiven = width !== void 0;
    const heightGiven = height !== void 0;
    // Exactly one dimension supplied is an error; both or neither is fine.
    if (widthGiven !== heightGiven) {
      throw new Error("Provide both width and height, or neither.");
    }
    let viewport;
    if (widthGiven && heightGiven) {
      viewport = { width, height };
    }
    const session = await sessions.create(id, { personality, speed, viewport });
    const text = `created session "${session.id}" (personality: ${session.personality}, speed: ${session.speed})`;
    return { content: [{ type: "text", text }] };
  });

  const closeConfig = {
    title: "Close a browser session",
    description:
      "Closes a session and frees its browser context. Closing the default session is allowed \u2014 it will be recreated lazily on the next call.",
    inputSchema: {
      id: z.string().describe("Session ID to close.")
    }
  };

  server.registerTool("human_close_session", closeConfig, async ({ id }) => {
    await sessions.close(id);
    return { content: [{ type: "text", text: `closed session "${id}"` }] };
  });

  const listConfig = {
    title: "List open sessions",
    description:
      "Lists every currently-open session with its personality. Use to orient before acting on a specific session.",
    inputSchema: {}
  };

  server.registerTool("human_list_sessions", listConfig, async () => {
    const open = sessions.list();
    let text;
    if (open.length === 0) {
      text = "no open sessions (the default session is created on first action)";
    } else {
      text = open.map(summarize).join("\n");
    }
    return { content: [{ type: "text", text }] };
  });
}
1053
+
1054
+ // src/index.ts
1055
/** Name reported to MCP clients in the server info. */
var SERVER_NAME = "humanjs-mcp";

/** Version reported to MCP clients in the server info. */
var SERVER_VERSION = "0.1.0";

/**
 * Free-form guidance passed to the server as its `instructions` option.
 * The text is part of runtime behavior, so it is kept verbatim.
 */
var SERVER_INSTRUCTIONS = `HumanJS drives a real browser with humanized motion, typing, and reading dwell. Motion is already realistic at the default speed \u2014 do NOT switch to 'fast'/'instant' or change personality to make a flow "look natural"; it already does.

DISPATCH KNOWN STEPS TOGETHER. When you already know the full sequence (a recording, or any flow you've mapped out), emit ALL the tool calls in a SINGLE turn, back-to-back, WITHOUT pausing to reason between them. This matters a lot: each model turn between actions is a multi-second gap, which is slow in general and shows up as dead air in a recording. The humanized motion paces the actions on its own \u2014 don't add thinking gaps on top. Only go one tool at a time when a step genuinely needs the previous step's result (exploring, or reacting to something you can't predict).

Recording a flow (the natural-looking way):
1. EXPLORE FIRST (un-recorded). Navigate the flow once to discover correct, unambiguous selectors (human_screenshot / human_get_html / human_get_attribute). Do this by default whenever the selectors aren't already known \u2014 no need for the user to ask. Skip it only if the selectors are already known or the user tells you not to explore.
2. THEN RECORD ONE CLEAN RUN AS A SINGLE BATCH: human_start_recording + every action + human_stop_recording, all emitted in one turn. Keep selector-guessing and fumbles out of the take.

Dynamic UI: prefer specific selectors (role, aria-label) over text \u2014 the same visible text often matches several cards before a filter, or the wrong one after. If a click reports multiple matches, narrow the selector.

Browser state: by default each run is a fresh, signed-out browser. If a flow needs a login, tell the user to enable persistence (human_enable_persistence or HUMANJS_PERSIST) or CDP attach \u2014 see human_browser_info.`;
1068
/**
 * Entry point: reads configuration from the environment, creates the session
 * manager and MCP server, registers every tool group, installs signal
 * handlers for cleanup, and connects the server over stdio.
 *
 * @returns {Promise<void>} Resolves once `server.connect(transport)` resolves.
 */
async function main() {
  const env = readEnv();
  const sessions = new SessionManager(env);
  const ctx = { sessions, env };
  const server = new McpServer(
    { name: SERVER_NAME, version: SERVER_VERSION },
    { instructions: SERVER_INSTRUCTIONS }
  );
  registerPrimitiveTools(server, ctx);
  registerInspectionTools(server, ctx);
  registerRecordingTools(server, ctx);
  registerSessionTools(server, ctx);
  registerConfigTools(server, ctx);
  registerBrowserTools(server, ctx);

  // Close every session on SIGINT/SIGTERM, then exit.
  const shutdown = async () => {
    try {
      await sessions.closeAll();
    } catch (error) {
      // Without this, the exit in `finally` would discard the failure silently.
      // stderr is used because stdout carries the MCP protocol stream.
      console.error("[humanjs-mcp] failed to close sessions during shutdown:", error);
    } finally {
      process.exit(0);
    }
  };
  process.on("SIGINT", shutdown);
  process.on("SIGTERM", shutdown);

  const transport = new StdioServerTransport();
  await server.connect(transport);
}

// Any startup failure is fatal: report it on stderr and exit non-zero.
main().catch((error) => {
  console.error("[humanjs-mcp] fatal:", error);
  process.exit(1);
});
1098
+ //# sourceMappingURL=index.js.map
1099
+ //# sourceMappingURL=index.js.map