@lydia-agent/core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1396,6 +1396,17 @@ var ConfigSchema = z5.object({
1396
1396
  checkpointTtlHours: z5.number().default(24),
1397
1397
  observationFrameTtlHours: z5.number().default(24 * 7)
1398
1398
  }).default({}),
1399
+ browser: z5.object({
1400
+ enabled: z5.boolean().default(true),
1401
+ mode: z5.enum(["auto", "cdp", "headless", "remote"]).default("auto"),
1402
+ cdpPort: z5.number().int().positive().default(9222),
1403
+ remoteUrl: z5.string().default(""),
1404
+ chromePath: z5.string().default(""),
1405
+ launchHostBrowser: z5.boolean().default(false),
1406
+ navigationTimeoutMs: z5.number().positive().default(3e4),
1407
+ actionTimeoutMs: z5.number().positive().default(1e4),
1408
+ downloadDir: z5.string().default("")
1409
+ }).default({}),
1399
1410
  skills: z5.object({
1400
1411
  /** Maximum number of skills whose full content is injected into the prompt (default: 3) */
1401
1412
  matchTopK: z5.number().default(3),
@@ -1459,6 +1470,10 @@ var ConfigLoader = class {
1459
1470
  ...current.memory || {},
1460
1471
  ...partial.memory
1461
1472
  },
1473
+ browser: {
1474
+ ...current.browser || {},
1475
+ ...partial.browser
1476
+ },
1462
1477
  skills: {
1463
1478
  ...current.skills || {},
1464
1479
  ...partial.skills
@@ -3289,8 +3304,8 @@ var InteractionServer = class extends EventEmitter3 {
3289
3304
  prompt: args.prompt
3290
3305
  };
3291
3306
  this.emit("request", interaction);
3292
- const responseText = await new Promise((resolve6) => {
3293
- this.pendingInteractions.set(id, resolve6);
3307
+ const responseText = await new Promise((resolve7) => {
3308
+ this.pendingInteractions.set(id, resolve7);
3294
3309
  });
3295
3310
  return {
3296
3311
  content: [{ type: "text", text: responseText }]
@@ -3309,6 +3324,616 @@ var InteractionServer = class extends EventEmitter3 {
3309
3324
  }
3310
3325
  };
3311
3326
 
3327
+ // src/mcp/servers/browser.ts
3328
+ import { Server as Server6 } from "@modelcontextprotocol/sdk/server/index.js";
3329
+ import { CallToolRequestSchema as CallToolRequestSchema6, ListToolsRequestSchema as ListToolsRequestSchema6 } from "@modelcontextprotocol/sdk/types.js";
3330
+
3331
+ // src/browser/manager.ts
3332
+ import { mkdir as mkdir4 } from "fs/promises";
3333
+ import { dirname as dirname5, join as join8, resolve as resolve5 } from "path";
3334
+ import { platform } from "os";
3335
+ import { spawn } from "child_process";
3336
+ import { chromium } from "playwright";
3337
/**
 * Build an Error tagged with a browser-tool error code.
 *
 * @param {string} code - Error code; also used as the message prefix.
 * @param {string} message - Human-readable detail appended after the code.
 * @param {boolean} [retryable=true] - Value stored on the error's `retryable` property.
 * @returns {Error} Error whose message is `"<code>: <message>"` with `code` and `retryable` attached.
 */
function createBrowserToolError(code, message, retryable = true) {
  const tagged = new Error(`${code}: ${message}`);
  return Object.assign(tagged, { code, retryable });
}
3343
/**
 * Expand a partial browser config into a complete runtime config.
 *
 * Fields that are `null`/`undefined` in `partial` receive their default.
 * `downloadDir` additionally falls back when it is any falsy value (such as
 * the empty string), resolving to `<cwd>/.lydia-artifacts/browser-downloads`.
 *
 * @param {object} [partial={}] - Caller-supplied overrides.
 * @returns {object} Config containing every browser runtime field.
 */
function createDefaultBrowserRuntimeConfig(partial = {}) {
  const {
    enabled,
    mode,
    cdpPort,
    remoteUrl,
    chromePath,
    launchHostBrowser,
    navigationTimeoutMs,
    actionTimeoutMs,
    downloadDir
  } = partial;
  return {
    enabled: enabled ?? true,
    mode: mode ?? "auto",
    cdpPort: cdpPort ?? 9222,
    remoteUrl: remoteUrl ?? "",
    chromePath: chromePath ?? "",
    launchHostBrowser: launchHostBrowser ?? false,
    navigationTimeoutMs: navigationTimeoutMs ?? 3e4,
    actionTimeoutMs: actionTimeoutMs ?? 1e4,
    // The default directory is only computed when no usable value was given.
    downloadDir: downloadDir ? downloadDir : join8(process.cwd(), ".lydia-artifacts", "browser-downloads")
  };
}
3356
/**
 * Owns one shared browser connection and one page per logical session id.
 *
 * The browser is resolved lazily on first use according to `config.mode`:
 * "cdp", "remote" and "headless" use exactly that transport, while "auto"
 * tries the local CDP endpoint, then the remote URL (when configured), then a
 * locally launched headless browser. Every public action resolves to an
 * object with at least `text` and `metadata`.
 */
var BrowserAutomationManager = class {
  /** Fully defaulted runtime configuration. */
  config;
  /** sessionId -> { sessionId, page, context, ownsContext }. */
  sessions = /* @__PURE__ */ new Map();
  /** Active browser handle, or null until first use / after dispose. */
  browser = null;
  /** Transport actually in use: "cdp" | "remote" | "headless" | null. */
  resolvedMode = null;
  /** Guards against spawning the host browser more than once. */
  attemptedHostLaunch = false;
  /**
   * @param {object} [config={}] - Partial browser config; missing fields are defaulted.
   */
  constructor(config = {}) {
    this.config = createDefaultBrowserRuntimeConfig(config);
  }
  /**
   * @returns {string|null} The transport chosen by the last successful connection.
   */
  getResolvedMode() {
    return this.resolvedMode;
  }
  /**
   * Navigate the session's page to `args.url`.
   *
   * @param {string} sessionId
   * @param {{url: string, waitUntil?: string, timeoutMs?: number}} args
   */
  async navigate(sessionId, args) {
    const page = await this.getPage(sessionId);
    const response = await page.goto(args.url, {
      waitUntil: args.waitUntil ?? "domcontentloaded",
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs
    });
    // Read url/title/status once so the text and the metadata always agree.
    const url = page.url();
    const title = await page.title();
    const status = response?.status() ?? null;
    const mode = this.getResolvedMode();
    return {
      text: `Navigated to ${url} (${title || "untitled"}) [mode=${mode || "unknown"} status=${status ?? "n/a"}]`,
      metadata: { url, title, status, mode }
    };
  }
  /**
   * Wait for the first match of `args.selector` to be visible, then click it.
   *
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async click(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    await locator.click({ timeout });
    return {
      text: `Clicked ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, mode: this.getResolvedMode() }
    };
  }
  /**
   * Fill text into the first match of `args.selector`.
   *
   * With `clearExisting` unset or true the field ends up containing exactly
   * `args.text`. With `clearExisting: false` the text is appended to the
   * field's current value; when that value cannot be read the call behaves
   * like a plain fill.
   *
   * @param {string} sessionId
   * @param {{selector: string, text: string, clearExisting?: boolean, timeoutMs?: number}} args
   */
  async type(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    let value = args.text;
    if (args.clearExisting !== false) {
      await locator.fill("", { timeout });
    } else {
      // fill() replaces the whole value, so preserving content means
      // re-submitting the current value with the new text appended.
      let current = "";
      try {
        current = await locator.inputValue({ timeout });
      } catch {
        // Presumably not an input-like element; fall back to a plain fill.
        current = "";
      }
      value = `${current}${args.text}`;
    }
    await locator.fill(value, { timeout });
    return {
      text: `Typed into ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, length: args.text.length }
    };
  }
  /**
   * Select one value or a list of values in the first match of `args.selector`.
   *
   * @param {string} sessionId
   * @param {{selector: string, value: string|string[], timeoutMs?: number}} args
   */
  async select(sessionId, args) {
    const page = await this.getPage(sessionId);
    const values = Array.isArray(args.value) ? args.value : [args.value];
    await page.locator(args.selector).first().selectOption(values, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Selected ${values.join(", ")} in ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, values }
    };
  }
  /**
   * Wait until the first match of `args.selector` reaches `args.state`
   * (default "visible").
   *
   * @param {string} sessionId
   * @param {{selector: string, state?: string, timeoutMs?: number}} args
   */
  async waitFor(sessionId, args) {
    const page = await this.getPage(sessionId);
    const state = args.state ?? "visible";
    await page.locator(args.selector).first().waitFor({
      state,
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Wait condition satisfied for ${args.selector} (${state}) on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, state }
    };
  }
  /**
   * Return the trimmed inner text of the first match of `args.selector`, or a
   * placeholder string when that text is empty.
   *
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async extractText(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "attached", timeout });
    const text = (await locator.innerText({ timeout })).trim();
    return {
      text: text || `[empty text at ${args.selector}]`,
      metadata: { url: page.url(), selector: args.selector }
    };
  }
  /**
   * Capture a PNG screenshot (full page unless `args.fullPage` is false) and
   * return it base64-encoded.
   *
   * @param {string} sessionId
   * @param {{fullPage?: boolean, timeoutMs?: number}} args
   */
  async screenshot(sessionId, args) {
    const page = await this.getPage(sessionId);
    const fullPage = args.fullPage ?? true;
    const buffer = await page.screenshot({
      fullPage,
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs,
      type: "png"
    });
    return {
      text: `Captured screenshot for ${page.url()} [mode=${this.getResolvedMode() || "unknown"}]`,
      imageBase64: buffer.toString("base64"),
      mediaType: "image/png",
      metadata: { url: page.url(), fullPage }
    };
  }
  /**
   * Trigger a download by clicking `args.selector` or navigating to
   * `args.url`, then save it to disk.
   *
   * @param {string} sessionId
   * @param {{selector?: string, url?: string, saveAs?: string, timeoutMs?: number}} args
   * @throws {Error} DOWNLOAD_FAILED (non-retryable) when neither selector nor url is given.
   */
  async download(sessionId, args) {
    const page = await this.getPage(sessionId);
    if (!args.selector && !args.url) {
      throw createBrowserToolError("DOWNLOAD_FAILED", 'Either "selector" or "url" is required.', false);
    }
    const timeout = args.timeoutMs ?? this.config.navigationTimeoutMs;
    const downloadPromise = page.waitForEvent("download", { timeout });
    // If the trigger below throws we never reach `await downloadPromise`; the
    // no-op handler keeps its eventual timeout from becoming an unhandled
    // rejection. Awaiting the promise later still rejects normally.
    downloadPromise.catch(() => {
    });
    if (args.selector) {
      await page.locator(args.selector).first().click({ timeout });
    } else {
      try {
        await page.goto(args.url, { waitUntil: "commit", timeout });
      } catch (error) {
        // A navigation that turns into a download is reported as an aborted
        // navigation ("net::ERR_ABORTED" / "Download is starting") even though
        // the download event still fires; anything else is a real failure.
        const detail = String(error?.message ?? error).toLowerCase();
        const becameDownload = detail.includes("err_aborted") || detail.includes("download is starting");
        if (!becameDownload) {
          throw error;
        }
      }
    }
    const download = await downloadPromise;
    const downloadPath = await this.saveDownload(download, args.saveAs);
    return {
      text: `Downloaded artifact to ${downloadPath}`,
      downloadPath,
      metadata: { url: page.url(), suggestedFilename: download.suggestedFilename() }
    };
  }
  /**
   * Attach the local file at `args.path` to the first match of `args.selector`.
   *
   * @param {string} sessionId
   * @param {{selector: string, path: string, timeoutMs?: number}} args
   */
  async upload(sessionId, args) {
    const page = await this.getPage(sessionId);
    const absolutePath = resolve5(args.path);
    await page.locator(args.selector).first().setInputFiles(absolutePath, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Uploaded ${absolutePath} into ${args.selector}`,
      artifactPath: absolutePath,
      metadata: { url: page.url(), selector: args.selector, path: absolutePath }
    };
  }
  /**
   * Close the page (and the context, when this manager created it) that
   * belongs to `sessionId`. Closing an unknown session is not an error.
   *
   * @param {string} sessionId
   */
  async closeSession(sessionId) {
    const state = this.sessions.get(sessionId);
    if (!state) {
      return {
        text: `Session ${sessionId} already closed`,
        metadata: { sessionId, mode: this.getResolvedMode() }
      };
    }
    try {
      try {
        if (!state.page.isClosed()) {
          await state.page.close();
        }
      } finally {
        // Release the context even when closing the page failed.
        if (state.ownsContext) {
          await state.context.close();
        }
      }
    } finally {
      this.sessions.delete(sessionId);
    }
    return {
      text: `Closed browser session ${sessionId}`,
      metadata: { sessionId, mode: this.getResolvedMode() }
    };
  }
  /**
   * Close every session and the browser connection, returning the manager to
   * its initial (unconnected) state. The browser is closed even when closing
   * a session fails; that failure is still propagated.
   */
  async dispose() {
    try {
      for (const sessionId of Array.from(this.sessions.keys())) {
        await this.closeSession(sessionId);
      }
    } finally {
      const browser = this.browser;
      this.browser = null;
      this.resolvedMode = null;
      this.sessions.clear();
      if (browser) {
        await browser.close();
      }
    }
  }
  /**
   * Return the live page for `sessionId`, creating the page (and, outside CDP
   * mode, a dedicated context) on first use or after the page was closed.
   *
   * @param {string} sessionId
   * @throws {Error} CAPABILITY_UNAVAILABLE when no browser mode could be resolved.
   */
  async getPage(sessionId) {
    const existing = this.sessions.get(sessionId);
    if (existing && !existing.page.isClosed()) {
      return existing.page;
    }
    if (existing) {
      // The page is gone; drop the stale entry and release its context so it
      // is not leaked when the session is recreated below.
      this.sessions.delete(sessionId);
      if (existing.ownsContext) {
        try {
          await existing.context.close();
        } catch {
          // Best effort: the context may already be closed with its browser.
        }
      }
    }
    const browser = await this.ensureBrowser();
    const mode = this.resolvedMode;
    if (!mode) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser mode could not be resolved.", false);
    }
    let context;
    let ownsContext = true;
    if (mode === "cdp") {
      // Prefer the first context the connected browser already exposes; it is
      // never closed by this manager.
      context = browser.contexts()[0] || await browser.newContext({ acceptDownloads: true });
      ownsContext = false;
    } else {
      context = await browser.newContext({ acceptDownloads: true });
    }
    let page;
    try {
      page = await context.newPage();
    } catch (error) {
      if (ownsContext) {
        try {
          await context.close();
        } catch {
          // Ignore: the original failure is the one worth reporting.
        }
      }
      throw error;
    }
    this.sessions.set(sessionId, { sessionId, page, context, ownsContext });
    return page;
  }
  /**
   * Return the shared browser, connecting or launching it on first use and
   * reconnecting when the previous handle reports it is disconnected.
   *
   * @throws {Error} CAPABILITY_UNAVAILABLE when browser automation is disabled
   *   or the explicitly requested transport is unavailable.
   */
  async ensureBrowser() {
    if (!this.config.enabled) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser automation is disabled in config.", false);
    }
    if (this.browser) {
      if (this.browser.isConnected()) return this.browser;
      // Stale handle: forget it and resolve a fresh connection.
      this.browser = null;
      this.resolvedMode = null;
    }
    const desiredMode = this.config.mode;
    if (desiredMode === "cdp") {
      return this.useBrowser(await this.connectCdpOrThrow(), "cdp");
    }
    if (desiredMode === "remote") {
      return this.useBrowser(await this.connectRemoteOrThrow(), "remote");
    }
    if (desiredMode === "headless") {
      return this.useBrowser(await this.connectHeadless(), "headless");
    }
    // "auto": each failed transport deliberately falls through to the next.
    try {
      return this.useBrowser(await this.connectCdpOrThrow(), "cdp");
    } catch {
    }
    if (this.config.remoteUrl) {
      try {
        return this.useBrowser(await this.connectRemoteOrThrow(), "remote");
      } catch {
      }
    }
    return this.useBrowser(await this.connectHeadless(), "headless");
  }
  /**
   * Record `browser` as the active connection resolved through `mode`.
   *
   * @param {object} browser
   * @param {string} mode
   * @returns {object} The same browser handle.
   */
  useBrowser(browser, mode) {
    this.browser = browser;
    this.resolvedMode = mode;
    return browser;
  }
  /**
   * Connect over CDP to `127.0.0.1:<cdpPort>`, optionally launching the host
   * browser once when the endpoint is not reachable.
   *
   * @throws {Error} CAPABILITY_UNAVAILABLE when the endpoint stays unreachable.
   */
  async connectCdpOrThrow() {
    const url = `http://127.0.0.1:${this.config.cdpPort}`;
    let reachable = await this.isCdpReachable(url);
    if (!reachable && this.config.launchHostBrowser && !this.attemptedHostLaunch) {
      this.launchHostChrome();
      this.attemptedHostLaunch = true;
      await sleep(1500);
      // Only probe again when a launch was attempted; otherwise the first
      // answer is still current.
      reachable = await this.isCdpReachable(url);
    }
    if (!reachable) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        `CDP endpoint ${url} is not reachable.`,
        false
      );
    }
    return chromium.connectOverCDP(url);
  }
  /**
   * Connect over CDP to the configured remote URL.
   *
   * @throws {Error} CAPABILITY_UNAVAILABLE when no remote URL is configured.
   */
  async connectRemoteOrThrow() {
    if (!this.config.remoteUrl) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Remote browser URL is not configured.", false);
    }
    return chromium.connectOverCDP(this.config.remoteUrl);
  }
  /** Launch a local headless Chromium instance. */
  async connectHeadless() {
    return chromium.launch({
      headless: true,
      args: ["--disable-dev-shm-usage", "--no-sandbox"]
    });
  }
  /**
   * Probe `<baseUrl>/json/version`, giving up after 1.5 seconds.
   *
   * @param {string} baseUrl
   * @returns {Promise<boolean>} true only for an OK HTTP response.
   */
  async isCdpReachable(baseUrl) {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 1500);
      try {
        const response = await fetch(`${baseUrl}/json/version`, {
          signal: controller.signal
        });
        return response.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      return false;
    }
  }
  /**
   * Spawn a detached Chrome with remote debugging enabled on `cdpPort`,
   * using a profile directory under `<cwd>/.lydia-artifacts/chrome-profile`.
   */
  launchHostChrome() {
    const binary = this.resolveChromeBinary();
    // resolveChromeBinary currently always yields a path; the guard is kept
    // in case that resolution ever becomes fallible.
    if (!binary) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        "CDP launch requested but no Chrome executable could be resolved.",
        false
      );
    }
    const userDataDir = join8(process.cwd(), ".lydia-artifacts", "chrome-profile");
    const args = [
      `--remote-debugging-port=${this.config.cdpPort}`,
      "--no-first-run",
      "--no-default-browser-check",
      `--user-data-dir=${userDataDir}`
    ];
    const child = spawn(binary, args, {
      detached: true,
      stdio: "ignore",
      windowsHide: true
    });
    // A child process that fails to start (e.g. ENOENT) emits 'error'; with no
    // listener that event is thrown as an uncaught exception. The caller's
    // follow-up reachability probe reports the failure instead.
    child.on("error", () => {
    });
    child.unref();
  }
  /**
   * @returns {string} `config.chromePath` when set, otherwise a per-platform
   *   default Chrome location (or `google-chrome` resolved via PATH).
   */
  resolveChromeBinary() {
    if (this.config.chromePath) {
      return this.config.chromePath;
    }
    const os10 = platform();
    if (os10 === "win32") {
      return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
    }
    if (os10 === "darwin") {
      return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
    }
    return "google-chrome";
  }
  /**
   * Persist a download to `requestedPath`, or to a timestamp-prefixed file in
   * `config.downloadDir`, creating parent directories as needed.
   *
   * @param {object} download - Download handle exposing `suggestedFilename()` and `saveAs()`.
   * @param {string} [requestedPath]
   * @returns {Promise<string>} Absolute path of the saved file.
   */
  async saveDownload(download, requestedPath) {
    const filename = requestedPath ? resolve5(requestedPath) : resolve5(this.config.downloadDir, `${Date.now().toString(36)}-${download.suggestedFilename()}`);
    await mkdir4(dirname5(filename), { recursive: true });
    await download.saveAs(filename);
    return filename;
  }
};
3675
/**
 * Convert an arbitrary thrown value into a coded browser-tool error.
 *
 * Values that already carry a string `code` are returned untouched. Other
 * `Error`s are classified by keywords in their message, and the original error
 * is kept on the result's `cause`. Non-Error values become an UNKNOWN error
 * built from `String(error)`.
 *
 * The most specific markers are tested first. Messages about a closed target
 * or a missing browser executable frequently also mention generic words such
 * as "download" or "navigation" (for example the install hint that tells the
 * user to "download new browsers"), so checking the generic keywords first
 * would mislabel them.
 *
 * @param {unknown} error - Anything caught from a browser operation.
 * @returns {Error} Error carrying `code` and, when created here, `retryable`.
 */
function normalizeBrowserRuntimeError(error) {
  if (error && typeof error === "object" && "code" in error && typeof error.code === "string") {
    return error;
  }
  if (!(error instanceof Error)) {
    return createBrowserToolError("UNKNOWN", String(error), true);
  }
  const message = error.message || "Unknown browser error";
  const lowered = message.toLowerCase();
  const mentions = (...needles) => needles.some((needle) => lowered.includes(needle));
  const classify = (code, retryable) => {
    const normalized = createBrowserToolError(code, message, retryable);
    // Keep the original error (and its stack) reachable for diagnostics.
    normalized.cause = error;
    return normalized;
  };
  if (mentions("target page, context or browser has been closed")) {
    return classify("SESSION_CLOSED", true);
  }
  if (mentions("executable")) {
    return classify("CAPABILITY_UNAVAILABLE", false);
  }
  if (mentions("timeout")) {
    return classify("BROWSER_TIMEOUT", true);
  }
  if (mentions("not found", "waiting for locator")) {
    return classify("ELEMENT_NOT_FOUND", true);
  }
  if (mentions("not visible", "not enabled", "intercept")) {
    return classify("ELEMENT_NOT_INTERACTABLE", true);
  }
  if (mentions("net::", "navigation")) {
    return classify("NAVIGATION_BLOCKED", true);
  }
  if (mentions("download")) {
    return classify("DOWNLOAD_FAILED", true);
  }
  if (mentions("upload", "input files")) {
    return classify("UPLOAD_FAILED", true);
  }
  if (mentions("playwright")) {
    return classify("CAPABILITY_UNAVAILABLE", false);
  }
  return classify("UNKNOWN", true);
}
3710
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
3713
+
3714
+ // src/mcp/servers/browser.ts
3715
// Reserved tool-argument key that carries the session id; it is stripped from
// the arguments before they reach the runtime.
var SESSION_ARG = "__lydiaSessionId";
/**
 * MCP server ("internal-browser") exposing the browser runtime as tools.
 *
 * Each tool call is routed to the runtime method of the same purpose, scoped
 * to the session named by the reserved session argument (or "default").
 * Failures are normalized and returned as `isError` tool results.
 */
var BrowserServer = class {
  server;
  runtime;
  /**
   * @param {object} [config={}] - Browser config used to build the default runtime.
   * @param {object} [runtime] - Runtime implementation; defaults to a new BrowserAutomationManager.
   */
  constructor(config = {}, runtime = new BrowserAutomationManager(createDefaultBrowserRuntimeConfig(config))) {
    this.runtime = runtime;
    const identity = { name: "internal-browser", version: "0.1.2" };
    const options = { capabilities: { tools: {} } };
    this.server = new Server6(identity, options);
    this.setupHandlers();
  }
  /**
   * Close one runtime session; a falsy id is ignored.
   *
   * @param {string} sessionId
   */
  async closeSession(sessionId) {
    if (sessionId) {
      await this.runtime.closeSession(sessionId);
    }
  }
  /** Dispose the underlying runtime. */
  async dispose() {
    await this.runtime.dispose();
  }
  /** Register the tool-listing and tool-call handlers on the MCP server. */
  setupHandlers() {
    // Small builders for the JSON-schema fragments shared by the tool table.
    const str = (description) => ({ type: "string", description });
    const bool = (description) => ({ type: "boolean", description });
    const choice = (values, description) => ({ type: "string", enum: values, description });
    const timeoutMs = () => ({ type: "number", description: "Optional timeout override in milliseconds" });
    const tool = (name, description, properties, required) => {
      const inputSchema = { type: "object", properties };
      if (required) {
        inputSchema.required = required;
      }
      return { name, description, inputSchema };
    };
    // Built per request so every response receives fresh objects.
    const describeTools = () => [
      tool(
        "browser_navigate",
        "Navigate the current browser session to a URL.",
        {
          url: str("Destination URL"),
          waitUntil: choice(["load", "domcontentloaded", "networkidle", "commit"], "Navigation completion condition"),
          timeoutMs: timeoutMs()
        },
        ["url"]
      ),
      tool(
        "browser_click",
        "Click an element in the current page by selector.",
        {
          selector: str("CSS selector for the target element"),
          timeoutMs: timeoutMs()
        },
        ["selector"]
      ),
      tool(
        "browser_type",
        "Type text into an element in the current page.",
        {
          selector: str("CSS selector for the target input"),
          text: str("Text to enter"),
          clearExisting: bool("Clear existing value before typing"),
          timeoutMs: timeoutMs()
        },
        ["selector", "text"]
      ),
      tool(
        "browser_select",
        "Select one or more values from a select element.",
        {
          selector: str("CSS selector for the select element"),
          value: {
            oneOf: [{ type: "string" }, { type: "array", items: { type: "string" } }],
            description: "Value or list of values to select"
          },
          timeoutMs: timeoutMs()
        },
        ["selector", "value"]
      ),
      tool(
        "browser_wait_for",
        "Wait for a selector to reach a state.",
        {
          selector: str("CSS selector to wait for"),
          state: choice(["attached", "detached", "visible", "hidden"], "Target element state"),
          timeoutMs: timeoutMs()
        },
        ["selector"]
      ),
      tool(
        "browser_extract_text",
        "Extract visible text from an element.",
        {
          selector: str("CSS selector for the text source"),
          timeoutMs: timeoutMs()
        },
        ["selector"]
      ),
      tool(
        "browser_screenshot",
        "Capture a screenshot of the current page.",
        {
          fullPage: bool("Capture the full page instead of only the viewport"),
          timeoutMs: timeoutMs()
        }
      ),
      tool(
        "browser_download",
        "Download a browser artifact from a URL or via a click action.",
        {
          selector: str("Selector to click to trigger a download"),
          url: str("Direct download URL"),
          saveAs: str("Optional output path override"),
          timeoutMs: timeoutMs()
        }
      ),
      tool(
        "browser_upload",
        "Upload a local file into a file input element.",
        {
          selector: str("CSS selector for the file input"),
          path: str("Absolute or relative local file path"),
          timeoutMs: timeoutMs()
        },
        ["selector", "path"]
      ),
      tool("browser_close", "Close the current Lydia browser session.", {})
    ];
    this.server.setRequestHandler(ListToolsRequestSchema6, async () => ({ tools: describeTools() }));
    // Tool name -> runtime invocation. A Map keeps inherited object keys
    // (e.g. "constructor") from being mistaken for tools.
    const actions = /* @__PURE__ */ new Map([
      ["browser_navigate", (id, input) => this.runtime.navigate(id, input)],
      ["browser_click", (id, input) => this.runtime.click(id, input)],
      ["browser_type", (id, input) => this.runtime.type(id, input)],
      ["browser_select", (id, input) => this.runtime.select(id, input)],
      ["browser_wait_for", (id, input) => this.runtime.waitFor(id, input)],
      ["browser_extract_text", (id, input) => this.runtime.extractText(id, input)],
      ["browser_screenshot", (id, input) => this.runtime.screenshot(id, input)],
      ["browser_download", (id, input) => this.runtime.download(id, input)],
      ["browser_upload", (id, input) => this.runtime.upload(id, input)],
      ["browser_close", (id) => this.runtime.closeSession(id)]
    ]);
    this.server.setRequestHandler(CallToolRequestSchema6, async (request) => {
      const rawArgs = request.params.arguments || {};
      const requestedSession = rawArgs[SESSION_ARG];
      const sessionId = typeof requestedSession === "string" && requestedSession ? requestedSession : "default";
      const toolInput = Object.fromEntries(
        Object.entries(rawArgs).filter((entry) => entry[0] !== SESSION_ARG)
      );
      try {
        const toolName = request.params.name;
        const run = actions.get(toolName);
        if (!run) {
          throw new Error(`Unknown tool: ${toolName}`);
        }
        return this.ok(await run(sessionId, toolInput));
      } catch (error) {
        const { message } = normalizeBrowserRuntimeError(error);
        return {
          content: [{ type: "text", text: message }],
          isError: true
        };
      }
    });
  }
  /**
   * Shape a runtime result as a tool response: a text block, an optional
   * image block, and the artifact/download/metadata fields passed through.
   *
   * @param {object} result - Runtime action result.
   */
  ok(result) {
    const { text, imageBase64, mediaType, artifactPath, downloadPath, metadata } = result;
    const content = [{ type: "text", text }];
    if (imageBase64 && mediaType) {
      content.push({ type: "image", data: imageBase64, mimeType: mediaType });
    }
    return { content, artifactPath, downloadPath, metadata };
  }
};
3936
+
3312
3937
  // src/mcp/client.ts
3313
3938
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
3314
3939
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
@@ -3432,9 +4057,12 @@ var McpCanonicalCapabilityAdapter = class {
3432
4057
  blocks.push({ type: "text", text: contentBlock.text });
3433
4058
  continue;
3434
4059
  }
3435
- if (contentBlock?.type === "image" && contentBlock.source?.type === "base64" && typeof contentBlock.source.media_type === "string" && typeof contentBlock.source.data === "string") {
3436
- const mediaType = contentBlock.source.media_type;
3437
- const base64Data = contentBlock.source.data;
4060
+ if (contentBlock?.type === "image") {
4061
+ const mediaType = typeof contentBlock.source?.media_type === "string" ? contentBlock.source.media_type : typeof contentBlock.mimeType === "string" ? contentBlock.mimeType : void 0;
4062
+ const base64Data = typeof contentBlock.source?.data === "string" ? contentBlock.source.data : typeof contentBlock.data === "string" ? contentBlock.data : void 0;
4063
+ if (!mediaType || !base64Data) {
4064
+ continue;
4065
+ }
3438
4066
  const dataRef = base64Data.length <= this.maxInlineImageBase64Length ? `data:${mediaType};base64,${base64Data}` : `inline://image/${mediaType}/${base64Data.length}`;
3439
4067
  blocks.push({
3440
4068
  type: "image",
@@ -5483,6 +6111,7 @@ var Agent = class extends EventEmitter5 {
5483
6111
  currentTaskCreatedAt;
5484
6112
  // Centralized built-in server descriptors keep MCP wiring declarative.
5485
6113
  builtinServerSpecs = [];
6114
+ browserServer;
5486
6115
  options;
5487
6116
  computerUseAdapter;
5488
6117
  computerUseOrchestrator;
@@ -5582,6 +6211,13 @@ var Agent = class extends EventEmitter5 {
5582
6211
  { id: "internal-fs", create: () => new FileSystemServer().server },
5583
6212
  { id: "internal-git", create: () => new GitServer().server }
5584
6213
  ];
6214
+ if (config.browser?.enabled !== false) {
6215
+ this.browserServer = new BrowserServer(config.browser);
6216
+ this.builtinServerSpecs.push({
6217
+ id: "internal-browser",
6218
+ create: () => this.browserServer.server
6219
+ });
6220
+ }
5585
6221
  await this.connectBuiltinServers();
5586
6222
  await this.connectExternalMcpServers(config.mcpServers);
5587
6223
  this.isInitialized = true;
@@ -5815,6 +6451,7 @@ ${planGuidance}` : baseSystemPrompt;
5815
6451
  } catch {
5816
6452
  }
5817
6453
  if (this.computerUseSessionId) {
6454
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5818
6455
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5819
6456
  if (terminalCheckpoint) {
5820
6457
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -5920,6 +6557,7 @@ ${planGuidance}` : baseSystemPrompt;
5920
6557
  } catch {
5921
6558
  }
5922
6559
  if (this.computerUseSessionId) {
6560
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5923
6561
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5924
6562
  if (terminalCheckpoint) {
5925
6563
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6504,7 +7142,10 @@ ${steps.join("\n")}`;
6504
7142
  action,
6505
7143
  adapter: this.computerUseAdapter,
6506
7144
  toolName,
6507
- invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(resolvedToolName, resolvedArgs)
7145
+ invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(
7146
+ resolvedToolName,
7147
+ this.attachInternalBrowserSessionArg(resolvedToolName, sessionId, resolvedArgs)
7148
+ )
6508
7149
  });
6509
7150
  this.memoryManager.recordObservationFrame(this.currentTaskId, dispatchResult.frame);
6510
7151
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6544,6 +7185,23 @@ ${steps.join("\n")}`;
6544
7185
  }
6545
7186
  return void 0;
6546
7187
  }
7188
+ attachInternalBrowserSessionArg(toolName, sessionId, args) {
7189
+ const toolInfo = this.mcpClientManager.getToolInfo(toolName);
7190
+ if (!toolInfo || toolInfo.serverId !== "internal-browser") {
7191
+ return args;
7192
+ }
7193
+ return {
7194
+ ...args,
7195
+ __lydiaSessionId: sessionId
7196
+ };
7197
+ }
7198
+ async closeBrowserAutomationSession(sessionId) {
7199
+ if (!sessionId || !this.browserServer) return;
7200
+ try {
7201
+ await this.browserServer.closeSession(sessionId);
7202
+ } catch {
7203
+ }
7204
+ }
6547
7205
  inferComputerUseDomain(canonicalAction) {
6548
7206
  return canonicalAction.startsWith("desktop_") ? "desktop" : "browser";
6549
7207
  }
@@ -6586,13 +7244,18 @@ ${steps.join("\n")}`;
6586
7244
  normalized.push({ type: "text", text: block.text });
6587
7245
  continue;
6588
7246
  }
6589
- if (block?.type === "image" && block.source?.type === "base64" && typeof block.source.media_type === "string" && typeof block.source.data === "string") {
7247
+ if (block?.type === "image") {
7248
+ const mediaType = typeof block.source?.media_type === "string" ? block.source.media_type : typeof block.mimeType === "string" ? block.mimeType : void 0;
7249
+ const data = typeof block.source?.data === "string" ? block.source.data : typeof block.data === "string" ? block.data : void 0;
7250
+ if (!mediaType || !data) {
7251
+ continue;
7252
+ }
6590
7253
  normalized.push({
6591
7254
  type: "image",
6592
7255
  source: {
6593
7256
  type: "base64",
6594
- media_type: block.source.media_type,
6595
- data: block.source.data
7257
+ media_type: mediaType,
7258
+ data
6596
7259
  }
6597
7260
  });
6598
7261
  }
@@ -6706,7 +7369,7 @@ ${steps.join("\n")}`;
6706
7369
  return msg.includes("rate limit") || msg.includes("429") || msg.includes("500") || msg.includes("502") || msg.includes("503") || msg.includes("504") || msg.includes("timeout") || msg.includes("econnreset") || msg.includes("econnrefused") || msg.includes("network") || msg.includes("fetch failed");
6707
7370
  }
6708
7371
  sleep(ms) {
6709
- return new Promise((resolve6) => setTimeout(resolve6, ms));
7372
+ return new Promise((resolve7) => setTimeout(resolve7, ms));
6710
7373
  }
6711
7374
  // ─── Interaction ──────────────────────────────────────────────────────
6712
7375
  resolveInteraction(id, response) {
@@ -7894,6 +8557,8 @@ export {
7894
8557
  Agent,
7895
8558
  AnthropicProvider,
7896
8559
  BasicStrategyGate,
8560
+ BrowserAutomationManager,
8561
+ BrowserServer,
7897
8562
  COMPUTER_USE_ERROR_CODES,
7898
8563
  ComputerUseSessionOrchestrator,
7899
8564
  ConfigLoader,
@@ -7966,6 +8631,8 @@ export {
7966
8631
  ToolResultContentSchema,
7967
8632
  ToolUseContentSchema,
7968
8633
  assessRisk,
8634
+ createBrowserToolError,
8635
+ createDefaultBrowserRuntimeConfig,
7969
8636
  createLLMFromConfig,
7970
8637
  getSkillContent,
7971
8638
  hasContent,
@@ -7973,6 +8640,7 @@ export {
7973
8640
  isComputerUseErrorCode,
7974
8641
  isDynamicSkill,
7975
8642
  listCanonicalComputerUseActions,
8643
+ normalizeBrowserRuntimeError,
7976
8644
  normalizeComputerUseError,
7977
8645
  resolveCanonicalComputerUseToolName
7978
8646
  };