@lydia-agent/core 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -33,6 +33,8 @@ __export(index_exports, {
33
33
  Agent: () => Agent,
34
34
  AnthropicProvider: () => AnthropicProvider,
35
35
  BasicStrategyGate: () => BasicStrategyGate,
36
+ BrowserAutomationManager: () => BrowserAutomationManager,
37
+ BrowserServer: () => BrowserServer,
36
38
  COMPUTER_USE_ERROR_CODES: () => COMPUTER_USE_ERROR_CODES,
37
39
  ComputerUseSessionOrchestrator: () => ComputerUseSessionOrchestrator,
38
40
  ConfigLoader: () => ConfigLoader,
@@ -105,6 +107,8 @@ __export(index_exports, {
105
107
  ToolResultContentSchema: () => ToolResultContentSchema,
106
108
  ToolUseContentSchema: () => ToolUseContentSchema,
107
109
  assessRisk: () => assessRisk,
110
+ createBrowserToolError: () => createBrowserToolError,
111
+ createDefaultBrowserRuntimeConfig: () => createDefaultBrowserRuntimeConfig,
108
112
  createLLMFromConfig: () => createLLMFromConfig,
109
113
  getSkillContent: () => getSkillContent,
110
114
  hasContent: () => hasContent,
@@ -112,12 +116,13 @@ __export(index_exports, {
112
116
  isComputerUseErrorCode: () => isComputerUseErrorCode,
113
117
  isDynamicSkill: () => isDynamicSkill,
114
118
  listCanonicalComputerUseActions: () => listCanonicalComputerUseActions,
119
+ normalizeBrowserRuntimeError: () => normalizeBrowserRuntimeError,
115
120
  normalizeComputerUseError: () => normalizeComputerUseError,
116
121
  resolveCanonicalComputerUseToolName: () => resolveCanonicalComputerUseToolName
117
122
  });
118
123
  module.exports = __toCommonJS(index_exports);
119
124
 
120
- // ../../node_modules/.pnpm/tsup@8.5.1_jiti@1.21.7_post_ace0071d99dc9ba490e13d092e298186/node_modules/tsup/assets/cjs_shims.js
125
+ // ../../node_modules/.pnpm/tsup@8.5.1_jiti@1.21.7_postcss@8.5.6_tsx@4.21.0_typescript@5.9.3_yaml@2.8.2/node_modules/tsup/assets/cjs_shims.js
121
126
  var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.tagName.toUpperCase() === "SCRIPT" ? document.currentScript.src : new URL("main.js", document.baseURI).href;
122
127
  var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
123
128
 
@@ -1519,6 +1524,17 @@ var ConfigSchema = import_zod5.z.object({
1519
1524
  checkpointTtlHours: import_zod5.z.number().default(24),
1520
1525
  observationFrameTtlHours: import_zod5.z.number().default(24 * 7)
1521
1526
  }).default({}),
1527
+ browser: import_zod5.z.object({
1528
+ enabled: import_zod5.z.boolean().default(true),
1529
+ mode: import_zod5.z.enum(["auto", "cdp", "headless", "remote"]).default("auto"),
1530
+ cdpPort: import_zod5.z.number().int().positive().default(9222),
1531
+ remoteUrl: import_zod5.z.string().default(""),
1532
+ chromePath: import_zod5.z.string().default(""),
1533
+ launchHostBrowser: import_zod5.z.boolean().default(false),
1534
+ navigationTimeoutMs: import_zod5.z.number().positive().default(3e4),
1535
+ actionTimeoutMs: import_zod5.z.number().positive().default(1e4),
1536
+ downloadDir: import_zod5.z.string().default("")
1537
+ }).default({}),
1522
1538
  skills: import_zod5.z.object({
1523
1539
  /** Maximum number of skills whose full content is injected into the prompt (default: 3) */
1524
1540
  matchTopK: import_zod5.z.number().default(3),
@@ -1582,6 +1598,10 @@ var ConfigLoader = class {
1582
1598
  ...current.memory || {},
1583
1599
  ...partial.memory
1584
1600
  },
1601
+ browser: {
1602
+ ...current.browser || {},
1603
+ ...partial.browser
1604
+ },
1585
1605
  skills: {
1586
1606
  ...current.skills || {},
1587
1607
  ...partial.skills
@@ -3412,8 +3432,8 @@ var InteractionServer = class extends import_node_events3.EventEmitter {
3412
3432
  prompt: args.prompt
3413
3433
  };
3414
3434
  this.emit("request", interaction);
3415
- const responseText = await new Promise((resolve6) => {
3416
- this.pendingInteractions.set(id, resolve6);
3435
+ const responseText = await new Promise((resolve7) => {
3436
+ this.pendingInteractions.set(id, resolve7);
3417
3437
  });
3418
3438
  return {
3419
3439
  content: [{ type: "text", text: responseText }]
@@ -3432,6 +3452,616 @@ var InteractionServer = class extends import_node_events3.EventEmitter {
3432
3452
  }
3433
3453
  };
3434
3454
 
3455
+ // src/mcp/servers/browser.ts
3456
+ var import_server6 = require("@modelcontextprotocol/sdk/server/index.js");
3457
+ var import_types8 = require("@modelcontextprotocol/sdk/types.js");
3458
+
3459
+ // src/browser/manager.ts
3460
+ var import_promises2 = require("fs/promises");
3461
+ var import_node_path2 = require("path");
3462
+ var import_node_os2 = require("os");
3463
+ var import_node_child_process2 = require("child_process");
3464
+ var import_playwright = require("playwright");
3465
/**
 * Build an Error tagged with a browser tool error code.
 *
 * @param {string} code - Error code; prefixed onto the message and stored as `error.code`.
 * @param {string} message - Human-readable detail.
 * @param {boolean} [retryable=true] - Stored as `error.retryable`.
 * @returns {Error} Error whose message is `${code}: ${message}`.
 */
function createBrowserToolError(code, message, retryable = true) {
  return Object.assign(new Error(`${code}: ${message}`), { code, retryable });
}
3471
/**
 * Fill in defaults for a partial browser runtime configuration.
 *
 * Every field except `downloadDir` falls back only when the supplied value is
 * `null`/`undefined`. `downloadDir` falls back on any falsy value (including
 * the empty string) to `<cwd>/.lydia-artifacts/browser-downloads`.
 *
 * @param {object} [partial={}] - Caller-supplied overrides.
 * @returns {object} Complete configuration object.
 */
function createDefaultBrowserRuntimeConfig(partial = {}) {
  const fallbacks = [
    ["enabled", true],
    ["mode", "auto"],
    ["cdpPort", 9222],
    ["remoteUrl", ""],
    ["chromePath", ""],
    ["launchHostBrowser", false],
    ["navigationTimeoutMs", 3e4],
    ["actionTimeoutMs", 1e4]
  ];
  const resolved = {};
  for (const [field, fallback] of fallbacks) {
    resolved[field] = partial[field] ?? fallback;
  }
  resolved.downloadDir = partial.downloadDir || (0, import_node_path2.join)(process.cwd(), ".lydia-artifacts", "browser-downloads");
  return resolved;
}
3484
/**
 * Owns one Playwright browser connection and a set of per-session pages.
 *
 * Each public action takes a session id, lazily creates a page for it, runs
 * the action, and returns `{ text, metadata, ... }`. Failures are thrown
 * as-is (Playwright errors or errors from `createBrowserToolError`); callers
 * are expected to normalize them.
 */
var BrowserAutomationManager = class {
  /** Effective configuration (constructor input merged with defaults). */
  config;
  /** sessionId -> { sessionId, page, context, ownsContext }. */
  sessions = /* @__PURE__ */ new Map();
  /** Connected/launched Playwright browser, or null until first use. */
  browser = null;
  /** "cdp" | "remote" | "headless" once a browser has been obtained. */
  resolvedMode = null;
  /** Guards against spawning a host Chrome more than once. */
  attemptedHostLaunch = false;

  /**
   * @param {object} [config={}] - Partial runtime config; missing fields are defaulted.
   */
  constructor(config = {}) {
    this.config = createDefaultBrowserRuntimeConfig(config);
  }

  /** @returns {string|null} The transport actually in use, or null before first connection. */
  getResolvedMode() {
    return this.resolvedMode;
  }

  /**
   * Navigate the session's page to `args.url`.
   * @param {string} sessionId
   * @param {{url: string, waitUntil?: string, timeoutMs?: number}} args
   */
  async navigate(sessionId, args) {
    const page = await this.getPage(sessionId);
    const response = await page.goto(args.url, {
      waitUntil: args.waitUntil ?? "domcontentloaded",
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs
    });
    const url = page.url();
    // Read the title once so the text and metadata cannot disagree.
    const title = await page.title();
    const status = response?.status() ?? null;
    const mode = this.getResolvedMode();
    return {
      text: `Navigated to ${url} (${title || "untitled"}) [mode=${mode || "unknown"} status=${status ?? "n/a"}]`,
      metadata: { url, title, status, mode }
    };
  }

  /**
   * Click the first element matching `args.selector` once it is visible.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async click(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    await locator.click({ timeout });
    return {
      text: `Clicked ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, mode: this.getResolvedMode() }
    };
  }

  /**
   * Enter text into the first element matching `args.selector`.
   *
   * By default the existing value is replaced. With `clearExisting: false`
   * the text is appended to the element's current value; if that value cannot
   * be read (e.g. the element is not an input/textarea/select) the content is
   * replaced instead.
   *
   * @param {string} sessionId
   * @param {{selector: string, text: string, clearExisting?: boolean, timeoutMs?: number}} args
   */
  async type(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    let value = args.text;
    if (args.clearExisting === false) {
      // `fill` always overwrites, so appending requires prefixing the current value.
      const existing = await locator.inputValue({ timeout }).catch(() => "");
      value = `${existing}${args.text}`;
    }
    await locator.fill(value, { timeout });
    return {
      text: `Typed into ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, length: args.text.length }
    };
  }

  /**
   * Select one or more option values in a select element.
   * @param {string} sessionId
   * @param {{selector: string, value: string|string[], timeoutMs?: number}} args
   */
  async select(sessionId, args) {
    const page = await this.getPage(sessionId);
    const values = Array.isArray(args.value) ? args.value : [args.value];
    await page.locator(args.selector).first().selectOption(values, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Selected ${values.join(", ")} in ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, values }
    };
  }

  /**
   * Wait until the first element matching `args.selector` reaches `args.state`
   * (default "visible").
   * @param {string} sessionId
   * @param {{selector: string, state?: string, timeoutMs?: number}} args
   */
  async waitFor(sessionId, args) {
    const page = await this.getPage(sessionId);
    const state = args.state ?? "visible";
    await page.locator(args.selector).first().waitFor({
      state,
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Wait condition satisfied for ${args.selector} (${state}) on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, state }
    };
  }

  /**
   * Return the trimmed `innerText` of the first element matching `args.selector`.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async extractText(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "attached", timeout });
    const text = (await locator.innerText({ timeout })).trim();
    return {
      text: text || `[empty text at ${args.selector}]`,
      metadata: { url: page.url(), selector: args.selector }
    };
  }

  /**
   * Capture a PNG screenshot (full page unless `args.fullPage` is false).
   * @param {string} sessionId
   * @param {{fullPage?: boolean, timeoutMs?: number}} args
   */
  async screenshot(sessionId, args) {
    const page = await this.getPage(sessionId);
    const fullPage = args.fullPage ?? true;
    const buffer = await page.screenshot({
      fullPage,
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs,
      type: "png"
    });
    return {
      text: `Captured screenshot for ${page.url()} [mode=${this.getResolvedMode() || "unknown"}]`,
      imageBase64: buffer.toString("base64"),
      mediaType: "image/png",
      metadata: { url: page.url(), fullPage }
    };
  }

  /**
   * Trigger a download by clicking `args.selector` or navigating to `args.url`,
   * then save it to disk.
   *
   * @param {string} sessionId
   * @param {{selector?: string, url?: string, saveAs?: string, timeoutMs?: number}} args
   * @throws DOWNLOAD_FAILED (non-retryable) when neither selector nor url is given.
   */
  async download(sessionId, args) {
    const page = await this.getPage(sessionId);
    if (!args.selector && !args.url) {
      throw createBrowserToolError("DOWNLOAD_FAILED", 'Either "selector" or "url" is required.', false);
    }
    const timeout = args.timeoutMs ?? this.config.navigationTimeoutMs;
    const downloadPromise = page.waitForEvent("download", { timeout });
    // If the trigger below throws, nobody awaits downloadPromise; without this
    // no-op handler its later timeout would be an unhandled rejection.
    downloadPromise.catch(() => {
    });
    if (args.selector) {
      await page.locator(args.selector).first().click({ timeout });
    } else {
      try {
        await page.goto(args.url, { waitUntil: "commit", timeout });
      } catch (error) {
        // A navigation that turns into a download is aborted by the browser;
        // that is the expected outcome here, so keep waiting for the event.
        const detail = String(error?.message ?? error).toLowerCase();
        const abortedByDownload = detail.includes("net::err_aborted") || detail.includes("download is starting");
        if (!abortedByDownload) {
          throw error;
        }
      }
    }
    const download = await downloadPromise;
    const downloadPath = await this.saveDownload(download, args.saveAs);
    return {
      text: `Downloaded artifact to ${downloadPath}`,
      downloadPath,
      metadata: { url: page.url(), suggestedFilename: download.suggestedFilename() }
    };
  }

  /**
   * Attach a local file to the first file input matching `args.selector`.
   * @param {string} sessionId
   * @param {{selector: string, path: string, timeoutMs?: number}} args
   */
  async upload(sessionId, args) {
    const page = await this.getPage(sessionId);
    const absolutePath = (0, import_node_path2.resolve)(args.path);
    await page.locator(args.selector).first().setInputFiles(absolutePath, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Uploaded ${absolutePath} into ${args.selector}`,
      artifactPath: absolutePath,
      metadata: { url: page.url(), selector: args.selector, path: absolutePath }
    };
  }

  /**
   * Close the page (and the context, when this manager created it) for a
   * session. Unknown sessions are reported as already closed.
   * @param {string} sessionId
   */
  async closeSession(sessionId) {
    const state = this.sessions.get(sessionId);
    if (!state) {
      return {
        text: `Session ${sessionId} already closed`,
        metadata: { sessionId, mode: this.getResolvedMode() }
      };
    }
    try {
      if (!state.page.isClosed()) {
        await state.page.close();
      }
      if (state.ownsContext) {
        await state.context.close();
      }
    } finally {
      // Forget the session even if closing failed so it is not reused.
      this.sessions.delete(sessionId);
    }
    return {
      text: `Closed browser session ${sessionId}`,
      metadata: { sessionId, mode: this.getResolvedMode() }
    };
  }

  /**
   * Close every session and the browser. Cleanup continues past individual
   * failures; the first failure (if any) is rethrown after state is reset.
   */
  async dispose() {
    let firstError;
    for (const sessionId of Array.from(this.sessions.keys())) {
      try {
        await this.closeSession(sessionId);
      } catch (error) {
        firstError ??= error;
      }
    }
    try {
      if (this.browser) {
        await this.browser.close();
      }
    } catch (error) {
      firstError ??= error;
    } finally {
      this.browser = null;
      this.resolvedMode = null;
    }
    if (firstError) {
      throw firstError;
    }
  }

  /**
   * Return the open page for a session, creating one if needed.
   * In "cdp" mode the browser's first existing context is reused and not
   * owned; otherwise a fresh context is created and owned by the session.
   * @param {string} sessionId
   */
  async getPage(sessionId) {
    const existing = this.sessions.get(sessionId);
    if (existing) {
      if (!existing.page.isClosed()) {
        return existing.page;
      }
      // Stale session: release its private context before replacing it.
      this.sessions.delete(sessionId);
      if (existing.ownsContext) {
        await existing.context.close().catch(() => {
        });
      }
    }
    const browser = await this.ensureBrowser();
    const mode = this.resolvedMode;
    if (!mode) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser mode could not be resolved.", false);
    }
    let context;
    let ownsContext = true;
    if (mode === "cdp") {
      context = browser.contexts()[0] || await browser.newContext({ acceptDownloads: true });
      ownsContext = false;
    } else {
      context = await browser.newContext({ acceptDownloads: true });
    }
    let page;
    try {
      page = await context.newPage();
    } catch (error) {
      // Do not leave an orphaned context behind when the page cannot be created.
      if (ownsContext) {
        await context.close().catch(() => {
        });
      }
      throw error;
    }
    this.sessions.set(sessionId, { sessionId, page, context, ownsContext });
    return page;
  }

  /**
   * Return the shared browser, connecting on first use.
   * Explicit modes ("cdp", "remote", "headless") use exactly that transport.
   * "auto" tries CDP, then the remote URL (if configured), then headless.
   * @throws CAPABILITY_UNAVAILABLE when automation is disabled in config.
   */
  async ensureBrowser() {
    if (!this.config.enabled) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser automation is disabled in config.", false);
    }
    if (this.browser) return this.browser;
    const desiredMode = this.config.mode;
    if (desiredMode === "cdp") {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    }
    if (desiredMode === "remote") {
      this.browser = await this.connectRemoteOrThrow();
      this.resolvedMode = "remote";
      return this.browser;
    }
    if (desiredMode === "headless") {
      this.browser = await this.connectHeadless();
      this.resolvedMode = "headless";
      return this.browser;
    }
    try {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    } catch {
      // auto mode: CDP unavailable, fall through to the next transport.
    }
    if (this.config.remoteUrl) {
      try {
        this.browser = await this.connectRemoteOrThrow();
        this.resolvedMode = "remote";
        return this.browser;
      } catch {
        // auto mode: remote unavailable, fall through to headless.
      }
    }
    this.browser = await this.connectHeadless();
    this.resolvedMode = "headless";
    return this.browser;
  }

  /**
   * Connect to a local CDP endpoint on `config.cdpPort`, optionally spawning
   * the host Chrome once when the endpoint is down and `launchHostBrowser` is set.
   * @throws CAPABILITY_UNAVAILABLE when the endpoint is not reachable.
   */
  async connectCdpOrThrow() {
    const url = `http://127.0.0.1:${this.config.cdpPort}`;
    let reachable = await this.isCdpReachable(url);
    if (!reachable && this.config.launchHostBrowser && !this.attemptedHostLaunch) {
      this.launchHostChrome();
      this.attemptedHostLaunch = true;
      await sleep(1500);
      // Only re-probe when a launch was actually attempted.
      reachable = await this.isCdpReachable(url);
    }
    if (!reachable) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        `CDP endpoint ${url} is not reachable.`,
        false
      );
    }
    return import_playwright.chromium.connectOverCDP(url);
  }

  /**
   * Connect over CDP to `config.remoteUrl`.
   * @throws CAPABILITY_UNAVAILABLE when no remote URL is configured.
   */
  async connectRemoteOrThrow() {
    if (!this.config.remoteUrl) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Remote browser URL is not configured.", false);
    }
    return import_playwright.chromium.connectOverCDP(this.config.remoteUrl);
  }

  /** Launch a headless Chromium through Playwright. */
  async connectHeadless() {
    return import_playwright.chromium.launch({
      headless: true,
      args: ["--disable-dev-shm-usage", "--no-sandbox"]
    });
  }

  /**
   * Probe `${baseUrl}/json/version` with a 1.5 s abort timeout.
   * @param {string} baseUrl
   * @returns {Promise<boolean>} true only for an OK HTTP response; any error yields false.
   */
  async isCdpReachable(baseUrl) {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 1500);
      try {
        const response = await fetch(`${baseUrl}/json/version`, {
          signal: controller.signal
        });
        return response.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      return false;
    }
  }

  /**
   * Spawn a detached Chrome with remote debugging enabled on `config.cdpPort`,
   * using a profile directory under `<cwd>/.lydia-artifacts/chrome-profile`.
   */
  launchHostChrome() {
    const binary = this.resolveChromeBinary();
    if (!binary) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        "CDP launch requested but no Chrome executable could be resolved.",
        false
      );
    }
    const userDataDir = (0, import_node_path2.join)(process.cwd(), ".lydia-artifacts", "chrome-profile");
    const args = [
      `--remote-debugging-port=${this.config.cdpPort}`,
      "--no-first-run",
      "--no-default-browser-check",
      `--user-data-dir=${userDataDir}`
    ];
    const child = (0, import_node_child_process2.spawn)(binary, args, {
      detached: true,
      stdio: "ignore",
      windowsHide: true
    });
    // A missing or non-executable binary is reported through an 'error' event;
    // with no listener that event would crash the process. The failure still
    // surfaces to the caller as an unreachable CDP endpoint.
    child.on("error", () => {
    });
    child.unref();
  }

  /**
   * @returns {string} `config.chromePath` when set, otherwise a per-platform
   * default Chrome location/command.
   */
  resolveChromeBinary() {
    if (this.config.chromePath) {
      return this.config.chromePath;
    }
    const os10 = (0, import_node_os2.platform)();
    if (os10 === "win32") {
      return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
    }
    if (os10 === "darwin") {
      return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
    }
    return "google-chrome";
  }

  /**
   * Persist a Playwright download and return its absolute path.
   * @param {object} download - Playwright Download.
   * @param {string} [requestedPath] - Explicit destination; otherwise a
   *   timestamp-prefixed file inside `config.downloadDir`.
   */
  async saveDownload(download, requestedPath) {
    let filename;
    if (requestedPath) {
      filename = (0, import_node_path2.resolve)(requestedPath);
    } else {
      // The suggested name comes from the remote side; keep only its final
      // path segment so it cannot escape the download directory.
      const suggested = (0, import_node_path2.basename)(download.suggestedFilename());
      filename = (0, import_node_path2.resolve)(this.config.downloadDir, `${Date.now().toString(36)}-${suggested}`);
    }
    await (0, import_promises2.mkdir)((0, import_node_path2.dirname)(filename), { recursive: true });
    await download.saveAs(filename);
    return filename;
  }
};
3803
/**
 * Map an arbitrary thrown value onto a coded browser tool error.
 *
 * - Values that already carry a string `code` are returned unchanged.
 * - `Error` instances are classified by case-insensitive substring match on
 *   their message; the first matching rule wins.
 * - Anything else becomes an `UNKNOWN` error built from `String(error)`.
 *
 * The closed-target rule is checked first: Playwright's "has been closed"
 * messages commonly also contain call-log text such as "waiting for locator"
 * or "download", which would otherwise be picked up by a broader rule.
 *
 * @param {unknown} error - The caught value.
 * @returns {Error} Error with a string `code`. Errors created here also carry
 *   `retryable`; pass-through values are returned as-is.
 */
function normalizeBrowserRuntimeError(error) {
  if (error && typeof error === "object" && "code" in error && typeof error.code === "string") {
    return error;
  }
  if (!(error instanceof Error)) {
    return createBrowserToolError("UNKNOWN", String(error), true);
  }
  const message = error.message || "Unknown browser error";
  const lowered = message.toLowerCase();
  // [code, retryable, substrings] — order matters, most specific first.
  const rules = [
    ["SESSION_CLOSED", true, ["target page, context or browser has been closed"]],
    ["BROWSER_TIMEOUT", true, ["timeout"]],
    ["ELEMENT_NOT_FOUND", true, ["not found", "waiting for locator"]],
    ["ELEMENT_NOT_INTERACTABLE", true, ["not visible", "not enabled", "intercept"]],
    ["NAVIGATION_BLOCKED", true, ["net::", "navigation"]],
    ["DOWNLOAD_FAILED", true, ["download"]],
    ["UPLOAD_FAILED", true, ["upload", "input files"]],
    ["CAPABILITY_UNAVAILABLE", false, ["executable", "playwright"]]
  ];
  for (const [code, retryable, needles] of rules) {
    if (needles.some((needle) => lowered.includes(needle))) {
      return createBrowserToolError(code, message, retryable);
    }
  }
  return createBrowserToolError("UNKNOWN", message, true);
}
3838
/**
 * Resolve after a delay.
 * @param {number} ms - Delay passed to `setTimeout`, in milliseconds.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
3841
+
3842
+ // src/mcp/servers/browser.ts
3843
+ var SESSION_ARG = "__lydiaSessionId";
3844
/**
 * In-process MCP server ("internal-browser") that exposes the browser runtime
 * as `browser_*` tools. The reserved argument named by `SESSION_ARG` selects
 * the runtime session and is stripped before the tool arguments are forwarded.
 */
var BrowserServer = class {
  /** MCP Server instance holding the registered handlers. */
  server;
  /** Runtime that performs the browser actions. */
  runtime;

  /**
   * @param {object} [config={}] - Browser config used to build the default runtime.
   * @param {object} [runtime] - Runtime to use instead of a new BrowserAutomationManager.
   */
  constructor(config = {}, runtime = new BrowserAutomationManager(createDefaultBrowserRuntimeConfig(config))) {
    this.runtime = runtime;
    const identity = { name: "internal-browser", version: "0.1.2" };
    const options = { capabilities: { tools: {} } };
    this.server = new import_server6.Server(identity, options);
    this.setupHandlers();
  }

  /** Close one runtime session; a falsy id is ignored. */
  async closeSession(sessionId) {
    if (sessionId) {
      await this.runtime.closeSession(sessionId);
    }
  }

  /** Dispose the underlying runtime. */
  async dispose() {
    await this.runtime.dispose();
  }

  /** Register the list-tools and call-tool request handlers. */
  setupHandlers() {
    // Small builders so every listing returns fresh descriptor objects.
    const str = (description) => ({ type: "string", description });
    const bool = (description) => ({ type: "boolean", description });
    const timeoutMs = () => ({ type: "number", description: "Optional timeout override in milliseconds" });
    const tool = (name, description, properties, required) => ({
      name,
      description,
      inputSchema: required ? { type: "object", properties, required } : { type: "object", properties }
    });

    this.server.setRequestHandler(import_types8.ListToolsRequestSchema, async () => ({
      tools: [
        tool(
          "browser_navigate",
          "Navigate the current browser session to a URL.",
          {
            url: str("Destination URL"),
            waitUntil: {
              type: "string",
              enum: ["load", "domcontentloaded", "networkidle", "commit"],
              description: "Navigation completion condition"
            },
            timeoutMs: timeoutMs()
          },
          ["url"]
        ),
        tool(
          "browser_click",
          "Click an element in the current page by selector.",
          {
            selector: str("CSS selector for the target element"),
            timeoutMs: timeoutMs()
          },
          ["selector"]
        ),
        tool(
          "browser_type",
          "Type text into an element in the current page.",
          {
            selector: str("CSS selector for the target input"),
            text: str("Text to enter"),
            clearExisting: bool("Clear existing value before typing"),
            timeoutMs: timeoutMs()
          },
          ["selector", "text"]
        ),
        tool(
          "browser_select",
          "Select one or more values from a select element.",
          {
            selector: str("CSS selector for the select element"),
            value: {
              oneOf: [{ type: "string" }, { type: "array", items: { type: "string" } }],
              description: "Value or list of values to select"
            },
            timeoutMs: timeoutMs()
          },
          ["selector", "value"]
        ),
        tool(
          "browser_wait_for",
          "Wait for a selector to reach a state.",
          {
            selector: str("CSS selector to wait for"),
            state: {
              type: "string",
              enum: ["attached", "detached", "visible", "hidden"],
              description: "Target element state"
            },
            timeoutMs: timeoutMs()
          },
          ["selector"]
        ),
        tool(
          "browser_extract_text",
          "Extract visible text from an element.",
          {
            selector: str("CSS selector for the text source"),
            timeoutMs: timeoutMs()
          },
          ["selector"]
        ),
        tool(
          "browser_screenshot",
          "Capture a screenshot of the current page.",
          {
            fullPage: bool("Capture the full page instead of only the viewport"),
            timeoutMs: timeoutMs()
          }
        ),
        tool(
          "browser_download",
          "Download a browser artifact from a URL or via a click action.",
          {
            selector: str("Selector to click to trigger a download"),
            url: str("Direct download URL"),
            saveAs: str("Optional output path override"),
            timeoutMs: timeoutMs()
          }
        ),
        tool(
          "browser_upload",
          "Upload a local file into a file input element.",
          {
            selector: str("CSS selector for the file input"),
            path: str("Absolute or relative local file path"),
            timeoutMs: timeoutMs()
          },
          ["selector", "path"]
        ),
        tool("browser_close", "Close the current Lydia browser session.", {})
      ]
    }));

    // Tool name -> runtime call. A Map keeps inherited Object keys from matching.
    const actions = /* @__PURE__ */ new Map([
      ["browser_navigate", (id, toolArgs) => this.runtime.navigate(id, toolArgs)],
      ["browser_click", (id, toolArgs) => this.runtime.click(id, toolArgs)],
      ["browser_type", (id, toolArgs) => this.runtime.type(id, toolArgs)],
      ["browser_select", (id, toolArgs) => this.runtime.select(id, toolArgs)],
      ["browser_wait_for", (id, toolArgs) => this.runtime.waitFor(id, toolArgs)],
      ["browser_extract_text", (id, toolArgs) => this.runtime.extractText(id, toolArgs)],
      ["browser_screenshot", (id, toolArgs) => this.runtime.screenshot(id, toolArgs)],
      ["browser_download", (id, toolArgs) => this.runtime.download(id, toolArgs)],
      ["browser_upload", (id, toolArgs) => this.runtime.upload(id, toolArgs)],
      ["browser_close", (id) => this.runtime.closeSession(id)]
    ]);

    this.server.setRequestHandler(import_types8.CallToolRequestSchema, async (request) => {
      const rawArgs = request.params.arguments || {};
      const requestedSession = rawArgs[SESSION_ARG];
      const sessionId = typeof requestedSession === "string" && requestedSession ? requestedSession : "default";
      // Forward everything except the reserved session argument.
      const forwarded = Object.entries(rawArgs).filter(([key]) => key !== SESSION_ARG);
      const args = Object.fromEntries(forwarded);
      try {
        const action = actions.get(request.params.name);
        if (!action) {
          throw new Error(`Unknown tool: ${request.params.name}`);
        }
        return this.ok(await action(sessionId, args));
      } catch (error) {
        const normalized = normalizeBrowserRuntimeError(error);
        return {
          content: [{ type: "text", text: normalized.message }],
          isError: true
        };
      }
    });
  }

  /**
   * Convert a runtime result into a tool response: a text block, an optional
   * image block (when both `imageBase64` and `mediaType` are set), plus the
   * `artifactPath`, `downloadPath` and `metadata` fields copied through.
   */
  ok(result) {
    const { text, imageBase64, mediaType, artifactPath, downloadPath, metadata } = result;
    const content = [{ type: "text", text }];
    if (imageBase64 && mediaType) {
      content.push({ type: "image", data: imageBase64, mimeType: mediaType });
    }
    return { content, artifactPath, downloadPath, metadata };
  }
};
4064
+
3435
4065
  // src/mcp/client.ts
3436
4066
  var import_client = require("@modelcontextprotocol/sdk/client/index.js");
3437
4067
  var import_stdio = require("@modelcontextprotocol/sdk/client/stdio.js");
@@ -3555,9 +4185,12 @@ var McpCanonicalCapabilityAdapter = class {
3555
4185
  blocks.push({ type: "text", text: contentBlock.text });
3556
4186
  continue;
3557
4187
  }
3558
- if (contentBlock?.type === "image" && contentBlock.source?.type === "base64" && typeof contentBlock.source.media_type === "string" && typeof contentBlock.source.data === "string") {
3559
- const mediaType = contentBlock.source.media_type;
3560
- const base64Data = contentBlock.source.data;
4188
+ if (contentBlock?.type === "image") {
4189
+ const mediaType = typeof contentBlock.source?.media_type === "string" ? contentBlock.source.media_type : typeof contentBlock.mimeType === "string" ? contentBlock.mimeType : void 0;
4190
+ const base64Data = typeof contentBlock.source?.data === "string" ? contentBlock.source.data : typeof contentBlock.data === "string" ? contentBlock.data : void 0;
4191
+ if (!mediaType || !base64Data) {
4192
+ continue;
4193
+ }
3561
4194
  const dataRef = base64Data.length <= this.maxInlineImageBase64Length ? `data:${mediaType};base64,${base64Data}` : `inline://image/${mediaType}/${base64Data.length}`;
3562
4195
  blocks.push({
3563
4196
  type: "image",
@@ -5606,6 +6239,7 @@ var Agent = class extends import_node_events5.EventEmitter {
5606
6239
  currentTaskCreatedAt;
5607
6240
  // Centralized built-in server descriptors keep MCP wiring declarative.
5608
6241
  builtinServerSpecs = [];
6242
+ browserServer;
5609
6243
  options;
5610
6244
  computerUseAdapter;
5611
6245
  computerUseOrchestrator;
@@ -5705,6 +6339,13 @@ var Agent = class extends import_node_events5.EventEmitter {
5705
6339
  { id: "internal-fs", create: () => new FileSystemServer().server },
5706
6340
  { id: "internal-git", create: () => new GitServer().server }
5707
6341
  ];
6342
+ if (config.browser?.enabled !== false) {
6343
+ this.browserServer = new BrowserServer(config.browser);
6344
+ this.builtinServerSpecs.push({
6345
+ id: "internal-browser",
6346
+ create: () => this.browserServer.server
6347
+ });
6348
+ }
5708
6349
  await this.connectBuiltinServers();
5709
6350
  await this.connectExternalMcpServers(config.mcpServers);
5710
6351
  this.isInitialized = true;
@@ -5938,6 +6579,7 @@ ${planGuidance}` : baseSystemPrompt;
5938
6579
  } catch {
5939
6580
  }
5940
6581
  if (this.computerUseSessionId) {
6582
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5941
6583
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5942
6584
  if (terminalCheckpoint) {
5943
6585
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6043,6 +6685,7 @@ ${planGuidance}` : baseSystemPrompt;
6043
6685
  } catch {
6044
6686
  }
6045
6687
  if (this.computerUseSessionId) {
6688
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
6046
6689
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
6047
6690
  if (terminalCheckpoint) {
6048
6691
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6627,7 +7270,10 @@ ${steps.join("\n")}`;
6627
7270
  action,
6628
7271
  adapter: this.computerUseAdapter,
6629
7272
  toolName,
6630
- invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(resolvedToolName, resolvedArgs)
7273
+ invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(
7274
+ resolvedToolName,
7275
+ this.attachInternalBrowserSessionArg(resolvedToolName, sessionId, resolvedArgs)
7276
+ )
6631
7277
  });
6632
7278
  this.memoryManager.recordObservationFrame(this.currentTaskId, dispatchResult.frame);
6633
7279
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6667,6 +7313,23 @@ ${steps.join("\n")}`;
6667
7313
  }
6668
7314
  return void 0;
6669
7315
  }
7316
+ attachInternalBrowserSessionArg(toolName, sessionId, args) {
7317
+ const toolInfo = this.mcpClientManager.getToolInfo(toolName);
7318
+ if (!toolInfo || toolInfo.serverId !== "internal-browser") {
7319
+ return args;
7320
+ }
7321
+ return {
7322
+ ...args,
7323
+ __lydiaSessionId: sessionId
7324
+ };
7325
+ }
7326
+ async closeBrowserAutomationSession(sessionId) {
7327
+ if (!sessionId || !this.browserServer) return;
7328
+ try {
7329
+ await this.browserServer.closeSession(sessionId);
7330
+ } catch {
7331
+ }
7332
+ }
6670
7333
  inferComputerUseDomain(canonicalAction) {
6671
7334
  return canonicalAction.startsWith("desktop_") ? "desktop" : "browser";
6672
7335
  }
@@ -6709,13 +7372,18 @@ ${steps.join("\n")}`;
6709
7372
  normalized.push({ type: "text", text: block.text });
6710
7373
  continue;
6711
7374
  }
6712
- if (block?.type === "image" && block.source?.type === "base64" && typeof block.source.media_type === "string" && typeof block.source.data === "string") {
7375
+ if (block?.type === "image") {
7376
+ const mediaType = typeof block.source?.media_type === "string" ? block.source.media_type : typeof block.mimeType === "string" ? block.mimeType : void 0;
7377
+ const data = typeof block.source?.data === "string" ? block.source.data : typeof block.data === "string" ? block.data : void 0;
7378
+ if (!mediaType || !data) {
7379
+ continue;
7380
+ }
6713
7381
  normalized.push({
6714
7382
  type: "image",
6715
7383
  source: {
6716
7384
  type: "base64",
6717
- media_type: block.source.media_type,
6718
- data: block.source.data
7385
+ media_type: mediaType,
7386
+ data
6719
7387
  }
6720
7388
  });
6721
7389
  }
@@ -6829,7 +7497,7 @@ ${steps.join("\n")}`;
6829
7497
  return msg.includes("rate limit") || msg.includes("429") || msg.includes("500") || msg.includes("502") || msg.includes("503") || msg.includes("504") || msg.includes("timeout") || msg.includes("econnreset") || msg.includes("econnrefused") || msg.includes("network") || msg.includes("fetch failed");
6830
7498
  }
6831
7499
  sleep(ms) {
6832
- return new Promise((resolve6) => setTimeout(resolve6, ms));
7500
+ return new Promise((resolve7) => setTimeout(resolve7, ms));
6833
7501
  }
6834
7502
  // ─── Interaction ──────────────────────────────────────────────────────
6835
7503
  resolveInteraction(id, response) {
@@ -8018,6 +8686,8 @@ async function createLLMFromConfig(options) {
8018
8686
  Agent,
8019
8687
  AnthropicProvider,
8020
8688
  BasicStrategyGate,
8689
+ BrowserAutomationManager,
8690
+ BrowserServer,
8021
8691
  COMPUTER_USE_ERROR_CODES,
8022
8692
  ComputerUseSessionOrchestrator,
8023
8693
  ConfigLoader,
@@ -8090,6 +8760,8 @@ async function createLLMFromConfig(options) {
8090
8760
  ToolResultContentSchema,
8091
8761
  ToolUseContentSchema,
8092
8762
  assessRisk,
8763
+ createBrowserToolError,
8764
+ createDefaultBrowserRuntimeConfig,
8093
8765
  createLLMFromConfig,
8094
8766
  getSkillContent,
8095
8767
  hasContent,
@@ -8097,6 +8769,7 @@ async function createLLMFromConfig(options) {
8097
8769
  isComputerUseErrorCode,
8098
8770
  isDynamicSkill,
8099
8771
  listCanonicalComputerUseActions,
8772
+ normalizeBrowserRuntimeError,
8100
8773
  normalizeComputerUseError,
8101
8774
  resolveCanonicalComputerUseToolName
8102
8775
  });