@lydia-agent/core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -33,6 +33,8 @@ __export(index_exports, {
33
33
  Agent: () => Agent,
34
34
  AnthropicProvider: () => AnthropicProvider,
35
35
  BasicStrategyGate: () => BasicStrategyGate,
36
+ BrowserAutomationManager: () => BrowserAutomationManager,
37
+ BrowserServer: () => BrowserServer,
36
38
  COMPUTER_USE_ERROR_CODES: () => COMPUTER_USE_ERROR_CODES,
37
39
  ComputerUseSessionOrchestrator: () => ComputerUseSessionOrchestrator,
38
40
  ConfigLoader: () => ConfigLoader,
@@ -105,6 +107,8 @@ __export(index_exports, {
105
107
  ToolResultContentSchema: () => ToolResultContentSchema,
106
108
  ToolUseContentSchema: () => ToolUseContentSchema,
107
109
  assessRisk: () => assessRisk,
110
+ createBrowserToolError: () => createBrowserToolError,
111
+ createDefaultBrowserRuntimeConfig: () => createDefaultBrowserRuntimeConfig,
108
112
  createLLMFromConfig: () => createLLMFromConfig,
109
113
  getSkillContent: () => getSkillContent,
110
114
  hasContent: () => hasContent,
@@ -112,12 +116,13 @@ __export(index_exports, {
112
116
  isComputerUseErrorCode: () => isComputerUseErrorCode,
113
117
  isDynamicSkill: () => isDynamicSkill,
114
118
  listCanonicalComputerUseActions: () => listCanonicalComputerUseActions,
119
+ normalizeBrowserRuntimeError: () => normalizeBrowserRuntimeError,
115
120
  normalizeComputerUseError: () => normalizeComputerUseError,
116
121
  resolveCanonicalComputerUseToolName: () => resolveCanonicalComputerUseToolName
117
122
  });
118
123
  module.exports = __toCommonJS(index_exports);
119
124
 
120
- // ../../node_modules/.pnpm/tsup@8.5.1_jiti@1.21.7_post_ace0071d99dc9ba490e13d092e298186/node_modules/tsup/assets/cjs_shims.js
125
+ // ../../node_modules/.pnpm/tsup@8.5.1_jiti@1.21.7_postcss@8.5.6_tsx@4.21.0_typescript@5.9.3_yaml@2.8.2/node_modules/tsup/assets/cjs_shims.js
121
126
  var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.tagName.toUpperCase() === "SCRIPT" ? document.currentScript.src : new URL("main.js", document.baseURI).href;
122
127
  var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
123
128
 
@@ -652,6 +657,8 @@ var StrategyRegistry = class {
652
657
  // User Home
653
658
  path.join(os.homedir(), ".lydia", "strategies", "default.yml"),
654
659
  // Package Built-in
660
+ path.resolve(__dirname, "../strategies/base-v1.yml"),
661
+ // Package Built-in (older path assumption)
655
662
  path.resolve(__dirname, "../../strategies/base-v1.yml"),
656
663
  // Package Built-in (dev/src)
657
664
  path.resolve(__dirname, "../../../strategies/base-v1.yml")
@@ -1517,6 +1524,17 @@ var ConfigSchema = import_zod5.z.object({
1517
1524
  checkpointTtlHours: import_zod5.z.number().default(24),
1518
1525
  observationFrameTtlHours: import_zod5.z.number().default(24 * 7)
1519
1526
  }).default({}),
1527
+ browser: import_zod5.z.object({
1528
+ enabled: import_zod5.z.boolean().default(true),
1529
+ mode: import_zod5.z.enum(["auto", "cdp", "headless", "remote"]).default("auto"),
1530
+ cdpPort: import_zod5.z.number().int().positive().default(9222),
1531
+ remoteUrl: import_zod5.z.string().default(""),
1532
+ chromePath: import_zod5.z.string().default(""),
1533
+ launchHostBrowser: import_zod5.z.boolean().default(false),
1534
+ navigationTimeoutMs: import_zod5.z.number().positive().default(3e4),
1535
+ actionTimeoutMs: import_zod5.z.number().positive().default(1e4),
1536
+ downloadDir: import_zod5.z.string().default("")
1537
+ }).default({}),
1520
1538
  skills: import_zod5.z.object({
1521
1539
  /** Maximum number of skills whose full content is injected into the prompt (default: 3) */
1522
1540
  matchTopK: import_zod5.z.number().default(3),
@@ -1580,6 +1598,10 @@ var ConfigLoader = class {
1580
1598
  ...current.memory || {},
1581
1599
  ...partial.memory
1582
1600
  },
1601
+ browser: {
1602
+ ...current.browser || {},
1603
+ ...partial.browser
1604
+ },
1583
1605
  skills: {
1584
1606
  ...current.skills || {},
1585
1607
  ...partial.skills
@@ -2580,7 +2602,7 @@ var ShellServer = class {
2580
2602
  this.server = new import_server.Server(
2581
2603
  {
2582
2604
  name: "internal-shell",
2583
- version: "0.1.1"
2605
+ version: "0.1.2"
2584
2606
  },
2585
2607
  {
2586
2608
  capabilities: {
@@ -2658,7 +2680,7 @@ var FileSystemServer = class {
2658
2680
  this.server = new import_server2.Server(
2659
2681
  {
2660
2682
  name: "internal-fs",
2661
- version: "0.1.1"
2683
+ version: "0.1.2"
2662
2684
  },
2663
2685
  {
2664
2686
  capabilities: {
@@ -3076,7 +3098,7 @@ var GitServer = class {
3076
3098
  this.server = new import_server3.Server(
3077
3099
  {
3078
3100
  name: "internal-git",
3079
- version: "0.1.1"
3101
+ version: "0.1.2"
3080
3102
  },
3081
3103
  {
3082
3104
  capabilities: {
@@ -3253,7 +3275,7 @@ var MemoryServer = class {
3253
3275
  this.server = new import_server4.Server(
3254
3276
  {
3255
3277
  name: "internal-memory",
3256
- version: "0.1.1"
3278
+ version: "0.1.2"
3257
3279
  },
3258
3280
  {
3259
3281
  capabilities: {
@@ -3379,7 +3401,7 @@ var InteractionServer = class extends import_node_events3.EventEmitter {
3379
3401
  constructor() {
3380
3402
  super();
3381
3403
  this.server = new import_server5.Server(
3382
- { name: "internal-interaction", version: "0.1.1" },
3404
+ { name: "internal-interaction", version: "0.1.2" },
3383
3405
  { capabilities: { tools: {} } }
3384
3406
  );
3385
3407
  this.setupHandlers();
@@ -3410,8 +3432,8 @@ var InteractionServer = class extends import_node_events3.EventEmitter {
3410
3432
  prompt: args.prompt
3411
3433
  };
3412
3434
  this.emit("request", interaction);
3413
- const responseText = await new Promise((resolve6) => {
3414
- this.pendingInteractions.set(id, resolve6);
3435
+ const responseText = await new Promise((resolve7) => {
3436
+ this.pendingInteractions.set(id, resolve7);
3415
3437
  });
3416
3438
  return {
3417
3439
  content: [{ type: "text", text: responseText }]
@@ -3430,6 +3452,616 @@ var InteractionServer = class extends import_node_events3.EventEmitter {
3430
3452
  }
3431
3453
  };
3432
3454
 
3455
+ // src/mcp/servers/browser.ts
3456
+ var import_server6 = require("@modelcontextprotocol/sdk/server/index.js");
3457
+ var import_types8 = require("@modelcontextprotocol/sdk/types.js");
3458
+
3459
+ // src/browser/manager.ts
3460
+ var import_promises2 = require("fs/promises");
3461
+ var import_node_path2 = require("path");
3462
+ var import_node_os2 = require("os");
3463
+ var import_node_child_process2 = require("child_process");
3464
+ var import_playwright = require("playwright");
3465
/**
 * Build an Error tagged for the browser tool layer.
 *
 * The message is formatted as "<code>: <message>", and the code and the
 * retry hint are attached as own properties of the returned Error.
 *
 * @param {string} code - Machine-readable error code (e.g. "BROWSER_TIMEOUT").
 * @param {string} message - Human-readable detail.
 * @param {boolean} [retryable=true] - Whether the caller may retry the action.
 * @returns {Error} Error carrying `code` and `retryable`.
 */
function createBrowserToolError(code, message, retryable = true) {
  return Object.assign(new Error(`${code}: ${message}`), { code, retryable });
}
3471
/**
 * Fill in defaults for a (possibly partial) browser runtime configuration.
 *
 * Every field falls back only when it is null/undefined, except
 * `downloadDir`, which also falls back when it is any falsy value (such as
 * an empty string) to ".lydia-artifacts/browser-downloads" under the
 * current working directory.
 *
 * @param {object} [partial={}] - Caller-supplied overrides.
 * @returns {object} Complete runtime configuration.
 */
function createDefaultBrowserRuntimeConfig(partial = {}) {
  const { join } = import_node_path2;
  const resolved = {};
  resolved.enabled = partial.enabled ?? true;
  resolved.mode = partial.mode ?? "auto";
  resolved.cdpPort = partial.cdpPort ?? 9222;
  resolved.remoteUrl = partial.remoteUrl ?? "";
  resolved.chromePath = partial.chromePath ?? "";
  resolved.launchHostBrowser = partial.launchHostBrowser ?? false;
  resolved.navigationTimeoutMs = partial.navigationTimeoutMs ?? 3e4;
  resolved.actionTimeoutMs = partial.actionTimeoutMs ?? 1e4;
  const requestedDir = partial.downloadDir;
  resolved.downloadDir = requestedDir
    ? requestedDir
    : join(process.cwd(), ".lydia-artifacts", "browser-downloads");
  return resolved;
}
3484
/**
 * Owns the Playwright browser connection and the per-session pages that the
 * browser tools operate on.
 *
 * A browser is connected lazily on first use according to `config.mode`:
 * "cdp" attaches to a local Chrome debugging port, "remote" attaches to
 * `config.remoteUrl`, "headless" launches Chromium, and "auto" tries
 * cdp, then remote (when configured), then headless.
 *
 * Every public action returns `{ text, metadata, ... }` and lets Playwright
 * errors propagate to the caller.
 */
var BrowserAutomationManager = class {
  /** Effective configuration with defaults applied. */
  config;
  /** Live sessions keyed by session id: { sessionId, page, context, ownsContext }. */
  sessions = /* @__PURE__ */ new Map();
  /** Connected Playwright browser, or null until first use / after dispose. */
  browser = null;
  /** Mode actually in use ("cdp" | "remote" | "headless"), or null. */
  resolvedMode = null;
  /** Guards against spawning the host Chrome more than once. */
  attemptedHostLaunch = false;

  /**
   * @param {object} [config={}] - Partial runtime config; defaults are filled in.
   */
  constructor(config = {}) {
    this.config = createDefaultBrowserRuntimeConfig(config);
  }

  /** @returns {string|null} The connection mode currently in use, if any. */
  getResolvedMode() {
    return this.resolvedMode;
  }

  /** Timeout for element-level actions: per-call override or configured default. */
  resolveActionTimeout(args) {
    return args.timeoutMs ?? this.config.actionTimeoutMs;
  }

  /**
   * Navigate the session's page to `args.url`.
   * @param {string} sessionId
   * @param {{url: string, waitUntil?: string, timeoutMs?: number}} args
   */
  async navigate(sessionId, args) {
    const page = await this.getPage(sessionId);
    const response = await page.goto(args.url, {
      waitUntil: args.waitUntil ?? "domcontentloaded",
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs
    });
    // Read the title once so the text and the metadata cannot disagree.
    const title = await page.title();
    const status = response?.status() ?? null;
    return {
      text: `Navigated to ${page.url()} (${title || "untitled"}) [mode=${this.getResolvedMode() || "unknown"} status=${status ?? "n/a"}]`,
      metadata: {
        url: page.url(),
        title,
        status,
        mode: this.getResolvedMode()
      }
    };
  }

  /**
   * Wait for the first element matching `args.selector` to be visible, then click it.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async click(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = this.resolveActionTimeout(args);
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    await locator.click({ timeout });
    return {
      text: `Clicked ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, mode: this.getResolvedMode() }
    };
  }

  /**
   * Fill `args.text` into the first element matching `args.selector`.
   * @param {string} sessionId
   * @param {{selector: string, text: string, clearExisting?: boolean, timeoutMs?: number}} args
   */
  async type(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = this.resolveActionTimeout(args);
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    // NOTE(review): Playwright's fill() appears to replace the field value, so
    // clearExisting=false likely does not append to existing text — confirm
    // the intended semantics before relying on it.
    if (args.clearExisting !== false) {
      await locator.fill("", { timeout });
    }
    await locator.fill(args.text, { timeout });
    return {
      text: `Typed into ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, length: args.text.length }
    };
  }

  /**
   * Select one or more option values in the first matching select element.
   * @param {string} sessionId
   * @param {{selector: string, value: string|string[], timeoutMs?: number}} args
   */
  async select(sessionId, args) {
    const page = await this.getPage(sessionId);
    const values = Array.isArray(args.value) ? args.value : [args.value];
    await page.locator(args.selector).first().selectOption(values, {
      timeout: this.resolveActionTimeout(args)
    });
    return {
      text: `Selected ${values.join(", ")} in ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, values }
    };
  }

  /**
   * Wait until the first element matching `args.selector` reaches `args.state`
   * (default "visible").
   * @param {string} sessionId
   * @param {{selector: string, state?: string, timeoutMs?: number}} args
   */
  async waitFor(sessionId, args) {
    const page = await this.getPage(sessionId);
    const state = args.state ?? "visible";
    await page.locator(args.selector).first().waitFor({
      state,
      timeout: this.resolveActionTimeout(args)
    });
    return {
      text: `Wait condition satisfied for ${args.selector} (${state}) on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, state }
    };
  }

  /**
   * Return the trimmed inner text of the first element matching `args.selector`,
   * or a placeholder string when that text is empty.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async extractText(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = this.resolveActionTimeout(args);
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "attached", timeout });
    const text = (await locator.innerText({ timeout })).trim();
    return {
      text: text || `[empty text at ${args.selector}]`,
      metadata: { url: page.url(), selector: args.selector }
    };
  }

  /**
   * Capture a PNG screenshot (full page unless `args.fullPage` is false) and
   * return it base64-encoded.
   * @param {string} sessionId
   * @param {{fullPage?: boolean, timeoutMs?: number}} args
   */
  async screenshot(sessionId, args) {
    const page = await this.getPage(sessionId);
    const fullPage = args.fullPage ?? true;
    const buffer = await page.screenshot({
      fullPage,
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs,
      type: "png"
    });
    return {
      text: `Captured screenshot for ${page.url()} [mode=${this.getResolvedMode() || "unknown"}]`,
      imageBase64: buffer.toString("base64"),
      mediaType: "image/png",
      metadata: { url: page.url(), fullPage }
    };
  }

  /**
   * Trigger a download by clicking `args.selector` or by navigating to
   * `args.url`, then save it to disk.
   *
   * @param {string} sessionId
   * @param {{selector?: string, url?: string, saveAs?: string, timeoutMs?: number}} args
   * @throws {Error} DOWNLOAD_FAILED (non-retryable) when neither trigger is given;
   *   otherwise the trigger error or the download-wait error.
   */
  async download(sessionId, args) {
    // Validate before touching the browser so a bad call never launches one.
    if (!args.selector && !args.url) {
      throw createBrowserToolError("DOWNLOAD_FAILED", 'Either "selector" or "url" is required.', false);
    }
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.navigationTimeoutMs;
    const downloadPromise = page.waitForEvent("download", { timeout });
    // If the trigger below throws we never await downloadPromise; without a
    // handler its later timeout would surface as an unhandled rejection.
    downloadPromise.catch(() => {
    });
    let navigationError = null;
    if (args.selector) {
      await page.locator(args.selector).first().click({ timeout });
    } else {
      try {
        await page.goto(args.url, { waitUntil: "commit", timeout });
      } catch (error) {
        // Navigating straight to a download URL can abort the navigation even
        // though the download starts; decide only after waiting for the event.
        navigationError = error;
      }
    }
    let download;
    try {
      download = await downloadPromise;
    } catch (error) {
      // No download arrived: the navigation failure (if any) is the real cause.
      throw navigationError ?? error;
    }
    const downloadPath = await this.saveDownload(download, args.saveAs);
    return {
      text: `Downloaded artifact to ${downloadPath}`,
      downloadPath,
      metadata: { url: page.url(), suggestedFilename: download.suggestedFilename() }
    };
  }

  /**
   * Attach the local file at `args.path` to the first matching file input.
   * @param {string} sessionId
   * @param {{selector: string, path: string, timeoutMs?: number}} args
   */
  async upload(sessionId, args) {
    const page = await this.getPage(sessionId);
    const absolutePath = (0, import_node_path2.resolve)(args.path);
    await page.locator(args.selector).first().setInputFiles(absolutePath, {
      timeout: this.resolveActionTimeout(args)
    });
    return {
      text: `Uploaded ${absolutePath} into ${args.selector}`,
      artifactPath: absolutePath,
      metadata: { url: page.url(), selector: args.selector, path: absolutePath }
    };
  }

  /**
   * Close the page (and the context, when this manager created it) for a
   * session and forget the session. Unknown ids are reported as already closed.
   * @param {string} sessionId
   */
  async closeSession(sessionId) {
    const state = this.sessions.get(sessionId);
    if (!state) {
      return {
        text: `Session ${sessionId} already closed`,
        metadata: { sessionId, mode: this.getResolvedMode() }
      };
    }
    try {
      try {
        if (!state.page.isClosed()) {
          await state.page.close();
        }
      } finally {
        // Still release the context when closing the page failed.
        if (state.ownsContext) {
          await state.context.close();
        }
      }
    } finally {
      this.sessions.delete(sessionId);
    }
    return {
      text: `Closed browser session ${sessionId}`,
      metadata: { sessionId, mode: this.getResolvedMode() }
    };
  }

  /**
   * Close every session and the browser. Cleanup continues past individual
   * failures; the first failure is rethrown once everything was attempted.
   */
  async dispose() {
    const failures = [];
    for (const sessionId of Array.from(this.sessions.keys())) {
      try {
        await this.closeSession(sessionId);
      } catch (error) {
        failures.push(error);
      }
    }
    const browser = this.browser;
    this.browser = null;
    this.resolvedMode = null;
    if (browser) {
      try {
        await browser.close();
      } catch (error) {
        failures.push(error);
      }
    }
    if (failures.length > 0) {
      throw failures[0];
    }
  }

  /**
   * Return the open page for a session, creating the page (and, outside cdp
   * mode, a dedicated context) when the session is new or its page was closed.
   * @param {string} sessionId
   * @throws {Error} CAPABILITY_UNAVAILABLE when no browser mode could be resolved.
   */
  async getPage(sessionId) {
    const existing = this.sessions.get(sessionId);
    if (existing && !existing.page.isClosed()) {
      return existing.page;
    }
    if (existing) {
      // The page is gone; release the context we created for it before the
      // session entry is overwritten, otherwise it is leaked.
      this.sessions.delete(sessionId);
      if (existing.ownsContext) {
        try {
          await existing.context.close();
        } catch {
          // Best effort: the context may already be closed with its browser.
        }
      }
    }
    const browser = await this.ensureBrowser();
    const mode = this.resolvedMode;
    if (!mode) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser mode could not be resolved.", false);
    }
    let context;
    let ownsContext = true;
    if (mode === "cdp") {
      // Reuse the attached browser's first context; it is not ours to close.
      context = browser.contexts()[0] || await browser.newContext({ acceptDownloads: true });
      ownsContext = false;
    } else {
      context = await browser.newContext({ acceptDownloads: true });
    }
    const page = await context.newPage();
    this.sessions.set(sessionId, { sessionId, page, context, ownsContext });
    return page;
  }

  /**
   * Return the connected browser, connecting according to `config.mode` on
   * first use or after the previous connection was lost.
   * @throws {Error} CAPABILITY_UNAVAILABLE when automation is disabled or the
   *   explicitly requested mode cannot be reached.
   */
  async ensureBrowser() {
    if (!this.config.enabled) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser automation is disabled in config.", false);
    }
    if (this.browser) {
      const connected = typeof this.browser.isConnected !== "function" || this.browser.isConnected();
      if (connected) return this.browser;
      // The browser went away (e.g. closed externally); drop the dead handle
      // so this call reconnects instead of failing on every later action.
      this.browser = null;
      this.resolvedMode = null;
    }
    const desiredMode = this.config.mode;
    if (desiredMode === "cdp") {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    }
    if (desiredMode === "remote") {
      this.browser = await this.connectRemoteOrThrow();
      this.resolvedMode = "remote";
      return this.browser;
    }
    if (desiredMode === "headless") {
      this.browser = await this.connectHeadless();
      this.resolvedMode = "headless";
      return this.browser;
    }
    // "auto": try each transport in turn; failures here are expected and
    // simply mean "try the next one".
    try {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    } catch {
      // Local CDP unavailable; fall through.
    }
    if (this.config.remoteUrl) {
      try {
        this.browser = await this.connectRemoteOrThrow();
        this.resolvedMode = "remote";
        return this.browser;
      } catch {
        // Remote endpoint unavailable; fall through to headless.
      }
    }
    this.browser = await this.connectHeadless();
    this.resolvedMode = "headless";
    return this.browser;
  }

  /**
   * Attach to Chrome on 127.0.0.1:`config.cdpPort`, optionally launching the
   * host Chrome once when the port is not reachable.
   * @throws {Error} CAPABILITY_UNAVAILABLE when the endpoint stays unreachable.
   */
  async connectCdpOrThrow() {
    const url = `http://127.0.0.1:${this.config.cdpPort}`;
    let reachable = await this.isCdpReachable(url);
    if (!reachable && this.config.launchHostBrowser && !this.attemptedHostLaunch) {
      this.launchHostChrome();
      this.attemptedHostLaunch = true;
      await sleep(1500);
      reachable = await this.isCdpReachable(url);
    }
    if (!reachable) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        `CDP endpoint ${url} is not reachable.`,
        false
      );
    }
    return import_playwright.chromium.connectOverCDP(url);
  }

  /**
   * Attach to the browser at `config.remoteUrl` over CDP.
   * @throws {Error} CAPABILITY_UNAVAILABLE when no remote URL is configured.
   */
  async connectRemoteOrThrow() {
    if (!this.config.remoteUrl) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Remote browser URL is not configured.", false);
    }
    return import_playwright.chromium.connectOverCDP(this.config.remoteUrl);
  }

  /** Launch a headless Chromium instance. */
  async connectHeadless() {
    return import_playwright.chromium.launch({
      headless: true,
      // NOTE(review): "--no-sandbox" disables Chromium's sandbox; confirm this
      // is acceptable for the sites this agent is expected to visit.
      args: ["--disable-dev-shm-usage", "--no-sandbox"]
    });
  }

  /**
   * Probe `<baseUrl>/json/version` with a 1.5 s abort timeout.
   * @param {string} baseUrl
   * @returns {Promise<boolean>} true only for an OK HTTP response.
   */
  async isCdpReachable(baseUrl) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), 1500);
    try {
      const response = await fetch(`${baseUrl}/json/version`, {
        signal: controller.signal
      });
      return response.ok;
    } catch {
      return false;
    } finally {
      clearTimeout(timer);
    }
  }

  /**
   * Spawn a detached Chrome with remote debugging enabled on `config.cdpPort`,
   * using a profile directory under ".lydia-artifacts/chrome-profile".
   * @throws {Error} CAPABILITY_UNAVAILABLE when no executable path is resolved.
   */
  launchHostChrome() {
    const binary = this.resolveChromeBinary();
    if (!binary) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        "CDP launch requested but no Chrome executable could be resolved.",
        false
      );
    }
    const userDataDir = (0, import_node_path2.join)(process.cwd(), ".lydia-artifacts", "chrome-profile");
    const args = [
      `--remote-debugging-port=${this.config.cdpPort}`,
      "--no-first-run",
      "--no-default-browser-check",
      `--user-data-dir=${userDataDir}`
    ];
    const child = (0, import_node_child_process2.spawn)(binary, args, {
      detached: true,
      stdio: "ignore",
      windowsHide: true
    });
    // A missing or non-executable binary is reported through an asynchronous
    // 'error' event; with no listener Node raises it as an uncaught exception.
    // The reachability probe that follows the launch reports the failure.
    child.on("error", () => {
    });
    child.unref();
  }

  /**
   * Pick the Chrome executable: the configured path, else a per-platform default.
   * @returns {string}
   */
  resolveChromeBinary() {
    if (this.config.chromePath) {
      return this.config.chromePath;
    }
    const os10 = (0, import_node_os2.platform)();
    if (os10 === "win32") {
      return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
    }
    if (os10 === "darwin") {
      return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
    }
    return "google-chrome";
  }

  /**
   * Persist a download to `requestedPath`, or to a timestamp-prefixed file in
   * `config.downloadDir` when no path is given.
   * @param {object} download - Playwright download handle.
   * @param {string} [requestedPath]
   * @returns {Promise<string>} Absolute path of the saved file.
   */
  async saveDownload(download, requestedPath) {
    let filename;
    if (requestedPath) {
      filename = (0, import_node_path2.resolve)(requestedPath);
    } else {
      // The suggested name comes from the remote site; keep only its final
      // path component so it cannot climb out of the download directory.
      const safeName = (0, import_node_path2.basename)(download.suggestedFilename()) || "download";
      filename = (0, import_node_path2.resolve)(this.config.downloadDir, `${Date.now().toString(36)}-${safeName}`);
    }
    await (0, import_promises2.mkdir)((0, import_node_path2.dirname)(filename), { recursive: true });
    await download.saveAs(filename);
    return filename;
  }
};
3803
/**
 * Convert an arbitrary thrown value into a coded browser tool error.
 *
 * - Objects that already carry a string `code` are returned unchanged.
 * - Error instances are classified by case-insensitive substrings of their
 *   message; the first matching rule wins.
 * - Anything else becomes an "UNKNOWN" error built from `String(error)`.
 *
 * The closed-session rule is checked first: its message is specific, and it
 * usually also contains the failing API name (for example "download.saveAs"),
 * which would otherwise match a generic rule such as "download".
 *
 * @param {unknown} error - Value caught from a browser operation.
 * @returns {Error} Error with `code` and `retryable`; for classified Error
 *   instances the original is kept on `cause`.
 */
function normalizeBrowserRuntimeError(error) {
  if (error && typeof error === "object" && "code" in error && typeof error.code === "string") {
    return error;
  }
  if (!(error instanceof Error)) {
    return createBrowserToolError("UNKNOWN", String(error), true);
  }
  const message = error.message || "Unknown browser error";
  const lowered = message.toLowerCase();
  // [code, retryable, substrings]; order matters, the first match wins.
  const rules = [
    ["SESSION_CLOSED", true, ["target page, context or browser has been closed"]],
    ["BROWSER_TIMEOUT", true, ["timeout"]],
    ["ELEMENT_NOT_FOUND", true, ["not found", "waiting for locator"]],
    ["ELEMENT_NOT_INTERACTABLE", true, ["not visible", "not enabled", "intercept"]],
    ["NAVIGATION_BLOCKED", true, ["net::", "navigation"]],
    ["DOWNLOAD_FAILED", true, ["download"]],
    ["UPLOAD_FAILED", true, ["upload", "input files"]],
    ["CAPABILITY_UNAVAILABLE", false, ["executable", "playwright"]]
  ];
  const matched = rules.find(([, , needles]) => needles.some((needle) => lowered.includes(needle)));
  const [code, retryable] = matched ?? ["UNKNOWN", true];
  const normalized = createBrowserToolError(code, message, retryable);
  // Keep the original error (and its stack) reachable for debugging.
  normalized.cause = error;
  return normalized;
}
3838
/**
 * Resolve after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Promise that fulfils with undefined once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
3841
+
3842
+ // src/mcp/servers/browser.ts
3843
// Reserved tool-argument key that carries the session id; it is read by the
// call handler and removed before the arguments reach the runtime.
var SESSION_ARG = "__lydiaSessionId";

/**
 * MCP server ("internal-browser") that exposes the browser runtime as tools.
 *
 * Tool calls are routed to the matching runtime method; failures are
 * normalized and returned as an `isError` text result instead of being thrown.
 */
var BrowserServer = class {
  server;
  runtime;

  /**
   * @param {object} [config={}] - Browser config used to build the default runtime.
   * @param {object} [runtime] - Runtime to use instead of the default manager.
   */
  constructor(config = {}, runtime = new BrowserAutomationManager(createDefaultBrowserRuntimeConfig(config))) {
    this.runtime = runtime;
    const identity = {
      name: "internal-browser",
      version: "0.1.2"
    };
    const options = {
      capabilities: {
        tools: {}
      }
    };
    this.server = new import_server6.Server(identity, options);
    this.setupHandlers();
  }

  /** Close one runtime session; a falsy id is ignored. */
  async closeSession(sessionId) {
    if (sessionId) {
      await this.runtime.closeSession(sessionId);
    }
  }

  /** Dispose the underlying runtime. */
  async dispose() {
    await this.runtime.dispose();
  }

  /** Register the tool-listing and tool-call request handlers. */
  setupHandlers() {
    // Small builders for the JSON-schema fragments; each call returns a fresh
    // object so no schema node is shared between tools or responses.
    const str = (description) => ({ type: "string", description });
    const bool = (description) => ({ type: "boolean", description });
    const oneOfStrings = (options, description) => ({ type: "string", enum: options, description });
    const timeoutMs = () => ({ type: "number", description: "Optional timeout override in milliseconds" });
    const tool = (name, description, properties, required) => {
      const inputSchema = { type: "object", properties };
      if (required) {
        inputSchema.required = required;
      }
      return { name, description, inputSchema };
    };
    const describeTools = () => [
      tool("browser_navigate", "Navigate the current browser session to a URL.", {
        url: str("Destination URL"),
        waitUntil: oneOfStrings(["load", "domcontentloaded", "networkidle", "commit"], "Navigation completion condition"),
        timeoutMs: timeoutMs()
      }, ["url"]),
      tool("browser_click", "Click an element in the current page by selector.", {
        selector: str("CSS selector for the target element"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_type", "Type text into an element in the current page.", {
        selector: str("CSS selector for the target input"),
        text: str("Text to enter"),
        clearExisting: bool("Clear existing value before typing"),
        timeoutMs: timeoutMs()
      }, ["selector", "text"]),
      tool("browser_select", "Select one or more values from a select element.", {
        selector: str("CSS selector for the select element"),
        value: {
          oneOf: [{ type: "string" }, { type: "array", items: { type: "string" } }],
          description: "Value or list of values to select"
        },
        timeoutMs: timeoutMs()
      }, ["selector", "value"]),
      tool("browser_wait_for", "Wait for a selector to reach a state.", {
        selector: str("CSS selector to wait for"),
        state: oneOfStrings(["attached", "detached", "visible", "hidden"], "Target element state"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_extract_text", "Extract visible text from an element.", {
        selector: str("CSS selector for the text source"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_screenshot", "Capture a screenshot of the current page.", {
        fullPage: bool("Capture the full page instead of only the viewport"),
        timeoutMs: timeoutMs()
      }),
      tool("browser_download", "Download a browser artifact from a URL or via a click action.", {
        selector: str("Selector to click to trigger a download"),
        url: str("Direct download URL"),
        saveAs: str("Optional output path override"),
        timeoutMs: timeoutMs()
      }),
      tool("browser_upload", "Upload a local file into a file input element.", {
        selector: str("CSS selector for the file input"),
        path: str("Absolute or relative local file path"),
        timeoutMs: timeoutMs()
      }, ["selector", "path"]),
      tool("browser_close", "Close the current Lydia browser session.", {})
    ];
    this.server.setRequestHandler(import_types8.ListToolsRequestSchema, async () => ({
      tools: describeTools()
    }));

    // Tool name -> runtime invocation. A Map is used so that names such as
    // "constructor" can never resolve to an inherited property.
    const routes = /* @__PURE__ */ new Map([
      ["browser_navigate", (id, toolArgs) => this.runtime.navigate(id, toolArgs)],
      ["browser_click", (id, toolArgs) => this.runtime.click(id, toolArgs)],
      ["browser_type", (id, toolArgs) => this.runtime.type(id, toolArgs)],
      ["browser_select", (id, toolArgs) => this.runtime.select(id, toolArgs)],
      ["browser_wait_for", (id, toolArgs) => this.runtime.waitFor(id, toolArgs)],
      ["browser_extract_text", (id, toolArgs) => this.runtime.extractText(id, toolArgs)],
      ["browser_screenshot", (id, toolArgs) => this.runtime.screenshot(id, toolArgs)],
      ["browser_download", (id, toolArgs) => this.runtime.download(id, toolArgs)],
      ["browser_upload", (id, toolArgs) => this.runtime.upload(id, toolArgs)],
      ["browser_close", (id) => this.runtime.closeSession(id)]
    ]);
    this.server.setRequestHandler(import_types8.CallToolRequestSchema, async (request) => {
      const rawArgs = request.params.arguments || {};
      const requestedSession = rawArgs[SESSION_ARG];
      // Only a non-empty string selects a session; anything else uses "default".
      const sessionId = typeof requestedSession === "string" && requestedSession ? requestedSession : "default";
      const forwarded = Object.entries(rawArgs).filter(([key]) => key !== SESSION_ARG);
      const args = Object.fromEntries(forwarded);
      try {
        const invoke = routes.get(request.params.name);
        if (!invoke) {
          throw new Error(`Unknown tool: ${request.params.name}`);
        }
        return this.ok(await invoke(sessionId, args));
      } catch (error) {
        const normalized = normalizeBrowserRuntimeError(error);
        return {
          content: [{ type: "text", text: normalized.message }],
          isError: true
        };
      }
    });
  }

  /**
   * Wrap a runtime result as a tool response: a text block, an image block
   * when both the base64 data and the media type are present, plus the
   * result's artifact/download paths and metadata.
   */
  ok(result) {
    const { text, imageBase64, mediaType, artifactPath, downloadPath, metadata } = result;
    const content = [{ type: "text", text }];
    if (imageBase64 && mediaType) {
      content.push({ type: "image", data: imageBase64, mimeType: mediaType });
    }
    return { content, artifactPath, downloadPath, metadata };
  }
};
4064
+
3433
4065
  // src/mcp/client.ts
3434
4066
  var import_client = require("@modelcontextprotocol/sdk/client/index.js");
3435
4067
  var import_stdio = require("@modelcontextprotocol/sdk/client/stdio.js");
@@ -3553,9 +4185,12 @@ var McpCanonicalCapabilityAdapter = class {
3553
4185
  blocks.push({ type: "text", text: contentBlock.text });
3554
4186
  continue;
3555
4187
  }
3556
- if (contentBlock?.type === "image" && contentBlock.source?.type === "base64" && typeof contentBlock.source.media_type === "string" && typeof contentBlock.source.data === "string") {
3557
- const mediaType = contentBlock.source.media_type;
3558
- const base64Data = contentBlock.source.data;
4188
+ if (contentBlock?.type === "image") {
4189
+ const mediaType = typeof contentBlock.source?.media_type === "string" ? contentBlock.source.media_type : typeof contentBlock.mimeType === "string" ? contentBlock.mimeType : void 0;
4190
+ const base64Data = typeof contentBlock.source?.data === "string" ? contentBlock.source.data : typeof contentBlock.data === "string" ? contentBlock.data : void 0;
4191
+ if (!mediaType || !base64Data) {
4192
+ continue;
4193
+ }
3559
4194
  const dataRef = base64Data.length <= this.maxInlineImageBase64Length ? `data:${mediaType};base64,${base64Data}` : `inline://image/${mediaType}/${base64Data.length}`;
3560
4195
  blocks.push({
3561
4196
  type: "image",
@@ -3760,7 +4395,7 @@ var McpClientManager = class {
3760
4395
  const client = new import_client.Client(
3761
4396
  {
3762
4397
  name: "lydia-client",
3763
- version: "0.1.1"
4398
+ version: "0.1.2"
3764
4399
  },
3765
4400
  {
3766
4401
  capabilities: {
@@ -5604,6 +6239,7 @@ var Agent = class extends import_node_events5.EventEmitter {
5604
6239
  currentTaskCreatedAt;
5605
6240
  // Centralized built-in server descriptors keep MCP wiring declarative.
5606
6241
  builtinServerSpecs = [];
6242
+ browserServer;
5607
6243
  options;
5608
6244
  computerUseAdapter;
5609
6245
  computerUseOrchestrator;
@@ -5703,6 +6339,13 @@ var Agent = class extends import_node_events5.EventEmitter {
5703
6339
  { id: "internal-fs", create: () => new FileSystemServer().server },
5704
6340
  { id: "internal-git", create: () => new GitServer().server }
5705
6341
  ];
6342
+ if (config.browser?.enabled !== false) {
6343
+ this.browserServer = new BrowserServer(config.browser);
6344
+ this.builtinServerSpecs.push({
6345
+ id: "internal-browser",
6346
+ create: () => this.browserServer.server
6347
+ });
6348
+ }
5706
6349
  await this.connectBuiltinServers();
5707
6350
  await this.connectExternalMcpServers(config.mcpServers);
5708
6351
  this.isInitialized = true;
@@ -5936,6 +6579,7 @@ ${planGuidance}` : baseSystemPrompt;
5936
6579
  } catch {
5937
6580
  }
5938
6581
  if (this.computerUseSessionId) {
6582
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5939
6583
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5940
6584
  if (terminalCheckpoint) {
5941
6585
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6041,6 +6685,7 @@ ${planGuidance}` : baseSystemPrompt;
6041
6685
  } catch {
6042
6686
  }
6043
6687
  if (this.computerUseSessionId) {
6688
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
6044
6689
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
6045
6690
  if (terminalCheckpoint) {
6046
6691
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6625,7 +7270,10 @@ ${steps.join("\n")}`;
6625
7270
  action,
6626
7271
  adapter: this.computerUseAdapter,
6627
7272
  toolName,
6628
- invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(resolvedToolName, resolvedArgs)
7273
+ invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(
7274
+ resolvedToolName,
7275
+ this.attachInternalBrowserSessionArg(resolvedToolName, sessionId, resolvedArgs)
7276
+ )
6629
7277
  });
6630
7278
  this.memoryManager.recordObservationFrame(this.currentTaskId, dispatchResult.frame);
6631
7279
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6665,6 +7313,23 @@ ${steps.join("\n")}`;
6665
7313
  }
6666
7314
  return void 0;
6667
7315
  }
7316
+ attachInternalBrowserSessionArg(toolName, sessionId, args) {
7317
+ const toolInfo = this.mcpClientManager.getToolInfo(toolName);
7318
+ if (!toolInfo || toolInfo.serverId !== "internal-browser") {
7319
+ return args;
7320
+ }
7321
+ return {
7322
+ ...args,
7323
+ __lydiaSessionId: sessionId
7324
+ };
7325
+ }
7326
+ async closeBrowserAutomationSession(sessionId) {
7327
+ if (!sessionId || !this.browserServer) return;
7328
+ try {
7329
+ await this.browserServer.closeSession(sessionId);
7330
+ } catch {
7331
+ }
7332
+ }
6668
7333
  inferComputerUseDomain(canonicalAction) {
6669
7334
  return canonicalAction.startsWith("desktop_") ? "desktop" : "browser";
6670
7335
  }
@@ -6707,13 +7372,18 @@ ${steps.join("\n")}`;
6707
7372
  normalized.push({ type: "text", text: block.text });
6708
7373
  continue;
6709
7374
  }
6710
- if (block?.type === "image" && block.source?.type === "base64" && typeof block.source.media_type === "string" && typeof block.source.data === "string") {
7375
+ if (block?.type === "image") {
7376
+ const mediaType = typeof block.source?.media_type === "string" ? block.source.media_type : typeof block.mimeType === "string" ? block.mimeType : void 0;
7377
+ const data = typeof block.source?.data === "string" ? block.source.data : typeof block.data === "string" ? block.data : void 0;
7378
+ if (!mediaType || !data) {
7379
+ continue;
7380
+ }
6711
7381
  normalized.push({
6712
7382
  type: "image",
6713
7383
  source: {
6714
7384
  type: "base64",
6715
- media_type: block.source.media_type,
6716
- data: block.source.data
7385
+ media_type: mediaType,
7386
+ data
6717
7387
  }
6718
7388
  });
6719
7389
  }
@@ -6827,7 +7497,7 @@ ${steps.join("\n")}`;
6827
7497
  return msg.includes("rate limit") || msg.includes("429") || msg.includes("500") || msg.includes("502") || msg.includes("503") || msg.includes("504") || msg.includes("timeout") || msg.includes("econnreset") || msg.includes("econnrefused") || msg.includes("network") || msg.includes("fetch failed");
6828
7498
  }
6829
7499
  sleep(ms) {
6830
- return new Promise((resolve6) => setTimeout(resolve6, ms));
7500
+ return new Promise((resolve7) => setTimeout(resolve7, ms));
6831
7501
  }
6832
7502
  // ─── Interaction ──────────────────────────────────────────────────────
6833
7503
  resolveInteraction(id, response) {
@@ -8016,6 +8686,8 @@ async function createLLMFromConfig(options) {
8016
8686
  Agent,
8017
8687
  AnthropicProvider,
8018
8688
  BasicStrategyGate,
8689
+ BrowserAutomationManager,
8690
+ BrowserServer,
8019
8691
  COMPUTER_USE_ERROR_CODES,
8020
8692
  ComputerUseSessionOrchestrator,
8021
8693
  ConfigLoader,
@@ -8088,6 +8760,8 @@ async function createLLMFromConfig(options) {
8088
8760
  ToolResultContentSchema,
8089
8761
  ToolUseContentSchema,
8090
8762
  assessRisk,
8763
+ createBrowserToolError,
8764
+ createDefaultBrowserRuntimeConfig,
8091
8765
  createLLMFromConfig,
8092
8766
  getSkillContent,
8093
8767
  hasContent,
@@ -8095,6 +8769,7 @@ async function createLLMFromConfig(options) {
8095
8769
  isComputerUseErrorCode,
8096
8770
  isDynamicSkill,
8097
8771
  listCanonicalComputerUseActions,
8772
+ normalizeBrowserRuntimeError,
8098
8773
  normalizeComputerUseError,
8099
8774
  resolveCanonicalComputerUseToolName
8100
8775
  });