@lydia-agent/core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -529,6 +529,8 @@ var StrategyRegistry = class {
529
529
  // User Home
530
530
  path.join(os.homedir(), ".lydia", "strategies", "default.yml"),
531
531
  // Package Built-in
532
+ path.resolve(__dirname2, "../strategies/base-v1.yml"),
533
+ // Package Built-in (older path assumption)
532
534
  path.resolve(__dirname2, "../../strategies/base-v1.yml"),
533
535
  // Package Built-in (dev/src)
534
536
  path.resolve(__dirname2, "../../../strategies/base-v1.yml")
@@ -1394,6 +1396,17 @@ var ConfigSchema = z5.object({
1394
1396
  checkpointTtlHours: z5.number().default(24),
1395
1397
  observationFrameTtlHours: z5.number().default(24 * 7)
1396
1398
  }).default({}),
1399
+ browser: z5.object({
1400
+ enabled: z5.boolean().default(true),
1401
+ mode: z5.enum(["auto", "cdp", "headless", "remote"]).default("auto"),
1402
+ cdpPort: z5.number().int().positive().default(9222),
1403
+ remoteUrl: z5.string().default(""),
1404
+ chromePath: z5.string().default(""),
1405
+ launchHostBrowser: z5.boolean().default(false),
1406
+ navigationTimeoutMs: z5.number().positive().default(3e4),
1407
+ actionTimeoutMs: z5.number().positive().default(1e4),
1408
+ downloadDir: z5.string().default("")
1409
+ }).default({}),
1397
1410
  skills: z5.object({
1398
1411
  /** Maximum number of skills whose full content is injected into the prompt (default: 3) */
1399
1412
  matchTopK: z5.number().default(3),
@@ -1457,6 +1470,10 @@ var ConfigLoader = class {
1457
1470
  ...current.memory || {},
1458
1471
  ...partial.memory
1459
1472
  },
1473
+ browser: {
1474
+ ...current.browser || {},
1475
+ ...partial.browser
1476
+ },
1460
1477
  skills: {
1461
1478
  ...current.skills || {},
1462
1479
  ...partial.skills
@@ -2457,7 +2474,7 @@ var ShellServer = class {
2457
2474
  this.server = new Server(
2458
2475
  {
2459
2476
  name: "internal-shell",
2460
- version: "0.1.1"
2477
+ version: "0.1.2"
2461
2478
  },
2462
2479
  {
2463
2480
  capabilities: {
@@ -2535,7 +2552,7 @@ var FileSystemServer = class {
2535
2552
  this.server = new Server2(
2536
2553
  {
2537
2554
  name: "internal-fs",
2538
- version: "0.1.1"
2555
+ version: "0.1.2"
2539
2556
  },
2540
2557
  {
2541
2558
  capabilities: {
@@ -2953,7 +2970,7 @@ var GitServer = class {
2953
2970
  this.server = new Server3(
2954
2971
  {
2955
2972
  name: "internal-git",
2956
- version: "0.1.1"
2973
+ version: "0.1.2"
2957
2974
  },
2958
2975
  {
2959
2976
  capabilities: {
@@ -3130,7 +3147,7 @@ var MemoryServer = class {
3130
3147
  this.server = new Server4(
3131
3148
  {
3132
3149
  name: "internal-memory",
3133
- version: "0.1.1"
3150
+ version: "0.1.2"
3134
3151
  },
3135
3152
  {
3136
3153
  capabilities: {
@@ -3256,7 +3273,7 @@ var InteractionServer = class extends EventEmitter3 {
3256
3273
  constructor() {
3257
3274
  super();
3258
3275
  this.server = new Server5(
3259
- { name: "internal-interaction", version: "0.1.1" },
3276
+ { name: "internal-interaction", version: "0.1.2" },
3260
3277
  { capabilities: { tools: {} } }
3261
3278
  );
3262
3279
  this.setupHandlers();
@@ -3287,8 +3304,8 @@ var InteractionServer = class extends EventEmitter3 {
3287
3304
  prompt: args.prompt
3288
3305
  };
3289
3306
  this.emit("request", interaction);
3290
- const responseText = await new Promise((resolve6) => {
3291
- this.pendingInteractions.set(id, resolve6);
3307
+ const responseText = await new Promise((resolve7) => {
3308
+ this.pendingInteractions.set(id, resolve7);
3292
3309
  });
3293
3310
  return {
3294
3311
  content: [{ type: "text", text: responseText }]
@@ -3307,6 +3324,616 @@ var InteractionServer = class extends EventEmitter3 {
3307
3324
  }
3308
3325
  };
3309
3326
 
3327
+ // src/mcp/servers/browser.ts
3328
+ import { Server as Server6 } from "@modelcontextprotocol/sdk/server/index.js";
3329
+ import { CallToolRequestSchema as CallToolRequestSchema6, ListToolsRequestSchema as ListToolsRequestSchema6 } from "@modelcontextprotocol/sdk/types.js";
3330
+
3331
+ // src/browser/manager.ts
3332
+ import { mkdir as mkdir4 } from "fs/promises";
3333
+ import { dirname as dirname5, join as join8, resolve as resolve5 } from "path";
3334
+ import { platform } from "os";
3335
+ import { spawn } from "child_process";
3336
+ import { chromium } from "playwright";
3337
/**
 * Creates an Error for a failed browser tool call.
 *
 * The message is formatted as "<code>: <message>", and `code` and
 * `retryable` are attached to the error as own properties.
 *
 * @param {string} code - Error code; also used as the message prefix.
 * @param {string} message - Human-readable description of the failure.
 * @param {boolean} [retryable=true] - Value stored on `error.retryable`.
 * @returns {Error} The decorated error instance.
 */
function createBrowserToolError(code, message, retryable = true) {
  return Object.assign(new Error(`${code}: ${message}`), { code, retryable });
}
3343
/**
 * Fills in every browser runtime setting that the caller did not provide.
 *
 * @param {object|null} [partial={}] - Partial settings. `null` is treated
 *   like an empty object (a default parameter alone only covers `undefined`).
 * @returns {object} A new object with all nine settings populated.
 */
function createDefaultBrowserRuntimeConfig(partial = {}) {
  const source = partial ?? {};
  return {
    enabled: source.enabled ?? true,
    mode: source.mode ?? "auto",
    cdpPort: source.cdpPort ?? 9222,
    remoteUrl: source.remoteUrl ?? "",
    chromePath: source.chromePath ?? "",
    launchHostBrowser: source.launchHostBrowser ?? false,
    navigationTimeoutMs: source.navigationTimeoutMs ?? 3e4,
    actionTimeoutMs: source.actionTimeoutMs ?? 1e4,
    // `||` on purpose: an empty string means "not configured" and must fall
    // back to a directory under the current working directory.
    downloadDir: source.downloadDir || join8(process.cwd(), ".lydia-artifacts", "browser-downloads")
  };
}
3356
/**
 * Owns one Playwright browser connection and one page per logical session.
 *
 * Every public action resolves the page for `sessionId` (creating it on
 * first use), performs the action and returns `{ text, metadata, ... }`.
 * The browser is connected lazily according to `config.mode`:
 * "cdp", "remote", "headless", or "auto" (try CDP, then remote, then headless).
 */
var BrowserAutomationManager = class {
  /** Effective settings with defaults applied. */
  config;
  /** sessionId -> { sessionId, page, context, ownsContext }. */
  sessions = /* @__PURE__ */ new Map();
  /** Connected/launched browser, or null until first use. */
  browser = null;
  /** Mode that actually produced `browser` ("cdp" | "remote" | "headless"), or null. */
  resolvedMode = null;
  /** Guards against spawning the host Chrome more than once. */
  attemptedHostLaunch = false;
  /**
   * @param {object} [config={}] - Partial runtime settings; missing values get defaults.
   */
  constructor(config = {}) {
    this.config = createDefaultBrowserRuntimeConfig(config);
  }
  /** @returns {string|null} The mode the current browser was obtained with. */
  getResolvedMode() {
    return this.resolvedMode;
  }
  /**
   * Navigates the session page to `args.url`.
   * @param {string} sessionId
   * @param {{url: string, waitUntil?: string, timeoutMs?: number}} args
   */
  async navigate(sessionId, args) {
    const page = await this.getPage(sessionId);
    const response = await page.goto(args.url, {
      waitUntil: args.waitUntil ?? "domcontentloaded",
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs
    });
    // Read url/title/status once so the text and the metadata always agree.
    const url = page.url();
    const title = await page.title();
    const status = response?.status() ?? null;
    const mode = this.getResolvedMode();
    return {
      text: `Navigated to ${url} (${title || "untitled"}) [mode=${mode || "unknown"} status=${status ?? "n/a"}]`,
      metadata: { url, title, status, mode }
    };
  }
  /**
   * Clicks the first element matching `args.selector` once it is visible.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async click(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    await locator.click({ timeout });
    return {
      text: `Clicked ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, mode: this.getResolvedMode() }
    };
  }
  /**
   * Enters `args.text` into the first element matching `args.selector`.
   *
   * With `clearExisting: false` the text is appended to the current value;
   * otherwise the field is cleared first.
   * @param {string} sessionId
   * @param {{selector: string, text: string, clearExisting?: boolean, timeoutMs?: number}} args
   */
  async type(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "visible", timeout });
    if (args.clearExisting !== false) {
      await locator.fill("", { timeout });
      await locator.fill(args.text, { timeout });
    } else {
      // fill() always replaces the whole value, so to honour
      // clearExisting=false the current value has to be carried over.
      let existing = "";
      try {
        existing = await locator.inputValue({ timeout });
      } catch {
        // inputValue() is only defined for input/textarea/select elements;
        // for anything else keep the previous behaviour (plain replace).
        existing = "";
      }
      await locator.fill(`${existing}${args.text}`, { timeout });
    }
    return {
      text: `Typed into ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, length: args.text.length }
    };
  }
  /**
   * Selects one or more option values in a select element.
   * @param {string} sessionId
   * @param {{selector: string, value: string|string[], timeoutMs?: number}} args
   */
  async select(sessionId, args) {
    const page = await this.getPage(sessionId);
    const values = Array.isArray(args.value) ? args.value : [args.value];
    await page.locator(args.selector).first().selectOption(values, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Selected ${values.join(", ")} in ${args.selector} on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, values }
    };
  }
  /**
   * Waits until the first match of `args.selector` reaches `args.state`
   * (default "visible").
   * @param {string} sessionId
   * @param {{selector: string, state?: string, timeoutMs?: number}} args
   */
  async waitFor(sessionId, args) {
    const page = await this.getPage(sessionId);
    const state = args.state ?? "visible";
    await page.locator(args.selector).first().waitFor({
      state,
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Wait condition satisfied for ${args.selector} (${state}) on ${page.url()}`,
      metadata: { url: page.url(), selector: args.selector, state }
    };
  }
  /**
   * Returns the trimmed inner text of the first match of `args.selector`,
   * or a placeholder string when that text is empty.
   * @param {string} sessionId
   * @param {{selector: string, timeoutMs?: number}} args
   */
  async extractText(sessionId, args) {
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.actionTimeoutMs;
    const locator = page.locator(args.selector).first();
    await locator.waitFor({ state: "attached", timeout });
    const text = (await locator.innerText({ timeout })).trim();
    return {
      text: text || `[empty text at ${args.selector}]`,
      metadata: { url: page.url(), selector: args.selector }
    };
  }
  /**
   * Captures a PNG screenshot and returns it base64-encoded.
   * @param {string} sessionId
   * @param {{fullPage?: boolean, timeoutMs?: number}} args
   */
  async screenshot(sessionId, args) {
    const page = await this.getPage(sessionId);
    const fullPage = args.fullPage ?? true;
    const buffer = await page.screenshot({
      fullPage,
      timeout: args.timeoutMs ?? this.config.navigationTimeoutMs,
      type: "png"
    });
    return {
      text: `Captured screenshot for ${page.url()} [mode=${this.getResolvedMode() || "unknown"}]`,
      imageBase64: buffer.toString("base64"),
      mediaType: "image/png",
      metadata: { url: page.url(), fullPage }
    };
  }
  /**
   * Triggers a download by clicking `args.selector` or navigating to
   * `args.url`, then saves it to disk.
   * @param {string} sessionId
   * @param {{selector?: string, url?: string, saveAs?: string, timeoutMs?: number}} args
   * @throws {Error} code DOWNLOAD_FAILED when neither selector nor url is given.
   */
  async download(sessionId, args) {
    // Validate before touching the browser so a malformed call never
    // launches or connects one.
    if (!args.selector && !args.url) {
      throw createBrowserToolError("DOWNLOAD_FAILED", 'Either "selector" or "url" is required.', false);
    }
    const page = await this.getPage(sessionId);
    const timeout = args.timeoutMs ?? this.config.navigationTimeoutMs;
    // The waiter must exist before the trigger runs. Mark it as handled so
    // that, if the trigger itself throws, the waiter's later timeout does
    // not surface as an unhandled rejection.
    const downloadPromise = page.waitForEvent("download", { timeout });
    downloadPromise.catch(() => {
    });
    if (args.selector) {
      await page.locator(args.selector).first().click({ timeout });
    } else {
      try {
        await page.goto(args.url, { waitUntil: "commit", timeout });
      } catch (error) {
        // When a navigation turns into a download, goto() rejects
        // ("net::ERR_ABORTED" / "Download is starting") although the
        // download itself proceeds. Any other failure is a real error.
        const reason = error instanceof Error ? error.message : String(error);
        if (!/net::ERR_ABORTED|download is starting/i.test(reason)) {
          throw error;
        }
      }
    }
    const download = await downloadPromise;
    const downloadPath = await this.saveDownload(download, args.saveAs);
    return {
      text: `Downloaded artifact to ${downloadPath}`,
      downloadPath,
      metadata: { url: page.url(), suggestedFilename: download.suggestedFilename() }
    };
  }
  /**
   * Sets a local file on the file input matching `args.selector`.
   * @param {string} sessionId
   * @param {{selector: string, path: string, timeoutMs?: number}} args
   */
  async upload(sessionId, args) {
    const page = await this.getPage(sessionId);
    const absolutePath = resolve5(args.path);
    await page.locator(args.selector).first().setInputFiles(absolutePath, {
      timeout: args.timeoutMs ?? this.config.actionTimeoutMs
    });
    return {
      text: `Uploaded ${absolutePath} into ${args.selector}`,
      artifactPath: absolutePath,
      metadata: { url: page.url(), selector: args.selector, path: absolutePath }
    };
  }
  /**
   * Closes the page (and the context, when this manager created it) of a
   * session and forgets the session. Unknown sessions are reported as
   * already closed.
   * @param {string} sessionId
   */
  async closeSession(sessionId) {
    const state = this.sessions.get(sessionId);
    if (!state) {
      return {
        text: `Session ${sessionId} already closed`,
        metadata: { sessionId, mode: this.getResolvedMode() }
      };
    }
    try {
      try {
        if (!state.page.isClosed()) {
          await state.page.close();
        }
      } finally {
        // Still release the context when closing the page failed.
        if (state.ownsContext) {
          await state.context.close();
        }
      }
    } finally {
      this.sessions.delete(sessionId);
    }
    return {
      text: `Closed browser session ${sessionId}`,
      metadata: { sessionId, mode: this.getResolvedMode() }
    };
  }
  /**
   * Closes every session and the browser. Cleanup always runs to the end;
   * if anything failed, the first failure is rethrown afterwards.
   */
  async dispose() {
    const failures = [];
    for (const sessionId of Array.from(this.sessions.keys())) {
      try {
        await this.closeSession(sessionId);
      } catch (error) {
        failures.push(error);
      }
    }
    const browser = this.browser;
    this.browser = null;
    this.resolvedMode = null;
    if (browser) {
      try {
        await browser.close();
      } catch (error) {
        failures.push(error);
      }
    }
    if (failures.length > 0) {
      throw failures[0];
    }
  }
  /**
   * Returns the live page of a session, creating page and context on demand.
   * In "cdp" mode the browser's first existing context is reused and is
   * never closed by this manager.
   * @param {string} sessionId
   */
  async getPage(sessionId) {
    const existing = this.sessions.get(sessionId);
    if (existing) {
      if (!existing.page.isClosed()) {
        return existing.page;
      }
      // The page was closed behind our back: drop the stale entry and
      // release its context so it does not leak.
      this.sessions.delete(sessionId);
      if (existing.ownsContext) {
        try {
          await existing.context.close();
        } catch {
          // Best effort: the context may already be gone with its browser.
        }
      }
    }
    const browser = await this.ensureBrowser();
    const mode = this.resolvedMode;
    if (!mode) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser mode could not be resolved.", false);
    }
    let context;
    let ownsContext = true;
    if (mode === "cdp") {
      context = browser.contexts()[0] || await browser.newContext({ acceptDownloads: true });
      ownsContext = false;
    } else {
      context = await browser.newContext({ acceptDownloads: true });
    }
    const page = await context.newPage();
    this.sessions.set(sessionId, { sessionId, page, context, ownsContext });
    return page;
  }
  /**
   * Returns the shared browser, connecting or launching it on first use.
   * @throws {Error} code CAPABILITY_UNAVAILABLE when disabled or unreachable.
   */
  async ensureBrowser() {
    if (!this.config.enabled) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Browser automation is disabled in config.", false);
    }
    if (this.browser) {
      if (this.browser.isConnected()) {
        return this.browser;
      }
      // The connection died (browser closed or crashed): forget the handle
      // so a retry can reconnect instead of failing forever.
      this.browser = null;
      this.resolvedMode = null;
    }
    const desiredMode = this.config.mode;
    if (desiredMode === "cdp") {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    }
    if (desiredMode === "remote") {
      this.browser = await this.connectRemoteOrThrow();
      this.resolvedMode = "remote";
      return this.browser;
    }
    if (desiredMode === "headless") {
      this.browser = await this.connectHeadless();
      this.resolvedMode = "headless";
      return this.browser;
    }
    // "auto": each failed attempt is deliberately ignored so the next
    // strategy gets a chance; only the final headless launch may throw.
    try {
      this.browser = await this.connectCdpOrThrow();
      this.resolvedMode = "cdp";
      return this.browser;
    } catch {
    }
    if (this.config.remoteUrl) {
      try {
        this.browser = await this.connectRemoteOrThrow();
        this.resolvedMode = "remote";
        return this.browser;
      } catch {
      }
    }
    this.browser = await this.connectHeadless();
    this.resolvedMode = "headless";
    return this.browser;
  }
  /**
   * Connects to a local CDP endpoint on `config.cdpPort`, optionally
   * spawning the host Chrome once when the endpoint is not reachable.
   */
  async connectCdpOrThrow() {
    const url = `http://127.0.0.1:${this.config.cdpPort}`;
    let reachable = await this.isCdpReachable(url);
    if (!reachable && this.config.launchHostBrowser && !this.attemptedHostLaunch) {
      this.launchHostChrome();
      this.attemptedHostLaunch = true;
      await sleep(1500);
      // Only probe again when a launch was actually attempted.
      reachable = await this.isCdpReachable(url);
    }
    if (!reachable) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        `CDP endpoint ${url} is not reachable.`,
        false
      );
    }
    return chromium.connectOverCDP(url);
  }
  /** Connects over CDP to `config.remoteUrl`; throws when it is not set. */
  async connectRemoteOrThrow() {
    if (!this.config.remoteUrl) {
      throw createBrowserToolError("CAPABILITY_UNAVAILABLE", "Remote browser URL is not configured.", false);
    }
    return chromium.connectOverCDP(this.config.remoteUrl);
  }
  /** Launches a headless Chromium owned by this process. */
  async connectHeadless() {
    // NOTE(review): "--no-sandbox" is passed unconditionally; confirm this
    // is intended for every deployment, not only containerised ones.
    return chromium.launch({
      headless: true,
      args: ["--disable-dev-shm-usage", "--no-sandbox"]
    });
  }
  /**
   * Probes `<baseUrl>/json/version` with a 1.5 s abort timer.
   * @param {string} baseUrl
   * @returns {Promise<boolean>} true only for an OK HTTP response.
   */
  async isCdpReachable(baseUrl) {
    try {
      const controller = new AbortController();
      const timeout = setTimeout(() => controller.abort(), 1500);
      try {
        const response = await fetch(`${baseUrl}/json/version`, {
          signal: controller.signal
        });
        return response.ok;
      } finally {
        clearTimeout(timeout);
      }
    } catch {
      // Connection refused, abort, etc. all mean "not reachable".
      return false;
    }
  }
  /**
   * Spawns a detached Chrome with remote debugging enabled on
   * `config.cdpPort` and a profile directory under the working directory.
   */
  launchHostChrome() {
    const binary = this.resolveChromeBinary();
    if (!binary) {
      throw createBrowserToolError(
        "CAPABILITY_UNAVAILABLE",
        "CDP launch requested but no Chrome executable could be resolved.",
        false
      );
    }
    const userDataDir = join8(process.cwd(), ".lydia-artifacts", "chrome-profile");
    const args = [
      `--remote-debugging-port=${this.config.cdpPort}`,
      "--no-first-run",
      "--no-default-browser-check",
      `--user-data-dir=${userDataDir}`
    ];
    const child = spawn(binary, args, {
      detached: true,
      stdio: "ignore",
      windowsHide: true
    });
    // A missing or non-executable binary is reported through an async
    // "error" event; without a listener Node treats it as an uncaught
    // exception. The reachability probe that follows reports the failure.
    child.on("error", () => {
    });
    child.unref();
  }
  /**
   * @returns {string} `config.chromePath` when set, otherwise a per-platform
   *   default location/command for Google Chrome.
   */
  resolveChromeBinary() {
    if (this.config.chromePath) {
      return this.config.chromePath;
    }
    const os10 = platform();
    if (os10 === "win32") {
      return "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe";
    }
    if (os10 === "darwin") {
      return "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
    }
    return "google-chrome";
  }
  /**
   * Persists a download to `requestedPath`, or to a timestamp-prefixed file
   * inside `config.downloadDir`.
   * @param {object} download - Playwright download handle.
   * @param {string} [requestedPath] - Explicit target path chosen by the caller.
   * @returns {Promise<string>} Absolute path the file was saved to.
   */
  async saveDownload(download, requestedPath) {
    let filename;
    if (requestedPath) {
      filename = resolve5(requestedPath);
    } else {
      // The suggested name comes from the remote side; strip path separators
      // so it can never resolve outside the download directory.
      const safeName = download.suggestedFilename().replace(/[\\/]+/g, "_");
      filename = resolve5(this.config.downloadDir, `${Date.now().toString(36)}-${safeName}`);
    }
    await mkdir4(dirname5(filename), { recursive: true });
    await download.saveAs(filename);
    return filename;
  }
};
3675
/**
 * Converts anything thrown by the browser runtime into a coded tool error.
 *
 * - Values that already carry a string `code` are returned unchanged.
 * - Error instances are classified by case-insensitive substring matching on
 *   their message; the first matching rule wins.
 * - Everything else becomes an UNKNOWN error built from `String(error)`.
 *
 * @param {unknown} error - The thrown value.
 * @returns {Error} An error with `code` (and, when created here, `retryable`).
 */
function normalizeBrowserRuntimeError(error) {
  if (error && typeof error === "object" && "code" in error && typeof error.code === "string") {
    return error;
  }
  if (!(error instanceof Error)) {
    return createBrowserToolError("UNKNOWN", String(error), true);
  }
  const message = error.message || "Unknown browser error";
  const lowered = message.toLowerCase();
  // Ordered from most to least specific. The closed-session rule must run
  // first: such messages often also mention the pending operation
  // ("download", "navigation", "timeout") and would otherwise be mislabelled.
  const rules = [
    { code: "SESSION_CLOSED", retryable: true, needles: ["target page, context or browser has been closed"] },
    { code: "BROWSER_TIMEOUT", retryable: true, needles: ["timeout"] },
    { code: "ELEMENT_NOT_FOUND", retryable: true, needles: ["not found", "waiting for locator"] },
    { code: "ELEMENT_NOT_INTERACTABLE", retryable: true, needles: ["not visible", "not enabled", "intercept"] },
    { code: "NAVIGATION_BLOCKED", retryable: true, needles: ["net::", "navigation"] },
    { code: "DOWNLOAD_FAILED", retryable: true, needles: ["download"] },
    { code: "UPLOAD_FAILED", retryable: true, needles: ["upload", "input files"] },
    { code: "CAPABILITY_UNAVAILABLE", retryable: false, needles: ["executable", "playwright"] }
  ];
  const rule = rules.find(({ needles }) => needles.some((needle) => lowered.includes(needle)));
  if (rule) {
    return createBrowserToolError(rule.code, message, rule.retryable);
  }
  return createBrowserToolError("UNKNOWN", message, true);
}
3710
/**
 * Returns a promise that fulfils (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
3713
+
3714
+ // src/mcp/servers/browser.ts
3715
// Reserved argument name carrying the session id; it is stripped from the
// arguments before they reach the runtime.
var SESSION_ARG = "__lydiaSessionId";
/**
 * MCP server ("internal-browser") exposing browser automation as tools.
 *
 * Every tool call is forwarded to the injected runtime together with a
 * session id taken from the reserved session argument ("default" when it is
 * absent or not a non-empty string).
 */
var BrowserServer = class {
  server;
  runtime;
  /**
   * @param {object} [config={}] - Browser settings, used only to build the default runtime.
   * @param {object} [runtime] - Automation backend; defaults to a new BrowserAutomationManager.
   */
  constructor(config = {}, runtime = new BrowserAutomationManager(createDefaultBrowserRuntimeConfig(config))) {
    this.runtime = runtime;
    const identity = { name: "internal-browser", version: "0.1.2" };
    const options = { capabilities: { tools: {} } };
    this.server = new Server6(identity, options);
    this.setupHandlers();
  }
  /** Closes one runtime session; a falsy id is ignored. */
  async closeSession(sessionId) {
    if (sessionId) {
      await this.runtime.closeSession(sessionId);
    }
  }
  /** Releases everything held by the runtime. */
  async dispose() {
    await this.runtime.dispose();
  }
  /** Registers the tool-listing and tool-calling request handlers. */
  setupHandlers() {
    // Small schema builders; each call yields a fresh object.
    const text = (description) => ({ type: "string", description });
    const flag = (description) => ({ type: "boolean", description });
    const choice = (options, description) => ({ type: "string", enum: options, description });
    const timeoutMs = () => ({ type: "number", description: "Optional timeout override in milliseconds" });
    const tool = (name, description, properties, required) => ({
      name,
      description,
      inputSchema: required ? { type: "object", properties, required } : { type: "object", properties }
    });
    const describeTools = () => [
      tool("browser_navigate", "Navigate the current browser session to a URL.", {
        url: text("Destination URL"),
        waitUntil: choice(["load", "domcontentloaded", "networkidle", "commit"], "Navigation completion condition"),
        timeoutMs: timeoutMs()
      }, ["url"]),
      tool("browser_click", "Click an element in the current page by selector.", {
        selector: text("CSS selector for the target element"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_type", "Type text into an element in the current page.", {
        selector: text("CSS selector for the target input"),
        text: text("Text to enter"),
        clearExisting: flag("Clear existing value before typing"),
        timeoutMs: timeoutMs()
      }, ["selector", "text"]),
      tool("browser_select", "Select one or more values from a select element.", {
        selector: text("CSS selector for the select element"),
        value: {
          oneOf: [{ type: "string" }, { type: "array", items: { type: "string" } }],
          description: "Value or list of values to select"
        },
        timeoutMs: timeoutMs()
      }, ["selector", "value"]),
      tool("browser_wait_for", "Wait for a selector to reach a state.", {
        selector: text("CSS selector to wait for"),
        state: choice(["attached", "detached", "visible", "hidden"], "Target element state"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_extract_text", "Extract visible text from an element.", {
        selector: text("CSS selector for the text source"),
        timeoutMs: timeoutMs()
      }, ["selector"]),
      tool("browser_screenshot", "Capture a screenshot of the current page.", {
        fullPage: flag("Capture the full page instead of only the viewport"),
        timeoutMs: timeoutMs()
      }),
      tool("browser_download", "Download a browser artifact from a URL or via a click action.", {
        selector: text("Selector to click to trigger a download"),
        url: text("Direct download URL"),
        saveAs: text("Optional output path override"),
        timeoutMs: timeoutMs()
      }),
      tool("browser_upload", "Upload a local file into a file input element.", {
        selector: text("CSS selector for the file input"),
        path: text("Absolute or relative local file path"),
        timeoutMs: timeoutMs()
      }, ["selector", "path"]),
      tool("browser_close", "Close the current Lydia browser session.", {})
    ];
    this.server.setRequestHandler(ListToolsRequestSchema6, async () => ({ tools: describeTools() }));

    // Tool name -> runtime method taking (sessionId, args).
    const actions = /* @__PURE__ */ new Map([
      ["browser_navigate", "navigate"],
      ["browser_click", "click"],
      ["browser_type", "type"],
      ["browser_select", "select"],
      ["browser_wait_for", "waitFor"],
      ["browser_extract_text", "extractText"],
      ["browser_screenshot", "screenshot"],
      ["browser_download", "download"],
      ["browser_upload", "upload"]
    ]);
    this.server.setRequestHandler(CallToolRequestSchema6, async (request) => {
      const rawArgs = request.params.arguments || {};
      const requested = rawArgs[SESSION_ARG];
      const sessionId = typeof requested === "string" && requested ? requested : "default";
      const args = Object.fromEntries(
        Object.entries(rawArgs).filter(([key]) => key !== SESSION_ARG)
      );
      try {
        const toolName = request.params.name;
        // Closing takes only the session id, so it is handled apart from the table.
        if (toolName === "browser_close") {
          return this.ok(await this.runtime.closeSession(sessionId));
        }
        const method = actions.get(toolName);
        if (method === void 0) {
          throw new Error(`Unknown tool: ${toolName}`);
        }
        return this.ok(await this.runtime[method](sessionId, args));
      } catch (error) {
        const { message } = normalizeBrowserRuntimeError(error);
        return { content: [{ type: "text", text: message }], isError: true };
      }
    });
  }
  /**
   * Wraps a runtime result into a tool response: a text block, an optional
   * image block (when both image data and media type are present), plus the
   * result's artifactPath / downloadPath / metadata.
   */
  ok(result) {
    const { text, imageBase64, mediaType, artifactPath, downloadPath, metadata } = result;
    const content = [{ type: "text", text }];
    if (imageBase64 && mediaType) {
      content.push({ type: "image", data: imageBase64, mimeType: mediaType });
    }
    return { content, artifactPath, downloadPath, metadata };
  }
};
3936
+
3310
3937
  // src/mcp/client.ts
3311
3938
  import { Client } from "@modelcontextprotocol/sdk/client/index.js";
3312
3939
  import { StdioClientTransport } from "@modelcontextprotocol/sdk/client/stdio.js";
@@ -3430,9 +4057,12 @@ var McpCanonicalCapabilityAdapter = class {
3430
4057
  blocks.push({ type: "text", text: contentBlock.text });
3431
4058
  continue;
3432
4059
  }
3433
- if (contentBlock?.type === "image" && contentBlock.source?.type === "base64" && typeof contentBlock.source.media_type === "string" && typeof contentBlock.source.data === "string") {
3434
- const mediaType = contentBlock.source.media_type;
3435
- const base64Data = contentBlock.source.data;
4060
+ if (contentBlock?.type === "image") {
4061
+ const mediaType = typeof contentBlock.source?.media_type === "string" ? contentBlock.source.media_type : typeof contentBlock.mimeType === "string" ? contentBlock.mimeType : void 0;
4062
+ const base64Data = typeof contentBlock.source?.data === "string" ? contentBlock.source.data : typeof contentBlock.data === "string" ? contentBlock.data : void 0;
4063
+ if (!mediaType || !base64Data) {
4064
+ continue;
4065
+ }
3436
4066
  const dataRef = base64Data.length <= this.maxInlineImageBase64Length ? `data:${mediaType};base64,${base64Data}` : `inline://image/${mediaType}/${base64Data.length}`;
3437
4067
  blocks.push({
3438
4068
  type: "image",
@@ -3637,7 +4267,7 @@ var McpClientManager = class {
3637
4267
  const client = new Client(
3638
4268
  {
3639
4269
  name: "lydia-client",
3640
- version: "0.1.1"
4270
+ version: "0.1.2"
3641
4271
  },
3642
4272
  {
3643
4273
  capabilities: {
@@ -5481,6 +6111,7 @@ var Agent = class extends EventEmitter5 {
5481
6111
  currentTaskCreatedAt;
5482
6112
  // Centralized built-in server descriptors keep MCP wiring declarative.
5483
6113
  builtinServerSpecs = [];
6114
+ browserServer;
5484
6115
  options;
5485
6116
  computerUseAdapter;
5486
6117
  computerUseOrchestrator;
@@ -5580,6 +6211,13 @@ var Agent = class extends EventEmitter5 {
5580
6211
  { id: "internal-fs", create: () => new FileSystemServer().server },
5581
6212
  { id: "internal-git", create: () => new GitServer().server }
5582
6213
  ];
6214
+ if (config.browser?.enabled !== false) {
6215
+ this.browserServer = new BrowserServer(config.browser);
6216
+ this.builtinServerSpecs.push({
6217
+ id: "internal-browser",
6218
+ create: () => this.browserServer.server
6219
+ });
6220
+ }
5583
6221
  await this.connectBuiltinServers();
5584
6222
  await this.connectExternalMcpServers(config.mcpServers);
5585
6223
  this.isInitialized = true;
@@ -5813,6 +6451,7 @@ ${planGuidance}` : baseSystemPrompt;
5813
6451
  } catch {
5814
6452
  }
5815
6453
  if (this.computerUseSessionId) {
6454
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5816
6455
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5817
6456
  if (terminalCheckpoint) {
5818
6457
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -5918,6 +6557,7 @@ ${planGuidance}` : baseSystemPrompt;
5918
6557
  } catch {
5919
6558
  }
5920
6559
  if (this.computerUseSessionId) {
6560
+ await this.closeBrowserAutomationSession(this.computerUseSessionId);
5921
6561
  const terminalCheckpoint = this.computerUseOrchestrator.endSession(this.computerUseSessionId);
5922
6562
  if (terminalCheckpoint) {
5923
6563
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6502,7 +7142,10 @@ ${steps.join("\n")}`;
6502
7142
  action,
6503
7143
  adapter: this.computerUseAdapter,
6504
7144
  toolName,
6505
- invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(resolvedToolName, resolvedArgs)
7145
+ invokeTool: async (resolvedToolName, resolvedArgs) => await this.mcpClientManager.callTool(
7146
+ resolvedToolName,
7147
+ this.attachInternalBrowserSessionArg(resolvedToolName, sessionId, resolvedArgs)
7148
+ )
6506
7149
  });
6507
7150
  this.memoryManager.recordObservationFrame(this.currentTaskId, dispatchResult.frame);
6508
7151
  this.memoryManager.upsertComputerUseSessionSummary({
@@ -6542,6 +7185,23 @@ ${steps.join("\n")}`;
6542
7185
  }
6543
7186
  return void 0;
6544
7187
  }
7188
+ attachInternalBrowserSessionArg(toolName, sessionId, args) {
7189
+ const toolInfo = this.mcpClientManager.getToolInfo(toolName);
7190
+ if (!toolInfo || toolInfo.serverId !== "internal-browser") {
7191
+ return args;
7192
+ }
7193
+ return {
7194
+ ...args,
7195
+ __lydiaSessionId: sessionId
7196
+ };
7197
+ }
7198
+ async closeBrowserAutomationSession(sessionId) {
7199
+ if (!sessionId || !this.browserServer) return;
7200
+ try {
7201
+ await this.browserServer.closeSession(sessionId);
7202
+ } catch {
7203
+ }
7204
+ }
6545
7205
  inferComputerUseDomain(canonicalAction) {
6546
7206
  return canonicalAction.startsWith("desktop_") ? "desktop" : "browser";
6547
7207
  }
@@ -6584,13 +7244,18 @@ ${steps.join("\n")}`;
6584
7244
  normalized.push({ type: "text", text: block.text });
6585
7245
  continue;
6586
7246
  }
6587
- if (block?.type === "image" && block.source?.type === "base64" && typeof block.source.media_type === "string" && typeof block.source.data === "string") {
7247
+ if (block?.type === "image") {
7248
+ const mediaType = typeof block.source?.media_type === "string" ? block.source.media_type : typeof block.mimeType === "string" ? block.mimeType : void 0;
7249
+ const data = typeof block.source?.data === "string" ? block.source.data : typeof block.data === "string" ? block.data : void 0;
7250
+ if (!mediaType || !data) {
7251
+ continue;
7252
+ }
6588
7253
  normalized.push({
6589
7254
  type: "image",
6590
7255
  source: {
6591
7256
  type: "base64",
6592
- media_type: block.source.media_type,
6593
- data: block.source.data
7257
+ media_type: mediaType,
7258
+ data
6594
7259
  }
6595
7260
  });
6596
7261
  }
@@ -6704,7 +7369,7 @@ ${steps.join("\n")}`;
6704
7369
  return msg.includes("rate limit") || msg.includes("429") || msg.includes("500") || msg.includes("502") || msg.includes("503") || msg.includes("504") || msg.includes("timeout") || msg.includes("econnreset") || msg.includes("econnrefused") || msg.includes("network") || msg.includes("fetch failed");
6705
7370
  }
6706
7371
  sleep(ms) {
6707
- return new Promise((resolve6) => setTimeout(resolve6, ms));
7372
+ return new Promise((resolve7) => setTimeout(resolve7, ms));
6708
7373
  }
6709
7374
  // ─── Interaction ──────────────────────────────────────────────────────
6710
7375
  resolveInteraction(id, response) {
@@ -7892,6 +8557,8 @@ export {
7892
8557
  Agent,
7893
8558
  AnthropicProvider,
7894
8559
  BasicStrategyGate,
8560
+ BrowserAutomationManager,
8561
+ BrowserServer,
7895
8562
  COMPUTER_USE_ERROR_CODES,
7896
8563
  ComputerUseSessionOrchestrator,
7897
8564
  ConfigLoader,
@@ -7964,6 +8631,8 @@ export {
7964
8631
  ToolResultContentSchema,
7965
8632
  ToolUseContentSchema,
7966
8633
  assessRisk,
8634
+ createBrowserToolError,
8635
+ createDefaultBrowserRuntimeConfig,
7967
8636
  createLLMFromConfig,
7968
8637
  getSkillContent,
7969
8638
  hasContent,
@@ -7971,6 +8640,7 @@ export {
7971
8640
  isComputerUseErrorCode,
7972
8641
  isDynamicSkill,
7973
8642
  listCanonicalComputerUseActions,
8643
+ normalizeBrowserRuntimeError,
7974
8644
  normalizeComputerUseError,
7975
8645
  resolveCanonicalComputerUseToolName
7976
8646
  };