@roll-agent/browser-use-agent 0.13.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -14,6 +14,11 @@ metadata:
14
14
  - 先启动 `browser-use-agent` HTTP 常驻服务;浏览器 session 跨调用持久。
15
15
  - 通过 Roll 调用本 Agent 时,先用 `roll skills get browser-use-agent --include-references --json` 读取当前说明和 `references/*`,再用 `roll agent tools browser-use-agent --json` 读取真实 schema。
16
16
  - 完整 `inputSchema` 以 `roll agent tools browser-use-agent --json` 为准。
17
+ - 多账号/多 profile 场景下,Roll 会从 `browser.instances` 注入 `BROWSER_INSTANCES_JSON`;所有 browser-use tool 都支持可选 `browserInstance` 输入,用于选择目标 `profile/userDataDir + cdpPort + sessionsDir`。未传时先采用 `browser.defaultInstance`(YAML 中写作 `default-instance`);没有默认值且只声明了一个实例时自动选择该实例;多实例且无默认值时会返回 `needs_input`。
18
+ - 多个 `managed-cdp` 实例首次启动时会自动把 Chrome profile 展示名设为实例 ID,并按声明顺序自适应平铺窗口:2–3 个实例横向并列并撑满桌面可用高度;4 个实例 2×2 铺满屏幕;5 个及以上按「最多 4 列、每行撑满宽度」均衡排列(5→3+2、6→3+3、8→4+4、10→4+3+3)。macOS 使用只读 `system_profiler SPDisplaysDataType` 探测逻辑分辨率;Windows 使用只读 PowerShell/.NET `PrimaryScreen.WorkingArea` 探测扣除任务栏后的工作区;探测不到时回退默认工作区;也可通过 `ROLL_BROWSER_WORK_AREA=x,y,width,height` 覆盖。需要固定布局时在实例上配置 `profile-name` / `window-bounds`。
19
+ - 浏览器实例采用 **lazy start**:agent 启动不会立刻拉起全部 Chrome,首次访问某个 `browserInstance` 时才启动对应 profile/CDP runtime。
20
+ - `browser_status` 是无副作用诊断工具;它不会为了查询状态而启动尚未启动的 Chrome。需要启动某个实例时,调用带 `browserInstance` 的业务工具,例如 `open_platform({ browserInstance, platform:"zhipin" })`。
21
+ - `browser_status.primaryInstanceId` 表示顶层 `running/headless/mode/security` 所采用的 primary bundle;多实例详情请看 `instances[]`。
17
22
  - `REPLY_AUTHORITY_URL` / `REPLY_AUTHORITY_BEARER_TOKEN` 是生成智能回复预览的必填环境变量;`REPLY_AUTHORITY_KEYS_URL` 是发送预备回复前验签的必填环境变量。`roll doctor` 会通过 `references/env.yaml` 和 `browser_status.effectiveEnvSources` 检查它们是否声明并在运行态生效。
18
23
  - `BROWSER_SECURITY_JSON` 可选配置浏览器硬安全策略;`browser_status.security` 会返回实际加载后的 `domainAllowlist`、`maxPageContentBytes`、`maxSnapshotNodes`、`actionPolicy` 和 `foregroundPolicy`。`foregroundPolicy` 默认 `when-minimized`,普通后台窗口不抢桌面焦点;仅需要旧行为时才显式设为 `always`。
19
24
  - `BROWSER_USE_POLICY_JSON` 可选配置 browser-use 工具级业务策略;日常推荐只把 `zhipin_send_prepared_reply` 配为 `confirm`。
@@ -23,6 +28,94 @@ metadata:
23
28
  - `BROWSER_VISUAL_ACTIVITY`:读取、识别、提取等操作显示状态胶囊和区域高亮。
24
29
  - 需要关闭反馈时,将对应环境变量设为 `false`。
25
30
 
31
+ ## 多 Boss 账号 / 多 Profile 托管模式
32
+
33
+ 目标:让 orchestrator 同时托管多个 BOSS 招聘账号时,每个账号固定绑定一个独立 Chrome profile、CDP port、session 目录和招聘事件归因 ID。
34
+
35
+ 声明模型:
36
+
37
+ ```text
38
+ browserInstance
39
+ -> userDataDir/profile
40
+ -> cdpPort 或 cdpUrl
41
+ -> sessionsDir
42
+ -> trackingAgentId
43
+ ```
44
+
45
+ 配置示例:
46
+
47
+ ```yaml
48
+ browser:
49
+ default-instance: boss-a
50
+ instances:
51
+ boss-a:
52
+ platform: zhipin
53
+ mode: managed-cdp
54
+ cdp-port: 9222
55
+ user-data-dir: ~/.roll-agent/browser/profiles/boss-a
56
+ sessions-dir: ~/.roll-agent/browser/sessions/boss-a
57
+ # window-bounds 可选;省略时按实例数量自动平铺
58
+ tracking-agent-id: zhipin-boss-a
59
+ boss-b:
60
+ platform: zhipin
61
+ mode: managed-cdp
62
+ cdp-port: 9223
63
+ user-data-dir: ~/.roll-agent/browser/profiles/boss-b
64
+ sessions-dir: ~/.roll-agent/browser/sessions/boss-b
65
+ tracking-agent-id: zhipin-boss-b
66
+ ```
67
+
68
+ orchestrator 规则:
69
+
70
+ 1. 多账号托管时,把 `browserInstance` 当作账号路由键;同一个任务线程中的每一次 browser-use tool call 都必须传同一个 `browserInstance`。
71
+ 2. 不要把 `boss-a` 产生的 `pageId`、`@eN`、`@cN`、`@jN`、`preparedReplyId` 或当前页面状态传给 `boss-b`。
72
+ 3. `browserInstance` 只标识浏览器/profile;业务归因使用该实例的 `trackingAgentId`,缺失时才 fallback 到 `RECRUITMENT_EVENTS_DEFAULT_AGENT_ID`,仍缺失则跳过招聘事件上报并 warn。
73
+ 4. `platform` 与实例配置不一致时会返回 `platform_mismatch`。例如 `browserInstance:"boss-a"` 声明为 `zhipin`,就不要调用 `yupao_*` 工具或 `open_platform({ platform:"yupao" })`。
74
+ 5. 声明了多个实例但未配置 `browser.defaultInstance`(YAML 中写作 `default-instance`)时,任何未显式传 `browserInstance` 的业务调用都会返回 `needs_input`。并行托管时建议始终显式传入,不要依赖默认实例。
75
+ 6. `browser_status()` 可先用于读取声明态/运行态;需要真正启动某个账号的 profile 时,使用 `open_platform({ browserInstance, platform:"zhipin" })`。
76
+ 7. 每个账号首次托管时,需要人工在对应 Chrome 窗口完成 BOSS 登录;之后 session 跟随对应 `userDataDir` 和 `sessionsDir`。
77
+ 8. Chrome 原生 tab group 只通过扩展 API 暴露,而 browser-use 不注入扩展,因此无法用 tab group 区分账号;请用 profile 名称和窗口并排布局作为稳定识别方式。
78
+
79
+ 启动/检查流程:
80
+
81
+ ```text
82
+ roll doctor --json
83
+ -> roll agent health --json # parse browser-use-agent entry
84
+ -> roll run browser-use-agent browser_status --json
85
+ -> roll run browser-use-agent open_platform --input-json '{"browserInstance":"boss-a","platform":"zhipin"}' --json
86
+ -> 人工确认 boss-a 窗口登录
87
+ -> roll run browser-use-agent zhipin_get_username --input-json '{"browserInstance":"boss-a"}' --json
88
+ ```
89
+
90
+ 多账号批量示例:
91
+
92
+ ```json
93
+ [
94
+ {
95
+ "agent": "browser-use-agent",
96
+ "tool": "open_platform",
97
+ "input": { "browserInstance": "boss-a", "platform": "zhipin" },
98
+ "label": "boss-a-open"
99
+ },
100
+ {
101
+ "agent": "browser-use-agent",
102
+ "tool": "open_platform",
103
+ "input": { "browserInstance": "boss-b", "platform": "zhipin" },
104
+ "label": "boss-b-open"
105
+ }
106
+ ]
107
+ ```
108
+
109
+ Boss 聊天托管模板:
110
+
111
+ ```text
112
+ 对每个 browserInstance 独立执行:
113
+ zhipin_read_messages({ browserInstance, onlyUnread:true, limit:N })
114
+ -> zhipin_generate_reply_preview({ browserInstance, conversationId })
115
+ -> zhipin_send_prepared_reply({ browserInstance, preparedReplyId })
116
+ -> zhipin_read_messages({ browserInstance, onlyUnread:true, limit:N }) # 验证
117
+ ```
118
+
26
119
  ## 通用 Tools
27
120
 
28
121
  | Tool | 用途 |
@@ -0,0 +1,21 @@
1
+ import type { BrowserWindowBounds } from "@roll-agent/browser";
2
+ export interface WorkArea {
3
+ readonly x: number;
4
+ readonly y: number;
5
+ readonly width: number;
6
+ readonly height: number;
7
+ }
8
+ export declare function resetPrimaryWorkAreaCacheForTests(): void;
9
+ export declare function getPrimaryWorkArea(): WorkArea;
10
+ export declare function computeAutoLayoutGrid(total: number): {
11
+ cols: number;
12
+ rows: number;
13
+ };
14
+ export declare function computeAutoLayoutRows(total: number): readonly number[];
15
+ export declare function resolveAutoWindowBoundsForIndex(input: {
16
+ readonly index: number;
17
+ readonly total: number;
18
+ readonly workArea: WorkArea;
19
+ }): BrowserWindowBounds;
20
+ export declare function parseMacOsDisplayBounds(raw: string): WorkArea | undefined;
21
+ export declare function parseWindowsWorkAreaJson(raw: string): WorkArea | undefined;
@@ -0,0 +1,29 @@
1
+ import { BrowserContextManager, BrowserRuntime, SessionStore, type BrowserInstanceStatus, type BrowserRuntimeConfig, type Platform } from "@roll-agent/browser";
2
+ import type { BrowserInstancesConfig } from "./runtime-config.ts";
3
+ export declare const LEGACY_INSTANCE_ID = "default";
4
+ export interface BrowserRuntimeBundle {
5
+ readonly id: string;
6
+ readonly platform?: Platform;
7
+ readonly trackingAgentId?: string;
8
+ readonly runtime: BrowserRuntime;
9
+ readonly contextManager: BrowserContextManager;
10
+ readonly sessionStore: SessionStore;
11
+ readonly config: BrowserRuntimeConfig;
12
+ }
13
+ export declare class BrowserInstancePool {
14
+ private readonly bundles;
15
+ private readonly defaultInstanceId;
16
+ private readonly startPromises;
17
+ constructor(globalRuntimeConfig: BrowserRuntimeConfig, instancesConfig: BrowserInstancesConfig | undefined);
18
+ getDefaultInstanceId(): string | undefined;
19
+ listBundles(): readonly BrowserRuntimeBundle[];
20
+ isLegacySingleInstancePool(): boolean;
21
+ resolvePrimaryInstanceId(): string | undefined;
22
+ ensureBundleStarted(browserInstance?: string): Promise<BrowserRuntimeBundle>;
23
+ ensureStarted(bundleId: string): Promise<void>;
24
+ getBundle(browserInstance?: string): BrowserRuntimeBundle;
25
+ getInstanceStatuses(): Promise<BrowserInstanceStatus[]>;
26
+ closeAll(): Promise<void>;
27
+ private getOnlyInstanceId;
28
+ }
29
+ export declare function runWithBrowserInstance<T>(browserInstance: string | undefined, run: () => Promise<T>): Promise<T>;
@@ -1,5 +1,5 @@
1
1
  import { z } from "zod";
2
- export declare const BROWSER_USE_DECLARED_ENV_KEYS: readonly ["REPLY_AUTHORITY_URL", "REPLY_AUTHORITY_BEARER_TOKEN", "REPLY_AUTHORITY_KEYS_URL", "RECRUITMENT_EVENTS_ENABLED", "RECRUITMENT_EVENTS_API_BASE_URL", "RECRUITMENT_EVENTS_API_TOKEN", "RECRUITMENT_EVENTS_DEFAULT_AGENT_ID", "BROWSER_SECURITY_JSON", "BROWSER_USE_POLICY_JSON", "BROWSER_VISUAL_CURSOR", "BROWSER_VISUAL_ACTIVITY"];
2
+ export declare const BROWSER_USE_DECLARED_ENV_KEYS: readonly ["REPLY_AUTHORITY_URL", "REPLY_AUTHORITY_BEARER_TOKEN", "REPLY_AUTHORITY_KEYS_URL", "RECRUITMENT_EVENTS_ENABLED", "RECRUITMENT_EVENTS_API_BASE_URL", "RECRUITMENT_EVENTS_API_TOKEN", "RECRUITMENT_EVENTS_DEFAULT_AGENT_ID", "BROWSER_SECURITY_JSON", "BROWSER_INSTANCES_JSON", "BROWSER_USE_POLICY_JSON", "BROWSER_VISUAL_CURSOR", "BROWSER_VISUAL_ACTIVITY"];
3
3
  export declare const EffectiveEnvSourceSchema: z.ZodObject<{
4
4
  present: z.ZodBoolean;
5
5
  fingerprint: z.ZodOptional<z.ZodString>;