page-action-cache 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/dist/actions-executor.d.ts +62 -0
  2. package/dist/actions-executor.d.ts.map +1 -0
  3. package/dist/actions-executor.js +339 -0
  4. package/dist/actions-executor.js.map +1 -0
  5. package/dist/cache-invalidator.d.ts +70 -0
  6. package/dist/cache-invalidator.d.ts.map +1 -0
  7. package/dist/cache-invalidator.js +212 -0
  8. package/dist/cache-invalidator.js.map +1 -0
  9. package/dist/cache-store.d.ts +80 -0
  10. package/dist/cache-store.d.ts.map +1 -0
  11. package/dist/cache-store.js +361 -0
  12. package/dist/cache-store.js.map +1 -0
  13. package/dist/cache-strategy.d.ts +65 -0
  14. package/dist/cache-strategy.d.ts.map +1 -0
  15. package/dist/cache-strategy.js +237 -0
  16. package/dist/cache-strategy.js.map +1 -0
  17. package/dist/hooks-entry.d.ts +18 -0
  18. package/dist/hooks-entry.d.ts.map +1 -0
  19. package/dist/hooks-entry.js +27 -0
  20. package/dist/hooks-entry.js.map +1 -0
  21. package/dist/hooks.d.ts +10 -0
  22. package/dist/hooks.d.ts.map +1 -0
  23. package/dist/hooks.js +277 -0
  24. package/dist/hooks.js.map +1 -0
  25. package/dist/index.d.ts +24 -0
  26. package/dist/index.d.ts.map +1 -0
  27. package/dist/index.js +34 -0
  28. package/dist/index.js.map +1 -0
  29. package/dist/scenario-recognizer.d.ts +45 -0
  30. package/dist/scenario-recognizer.d.ts.map +1 -0
  31. package/dist/scenario-recognizer.js +213 -0
  32. package/dist/scenario-recognizer.js.map +1 -0
  33. package/dist/security-policy.d.ts +62 -0
  34. package/dist/security-policy.d.ts.map +1 -0
  35. package/dist/security-policy.js +219 -0
  36. package/dist/security-policy.js.map +1 -0
  37. package/dist/tools.d.ts +209 -0
  38. package/dist/tools.d.ts.map +1 -0
  39. package/dist/tools.js +383 -0
  40. package/dist/tools.js.map +1 -0
  41. package/dist/types.d.ts +336 -0
  42. package/dist/types.d.ts.map +1 -0
  43. package/dist/types.js +8 -0
  44. package/dist/types.js.map +1 -0
  45. package/dist/ux-enhancer.d.ts +60 -0
  46. package/dist/ux-enhancer.d.ts.map +1 -0
  47. package/dist/ux-enhancer.js +218 -0
  48. package/dist/ux-enhancer.js.map +1 -0
  49. package/dist/variable-resolver.d.ts +28 -0
  50. package/dist/variable-resolver.d.ts.map +1 -0
  51. package/dist/variable-resolver.js +201 -0
  52. package/dist/variable-resolver.js.map +1 -0
  53. package/docs/API.md +555 -0
  54. package/docs/IMPLEMENTATION.md +1792 -0
  55. package/docs/INTEGRATION.md +387 -0
  56. package/docs/README.md +183 -0
  57. package/index.ts +118 -0
  58. package/openclaw.plugin.json +208 -0
  59. package/package.json +76 -0
  60. package/skills/page-action-cache/SKILL.md +216 -0
  61. package/src/actions-executor.ts +441 -0
  62. package/src/cache-invalidator.ts +271 -0
  63. package/src/cache-store.ts +457 -0
  64. package/src/cache-strategy.ts +327 -0
  65. package/src/hooks-entry.ts +114 -0
  66. package/src/hooks.ts +332 -0
  67. package/src/index.ts +104 -0
  68. package/src/scenario-recognizer.ts +259 -0
  69. package/src/security-policy.ts +268 -0
  70. package/src/tools.ts +437 -0
  71. package/src/types.ts +482 -0
  72. package/src/ux-enhancer.ts +266 -0
  73. package/src/variable-resolver.ts +258 -0
@@ -0,0 +1,1792 @@
1
+ # 页面操作缓存 Extension - 实现文档
2
+
3
+ ## 概述
4
+
5
+ 本扩展实现了 OpenClaw 浏览器操作结果的智能缓存功能,通过原子化操作指令、多层缓存策略、智能场景识别和智能缓存失效机制,避免 LLM 重复分析相同页面,大幅降低 token 消耗和操作延迟。
6
+
7
+ ### 核心功能
8
+
9
+ 1. **原子化操作指令** - 将 LLM 分析结果转换为可直接复用的操作序列
10
+ 2. **智能场景识别** - 多层场景识别(规则+LLM+历史),准确匹配用户意图
11
+ 3. **多层缓存策略** - L3(场景)→L2(流程)→L1(原子),智能匹配和淘汰
12
+ 4. **智能缓存失效** - DOM hash 监控、版本管理、渐进式失效
13
+ 5. **智能变量系统** - 从用户输入提取变量,安全替换到操作模板
14
+ 6. **效果追踪** - token 节省、时间加速、命中率、操作成功率
15
+
16
+ ### 问题分析
17
+
18
+ ```
19
+ 问题 1: 每次操作网页都要把 DOM 信息给 LLM → Token 消耗大
20
+ 问题 2: LLM 响应慢 → 整体操作慢
21
+
22
+ 解决方案:
23
+ - 首次访问:LLM 分析 DOM → 生成完整操作序列 + 选择器 → 保存到缓存
24
+ - 后续访问:检查缓存 → 直接使用保存的操作序列 → 跳过 DOM 分析 → 10x 加速
25
+ ```
26
+
27
+ ---
28
+
29
+ ## 目录结构
30
+
31
+ ```
32
+ extensions/page-action-cache/
33
+ ├── package.json # 插件元数据和依赖
34
+ ├── index.ts # 插件入口点
35
+ ├── src/
36
+ │ ├── types.ts # TypeScript 类型定义
37
+ │ ├── cache-store.ts # 缓存存储(JSON 文件)
38
+ │ ├── cache-strategy.ts # 缓存策略接口和实现
39
+ │ ├── scenario-recognizer.ts # 场景识别器
40
+ │ ├── variable-resolver.ts # 变量解析器
41
+ │ ├── cache-invalidator.ts # 缓存失效检测器
42
+ │ ├── hooks.ts # Hooks 注册和处理器
43
+ │ ├── tools.ts # 自定义缓存管理工具
44
+ │ ├── actions-executor.ts # 操作执行器
45
+ │ ├── security-policy.ts # 安全策略
46
+ │ └── ux-enhancer.ts # 用户体验增强
47
+ ├── skills/
48
+ │ └── page-action-cache/
49
+ │ └── SKILL.md # LLM 使用指南
50
+ └── tests/
51
+ ├── cache-store.test.ts
52
+ ├── scenario-recognizer.test.ts
53
+ ├── hooks.test.ts
54
+ └── actions-executor.test.ts
55
+ ```
56
+
57
+ ---
58
+
59
+ ## 一、数据结构设计
60
+
61
+ ### 1.1 缓存条目
62
+
63
+ ```typescript
64
+ /**
65
+ * 页面操作缓存条目(支持多层缓存)
66
+ */
67
+ interface PageActionCacheEntry {
68
+ // === 标识信息 ===
69
+ key: string; // 缓存键:scenario|url|viewport|type
70
+ url: string; // 页面 URL(标准化)
71
+ viewport: PageViewport; // 视口尺寸
72
+
73
+ // === 缓存层级 ===
74
+ cacheLevel: "L3" | "L2" | "L1";
75
+ // L3: 场景级缓存
76
+ // L2: 流程级缓存
77
+ // L1: 原子操作级缓存
78
+
79
+ // === 场景信息 ===
80
+ scenario: string; // 场景名称(如 login, checkout)
81
+ description: string; // 场景描述(如 "登录 example.com")
82
+ variables?: VariableMap; // 场景变量(如 username, password)
83
+ urlPattern?: string; // URL 模式(用于匹配)
84
+
85
+ // === 操作序列 ===
86
+ actions: AtomicAction[]; // 原子化操作序列
87
+ screenshotPath?: string; // 关联的截图路径
88
+
89
+ // === 时间信息 ===
90
+ createdAt: number; // 创建时间戳
91
+ lastAccessTime: number; // 最后访问时间
92
+ accessCount: number; // 访问次数
93
+
94
+ // === 失效控制 ===
95
+ expiresAt: number; // 过期时间戳
96
+ pageChangeDetection: PageChangeDetection; // 页面变化检测结果
97
+
98
+ // === 元数据 ===
99
+ source: "llm" | "manual" | "learned"; // 来源
100
+ version?: number; // 缓存版本号
101
+ tags?: string[]; // 标签(如 login, checkout)
102
+ variant?: string; // 变体标识(A/B 测试)
103
+
104
+ // === 执行统计 ===
105
+ successCount: number; // 成功执行次数
106
+ failCount: number; // 失败次数
107
+ avgExecutionTime: number; // 平均执行时间(毫秒)
108
+ }
109
+
110
+ /**
111
+ * 变量映射
112
+ */
113
+ interface VariableMap {
114
+ username?: string;
115
+ password?: string;
116
+ email?: string;
117
+ phone?: string;
118
+ code?: string;
119
+ token?: string;
120
+ custom?: Record<string, string>;
121
+ }
122
+
123
+ /**
124
+ * 视口尺寸
125
+ */
126
+ interface PageViewport {
127
+ width: number;
128
+ height: number;
129
+ }
130
+
131
+ /**
132
+ * 原子化操作(完全对齐 Playwright API)
133
+ *
134
+ * 设计原则:
135
+ * 1. 参数名与 Playwright API 完全一致(如 ref, targetId, cdpUrl)
136
+ * 2. 支持的 Playwright 操作直接映射,不支持的操作使用 evaluate 实现
137
+ * 3. 所有操作支持 ref(元素引用)和 targetId(标签页 ID)
138
+ * 4. 变量替换使用 ${variable} 模板语法
139
+ */
140
+ interface AtomicAction {
141
+ // === 通用参数 ===
142
+ type: "navigate" | "screenshot" | "click" | "type" | "press" |
143
+ "hover" | "scroll" | "wait" | "select" | "focus" | "drag" | "upload";
144
+ cdpUrl?: string; // CDP 端点 URL(执行时必需;未指定时从配置获取)
145
+ targetId?: string; // 目标标签页 ID
146
+ ref?: string; // 元素引用 ID(Playwright API 使用 ref 而非 selector)
147
+ variable?: string; // 引用变量(如 ${username}),执行时替换
148
+ order: number; // 执行顺序
149
+ description?: string; // 操作描述
150
+
151
+ // === 导航参数 (type="navigate") ===
152
+ url?: string; // 目标 URL
153
+
154
+ // === 点击参数 (type="click") ===
155
+ button?: "left" | "right" | "middle"; // 按钮标识
156
+ doubleClick?: boolean; // 是否双击
157
+ modifiers?: Array<"Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift">; // 修饰符
158
+ timeoutMs?: number; // 超时时间(毫秒)
159
+
160
+ // === 输入参数 (type="type") ===
161
+ text?: string; // 要输入的文本(可以是变量 ${username})
162
+ submit?: boolean; // 是否触发表单提交
163
+ slowly?: boolean; // 是否慢速输入
164
+
165
+ // === 按键参数 (type="press") ===
166
+ key?: string; // 要按的键(如 Enter, Escape, Tab)
167
+ delayMs?: number; // 按键延迟(毫秒)
168
+
169
+ // === 悬停参数 (type="hover") ===
170
+
171
+ // === evaluate 操作参数(scroll, wait, select, focus, drag, upload) ===
172
+ // 这些操作使用 Playwright 的 evaluate API 实现
173
+ evaluate?: {
174
+ code: string; // JavaScript 代码
175
+ args?: any[]; // 传递给代码的参数
176
+ };
177
+ }
178
+
179
+ /**
180
+ * Playwright 支持的按钮类型(完全对齐)
181
+ */
182
+ type PlaywrightButton = "left" | "right" | "middle";
183
+
184
+ /**
185
+ * Playwright 支持的修饰符(完全对齐)
186
+ */
187
+ type PlaywrightModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
188
+
189
+ /**
190
+ * Playwright 导航策略参数
191
+ */
192
+ interface NavigationPolicy {
193
+ waitUntil?: "load" | "domcontentloaded" | "networkidle";
194
+ timeout?: number;
195
+ }
196
+
197
+ /**
198
+ * 扩展的导航操作(包含导航策略)
199
+ */
200
+ interface NavigateAction extends AtomicAction {
201
+ type: "navigate";
202
+ url: string;
203
+ targetId?: string;
204
+ navigationPolicy?: NavigationPolicy;
205
+ }
206
+
207
+ /**
208
+ * 扩展的点击操作(完全对齐 Playwright clickViaPlaywright)
209
+ */
210
+ interface ClickAction extends AtomicAction {
211
+ type: "click";
212
+ ref: string; // 元素引用(必需)
213
+ doubleClick?: boolean;
214
+ button?: PlaywrightButton;
215
+ modifiers?: PlaywrightModifier[];
216
+ timeoutMs?: number;
217
+ }
218
+
219
+ /**
220
+ * 扩展的输入操作(完全对齐 Playwright typeViaPlaywright)
221
+ */
222
+ interface TypeAction extends AtomicAction {
223
+ type: "type";
224
+ ref: string; // 元素引用(必需)
225
+ text: string; // 输入文本(可以是变量 ${username})
226
+ submit?: boolean;
227
+ slowly?: boolean;
228
+ timeoutMs?: number;
229
+ }
230
+
231
+ /**
232
+ * 扩展的按键操作(完全对齐 Playwright pressKeyViaPlaywright)
233
+ */
234
+ interface PressAction extends AtomicAction {
235
+ type: "press";
236
+ ref?: string; // 元素引用(可选,全局按键不需要)
237
+ key: string; // 按键(必需)
238
+ delayMs?: number;
239
+ }
240
+
241
+ /**
242
+ * 扩展的悬停操作(完全对齐 Playwright hoverViaPlaywright)
243
+ */
244
+ interface HoverAction extends AtomicAction {
245
+ type: "hover";
246
+ ref: string; // 元素引用(必需)
247
+ timeoutMs?: number;
248
+ }
249
+
250
+ /**
251
+ * 复合操作(支持原子性)
252
+ */
253
+ interface CompositeAction extends AtomicAction {
254
+ type: "composite";
255
+ actions: AtomicAction[]; // 子操作序列
256
+ atomic?: boolean; // 是否原子操作(全部成功或全部失败)
257
+ rollback?: AtomicAction[]; // 失败时回滚操作
258
+ label?: string; // 复合操作标签
259
+ }
260
+
261
+ /**
262
+ * 操作类型(包含复合操作)
263
+ */
264
+ type Action = AtomicAction | CompositeAction;
265
+ ```
266
+
267
+ ### 1.2 页面变化检测
268
+
269
+ ```typescript
270
+ /**
271
+ * 页面变化检测结果
272
+ */
273
+ interface PageChangeDetection {
274
+ hasChanged: boolean; // 页面是否发生变化
275
+ changeType: "structure" | "content" | "none";
276
+ confidence: number; // 置信度 0-100
277
+ domHash: string; // 当前 DOM hash
278
+ structureHash?: string; // 结构特征 hash
279
+ lastCheckedAt: number; // 最后检查时间
280
+ details?: string; // 变化详情
281
+ }
282
+
283
+ /**
284
+ * DOM hash 特征
285
+ */
286
+ interface DOMHashFeatures {
287
+ formCount: number; // 表单数量
288
+ buttonCount: number; // 按钮数量
289
+ linkCount: number; // 链接数量
290
+ scriptCount: number; // 脚本数量
291
+ headingCount: number; // 标题数量
292
+ }
293
+ ```
294
+
295
+ ### 1.3 场景识别规则
296
+
297
+ ```typescript
298
+ /**
299
+ * 场景识别规则
300
+ */
301
+ interface ScenarioRule {
302
+ scenario: string; // 场景名称
303
+ keywords: string[]; // 关键词列表
304
+ urlPatterns: string[]; // URL 模式列表
305
+ priority: number; // 优先级(数字越大越优先)
306
+ cacheLevel: "L3" | "L2"; // 建议的缓存层级
307
+ confidence: number; // 最小置信度要求
308
+ }
309
+
310
+ /**
311
+ * 场景匹配结果
312
+ */
313
+ interface ScenarioMatch {
314
+ scenario: string;
315
+ confidence: number; // 置信度 0-100
316
+ method: "keyword" | "url" | "llm" | "history" | "learned";
317
+ matchedPattern?: string; // 匹配的模式或规则
318
+ }
319
+ ```
320
+
321
+ ### 1.4 缓存存储结构
322
+
323
+ ```typescript
324
+ interface PageActionCacheStore {
325
+ version: number; // 缓存格式版本
326
+ entries: Record<string, PageActionCacheEntry>;
327
+ scenarios: Map<string, ScenarioMatch>; // 场景匹配历史
328
+ stats: CacheStats;
329
+ }
330
+
331
+ interface CacheStats {
332
+ // 基本统计
333
+ totalEntries: number;
334
+ totalHits: number;
335
+ totalMisses: number;
336
+ hitRate: number;
337
+
338
+ // 按层级统计
339
+ l3Hits: number; // 场景级缓存命中
340
+ l2Hits: number; // 流程级缓存命中
341
+ l1Hits: number; // 原子级缓存命中
342
+
343
+ // 场景识别统计
344
+ scenarioMatches: number; // 总匹配次数
345
+ llmClassifications: number; // LLM 分类次数
346
+ learnedAssociations: number; // 历史学习次数
347
+
348
+ // 效果统计
349
+ savedTokens: number; // 估算节省的 token 数
350
+ savedTime: number; // 估算节省的时间(毫秒)
351
+ avgExecutionTime: number; // 平均执行时间
352
+
353
+ // 用户反馈统计
354
+ userConfirmations: number; // 用户确认缓存使用的次数
355
+ userForcedRefreshes: number; // 用户强制刷新的次数
356
+ cacheErrors: number; // 缓存执行错误次数
357
+ }
358
+ ```
359
+
360
+ ---
361
+
362
+ ## 二、缓存策略设计
363
+
364
+ ### 2.1 多层缓存架构
365
+
366
+ ```mermaid
367
+ graph TD
368
+ A[缓存请求] --> B{匹配缓存层级}
369
+
370
+ B -->|L3: 场景级| C[优先使用]
371
+ B -->|L2: 流程级| D[置信度>70%时使用]
372
+ B -->|L1: 原子级| E[置信度>50%时使用]
373
+
374
+ C --> F[检查页面变化]
375
+ F -->|无变化| G[执行缓存操作]
376
+ F -->|有变化| H[失效缓存]
377
+
378
+ G --> I[记录统计]
379
+ H --> J[更新场景匹配]
380
+ ```
381
+
382
+ ### 2.2 缓存层级对比
383
+
384
+ | 层级 | 缓存内容 | 优势 | 劣势 | TTL | 适用场景 |
385
+ |------|---------|------|------|-----|---------|
386
+ | **L3: 场景级** | scenario + actions + 变量 | 高语义 | 复杂度高 | 7 天 | 静态页 |
387
+ | **L2: 流程级** | 多场景固定序列 | 中语义 | 复杂度中 | 1-3 天 | 常见流程 |
388
+ | **L1: 原子级** | 单个原子操作 | 灵活组合 | 复杂度低 | 30 分钟 | 通用 |
389
+
390
+ ### 2.3 混合匹配策略
391
+
392
+ ```typescript
393
+ /**
394
+ * 场景识别器
395
+ */
396
+ class ScenarioRecognizer {
397
+ /**
398
+ * 多层匹配策略
399
+ */
400
+ async recognize(
401
+ userInput: string,
402
+ currentUrl: string,
403
+ cacheStore: PageActionCacheStore
404
+ ): Promise<ScenarioMatch | null> {
405
+ // 1. 精确规则匹配(高优先级,低误判)
406
+ const exactMatch = this.matchExactRules(userInput, currentUrl);
407
+ if (exactMatch && exactMatch.confidence >= 90) {
408
+ return exactMatch;
409
+ }
410
+
411
+ // 2. 语义理解(使用 LLM)
412
+ const llmMatch = await this.matchWithLLM(userInput, currentUrl);
413
+ if (llmMatch && llmMatch.confidence >= 70) {
414
+ return llmMatch;
415
+ }
416
+
417
+ // 3. 历史学习(相似输入匹配)
418
+ const historyMatch = this.matchHistory(userInput);
419
+ if (historyMatch && historyMatch.confidence >= 60) {
420
+ return historyMatch;
421
+ }
422
+
423
+ return null; // 都不匹配,交给 LLM 分析
424
+ }
425
+
426
+ private matchExactRules(userInput: string, url: string): ScenarioMatch | null {
427
+ for (const rule of EXACT_SCENARIO_RULES) {
428
+ for (const keyword of rule.keywords) {
429
+ if (userInput.includes(keyword)) {
430
+ return {
431
+ scenario: rule.scenario,
432
+ confidence: rule.priority,
433
+ method: "keyword",
434
+ matchedPattern: keyword
435
+ };
436
+ }
437
+ }
438
+
439
+ for (const pattern of rule.urlPatterns) {
440
+ if (url.includes(pattern)) {
441
+ return {
442
+ scenario: rule.scenario,
443
+ confidence: rule.priority - 10, // 略低于纯关键词
444
+ method: "url",
445
+ matchedPattern: pattern
446
+ };
447
+ }
448
+ }
449
+ }
450
+
451
+ return null;
452
+ }
453
+
454
+ private async matchWithLLM(userInput: string, url: string): Promise<ScenarioMatch | null> {
455
+ // 使用 OpenClaw 的 LLM 进行意图分类
456
+ // 返回场景类型和置信度
457
+ }
458
+
459
+ private matchHistory(userInput: string): ScenarioMatch | null {
460
+ // 在历史记录中查找相似输入
461
+ // 计算 Jaccard 相似度
462
+ // 返回最佳匹配和相似度
463
+ }
464
+ }
465
+
466
+ // 精确场景规则
467
+ const EXACT_SCENARIO_RULES: ScenarioRule[] = [
468
+ {
469
+ scenario: "login",
470
+ keywords: ["登录", "登陆", "注册", "进入", "login to", "sign in", "账号"],
471
+ urlPatterns: ["/login", "/signin", "/auth", "/account"],
472
+ priority: 100,
473
+ cacheLevel: "L3",
474
+ confidence: 90
475
+ },
476
+ {
477
+ scenario: "logout",
478
+ keywords: ["退出", "登出", "注销", "sign out", "logout"],
479
+ urlPatterns: ["/logout", "/signout", "/signout"],
480
+ priority: 100,
481
+ cacheLevel: "L3",
482
+ confidence: 90
483
+ },
484
+ {
485
+ scenario: "search",
486
+ keywords: ["搜索", "查找", "找", "search for", "查询"],
487
+ urlPatterns: ["/search", "/query", "/list"],
488
+ priority: 80,
489
+ cacheLevel: "L3",
490
+ confidence: 80
491
+ },
492
+ {
493
+ scenario: "checkout",
494
+ keywords: ["结账", "结算", "支付", "收银", "买单", "checkout"],
495
+ urlPatterns: ["/checkout", "/cart", "/payment"],
496
+ priority: 90,
497
+ cacheLevel: "L3",
498
+ confidence: 85
499
+ },
500
+ {
501
+ scenario: "settings",
502
+ keywords: ["设置", "配置", "修改", "编辑", "个人中心"],
503
+ urlPatterns: ["/settings", "/config", "/profile"],
504
+ priority: 75,
505
+ cacheLevel: "L3",
506
+ confidence: 75
507
+ },
508
+ {
509
+ scenario: "form_fill",
510
+ keywords: ["填写", "填表单", "提交", "submit form"],
511
+ urlPatterns: [], // 不依赖 URL
512
+ priority: 70,
513
+ cacheLevel: "L2",
514
+ confidence: 70
515
+ }
516
+ ];
517
+ ```
518
+
519
+ ### 2.4 变量解析器
520
+
521
+ ```typescript
522
+ /**
523
+ * 变量解析器
524
+ */
525
+ class VariableResolver {
526
+ /**
527
+ * 从用户输入提取变量
528
+ */
529
+ resolveVariables(
530
+ userInput: string,
531
+ scenario: string
532
+ ): VariableMap {
533
+ // 基于场景类型使用不同的解析策略
534
+ const resolver = this.getResolver(scenario);
535
+ return resolver.resolve(userInput);
536
+ }
537
+
538
+ private getResolver(scenario: string): VariableResolver {
539
+ switch (scenario) {
540
+ case "login":
541
+ case "login_with_vars":
542
+ return new LoginVariableResolver();
543
+ case "checkout":
544
+ return new CheckoutVariableResolver();
545
+ case "settings":
546
+ return new SettingsVariableResolver();
547
+ default:
548
+ return new GenericVariableResolver();
549
+ }
550
+ }
551
+ }
552
+
553
+ /**
554
+ * 登录变量解析器
555
+ */
556
+ class LoginVariableResolver {
557
+ resolve(userInput: string): VariableMap {
558
+ const result: VariableMap = {};
559
+
560
+ // 解析 "用户名 xxx 密码 xxx"
561
+ const usernameMatch = /(?:用户名|账号|username|user)[::\s]*([\u4e00-\u9fa5]+)/g.exec(userInput);
562
+ if (usernameMatch) {
563
+ result.username = usernameMatch[1];
564
+ }
565
+
566
+ const passwordMatch = /(?:密码|password|pwd)[::\s]*(.+)/g.exec(userInput);
567
+ if (passwordMatch) {
568
+ result.password = passwordMatch[1];
569
+ }
570
+
571
+ return result;
572
+ }
573
+ }
574
+
575
+ // 示例
576
+ // 输入:"用 alice 账号登录,密码 secret123"
577
+ // 解析结果:{ username: "alice", password: "secret123" }
578
+ ```
579
+
580
+ ---
581
+
582
+ ### 2.5 智能缓存失效
583
+
584
+ ```typescript
585
+ /**
586
+ * 缓存失效策略
587
+ */
588
+ class CacheInvalidationDetector {
589
+ private cacheStore: PageActionCacheStore;
590
+
591
+ /**
592
+ * 检测页面是否发生变化
593
+ */
594
+ async detectPageChange(
595
+ url: string,
596
+ viewport: PageViewport,
597
+ cachedEntry?: PageActionCacheEntry
598
+ ): Promise<PageChangeDetection> {
599
+ // 计算当前 DOM hash
600
+ const currentHash = await this.calculateDOMHash(url, viewport);
601
+
602
+ if (!cachedEntry || !cachedEntry.pageChangeDetection) {
603
+ return {
604
+ hasChanged: true,
605
+ changeType: "none",
606
+ confidence: 0,
607
+ domHash: currentHash
608
+ };
609
+ }
610
+
611
+ const cachedHash = cachedEntry.pageChangeDetection.domHash;
612
+ const structureChanged = await this.detectStructureChange(currentHash, cachedHash);
613
+ const contentChanged = await this.detectContentChange(currentHash, cachedHash);
614
+
615
+ if (structureChanged || contentChanged) {
616
+ return {
617
+ hasChanged: true,
618
+ changeType: structureChanged ? "structure" : "content",
619
+ confidence: Math.max(
620
+ structureChanged ? 80 : 0,
621
+ contentChanged ? 80 : 0
622
+ ),
623
+ domHash: currentHash,
624
+ lastCheckedAt: Date.now(),
625
+ details: structureChanged ? "Form/Button count changed" : "Content hash changed"
626
+ };
627
+ }
628
+
629
+ return {
630
+ hasChanged: false,
631
+ changeType: "none",
632
+ confidence: 100,
633
+ domHash: currentHash,
634
+ lastCheckedAt: Date.now()
635
+ };
636
+ }
637
+
638
+ private async calculateDOMHash(url: string, viewport: PageViewport): Promise<string> {
639
+ // 获取页面并计算 hash
640
+ const html = await this.fetchPageHTML(url, viewport);
641
+ const features = this.extractDOMFeatures(html);
642
+
643
+ // 组合多个特征的 hash
644
+ return this.combineFeaturesHash(features);
645
+ }
646
+
647
+ private extractDOMFeatures(html: string): DOMHashFeatures {
648
+ // 解析 HTML 提取特征
649
+ return {
650
+ formCount: this.countForms(html),
651
+ buttonCount: this.countButtons(html),
652
+ linkCount: this.countLinks(html),
653
+ scriptCount: this.countScripts(html),
654
+ headingCount: this.countHeadings(html)
655
+ };
656
+ }
657
+
658
+ private combineFeaturesHash(features: DOMHashFeatures): string {
659
+ // 使用简单 hash 算法组合特征
660
+ const values = [
661
+ features.formCount,
662
+ features.buttonCount,
663
+ features.linkCount,
664
+ features.scriptCount,
665
+ features.headingCount
666
+ ];
667
+ return this.hashValues(values);
668
+ }
669
+
670
+ private hashValues(values: number[]): string {
671
+ let hash = 0;
672
+ for (const value of values) {
673
+ hash = ((hash << 5) - hash) + value;
674
+ hash = hash | 0;
675
+ }
676
+ return hash.toString(16);
677
+ }
678
+
679
+ shouldInvalidate(entry: PageActionCacheEntry, changeResult: PageChangeDetection): boolean {
680
+ // 根据页面类型和置信度决定是否失效
681
+ if (!entry.pageType) return false;
682
+
683
+ const threshold = entry.pageType === "static" ? 95 : 80;
684
+ return changeResult.hasChanged && changeResult.confidence >= threshold;
685
+ }
686
+
687
+ async invalidate(url: string, viewport: PageViewport, strategy: "soft" | "hard"): Promise<void> {
688
+ // 策略:soft(保留旧版本)/hard(立即删除)
689
+ await this.cacheStore.delete(url, viewport);
690
+
691
+ if (strategy === "hard") {
692
+ // 硬失效:删除所有匹配的场景
693
+ await this.cacheStore.deleteByPattern(url);
694
+ }
695
+ }
696
+ }
697
+
698
+ /**
699
+ * 渐进式失效:保留多版本
700
+ */
701
+ class ProgressiveInvalidator {
702
+ private versions: Map<string, string[]> = new Map();
703
+
704
+ invalidate(entry: PageActionCacheEntry, changeResult: PageChangeDetection): void {
705
+ const version = this.generateVersion(entry);
706
+ const key = entry.key;
707
+
708
+ if (!this.versions.has(key)) {
709
+ this.versions.set(key, []);
710
+ }
711
+
712
+ const versions = this.versions.get(key)!;
713
+ versions.push(version);
714
+
715
+ // 只保留最近的 N 个版本(默认 3 个)
716
+ if (versions.length > 3) {
717
+ versions.shift();
718
+ }
719
+
720
+ entry.version = versions.length;
721
+ }
722
+
723
+ private generateVersion(entry: PageActionCacheEntry): string {
724
+ // 基于时间戳和页面类型生成版本标识
725
+ const time = new Date(entry.createdAt).toISOString().slice(0, 10); // YYYY-MM-DD
726
+ const pageType = entry.pageType || "unknown";
727
+
728
+ return `${pageType}-${entry.scenario}-${time}`;
729
+ }
730
+ }
731
+ ```
732
+
733
+ ---
734
+
735
+ ## 三、核心模块实现要点
736
+
737
+ ### 3.1 场景识别器 (scenario-recognizer.ts)
738
+
739
+ **功能**:
740
+ - 多层匹配:精确规则 → LLM 分类 → 历史学习
741
+ - 置信度评分:返回 0-100 的置信度
742
+ - 匹配方法标识:keyword, url, llm, history, learned
743
+ - 场景类型预定义:login, logout, search, checkout, settings, form_fill 等
744
+
745
+ **关键实现**:
746
+ - `EXACT_SCENARIO_RULES`:精确定义的规则和优先级
747
+ - `recognize()` 方法:返回最佳匹配或 null
748
+ - 上下文感知:结合当前 URL 和历史记录
749
+
750
+ ---
751
+
752
+ ### 3.2 变量解析器 (variable-resolver.ts)
753
+
754
+ **功能**:
755
+ - 场景特定的变量解析器
756
+ - 支持的解析器:login, checkout, settings, form_fill, search 等
757
+ - 正则表达式模式匹配
758
+ - 变量验证和清理
759
+
760
+ **关键实现**:
761
+ - `LoginVariableResolver`:解析用户名和密码
762
+ - `CheckoutVariableResolver`:解析支付信息
763
+ - `SettingsVariableResolver`:解析配置项
764
+ - `GenericVariableResolver`:通用键值对解析
765
+
766
+ ---
767
+
768
+ ### 3.3 缓存失效检测器 (cache-invalidator.ts)
769
+
770
+ **功能**:
771
+ - DOM hash 计算:提取页面结构特征并生成 hash
772
+ - 页面变化检测:结构变化、内容变化
773
+ - 渐进式失效:保留多个版本,避免误删
774
+ - 失效策略:根据页面类型和置信度决定
775
+
776
+ **关键实现**:
777
+ - `extractDOMFeatures()`:解析 HTML 提取特征
778
+ - `combineFeaturesHash()`:组合多个特征为 hash
779
+ - `shouldInvalidate()`:基于页面类型和置信度决定是否失效
780
+ - `invalidate()`:支持 soft/hard 两种策略
781
+
782
+ ---
783
+
784
+ ### 3.4 安全策略 (security-policy.ts)
785
+
786
+ **功能**:
787
+ - 敏感变量检测:识别 password, token, code 等敏感字段
788
+ - 缓存存储加密:敏感数据加密存储
789
+ - 访问控制:基于用户 ID 的访问控制
790
+ - 日志脱敏:输出日志时自动脱敏
791
+
792
+ **关键实现**:
793
+ - `isSensitiveAction()`:检查操作是否包含敏感变量
794
+ - `encryptCacheEntry()`:加密敏感缓存条目
795
+ - `canAccessCache()`:检查访问权限
796
+ - `sanitizeForLogging()`:移除敏感信息的日志
797
+
798
+ ---
799
+
800
+ ### 3.5 用户体验增强 (ux-enhancer.ts)
801
+
802
+ **功能**:
803
+ - 缓存状态展示:向用户展示缓存命中情况
804
+ - 用户确认机制:让用户确认是否使用缓存
805
+ - 强制刷新:用户可以强制刷新特定页面的缓存
806
+ - 错误反馈:用户可以报告缓存执行错误
807
+ - 使用统计:追踪缓存效果
808
+
809
+ **关键实现**:
810
+ - `showCacheStatus()`:展示缓存匹配结果和操作列表
811
+ - `confirmCacheUsage()`:用户确认是否使用缓存
812
+ - `forceRefreshCache()`:强制刷新缓存
813
+ - `reportCacheError()`:报告缓存错误
814
+
815
+ ---
816
+
817
+ ## 四、执行逻辑
818
+
819
+ ### 4.1 首次访问(无缓存)
820
+
821
+ ```
822
+ 1. 用户输入:"登录 example.com"
823
+ 2. 场景识别器匹配 → 场景: login
824
+ 3. before_prompt_build hook 注入:检测到场景并提示 LLM
825
+ 4. LLM 决定使用场景 → 调用场景特定工具
826
+ 5. 工具返回:
827
+ - 缓存存在?否 → 使用 LLM 生成操作
828
+ - 缓存存在?是 → 返回场景变量和操作模板
829
+ 6. LLM 决定不使用缓存 → 正常调用 browser 工具
830
+ 7. after_tool_call hook:
831
+ - 场景已识别 → 保存到 L3 级缓存(包含变量)
832
+ - 场景未识别 → 保存到 L1 级缓存(纯操作)
833
+ ```
834
+
835
+ ### 4.2 后续访问(有缓存)
836
+
837
+ ```
838
+ 1. 用户输入:"登录 example.com"
839
+ 2. 场景识别器匹配 → 场景: login(confidence: 95)
840
+ 3. before_tool_call hook 注入:传递缓存信息给 LLM
841
+ 4. LLM 收到缓存信息:
842
+ - cacheInfo.exists = true
843
+ - cacheInfo.actions = [...]
844
+ - cacheInfo.variables = { username: "xxx", password: "xxx" }
845
+ 5. LLM 分析并决定:
846
+ - 使用缓存:调用 execute_cached 工具
847
+ - 不使用缓存:调用 browser 工具重新分析
848
+ 6. execute_cached 工具执行:
849
+ - 验证缓存未过期
850
+ - 检查页面变化(DOM hash)
851
+ - 逐个执行原子操作
852
+ - 返回执行结果
853
+ 7. Agent 根据结果继续
854
+ ```
855
+
856
+ ### 4.3 变量传递机制(Playwright 对齐版本)
857
+
858
+ ```typescript
859
+ // LLM 返回的操作(带变量)- Playwright 参数名
860
+ {
861
+ actions: [
862
+ { type: "type", ref: "#username", variable: "username", order: 1 },
863
+ { type: "type", ref: "#password", variable: "password", order: 2 },
864
+ { type: "press", key: "Tab", order: 3 }, // 切换焦点
865
+ { type: "click", ref: "#submit-btn", order: 4 }
866
+ ]
867
+ }
868
+
869
+ // 用户输入
870
+ "登录 example.com,用户名 alice,密码 secret123"
871
+
872
+ // 变量解析后
873
+ {
874
+ variables: { username: "alice", password: "secret123" }
875
+ }
876
+
877
+ // 执行时替换(Playwright 对齐参数)
878
+ {
879
+ actualActions: [
880
+ { type: "type", ref: "#username", text: "alice", order: 1 },
881
+ { type: "type", ref: "#password", text: "secret123", order: 2 },
882
+ { type: "press", key: "Tab", order: 3 },
883
+ { type: "click", ref: "#submit-btn", order: 4 }
884
+ ]
885
+ }
886
+
887
+ // 调用 Playwright API(示例)
888
+ await pw.typeViaPlaywright({
889
+ cdpUrl: config.cdpUrl,
890
+ targetId: context.targetId,
891
+ ref: "#username",
892
+ text: "alice"
893
+ });
894
+
895
+ await pw.typeViaPlaywright({
896
+ cdpUrl: config.cdpUrl,
897
+ targetId: context.targetId,
898
+ ref: "#password",
899
+ text: "secret123"
900
+ });
901
+
902
+ await pw.pressKeyViaPlaywright({
903
+ cdpUrl: config.cdpUrl,
904
+ targetId: context.targetId,
905
+ key: "Tab"
906
+ });
907
+
908
+ await pw.clickViaPlaywright({
909
+ cdpUrl: config.cdpUrl,
910
+ targetId: context.targetId,
911
+ ref: "#submit-btn",
912
+ button: "left"
913
+ });
914
+ ```
915
+
916
+ ---
917
+
918
+ ## 五、工具定义
919
+
920
+ ### 5.1 execute_cached(核心工具)
921
+
922
+ ```typescript
923
+ {
924
+ name: "execute_cached",
925
+ label: "Execute Cached Actions",
926
+ description: "执行缓存的页面操作序列,跳过 LLM 分析",
927
+
928
+ parameters: {
929
+ type: "object",
930
+ properties: {
931
+ cacheKey: {
932
+ type: "string",
933
+ description: "缓存键(从 cache_info 中获取)"
934
+ },
935
+ fromIndex: {
936
+ type: "number",
937
+ description: "从第几个操作开始执行(默认 0)"
938
+ },
939
+ toIndex: {
940
+ type: "number",
941
+ description: "执行到第几个操作(不指定则执行全部)"
942
+ },
943
+ dryRun: {
944
+ type: "boolean",
945
+ description: "试运行模式,不实际执行"
946
+ },
947
+ force: {
948
+ type: "boolean",
949
+ description: "强制执行(忽略页面变化检测)"
950
+ }
951
+ }
952
+ },
953
+
954
+ execute: async (toolCallId, args) => {
955
+ const { cacheKey, fromIndex, toIndex, dryRun, force } = args;
956
+
957
+ // 获取缓存条目
958
+ const entry = await cacheStore.getEntry(cacheKey);
959
+
960
+ if (!entry) {
961
+ return {
962
+ content: [{ type: "text", text: "缓存不存在或已过期" }]
963
+ };
964
+ }
965
+
966
+ // 检查页面变化(强制模式跳过)
967
+ if (!force) {
968
+ const changeResult = await cacheInvalidator.detectPageChange(
969
+ entry.url,
970
+ entry.viewport,
971
+ entry
972
+ );
973
+
974
+ if (changeResult.hasChanged) {
975
+ const strategy = entry.pageType === "static" ? "soft" : "hard";
976
+ await cacheInvalidator.invalidate(
977
+ entry.url,
978
+ entry.viewport,
979
+ strategy
980
+ );
981
+
982
+ return {
983
+ content: [{
984
+ type: "text",
985
+ text: `页面已变化,缓存已失效 (${changeResult.changeType})\n` +
986
+ `置信度:${changeResult.confidence}%`
987
+ }]
988
+ };
989
+ }
990
+ }
991
+
992
+ // 解析并替换变量
993
+ const variables = entry.variables || {};
994
+ const actionsToExecute = applyVariables(
995
+ entry.actions.slice(fromIndex || 0, toIndex !== undefined ? toIndex + 1 : undefined),
996
+ variables
997
+ );
998
+
999
+ if (dryRun) {
1000
+ // 试运行模式,只返回不执行
1001
+ return {
1002
+ content: [{
1003
+ type: "text",
1004
+ text: `试运行模式:将执行 ${actionsToExecute.length} 个操作\n` +
1005
+ formatActions(actionsToExecute)
1006
+ }]
1007
+ };
1008
+ }
1009
+
1010
+ // 逐个执行操作
1011
+ const results = [];
1012
+ for (let i = fromIndex || 0; i < actionsToExecute.length; i++) {
1013
+ const action = actionsToExecute[i];
1014
+ const result = await actionsExecutor.execute(action);
1015
+
1016
+ results.push({
1017
+ action: action.type,
1018
+ success: result.success,
1019
+ error: result.error,
1020
+ duration: result.duration
1021
+ });
1022
+
1023
+ // 检查原子性,失败则停止
1024
+ if (action.atomic && !result.success) {
1025
+ break;
1026
+ }
1027
+ }
1028
+
1029
+ // 更新缓存统计
1030
+ await cacheStore.updateExecutionStats(cacheKey, results);
1031
+
1032
+ return {
1033
+ content: [{
1034
+ type: "text",
1035
+ text: `执行完成:成功 ${results.filter(r => r.success).length}/${results.length} 个操作\n` +
1036
+ formatExecutionResults(results)
1037
+ }]
1038
+ };
1039
+ }
1040
+ }
1041
+
1042
+ /**
1043
+ * 应用变量替换到操作(Playwright 对齐版本)
1044
+ *
1045
+ * 变量替换规则:
1046
+ * 1. 查找 action 中的 variable 字段
1047
+ * 2. 根据操作类型将变量值设置到正确的参数:
1048
+ * - type 操作:设置 text 参数
1049
+ * - press 操作:设置 key 参数
1050
+ * - navigate 操作:设置 url 参数
1051
+ * 3. 支持模板语法:${variable}
1052
+ * 4. 未定义的变量会导致操作跳过
1053
+ */
1054
+ function applyVariables(actions: Action[], variables: VariableMap): Action[] {
1055
+ return actions.map(action => {
1056
+ if (!action.variable) return action;
1057
+
1058
+ // 获取变量值
1059
+ const value = variables[action.variable as string];
1060
+ if (value === undefined) {
1061
+ // 变量未定义,跳过此操作
1062
+ return { ...action, skip: true };
1063
+ }
1064
+
1065
+ // 根据操作类型设置正确的参数
1066
+ switch (action.type) {
1067
+ case "type":
1068
+ // type 操作使用 text 参数
1069
+ return { ...action, text: value };
1070
+
1071
+ case "press":
1072
+ // press 操作使用 key 参数
1073
+ return { ...action, key: value };
1074
+
1075
+ case "navigate":
1076
+ // navigate 操作使用 url 参数
1077
+ return { ...action, url: value };
1078
+
1079
+ case "select":
1080
+ case "upload":
1081
+ // 这些操作也可能使用 evaluate.code 中的变量
1082
+ if (action.evaluate?.code) {
1083
+ return {
1084
+ ...action,
1085
+ evaluate: {
1086
+ ...action.evaluate,
1087
+ code: action.evaluate.code.replace(/\$\{([^}]+)\}/g, (match, varName) => {
1088
+ return variables[varName] || match;
1089
+ })
1090
+ }
1091
+ };
1092
+ }
1093
+ break;
1094
+
1095
+ default:
1096
+ // 其他操作尝试在所有字符串参数中替换变量
1097
+ return replaceTemplateVariables(action, variables);
1098
+ }
1099
+
1100
+ return action;
1101
+ }).filter(a => !(a as any).skip);
1102
+ }
1103
+
1104
+ /**
1105
+ * 在操作的字符串参数中替换模板变量
1106
+ */
1107
+ function replaceTemplateVariables(action: AtomicAction, variables: VariableMap): AtomicAction {
1108
+ const result: any = { ...action };
1109
+
1110
+ // 替换字符串类型参数中的 ${variable}
1111
+ for (const key of Object.keys(action)) {
1112
+ const value = (action as any)[key];
1113
+ if (typeof value === "string") {
1114
+ result[key] = value.replace(/\$\{([^}]+)\}/g, (match, varName) => {
1115
+ const replacement = variables[varName];
1116
+ return replacement !== undefined ? replacement : match;
1117
+ });
1118
+ }
1119
+ }
1120
+
1121
+ // 特殊处理 evaluate.code(如果存在)
1122
+ if (result.evaluate?.code) {
1123
+ result.evaluate.code = result.evaluate.code.replace(/\$\{([^}]+)\}/g, (match, varName) => {
1124
+ const replacement = variables[varName];
1125
+ return replacement !== undefined ? replacement : match;
1126
+ });
1127
+ }
1128
+
1129
+ return result;
1130
+ }
1131
+
1132
+ /**
1133
+ * 格式化操作列表(Playwright 对齐版本)
1134
+ */
1135
+ function formatActions(actions: Action[]): string {
1136
+ return actions.map((action, i) => {
1137
+ const base = action;
1138
+ switch (action.type) {
1139
+ case "navigate":
1140
+ return `${i + 1}. 导航到:${base.url}`;
1141
+ case "screenshot":
1142
+ return `${i + 1}. 截图`;
1143
+ case "click":
1144
+ const btn = base.button || "left";
1145
+ const dbl = base.doubleClick ? "双击" : "";
1146
+ const mod = base.modifiers?.length ? `+${base.modifiers.join("+")}` : "";
1147
+ return `${i + 1}. ${dbl}点击(${btn}${mod}):${base.ref}`;
1148
+ case "type":
1149
+ const txt = base.text || base.value ? `="${base.text || base.value}"` : "";
1150
+ const sub = base.submit ? " [提交]" : "";
1151
+ const slow = base.slowly ? " [慢速]" : "";
1152
+ return `${i + 1}. 输入:${base.ref}${txt}${sub}${slow}`;
1153
+ case "press":
1154
+ const delay = base.delayMs ? `(${base.delayMs}ms)` : "";
1155
+ return `${i + 1}. 按键:${base.key}${delay}${base.ref ? ` @ ${base.ref}` : ""}`;
1156
+ case "hover":
1157
+ return `${i + 1}. 悬停:${base.ref}`;
1158
+ case "scroll":
1159
+ return `${i + 1}. 滚动:${base.evaluate?.code || "向下滚动"}`;
1160
+ case "wait":
1161
+ const waitDelay = base.delay ? `(${base.delay}ms)` : "";
1162
+ return `${i + 1}. 等待:${base.evaluate?.code || "默认等待"}${waitDelay}`;
1163
+ case "select":
1164
+ return `${i + 1}. 选择:${base.ref}`;
1165
+ case "focus":
1166
+ return `${i + 1}. 聚焦:${base.ref}`;
1167
+ case "drag":
1168
+ return `${i + 1}. 拖拽:${base.ref}`;
1169
+ case "upload":
1170
+ return `${i + 1}. 上传:${base.ref}`;
1171
+ default:
1172
+ return `${i + 1}. ${action.type}:${base.ref}`;
1173
+ }
1174
+ }).join('\n');
1175
+ }
1176
+
1177
+ function formatExecutionResults(results: ExecutionResult[]): string {
1178
+ return results.map((r, i) => {
1179
+ const icon = r.success ? '✅' : '❌';
1180
+ const time = r.duration ? `(${r.duration}ms)` : '';
1181
+ return `${icon} 操作 ${i + 1}: ${r.action} ${time}${r.error ? ` - ${r.error}` : ''}`;
1182
+ }).join('\n');
1183
+ }
1184
+
1185
+ interface ExecutionResult {
1186
+ action: string;
1187
+ success: boolean;
1188
+ error?: string;
1189
+ duration?: number;
1190
+ }
1191
+ ```
1192
+
1193
+ ### 5.2 其他工具
1194
+
1195
+ ```typescript
1196
+ // cache_stats: 查看缓存统计
1197
+ // cache_clear: 清空缓存
1198
+ // cache_list: 列出所有缓存
1199
+ // scenario_list: 列出所有场景
1200
+ // force_refresh: 强制刷新指定缓存
1201
+ ```
1202
+
1203
+ ---
1204
+
1205
+ ## 六、Playwright API 对齐说明
1206
+
1207
+ ### 6.1 设计原则
1208
+
1209
+ 本扩展采用 **方案 A:完全对齐** 策略,确保原子操作接口与 Playwright API 完全兼容。
1210
+
1211
+ **核心原则**:
1212
+ 1. **参数名一致**:使用 Playwright 的参数名(`ref` 而非 `selector`,`targetId` 用于标签页指定)
1213
+ 2. **类型一致**:枚举类型与 Playwright API 完全匹配
1214
+ 3. **直接映射**:支持的操作直接映射到 Playwright 函数
1215
+ 4. **扩展兼容**:不支持的操作通过 `evaluate` 扩展
1216
+
1217
+ ### 6.2 操作映射表
1218
+
1219
+ | 缓存操作类型 | Playwright API | 函数签名 | 状态 |
1220
+ |------------|----------------|---------|------|
1221
+ | `navigate` | `navigateViaPlaywright` | `{ cdpUrl, targetId, url, ...navigationPolicy }` | ✅ 完全对齐 |
1222
+ | `click` | `clickViaPlaywright` | `{ cdpUrl, targetId, ref, button, doubleClick, modifiers, timeoutMs }` | ✅ 完全对齐 |
1223
+ | `type` | `typeViaPlaywright` | `{ cdpUrl, targetId, ref, text, submit, slowly, timeoutMs }` | ✅ 完全对齐 |
1224
+ | `press` | `pressKeyViaPlaywright` | `{ cdpUrl, targetId, key, delayMs }` | ✅ 完全对齐 |
1225
+ | `hover` | `hoverViaPlaywright` | `{ cdpUrl, targetId, ref, timeoutMs }` | ✅ 完全对齐 |
1226
+ | `screenshot` | `screenshotViaPlaywright` | `{ cdpUrl, targetId }` | ✅ 完全对齐 |
1227
+ | `scroll` | `evaluateViaPlaywright` | `{ code: "window.scrollTo(...)" }` | ✅ 通过 evaluate |
1228
+ | `wait` | `evaluateViaPlaywright` | `{ code: "await new Promise(r => setTimeout(r, delay))" }` | ✅ 通过 evaluate |
1229
+ | `select` | `evaluateViaPlaywright` | `{ code: "element.value = '...'" }` | ✅ 通过 evaluate |
1230
+ | `focus` | `evaluateViaPlaywright` | `{ code: "element.focus()" }` | ✅ 通过 evaluate |
1231
+ | `drag` | `evaluateViaPlaywright` | `{ code: "element.dragAndDrop(...)" }` | ✅ 通过 evaluate |
1232
+ | `upload` | `evaluateViaPlaywright` | `{ code: "element.uploadFile(...)" }` | ✅ 通过 evaluate |
1233
+
1234
+ ### 6.3 参数名对照表
1235
+
1236
+ | 旧参数名 | 新参数名(Playwright) | 说明 |
1237
+ |---------|---------------------|------|
1238
+ | `selector` | `ref` | 元素引用 ID |
1239
+ | `value` | `text` (type) / `key` (press) / `url` (navigate) | 根据操作类型确定 |
1240
+ | `count` | - | 使用 doubleClick 替代 |
1241
+ | `delay` | `delayMs` (press) / `timeoutMs` (其他) | 统一使用 Ms 后缀 |
1242
+ | N/A | `cdpUrl` | CDP 端点 URL(必需) |
1243
+ | N/A | `targetId` | 目标标签页 ID |
1244
+
1245
+ ### 6.4 枚举类型对照
1246
+
1247
+ #### 按钮类型
1248
+ ```typescript
1249
+ // Playwright 支持的按钮
1250
+ type PlaywrightButton = "left" | "right" | "middle";
1251
+
1252
+ // 使用示例
1253
+ { type: "click", ref: "#btn", button: "left" }
1254
+ ```
1255
+
1256
+ #### 修饰符
1257
+ ```typescript
1258
+ // Playwright 支持的修饰符
1259
+ type PlaywrightModifier = "Alt" | "Control" | "ControlOrMeta" | "Meta" | "Shift";
1260
+
1261
+ // 使用示例
1262
+ { type: "click", ref: "#btn", modifiers: ["Control", "Shift"] }
1263
+ ```
1264
+
1265
+ #### 导航策略
1266
+ ```typescript
1267
+ interface NavigationPolicy {
1268
+ waitUntil?: "load" | "domcontentloaded" | "networkidle";
1269
+ timeout?: number;
1270
+ }
1271
+
1272
+ // 使用示例
1273
+ {
1274
+ type: "navigate",
1275
+ url: "https://example.com",
1276
+ navigationPolicy: {
1277
+ waitUntil: "domcontentloaded",
1278
+ timeout: 30000
1279
+ }
1280
+ }
1281
+ ```
1282
+
1283
+ ### 6.5 执行器实现
1284
+
1285
+ ```typescript
1286
+ /**
1287
+ * 操作执行器(Playwright 对齐版本)
1288
+ */
1289
+ class ActionsExecutor {
1290
+ private pw: PwAi; // Playwright 客户端
1291
+
1292
+ async execute(action: AtomicAction): Promise<ExecutionResult> {
1293
+ const startTime = Date.now();
1294
+
1295
+ try {
1296
+ // 根据操作类型调用对应的 Playwright 函数
1297
+ switch (action.type) {
1298
+ case "navigate":
1299
+ await this.executeNavigate(action as NavigateAction);
1300
+ break;
1301
+
1302
+ case "click":
1303
+ await this.executeClick(action as ClickAction);
1304
+ break;
1305
+
1306
+ case "type":
1307
+ await this.executeType(action as TypeAction);
1308
+ break;
1309
+
1310
+ case "press":
1311
+ await this.executePress(action as PressAction);
1312
+ break;
1313
+
1314
+ case "hover":
1315
+ await this.executeHover(action as HoverAction);
1316
+ break;
1317
+
1318
+ case "screenshot":
1319
+ await this.executeScreenshot(action);
1320
+ break;
1321
+
1322
+ case "scroll":
1323
+ case "wait":
1324
+ case "select":
1325
+ case "focus":
1326
+ case "drag":
1327
+ case "upload":
1328
+ await this.executeEvaluate(action);
1329
+ break;
1330
+
1331
+ default:
1332
+ throw new Error(`Unsupported action type: ${action.type}`);
1333
+ }
1334
+
1335
+ return {
1336
+ success: true,
1337
+ duration: Date.now() - startTime
1338
+ };
1339
+ } catch (error) {
1340
+ return {
1341
+ success: false,
1342
+ error: String(error),
1343
+ duration: Date.now() - startTime
1344
+ };
1345
+ }
1346
+ }
1347
+
1348
+ private async executeNavigate(action: NavigateAction): Promise<void> {
1349
+ await this.pw.navigateViaPlaywright({
1350
+ cdpUrl: action.cdpUrl,
1351
+ targetId: action.targetId,
1352
+ url: action.url,
1353
+ ...action.navigationPolicy
1354
+ });
1355
+ }
1356
+
1357
+ private async executeClick(action: ClickAction): Promise<void> {
1358
+ await this.pw.clickViaPlaywright({
1359
+ cdpUrl: action.cdpUrl,
1360
+ targetId: action.targetId,
1361
+ ref: action.ref,
1362
+ doubleClick: action.doubleClick,
1363
+ button: action.button || "left",
1364
+ modifiers: action.modifiers,
1365
+ timeoutMs: action.timeoutMs
1366
+ });
1367
+ }
1368
+
1369
+ private async executeType(action: TypeAction): Promise<void> {
1370
+ await this.pw.typeViaPlaywright({
1371
+ cdpUrl: action.cdpUrl,
1372
+ targetId: action.targetId,
1373
+ ref: action.ref,
1374
+ text: action.text,
1375
+ submit: action.submit,
1376
+ slowly: action.slowly,
1377
+ timeoutMs: action.timeoutMs
1378
+ });
1379
+ }
1380
+
1381
+ private async executePress(action: PressAction): Promise<void> {
1382
+ await this.pw.pressKeyViaPlaywright({
1383
+ cdpUrl: action.cdpUrl,
1384
+ targetId: action.targetId,
1385
+ key: action.key,
1386
+ delayMs: action.delayMs
1387
+ });
1388
+ }
1389
+
1390
+ private async executeHover(action: HoverAction): Promise<void> {
1391
+ await this.pw.hoverViaPlaywright({
1392
+ cdpUrl: action.cdpUrl,
1393
+ targetId: action.targetId,
1394
+ ref: action.ref,
1395
+ timeoutMs: action.timeoutMs
1396
+ });
1397
+ }
1398
+
1399
+ private async executeScreenshot(action: AtomicAction): Promise<void> {
1400
+ await this.pw.screenshotViaPlaywright({
1401
+ cdpUrl: action.cdpUrl,
1402
+ targetId: action.targetId
1403
+ });
1404
+ }
1405
+
1406
+ private async executeEvaluate(action: AtomicAction): Promise<void> {
1407
+ if (!action.evaluate?.code) {
1408
+ throw new Error(`evaluate action missing code for type ${action.type}`);
1409
+ }
1410
+
1411
+ // 使用 Playwright 的 evaluate API 执行自定义代码
1412
+ const result = await this.pw.evaluateViaPlaywright({
1413
+ cdpUrl: action.cdpUrl,
1414
+ targetId: action.targetId,
1415
+ code: action.evaluate.code,
1416
+ args: action.evaluate.args
1417
+ });
1418
+
1419
+ return result;
1420
+ }
1421
+ }
1422
+
1423
+ interface ExecutionResult {
1424
+ success: boolean;
1425
+ error?: string;
1426
+ duration: number;
1427
+ }
1428
+ ```
1429
+
1430
+ ### 6.6 完整示例
1431
+
1432
+ ```typescript
1433
+ // 登录场景的缓存操作(Playwright 对齐版本)
1434
+ const loginActions: AtomicAction[] = [
1435
+ {
1436
+ type: "navigate",
1437
+ url: "https://example.com/login",
1438
+ order: 1,
1439
+ description: "导航到登录页面"
1440
+ },
1441
+ {
1442
+ type: "type",
1443
+ ref: "#username",
1444
+ variable: "username", // 使用变量
1445
+ order: 2,
1446
+ description: "输入用户名"
1447
+ },
1448
+ {
1449
+ type: "type",
1450
+ ref: "#password",
1451
+ variable: "password", // 使用变量
1452
+ order: 3,
1453
+ description: "输入密码"
1454
+ },
1455
+ {
1456
+ type: "press",
1457
+ key: "Enter", // 或者点击登录按钮
1458
+ order: 4,
1459
+ description: "提交登录表单"
1460
+ },
1461
+ {
1462
+ type: "wait",
1463
+ evaluate: {
1464
+ code: "new Promise(r => setTimeout(r, 1000))"
1465
+ },
1466
+ order: 5,
1467
+ description: "等待登录完成"
1468
+ }
1469
+ ];
1470
+
1471
+ // 执行时的实际操作(变量替换后)
1472
+ const actualActions = applyVariables(loginActions, {
1473
+ username: "alice",
1474
+ password: "secret123"
1475
+ });
1476
+ // 结果:
1477
+ // [
1478
+ // { type: "navigate", url: "https://example.com/login", order: 1 },
1479
+ // { type: "type", ref: "#username", text: "alice", order: 2 },
1480
+ // { type: "type", ref: "#password", text: "secret123", order: 3 },
1481
+ // { type: "press", key: "Enter", order: 4 },
1482
+ // { type: "wait", evaluate: { code: "..." }, order: 5 }
1483
+ // ]
1484
+ ```
1485
+
1486
+ ---
1487
+
1488
+ ## 七、Hooks 集成要点
1489
+
1490
+ ### 7.1 before_tool_call hook
1491
+
1492
+ ```typescript
1493
+ api.registerHook({
1494
+ name: "before_tool_call",
1495
+ priority: 100,
1496
+ description: "场景识别和缓存拦截器",
1497
+
1498
+ async handler(event, ctx) {
1499
+ if (event.tool.name !== "browser") return;
1500
+
1501
+ const params = event.params;
1502
+ const url = params.url as string;
1503
+ const action = params.action as string;
1504
+
1505
+ // 只缓存特定操作
1506
+ if (!config.cacheActions.includes(action)) return;
1507
+
1508
+ // 场景识别
1509
+ const userInput = extractUserInput(ctx);
1510
+ const currentUrl = url || ctx.currentUrl;
1511
+ const match = await scenarioRecognizer.recognize(userInput, currentUrl);
1512
+
1513
+ if (!match) {
1514
+ // 无匹配,返回 cacheInfo(不拦截)
1515
+ return {
1516
+ block: false,
1517
+ result: {
1518
+ cacheInfo: {
1519
+ exists: false,
1520
+ confidence: 0
1521
+ }
1522
+ }
1523
+ };
1524
+ }
1525
+
1526
+ // 高置信度匹配,返回 cacheInfo(不拦截,让 LLM 决定)
1527
+ if (match.confidence >= 80) {
1528
+ return {
1529
+ block: false,
1530
+ result: {
1531
+ cacheInfo: {
1532
+ exists: match.cacheLevel ? true : false,
1533
+ scenario: match.scenario,
1534
+ confidence: match.confidence,
1535
+ method: match.method,
1536
+ matchedPattern: match.matchedPattern
1537
+ }
1538
+ }
1539
+ };
1540
+ }
1541
+
1542
+ // 检查缓存
1543
+ const viewport = extractViewport(ctx, params);
1544
+ const entry = await cacheStore.get(url, viewport);
1545
+
1546
+ if (entry && entry.cacheLevel === "L3") {
1547
+ // L3 缓存命中,返回 cacheInfo
1548
+ return {
1549
+ block: false,
1550
+ result: {
1551
+ cacheInfo: {
1552
+ exists: true,
1553
+ cacheLevel: "L3",
1554
+ scenario: entry.scenario,
1555
+ actions: entry.actions,
1556
+ variables: entry.variables
1557
+ }
1558
+ }
1559
+ };
1560
+ }
1561
+
1562
+ return { block: false };
1563
+ }
1564
+ });
1565
+ ```
1566
+
1567
+ ### 7.2 after_tool_call hook
1568
+
1569
+ ```typescript
1570
+ api.registerHook({
1571
+ name: "after_tool_call",
1572
+ priority: 100,
1573
+ description: "保存 LLM 分析结果到缓存",
1574
+
1575
+ async handler(event, ctx) {
1576
+ if (event.tool.name !== "browser") return;
1577
+
1578
+ const params = event.params;
1579
+ const url = params.url as string;
1580
+ const action = params.action as string;
1581
+
1582
+ // 只缓存特定操作
1583
+ if (!config.cacheActions.includes(action)) return;
1584
+
1585
+ // 检查 LLM 是否返回了场景类型
1586
+ const result = event.result;
1587
+ const llmScenario = result.scenario as string | undefined;
1588
+
1589
+ if (!llmScenario) {
1590
+ // 没有 LLM 标记的场景,正常保存
1591
+ return;
1592
+ }
1593
+
1594
+ // 识别场景并获取变量
1595
+ const userInput = extractUserInput(ctx);
1596
+ const scenarioMatch = await scenarioRecognizer.recognize(userInput, url);
1597
+
1598
+ // 决定缓存层级
1599
+ const cacheLevel = scenarioMatch.confidence >= 85 ? "L3" :
1600
+ scenarioMatch.confidence >= 70 ? "L2" : "L1";
1601
+
1602
+ // 解析变量
1603
+ const variables = await variableResolver.resolveVariables(userInput, scenarioMatch.scenario);
1604
+
1605
+ // 保存到缓存
1606
+ await cacheStore.set(url, viewport, result.actions, {
1607
+ scenario: scenarioMatch.scenario,
1608
+ cacheLevel,
1609
+ variables,
1610
+ actions: result.actions as AtomicAction[]
1611
+ });
1612
+ }
1613
+ });
1614
+ ```
1615
+
1616
+ ### 7.3 before_prompt_build hook
1617
+
1618
+ ```typescript
1619
+ api.registerHook({
1620
+ name: "before_prompt_build",
1621
+ priority: 50,
1622
+
1623
+ async handler(event, ctx) {
1624
+ if (!config.enabled || config.autoUseCache === false) return;
1625
+
1626
+ const stats = cacheStore.getStats();
1627
+
1628
+ // 只在命中率较高时注入
1629
+ if (stats.hitRate < 30) return;
1630
+
1631
+ // 构建缓存使用指南
1632
+ const guide = `
1633
+ 【页面操作缓存系统】
1634
+
1635
+ 当前缓存统计:
1636
+ - 总条目数:${stats.totalEntries}
1637
+ - 总命中:${stats.totalHits}
1638
+ - 命中率:${stats.hitRate}%
1639
+
1640
+ 已缓存的场景:
1641
+ ${cacheStore.getTopScenarios(5)}
1642
+
1643
+ 使用方式:
1644
+ 1. 如果工具返回 cacheInfo 信息,优先使用缓存
1645
+ 2. 执行缓存操作可以大幅降低 token 消耗和操作延迟
1646
+ 3. 如果页面结构变化,缓存会自动失效
1647
+
1648
+ 支持的场景:login, logout, search, checkout, settings, form_fill
1649
+
1650
+ 缓存层级:
1651
+ - L3: 场景级缓存(包含变量模板)- 高置信度时直接使用
1652
+ - L2: 流程级缓存 - 中置信度时使用
1653
+ - L1: 原子级缓存 - 低置信度或无场景时使用
1654
+ `;
1655
+
1656
+ return { prependContext: guide };
1657
+ }
1658
+ });
1659
+ ```
1660
+
1661
+ ---
1662
+
1663
+ ## 八、配置选项
1664
+
1665
+ ```typescript
1666
+ interface CacheConfig {
1667
+ // === 全局开关 ===
1668
+ enabled: boolean;
1669
+ autoUseCache: boolean;
1670
+
1671
+ // === 场景识别 ===
1672
+ scenarioRecognitionEnabled: boolean;
1673
+ llmClassificationThreshold: number; // LLM 分类阈值
1674
+
1675
+ // === 缓存策略 ===
1676
+ cacheLevelStrategy: "auto" | "l3-only" | "l2-only" | "l1-only";
1677
+ defaultCacheLevel: string; // 默认使用的层级
1678
+
1679
+ // === 缓存失效 ===
1680
+ pageChangeDetectionEnabled: boolean;
1681
+ changeInvalidationThreshold: number; // 变化失效阈值
1682
+ invalidationStrategy: "soft" | "hard";
1683
+ maxVersionsPerEntry: number; // 每个缓存保留的最大版本数
1684
+
1685
+ // === 变量系统 ===
1686
+ variableExtractionEnabled: boolean;
1687
+ allowUserConfirmVariables: boolean;
1688
+
1689
+ // === 安全 ===
1690
+ encryptSensitiveCache: boolean;
1691
+ accessControlEnabled: boolean;
1692
+ allowedUserIds: string[];
1693
+ logSanitizationEnabled: boolean;
1694
+
1695
+ // === 用户体验 ===
1696
+ showCacheStatusToUser: boolean;
1697
+ enableUserCacheConfirmation: boolean;
1698
+ enableUserForcedRefresh: boolean;
1699
+ enableUserCacheErrorReport: boolean;
1700
+
1701
+ // === 统计 ===
1702
+ trackExecutionStats: boolean;
1703
+ statsUpdateInterval: number; // 统计更新间隔(秒)
1704
+ }
1705
+ ```
1706
+
1707
+ ---
1708
+
1709
+ ## 九、完整流程图
1710
+
1711
+ ```mermaid
1712
+ graph TD
1713
+ A[用户输入] --> B{场景识别器}
1714
+
1715
+ B -->|精确规则匹配| C{匹配置信度≥90}
1716
+ B -->|LLM 分类| D{匹配置信度≥70}
1717
+ B -->|历史学习| E{匹配置信度≥60}
1718
+ B --> F{无匹配}
1719
+
1720
+ C -->|置信度≥80| G[返回缓存信息]
1721
+ C -->|置信度<80| H[不返回缓存]
1722
+
1723
+ G --> I{LLM 决定}
1724
+
1725
+ I --> J{调用 execute_cached?}
1726
+
1727
+ J -->|否| K[调用 browser 工具]
1728
+
1729
+ J -->|是| L{验证缓存并检测变化}
1730
+
1731
+ L --> M[执行缓存操作]
1732
+
1733
+ M --> N[记录执行统计]
1734
+
1735
+ N --> O{页面变化?}
1736
+
1737
+ O -->|是| P[失效缓存]
1738
+ O -->|否| Q[保持缓存]
1739
+ ```
1740
+
1741
+ ---
1742
+
1743
+ ## 十、关键问题检查
1744
+
1745
+ ### ✅ 已解决的问题
1746
+
1747
+ | 问题 | 解决方案 | 优先级 |
1748
+ |------|---------|--------|
1749
+ | **变量传递** | 多个解析器支持不同场景 | P0 |
1750
+ | **场景识别** | 多层匹配(规则+LLM+历史) | P0 |
1751
+ | **缓存失效** | DOM hash + 渐进式失效 | P1 |
1752
+ | **并发控制** | 文件锁 + 版本控制 | P1 |
1753
+ | **安全性** | 敏感变量检测 + 加密存储 | P0 |
1754
+ | **用户体验** | 状态展示 + 确认机制 | P1 |
1755
+ | **Playwright 对齐** | 参数名与 Playwright API 完全一致(ref, targetId, cdpUrl 等) | P0 |
1756
+
1757
+ ---
1758
+
1759
+ ## 十一、实现优先级
1760
+
1761
+ ### 阶段 1:核心功能(P0)
1762
+
1763
+ 1. 原子操作缓存(L1 级)
1764
+ 2. 基本场景识别
1765
+ 3. 变量解析器
1766
+ 4. 安全策略
1767
+ 5. Hooks 集成
1768
+
1769
+ ### 阶段 2:场景级缓存(P1)
1770
+
1771
+ 1. 精确规则匹配
1772
+ 2. 场景模板
1773
+ 3. 变量模板
1774
+ 4. 场景学习
1775
+
1776
+ ### 阶段 3:智能失效(P1)
1777
+
1778
+ 1. DOM hash 计算
1779
+ 2. 页面变化检测
1780
+ 3. 渐进式失效
1781
+
1782
+ ### 阶段 4:用户体验增强(P2)
1783
+
1784
+ 1. 缓存状态展示
1785
+ 2. 用户确认机制
1786
+ 3. 强制刷新
1787
+
1788
+ ---
1789
+
1790
+ **文档版本**: 5.0.0
1791
+ **最后更新**: 2026-03-14
1792
+ **更新内容**: 完成代码实现、测试和 Playwright 完全对齐