@predicatelabs/sdk 0.99.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. package/LICENSE +24 -0
  2. package/README.md +252 -0
  3. package/dist/actions.d.ts +185 -0
  4. package/dist/actions.d.ts.map +1 -0
  5. package/dist/actions.js +1120 -0
  6. package/dist/actions.js.map +1 -0
  7. package/dist/agent-runtime.d.ts +352 -0
  8. package/dist/agent-runtime.d.ts.map +1 -0
  9. package/dist/agent-runtime.js +1170 -0
  10. package/dist/agent-runtime.js.map +1 -0
  11. package/dist/agent.d.ts +164 -0
  12. package/dist/agent.d.ts.map +1 -0
  13. package/dist/agent.js +408 -0
  14. package/dist/agent.js.map +1 -0
  15. package/dist/asserts/expect.d.ts +159 -0
  16. package/dist/asserts/expect.d.ts.map +1 -0
  17. package/dist/asserts/expect.js +547 -0
  18. package/dist/asserts/expect.js.map +1 -0
  19. package/dist/asserts/index.d.ts +58 -0
  20. package/dist/asserts/index.d.ts.map +1 -0
  21. package/dist/asserts/index.js +70 -0
  22. package/dist/asserts/index.js.map +1 -0
  23. package/dist/asserts/query.d.ts +199 -0
  24. package/dist/asserts/query.d.ts.map +1 -0
  25. package/dist/asserts/query.js +288 -0
  26. package/dist/asserts/query.js.map +1 -0
  27. package/dist/backends/actions.d.ts +119 -0
  28. package/dist/backends/actions.d.ts.map +1 -0
  29. package/dist/backends/actions.js +291 -0
  30. package/dist/backends/actions.js.map +1 -0
  31. package/dist/backends/browser-use-adapter.d.ts +131 -0
  32. package/dist/backends/browser-use-adapter.d.ts.map +1 -0
  33. package/dist/backends/browser-use-adapter.js +219 -0
  34. package/dist/backends/browser-use-adapter.js.map +1 -0
  35. package/dist/backends/cdp-backend.d.ts +66 -0
  36. package/dist/backends/cdp-backend.d.ts.map +1 -0
  37. package/dist/backends/cdp-backend.js +273 -0
  38. package/dist/backends/cdp-backend.js.map +1 -0
  39. package/dist/backends/index.d.ts +80 -0
  40. package/dist/backends/index.d.ts.map +1 -0
  41. package/dist/backends/index.js +101 -0
  42. package/dist/backends/index.js.map +1 -0
  43. package/dist/backends/protocol.d.ts +156 -0
  44. package/dist/backends/protocol.d.ts.map +1 -0
  45. package/dist/backends/protocol.js +16 -0
  46. package/dist/backends/protocol.js.map +1 -0
  47. package/dist/backends/sentience-context.d.ts +143 -0
  48. package/dist/backends/sentience-context.d.ts.map +1 -0
  49. package/dist/backends/sentience-context.js +359 -0
  50. package/dist/backends/sentience-context.js.map +1 -0
  51. package/dist/backends/snapshot.d.ts +188 -0
  52. package/dist/backends/snapshot.d.ts.map +1 -0
  53. package/dist/backends/snapshot.js +360 -0
  54. package/dist/backends/snapshot.js.map +1 -0
  55. package/dist/browser.d.ts +154 -0
  56. package/dist/browser.d.ts.map +1 -0
  57. package/dist/browser.js +920 -0
  58. package/dist/browser.js.map +1 -0
  59. package/dist/canonicalization.d.ts +126 -0
  60. package/dist/canonicalization.d.ts.map +1 -0
  61. package/dist/canonicalization.js +161 -0
  62. package/dist/canonicalization.js.map +1 -0
  63. package/dist/captcha/strategies.d.ts +12 -0
  64. package/dist/captcha/strategies.d.ts.map +1 -0
  65. package/dist/captcha/strategies.js +43 -0
  66. package/dist/captcha/strategies.js.map +1 -0
  67. package/dist/captcha/types.d.ts +45 -0
  68. package/dist/captcha/types.d.ts.map +1 -0
  69. package/dist/captcha/types.js +12 -0
  70. package/dist/captcha/types.js.map +1 -0
  71. package/dist/cli.d.ts +5 -0
  72. package/dist/cli.d.ts.map +1 -0
  73. package/dist/cli.js +422 -0
  74. package/dist/cli.js.map +1 -0
  75. package/dist/conversational-agent.d.ts +123 -0
  76. package/dist/conversational-agent.d.ts.map +1 -0
  77. package/dist/conversational-agent.js +341 -0
  78. package/dist/conversational-agent.js.map +1 -0
  79. package/dist/cursor-policy.d.ts +41 -0
  80. package/dist/cursor-policy.d.ts.map +1 -0
  81. package/dist/cursor-policy.js +81 -0
  82. package/dist/cursor-policy.js.map +1 -0
  83. package/dist/debugger.d.ts +28 -0
  84. package/dist/debugger.d.ts.map +1 -0
  85. package/dist/debugger.js +107 -0
  86. package/dist/debugger.js.map +1 -0
  87. package/dist/expect.d.ts +16 -0
  88. package/dist/expect.d.ts.map +1 -0
  89. package/dist/expect.js +67 -0
  90. package/dist/expect.js.map +1 -0
  91. package/dist/failure-artifacts.d.ts +95 -0
  92. package/dist/failure-artifacts.d.ts.map +1 -0
  93. package/dist/failure-artifacts.js +805 -0
  94. package/dist/failure-artifacts.js.map +1 -0
  95. package/dist/generator.d.ts +16 -0
  96. package/dist/generator.d.ts.map +1 -0
  97. package/dist/generator.js +205 -0
  98. package/dist/generator.js.map +1 -0
  99. package/dist/index.d.ts +37 -0
  100. package/dist/index.d.ts.map +1 -0
  101. package/dist/index.js +160 -0
  102. package/dist/index.js.map +1 -0
  103. package/dist/inspector.d.ts +13 -0
  104. package/dist/inspector.d.ts.map +1 -0
  105. package/dist/inspector.js +153 -0
  106. package/dist/inspector.js.map +1 -0
  107. package/dist/llm-provider.d.ts +144 -0
  108. package/dist/llm-provider.d.ts.map +1 -0
  109. package/dist/llm-provider.js +460 -0
  110. package/dist/llm-provider.js.map +1 -0
  111. package/dist/ordinal.d.ts +90 -0
  112. package/dist/ordinal.d.ts.map +1 -0
  113. package/dist/ordinal.js +249 -0
  114. package/dist/ordinal.js.map +1 -0
  115. package/dist/overlay.d.ts +63 -0
  116. package/dist/overlay.d.ts.map +1 -0
  117. package/dist/overlay.js +102 -0
  118. package/dist/overlay.js.map +1 -0
  119. package/dist/protocols/browser-protocol.d.ts +79 -0
  120. package/dist/protocols/browser-protocol.d.ts.map +1 -0
  121. package/dist/protocols/browser-protocol.js +9 -0
  122. package/dist/protocols/browser-protocol.js.map +1 -0
  123. package/dist/query.d.ts +66 -0
  124. package/dist/query.d.ts.map +1 -0
  125. package/dist/query.js +482 -0
  126. package/dist/query.js.map +1 -0
  127. package/dist/read.d.ts +47 -0
  128. package/dist/read.d.ts.map +1 -0
  129. package/dist/read.js +128 -0
  130. package/dist/read.js.map +1 -0
  131. package/dist/recorder.d.ts +44 -0
  132. package/dist/recorder.d.ts.map +1 -0
  133. package/dist/recorder.js +262 -0
  134. package/dist/recorder.js.map +1 -0
  135. package/dist/runtime-agent.d.ts +72 -0
  136. package/dist/runtime-agent.d.ts.map +1 -0
  137. package/dist/runtime-agent.js +357 -0
  138. package/dist/runtime-agent.js.map +1 -0
  139. package/dist/screenshot.d.ts +17 -0
  140. package/dist/screenshot.d.ts.map +1 -0
  141. package/dist/screenshot.js +40 -0
  142. package/dist/screenshot.js.map +1 -0
  143. package/dist/snapshot-diff.d.ts +23 -0
  144. package/dist/snapshot-diff.d.ts.map +1 -0
  145. package/dist/snapshot-diff.js +119 -0
  146. package/dist/snapshot-diff.js.map +1 -0
  147. package/dist/snapshot.d.ts +47 -0
  148. package/dist/snapshot.d.ts.map +1 -0
  149. package/dist/snapshot.js +358 -0
  150. package/dist/snapshot.js.map +1 -0
  151. package/dist/textSearch.d.ts +64 -0
  152. package/dist/textSearch.d.ts.map +1 -0
  153. package/dist/textSearch.js +113 -0
  154. package/dist/textSearch.js.map +1 -0
  155. package/dist/tools/context.d.ts +18 -0
  156. package/dist/tools/context.d.ts.map +1 -0
  157. package/dist/tools/context.js +40 -0
  158. package/dist/tools/context.js.map +1 -0
  159. package/dist/tools/defaults.d.ts +5 -0
  160. package/dist/tools/defaults.d.ts.map +1 -0
  161. package/dist/tools/defaults.js +368 -0
  162. package/dist/tools/defaults.js.map +1 -0
  163. package/dist/tools/filesystem.d.ts +12 -0
  164. package/dist/tools/filesystem.d.ts.map +1 -0
  165. package/dist/tools/filesystem.js +137 -0
  166. package/dist/tools/filesystem.js.map +1 -0
  167. package/dist/tools/index.d.ts +5 -0
  168. package/dist/tools/index.d.ts.map +1 -0
  169. package/dist/tools/index.js +15 -0
  170. package/dist/tools/index.js.map +1 -0
  171. package/dist/tools/registry.d.ts +38 -0
  172. package/dist/tools/registry.d.ts.map +1 -0
  173. package/dist/tools/registry.js +100 -0
  174. package/dist/tools/registry.js.map +1 -0
  175. package/dist/tracing/cloud-sink.d.ts +189 -0
  176. package/dist/tracing/cloud-sink.d.ts.map +1 -0
  177. package/dist/tracing/cloud-sink.js +1067 -0
  178. package/dist/tracing/cloud-sink.js.map +1 -0
  179. package/dist/tracing/index-schema.d.ts +231 -0
  180. package/dist/tracing/index-schema.d.ts.map +1 -0
  181. package/dist/tracing/index-schema.js +235 -0
  182. package/dist/tracing/index-schema.js.map +1 -0
  183. package/dist/tracing/index.d.ts +12 -0
  184. package/dist/tracing/index.d.ts.map +1 -0
  185. package/dist/tracing/index.js +28 -0
  186. package/dist/tracing/index.js.map +1 -0
  187. package/dist/tracing/indexer.d.ts +20 -0
  188. package/dist/tracing/indexer.d.ts.map +1 -0
  189. package/dist/tracing/indexer.js +347 -0
  190. package/dist/tracing/indexer.js.map +1 -0
  191. package/dist/tracing/jsonl-sink.d.ts +51 -0
  192. package/dist/tracing/jsonl-sink.d.ts.map +1 -0
  193. package/dist/tracing/jsonl-sink.js +329 -0
  194. package/dist/tracing/jsonl-sink.js.map +1 -0
  195. package/dist/tracing/sink.d.ts +25 -0
  196. package/dist/tracing/sink.d.ts.map +1 -0
  197. package/dist/tracing/sink.js +15 -0
  198. package/dist/tracing/sink.js.map +1 -0
  199. package/dist/tracing/tracer-factory.d.ts +102 -0
  200. package/dist/tracing/tracer-factory.d.ts.map +1 -0
  201. package/dist/tracing/tracer-factory.js +375 -0
  202. package/dist/tracing/tracer-factory.js.map +1 -0
  203. package/dist/tracing/tracer.d.ts +140 -0
  204. package/dist/tracing/tracer.d.ts.map +1 -0
  205. package/dist/tracing/tracer.js +336 -0
  206. package/dist/tracing/tracer.js.map +1 -0
  207. package/dist/tracing/types.d.ts +203 -0
  208. package/dist/tracing/types.d.ts.map +1 -0
  209. package/dist/tracing/types.js +8 -0
  210. package/dist/tracing/types.js.map +1 -0
  211. package/dist/types.d.ts +422 -0
  212. package/dist/types.d.ts.map +1 -0
  213. package/dist/types.js +6 -0
  214. package/dist/types.js.map +1 -0
  215. package/dist/utils/action-executor.d.ts +25 -0
  216. package/dist/utils/action-executor.d.ts.map +1 -0
  217. package/dist/utils/action-executor.js +121 -0
  218. package/dist/utils/action-executor.js.map +1 -0
  219. package/dist/utils/browser-evaluator.d.ts +76 -0
  220. package/dist/utils/browser-evaluator.d.ts.map +1 -0
  221. package/dist/utils/browser-evaluator.js +130 -0
  222. package/dist/utils/browser-evaluator.js.map +1 -0
  223. package/dist/utils/browser.d.ts +30 -0
  224. package/dist/utils/browser.d.ts.map +1 -0
  225. package/dist/utils/browser.js +75 -0
  226. package/dist/utils/browser.js.map +1 -0
  227. package/dist/utils/element-filter.d.ts +76 -0
  228. package/dist/utils/element-filter.d.ts.map +1 -0
  229. package/dist/utils/element-filter.js +195 -0
  230. package/dist/utils/element-filter.js.map +1 -0
  231. package/dist/utils/grid-utils.d.ts +37 -0
  232. package/dist/utils/grid-utils.d.ts.map +1 -0
  233. package/dist/utils/grid-utils.js +283 -0
  234. package/dist/utils/grid-utils.js.map +1 -0
  235. package/dist/utils/llm-interaction-handler.d.ts +41 -0
  236. package/dist/utils/llm-interaction-handler.d.ts.map +1 -0
  237. package/dist/utils/llm-interaction-handler.js +171 -0
  238. package/dist/utils/llm-interaction-handler.js.map +1 -0
  239. package/dist/utils/llm-response-builder.d.ts +56 -0
  240. package/dist/utils/llm-response-builder.d.ts.map +1 -0
  241. package/dist/utils/llm-response-builder.js +130 -0
  242. package/dist/utils/llm-response-builder.js.map +1 -0
  243. package/dist/utils/selector-utils.d.ts +12 -0
  244. package/dist/utils/selector-utils.d.ts.map +1 -0
  245. package/dist/utils/selector-utils.js +32 -0
  246. package/dist/utils/selector-utils.js.map +1 -0
  247. package/dist/utils/snapshot-event-builder.d.ts +28 -0
  248. package/dist/utils/snapshot-event-builder.d.ts.map +1 -0
  249. package/dist/utils/snapshot-event-builder.js +88 -0
  250. package/dist/utils/snapshot-event-builder.js.map +1 -0
  251. package/dist/utils/snapshot-processor.d.ts +27 -0
  252. package/dist/utils/snapshot-processor.d.ts.map +1 -0
  253. package/dist/utils/snapshot-processor.js +47 -0
  254. package/dist/utils/snapshot-processor.js.map +1 -0
  255. package/dist/utils/trace-event-builder.d.ts +122 -0
  256. package/dist/utils/trace-event-builder.d.ts.map +1 -0
  257. package/dist/utils/trace-event-builder.js +365 -0
  258. package/dist/utils/trace-event-builder.js.map +1 -0
  259. package/dist/utils/trace-file-manager.d.ts +70 -0
  260. package/dist/utils/trace-file-manager.d.ts.map +1 -0
  261. package/dist/utils/trace-file-manager.js +194 -0
  262. package/dist/utils/trace-file-manager.js.map +1 -0
  263. package/dist/utils/zod.d.ts +5 -0
  264. package/dist/utils/zod.d.ts.map +1 -0
  265. package/dist/utils/zod.js +80 -0
  266. package/dist/utils/zod.js.map +1 -0
  267. package/dist/utils.d.ts +8 -0
  268. package/dist/utils.d.ts.map +1 -0
  269. package/dist/utils.js +13 -0
  270. package/dist/utils.js.map +1 -0
  271. package/dist/verification.d.ts +194 -0
  272. package/dist/verification.d.ts.map +1 -0
  273. package/dist/verification.js +530 -0
  274. package/dist/verification.js.map +1 -0
  275. package/dist/vision-executor.d.ts +18 -0
  276. package/dist/vision-executor.d.ts.map +1 -0
  277. package/dist/vision-executor.js +60 -0
  278. package/dist/vision-executor.js.map +1 -0
  279. package/dist/visual-agent.d.ts +120 -0
  280. package/dist/visual-agent.d.ts.map +1 -0
  281. package/dist/visual-agent.js +796 -0
  282. package/dist/visual-agent.js.map +1 -0
  283. package/dist/wait.d.ts +35 -0
  284. package/dist/wait.d.ts.map +1 -0
  285. package/dist/wait.js +76 -0
  286. package/dist/wait.js.map +1 -0
  287. package/package.json +94 -0
  288. package/spec/README.md +72 -0
  289. package/spec/SNAPSHOT_V1.md +208 -0
  290. package/spec/sdk-types.md +259 -0
  291. package/spec/snapshot.schema.json +148 -0
  292. package/src/extension/background.js +104 -0
  293. package/src/extension/content.js +162 -0
  294. package/src/extension/injected_api.js +1399 -0
  295. package/src/extension/manifest.json +36 -0
  296. package/src/extension/pkg/README.md +1340 -0
  297. package/src/extension/pkg/package.json +15 -0
  298. package/src/extension/pkg/sentience_core.d.ts +51 -0
  299. package/src/extension/pkg/sentience_core.js +371 -0
  300. package/src/extension/pkg/sentience_core_bg.wasm +0 -0
  301. package/src/extension/pkg/sentience_core_bg.wasm.d.ts +10 -0
  302. package/src/extension/release.json +116 -0
@@ -0,0 +1,796 @@
1
+ "use strict";
2
+ /**
3
+ * Visual Agent - Uses labeled screenshots with vision-capable LLMs
4
+ *
5
+ * This agent extends SentienceAgent to use visual prompts:
6
+ * 1. Takes snapshot with screenshot enabled
7
+ * 2. Draws bounding boxes and labels element IDs on the screenshot
8
+ * 3. Uses anti-collision algorithm to position labels (4 sides + 4 corners)
9
+ * 4. Sends labeled screenshot to vision-capable LLM
10
+ * 5. Extracts element ID from LLM response
11
+ * 6. Clicks the element using click()
12
+ *
13
+ * Dependencies:
14
+ * - sharp: Required for image processing
15
+ * Install with: npm install sharp
16
+ * - canvas: Required for drawing on images
17
+ * Install with: npm install canvas
18
+ * - Vision-capable LLM: Requires an LLM provider that supports vision (e.g., GPT-4o, Claude 3)
19
+ */
20
// CommonJS interop helpers used by the namespace-style imports below
// (`path`, `fs`). Each helper reuses a definition already present on `this`
// when there is one, and otherwise falls back to the local implementation.

/**
 * Expose `source[key]` on `target` under `alias` (defaults to `key`).
 *
 * When `Object.create` exists the property is installed with
 * `Object.defineProperty`; a getter wrapper is substituted whenever the
 * source property has no descriptor, is a getter on a non-`__esModule`
 * object, or is writable/configurable. Without `Object.create` the value is
 * simply copied.
 */
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) alias = key;
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var mustWrap;
    if (!descriptor) {
        mustWrap = true;
    }
    else if ("get" in descriptor) {
        mustWrap = !source.__esModule;
    }
    else {
        mustWrap = descriptor.writable || descriptor.configurable;
    }
    if (mustWrap) {
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    if (alias === undefined) alias = key;
    target[alias] = source[key];
}));

/**
 * Attach `value` to `target` as its enumerable `default` property.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});

/**
 * Build a namespace object for a required module.
 *
 * Modules flagged `__esModule` are returned untouched. For anything else a
 * new object is created that re-exposes every own property except
 * `default`, and whose `default` is the original module value.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first use with Object.getOwnPropertyNames, or with a
    // for-in/hasOwnProperty scan when that built-in is missing.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (subject) {
            var names = [];
            for (var name in subject) {
                if (Object.prototype.hasOwnProperty.call(subject, name)) names[names.length] = name;
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var index = 0; index < keys.length; index++) {
                if (keys[index] !== "default") __createBinding(namespace, mod, keys[index]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
53
+ Object.defineProperty(exports, "__esModule", { value: true });
54
+ exports.PredicateVisualAgent = exports.SentienceVisualAgent = void 0;
55
+ const snapshot_1 = require("./snapshot");
56
+ const crypto_1 = require("crypto");
57
+ const trace_event_builder_1 = require("./utils/trace-event-builder");
58
+ const snapshot_event_builder_1 = require("./utils/snapshot-event-builder");
59
+ const snapshot_processor_1 = require("./utils/snapshot-processor");
60
+ const actions_1 = require("./actions");
61
+ const agent_1 = require("./agent");
62
+ const path = __importStar(require("path"));
63
+ const fs = __importStar(require("fs"));
64
+ const uuid_1 = require("uuid");
65
// Optional image-processing dependencies. Both are probed once at module
// load; a missing package only produces a warning here, and the agent's
// constructor is what turns a missing package into a hard error.
let sharp;
let canvas;
let SHARP_AVAILABLE = false;
let CANVAS_AVAILABLE = false;
{
    // Runs `load`; on any failure prints `warning` and reports the module as absent.
    const probe = (load, warning) => {
        try {
            return { loaded: load(), available: true };
        }
        catch {
            console.warn(warning);
            return { loaded: undefined, available: false };
        }
    };
    // The require() calls stay as string literals so static tooling can still see them.
    ({ loaded: sharp, available: SHARP_AVAILABLE } = probe(
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    () => require('sharp'), '⚠️ Warning: sharp not available. Install with: npm install sharp'));
    ({ loaded: canvas, available: CANVAS_AVAILABLE } = probe(
    // eslint-disable-next-line @typescript-eslint/no-require-imports
    () => require('canvas'), '⚠️ Warning: canvas not available. Install with: npm install canvas'));
}
88
+ /**
89
+ * Visual agent that uses labeled screenshots with vision-capable LLMs.
90
+ *
91
+ * Extends SentienceAgent to override act() method with visual prompting.
92
+ *
93
+ * Requirements:
94
+ * - sharp: Required for image processing
95
+ * Install with: npm install sharp
96
+ * - canvas: Required for drawing on images
97
+ * Install with: npm install canvas
98
+ * - Vision-capable LLM: Requires an LLM provider that supports vision (e.g., GPT-4o, Claude 3)
99
+ */
100
+ class SentienceVisualAgent extends agent_1.SentienceAgent {
101
+ /**
102
+ * Initialize Visual Agent
103
+ *
104
+ * @param browser - SentienceBrowser instance
105
+ * @param llm - LLM provider (must support vision, e.g., GPT-4o, Claude 3)
106
+ * @param snapshotLimit - Default maximum elements to include
107
+ * @param verbose - Print execution logs
108
+ * @param tracer - Optional Tracer instance
109
+ * @param showOverlay - Show green bbox overlay in browser
110
+ */
111
+ constructor(browser, llm, snapshotLimit = 50, verbose = true, tracer, showOverlay = false) {
112
+ super(browser, llm, snapshotLimit, verbose, tracer, showOverlay);
113
+ if (!SHARP_AVAILABLE || !CANVAS_AVAILABLE) {
114
+ throw new Error('sharp and canvas are required for SentienceVisualAgent. ' +
115
+ 'Install with: npm install sharp canvas');
116
+ }
117
+ // Track previous snapshot for diff computation (stored in base class)
118
+ }
119
+ /**
120
+ * Decode base64 screenshot data URL to image buffer
121
+ *
122
+ * @param screenshotDataUrl - Base64-encoded data URL (e.g., "data:image/png;base64,...")
123
+ * @returns Image buffer
124
+ */
125
+ decodeScreenshot(screenshotDataUrl) {
126
+ // Extract base64 data from data URL
127
+ if (screenshotDataUrl.startsWith('data:image/')) {
128
+ // Format: "data:image/png;base64,<base64_data>"
129
+ const base64Data = screenshotDataUrl.split(',', 2)[1];
130
+ return Buffer.from(base64Data, 'base64');
131
+ }
132
+ else {
133
+ // Assume it's already base64
134
+ return Buffer.from(screenshotDataUrl, 'base64');
135
+ }
136
+ }
137
+ /**
138
+ * Find best position for label using anti-collision algorithm.
139
+ *
140
+ * Tries 8 positions: 4 sides (top, bottom, left, right) + 4 corners.
141
+ * Returns the first position that doesn't collide with existing labels.
142
+ *
143
+ * @param elementBbox - Element bounding box {x, y, width, height}
144
+ * @param existingLabels - List of existing label bounding boxes
145
+ * @param imageWidth - Image width in pixels
146
+ * @param imageHeight - Image height in pixels
147
+ * @param labelWidth - Label width in pixels
148
+ * @param labelHeight - Label height in pixels
149
+ * @returns (x, y) position for label
150
+ */
151
+ findLabelPosition(elementBbox, existingLabels, imageWidth, imageHeight, labelWidth, labelHeight) {
152
+ const { x, y, width, height } = elementBbox;
153
+ // Offset from element edge
154
+ const labelOffset = 15; // Increased from 5px for better separation
155
+ // Try 8 positions: top, bottom, left, right, top-left, top-right, bottom-left, bottom-right
156
+ const positions = [
157
+ [Math.floor(x + width / 2 - labelWidth / 2), Math.floor(y - labelHeight - labelOffset)], // Top
158
+ [Math.floor(x + width / 2 - labelWidth / 2), Math.floor(y + height + labelOffset)], // Bottom
159
+ [Math.floor(x - labelWidth - labelOffset), Math.floor(y + height / 2 - labelHeight / 2)], // Left
160
+ [Math.floor(x + width + labelOffset), Math.floor(y + height / 2 - labelHeight / 2)], // Right
161
+ [Math.floor(x - labelWidth - labelOffset), Math.floor(y - labelHeight - labelOffset)], // Top-left
162
+ [Math.floor(x + width + labelOffset), Math.floor(y - labelHeight - labelOffset)], // Top-right
163
+ [Math.floor(x - labelWidth - labelOffset), Math.floor(y + height + labelOffset)], // Bottom-left
164
+ [Math.floor(x + width + labelOffset), Math.floor(y + height + labelOffset)], // Bottom-right
165
+ ];
166
+ // Check each position for collisions
167
+ for (const [posX, posY] of positions) {
168
+ // Check bounds
169
+ if (posX < 0 ||
170
+ posY < 0 ||
171
+ posX + labelWidth > imageWidth ||
172
+ posY + labelHeight > imageHeight) {
173
+ continue;
174
+ }
175
+ // Check collision with existing labels
176
+ const labelBbox = {
177
+ x: posX,
178
+ y: posY,
179
+ width: labelWidth,
180
+ height: labelHeight,
181
+ };
182
+ let collision = false;
183
+ for (const existing of existingLabels) {
184
+ // Simple AABB collision detection
185
+ if (!(labelBbox.x + labelBbox.width < existing.x ||
186
+ labelBbox.x > existing.x + existing.width ||
187
+ labelBbox.y + labelBbox.height < existing.y ||
188
+ labelBbox.y > existing.y + existing.height)) {
189
+ collision = true;
190
+ break;
191
+ }
192
+ }
193
+ if (!collision) {
194
+ return [posX, posY];
195
+ }
196
+ }
197
+ // If all positions collide, use top position with increased offset
198
+ return [
199
+ Math.floor(x + width / 2 - labelWidth / 2),
200
+ Math.floor(y - labelHeight - labelOffset * 2),
201
+ ];
202
+ }
203
+ /**
204
+ * Draw labeled screenshot with bounding boxes and element IDs.
205
+ *
206
+ * @param snapshot - Snapshot with screenshot data
207
+ * @param elements - List of elements to label
208
+ * @returns Image buffer with labels drawn
209
+ */
210
+ async drawLabeledScreenshot(snapshot, elements) {
211
+ if (!snapshot.screenshot) {
212
+ throw new Error('Screenshot not available in snapshot');
213
+ }
214
+ // Decode screenshot
215
+ const imageBuffer = this.decodeScreenshot(snapshot.screenshot);
216
+ if (!sharp) {
217
+ throw new Error('sharp is not available. Install with: npm install sharp');
218
+ }
219
+ const img = await sharp(imageBuffer);
220
+ const metadata = await img.metadata();
221
+ const imageWidth = metadata.width || 0;
222
+ const imageHeight = metadata.height || 0;
223
+ // Create canvas for drawing
224
+ if (!canvas) {
225
+ throw new Error('canvas is not available. Install with: npm install canvas');
226
+ }
227
+ const { createCanvas, loadImage } = canvas;
228
+ const canvasElement = createCanvas(imageWidth, imageHeight);
229
+ const ctx = canvasElement.getContext('2d');
230
+ // Draw original image on canvas
231
+ const image = await loadImage(imageBuffer);
232
+ ctx.drawImage(image, 0, 0);
233
+ // Load font (fallback to default if not available)
234
+ let font = '16px Arial';
235
+ try {
236
+ // Try to use system font
237
+ font = '16px Helvetica';
238
+ }
239
+ catch {
240
+ // Use default
241
+ font = '16px Arial';
242
+ }
243
+ const existingLabels = [];
244
+ // Neon green color: #39FF14 (bright, vibrant green)
245
+ const neonGreen = '#39FF14';
246
+ // Draw bounding boxes and labels for each element
247
+ for (const element of elements) {
248
+ const bbox = element.bbox;
249
+ const x = bbox.x;
250
+ const y = bbox.y;
251
+ const width = bbox.width;
252
+ const height = bbox.height;
253
+ // Draw bounding box rectangle (neon green with 2px width)
254
+ ctx.strokeStyle = neonGreen;
255
+ ctx.lineWidth = 2;
256
+ ctx.strokeRect(x, y, width, height);
257
+ // Prepare label text (just the number - keep it simple and compact)
258
+ const labelText = String(element.id);
259
+ // Measure label text size
260
+ ctx.font = font;
261
+ const textMetrics = ctx.measureText(labelText);
262
+ const labelWidth = textMetrics.width;
263
+ const labelHeight = 16; // Approximate height for 16px font
264
+ // Find best position for label (anti-collision)
265
+ const [labelX, labelY] = this.findLabelPosition({ x, y, width, height }, existingLabels, imageWidth, imageHeight, labelWidth + 8, // Add padding
266
+ labelHeight + 4 // Add padding
267
+ );
268
+ // Calculate connection points for a clearer visual link
269
+ const elementCenterX = x + width / 2;
270
+ const elementCenterY = y + height / 2;
271
+ const labelCenterX = labelX + labelWidth / 2;
272
+ const labelCenterY = labelY + labelHeight / 2;
273
+ // Determine which edge of the element is closest to the label
274
+ const distTop = Math.abs(labelCenterY - y);
275
+ const distBottom = Math.abs(labelCenterY - (y + height));
276
+ const distLeft = Math.abs(labelCenterX - x);
277
+ const distRight = Math.abs(labelCenterX - (x + width));
278
+ const minDist = Math.min(distTop, distBottom, distLeft, distRight);
279
+ let lineStart;
280
+ if (minDist === distTop) {
281
+ lineStart = [elementCenterX, y];
282
+ }
283
+ else if (minDist === distBottom) {
284
+ lineStart = [elementCenterX, y + height];
285
+ }
286
+ else if (minDist === distLeft) {
287
+ lineStart = [x, elementCenterY];
288
+ }
289
+ else {
290
+ lineStart = [x + width, elementCenterY];
291
+ }
292
+ // Draw connecting line from element edge to label
293
+ ctx.strokeStyle = neonGreen;
294
+ ctx.lineWidth = 2;
295
+ ctx.beginPath();
296
+ ctx.moveTo(lineStart[0], lineStart[1]);
297
+ ctx.lineTo(labelCenterX, labelCenterY);
298
+ ctx.stroke();
299
+ // Draw label background (white with neon green border)
300
+ const labelBgX1 = labelX - 4;
301
+ const labelBgY1 = labelY - 2;
302
+ const labelBgX2 = labelX + labelWidth + 4;
303
+ const labelBgY2 = labelY + labelHeight + 2;
304
+ // Draw white background
305
+ ctx.fillStyle = 'white';
306
+ ctx.fillRect(labelBgX1, labelBgY1, labelBgX2 - labelBgX1, labelBgY2 - labelBgY1);
307
+ // Draw neon green border
308
+ ctx.strokeStyle = neonGreen;
309
+ ctx.lineWidth = 2;
310
+ ctx.strokeRect(labelBgX1, labelBgY1, labelBgX2 - labelBgX1, labelBgY2 - labelBgY1);
311
+ // Draw label text (black for high contrast)
312
+ ctx.fillStyle = 'black';
313
+ ctx.font = font;
314
+ ctx.fillText(labelText, labelX, labelY + labelHeight);
315
+ // Record label position for collision detection
316
+ existingLabels.push({
317
+ x: labelBgX1,
318
+ y: labelBgY1,
319
+ width: labelBgX2 - labelBgX1,
320
+ height: labelBgY2 - labelBgY1,
321
+ });
322
+ }
323
+ // Convert canvas to buffer
324
+ return canvasElement.toBuffer('image/png');
325
+ }
326
+ /**
327
+ * Encode image buffer to base64 data URL with size optimization.
328
+ *
329
+ * Vision LLM APIs typically have size limits (e.g., 20MB for OpenAI).
330
+ * This function automatically compresses images if they're too large.
331
+ *
332
+ * @param imageBuffer - Image buffer
333
+ * @param format - Image format ('PNG' or 'JPEG')
334
+ * @param maxSizeMb - Maximum size in MB before compression (default: 20MB)
335
+ * @returns Base64-encoded data URL
336
+ */
337
+ async encodeImageToBase64(imageBuffer, format = 'PNG', maxSizeMb = 20.0) {
338
+ if (!sharp) {
339
+ throw new Error('sharp is not available. Install with: npm install sharp');
340
+ }
341
+ let quality = 95; // Start with high quality
342
+ let outputBuffer = imageBuffer;
343
+ // Try to fit within size limit
344
+ for (let attempt = 0; attempt < 3; attempt++) {
345
+ if (format === 'JPEG') {
346
+ outputBuffer = await sharp(imageBuffer).jpeg({ quality, mozjpeg: true }).toBuffer();
347
+ }
348
+ else {
349
+ outputBuffer = await sharp(imageBuffer).png({ compressionLevel: 9 }).toBuffer();
350
+ }
351
+ const sizeMb = outputBuffer.length / (1024 * 1024);
352
+ if (sizeMb <= maxSizeMb) {
353
+ break;
354
+ }
355
+ // Reduce quality for next attempt
356
+ quality = Math.max(70, quality - 15);
357
+ if (this.verbose && attempt === 0) {
358
+ console.log(` ⚠️ Image size ${sizeMb.toFixed(2)}MB exceeds limit, compressing...`);
359
+ }
360
+ }
361
+ const finalSizeMb = outputBuffer.length / (1024 * 1024);
362
+ if (this.verbose) {
363
+ console.log(` 📸 Image encoded: ${finalSizeMb.toFixed(2)}MB (${outputBuffer.length} bytes)`);
364
+ }
365
+ const base64Data = outputBuffer.toString('base64');
366
+ const mimeType = format === 'PNG' ? 'image/png' : 'image/jpeg';
367
+ return `data:${mimeType};base64,${base64Data}`;
368
+ }
369
+ /**
370
+ * Query LLM with vision (labeled screenshot).
371
+ *
372
+ * @param imageDataUrl - Base64-encoded image data URL
373
+ * @param goal - User's goal/task
374
+ * @returns LLMResponse with element ID
375
+ */
376
+ async queryLLMWithVision(imageDataUrl, goal) {
377
+ const systemPrompt = `You are a web automation assistant. You will see a screenshot of a web page with labeled element IDs.
378
+ Each clickable element has:
379
+ - A bright neon green (#39FF14) bounding box around the element
380
+ - A white label box with a number (the element ID) connected by a green line
381
+ - The label is clearly separate from the element (not part of the UI)
382
+
383
+ CRITICAL INSTRUCTIONS:
384
+ 1. Look at the screenshot carefully
385
+ 2. Find the element that matches the user's goal (ignore the white label boxes - they are annotations, not UI elements)
386
+ 3. Follow the green line from that element to find its label box with the ID number
387
+ 4. Respond with ONLY that integer ID number (e.g., "42" or "1567")
388
+ 5. Do NOT include any explanation, reasoning, or other text
389
+ 6. Do NOT say "element 1" or "the first element" - just return the number
390
+ 7. Do NOT confuse the white label box with an interactive element - labels are annotations connected by green lines
391
+
392
+ Example responses:
393
+ - Correct: "42"
394
+ - Correct: "1567"
395
+ - Wrong: "I see element 42"
396
+ - Wrong: "The element ID is 42"
397
+ - Wrong: "42 (the search box)"`;
398
+ const userPrompt = `Goal: ${goal}
399
+
400
+ Look at the screenshot. Each element has a neon green bounding box with a white label showing its ID number.
401
+ Find the element that should be clicked to accomplish this goal.
402
+ Return ONLY the integer ID number from the label, nothing else.`;
403
+ // Check if LLM provider supports vision (OpenAI GPT-4o, Claude, etc.)
404
+ // For now, we'll use a fallback approach - try to pass image via the generate method
405
+ // Individual LLM providers should implement vision support in their generate methods
406
+ try {
407
+ // Try to use vision API if available
408
+ // This is a placeholder - actual implementation depends on LLM provider
409
+ const response = await this.llm.generate(systemPrompt, userPrompt, {
410
+ image: imageDataUrl,
411
+ temperature: 0.0,
412
+ });
413
+ return response;
414
+ }
415
+ catch {
416
+ // Fallback: Try to pass image via text description
417
+ const fallbackPrompt = `${userPrompt}\n\n[Image data: ${imageDataUrl.substring(0, 200)}...]`;
418
+ const fallbackResponse = await this.llm.generate(systemPrompt, fallbackPrompt, {
419
+ temperature: 0.0,
420
+ });
421
+ if (this.verbose) {
422
+ console.log(' ⚠️ Using fallback method (may not support vision)');
423
+ }
424
+ return fallbackResponse;
425
+ }
426
+ }
427
+ /**
428
+ * Extract element ID integer from LLM response.
429
+ *
430
+ * @param llmResponse - LLM response text
431
+ * @returns Element ID as integer, or undefined if not found
432
+ */
433
+ extractElementId(llmResponse) {
434
+ if (this.verbose) {
435
+ console.log(`🔍 Raw LLM response: ${JSON.stringify(llmResponse)}`);
436
+ }
437
+ // Clean the response - remove leading/trailing whitespace
438
+ let cleaned = llmResponse.trim();
439
+ if (this.verbose) {
440
+ console.log(` 🧹 After strip: ${JSON.stringify(cleaned)}`);
441
+ }
442
+ // Remove common prefixes that LLMs might add
443
+ const prefixesToRemove = [
444
+ 'element',
445
+ 'id',
446
+ 'the element',
447
+ 'element id',
448
+ 'the id',
449
+ 'click',
450
+ 'click on',
451
+ 'select',
452
+ 'choose',
453
+ ];
454
+ for (const prefix of prefixesToRemove) {
455
+ if (cleaned.toLowerCase().startsWith(prefix)) {
456
+ cleaned = cleaned.substring(prefix.length).trim();
457
+ // Remove any remaining punctuation
458
+ cleaned = cleaned.replace(/^[:.,;!?()[\]{}]+/, '').trim();
459
+ if (this.verbose) {
460
+ console.log(` 🧹 After removing prefix '${prefix}': ${JSON.stringify(cleaned)}`);
461
+ }
462
+ }
463
+ }
464
+ // Try to find all integers in the cleaned response
465
+ const numbers = cleaned.match(/\d+/g);
466
+ if (this.verbose) {
467
+ console.log(` 🔢 Numbers found: ${numbers}`);
468
+ }
469
+ if (numbers && numbers.length > 0) {
470
+ // If multiple numbers found, prefer the largest one (likely the actual element ID)
471
+ // Element IDs are typically larger numbers, not small ones like "1"
472
+ try {
473
+ const intNumbers = numbers.map(n => parseInt(n, 10));
474
+ if (this.verbose) {
475
+ console.log(` 🔢 As integers: ${intNumbers}`);
476
+ }
477
+ // Prefer larger numbers (element IDs are usually > 10)
478
+ // But if only small numbers exist, use the first one
479
+ const largeNumbers = intNumbers.filter(n => n > 10);
480
+ let elementId;
481
+ if (largeNumbers.length > 0) {
482
+ elementId = Math.max(...largeNumbers); // Take the largest
483
+ if (this.verbose) {
484
+ console.log(` ✅ Selected largest number > 10: ${elementId}`);
485
+ }
486
+ }
487
+ else {
488
+ elementId = intNumbers[0]; // Fallback to first if all are small
489
+ if (this.verbose) {
490
+ console.log(` ⚠️ All numbers ≤ 10, using first: ${elementId}`);
491
+ }
492
+ }
493
+ if (this.verbose) {
494
+ console.log(`✅ Extracted element ID: ${elementId} (from ${numbers})`);
495
+ }
496
+ return elementId;
497
+ }
498
+ catch {
499
+ if (this.verbose) {
500
+ console.log(' ❌ Failed to convert numbers to integers');
501
+ }
502
+ }
503
+ }
504
+ if (this.verbose) {
505
+ console.log(`⚠️ Could not extract element ID from response: ${llmResponse}`);
506
+ }
507
+ return undefined;
508
+ }
509
+ /**
510
+ * Override act() method to use visual prompting with full tracing support.
511
+ *
512
+ * @param goal - User's goal/task
513
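+ * @param _maxRetries - Maximum retry attempts (accepted for signature compatibility; not read by this implementation, which makes a single attempt)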
514
+ * @param snapshotOptions - Optional snapshot options (screenshot will be enabled)
515
+ * @returns AgentActResult
516
+ */
517
+ async act(goal, _maxRetries = 2, snapshotOptions) {
518
+ if (this.verbose) {
519
+ console.log('\n' + '='.repeat(70));
520
+ console.log(`🤖 Visual Agent Goal: ${goal}`);
521
+ console.log('='.repeat(70));
522
+ }
523
+ // Increment step counter and generate step ID
524
+ const stepCount = this.stepCount + 1;
525
+ this.stepCount = stepCount;
526
+ const stepId = (0, crypto_1.randomUUID)();
527
+ // Emit step_start event
528
+ const tracer = this.tracer;
529
+ if (tracer) {
530
+ const page = this.browser.getPage();
531
+ const currentUrl = page ? page.url() : 'unknown';
532
+ tracer.emitStepStart(stepId, stepCount, goal, 0, currentUrl);
533
+ }
534
+ const startTime = Date.now();
535
+ // Track data collected during step execution for step_end emission on failure
536
+ let stepSnapWithDiff = null;
537
+ let stepPreUrl = null;
538
+ let stepLlmResponse = null;
539
+ try {
540
+ // Ensure screenshot is enabled
541
+ const snapOpts = {
542
+ ...snapshotOptions,
543
+ screenshot: snapshotOptions?.screenshot ?? true,
544
+ goal: snapshotOptions?.goal ?? goal,
545
+ limit: snapshotOptions?.limit || this.snapshotLimit,
546
+ };
547
+ if (this.verbose) {
548
+ console.log(`🎯 Goal: ${goal}`);
549
+ console.log('📸 Taking snapshot with screenshot...');
550
+ }
551
+ // 1. Take snapshot with screenshot
552
+ const snap = await (0, snapshot_1.snapshot)(this.browser, snapOpts);
553
+ if (snap.status !== 'success') {
554
+ throw new Error(`Snapshot failed: ${snap.error}`);
555
+ }
556
+ if (!snap.screenshot) {
557
+ throw new Error('Screenshot not available in snapshot');
558
+ }
559
+ // Process snapshot: compute diff status and filter elements
560
+ const processed = snapshot_processor_1.SnapshotProcessor.process(snap, this.previousSnapshot, goal, this.snapshotLimit);
561
+ // Update previous snapshot for next comparison
562
+ this.previousSnapshot = snap;
563
+ const snapWithDiff = processed.withDiff;
564
+ // Track for step_end emission on failure
565
+ stepSnapWithDiff = snapWithDiff;
566
+ stepPreUrl = snap.url;
567
+ // Emit snapshot event
568
+ if (tracer) {
569
+ const snapshotData = snapshot_event_builder_1.SnapshotEventBuilder.buildSnapshotEventData(snapWithDiff, stepId);
570
+ tracer.emit('snapshot', snapshotData, stepId);
571
+ }
572
+ if (this.verbose) {
573
+ console.log(`✅ Snapshot taken: ${snap.elements.length} elements`);
574
+ }
575
+ // 2. Draw labeled screenshot
576
+ if (this.verbose) {
577
+ console.log('🎨 Drawing bounding boxes and labels...');
578
+ console.log(` Elements to label: ${snap.elements.length}`);
579
+ if (snap.elements.length > 0) {
580
+ const elementIds = snap.elements.slice(0, 10).map(el => el.id); // Show first 10
581
+ console.log(` Sample element IDs: ${elementIds}`);
582
+ }
583
+ }
584
+ const labeledImageBuffer = await this.drawLabeledScreenshot(snap, snap.elements);
585
+ // Save labeled image to disk for debugging
586
+ try {
587
+ const cwd = process.cwd();
588
+ let playgroundPath;
589
+ // Check if current working directory contains playground
590
+ if (fs.existsSync(path.join(cwd, 'playground'))) {
591
+ playgroundPath = path.join(cwd, 'playground', 'images');
592
+ }
593
+ else {
594
+ // Check if we're in a playground context via module path
595
+ const modulePaths = require.resolve.paths('@predicatelabs/sdk') || [];
596
+ for (const modulePath of modulePaths) {
597
+ const potentialPlayground = path.join(modulePath, '..', 'playground', 'images');
598
+ if (fs.existsSync(path.dirname(potentialPlayground))) {
599
+ playgroundPath = potentialPlayground;
600
+ break;
601
+ }
602
+ }
603
+ }
604
+ if (!playgroundPath) {
605
+ // Fallback: use current working directory
606
+ playgroundPath = path.join(cwd, 'playground', 'images');
607
+ }
608
+ const imagesDir = playgroundPath;
609
+ if (!fs.existsSync(imagesDir)) {
610
+ fs.mkdirSync(imagesDir, { recursive: true });
611
+ }
612
+ const imageUuid = (0, uuid_1.v4)();
613
+ const imageFilename = `labeled_screenshot_${imageUuid}.png`;
614
+ const imagePath = path.join(imagesDir, imageFilename);
615
+ fs.writeFileSync(imagePath, labeledImageBuffer);
616
+ if (this.verbose) {
617
+ console.log(` 💾 Saved labeled screenshot: ${path.resolve(imagePath)}`);
618
+ }
619
+ }
620
+ catch (saveError) {
621
+ // Don't fail if image save fails - it's just for debugging
622
+ if (this.verbose) {
623
+ console.log(` ⚠️ Could not save labeled screenshot: ${saveError.message}`);
624
+ }
625
+ }
626
+ // Use JPEG for better compression (smaller file size for vision APIs)
627
+ const labeledImageDataUrl = await this.encodeImageToBase64(labeledImageBuffer, 'JPEG', 20.0);
628
+ // 3. Query LLM with vision
629
+ if (this.verbose) {
630
+ console.log('🧠 Querying LLM with labeled screenshot...');
631
+ }
632
+ const llmResponse = await this.queryLLMWithVision(labeledImageDataUrl, goal);
633
+ // Track for step_end emission on failure
634
+ stepLlmResponse = llmResponse;
635
+ // Emit LLM query event
636
+ if (tracer) {
637
+ tracer.emit('llm_query', {
638
+ prompt_tokens: llmResponse.promptTokens,
639
+ completion_tokens: llmResponse.completionTokens,
640
+ model: llmResponse.modelName,
641
+ response_text: llmResponse.content.substring(0, 200), // Truncate for brevity
642
+ }, stepId);
643
+ }
644
+ if (this.verbose) {
645
+ console.log(`💭 LLM Response: ${llmResponse.content}`);
646
+ }
647
+ // Track token usage
648
+ this.trackTokens(goal, llmResponse);
649
+ // 4. Extract element ID
650
+ const elementId = this.extractElementId(llmResponse.content);
651
+ if (elementId === undefined) {
652
+ throw new Error(`Could not extract element ID from LLM response: ${llmResponse.content}`);
653
+ }
654
+ if (this.verbose) {
655
+ console.log(`🎯 Extracted Element ID: ${elementId}`);
656
+ }
657
+ // 5. Click the element
658
+ if (this.verbose) {
659
+ console.log(`🖱️ Clicking element ${elementId}...`);
660
+ }
661
+ const clickResult = await (0, actions_1.click)(this.browser, elementId);
662
+ const durationMs = Date.now() - startTime;
663
+ // Create AgentActResult from click result
664
+ const result = {
665
+ success: clickResult.success,
666
+ action: 'click',
667
+ goal,
668
+ durationMs,
669
+ attempt: 0,
670
+ elementId,
671
+ outcome: clickResult.outcome,
672
+ urlChanged: clickResult.url_changed || false,
673
+ error: clickResult.error?.reason,
674
+ };
675
+ // Emit action execution event
676
+ if (tracer) {
677
+ const page = this.browser.getPage();
678
+ const postUrl = page ? page.url() : null;
679
+ // Include element data for live overlay visualization
680
+ const elementsData = snap.elements.slice(0, 50).map(el => ({
681
+ id: el.id,
682
+ bbox: {
683
+ x: el.bbox.x,
684
+ y: el.bbox.y,
685
+ width: el.bbox.width,
686
+ height: el.bbox.height,
687
+ },
688
+ role: el.role,
689
+ text: el.text ? el.text.substring(0, 50) : '',
690
+ }));
691
+ tracer.emit('action', {
692
+ action_type: result.action,
693
+ action: result.action,
694
+ element_id: result.elementId,
695
+ success: result.success,
696
+ // Note: duration_ms and other custom fields are not in TraceEventData type
697
+ // but are accepted at runtime for custom visualization
698
+ post_url: postUrl,
699
+ elements: elementsData, // Add element data for overlay
700
+ target_element_id: result.elementId, // Highlight target in red
701
+ }, // Type assertion needed for custom visualization fields
702
+ stepId);
703
+ }
704
+ // Record history
705
+ const history = this.history;
706
+ history.push({
707
+ goal,
708
+ action: `CLICK(${elementId})`,
709
+ result,
710
+ success: result.success,
711
+ attempt: 0,
712
+ durationMs,
713
+ });
714
+ if (this.verbose) {
715
+ const status = result.success ? '✅' : '❌';
716
+ console.log(`${status} Completed in ${durationMs}ms`);
717
+ }
718
+ // Emit step completion event
719
+ if (tracer) {
720
+ const preUrl = snap.url;
721
+ const page = this.browser.getPage();
722
+ const postUrl = page ? page.url() || null : null;
723
+ let postSnapshotDigest;
724
+ try {
725
+ const postSnap = await (0, snapshot_1.snapshot)(this.browser, {
726
+ goal: `${goal} (post)`,
727
+ limit: Math.min(this.snapshotLimit, 10),
728
+ show_overlay: this.showOverlay,
729
+ });
730
+ if (postSnap.status === 'success') {
731
+ postSnapshotDigest = trace_event_builder_1.TraceEventBuilder.buildSnapshotDigest(postSnap);
732
+ }
733
+ }
734
+ catch {
735
+ postSnapshotDigest = undefined;
736
+ }
737
+ // Build complete step_end event
738
+ // Note: snapshotDigest, llmResponseText, execData, and verifyData are computed
739
+ // inside TraceEventBuilder.buildStepEndData, so we don't need them here
740
+ // Build complete step_end event
741
+ const stepEndData = trace_event_builder_1.TraceEventBuilder.buildStepEndData({
742
+ stepId,
743
+ stepIndex: stepCount,
744
+ goal,
745
+ attempt: 0,
746
+ preUrl,
747
+ postUrl: postUrl || preUrl,
748
+ postSnapshotDigest,
749
+ snapshot: snapWithDiff,
750
+ llmResponse,
751
+ result,
752
+ });
753
+ tracer.emit('step_end', stepEndData, stepId);
754
+ }
755
+ return result;
756
+ }
757
+ catch (error) {
758
+ // Emit error event
759
+ if (tracer) {
760
+ tracer.emitError(stepId, error.message, 0);
761
+ }
762
+ // Emit step_end with whatever data we collected before failure
763
+ // This ensures diff_status and other fields are preserved in traces
764
+ if (tracer && stepSnapWithDiff) {
765
+ const page = this.browser.getPage();
766
+ const postUrl = page ? page.url() || null : null;
767
+ const durationMs = Date.now() - startTime;
768
+ const stepEndData = trace_event_builder_1.TraceEventBuilder.buildPartialStepEndData({
769
+ stepId,
770
+ stepIndex: stepCount,
771
+ goal,
772
+ attempt: 0,
773
+ preUrl: stepPreUrl,
774
+ postUrl,
775
+ snapshot: stepSnapWithDiff,
776
+ llmResponse: stepLlmResponse,
777
+ error: error.message,
778
+ durationMs,
779
+ });
780
+ tracer.emit('step_end', stepEndData, stepId);
781
+ }
782
+ if (this.verbose) {
783
+ console.log(`❌ Error: ${error.message}`);
784
+ }
785
+ // Re-raise the exception
786
+ throw error;
787
+ }
788
+ }
789
+ }
790
+ exports.SentienceVisualAgent = SentienceVisualAgent;
791
+ /**
792
+ * Predicate rebrand alias for SentienceVisualAgent.
793
+ * Kept as a runtime alias to avoid breaking existing integrations.
794
+ */
795
+ exports.PredicateVisualAgent = SentienceVisualAgent;
796
+ //# sourceMappingURL=visual-agent.js.map