automify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +21 -0
  3. package/README.md +401 -0
  4. package/SECURITY.md +17 -0
  5. package/examples/anthropic-provider.js +18 -0
  6. package/examples/browser-basic.js +30 -0
  7. package/examples/browser-with-safety.js +38 -0
  8. package/examples/claude-model-adapter.js +141 -0
  9. package/examples/cli-basic.js +20 -0
  10. package/examples/cli-docker.js +42 -0
  11. package/examples/custom-computer.js +18 -0
  12. package/examples/custom-model-adapter.js +48 -0
  13. package/examples/desktop-docker.js +37 -0
  14. package/examples/desktop-local.js +28 -0
  15. package/examples/evaluate-image.js +26 -0
  16. package/examples/files-and-shared-folder.js +42 -0
  17. package/package.json +74 -0
  18. package/scripts/generate-argument-reference.js +17 -0
  19. package/scripts/install-browser.js +12 -0
  20. package/scripts/install-desktop.js +281 -0
  21. package/src/index.d.ts +1049 -0
  22. package/src/index.js +83 -0
  23. package/src/lib/adapter-locks.js +93 -0
  24. package/src/lib/adapter-toolkit.js +239 -0
  25. package/src/lib/anthropic-model-adapter.js +451 -0
  26. package/src/lib/argument-reference.js +98 -0
  27. package/src/lib/automify.js +938 -0
  28. package/src/lib/browser-automify.js +89 -0
  29. package/src/lib/cli-automify.js +520 -0
  30. package/src/lib/computer-automify.js +103 -0
  31. package/src/lib/docker-cli-automify.js +517 -0
  32. package/src/lib/docker-desktop-computer.js +725 -0
  33. package/src/lib/errors.js +24 -0
  34. package/src/lib/file-data.js +140 -0
  35. package/src/lib/init.js +217 -0
  36. package/src/lib/local-desktop-computer.js +963 -0
  37. package/src/lib/model-adapter.js +32 -0
  38. package/src/lib/openai-responses-client.js +162 -0
  39. package/src/lib/output.js +57 -0
  40. package/src/lib/playwright-computer.js +363 -0
  41. package/src/lib/presets.js +141 -0
  42. package/src/lib/result.js +95 -0
  43. package/src/lib/runtime.js +471 -0
  44. package/src/lib/virtual-shared-folder.js +109 -0
  45. package/src/lib/zod-output.js +26 -0
  46. package/src/zod.d.ts +12 -0
  47. package/src/zod.js +5 -0
@@ -0,0 +1,938 @@
1
+ import { mkdir, writeFile } from "node:fs/promises";
2
+ import { dirname, join } from "node:path";
3
+
4
+ import { AutomifyError, MaxStepsExceededError, SafetyCheckError } from "./errors.js";
5
+ import { OpenAIResponsesClient } from "./openai-responses-client.js";
6
+ import { toDataUrl } from "./adapter-toolkit.js";
7
+ import { filesToEvaluate } from "./file-data.js";
8
+ import { buildRunResult, buildTextConfig } from "./result.js";
9
+ import {
10
+ callHook,
11
+ debugLog,
12
+ mergeRequestOptions,
13
+ normalizeAutomifyOptions,
14
+ normalizeDoArguments,
15
+ summarizePayload,
16
+ summarizeResponse,
17
+ writeDebugLogFile
18
+ } from "./runtime.js";
19
+
20
// Step budget used by the Automify constructor when no maxSteps option is supplied.
const DEFAULT_MAX_STEPS = 1000;
// Fallback `detail` value attached to screenshots sent to the model.
const DEFAULT_SCREENSHOT_DETAIL = "auto";
// Fallback bounding box that screenshots are scaled down to fit before being sent.
const DEFAULT_SCREENSHOT_MAX_WIDTH = 1440;
const DEFAULT_SCREENSHOT_MAX_HEIGHT = 1440;
24
+
25
+ export { AutomifyError, MaxStepsExceededError, SafetyCheckError };
26
+
27
/**
 * Factory wrapper around the Automify constructor.
 *
 * @param {object} [options] - Passed through unchanged to `new Automify(options)`.
 * @returns {Automify} The newly constructed instance.
 */
export function createAutomify(options) {
  const automify = new Automify(options);
  return automify;
}
30
+
31
/**
 * Runs a model-driven "computer use" loop against a computer adapter.
 *
 * Each run sends the instruction to `client.createResponse(...)`, executes the
 * actions of any `computer_call` item in the response through
 * `computer.execute(...)`, answers with a fresh screenshot, and repeats until
 * the model stops requesting actions or the step budget is used up.
 */
export class Automify {
  /**
   * @param {object} [options] - Raw options; they are passed through
   *   `normalizeAutomifyOptions` before being stored on the instance.
   *   `computer` is mandatory and must expose `execute` and `screenshot`.
   *   When no `client` is given an `OpenAIResponsesClient` is created from
   *   `openaiApiKey`, `baseURL` and `fetchImpl`.
   * @throws {AutomifyError} When the computer adapter is missing or incomplete.
   */
  constructor(options = {}) {
    const {
      openaiApiKey,
      client,
      computer,
      model,
      baseURL,
      fetchImpl,
      maxSteps = DEFAULT_MAX_STEPS,
      requestOptions,
      displayWidth,
      displayHeight,
      environment,
      reasoning,
      safetyIdentifier,
      allowedDomains,
      onStep,
      onRequest,
      onResponse,
      onComplete,
      redactScreenshot,
      screenshotDetail,
      screenshotMaxWidth = DEFAULT_SCREENSHOT_MAX_WIDTH,
      screenshotMaxHeight = DEFAULT_SCREENSHOT_MAX_HEIGHT,
      screenshotResize,
      sendInitialScreenshot,
      initialScreenshot,
      finalScreenshot,
      actionScreenshots,
      trace,
      silent,
      debug,
      logFile
    } = normalizeAutomifyOptions(options);
    this.client = client ?? new OpenAIResponsesClient({ openaiApiKey, baseURL, fetchImpl });
    this.computer = computer;
    this.model = model;
    this.maxSteps = maxSteps;
    this.requestOptions = requestOptions;
    this.displayWidth = displayWidth;
    this.displayHeight = displayHeight;
    this.environment = environment;
    this.reasoning = reasoning;
    this.safetyIdentifier = safetyIdentifier;
    this.allowedDomains = allowedDomains;
    this.onStep = onStep;
    this.onRequest = onRequest;
    this.onResponse = onResponse;
    this.onComplete = onComplete;
    this.redactScreenshot = redactScreenshot;
    this.screenshotDetail = screenshotDetail;
    this.screenshotMaxWidth = screenshotMaxWidth;
    this.screenshotMaxHeight = screenshotMaxHeight;
    this.screenshotResize = screenshotResize;
    this.sendInitialScreenshot = sendInitialScreenshot;
    this.initialScreenshot = initialScreenshot;
    this.finalScreenshot = finalScreenshot;
    this.actionScreenshots = actionScreenshots;
    this.trace = trace;
    this.silent = silent;
    this.debug = debug;
    this.logFile = logFile;

    assertComputer(computer);
  }

  /**
   * Executes one instruction to completion.
   *
   * @param {string} instruction - Non-empty task description for the model.
   * @param {object} [runOptions] - Interpreted together with `maybeOptions`
   *   by `normalizeDoArguments`, which yields the run's `data` and `options`.
   * @param {object} [maybeOptions] - See `runOptions`.
   * @returns {Promise<object>} The value built by `buildRunResult`, extended
   *   with `finalScreenshot` (when one was saved) and `trace` (when enabled).
   * @throws {AutomifyError} For an empty instruction, a missing model, or a
   *   current URL outside the allowed domains.
   * @throws {SafetyCheckError} When pending safety checks are not acknowledged.
   * @throws {MaxStepsExceededError} When the model still requests an action
   *   after `maxSteps` steps have been executed.
   */
  async do(instruction, runOptions = {}, maybeOptions) {
    if (typeof instruction !== "string" || instruction.trim() === "") {
      throw new AutomifyError("instruction must be a non-empty string.");
    }

    const { data, options } = normalizeDoArguments(runOptions, maybeOptions);
    // The per-run `silent` override is applied to the instance for the
    // duration of the run and restored in `finally`.
    // NOTE(review): overlapping do() calls on one instance share this flag.
    const previousSilent = this.silent;
    if ("silent" in options) this.silent = options.silent;

    try {
      const maxSteps = options.maxSteps ?? this.maxSteps;
      const model = assertModel(options.model ?? this.model);
      const tools = [this.#computerTool(options)];
      const steps = [];
      const traceEnabled = options.trace ?? this.trace;
      const traceEvents = [];
      const trace = (event) => {
        if (traceEnabled) {
          traceEvents.push({ at: new Date().toISOString(), ...event });
        }
      };

      // Builds the run result from a response that requests no further
      // action, saves the optional final screenshot and fires onComplete.
      const finish = async (finalResponse) => {
        const result = buildRunResult(finalResponse, steps, options.output);
        const finalScreenshot = await this.#saveFinalScreenshot({ response: finalResponse, steps }, options, trace);
        if (finalScreenshot) result.finalScreenshot = finalScreenshot;
        if (traceEnabled) result.trace = traceEvents;
        await this.#complete(result, { instruction, data }, options);
        return result;
      };

      const initialScreenshotPath = initialScreenshotPathFor(options, this);
      const finalScreenshotPath = finalScreenshotPathFor(options, this);
      const actionScreenshotsPath = actionScreenshotsPathFor(options, this);
      this.#debug("run_start", {
        model,
        maxSteps,
        initialScreenshot: initialScreenshotPath ?? null,
        finalScreenshot: finalScreenshotPath ?? null,
        actionScreenshots: actionScreenshotsPath ?? null,
        screenshotDetail: screenshotDetailFor(options, this),
        screenshotMaxWidth: screenshotMaxWidthFor(options, this),
        screenshotMaxHeight: screenshotMaxHeightFor(options, this)
      });
      trace({
        type: "run_start",
        model,
        maxSteps,
        initialScreenshot: initialScreenshotPath ?? null,
        finalScreenshot: finalScreenshotPath ?? null,
        actionScreenshots: actionScreenshotsPath ?? null,
        screenshotDetail: screenshotDetailFor(options, this)
      });

      await this.#assertAllowedCurrentUrl(options);

      const initial = await this.#initialInput(
        instruction,
        data,
        {
          ...options,
          allowedDomains: options.allowedDomains ?? this.allowedDomains
        },
        trace
      );
      // Maps coordinates in the (possibly downscaled) image the model saw
      // back to the computer's own coordinate space.
      let actionCoordinateTransform = initial.actionCoordinateTransform;
      let lastPreparedScreenshot = initial.preparedScreenshot;
      let response = await this.#createResponse(
        mergeRequestOptions(options.requestOptions ?? this.requestOptions, {
          model,
          tools,
          input: initial.input,
          text: buildTextConfig(options.output),
          reasoning: options.reasoning ?? this.reasoning ?? { summary: "concise" },
          safety_identifier: options.safetyIdentifier ?? this.safetyIdentifier,
          truncation: "auto"
        }),
        { phase: "initial", surface: "computer", requestOptions: options.requestOptions ?? this.requestOptions, trace }
      );

      for (let step = 0; step < maxSteps; step += 1) {
        const computerCall = findComputerCall(response);

        if (!computerCall) {
          // `await` keeps the `finally` below from running before completion.
          return await finish(response);
        }

        await this.#handleSafetyChecks(computerCall, response, options);

        const actions = getComputerActions(computerCall);
        await this.#emitStep(
          {
            index: step,
            phase: "before_action",
            action: actions[0],
            actions,
            call: computerCall,
            response
          },
          options
        );
        const executedActions = [];
        const actionScreenshots = [];
        for (let actionIndex = 0; actionIndex < actions.length; actionIndex += 1) {
          const action = actions[actionIndex];
          const executableAction = scaleComputerAction(action, actionCoordinateTransform);
          executedActions.push(executableAction);
          const beforeScreenshot = await this.#saveActionScreenshot(
            {
              step,
              actionIndex,
              phase: "before",
              action,
              executableAction,
              call: computerCall,
              response
            },
            options,
            trace
          );
          const actionStartedAt = Date.now();
          await this.computer.execute(executableAction, {
            call: computerCall,
            response,
            step,
            actionIndex
          });
          const durationMs = Date.now() - actionStartedAt;
          const afterScreenshot = await this.#saveActionScreenshot(
            {
              step,
              actionIndex,
              phase: "after",
              action,
              executableAction,
              call: computerCall,
              response,
              durationMs
            },
            options,
            trace
          );
          if (beforeScreenshot || afterScreenshot) {
            actionScreenshots.push({
              actionIndex,
              action,
              executableAction,
              before: beforeScreenshot,
              after: afterScreenshot
            });
          }
          this.#debug("action_executed", {
            step,
            actionIndex,
            action,
            executableAction,
            coordinateTransform: actionCoordinateTransform,
            durationMs
          });
          trace({
            type: "action",
            step,
            actionIndex,
            action,
            executableAction,
            coordinateTransform: actionCoordinateTransform,
            durationMs
          });
        }
        // Re-check the domain policy after the actions may have navigated.
        await this.#assertAllowedCurrentUrl(options);

        const screenshotStartedAt = Date.now();
        // A step made only of "screenshot" actions reuses the previous image
        // instead of capturing a new one.
        const preparedScreenshot = canReuseLastScreenshot(actions, lastPreparedScreenshot)
          ? {
              ...lastPreparedScreenshot,
              meta: {
                ...lastPreparedScreenshot.meta,
                reused: true
              }
            }
          : await this.#capturePreparedScreenshot(
              {
                call: computerCall,
                response,
                step
              },
              options
            );
        lastPreparedScreenshot = preparedScreenshot;
        actionCoordinateTransform = preparedScreenshot.actionCoordinateTransform;
        const screenshotDurationMs = Date.now() - screenshotStartedAt;
        this.#debug("screenshot", {
          step,
          phase: "after_action",
          ...preparedScreenshot.meta,
          detail: screenshotDetailFor(options, this),
          durationMs: screenshotDurationMs
        });
        trace({
          type: "screenshot",
          step,
          phase: "after_action",
          ...preparedScreenshot.meta,
          detail: screenshotDetailFor(options, this),
          durationMs: screenshotDurationMs
        });

        const currentUrl = await getCurrentUrl(this.computer);
        const input = {
          type: "computer_call_output",
          call_id: computerCall.call_id,
          output: {
            type: "computer_screenshot",
            image_url: toDataUrl(preparedScreenshot.screenshot),
            detail: screenshotDetailFor(options, this)
          }
        };

        // Reaching this point means #handleSafetyChecks accepted the checks.
        if (computerCall.pending_safety_checks?.length) {
          input.acknowledged_safety_checks = computerCall.pending_safety_checks;
        }

        steps.push({
          index: step,
          action: actions[0],
          actions,
          executedActions,
          actionScreenshots,
          callId: computerCall.call_id,
          safetyChecks: computerCall.pending_safety_checks ?? [],
          responseId: response.id
        });
        await this.#emitStep(
          {
            index: step,
            phase: "after_action",
            action: actions[0],
            actions,
            call: computerCall,
            response,
            currentUrl
          },
          options
        );

        response = await this.#createResponse(
          mergeRequestOptions(options.requestOptions ?? this.requestOptions, {
            model,
            previous_response_id: response.id,
            tools,
            input: [input],
            text: buildTextConfig(options.output),
            truncation: "auto"
          }),
          {
            phase: "continue",
            surface: "computer",
            step,
            requestOptions: options.requestOptions ?? this.requestOptions,
            trace
          }
        );
      }

      // The response produced by the last permitted step has not been
      // inspected yet. If the model is done, return its result instead of
      // reporting an exhausted budget.
      if (!findComputerCall(response)) {
        return await finish(response);
      }

      throw new MaxStepsExceededError(maxSteps);
    } finally {
      this.silent = previousSilent;
    }
  }

  /**
   * Builds the first user message: instruction text, optional file content
   * and, when requested, an initial screenshot.
   *
   * @returns {Promise<{input: object[], actionCoordinateTransform: object|null, preparedScreenshot: object|null}>}
   */
  async #initialInput(instruction, data, options, trace) {
    const content = [{ type: "input_text", text: formatInstruction(instruction, data, this.computer, options) }];
    content.push(...(await evaluationContentFor(options.filesToEvaluate)));
    let actionCoordinateTransform = null;
    let preparedScreenshot = null;

    // A configured file path also causes the screenshot to be sent.
    const path = initialScreenshotPathFor(options, this);
    if (path || sendInitialScreenshotFor(options, this)) {
      const startedAt = Date.now();
      const rawScreenshot = await this.#captureRawScreenshot({ initial: true }, options);
      const writtenBytes = path ? await writeScreenshotFile(path, rawScreenshot) : null;
      preparedScreenshot = await prepareScreenshot(rawScreenshot, options, this);
      actionCoordinateTransform = preparedScreenshot.actionCoordinateTransform;
      const durationMs = Date.now() - startedAt;
      this.#debug("screenshot", {
        phase: "initial",
        path: path ?? null,
        writtenBytes,
        ...preparedScreenshot.meta,
        detail: screenshotDetailFor(options, this),
        durationMs
      });
      trace?.({
        type: "screenshot",
        phase: "initial",
        path: path ?? null,
        writtenBytes,
        ...preparedScreenshot.meta,
        detail: screenshotDetailFor(options, this),
        durationMs
      });
      content.push({
        type: "input_image",
        image_url: toDataUrl(preparedScreenshot.screenshot),
        detail: screenshotDetailFor(options, this)
      });
    }

    return {
      input: [{ role: "user", content }],
      actionCoordinateTransform,
      preparedScreenshot
    };
  }

  /** Captures a screenshot, redacts it, and runs it through prepareScreenshot. */
  async #capturePreparedScreenshot(context, options) {
    let screenshot = await this.computer.screenshot(context);
    screenshot = await this.#redactScreenshot(screenshot, context, options);
    return prepareScreenshot(screenshot, options, this);
  }

  /**
   * Writes a redacted screenshot to the configured final-screenshot path.
   *
   * @returns {Promise<{path: string, bytes: number}|null>} `null` when no
   *   path is configured.
   */
  async #saveFinalScreenshot(context, options, trace) {
    const path = finalScreenshotPathFor(options, this);
    if (!path) return null;

    const startedAt = Date.now();
    const screenshot = await this.#captureRawScreenshot(
      {
        ...context,
        final: true
      },
      options
    );
    const bytes = await writeScreenshotFile(path, screenshot);
    const durationMs = Date.now() - startedAt;

    this.#debug("screenshot", {
      phase: "final",
      path,
      bytes,
      durationMs
    });
    trace?.({
      type: "screenshot",
      phase: "final",
      path,
      bytes,
      durationMs
    });

    return {
      path,
      bytes
    };
  }

  /**
   * Writes a before/after screenshot for one action into the configured
   * action-screenshots directory.
   *
   * @returns {Promise<{path: string, bytes: number}|null>} `null` when no
   *   directory is configured.
   */
  async #saveActionScreenshot(context, options, trace) {
    const directory = actionScreenshotsPathFor(options, this);
    if (!directory) return null;

    const startedAt = Date.now();
    const path = actionScreenshotFilePath(directory, context);
    const screenshot = await this.#captureRawScreenshot(
      {
        ...context,
        actionScreenshot: true
      },
      options
    );
    const bytes = await writeScreenshotFile(path, screenshot);
    const durationMs = Date.now() - startedAt;

    this.#debug("screenshot", {
      phase: `action_${context.phase}`,
      step: context.step,
      actionIndex: context.actionIndex,
      action: context.action,
      path,
      bytes,
      durationMs
    });
    trace?.({
      type: "screenshot",
      phase: `action_${context.phase}`,
      step: context.step,
      actionIndex: context.actionIndex,
      action: context.action,
      path,
      bytes,
      durationMs
    });

    return {
      path,
      bytes
    };
  }

  /** Captures a screenshot and redacts it, without any resizing. */
  async #captureRawScreenshot(context, options) {
    let screenshot = await this.computer.screenshot(context);
    return this.#redactScreenshot(screenshot, context, options);
  }

  /**
   * Describes the computer tool for the request. Each field resolves from the
   * run options, then the instance, then the computer adapter; fields that
   * are still undefined are dropped.
   */
  #computerTool(options) {
    return cleanUndefined({
      type: "computer",
      environment: options.environment ?? this.environment ?? this.computer.environment,
      displayWidth: options.displayWidth ?? this.displayWidth ?? this.computer.displayWidth,
      displayHeight: options.displayHeight ?? this.displayHeight ?? this.computer.displayHeight
    });
  }

  /**
   * Requires the run-level `onSafetyCheck` callback to return a truthy value
   * for any pending safety checks on the call.
   *
   * @throws {SafetyCheckError} When checks are pending and there is no
   *   callback, or the callback returns a falsy value.
   */
  async #handleSafetyChecks(computerCall, response, options) {
    const checks = computerCall.pending_safety_checks ?? [];
    if (checks.length === 0) return;

    if (typeof options.onSafetyCheck !== "function") {
      throw new SafetyCheckError(checks, computerCall.action);
    }

    const acknowledged = await options.onSafetyCheck({
      checks,
      action: computerCall.action,
      call: computerCall,
      response
    });

    if (!acknowledged) {
      throw new SafetyCheckError(checks, computerCall.action);
    }
  }

  /**
   * Sends one request through the client, surrounded by the onRequest and
   * onResponse hooks, debug output and a trace event.
   */
  async #createResponse(payload, meta) {
    // `trace` is internal plumbing and is kept out of what hooks receive.
    const { trace, ...publicMeta } = meta;
    await callHook(this.onRequest, payload, publicMeta);
    this.#debug("request", { meta: publicMeta, payload: summarizePayload(payload) });
    const startedAt = Date.now();
    const response = await this.client.createResponse(payload, publicMeta);
    const durationMs = Date.now() - startedAt;
    await callHook(this.onResponse, response, publicMeta);
    this.#debug("response", { meta: publicMeta, durationMs, response: summarizeResponse(response) });
    trace?.({
      type: "response",
      phase: publicMeta.phase,
      step: publicMeta.step,
      responseId: response?.id,
      durationMs
    });
    return response;
  }

  /** Fires the instance-level onStep hook, then the run-level one. */
  async #emitStep(event, options) {
    await callHook(this.onStep, event);
    await callHook(options.onStep, event);
    this.#debug("step", event);
  }

  /** Fires the instance-level onComplete hook, then the run-level one. */
  async #complete(result, context, options) {
    const event = {
      instruction: context.instruction,
      data: context.data,
      result,
      response: result.response,
      steps: result.steps,
      ok: result.ok,
      status: result.status,
      completed: result.completed,
      stopReason: result.stopReason,
      surface: surfaceFromComputer(this.computer)
    };

    await callHook(this.onComplete, event);
    await callHook(options.onComplete, event);
    this.#debug("complete", event);
  }

  /** Applies the run-level or instance-level redactor; passes through when neither is a function. */
  async #redactScreenshot(screenshot, context, options) {
    const redactor = options.redactScreenshot ?? this.redactScreenshot;
    if (typeof redactor !== "function") return screenshot;
    return redactor(screenshot, context);
  }

  /**
   * Enforces the allowed-domain policy. The check is skipped when no domains
   * are configured or the adapter has no `currentUrl()` method.
   *
   * @throws {AutomifyError} When the current URL is missing or not allowed.
   */
  async #assertAllowedCurrentUrl(options) {
    const allowedDomains = options.allowedDomains ?? this.allowedDomains;
    if (!allowedDomains?.length || typeof this.computer.currentUrl !== "function") return;

    const currentUrl = await this.computer.currentUrl();
    if (!currentUrl || !isAllowedUrl(currentUrl, allowedDomains)) {
      throw new AutomifyError(`Current URL is not allowed: ${currentUrl ?? "unknown"}`);
    }
  }

  /** Sends one debug event to both the log file writer and the debug logger. */
  #debug(message, details) {
    writeDebugLogFile(this.logFile, "automify", message, details, { silent: this.silent });
    debugLog(this.debug, "automify", message, details, { silent: this.silent });
  }
}
591
+
592
/**
 * Validates the shape of a computer adapter.
 *
 * @param {object} computer - Adapter that must expose `execute` and `screenshot` functions.
 * @throws {AutomifyError} When the adapter is not an object or lacks a required method.
 */
function assertComputer(computer) {
  const isObject = Boolean(computer) && typeof computer === "object";
  if (!isObject) {
    throw new AutomifyError("A computer adapter is required.");
  }

  // Checked in this order so the first missing method is the one reported.
  const requiredMethods = [
    ["execute", "The computer adapter must provide execute(action, context)."],
    ["screenshot", "The computer adapter must provide screenshot(context)."]
  ];
  for (const [method, message] of requiredMethods) {
    if (typeof computer[method] !== "function") {
      throw new AutomifyError(message);
    }
  }
}
605
+
606
/**
 * Returns a shallow copy of `value` without the entries whose value is `undefined`.
 *
 * @param {object} value - Source object; it is not modified.
 * @returns {object} New object holding only the defined entries.
 */
function cleanUndefined(value) {
  const definedEntries = [];
  for (const entry of Object.entries(value)) {
    if (entry[1] !== undefined) definedEntries.push(entry);
  }
  return Object.fromEntries(definedEntries);
}
609
+
610
/**
 * Assembles the text sent to the model for a run.
 *
 * Non-blank guidance strings (adapter instructions, run instructions, domain
 * policy) are joined and placed ahead of a "Task:" section. Non-empty `data`
 * is appended as pretty-printed JSON in a "Data:" section.
 *
 * @param {string} instruction - The task text.
 * @param {*} data - Optional payload; nullish values and empty objects are omitted.
 * @param {object} [computer] - Adapter whose `instructions` string, if any, is included.
 * @param {object} [options] - Run options (`instructions`, `allowedDomains`).
 * @returns {string} The formatted instruction.
 */
function formatInstruction(instruction, data, computer, options = {}) {
  const candidates = [computer?.instructions, options.instructions, domainPolicyGuidance(options.allowedDomains)];
  const guidanceParts = [];
  for (const candidate of candidates) {
    if (typeof candidate === "string" && candidate.trim() !== "") {
      guidanceParts.push(candidate);
    }
  }
  const guidance = guidanceParts.join("\n\n");

  let text = instruction;
  if (guidance) {
    text = `${guidance}\n\nTask:\n${instruction}`;
  }

  const isEmptyObject = typeof data === "object" && data != null && Object.keys(data).length === 0;
  if (data == null || isEmptyObject) {
    return text;
  }

  return `${text}\n\nData:\n${JSON.stringify(data, null, 2)}`;
}
622
+
623
/**
 * Produces the navigation-policy paragraph for the allowed domains.
 *
 * @param {Array} [allowedDomains] - Domain rules.
 * @returns {string} The policy text, or "" when there are no rules to describe.
 */
function domainPolicyGuidance(allowedDomains) {
  const listed = domainRulesGuidance(allowedDomains);
  return listed
    ? `Navigation policy:\nStay within these allowed domains: ${listed}. Do not navigate to other domains.`
    : "";
}
628
+
629
/**
 * Renders a list of domain rules as a comma-separated description.
 *
 * @param {Array} [rules] - Domain rules.
 * @returns {string} Joined descriptions, or "" when `rules` is not a non-empty array.
 */
function domainRulesGuidance(rules) {
  const hasRules = Array.isArray(rules) && rules.length > 0;
  if (!hasRules) return "";

  const descriptions = rules.map((rule) => domainRuleGuidance(rule));
  return descriptions.join(", ");
}
633
+
634
/**
 * Describes a single domain rule in text form.
 *
 * @param {RegExp|Function|*} rule - A regular expression, a predicate, or a value treated as a domain name.
 * @returns {string} The regex source, a placeholder for predicates, or the quoted domain name.
 */
function domainRuleGuidance(rule) {
  if (rule instanceof RegExp) {
    return rule.toString();
  }
  if (typeof rule === "function") {
    return "[custom domain rule]";
  }

  const quoted = JSON.stringify(String(rule));
  return `${quoted} (domain and subdomains)`;
}
640
+
641
/**
 * Converts the run's files into input content items via `filesToEvaluate`.
 *
 * @param {*} files - Files to evaluate; nullish means none.
 * @returns {Promise<Array>} Content items, or an empty array when `files` is nullish.
 */
async function evaluationContentFor(files) {
  return files == null ? [] : filesToEvaluate(files);
}
645
+
646
/**
 * Ensures a usable model name was supplied.
 *
 * @param {*} model - Candidate model name.
 * @returns {string} The same model name, unchanged.
 * @throws {AutomifyError} When `model` is not a non-blank string.
 */
function assertModel(model) {
  const isUsable = typeof model === "string" && model.trim() !== "";
  if (isUsable) return model;

  throw new AutomifyError("A model is required. Pass model to initAutomify(), the surface factory, or do().");
}
653
+
654
/**
 * Fits a screenshot inside the configured maximum size and reports how
 * coordinates on the result map back to the original image.
 *
 * The original image is returned untouched when its size cannot be read,
 * when it already fits, or when resizing yields nothing (flagged with
 * `meta.resizeSkipped`).
 *
 * @param {*} screenshot - Screenshot as produced by the computer adapter.
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying fallback settings.
 * @returns {Promise<{screenshot: *, actionCoordinateTransform: object|null, meta: object}>}
 */
async function prepareScreenshot(screenshot, options, automify) {
  const originalBytes = byteLength(screenshot);
  const sourceSize = pngDimensions(screenshot);
  const maxWidth = screenshotMaxWidthFor(options, automify);
  const maxHeight = screenshotMaxHeightFor(options, automify);
  const target = fitDimensions(sourceSize, maxWidth, maxHeight);

  // Result shape shared by every path that sends the original image as-is.
  const passthrough = (extraMeta) => ({
    screenshot,
    actionCoordinateTransform: coordinateTransform(sourceSize, sourceSize),
    meta: {
      originalBytes,
      bytes: originalBytes,
      originalWidth: sourceSize?.width,
      originalHeight: sourceSize?.height,
      width: sourceSize?.width,
      height: sourceSize?.height,
      resized: false,
      ...extraMeta
    }
  });

  const alreadyFits = !target || (target.width === sourceSize.width && target.height === sourceSize.height);
  if (alreadyFits) {
    return passthrough();
  }

  const resized = await resizeScreenshot(screenshot, target, options, automify);
  if (!resized) {
    return passthrough({ resizeSkipped: true });
  }

  // Prefer the size read from the resized image; fall back to the requested one.
  const finalSize = pngDimensions(resized) ?? target;
  return {
    screenshot: resized,
    actionCoordinateTransform: coordinateTransform(sourceSize, finalSize),
    meta: {
      originalBytes,
      bytes: byteLength(resized),
      originalWidth: sourceSize.width,
      originalHeight: sourceSize.height,
      width: finalSize.width,
      height: finalSize.height,
      resized: true
    }
  };
}
710
+
711
/**
 * Resizes a screenshot to the target dimensions.
 *
 * A `screenshotResize` function from the run options or the instance takes
 * precedence and its result is returned as-is. Otherwise the optional `jimp`
 * package is loaded on demand; any failure there is treated as "skip
 * resizing" and reported through the debug channels.
 *
 * @param {*} screenshot - Image data to resize.
 * @param {{width: number, height: number}} target - Desired dimensions.
 * @param {object} options - Run options (`screenshotResize`).
 * @param {object} automify - Instance supplying `screenshotResize`, `debug`, `logFile` and `silent`.
 * @returns {Promise<*>} The resized image, or `null` when resizing was skipped.
 */
async function resizeScreenshot(screenshot, target, options, automify) {
  const customResize = options.screenshotResize ?? automify.screenshotResize;
  if (typeof customResize === "function") {
    return customResize(screenshot, target);
  }

  try {
    const { default: Jimp } = await import("jimp");
    const image = await Jimp.read(Buffer.from(screenshot));
    image.resize(target.width, target.height);
    // `await` is required: returning the bare promise would let an encoding
    // rejection bypass the catch below and fail the whole run.
    return await image.getBufferAsync(Jimp.MIME_PNG);
  } catch (error) {
    const details = {
      reason: error?.message,
      target
    };
    // Same two sinks and the same `silent` handling as Automify's own debug output.
    writeDebugLogFile(automify.logFile, "automify", "screenshot_resize_skipped", details, { silent: automify.silent });
    debugLog(automify.debug, "automify", "screenshot_resize_skipped", details, { silent: automify.silent });
    return null;
  }
}
730
+
731
/**
 * Computes the size an image should be scaled down to so that it fits inside
 * the given limits while keeping its aspect ratio.
 *
 * @param {{width: number, height: number}|null} dimensions - Current size.
 * @param {*} maxWidth - Width limit; ignored unless it is a positive number.
 * @param {*} maxHeight - Height limit; ignored unless it is a positive number.
 * @returns {{width: number, height: number}|null} `null` when the size is
 *   unknown or no limit applies, the same `dimensions` object when it already
 *   fits, otherwise the scaled-down size (each side at least 1).
 */
function fitDimensions(dimensions, maxWidth, maxHeight) {
  const hasSize = Boolean(dimensions?.width) && Boolean(dimensions?.height);
  if (!hasSize) return null;

  const widthLimit = positiveNumber(maxWidth);
  const heightLimit = positiveNumber(maxHeight);
  if (!(widthLimit || heightLimit)) return null;

  // Never scale up: 1 is always among the candidate ratios.
  const ratios = [1];
  if (widthLimit) ratios.push(widthLimit / dimensions.width);
  if (heightLimit) ratios.push(heightLimit / dimensions.height);
  const scale = Math.min(...ratios);
  if (scale >= 1) return dimensions;

  const scaledWidth = Math.round(dimensions.width * scale);
  const scaledHeight = Math.round(dimensions.height * scale);
  return {
    width: Math.max(1, scaledWidth),
    height: Math.max(1, scaledHeight)
  };
}
749
+
750
/**
 * Describes how to convert coordinates on the image shown to the model into
 * coordinates on the original image.
 *
 * @param {{width: number, height: number}|null} originalDimensions - Size of the original image.
 * @param {{width: number, height: number}|null} modelDimensions - Size of the image sent to the model.
 * @returns {object|null} Scale factors plus both sizes, or `null` when any
 *   of the four measurements is missing or zero.
 */
function coordinateTransform(originalDimensions, modelDimensions) {
  const measurements = [
    originalDimensions?.width,
    originalDimensions?.height,
    modelDimensions?.width,
    modelDimensions?.height
  ];
  if (measurements.some((measurement) => !measurement)) {
    return null;
  }

  const [computerWidth, computerHeight, modelWidth, modelHeight] = measurements;
  return {
    scaleX: computerWidth / modelWidth,
    scaleY: computerHeight / modelHeight,
    modelWidth,
    modelHeight,
    computerWidth,
    computerHeight
  };
}
769
+
770
/**
 * Converts an action's coordinates from model-image space to computer space.
 *
 * @param {object} action - Action as issued by the model.
 * @param {{scaleX: number, scaleY: number}|null} transform - Scale factors;
 *   a missing or identity transform returns `action` itself.
 * @returns {object} The same object when nothing needs scaling, otherwise a
 *   shallow copy with `x`, `y` and every `path` point scaled.
 */
function scaleComputerAction(action, transform) {
  const isIdentity = !transform || (transform.scaleX === 1 && transform.scaleY === 1);
  if (isIdentity) return action;

  const { scaleX, scaleY } = transform;
  const result = { ...action };

  if ("x" in result) {
    result.x = scaleCoordinate(result.x, scaleX);
  }
  if ("y" in result) {
    result.y = scaleCoordinate(result.y, scaleY);
  }
  if (Array.isArray(result.path)) {
    result.path = result.path.map((point) => {
      const x = scaleCoordinate(point.x, scaleX);
      const y = scaleCoordinate(point.y, scaleY);
      return { ...point, x, y };
    });
  }

  return result;
}
785
+
786
/**
 * Scales one coordinate and rounds it to the nearest integer.
 *
 * @param {*} value - Coordinate; anything that is not a usable number counts as 0.
 * @param {number} scale - Multiplier.
 * @returns {number} The rounded, scaled coordinate.
 */
function scaleCoordinate(value, scale) {
  const numeric = Number(value);
  const base = numeric || 0;
  return Math.round(base * scale);
}
789
+
790
/**
 * Decides whether the previous screenshot can be sent again instead of
 * capturing a new one.
 *
 * @param {Array} actions - Actions performed in the current step.
 * @param {*} lastPreparedScreenshot - Previously prepared screenshot, if any.
 * @returns {boolean} True only when a previous screenshot exists and every
 *   action in a non-empty list is of type "screenshot".
 */
function canReuseLastScreenshot(actions, lastPreparedScreenshot) {
  if (!lastPreparedScreenshot) return false;
  if (!(actions.length > 0)) return false;

  const onlyScreenshots = actions.every((action) => action?.type === "screenshot");
  return Boolean(onlyScreenshots);
}
795
+
796
/**
 * Resolves the screenshot `detail` setting: run option first, then the
 * instance setting, then the module default.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {*} The first non-nullish value in that order.
 */
function screenshotDetailFor(options, automify) {
  const perRun = options.screenshotDetail;
  if (perRun != null) return perRun;

  const perInstance = automify.screenshotDetail;
  return perInstance != null ? perInstance : DEFAULT_SCREENSHOT_DETAIL;
}
799
+
800
/**
 * Resolves whether an initial screenshot should be sent: run option first,
 * then the instance setting, defaulting to `false`.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {*} The first non-nullish value, or `false`.
 */
function sendInitialScreenshotFor(options, automify) {
  const perRun = options.sendInitialScreenshot;
  if (perRun != null) return perRun;

  const perInstance = automify.sendInitialScreenshot;
  return perInstance != null ? perInstance : false;
}
803
+
804
/**
 * Resolves the file path for the initial screenshot, preferring the run
 * option over the instance setting.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {string|null} Whatever `resolveScreenshotPath` yields for the chosen value.
 */
function initialScreenshotPathFor(options, automify) {
  const configured = options.initialScreenshot ?? automify.initialScreenshot;
  return resolveScreenshotPath(configured);
}
807
+
808
/**
 * Resolves the file path for the final screenshot, preferring the run option
 * over the instance setting.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {string|null} Whatever `resolveScreenshotPath` yields for the chosen value.
 */
function finalScreenshotPathFor(options, automify) {
  const configured = options.finalScreenshot ?? automify.finalScreenshot;
  return resolveScreenshotPath(configured);
}
811
+
812
/**
 * Resolves the directory for per-action screenshots, preferring the run
 * option over the instance setting.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {string|null} Whatever `resolveScreenshotPath` yields for the chosen value.
 */
function actionScreenshotsPathFor(options, automify) {
  const configured = options.actionScreenshots ?? automify.actionScreenshots;
  return resolveScreenshotPath(configured);
}
815
+
816
/**
 * Accepts a screenshot path setting only when it is a non-empty string.
 *
 * @param {*} value - Configured value.
 * @returns {string|null} The string itself, or `null` for anything else.
 */
function resolveScreenshotPath(value) {
  if (typeof value !== "string") return null;
  return value.length > 0 ? value : null;
}
819
+
820
/**
 * Builds the file path for one per-action screenshot.
 *
 * @param {string} directory - Directory that holds the action screenshots.
 * @param {{step: number, actionIndex: number, phase: string, action: object}} context -
 *   Identifies the action; a missing action type falls back to "action".
 * @returns {string} `<directory>/step-SSSS-action-AAAA-<phase>-<type>.png`.
 */
function actionScreenshotFilePath(directory, { step, actionIndex, phase, action }) {
  const typeSegment = sanitizePathSegment(action?.type ?? "action");
  const stepSegment = padNumber(step);
  const actionSegment = padNumber(actionIndex);
  const fileName = `step-${stepSegment}-action-${actionSegment}-${phase}-${typeSegment}.png`;
  return join(directory, fileName);
}
824
+
825
/**
 * Left-pads a value with zeros to at least four characters.
 *
 * @param {*} value - Value to render.
 * @returns {string} The padded string.
 */
function padNumber(value) {
  const digits = String(value);
  return digits.padStart(4, "0");
}
828
+
829
/**
 * Turns a value into a lowercase file-name segment.
 *
 * Runs of characters other than `a-z`, `0-9`, `.`, `_` and `-` collapse into
 * a single dash, and leading/trailing dashes are removed.
 *
 * @param {*} value - Value to sanitize.
 * @returns {string} The sanitized segment, or "action" when nothing is left.
 */
function sanitizePathSegment(value) {
  const lowered = String(value).toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9._-]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  return trimmed === "" ? "action" : trimmed;
}
837
+
838
/**
 * Writes a screenshot to disk, creating the parent directory when needed.
 *
 * @param {string} path - Destination file path.
 * @param {*} screenshot - Image data accepted by `screenshotToBuffer`.
 * @returns {Promise<number>} Number of bytes written.
 */
async function writeScreenshotFile(path, screenshot) {
  const bytes = screenshotToBuffer(screenshot);
  const parentDirectory = dirname(path);
  await mkdir(parentDirectory, { recursive: true });
  await writeFile(path, bytes);
  return bytes.byteLength;
}
844
+
845
/**
 * Resolves the maximum screenshot width: run option first, then the instance
 * setting, then the module default.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {*} The first non-nullish value in that order.
 */
function screenshotMaxWidthFor(options, automify) {
  const perRun = options.screenshotMaxWidth;
  if (perRun != null) return perRun;

  const perInstance = automify.screenshotMaxWidth;
  return perInstance != null ? perInstance : DEFAULT_SCREENSHOT_MAX_WIDTH;
}
848
+
849
/**
 * Resolves the maximum screenshot height: run option first, then the
 * instance setting, then the module default.
 *
 * @param {object} options - Run options.
 * @param {object} automify - Instance supplying the fallback.
 * @returns {*} The first non-nullish value in that order.
 */
function screenshotMaxHeightFor(options, automify) {
  const perRun = options.screenshotMaxHeight;
  if (perRun != null) return perRun;

  const perInstance = automify.screenshotMaxHeight;
  return perInstance != null ? perInstance : DEFAULT_SCREENSHOT_MAX_HEIGHT;
}
852
+
853
/**
 * Coerces a value to a number and accepts it only when finite and above zero.
 *
 * @param {*} value - Candidate value.
 * @returns {number|null} The coerced number, or `null` when it is not a positive finite number.
 */
function positiveNumber(value) {
  const parsed = Number(value);
  if (!Number.isFinite(parsed)) return null;
  return parsed > 0 ? parsed : null;
}
857
+
858
/**
 * Reads the pixel dimensions from the header of a PNG image.
 *
 * Only the first 24 bytes are inspected: the 8-byte PNG signature followed
 * by two big-endian 32-bit integers at offsets 16 and 20.
 *
 * @param {Buffer|Uint8Array|ArrayBuffer|string} value - Image data.
 * @returns {{width: number, height: number}|null} The dimensions, or `null`
 *   when the data is shorter than 24 bytes or lacks the PNG signature.
 */
function pngDimensions(value) {
  // Byte arrays are wrapped as a view over the same memory, so a large
  // screenshot is not copied just to read its header.
  const buffer =
    value instanceof Uint8Array ? Buffer.from(value.buffer, value.byteOffset, value.byteLength) : Buffer.from(value);
  if (buffer.length < 24) return null;

  const signature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  const hasSignature = signature.every((byte, index) => buffer[index] === byte);
  if (!hasSignature) return null;

  return {
    width: buffer.readUInt32BE(16),
    height: buffer.readUInt32BE(20)
  };
}
879
+
880
/**
 * Reports the size in bytes of screenshot data.
 *
 * @param {*} value - A string, Buffer, Uint8Array or ArrayBuffer.
 * @returns {number|undefined} The byte length, or `undefined` for any other type.
 */
function byteLength(value) {
  if (typeof value === "string") {
    return Buffer.byteLength(value);
  }

  const isBinary = Buffer.isBuffer(value) || value instanceof Uint8Array || value instanceof ArrayBuffer;
  return isBinary ? value.byteLength : undefined;
}
886
+
887
/**
 * Converts screenshot data into a Buffer suitable for writing to disk.
 *
 * A string in base64 data-URL form is decoded; any other string is encoded
 * as UTF-8. Non-string input goes straight to `Buffer.from`.
 *
 * @param {*} value - Screenshot data.
 * @returns {Buffer} The bytes to write.
 */
function screenshotToBuffer(value) {
  if (typeof value !== "string") {
    return Buffer.from(value);
  }

  const match = /^data:[^;]+;base64,(.*)$/s.exec(value);
  if (match) {
    return Buffer.from(match[1], "base64");
  }
  return Buffer.from(value, "utf8");
}
895
+
896
/**
 * Finds the first `computer_call` item in a response's output.
 *
 * @param {object} [response] - Model response.
 * @returns {object|null} The call item, or `null` when there is none.
 */
function findComputerCall(response) {
  const items = response?.output;
  if (items == null) return null;

  const call = items.find((item) => item.type === "computer_call");
  return call ?? null;
}
899
+
900
/**
 * Extracts the list of actions requested by a computer call.
 *
 * @param {object} computerCall - Call item from the model response.
 * @returns {object[]} The non-empty `actions` array when present, else the
 *   single `action` wrapped in an array, else one `{ type: "screenshot" }` action.
 */
function getComputerActions(computerCall) {
  const batch = computerCall.actions;
  if (Array.isArray(batch) && batch.length > 0) {
    return batch;
  }

  const single = computerCall.action;
  return single ? [single] : [{ type: "screenshot" }];
}
911
+
912
/**
 * Asks the computer adapter for its current URL when it supports that.
 *
 * @param {object} computer - Computer adapter.
 * @returns {Promise<*>} The adapter's `currentUrl()` result, or `null` when
 *   the adapter has no such method.
 */
async function getCurrentUrl(computer) {
  const canReport = typeof computer.currentUrl === "function";
  return canReport ? computer.currentUrl() : null;
}
916
+
917
/**
 * Maps a computer adapter's environment to a surface name.
 *
 * @param {object} [computer] - Computer adapter.
 * @returns {*} "browser" for the browser environment, "desktop" for mac,
 *   windows, ubuntu and linux, otherwise the environment itself or
 *   "computer" when it is nullish.
 */
function surfaceFromComputer(computer) {
  const environment = computer?.environment;
  switch (environment) {
    case "browser":
      return "browser";
    case "mac":
    case "windows":
    case "ubuntu":
    case "linux":
      return "desktop";
    default:
      return environment ?? "computer";
  }
}
922
+
923
/**
 * Checks a URL against a list of allowed-domain rules.
 *
 * Supported rules:
 * - `RegExp`: tested against the URL's hostname;
 * - function: called with the parsed `URL`, allowed when it returns a truthy value;
 * - anything else: stringified and matched case-insensitively against the
 *   hostname itself or as a parent domain of it.
 *
 * @param {string} url - URL to check.
 * @param {Array} allowedDomains - Rules; the URL is allowed if any rule matches.
 * @returns {boolean} `true` when at least one rule matches, `false` otherwise
 *   or when `url` cannot be parsed.
 */
function isAllowedUrl(url, allowedDomains) {
  let parsed;
  try {
    parsed = new URL(url);
  } catch {
    return false;
  }

  const hostname = parsed.hostname.toLowerCase();
  return allowedDomains.some((domain) => {
    if (domain instanceof RegExp) {
      // A regex with the `g` or `y` flag remembers `lastIndex` between
      // `.test()` calls; without a reset the same URL would alternately
      // pass and fail.
      domain.lastIndex = 0;
      return domain.test(parsed.hostname);
    }
    if (typeof domain === "function") return Boolean(domain(parsed));

    const normalized = String(domain).toLowerCase();
    // A blank rule would otherwise match empty hostnames (e.g. file: URLs)
    // and every hostname written with a trailing dot.
    if (normalized === "") return false;
    return hostname === normalized || hostname.endsWith(`.${normalized}`);
  });
}