automify 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/LICENSE +21 -0
  3. package/README.md +401 -0
  4. package/SECURITY.md +17 -0
  5. package/examples/anthropic-provider.js +18 -0
  6. package/examples/browser-basic.js +30 -0
  7. package/examples/browser-with-safety.js +38 -0
  8. package/examples/claude-model-adapter.js +141 -0
  9. package/examples/cli-basic.js +20 -0
  10. package/examples/cli-docker.js +42 -0
  11. package/examples/custom-computer.js +18 -0
  12. package/examples/custom-model-adapter.js +48 -0
  13. package/examples/desktop-docker.js +37 -0
  14. package/examples/desktop-local.js +28 -0
  15. package/examples/evaluate-image.js +26 -0
  16. package/examples/files-and-shared-folder.js +42 -0
  17. package/package.json +74 -0
  18. package/scripts/generate-argument-reference.js +17 -0
  19. package/scripts/install-browser.js +12 -0
  20. package/scripts/install-desktop.js +281 -0
  21. package/src/index.d.ts +1049 -0
  22. package/src/index.js +83 -0
  23. package/src/lib/adapter-locks.js +93 -0
  24. package/src/lib/adapter-toolkit.js +239 -0
  25. package/src/lib/anthropic-model-adapter.js +451 -0
  26. package/src/lib/argument-reference.js +98 -0
  27. package/src/lib/automify.js +938 -0
  28. package/src/lib/browser-automify.js +89 -0
  29. package/src/lib/cli-automify.js +520 -0
  30. package/src/lib/computer-automify.js +103 -0
  31. package/src/lib/docker-cli-automify.js +517 -0
  32. package/src/lib/docker-desktop-computer.js +725 -0
  33. package/src/lib/errors.js +24 -0
  34. package/src/lib/file-data.js +140 -0
  35. package/src/lib/init.js +217 -0
  36. package/src/lib/local-desktop-computer.js +963 -0
  37. package/src/lib/model-adapter.js +32 -0
  38. package/src/lib/openai-responses-client.js +162 -0
  39. package/src/lib/output.js +57 -0
  40. package/src/lib/playwright-computer.js +363 -0
  41. package/src/lib/presets.js +141 -0
  42. package/src/lib/result.js +95 -0
  43. package/src/lib/runtime.js +471 -0
  44. package/src/lib/virtual-shared-folder.js +109 -0
  45. package/src/lib/zod-output.js +26 -0
  46. package/src/zod.d.ts +12 -0
  47. package/src/zod.js +5 -0
@@ -0,0 +1,451 @@
1
+ import {
2
+ computerCall,
3
+ getFunctionOutputs,
4
+ getLastComputerScreenshot,
5
+ getInputText,
6
+ message,
7
+ parseDataUrl,
8
+ response,
9
+ runCommandCall
10
+ } from "./adapter-toolkit.js";
11
+ import { AutomifyError } from "./errors.js";
12
+ import { buildOutputInstruction } from "./result.js";
13
+
14
/** API root used when the adapter is constructed without a `baseURL` option. */
const DEFAULT_ANTHROPIC_BASE_URL = "https://api.anthropic.com/v1";

/** Value sent in the `anthropic-version` header when no `version` option is given. */
const DEFAULT_ANTHROPIC_VERSION = "2023-06-01";

/** `max_tokens` fallback used when neither the payload nor the adapter options set one. */
const DEFAULT_MAX_TOKENS = 4096;
17
+
18
/**
 * Factory wrapper around the `AnthropicModelAdapter` constructor.
 *
 * @param {object} [options] - Passed through unchanged to the constructor.
 * @returns {AnthropicModelAdapter} A freshly constructed adapter.
 */
export function createAnthropicModelAdapter(options = {}) {
  const adapter = new AnthropicModelAdapter(options);
  return adapter;
}
21
+
22
/**
 * Model adapter that sends automify payloads to the Anthropic `/messages`
 * endpoint and converts the replies back into automify response objects.
 *
 * The message history of every response this adapter returns is kept in
 * `this.transcripts`, keyed by the response id, so a later payload can refer
 * back to it through `previous_response_id`.
 */
export class AnthropicModelAdapter {
  /**
   * @param {object} [options]
   * @param {string} options.anthropicApiKey - Sent as the `x-api-key` header. Required.
   * @param {string} [options.baseURL] - API root; a single trailing slash is stripped.
   * @param {string} [options.version] - Value of the `anthropic-version` header.
   * @param {string[]|string} [options.betas] - Sent as `anthropic-beta`; arrays are comma-joined.
   * @param {Function} [options.fetchImpl] - fetch-compatible function (defaults to `globalThis.fetch`).
   * @param {number} [options.maxTokens] - Fallback `max_tokens` when the payload sets none.
   * @param {string} [options.computerToolType] - `type` emitted for computer tools.
   * @param {Function} [options.requestTransform] - Optional hook called as
   *   `(request, { payload, context })`; its awaited result is sent instead.
   * @param {Function} [options.responseTransform] - Optional hook called as
   *   `(data, { payload, context, request })`; its awaited result replaces the parsed body.
   * @throws {AutomifyError} If the API key or a fetch implementation is missing.
   */
  constructor({
    anthropicApiKey,
    baseURL = DEFAULT_ANTHROPIC_BASE_URL,
    version = DEFAULT_ANTHROPIC_VERSION,
    betas = ["computer-use-2025-01-24"],
    fetchImpl = globalThis.fetch,
    maxTokens = DEFAULT_MAX_TOKENS,
    computerToolType = "computer_20250124",
    requestTransform,
    responseTransform
  } = {}) {
    this.anthropicApiKey = anthropicApiKey;
    this.baseURL = baseURL.replace(/\/$/, "");
    this.version = version;
    this.betas = betas;
    this.fetch = fetchImpl;
    this.maxTokens = maxTokens;
    this.computerToolType = computerToolType;
    this.requestTransform = requestTransform;
    this.responseTransform = responseTransform;
    this.transcripts = new Map();

    if (!this.anthropicApiKey) {
      throw new AutomifyError("An anthropicApiKey is required.");
    }

    if (typeof this.fetch !== "function") {
      throw new AutomifyError("A fetch implementation is required. Use Node 18+ or pass fetchImpl.");
    }
  }

  /**
   * Sends one payload to the Messages endpoint and returns the converted reply.
   *
   * @param {object} payload - automify request payload (model, input, tools, ...).
   * @param {object} [context] - Run context; `phase` and `surface` are read here.
   * @returns {Promise<object>} The automify response built from the reply.
   * @throws {AutomifyError} On a non-2xx status, on a 2xx body that is not valid
   *   JSON, or when the (possibly transformed) body is empty or not an object.
   */
  async createResponse(payload, context = {}) {
    const request = await this.#toAnthropicRequest(payload, context);
    const finalRequest =
      typeof this.requestTransform === "function"
        ? await this.requestTransform(request, { payload, context })
        : request;

    const res = await this.fetch(`${this.baseURL}/messages`, {
      method: "POST",
      headers: this.#headers(),
      body: JSON.stringify(finalRequest)
    });
    const text = await res.text();

    let data;
    try {
      data = parseJson(text);
    } catch (error) {
      // A failed request may carry a non-JSON body (for example a gateway error
      // page). Report the HTTP failure below instead of an "invalid JSON" error.
      if (res.ok) throw error;
      data = null;
    }

    if (!res.ok) {
      const reason = data?.error?.message ?? data?.message ?? (res.statusText || `HTTP ${res.status}`);
      throw new AutomifyError(`Anthropic request failed: ${reason}`);
    }

    const finalData =
      typeof this.responseTransform === "function"
        ? await this.responseTransform(data, { payload, context, request: finalRequest })
        : data;

    // parseJson returns null for an empty body; fail with a domain error rather
    // than a TypeError when `.content` is read further down.
    if (finalData === null || typeof finalData !== "object") {
      throw new AutomifyError("Anthropic response body was empty or not a JSON object.");
    }

    const automifyResponse = this.#fromAnthropicResponse(finalData, context, payload);
    const previous = payload.previous_response_id ? this.transcripts.get(payload.previous_response_id) ?? [] : [];
    this.transcripts.set(automifyResponse.id, compactMessagesForStorage([
      ...previous,
      ...this.#userMessagesFromPayload(payload, context),
      { role: "assistant", content: finalData.content ?? [] }
    ], context));

    return automifyResponse;
  }

  /** Builds the HTTP headers for a request; `anthropic-beta` is added only when betas is non-empty. */
  #headers() {
    const headers = {
      "x-api-key": this.anthropicApiKey,
      "anthropic-version": this.version,
      "content-type": "application/json"
    };

    if (this.betas?.length) {
      headers["anthropic-beta"] = Array.isArray(this.betas) ? this.betas.join(",") : this.betas;
    }

    return headers;
  }

  /**
   * Builds the request body: stored history (if any) plus the new user turn,
   * followed by sampling options and tools. Keys whose value is `undefined`
   * are removed.
   */
  async #toAnthropicRequest(payload, context) {
    const previous = payload.previous_response_id ? this.transcripts.get(payload.previous_response_id) ?? [] : [];
    const messages = compactMessagesForRequest([...previous, ...this.#userMessagesFromPayload(payload, context)], context);
    const tools = this.#toolsFromPayload(payload);

    return removeUndefined({
      model: assertModel(payload.model),
      max_tokens: payload.max_tokens ?? payload.maxTokens ?? this.maxTokens,
      temperature: payload.temperature,
      top_p: payload.top_p,
      top_k: payload.top_k,
      stop_sequences: payload.stop_sequences,
      system: payload.system ?? payload.instructions,
      metadata: payload.metadata,
      thinking: payload.thinking,
      messages,
      tools: tools.length ? tools : undefined
    });
  }

  /**
   * Converts the payload input into a single user message.
   *
   * In the "continue" phase, tool outputs become `tool_result` blocks. When
   * that yields nothing, or in any other phase, the input text and images are
   * used instead, with "Continue." as a last resort.
   */
  #userMessagesFromPayload(payload, context) {
    const outputInstruction = buildOutputInstruction(payload.text);

    if (context.phase === "continue") {
      const content = [];

      for (const item of payload.input ?? []) {
        if (item.type === "computer_call_output") {
          content.push({
            type: "tool_result",
            tool_use_id: item.call_id,
            content: computerResultContent(item)
          });
        }

        if (item.type === "function_call_output") {
          content.push({
            type: "tool_result",
            tool_use_id: item.call_id,
            content: typeof item.output === "string" ? item.output : JSON.stringify(item.output)
          });
        }
      }

      if (outputInstruction) {
        content.push({ type: "text", text: outputInstruction });
      }

      if (content.length) return [{ role: "user", content }];
    }

    const content = [];
    const text = getInputText(payload);
    const finalText = [text, outputInstruction].filter(Boolean).join("\n\n");
    if (finalText) content.push({ type: "text", text: finalText });

    for (const item of payload.input ?? []) {
      for (const block of item.content ?? []) {
        if ((block.type === "input_image" || block.type === "computer_screenshot") && block.image_url) {
          content.push(imageBlock(block.image_url));
        }
      }
    }

    if (content.length === 0) {
      content.push({ type: "text", text: "Continue." });
    }

    return [{ role: "user", content }];
  }

  /**
   * Translates payload tools into Anthropic tool definitions. Only tools of
   * type "computer" and "function" are emitted; other types are dropped.
   */
  #toolsFromPayload(payload) {
    const tools = [];
    // Dimensions read from the latest screenshot take precedence over the
    // values declared on the tool.
    const screenshotDimensions = imageDimensions(getLastComputerScreenshot(payload)?.buffer);

    for (const tool of payload.tools ?? []) {
      if (tool.type === "computer") {
        tools.push({
          type: this.computerToolType,
          name: "computer",
          display_width_px: screenshotDimensions?.width ?? tool.display_width ?? tool.displayWidth ?? 1024,
          display_height_px: screenshotDimensions?.height ?? tool.display_height ?? tool.displayHeight ?? 768
        });
      } else if (tool.type === "function") {
        tools.push({
          name: tool.name,
          description: tool.description,
          // input_schema is required by the Messages API; fall back to an empty
          // object schema for tools declared without parameters.
          input_schema: tool.parameters ?? { type: "object", properties: {} }
        });
      }
    }

    return tools;
  }

  /**
   * Converts reply content blocks into automify output items: text becomes a
   * message, `tool_use` becomes a run-command call, a computer call, or a
   * generic `function_call`, depending on the tool name.
   */
  #fromAnthropicResponse(data, context, payload = {}) {
    const output = [];

    for (const item of data.content ?? []) {
      if (item.type === "text" && item.text) {
        output.push(message(normalizeStructuredText(item.text, payload.text)));
      }

      if (item.type === "tool_use") {
        if (item.name === "run_command") {
          output.push(runCommandCall(item.input?.command ?? "", {
            callId: item.id,
            cwd: item.input?.cwd,
            timeoutMs: item.input?.timeoutMs
          }));
        } else if (item.name === "computer") {
          output.push(computerCall(mapAnthropicComputerAction(item.input), { callId: item.id }));
        } else {
          output.push({
            type: "function_call",
            name: item.name,
            call_id: item.id,
            arguments: JSON.stringify(item.input ?? {})
          });
        }
      }
    }

    return response({
      id: data.id ?? `anthropic_${Date.now()}`,
      output
    });
  }
}
231
+
232
/**
 * Shrinks the outgoing history for a computer-surface continuation.
 *
 * Only when `context.surface` is "computer" and `context.phase` is "continue"
 * is anything dropped: the result then holds the text-only form of the first
 * user message (if it has text), the most recent assistant message, and the
 * final (current) user message. In every other situation, including when the
 * last message is not a user message or no assistant/user message exists, the
 * input array is returned as-is.
 *
 * @param {Array<object>} messages - Conversation messages, oldest first.
 * @param {object} context - Run context (`surface`, `phase`).
 * @returns {Array<object>} The original array or a compacted copy.
 */
function compactMessagesForRequest(messages, context) {
  const isComputerContinuation = context.surface === "computer" && context.phase === "continue";
  if (!isComputerContinuation) return messages;

  const newestTurn = messages.at(-1);
  if (!newestTurn || newestTurn.role !== "user") return messages;

  const lastAssistantTurn = messages.findLast((entry) => entry?.role === "assistant");
  const firstUserTurn = messages.find((entry) => entry?.role === "user");
  if (!(lastAssistantTurn && firstUserTurn)) return messages;

  const firstUserText = textOnlyUserMessage(firstUserTurn);
  const compacted = [lastAssistantTurn, newestTurn];
  if (firstUserText) compacted.unshift(firstUserText);
  return compacted;
}
242
+
243
/**
 * Shrinks a transcript before it is stored for a later turn.
 *
 * For the "computer" surface the stored history is reduced to the text-only
 * form of the first user message (if it has text) followed by the most recent
 * assistant message. For any other surface, or when either of those messages
 * is missing, the input array is returned untouched.
 *
 * @param {Array<object>} messages - Conversation messages, oldest first.
 * @param {object} context - Run context (`surface`).
 * @returns {Array<object>} The original array or a compacted copy.
 */
function compactMessagesForStorage(messages, context) {
  if (context.surface !== "computer") return messages;

  const lastAssistantTurn = messages.findLast((entry) => entry?.role === "assistant");
  const firstUserTurn = messages.find((entry) => entry?.role === "user");
  if (!(lastAssistantTurn && firstUserTurn)) return messages;

  const firstUserText = textOnlyUserMessage(firstUserTurn);
  return firstUserText ? [firstUserText, lastAssistantTurn] : [lastAssistantTurn];
}
252
+
253
/**
 * Returns a user message holding only the text blocks of `message`.
 *
 * A block is kept when its `type` is "text" and its `text` is a string.
 *
 * @param {object} [message] - Message whose `content` may be an array of blocks.
 * @returns {{ role: "user", content: Array<object> } | null} `null` when
 *   `content` is not an array or contains no text block.
 */
function textOnlyUserMessage(message) {
  if (!Array.isArray(message?.content)) return null;

  const textBlocks = [];
  for (const block of message.content) {
    const isTextBlock = block?.type === "text" && typeof block.text === "string";
    if (isTextBlock) textBlocks.push(block);
  }

  return textBlocks.length > 0 ? { role: "user", content: textBlocks } : null;
}
260
+
261
/**
 * Reads width and height from the header of a PNG image.
 *
 * The value must be at least 24 bytes long and begin with the 8-byte PNG
 * signature; the two dimensions are then read as big-endian 32-bit unsigned
 * integers at byte offsets 16 and 20.
 *
 * @param {*} value - Anything accepted by `Buffer.from`; falsy values yield null.
 * @returns {{ width: number, height: number } | null} `null` when the value is
 *   falsy, too short, or does not start with the PNG signature.
 */
function imageDimensions(value) {
  if (!value) return null;

  const bytes = Buffer.from(value);
  const pngSignature = [0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a];
  const looksLikePng =
    bytes.length >= 24 && pngSignature.every((expected, offset) => bytes[offset] === expected);
  if (!looksLikePng) return null;

  const width = bytes.readUInt32BE(16);
  const height = bytes.readUInt32BE(20);
  return { width, height };
}
283
+
284
/**
 * Builds the `content` of a tool result for a computer call output.
 *
 * Adds a "Current URL: ..." text block when `item.current_url` is set and an
 * image block when `item.output.image_url` is set.
 *
 * @param {object} item - A `computer_call_output` input item.
 * @returns {Array<object> | string} The collected blocks, or the string
 *   "Done." when neither field is present.
 */
function computerResultContent(item) {
  const parts = [];
  const currentUrl = item.current_url;
  const screenshotUrl = item.output?.image_url;

  if (currentUrl) {
    parts.push({ type: "text", text: `Current URL: ${currentUrl}` });
  }

  if (screenshotUrl) {
    parts.push(imageBlock(screenshotUrl));
  }

  return parts.length === 0 ? "Done." : parts;
}
297
+
298
/**
 * Wraps a data URL in an Anthropic base64 image content block.
 *
 * @param {string} imageUrl - Data URL, decoded with `parseDataUrl`.
 * @returns {{ type: "image", source: { type: "base64", media_type: *, data: * } }}
 */
function imageBlock(imageUrl) {
  const { mediaType, base64 } = parseDataUrl(imageUrl);
  const source = { type: "base64", media_type: mediaType, data: base64 };
  return { type: "image", source };
}
309
+
310
/**
 * Translates the `input` of an Anthropic computer `tool_use` block into an
 * automify computer action.
 *
 * The action name is read from `input.action` (falling back to `input.type`)
 * and the pointer position from `input.x`/`input.y` or the `coordinate` /
 * `coordinates` pair, defaulting to 0.
 *
 * Scroll handling: "left"/"right" produce a horizontal offset only, "up" a
 * negative vertical offset, and any other direction (including a missing one)
 * a positive vertical offset.
 *
 * @param {object} [input] - Tool input as sent by the model.
 * @returns {object} The mapped action. Unrecognized actions are passed through:
 *   unchanged when they already carry a `type`, otherwise copied with `type`
 *   set to the action name (or "unknown").
 */
function mapAnthropicComputerAction(input = {}) {
  const action = input.action ?? input.type;
  const coordinate = input.coordinate ?? input.coordinates;
  const x = input.x ?? coordinate?.[0] ?? 0;
  const y = input.y ?? coordinate?.[1] ?? 0;

  switch (action) {
    case "left_click":
    case "click":
      return { type: "click", x, y, button: "left" };
    case "right_click":
      return { type: "click", x, y, button: "right" };
    case "middle_click":
      return { type: "click", x, y, button: "middle" };
    case "double_click":
      return { type: "double_click", x, y, button: "left" };
    case "mouse_move":
      return { type: "move", x, y };
    case "type":
      return { type: "type", text: input.text ?? "" };
    case "key":
      return { type: "keypress", keys: [input.text ?? input.key].filter(Boolean) };
    case "scroll": {
      const amount = input.scroll_amount ?? input.amount ?? 0;
      const direction = input.scroll_direction ?? input.direction;
      const isHorizontal = direction === "left" || direction === "right";
      return {
        type: "scroll",
        x,
        y,
        scroll_x: direction === "left" ? -amount : direction === "right" ? amount : 0,
        // A horizontal scroll must not move the page vertically as well.
        scroll_y: isHorizontal ? 0 : direction === "up" ? -amount : amount
      };
    }
    case "screenshot":
      return { type: "screenshot" };
    case "wait":
      return { type: "wait" };
    default:
      return input.type ? input : { ...input, type: action ?? "unknown" };
  }
}
351
+
352
/**
 * Returns a shallow copy of `object` without the own enumerable string-keyed
 * properties whose value is exactly `undefined`. Other falsy values are kept.
 *
 * @param {object} object - Source object; it is not modified.
 * @returns {object} A new plain object.
 */
function removeUndefined(object) {
  const definedEntries = [];

  for (const entry of Object.entries(object)) {
    if (entry[1] !== undefined) definedEntries.push(entry);
  }

  return Object.fromEntries(definedEntries);
}
355
+
356
/**
 * Reduces model text to its JSON portion when a structured output format was
 * requested.
 *
 * @param {string} text - Text returned by the model.
 * @param {object} [textConfig] - Payload text settings; only `format` is read.
 * @returns {string} `text` unchanged when no format is set, the format type is
 *   "text", or no JSON could be extracted; otherwise the extracted JSON text.
 */
function normalizeStructuredText(text, textConfig) {
  const format = textConfig?.format;
  const wantsStructuredOutput = Boolean(format) && format.type !== "text";
  if (!wantsStructuredOutput) return text;

  const extracted = extractJsonText(text);
  return extracted ?? text;
}
362
+
363
/**
 * Finds a JSON document inside free-form text.
 *
 * Candidates are tried in this order and the first one that parses wins:
 *   1. the whole trimmed text;
 *   2. the contents of each fenced code block (with or without a `json` tag);
 *   3. the bracket-balanced span starting at each `{` or `[`, left to right.
 *
 * @param {*} text - Text to search; non-strings yield null.
 * @returns {string | null} The JSON text, or null when nothing parses.
 */
function extractJsonText(text) {
  if (typeof text !== "string") return null;

  const body = text.trim();
  if (body === "") return null;
  if (isJson(body)) return body;

  const fencePattern = /```(?:json)?\s*([\s\S]*?)```/gi;
  for (const [, fenced] of body.matchAll(fencePattern)) {
    const inner = fenced.trim();
    if (isJson(inner)) return inner;
  }

  for (let offset = 0; offset < body.length; offset += 1) {
    const opensValue = body[offset] === "{" || body[offset] === "[";
    if (!opensValue) continue;

    const span = balancedJsonCandidate(body, offset);
    if (span && isJson(span)) return span;
  }

  return null;
}
386
+
387
/**
 * Scans `text` from `start` and returns the span that ends where the brackets
 * opened since `start` are all closed again.
 *
 * Brackets inside double-quoted strings are ignored, and a backslash inside a
 * string escapes the following character. The span is not validated as JSON.
 *
 * @param {string} text - Text to scan.
 * @param {number} start - Index at which scanning begins.
 * @returns {string | null} The trimmed span `text.slice(start, end + 1)`, or
 *   null when a closing bracket does not match the innermost open one or the
 *   text ends before the brackets balance.
 */
function balancedJsonCandidate(text, start) {
  const closerFor = { "{": "}", "[": "]" };
  const expectedClosers = [];
  let insideString = false;
  let skipNext = false;

  for (let cursor = start; cursor < text.length; cursor += 1) {
    const ch = text[cursor];

    if (insideString) {
      if (skipNext) {
        skipNext = false;
      } else if (ch === "\\") {
        skipNext = true;
      } else if (ch === '"') {
        insideString = false;
      }
      continue;
    }

    switch (ch) {
      case '"':
        insideString = true;
        break;
      case "{":
      case "[":
        expectedClosers.push(closerFor[ch]);
        break;
      case "}":
      case "]":
        // Popping an empty stack yields undefined, which never equals ch.
        if (expectedClosers.pop() !== ch) return null;
        if (expectedClosers.length === 0) return text.slice(start, cursor + 1).trim();
        break;
      default:
        break;
    }
  }

  return null;
}
425
+
426
/**
 * Reports whether `JSON.parse` accepts `text`.
 *
 * @param {*} text - Value handed to `JSON.parse`.
 * @returns {boolean} true when parsing succeeds, false when it throws.
 */
function isJson(text) {
  let parses = true;

  try {
    JSON.parse(text);
  } catch {
    parses = false;
  }

  return parses;
}
434
+
435
/**
 * Parses a response body as JSON.
 *
 * @param {string} [text] - Raw body text.
 * @returns {*} The parsed value, or null when `text` is empty or otherwise falsy.
 * @throws {AutomifyError} When the text is not valid JSON; the original parse
 *   error is attached as `cause`.
 */
function parseJson(text) {
  if (!text) return null;

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (error) {
    throw new AutomifyError("Anthropic request returned invalid JSON.", { cause: error });
  }

  return parsed;
}
444
+
445
/**
 * Validates the model name of a payload.
 *
 * @param {*} model - Candidate model name.
 * @returns {string} `model` unchanged (it is not trimmed).
 * @throws {AutomifyError} When `model` is not a string or is blank.
 */
function assertModel(model) {
  const isUsable = typeof model === "string" && model.trim() !== "";
  if (isUsable) return model;

  throw new AutomifyError("A model is required for Anthropic requests.");
}
@@ -0,0 +1,98 @@
1
/**
 * Reference table of the preferred option names per API surface.
 *
 * Each entry has:
 *   - `surface`: the call or factory the entry describes;
 *   - `preferred`: the option names listed for that surface, in display order;
 *   - `notes`: a short usage hint.
 */
export const argumentReference = [
  {
    surface: ".do()",
    preferred: [
      "data", "evaluate", "output", "limits", "request",
      "safety", "hooks", "screenshots", "screenshot", "command"
    ],
    notes:
      "Use data for structured JSON, evaluate for files the model should inspect directly, and command only on CLI surfaces."
  },
  {
    surface: "automify.browser()",
    preferred: [
      "preset", "startUrl", "browser", "viewport", "launch",
      "context", "navigation", "actionDelayMs", "logFile"
    ],
    notes:
      'Use preset: "browser-review" for a high-detail, bounded browser inspection run. Use logFile to capture browser and automation events.'
  },
  {
    surface: "automify.cli()",
    preferred: ["preset", "command", "limits", "request", "hooks", "logFile"],
    notes:
      "Factory command policy is the default. Per-run command options override it for a single do() call. Use logFile to append CLI debug events as JSON Lines."
  },
  {
    surface: "automify.dockerCli()",
    preferred: [
      "preset", "container", "additionalAptPackages", "workdir",
      "shared", "sharedFiles", "command", "logFile"
    ],
    notes:
      'Use additionalAptPackages to apt-install Debian packages before commands run. Use preset: "repo" to mount the current workspace at /workspace and allow common repo commands. Use logFile to capture CLI and Docker container events.'
  },
  {
    surface: "automify.dockerComputer()",
    preferred: [
      "preset", "container", "viewport", "desktop",
      "additionalAptPackages", "shared", "sharedFiles", "logFile"
    ],
    notes:
      "Creates a Docker-backed Linux desktop runner. Pass startupCommand or desktop.startupCommand to launch the initial app. Use additionalAptPackages to apt-install extra Debian packages. Use logFile to capture automation and Docker desktop events. Explicit container names are locked per name until close()."
  },
  {
    surface: "automify.localComputer()",
    preferred: ["viewport", "mouse", "keyboard", "calibration", "virtualDisplay", "logFile"],
    notes:
      "Creates a local desktop runner and takes an exclusive cross-process lock until close(). Use logFile to capture automation and local desktop events."
  },
  {
    surface: "createLocalDesktopComputer()",
    preferred: ["viewport", "mouse", "keyboard", "calibration", "virtualDisplay", "logFile"],
    notes:
      "Grouped mouse, keyboard, and calibration options are preferred over the older flat names. Use logFile to capture local desktop events. Local desktop control takes an exclusive cross-process lock until close()."
  },
  {
    surface: "createDockerDesktopComputer()",
    preferred: [
      "preset", "container", "viewport", "desktop",
      "additionalAptPackages", "shared", "sharedFiles", "logFile"
    ],
    notes:
      "container controls Docker and resource limits; startupCommand or desktop.startupCommand is required; shared/sharedFiles control host file access. Use additionalAptPackages to apt-install extra Debian packages and logFile to capture Docker desktop events."
  }
];