@oagi/oagi 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,474 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Agent Execution Report</title>
7
+ <style>
8
+ body {
9
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
10
+ max-width: 1200px;
11
+ margin: 0 auto;
12
+ padding: 20px;
13
+ background: #f5f5f5;
14
+ }
15
+
16
+ h1 {
17
+ color: #333;
18
+ border-bottom: 2px solid #007bff;
19
+ padding-bottom: 10px;
20
+ }
21
+
22
+ .step, .plan {
23
+ background: white;
24
+ border-radius: 8px;
25
+ padding: 20px;
26
+ margin: 20px 0;
27
+ box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
28
+ }
29
+
30
+ .step h2 {
31
+ margin-top: 0;
32
+ color: #007bff;
33
+ }
34
+
35
+ .plan {
36
+ background: #e7f3ff;
37
+ }
38
+
39
+ .plan h3 {
40
+ margin-top: 0;
41
+ color: #0056b3;
42
+ }
43
+
44
+ .timestamp {
45
+ color: #666;
46
+ font-size: 0.9em;
47
+ }
48
+
49
+ .task-id, .request-id {
50
+ color: #666;
51
+ font-size: 0.9em;
52
+ margin-left: 10px;
53
+ }
54
+
55
+ .task-id code, .request-id code {
56
+ background: #e9ecef;
57
+ padding: 2px 6px;
58
+ border-radius: 3px;
59
+ font-family: monospace;
60
+ }
61
+
62
+ .screenshot-container {
63
+ position: relative;
64
+ display: inline-block;
65
+ margin: 10px 0;
66
+ }
67
+
68
+ .screenshot {
69
+ max-width: 100%;
70
+ border: 1px solid #ddd;
71
+ border-radius: 4px;
72
+ display: block;
73
+ }
74
+
75
+ .reasoning {
76
+ background: #f8f9fa;
77
+ padding: 10px;
78
+ border-left: 3px solid #007bff;
79
+ margin: 10px 0;
80
+ white-space: pre-wrap;
81
+ }
82
+
83
+ .actions {
84
+ margin: 10px 0;
85
+ }
86
+
87
+ .actions ul {
88
+ margin: 5px 0;
89
+ padding-left: 20px;
90
+ }
91
+
92
+ .actions code {
93
+ background: #e9ecef;
94
+ padding: 2px 6px;
95
+ border-radius: 3px;
96
+ }
97
+
98
+ .complete {
99
+ background: #d4edda;
100
+ color: #155724;
101
+ padding: 10px;
102
+ border-radius: 4px;
103
+ margin-top: 10px;
104
+ }
105
+
106
+ .action-result {
107
+ padding: 10px;
108
+ margin: 5px 0;
109
+ }
110
+
111
+ .success {
112
+ color: #155724;
113
+ }
114
+
115
+ .error {
116
+ color: #721c24;
117
+ background: #f8d7da;
118
+ padding: 10px;
119
+ border-radius: 4px;
120
+ }
121
+
122
+ .log {
123
+ background: #fff3cd;
124
+ padding: 10px;
125
+ margin: 10px 0;
126
+ border-radius: 4px;
127
+ }
128
+
129
+ .split {
130
+ text-align: center;
131
+ margin: 30px 0;
132
+ }
133
+
134
+ .split h3 {
135
+ color: #666;
136
+ }
137
+
138
+ .split-line {
139
+ border: none;
140
+ border-top: 2px dashed #ccc;
141
+ margin: 30px 0;
142
+ }
143
+
144
+ .url {
145
+ word-break: break-all;
146
+ }
147
+
148
+ .plan-result {
149
+ background: #d1ecf1;
150
+ color: #0c5460;
151
+ padding: 10px;
152
+ border-radius: 4px;
153
+ margin-top: 10px;
154
+ }
155
+
156
+ /* Cursor indicators */
157
+ .click-indicator {
158
+ position: absolute;
159
+ width: 20px;
160
+ height: 20px;
161
+ border-radius: 50%;
162
+ background: rgba(255, 0, 0, 0.8);
163
+ border: 2px solid #fff;
164
+ box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
165
+ transform: translate(-50%, -50%);
166
+ pointer-events: none;
167
+ z-index: 10;
168
+ }
169
+
170
+ .drag-indicator {
171
+ position: absolute;
172
+ width: 20px;
173
+ height: 20px;
174
+ border-radius: 50%;
175
+ border: 2px solid #fff;
176
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
177
+ transform: translate(-50%, -50%);
178
+ pointer-events: none;
179
+ z-index: 10;
180
+ }
181
+
182
+ .drag-start {
183
+ background: rgba(0, 255, 0, 0.8);
184
+ box-shadow: 0 0 10px rgba(0, 255, 0, 0.6);
185
+ }
186
+
187
+ .drag-end {
188
+ background: rgba(255, 0, 0, 0.8);
189
+ box-shadow: 0 0 10px rgba(255, 0, 0, 0.6);
190
+ }
191
+
192
+ .drag-line {
193
+ position: absolute;
194
+ background: rgba(255, 255, 0, 0.8);
195
+ height: 3px;
196
+ border-radius: 2px;
197
+ box-shadow: 0 0 5px rgba(255, 255, 0, 0.4);
198
+ pointer-events: none;
199
+ z-index: 9;
200
+ transform-origin: left center;
201
+ }
202
+
203
+ .scroll-indicator {
204
+ position: absolute;
205
+ width: 40px;
206
+ height: 40px;
207
+ border-radius: 50%;
208
+ background: rgba(138, 43, 226, 0.9);
209
+ border: 2px solid #fff;
210
+ box-shadow: 0 0 15px rgba(138, 43, 226, 0.6);
211
+ transform: translate(-50%, -50%);
212
+ pointer-events: none;
213
+ z-index: 10;
214
+ display: flex;
215
+ align-items: center;
216
+ justify-content: center;
217
+ font-size: 20px;
218
+ color: white;
219
+ font-weight: bold;
220
+ }
221
+ </style>
222
+ </head>
223
+ <body>
224
+ <h1>Agent Execution Report</h1>
225
+ <div id="content"></div>
226
+
227
+ <script>
228
+ const eventsData = {EVENTS_DATA}; // NOTE(review): {EVENTS_DATA} looks like a template placeholder that is replaced with an array of event objects when the report is generated (renderEvents calls eventsData.forEach) — confirm against the exporter
229
+
230
/**
 * Escape a value so it can be embedded in HTML markup.
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with character references, which makes
 * the result safe both as element text and inside a quoted attribute value.
 * The function is pure string manipulation and needs no DOM.
 *
 * @param {*} text - Value to escape; non-strings are converted with `String()`.
 * @returns {string} The escaped text, or an empty string for `null`/`undefined`.
 */
function escapeHtml(text) {
  if (text === null || text === undefined) {
    return '';
  }
  const replacements = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  // Single pass, so an already-inserted "&amp;" is never escaped a second time.
  return String(text).replace(/[&<>"']/g, ch => replacements[ch]);
}
235
+
236
/**
 * Detach every overlay marker (click, drag endpoints, drag line, scroll)
 * found inside a screenshot container.
 *
 * @param {Element} container - Element whose overlay children are removed.
 */
function removeIndicators(container) {
  const overlaySelector = '.click-indicator, .drag-indicator, .drag-line, .scroll-indicator';
  const overlays = container.querySelectorAll(overlaySelector);
  overlays.forEach(overlay => {
    overlay.remove();
  });
}
240
+
241
/**
 * Redraw the action overlays for one screenshot container.
 *
 * Reads the JSON list stored in the container's `data-actions` attribute,
 * clears any overlays already present, and dispatches each action to the
 * drawing function for its `type`. Actions with any other type are ignored.
 * Does nothing when the attribute or the `.screenshot` image is missing.
 *
 * @param {Element} container - A `.screenshot-container` element.
 */
function addIndicators(container) {
  const screenshot = container.querySelector('.screenshot');
  const rawActions = container.getAttribute('data-actions');
  if (!(rawActions && screenshot)) {
    return;
  }

  // Start from a clean slate so repeated calls (e.g. on resize) do not stack markers.
  removeIndicators(container);

  const parsedActions = JSON.parse(rawActions);
  const width = screenshot.offsetWidth;
  const height = screenshot.offsetHeight;

  const drawers = new Map([
    ['click', addClickIndicator],
    ['drag', addDragIndicator],
    ['scroll', addScrollIndicator],
  ]);

  parsedActions.forEach(action => {
    const draw = drawers.get(action.type);
    if (draw !== undefined) {
      draw(container, action, width, height);
    }
  });
}
267
+
268
// Overlay positions are pixel offsets derived from the rendered image size,
// so they are redrawn after the window has stopped resizing for 100 ms.
let resizeTimeout;
window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    const containers = document.querySelectorAll('.screenshot-container');
    for (const container of containers) {
      addIndicators(container);
    }
  }, 100);
});
278
+
279
/**
 * Place a click marker over the screenshot.
 *
 * `action.x` / `action.y` are divided by 1000 and multiplied by the image
 * size to obtain the pixel offset of the marker inside the container.
 *
 * @param {Element} container - Element the marker is appended to.
 * @param {{x: number, y: number}} action - Click coordinates.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addClickIndicator(container, action, imgWidth, imgHeight) {
  const leftPx = (action.x / 1000) * imgWidth;
  const topPx = (action.y / 1000) * imgHeight;

  const marker = document.createElement('div');
  marker.className = 'click-indicator';
  marker.style.left = `${leftPx}px`;
  marker.style.top = `${topPx}px`;

  container.appendChild(marker);
}
289
+
290
/**
 * Draw a drag gesture: a start marker, an end marker and a connecting line.
 *
 * Coordinates (`x1`, `y1`, `x2`, `y2`) are divided by 1000 and multiplied by
 * the image size to obtain pixel offsets. The line is anchored at the start
 * point, sized to the distance between the points and rotated towards the
 * end point.
 *
 * @param {Element} container - Element the overlays are appended to.
 * @param {{x1: number, y1: number, x2: number, y2: number}} action - Drag endpoints.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addDragIndicator(container, action, imgWidth, imgHeight) {
  const startX = (action.x1 / 1000) * imgWidth;
  const startY = (action.y1 / 1000) * imgHeight;
  const endX = (action.x2 / 1000) * imgWidth;
  const endY = (action.y2 / 1000) * imgHeight;

  // Both endpoints share the same construction; only class and position differ.
  const placeEndpoint = (classNames, left, top) => {
    const dot = document.createElement('div');
    dot.className = classNames;
    dot.style.left = `${left}px`;
    dot.style.top = `${top}px`;
    container.appendChild(dot);
  };
  placeEndpoint('drag-indicator drag-start', startX, startY);
  placeEndpoint('drag-indicator drag-end', endX, endY);

  const connector = document.createElement('div');
  connector.className = 'drag-line';

  const dx = endX - startX;
  const dy = endY - startY;
  const distance = Math.sqrt(dx * dx + dy * dy);
  const degrees = Math.atan2(dy, dx) * (180 / Math.PI);

  connector.style.left = `${startX}px`;
  connector.style.top = `${startY}px`;
  connector.style.width = `${distance}px`;
  connector.style.transform = `rotate(${degrees}deg)`;
  container.appendChild(connector);
}
320
+
321
/**
 * Place a scroll marker over the screenshot.
 *
 * The marker shows an up arrow, a down arrow, or a double-headed arrow when
 * `action.direction` is neither `'up'` nor `'down'`, with a matching tooltip.
 * `action.x` / `action.y` are divided by 1000 and multiplied by the image
 * size to obtain the pixel offset.
 *
 * @param {Element} container - Element the marker is appended to.
 * @param {{x: number, y: number, direction?: string}} action - Scroll position and direction.
 * @param {number} imgWidth - Rendered image width in pixels.
 * @param {number} imgHeight - Rendered image height in pixels.
 */
function addScrollIndicator(container, action, imgWidth, imgHeight) {
  const leftPx = (action.x / 1000) * imgWidth;
  const topPx = (action.y / 1000) * imgHeight;

  const marker = document.createElement('div');
  marker.className = 'scroll-indicator';
  marker.style.left = `${leftPx}px`;
  marker.style.top = `${topPx}px`;

  switch (action.direction) {
    case 'up':
      marker.innerHTML = '&#8593;';
      marker.title = 'Scroll Up';
      break;
    case 'down':
      marker.innerHTML = '&#8595;';
      marker.title = 'Scroll Down';
      break;
    default:
      marker.innerHTML = '&#8597;';
      marker.title = 'Scroll';
  }

  container.appendChild(marker);
}
342
+
343
/**
 * Render every entry of `eventsData` into the `#content` element and then
 * draw the action overlays on each screenshot.
 *
 * Handles the event types `step`, `action`, `log`, `split` and `plan`;
 * events of any other type are skipped. All event-supplied values are
 * escaped before being placed into the markup: text goes through
 * `escapeHtml`, attribute values through a local escaper that also covers
 * quotes, so the output does not depend on how `escapeHtml` treats quotes.
 */
function renderEvents() {
  // Escapes everything that could terminate a quoted attribute or open a tag.
  const escapeAttr = value =>
    String(value === null || value === undefined ? '' : value)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');

  // Builds the screenshot wrapper; `actionCoords` is omitted for images that
  // carry no overlays, in which case no data-actions attribute is written.
  const screenshotHtml = (image, altText, actionCoords) => {
    const source = String(image);
    // Values that are neither data: nor http(s) URLs are treated as bare base64 PNG data.
    const src = source.startsWith('data:') || source.startsWith('http')
      ? source
      : `data:image/png;base64,${source}`;
    const actionsAttr = actionCoords === undefined
      ? ''
      : ` data-actions="${escapeAttr(JSON.stringify(actionCoords))}"`;
    return `<div class="screenshot-container"${actionsAttr}>`
      + `<img src="${escapeAttr(src)}" alt="${escapeAttr(altText)}" class="screenshot"/>`
      + '</div>';
  };

  // A Map avoids matching inherited object members such as "constructor".
  const phaseTitles = new Map([
    ['initial', 'Initial Planning'],
    ['reflection', 'Reflection'],
    ['summary', 'Summary'],
  ]);

  const content = document.getElementById('content');
  const events = Array.isArray(eventsData) ? eventsData : [];
  let html = '';

  events.forEach(event => {
    const timestamp = escapeHtml(event.timestamp);

    switch (event.event_type) {
      case 'step': {
        html += '<div class="step">';
        html += `<h2>Step ${escapeHtml(event.step_num)}</h2>`;
        html += `<span class="timestamp">${timestamp}</span>`;
        if (event.task_id) {
          html += ` <span class="task-id">Task ID: <code>${escapeHtml(event.task_id)}</code></span>`;
        }

        if (event.image) {
          html += screenshotHtml(event.image, `Step ${event.step_num}`, event.action_coords || []);
        }

        if (event.reason) {
          html += '<div class="reasoning">';
          html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reason)}</p>`;
          html += '</div>';
        }

        if (event.actions && event.actions.length > 0) {
          html += '<div class="actions"><strong>Planned Actions:</strong><ul>';
          event.actions.forEach(action => {
            const countStr = action.count > 1 ? ` (x${escapeHtml(action.count)})` : '';
            html += `<li><code>${escapeHtml(action.type)}</code>: ${escapeHtml(action.argument)}${countStr}</li>`;
          });
          html += '</ul></div>';
        }

        if (event.stop) {
          html += '<div class="complete">Task Complete</div>';
        }
        html += '</div>';
        break;
      }

      case 'action':
        html += '<div class="action-result">';
        html += `<span class="timestamp">${timestamp}</span>`;
        if (event.error) {
          html += `<div class="error">Error: ${escapeHtml(event.error)}</div>`;
        } else {
          html += '<div class="success">Actions executed successfully</div>';
        }
        html += '</div>';
        break;

      case 'log':
        html += '<div class="log">';
        html += `<span class="timestamp">${timestamp}</span>`;
        html += `<p>${escapeHtml(event.message)}</p>`;
        html += '</div>';
        break;

      case 'split':
        if (event.label) {
          html += `<div class="split"><h3>${escapeHtml(event.label)}</h3></div>`;
        } else {
          html += '<hr class="split-line"/>';
        }
        break;

      case 'plan': {
        // Unknown phases fall back to the raw phase value.
        const phaseTitle = phaseTitles.get(event.phase) || event.phase;

        html += '<div class="plan">';
        html += `<h3>${escapeHtml(phaseTitle)}</h3>`;
        html += `<span class="timestamp">${timestamp}</span>`;
        if (event.request_id) {
          html += ` <span class="request-id">Request ID: <code>${escapeHtml(event.request_id)}</code></span>`;
        }

        if (event.image) {
          html += screenshotHtml(event.image, phaseTitle);
        }

        if (event.reasoning) {
          html += '<div class="reasoning">';
          html += `<strong>Reasoning:</strong><p>${escapeHtml(event.reasoning)}</p>`;
          html += '</div>';
        }

        if (event.result) {
          html += `<div class="plan-result"><strong>Result:</strong> ${escapeHtml(event.result)}</div>`;
        }
        html += '</div>';
        break;
      }
    }
  });

  content.innerHTML = html;

  // Overlay positions depend on the rendered image size, so images that are
  // still loading get their overlays once the load event fires.
  document.querySelectorAll('.screenshot-container').forEach(container => {
    const img = container.querySelector('.screenshot');
    if (!img) {
      return;
    }
    if (img.complete) {
      addIndicators(container);
    } else {
      img.addEventListener('load', () => addIndicators(container), { once: true });
    }
  });
}
470
+
471
+ document.addEventListener('DOMContentLoaded', renderEvents); // render the report when the DOMContentLoaded event fires
472
+ </script>
473
+ </body>
474
+ </html>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oagi/oagi",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "Official API of OpenAGI Foundation",
5
5
  "license": "MIT",
6
6
  "homepage": "https://agiopen.org/",
@@ -43,10 +43,12 @@
43
43
  "husky": "^9.1.7",
44
44
  "prettier": "3.7.4",
45
45
  "tsup": "^8.5.1",
46
+ "tsx": "^4.21.0",
46
47
  "typescript": "^5.9.3"
47
48
  },
48
49
  "scripts": {
49
50
  "build": "tsup",
50
- "start": "node dist/cli.js"
51
+ "start": "node dist/cli.js",
52
+ "dev": "tsx src/cli.ts"
51
53
  }
52
54
  }
package/README.md DELETED
@@ -1,154 +0,0 @@
1
- # OAGI TypeScript SDK
2
-
3
- TypeScript SDK for the OAGI API - vision-based task automation.
4
-
5
- ## What is OAGI?
6
-
7
- OAGI is the TypeScript SDK for **Lux**, the world's most advanced computer-use model from the OpenAGI Foundation.
8
-
9
- **Computer Use** is AI's ability to operate human-facing software — not just through APIs, but by operating computers natively, just as human users do. It's a paradigm shift in what AI can do: not just generating, reasoning, or researching, but actually operating on your computer.
10
-
11
- Lux comes in three modes, giving you control over depth, speed, and style of execution:
12
-
13
- - **Tasker** — Strictly follows step-by-step instructions with ultra-stable, controllable execution
14
- - **Actor** — Ideal for immediate tasks, completing actions at near-instant speed
15
- - **Thinker** — Understands vague, complex goals, performing hour-long executions
16
-
17
- ### Use Cases
18
-
19
- With Lux, possibilities are endless. Here are a few examples:
20
-
21
- - **Web Scraping & Data Crawl** — Navigate websites, sort results, and collect product information autonomously
22
- - **Software QA** — Automate repetitive testing tasks, navigate applications, perform test actions, and validate expected behaviors
23
- - **Financial Data Extraction** — Navigate to sites like NASDAQ and extract insider activity data
24
- - **Data Entry** — Enter accurate data across dashboards and forms
25
- - **Workflow Automation** — Chain together multi-step tasks across different applications
26
-
27
- ## Table of Contents
28
-
29
- - [OAGI TypeScript SDK](#oagi-typescript-sdk)
30
- - [What is OAGI?](#what-is-oagi)
31
- - [Use Cases](#use-cases)
32
- - [Table of Contents](#table-of-contents)
33
- - [Installation](#installation)
34
- - [Quick Start](#quick-start)
35
- - [Automated Task Execution](#automated-task-execution)
36
- - [Command Line Interface](#command-line-interface)
37
- - [Image Processing](#image-processing)
38
- - [Manual Control with Actor](#manual-control-with-actor)
39
- - [Documentation](#documentation)
40
- - [License](#license)
41
-
42
- ## Installation
43
-
44
- ```bash
45
- # If you are using Node.js
46
- npm install @oagi/oagi
47
- yarn add @oagi/oagi
48
- pnpm add @oagi/oagi
49
-
50
- # If you are using Deno
51
- deno add npm:@oagi/oagi
52
-
53
- # If you are using Bun
54
- bun install @oagi/oagi
55
- ```
56
-
57
- ## Quick Start
58
-
59
- Set your API credentials:
60
- ```bash
61
- export OAGI_API_KEY="your-api-key" # get your API key from https://developer.agiopen.org/
62
- # export OAGI_BASE_URL="https://api.agiopen.org/" # optional, defaults to production endpoint
63
- ```
64
-
65
- ### Automated Task Execution
66
-
67
- Run tasks automatically with screenshot capture and action execution:
68
-
69
- ```typescript
70
- import { DefaultActionHandler, DefaultAgent, ScreenshotMaker } from '@oagi/oagi';
71
-
72
- const agent = new DefaultAgent();
73
- await agent.execute(
74
- 'Search weather on Google',
75
- new DefaultActionHandler(),
76
- new ScreenshotMaker(),
77
- );
78
- ```
79
-
80
- ### Command Line Interface
81
-
82
- Run agents directly from the terminal:
83
-
84
- ```bash
85
- # Run with actor model
86
- oagi agent run "Go to nasdaq.com, search for AAPL. Under More, go to Insider Activity" --model lux-actor-1
87
-
88
- # Run with thinker mode (uses lux-thinker-1 model with more steps)
89
- oagi agent run "Look up the store hours for the nearest Apple Store to zip code 23456 using the Apple Store Locator" --model lux-thinker-1
90
-
91
- # Run pre-configured tasker workflows (no instruction needed)
92
- oagi agent run --mode tasker:software_qa
93
-
94
- # List all available modes
95
- oagi agent modes
96
-
97
- # Check macOS permissions (screen recording & accessibility)
98
- oagi agent permission
99
-
100
- # Export execution history
101
- oagi agent run "Complete the form" --export html --export-file report.html
102
- ```
103
-
104
- CLI options:
105
- - `--mode`: Agent mode (default: actor). Use `oagi agent modes` to list available modes
106
- - `--model`: Override the model (default: determined by mode)
107
- - `--max-steps`: Maximum steps (default: determined by mode)
108
- - `--temperature`: Sampling temperature (default: determined by mode)
109
- - `--step-delay`: Delay after each action before next screenshot (default: 0.3s)
110
- - `--export`: Export format (markdown, html, json)
111
- - `--export-file`: Output file path for export
112
-
113
- ### Image Processing
114
-
115
- Process and optimize images before sending to API:
116
-
117
- ```typescript
118
- import sharp from 'sharp';
119
-
120
- const compressed = await sharp('large_screenshot.png')
121
- .resize(1260, 700, { fit: 'fill' })
122
- .jpeg({ quality: 85 })
123
- .toBuffer();
124
- ```
125
-
126
- ### Manual Control with Actor
127
-
128
- For step-by-step control over task execution:
129
-
130
- ```typescript
131
- import { Actor, DefaultActionHandler, ScreenshotMaker } from '@oagi/oagi';
132
-
133
- const actor = new Actor();
134
- actor.initTask('Complete the form');
135
- const image_provider = new ScreenshotMaker();
136
- const action_handler = new DefaultActionHandler();
137
-
138
- for (let i = 0; i < 10; ++i) {
139
- const image = await image_provider.provide();
140
- const step = await actor.step(image);
141
-
142
- if (step.stop) break;
143
-
144
- await action_handler.handle(step.actions);
145
- }
146
- ```
147
-
148
- ## Documentation
149
-
150
- For full Lux documentation and guides, visit the [OAGI Developer Documentation](https://developer.agiopen.org/docs/index).
151
-
152
- ## License
153
-
154
- MIT