otoro-cli 1.3.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/otoro.js +29 -0
- package/lib/agent.js +21 -0
- package/lib/computer.js +74 -0
- package/lib/screen.js +281 -0
- package/package.json +1 -1
package/bin/otoro.js
CHANGED
|
@@ -68,6 +68,35 @@ program
|
|
|
68
68
|
await generateImage(prompt.join(' '))
|
|
69
69
|
})
|
|
70
70
|
|
|
71
|
+
// `otoro computer <task...>` (alias `use`): joins the task words into one
// string and hands it to the computer-use loop.
program
  .command('computer <task...>')
  .alias('use')
  .description('Computer Use — Otoro sees your screen and controls mouse/keyboard')
  .action(async (taskWords) => {
    requireAuth()
    // Required inside the action, so the module is only loaded when this command runs.
    const computer = require('../lib/computer')
    const taskText = taskWords.join(' ')
    await computer.runComputerTask(taskText)
  })
|
|
80
|
+
|
|
81
|
+
// `otoro screen`: captures the screen once, asks the vision model to describe
// it, and prints the description (or the failure reason).
program
  .command('screen')
  .description('Take a screenshot and describe what Otoro sees')
  .action(async () => {
    requireAuth()
    const { analyzeScreen } = require('../lib/screen')
    const chalk = require('chalk')
    const ora = require('ora')
    const spinner = ora({ text: chalk.gray('Looking at screen...'), color: 'cyan' }).start()

    // A thrown/rejected analysis is folded into the same failure path as a
    // reported one, so the spinner is always stopped.
    let result
    try {
      result = await analyzeScreen()
    } catch (err) {
      result = { success: false, error: err.message }
    }

    if (!result.success) {
      spinner.fail(chalk.red(result.error))
      return
    }

    spinner.succeed(chalk.green('Screen analyzed'))
    console.log(chalk.cyan('\n What Otoro sees:\n'))
    console.log(' ' + result.description.split('\n').join('\n ') + '\n')
  })
|
|
99
|
+
|
|
71
100
|
program
|
|
72
101
|
.command('start')
|
|
73
102
|
.description('Start Otoro agent daemon — connects to server for remote tasks')
|
package/lib/agent.js
CHANGED
|
@@ -4,6 +4,8 @@ const path = require('path')
|
|
|
4
4
|
const chalk = require('chalk')
|
|
5
5
|
const { getConfig } = require('./config')
|
|
6
6
|
const { readFile, writeFile, editFile, listFiles, runCommand, searchCode, openApp, openUrl, getSystemInfo, takeScreenshot } = require('./tools')
|
|
7
|
+
const { mouseClick, mouseMove, typeText, pressKey, analyzeScreen } = require('./screen')
|
|
8
|
+
const { runComputerTask } = require('./computer')
|
|
7
9
|
const { chatCompletion } = require('./api')
|
|
8
10
|
|
|
9
11
|
class OtoroAgent {
|
|
@@ -132,6 +134,25 @@ class OtoroAgent {
|
|
|
132
134
|
case 'system_info':
|
|
133
135
|
result = getSystemInfo()
|
|
134
136
|
break
|
|
137
|
+
case 'mouse_click':
|
|
138
|
+
result = mouseClick(payload.x, payload.y, payload.button)
|
|
139
|
+
break
|
|
140
|
+
case 'mouse_move':
|
|
141
|
+
result = mouseMove(payload.x, payload.y)
|
|
142
|
+
break
|
|
143
|
+
case 'type_text':
|
|
144
|
+
result = typeText(payload.text)
|
|
145
|
+
break
|
|
146
|
+
case 'press_key':
|
|
147
|
+
result = pressKey(payload.key)
|
|
148
|
+
break
|
|
149
|
+
case 'analyze_screen':
|
|
150
|
+
result = await analyzeScreen(payload.question)
|
|
151
|
+
break
|
|
152
|
+
case 'computer_use':
|
|
153
|
+
console.log(chalk.cyan(` 🖥️ Computer Use: ${payload.task}`))
|
|
154
|
+
result = await runComputerTask(payload.task, payload.max_steps || 10)
|
|
155
|
+
break
|
|
135
156
|
case 'list_files':
|
|
136
157
|
result = listFiles(payload.dir || '.', payload.pattern || '')
|
|
137
158
|
break
|
package/lib/computer.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
const chalk = require('chalk')
|
|
2
|
+
const ora = require('ora')
|
|
3
|
+
const { analyzeScreen, computerUseStep, executeComputerActions, takeScreenshot } = require('./screen')
|
|
4
|
+
|
|
5
|
+
/**
 * Drive the computer-use loop for one task: look at the screen, ask the model
 * for the next actions, execute them, and repeat until the model signals
 * completion or `maxSteps` iterations have been used.
 *
 * Progress is printed to the console. The returned summary lets programmatic
 * callers tell success from failure.
 *
 * @param {string} task - Natural-language description of what to accomplish.
 * @param {number} [maxSteps=15] - Upper bound on decide/act iterations.
 * @returns {Promise<{success: boolean, steps: number, error?: string}>}
 *   `steps` is the number of iterations whose actions were executed.
 */
async function runComputerTask(task, maxSteps = 15) {
  // Capture + describe the screen; a thrown error is reported like a failed capture.
  const lookAtScreen = async () => {
    try {
      return await analyzeScreen()
    } catch (err) {
      return { success: false, error: err.message }
    }
  }

  console.log(chalk.cyan.bold('\n 🐙 Otoro Computer Use\n'))
  console.log(chalk.gray(` Task: ${task}`))
  console.log(chalk.gray(` Platform: ${process.platform}\n`))

  // Step 1: See the screen
  let spinner = ora({ text: chalk.gray('Looking at screen...'), color: 'cyan' }).start()
  let screen = await lookAtScreen()
  if (!screen.success) {
    spinner.fail(chalk.red(`Can't see screen: ${screen.error}`))
    console.log(chalk.yellow('\n Tips:'))
    if (process.platform === 'linux') console.log(chalk.gray(' • Install scrot or gnome-screenshot: sudo apt install scrot'))
    if (process.platform === 'darwin') console.log(chalk.gray(' • Grant Screen Recording permission: System Settings → Privacy → Screen Recording'))
    return { success: false, steps: 0, error: screen.error }
  }
  spinner.succeed(chalk.green('Screen captured'))

  // Last description that was captured successfully; reused if a later capture fails
  // so the model is never shown "undefined" as the current screen.
  let description = screen.description || ''
  console.log(chalk.gray(` I see: ${description.slice(0, 150)}...\n`))

  // Step 2: Loop — AI sees screen, decides action, executes, repeat
  for (let step = 1; step <= maxSteps; step++) {
    console.log(chalk.cyan(` Step ${step}/${maxSteps}`))

    spinner = ora({ text: chalk.gray('Deciding action...'), color: 'cyan' }).start()
    let aiResponse
    try {
      aiResponse = await computerUseStep(task, description)
    } finally {
      spinner.stop()
    }

    if (!aiResponse) {
      console.log(chalk.red(' No response from AI'))
      // Return here: falling out of the loop would print the unrelated
      // "Reached max steps" message.
      return { success: false, steps: step - 1, error: 'No response from AI' }
    }

    // Show what Otoro is thinking (strip action tags for display)
    const thinking = aiResponse.replace(/<action:[^>]*\/>/g, '').trim()
    if (thinking) console.log(chalk.gray(` ${thinking.split('\n')[0].slice(0, 100)}`))

    // Execute actions
    const { results, isDone } = await executeComputerActions(aiResponse)

    for (const r of results) {
      if (!r.success) {
        console.log(chalk.red(` ✗ ${r.error}`))
        continue
      }
      // Click results carry `button`; move results only carry coordinates.
      if (r.button !== undefined) console.log(chalk.green(` ✓ Click (${r.x}, ${r.y})`))
      else if (r.x !== undefined) console.log(chalk.green(` ✓ Move (${r.x}, ${r.y})`))
      else if (r.typed) console.log(chalk.green(` ✓ Typed ${r.typed}`))
      else if (r.key) console.log(chalk.green(` ✓ Pressed ${r.key}`))
      else if (r.waited) console.log(chalk.green(` ✓ Waited ${r.waited}`))
    }

    if (isDone) {
      console.log(chalk.green.bold('\n ✓ Task complete!\n'))
      return { success: true, steps: step }
    }

    // Wait a moment then re-capture screen
    await new Promise(r => setTimeout(r, 1000))
    spinner = ora({ text: chalk.gray('Looking at updated screen...'), color: 'cyan' }).start()
    screen = await lookAtScreen()
    spinner.stop()

    if (screen.success) {
      description = screen.description || ''
      console.log(chalk.gray(` Screen: ${description.slice(0, 100)}...`))
    } else {
      console.log(chalk.yellow(` Could not refresh screen: ${screen.error}`))
    }
    console.log()
  }

  console.log(chalk.yellow(`\n Reached max steps (${maxSteps}). Task may be incomplete.\n`))
  return { success: false, steps: maxSteps, error: `Reached max steps (${maxSteps})` }
}
|
|
73
|
+
|
|
74
|
+
module.exports = { runComputerTask }
|
package/lib/screen.js
ADDED
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
const { execSync, exec, execFileSync } = require('child_process')
const fs = require('fs')
const path = require('path')
const os = require('os')
const chalk = require('chalk')
const http = require('http')
const https = require('https')
const { getConfig } = require('./config')
|
|
8
|
+
|
|
9
|
+
// Directory that receives every captured PNG. It is created at module load.
const SCREENSHOT_DIR = path.join(os.tmpdir(), 'otoro-screenshots')
// Captures can contain anything visible on screen, so the directory is created
// owner-only. The mode applies only when the directory is newly created and is
// not supported on Windows.
fs.mkdirSync(SCREENSHOT_DIR, { recursive: true, mode: 0o700 })
|
|
11
|
+
|
|
12
|
+
// ─── Screenshots ──────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/**
 * Capture the screen to a new PNG file in SCREENSHOT_DIR using a
 * platform-specific command-line tool.
 *
 * - macOS: `screencapture -x`
 * - Windows: PowerShell with System.Drawing (primary screen bounds)
 * - anything else: the first of gnome-screenshot, scrot, ImageMagick `import`
 *   that actually produces a non-empty file
 *
 * @returns {{success: true, path: string, size: number} | {success: false, error: string}}
 */
function takeScreenshot() {
  const platform = process.platform
  const file = path.join(SCREENSHOT_DIR, `screen-${Date.now()}.png`)
  const capturedBytes = () => (fs.existsSync(file) ? fs.statSync(file).size : 0)

  try {
    if (platform === 'darwin') {
      execSync(`screencapture -x "${file}"`, { timeout: 5000 })
    } else if (platform === 'win32') {
      // Inside a PowerShell single-quoted string only `'` needs escaping (doubled).
      const psPath = file.replace(/'/g, "''")
      const script = [
        'Add-Type -AssemblyName System.Windows.Forms',
        'Add-Type -AssemblyName System.Drawing',
        '$screen = [System.Windows.Forms.Screen]::PrimaryScreen.Bounds',
        '$bitmap = New-Object System.Drawing.Bitmap($screen.Width, $screen.Height)',
        '$graphics = [System.Drawing.Graphics]::FromImage($bitmap)',
        '$graphics.CopyFromScreen(0, 0, 0, 0, $screen.Size)',
        `$bitmap.Save('${psPath}')`,
        '$graphics.Dispose()',
        '$bitmap.Dispose()',
      ].join('; ')
      execSync(`powershell -command "${script}"`, { timeout: 10000 })
    } else {
      // Linux — try multiple tools. A tool only counts as successful when it
      // left a non-empty file behind; otherwise the next one is tried.
      const tools = [
        `gnome-screenshot -f "${file}"`,
        `scrot "${file}"`,
        `import -window root "${file}"`,
      ]
      let lastError = null
      for (const tool of tools) {
        try {
          execSync(`${tool} 2>/dev/null`, { timeout: 5000 })
          if (capturedBytes() > 0) {
            lastError = null
            break
          }
        } catch (err) {
          lastError = err
        }
      }
      if (lastError && capturedBytes() === 0) throw lastError
    }

    const size = capturedBytes()
    if (size > 0) return { success: true, path: file, size }

    // Remove an empty leftover so it is never mistaken for a real capture.
    if (fs.existsSync(file)) {
      try { fs.unlinkSync(file) } catch { /* best effort cleanup */ }
    }
    return { success: false, error: 'Screenshot not created' }
  } catch (e) {
    return { success: false, error: e.message }
  }
}
|
|
37
|
+
|
|
38
|
+
// ─── Mouse Control ────────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/**
 * Click a mouse button at absolute screen coordinates.
 *
 * Coordinates can originate from a remote payload, so they are validated as
 * finite numbers (rounded to whole pixels) and every command is run without a
 * shell.
 *
 * @param {number|string} x - Horizontal pixel position.
 * @param {number|string} y - Vertical pixel position.
 * @param {string} [button='left'] - 'right' for a right-click; anything else is a left-click.
 * @returns {{success: true, x: number, y: number, button: string} | {success: false, error: string}}
 */
function mouseClick(x, y, button = 'left') {
  const platform = process.platform
  const toPixel = (value) => {
    const numberLike = typeof value === 'number' || (typeof value === 'string' && value.trim() !== '')
    const n = numberLike ? Number(value) : NaN
    return Number.isFinite(n) ? Math.round(n) : null
  }
  const px = toPixel(x)
  const py = toPixel(y)
  if (px === null || py === null) {
    return { success: false, error: `Invalid click coordinates: (${String(x)}, ${String(y)})` }
  }
  const isRight = button === 'right'

  try {
    if (platform === 'darwin') {
      // The System Events `click at` command used here has no button argument.
      // Fail rather than left-click where a right-click was requested.
      if (isRight) {
        return { success: false, error: 'Right-click is not supported on macOS' }
      }
      execFileSync('osascript', ['-e', `tell application "System Events" to click at {${px}, ${py}}`], { timeout: 3000 })
    } else if (platform === 'win32') {
      // mouse_event flags: 0x0002/0x0004 = left down/up, 0x0008/0x0010 = right down/up.
      const down = isRight ? '0x0008' : '0x0002'
      const up = isRight ? '0x0010' : '0x0004'
      const script = [
        'Add-Type -AssemblyName System.Windows.Forms',
        'Add-Type -AssemblyName System.Drawing',
        `[System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${px}, ${py})`,
        `Add-Type -MemberDefinition '[DllImport("user32.dll")] public static extern void mouse_event(int f,int x,int y,int d,int i);' -Name U -Namespace W`,
        `[W.U]::mouse_event(${down},0,0,0,0)`,
        `[W.U]::mouse_event(${up},0,0,0,0)`,
      ].join('; ')
      execFileSync('powershell', ['-NoProfile', '-Command', script], { timeout: 5000 })
    } else {
      execFileSync('xdotool', ['mousemove', String(px), String(py), 'click', isRight ? '3' : '1'], { timeout: 3000 })
    }
    return { success: true, x: px, y: py, button }
  } catch (e) {
    return { success: false, error: e.message }
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Move the mouse pointer to absolute screen coordinates without clicking.
 *
 * Coordinates are validated as finite numbers (rounded to whole pixels) and
 * commands are run without a shell, since values may come from a remote payload.
 *
 * @param {number|string} x - Horizontal pixel position.
 * @param {number|string} y - Vertical pixel position.
 * @returns {{success: true, x: number, y: number} | {success: false, error: string}}
 */
function mouseMove(x, y) {
  const platform = process.platform
  const toPixel = (value) => {
    const numberLike = typeof value === 'number' || (typeof value === 'string' && value.trim() !== '')
    const n = numberLike ? Number(value) : NaN
    return Number.isFinite(n) ? Math.round(n) : null
  }
  const px = toPixel(x)
  const py = toPixel(y)
  if (px === null || py === null) {
    return { success: false, error: `Invalid move coordinates: (${String(x)}, ${String(y)})` }
  }

  try {
    if (platform === 'darwin') {
      // NOTE(review): confirm System Events accepts "set position of cursor" on
      // current macOS; the AppleScript text is kept as it was.
      execFileSync('osascript', ['-e', `tell application "System Events" to set position of cursor to {${px}, ${py}}`], { timeout: 3000 })
    } else if (platform === 'win32') {
      // Each `powershell -Command` is a fresh session, so the assemblies must be
      // loaded before the Cursor/Point types can be used.
      const script = [
        'Add-Type -AssemblyName System.Windows.Forms',
        'Add-Type -AssemblyName System.Drawing',
        `[System.Windows.Forms.Cursor]::Position = New-Object System.Drawing.Point(${px}, ${py})`,
      ].join('; ')
      execFileSync('powershell', ['-NoProfile', '-Command', script], { timeout: 3000 })
    } else {
      execFileSync('xdotool', ['mousemove', String(px), String(py)], { timeout: 3000 })
    }
    return { success: true, x: px, y: py }
  } catch (e) {
    return { success: false, error: e.message }
  }
}
|
|
73
|
+
|
|
74
|
+
// ─── Keyboard Control ─────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
/**
 * Type a string into whatever currently has keyboard focus.
 *
 * The text may come from a model response or a remote payload, so it is never
 * passed through a shell. Each platform's own quoting rules are applied
 * instead (AppleScript string literal, SendKeys + PowerShell literal, plain
 * argv for xdotool).
 *
 * @param {string} text - Text to type.
 * @returns {{success: true, typed: string} | {success: false, error: string}}
 *   `typed` is a summary such as "11 chars".
 */
function typeText(text) {
  const platform = process.platform
  try {
    if (typeof text !== 'string') throw new Error('Text must be a string')

    if (platform === 'darwin') {
      // Escape special chars for AppleScript
      const escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"')
      execFileSync('osascript', ['-e', `tell application "System Events" to keystroke "${escaped}"`], { timeout: 5000 })
    } else if (platform === 'win32') {
      // SendKeys treats + ^ % ~ ( ) { } [ ] as commands; wrapping each in braces
      // sends it literally. Then double `'` for the PowerShell single-quoted string.
      const sendKeys = text.replace(/[+^%~(){}[\]]/g, '{$&}')
      const escaped = sendKeys.replace(/'/g, "''")
      const script = `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${escaped}')`
      execFileSync('powershell', ['-NoProfile', '-Command', script], { timeout: 5000 })
    } else {
      // `--` keeps text that starts with "-" from being parsed as an option.
      execFileSync('xdotool', ['type', '--clearmodifiers', '--', text], { timeout: 5000 })
    }
    return { success: true, typed: text.length + ' chars' }
  } catch (e) {
    return { success: false, error: e.message }
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Press a single key or a modifier combination.
 *
 * @param {string} key - A key name ("enter", "tab", "escape", "backspace",
 *   "space", arrows, ...), a single character, or a `+`-separated combination
 *   whose last part is the key and whose leading parts are modifiers
 *   ("ctrl+c", "cmd+s", "ctrl+shift+t").
 * @returns {{success: true, key: string} | {success: false, error: string}}
 */
function pressKey(key) {
  // key: "enter", "tab", "escape", "backspace", "ctrl+c", "cmd+s", etc.
  const platform = process.platform
  try {
    if (typeof key !== 'string' || key.trim() === '') {
      throw new Error('Key must be a non-empty string')
    }
    // Each part must be a plain word or one character that cannot terminate a
    // quoted string in AppleScript/PowerShell — the key may come from a remote payload.
    const tokens = key.trim().split('+').map((t) => t.trim())
    const validToken = /^(?:[A-Za-z0-9_]+|[^\s'"\\+])$/
    if (!tokens.every((t) => validToken.test(t))) {
      throw new Error(`Unsupported key: ${key}`)
    }
    const keyName = tokens[tokens.length - 1]
    const lowerName = keyName.toLowerCase()
    const modifiers = tokens.slice(0, -1).map((m) => m.toLowerCase())
    // Maps (not plain objects) so names like "constructor" never hit Object.prototype.
    const translate = (table, name, kind) => {
      if (!table.has(name)) throw new Error(`Unsupported ${kind}: ${name}`)
      return table.get(name)
    }

    if (platform === 'darwin') {
      // Named keys must be sent as virtual key codes; `keystroke "return"` would
      // type the word instead of pressing the key.
      const keyCodes = new Map([
        ['enter', 36], ['return', 36], ['tab', 48], ['space', 49], ['backspace', 51],
        ['escape', 53], ['esc', 53], ['delete', 117], ['home', 115], ['end', 119],
        ['pageup', 116], ['pagedown', 121], ['left', 123], ['right', 124],
        ['down', 125], ['up', 126],
      ])
      const modNames = new Map([
        ['ctrl', 'control'], ['control', 'control'], ['cmd', 'command'], ['command', 'command'],
        ['alt', 'option'], ['option', 'option'], ['shift', 'shift'],
      ])
      const held = modifiers.map((m) => `${translate(modNames, m, 'modifier')} down`)
      let stroke
      if (keyCodes.has(lowerName)) stroke = `key code ${keyCodes.get(lowerName)}`
      else if (lowerName.length === 1) stroke = `keystroke "${lowerName}"`
      else throw new Error(`Unsupported key: ${key}`)
      const using = held.length > 0 ? ` using {${held.join(', ')}}` : ''
      execFileSync('osascript', ['-e', `tell application "System Events" to ${stroke}${using}`], { timeout: 3000 })
    } else if (platform === 'win32') {
      const named = new Map([
        ['enter', '{ENTER}'], ['return', '{ENTER}'], ['tab', '{TAB}'], ['escape', '{ESC}'],
        ['esc', '{ESC}'], ['backspace', '{BS}'], ['space', ' '], ['delete', '{DEL}'],
        ['home', '{HOME}'], ['end', '{END}'], ['pageup', '{PGUP}'], ['pagedown', '{PGDN}'],
        ['left', '{LEFT}'], ['right', '{RIGHT}'], ['up', '{UP}'], ['down', '{DOWN}'],
      ])
      // SendKeys modifier prefixes; it has no prefix for the Windows key.
      const prefixes = new Map([['ctrl', '^'], ['control', '^'], ['alt', '%'], ['shift', '+']])
      const prefix = modifiers.map((m) => translate(prefixes, m, 'modifier')).join('')
      let part
      if (named.has(lowerName)) part = named.get(lowerName)
      else if (/^[a-z0-9]$/.test(lowerName)) part = lowerName
      else if (keyName.length === 1) part = `{${keyName}}` // literal special character
      else part = `{${keyName.toUpperCase()}}` // e.g. f5 -> {F5}
      const sequence = (prefix + part).replace(/'/g, "''")
      const script = `Add-Type -AssemblyName System.Windows.Forms; [System.Windows.Forms.SendKeys]::SendWait('${sequence}')`
      execFileSync('powershell', ['-NoProfile', '-Command', script], { timeout: 3000 })
    } else {
      // X keysym names are case-sensitive; translate the common lowercase spellings.
      const keysyms = new Map([
        ['enter', 'Return'], ['return', 'Return'], ['tab', 'Tab'], ['escape', 'Escape'],
        ['esc', 'Escape'], ['backspace', 'BackSpace'], ['delete', 'Delete'], ['space', 'space'],
        ['home', 'Home'], ['end', 'End'], ['pageup', 'Page_Up'], ['pagedown', 'Page_Down'],
        ['left', 'Left'], ['right', 'Right'], ['up', 'Up'], ['down', 'Down'],
        ['cmd', 'super'], ['command', 'super'], ['win', 'super'],
        ['control', 'ctrl'], ['option', 'alt'],
      ])
      const combo = tokens.map((t) => keysyms.get(t.toLowerCase()) ?? t).join('+')
      execFileSync('xdotool', ['key', combo], { timeout: 3000 })
    }
    return { success: true, key }
  } catch (e) {
    return { success: false, error: e.message }
  }
}
|
|
121
|
+
|
|
122
|
+
// ─── Vision — Send screenshot to Qwen-VL for understanding ───────────────────
|
|
123
|
+
|
|
124
|
+
/**
 * Take a screenshot and ask the vision model a question about it.
 *
 * The image is sent base64-encoded as a data URL to
 * `${config.gpu_url}/v1/chat/completions`, over http or https depending on
 * the URL scheme. All failures — capture, configuration, transport, timeout,
 * non-2xx status, unparsable or empty answer — are reported through the
 * result object; this function does not reject.
 *
 * The screenshot file is left on disk because its path is part of the result.
 *
 * @param {string} [question] - Prompt sent alongside the screenshot.
 * @returns {Promise<{success: true, description: string, screenshot: string} | {success: false, error: string}>}
 */
async function analyzeScreen(question = 'What is on the screen? Describe the UI elements, buttons, and text visible.') {
  const screenshot = takeScreenshot()
  if (!screenshot.success) return { success: false, error: screenshot.error }

  let url
  let headers
  let body
  try {
    const config = getConfig()
    const b64 = fs.readFileSync(screenshot.path).toString('base64')

    // Send to Qwen-VL vision model
    body = JSON.stringify({
      model: 'qwen-vl',
      messages: [
        { role: 'user', content: [
          { type: 'text', text: question },
          { type: 'image_url', image_url: { url: `data:image/png;base64,${b64}` } }
        ]}
      ],
      max_tokens: 1024,
    })
    url = new URL(`${config.gpu_url}/v1/chat/completions`)
    headers = { 'Content-Type': 'application/json', 'Authorization': `Bearer ${config.api_key}` }
  } catch (e) {
    return { success: false, error: e.message }
  }

  const transport = url.protocol === 'https:' ? https : http

  return new Promise((resolve) => {
    const req = transport.request(url, { method: 'POST', headers, timeout: 30000 }, (res) => {
      // Decode as UTF-8 across chunk boundaries so multi-byte characters are not split.
      res.setEncoding('utf8')
      let data = ''
      res.on('data', c => data += c)
      res.on('error', (e) => resolve({ success: false, error: e.message }))
      res.on('end', () => {
        if (res.statusCode < 200 || res.statusCode >= 300) {
          resolve({ success: false, error: `Vision model returned HTTP ${res.statusCode}` })
          return
        }
        try {
          const result = JSON.parse(data)
          const description = result.choices?.[0]?.message?.content || ''
          if (!description) {
            resolve({ success: false, error: 'Vision model returned no description' })
            return
          }
          resolve({ success: true, description, screenshot: screenshot.path })
        } catch { resolve({ success: false, error: 'Bad response from vision model' }) }
      })
    })
    // The `timeout` option only emits this event; the request has to be aborted by hand.
    req.on('timeout', () => req.destroy(new Error('Vision model request timed out')))
    req.on('error', (e) => resolve({ success: false, error: e.message }))
    req.write(body)
    req.end()
  })
}
|
|
166
|
+
|
|
167
|
+
// ─── Live Screen Monitor ──────────────────────────────────────────────────────
|
|
168
|
+
|
|
169
|
+
/**
 * Polls the screen at a fixed interval and reports changes.
 *
 * Each tick asks `analyzeScreen` for a brief description; when the description
 * differs from the previous one, the callback given to `start` is invoked with
 * `(description, screenshotPath)`. The next tick is scheduled only after the
 * current one finishes, so analyses never overlap.
 */
class ScreenMonitor {
  /**
   * @param {number} [intervalMs=3000] - Delay between the end of one tick and the start of the next.
   */
  constructor(intervalMs = 3000) {
    this.interval = intervalMs
    this.running = false
    this.timer = null
    this.lastDescription = ''
    this.onUpdate = null
  }

  /**
   * Begin monitoring. Calling it while already running only replaces the
   * callback; it does not start a second polling loop.
   *
   * @param {(description: string, screenshot: string) => void} callback
   */
  start(callback) {
    this.onUpdate = callback
    if (this.running) return
    this.running = true
    console.log(chalk.cyan(` 👁 Screen monitor started (every ${this.interval / 1000}s)`))
    this.tick()
  }

  /** Stop monitoring and cancel any pending tick. */
  stop() {
    this.running = false
    if (this.timer) {
      clearTimeout(this.timer)
      this.timer = null
    }
    console.log(chalk.gray(' Screen monitor stopped'))
  }

  /**
   * Run one poll and schedule the next. Errors from the analysis or from the
   * callback are logged and do not end the loop.
   */
  async tick() {
    if (!this.running) return
    try {
      const result = await analyzeScreen('Briefly describe what is currently visible on screen. Note any dialogs, windows, error messages, or UI changes.')
      // stop() may have been called while the analysis was in flight.
      if (!this.running) return
      if (result.success && result.description !== this.lastDescription) {
        this.lastDescription = result.description
        if (this.onUpdate) this.onUpdate(result.description, result.screenshot)
      }
    } catch (err) {
      console.log(chalk.red(` Screen monitor error: ${err.message}`))
    }
    if (this.running) {
      this.timer = setTimeout(() => this.tick(), this.interval)
    }
  }
}
|
|
201
|
+
|
|
202
|
+
// ─── Computer Use Agent — AI sees screen and controls computer ────────────────
|
|
203
|
+
|
|
204
|
+
/**
 * Ask the planning model what to do next for `task`, given a textual
 * description of the current screen.
 *
 * The request goes to `${config.gpu_url}/v1/chat/completions` over http or
 * https depending on the URL scheme. The reply is free text that may contain
 * `<action:.../>` tags as listed in the system prompt.
 *
 * @param {string} task - The user's task, sent as the user message.
 * @param {string} screenDescription - Description embedded in the system prompt.
 * @returns {Promise<string>} The model's reply, or '' on any failure
 *   (bad URL, transport error, timeout, non-2xx status, unparsable body).
 */
async function computerUseStep(task, screenDescription) {
  const config = getConfig()
  const body = JSON.stringify({
    model: 'qwen-coder',
    messages: [
      { role: 'system', content: `You are Otoro controlling a ${process.platform} computer. You can see the screen and control mouse/keyboard.

Available actions (use XML tags):
- <action:click x="123" y="456"/> — click at coordinates
- <action:rightclick x="123" y="456"/> — right-click
- <action:type text="hello world"/> — type text
- <action:key press="enter"/> — press a key (enter, tab, escape, ctrl+c, cmd+s, etc.)
- <action:move x="123" y="456"/> — move mouse
- <action:screenshot/> — take a new screenshot
- <action:wait ms="1000"/> — wait before next action
- <action:done/> — task is complete

Current screen: ${screenDescription}

Execute the task step by step. After each action, I'll show you the updated screen.` },
      { role: 'user', content: task }
    ],
    max_tokens: 2048,
  })

  let url
  try {
    url = new URL(`${config.gpu_url}/v1/chat/completions`)
  } catch {
    return ''
  }
  const transport = url.protocol === 'https:' ? https : http

  return new Promise((resolve) => {
    const req = transport.request(url, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json', 'Authorization': `Bearer ${config.api_key}` },
      timeout: 60000,
    }, (res) => {
      // Decode as UTF-8 across chunk boundaries so multi-byte characters are not split.
      res.setEncoding('utf8')
      let data = ''
      res.on('data', c => data += c)
      res.on('error', () => resolve(''))
      res.on('end', () => {
        if (res.statusCode < 200 || res.statusCode >= 300) {
          resolve('')
          return
        }
        try {
          const result = JSON.parse(data)
          resolve(result.choices?.[0]?.message?.content || '')
        } catch { resolve('') }
      })
    })
    // The `timeout` option only emits this event; abort so the promise settles.
    req.on('timeout', () => req.destroy(new Error('Planning model request timed out')))
    req.on('error', () => resolve(''))
    req.write(body)
    req.end()
  })
}
|
|
250
|
+
|
|
251
|
+
/**
 * Execute every `<action:.../>` tag found in a model response, in the order
 * the tags appear in the text.
 *
 * Recognised tags: click, rightclick, move (need numeric `x` and `y`), type
 * (non-empty `text`), key (non-empty `press`), wait (numeric `ms`) and done.
 * Tags that are unknown or lack their required attributes are ignored.
 *
 * @param {string} response - Raw model reply.
 * @returns {Promise<{results: object[], isDone: boolean}>} One result per
 *   executed action, plus whether a `done` tag was present.
 */
async function executeComputerActions(response) {
  const results = []
  let isDone = false
  if (typeof response !== 'string') return { results, isDone }

  // One pass over all tags keeps the model's ordering (e.g. click a field, then type).
  const actionTag = /<action:([a-z]+)((?:\s+[a-z]+="[^"]*")*)\s*\/>/g
  const isDigits = (value) => typeof value === 'string' && /^\d+$/.test(value)

  for (const [, name, rawAttrs] of response.matchAll(actionTag)) {
    const attrs = {}
    for (const [, attrName, attrValue] of rawAttrs.matchAll(/([a-z]+)="([^"]*)"/g)) {
      attrs[attrName] = attrValue
    }
    const hasPoint = isDigits(attrs.x) && isDigits(attrs.y)

    switch (name) {
      case 'click':
        if (hasPoint) results.push(mouseClick(parseInt(attrs.x, 10), parseInt(attrs.y, 10)))
        break
      case 'rightclick':
        if (hasPoint) results.push(mouseClick(parseInt(attrs.x, 10), parseInt(attrs.y, 10), 'right'))
        break
      case 'move':
        if (hasPoint) results.push(mouseMove(parseInt(attrs.x, 10), parseInt(attrs.y, 10)))
        break
      case 'type':
        if (attrs.text) results.push(typeText(attrs.text))
        break
      case 'key':
        if (attrs.press) results.push(pressKey(attrs.press))
        break
      case 'wait':
        if (isDigits(attrs.ms)) {
          await new Promise(r => setTimeout(r, parseInt(attrs.ms, 10)))
          results.push({ success: true, waited: attrs.ms + 'ms' })
        }
        break
      case 'done':
        isDone = true
        break
      default:
        // e.g. <action:screenshot/> — the caller re-captures the screen itself.
        break
    }
  }

  return { results, isDone }
}
|
|
277
|
+
|
|
278
|
+
module.exports = {
|
|
279
|
+
takeScreenshot, mouseClick, mouseMove, typeText, pressKey,
|
|
280
|
+
analyzeScreen, ScreenMonitor, computerUseStep, executeComputerActions
|
|
281
|
+
}
|
package/package.json
CHANGED