@mediar-ai/terminator 0.23.34 → 0.23.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.d.ts +60 -0
- package/package.json +6 -6
- package/src/desktop.rs +25 -1
- package/src/element.rs +8 -0
- package/src/types.rs +96 -0
package/index.d.ts
CHANGED
|
@@ -101,6 +101,47 @@ export interface FontStyle {
|
|
|
101
101
|
bold: boolean
|
|
102
102
|
color: number
|
|
103
103
|
}
|
|
104
|
+
/** A single step in the computer use execution */
|
|
105
|
+
export interface ComputerUseStep {
|
|
106
|
+
/** Step number (1-indexed) */
|
|
107
|
+
step: number
|
|
108
|
+
/** Action that was executed */
|
|
109
|
+
action: string
|
|
110
|
+
/** Arguments passed to the action (as JSON string) */
|
|
111
|
+
args: string
|
|
112
|
+
/** Whether the action succeeded */
|
|
113
|
+
success: boolean
|
|
114
|
+
/** Error message if action failed */
|
|
115
|
+
error?: string
|
|
116
|
+
/** Model's reasoning text for this step */
|
|
117
|
+
text?: string
|
|
118
|
+
}
|
|
119
|
+
/** Pending confirmation info when safety check triggers */
|
|
120
|
+
export interface ComputerUsePendingConfirmation {
|
|
121
|
+
/** Action that needs confirmation */
|
|
122
|
+
action: string
|
|
123
|
+
/** Arguments for the action (as JSON string) */
|
|
124
|
+
args: string
|
|
125
|
+
/** Model's explanation text */
|
|
126
|
+
text?: string
|
|
127
|
+
}
|
|
128
|
+
/** Result of the computer use execution */
|
|
129
|
+
export interface ComputerUseResult {
|
|
130
|
+
/** Status: "success", "failed", "needs_confirmation", "max_steps_reached" */
|
|
131
|
+
status: string
|
|
132
|
+
/** The goal that was attempted */
|
|
133
|
+
goal: string
|
|
134
|
+
/** Number of steps executed */
|
|
135
|
+
stepsExecuted: number
|
|
136
|
+
/** Last action performed */
|
|
137
|
+
finalAction: string
|
|
138
|
+
/** Final text response from model */
|
|
139
|
+
finalText?: string
|
|
140
|
+
/** History of all steps */
|
|
141
|
+
steps: Array<ComputerUseStep>
|
|
142
|
+
/** Pending confirmation info if status is "needs_confirmation" */
|
|
143
|
+
pendingConfirmation?: ComputerUsePendingConfirmation
|
|
144
|
+
}
|
|
104
145
|
/** Main entry point for desktop automation. */
|
|
105
146
|
export declare class Desktop {
|
|
106
147
|
/**
|
|
@@ -330,6 +371,19 @@ export declare class Desktop {
|
|
|
330
371
|
* @param {number} percentage - The zoom percentage (e.g., 100 for 100%, 150 for 150%, 50 for 50%).
|
|
331
372
|
*/
|
|
332
373
|
setZoom(percentage: number): Promise<void>
|
|
374
|
+
/**
|
|
375
|
+
* (async) Run Gemini Computer Use agentic loop.
|
|
376
|
+
*
|
|
377
|
+
* Provide a goal and target process, and this will autonomously take actions
|
|
378
|
+
* (click, type, scroll, etc.) until the goal is achieved or max_steps is reached.
|
|
379
|
+
* Uses Gemini's vision model to analyze screenshots and decide actions.
|
|
380
|
+
*
|
|
381
|
+
* @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
|
|
382
|
+
* @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
|
|
383
|
+
* @param {number} [maxSteps=20] - Maximum number of steps before stopping
|
|
384
|
+
* @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
|
|
385
|
+
*/
|
|
386
|
+
geminiComputerUse(process: string, goal: string, maxSteps?: number | undefined | null): Promise<ComputerUseResult>
|
|
333
387
|
}
|
|
334
388
|
/** A UI element in the accessibility tree. */
|
|
335
389
|
export declare class Element {
|
|
@@ -535,6 +589,12 @@ export declare class Element {
|
|
|
535
589
|
* @returns {number} The process ID.
|
|
536
590
|
*/
|
|
537
591
|
processId(): number
|
|
592
|
+
/**
|
|
593
|
+
* Get the process name of the application containing this element.
|
|
594
|
+
*
|
|
595
|
+
* @returns {string} The process name (e.g., "chrome", "notepad").
|
|
596
|
+
*/
|
|
597
|
+
processName(): string
|
|
538
598
|
toString(): string
|
|
539
599
|
/**
|
|
540
600
|
* Sets the transparency of the window.
|
package/package.json
CHANGED
|
@@ -41,11 +41,11 @@
|
|
|
41
41
|
}
|
|
42
42
|
},
|
|
43
43
|
"optionalDependencies": {
|
|
44
|
-
"@mediar-ai/terminator-darwin-arm64": "0.23.34",
|
|
45
|
-
"@mediar-ai/terminator-darwin-x64": "0.23.34",
|
|
46
|
-
"@mediar-ai/terminator-linux-x64-gnu": "0.23.34",
|
|
47
|
-
"@mediar-ai/terminator-win32-arm64-msvc": "0.23.34",
|
|
48
|
-
"@mediar-ai/terminator-win32-x64-msvc": "0.23.34"
|
|
44
|
+
"@mediar-ai/terminator-darwin-arm64": "0.23.36",
|
|
45
|
+
"@mediar-ai/terminator-darwin-x64": "0.23.36",
|
|
46
|
+
"@mediar-ai/terminator-linux-x64-gnu": "0.23.36",
|
|
47
|
+
"@mediar-ai/terminator-win32-arm64-msvc": "0.23.36",
|
|
48
|
+
"@mediar-ai/terminator-win32-x64-msvc": "0.23.36"
|
|
49
49
|
},
|
|
50
50
|
"repository": {
|
|
51
51
|
"type": "git",
|
|
@@ -63,5 +63,5 @@
|
|
|
63
63
|
"test-hook": "powershell.exe -ExecutionPolicy Bypass -File \"../../.git/hooks/pre-push.ps1\""
|
|
64
64
|
},
|
|
65
65
|
"types": "wrapper.d.ts",
|
|
66
|
-
"version": "0.23.34"
|
|
66
|
+
"version": "0.23.36"
|
|
67
67
|
}
|
package/src/desktop.rs
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use crate::types::{Monitor, MonitorScreenshotPair};
|
|
1
|
+
use crate::types::{ComputerUseResult, Monitor, MonitorScreenshotPair};
|
|
2
2
|
use crate::Selector;
|
|
3
3
|
use crate::{
|
|
4
4
|
map_error, CommandOutput, Element, Locator, ScreenshotResult, TreeBuildConfig, UINode,
|
|
@@ -524,4 +524,28 @@ impl Desktop {
|
|
|
524
524
|
pub async fn set_zoom(&self, percentage: u32) -> napi::Result<()> {
|
|
525
525
|
self.inner.set_zoom(percentage).await.map_err(map_error)
|
|
526
526
|
}
|
|
527
|
+
|
|
528
|
+
/// (async) Run Gemini Computer Use agentic loop.
|
|
529
|
+
///
|
|
530
|
+
/// Provide a goal and target process, and this will autonomously take actions
|
|
531
|
+
/// (click, type, scroll, etc.) until the goal is achieved or max_steps is reached.
|
|
532
|
+
/// Uses Gemini's vision model to analyze screenshots and decide actions.
|
|
533
|
+
///
|
|
534
|
+
/// @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
|
|
535
|
+
/// @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
|
|
536
|
+
/// @param {number} [maxSteps=20] - Maximum number of steps before stopping
|
|
537
|
+
/// @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
|
|
538
|
+
#[napi]
|
|
539
|
+
pub async fn gemini_computer_use(
|
|
540
|
+
&self,
|
|
541
|
+
process: String,
|
|
542
|
+
goal: String,
|
|
543
|
+
max_steps: Option<u32>,
|
|
544
|
+
) -> napi::Result<ComputerUseResult> {
|
|
545
|
+
self.inner
|
|
546
|
+
.gemini_computer_use(&process, &goal, max_steps, None)
|
|
547
|
+
.await
|
|
548
|
+
.map(ComputerUseResult::from)
|
|
549
|
+
.map_err(|e| napi::Error::from_reason(e.to_string()))
|
|
550
|
+
}
|
|
527
551
|
}
|
package/src/element.rs
CHANGED
|
@@ -427,6 +427,14 @@ impl Element {
|
|
|
427
427
|
self.inner.process_id().map_err(map_error)
|
|
428
428
|
}
|
|
429
429
|
|
|
430
|
+
/// Get the process name of the application containing this element.
|
|
431
|
+
///
|
|
432
|
+
/// @returns {string} The process name (e.g., "chrome", "notepad").
|
|
433
|
+
#[napi]
|
|
434
|
+
pub fn process_name(&self) -> napi::Result<String> {
|
|
435
|
+
self.inner.process_name().map_err(map_error)
|
|
436
|
+
}
|
|
437
|
+
|
|
430
438
|
#[napi]
|
|
431
439
|
pub fn to_string(&self) -> napi::Result<String> {
|
|
432
440
|
let id_part = self.inner.id().map_or("null".to_string(), |id| id);
|
package/src/types.rs
CHANGED
|
@@ -307,3 +307,99 @@ pub(crate) fn serializable_to_ui_node(elem: &terminator::SerializableUIElement)
|
|
|
307
307
|
children,
|
|
308
308
|
}
|
|
309
309
|
}
|
|
310
|
+
|
|
311
|
+
// ===== Computer Use Types =====
|
|
312
|
+
|
|
313
|
+
/// A single step in the computer use execution
|
|
314
|
+
#[napi(object)]
|
|
315
|
+
pub struct ComputerUseStep {
|
|
316
|
+
/// Step number (1-indexed)
|
|
317
|
+
pub step: u32,
|
|
318
|
+
/// Action that was executed
|
|
319
|
+
pub action: String,
|
|
320
|
+
/// Arguments passed to the action (as JSON string)
|
|
321
|
+
pub args: String,
|
|
322
|
+
/// Whether the action succeeded
|
|
323
|
+
pub success: bool,
|
|
324
|
+
/// Error message if action failed
|
|
325
|
+
pub error: Option<String>,
|
|
326
|
+
/// Model's reasoning text for this step
|
|
327
|
+
pub text: Option<String>,
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/// Pending confirmation info when safety check triggers
|
|
331
|
+
#[napi(object)]
|
|
332
|
+
pub struct ComputerUsePendingConfirmation {
|
|
333
|
+
/// Action that needs confirmation
|
|
334
|
+
pub action: String,
|
|
335
|
+
/// Arguments for the action (as JSON string)
|
|
336
|
+
pub args: String,
|
|
337
|
+
/// Model's explanation text
|
|
338
|
+
pub text: Option<String>,
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
/// Result of the computer use execution
|
|
342
|
+
#[napi(object)]
|
|
343
|
+
pub struct ComputerUseResult {
|
|
344
|
+
/// Status: "success", "failed", "needs_confirmation", "max_steps_reached"
|
|
345
|
+
pub status: String,
|
|
346
|
+
/// The goal that was attempted
|
|
347
|
+
pub goal: String,
|
|
348
|
+
/// Number of steps executed
|
|
349
|
+
pub steps_executed: u32,
|
|
350
|
+
/// Last action performed
|
|
351
|
+
pub final_action: String,
|
|
352
|
+
/// Final text response from model
|
|
353
|
+
pub final_text: Option<String>,
|
|
354
|
+
/// History of all steps
|
|
355
|
+
pub steps: Vec<ComputerUseStep>,
|
|
356
|
+
/// Pending confirmation info if status is "needs_confirmation"
|
|
357
|
+
pub pending_confirmation: Option<ComputerUsePendingConfirmation>,
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
impl From<terminator::ComputerUseStep> for ComputerUseStep {
|
|
361
|
+
fn from(step: terminator::ComputerUseStep) -> Self {
|
|
362
|
+
ComputerUseStep {
|
|
363
|
+
step: step.step,
|
|
364
|
+
action: step.action,
|
|
365
|
+
args: step.args.to_string(),
|
|
366
|
+
success: step.success,
|
|
367
|
+
error: step.error,
|
|
368
|
+
text: step.text,
|
|
369
|
+
}
|
|
370
|
+
}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
impl From<terminator::ComputerUseResult> for ComputerUseResult {
|
|
374
|
+
fn from(result: terminator::ComputerUseResult) -> Self {
|
|
375
|
+
let pending_confirmation =
|
|
376
|
+
result
|
|
377
|
+
.pending_confirmation
|
|
378
|
+
.map(|pc| ComputerUsePendingConfirmation {
|
|
379
|
+
action: pc
|
|
380
|
+
.get("action")
|
|
381
|
+
.and_then(|v| v.as_str())
|
|
382
|
+
.unwrap_or("")
|
|
383
|
+
.to_string(),
|
|
384
|
+
args: pc.get("args").map(|v| v.to_string()).unwrap_or_default(),
|
|
385
|
+
text: pc
|
|
386
|
+
.get("text")
|
|
387
|
+
.and_then(|v| v.as_str())
|
|
388
|
+
.map(|s| s.to_string()),
|
|
389
|
+
});
|
|
390
|
+
|
|
391
|
+
ComputerUseResult {
|
|
392
|
+
status: result.status,
|
|
393
|
+
goal: result.goal,
|
|
394
|
+
steps_executed: result.steps_executed,
|
|
395
|
+
final_action: result.final_action,
|
|
396
|
+
final_text: result.final_text,
|
|
397
|
+
steps: result
|
|
398
|
+
.steps
|
|
399
|
+
.into_iter()
|
|
400
|
+
.map(ComputerUseStep::from)
|
|
401
|
+
.collect(),
|
|
402
|
+
pending_confirmation,
|
|
403
|
+
}
|
|
404
|
+
}
|
|
405
|
+
}
|