@mediar-ai/terminator 0.23.34 → 0.23.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.d.ts CHANGED
@@ -101,6 +101,47 @@ export interface FontStyle {
101
101
  bold: boolean
102
102
  color: number
103
103
  }
104
+ /** One recorded step of a computer use run; `ComputerUseResult.steps` is an array of these. */
105
+ export interface ComputerUseStep {
106
+ /** Position of this step in the run, counted from 1. */
107
+ step: number
108
+ /** Name of the action that was executed for this step. */
109
+ action: string
110
+ /** Arguments passed to the action, serialized as a JSON string. */
111
+ args: string
112
+ /** True when the action succeeded. */
113
+ success: boolean
114
+ /** Error message if the action failed (optional field). */
115
+ error?: string
116
+ /** The model's reasoning text for this step (optional field). */
117
+ text?: string
118
+ }
119
+ /** Details of an action awaiting confirmation after a safety check triggers; carried by `ComputerUseResult.pendingConfirmation`. */
120
+ export interface ComputerUsePendingConfirmation {
121
+ /** Name of the action that needs confirmation. */
122
+ action: string
123
+ /** Arguments for that action, serialized as a JSON string. */
124
+ args: string
125
+ /** The model's explanation text (optional field). */
126
+ text?: string
127
+ }
128
+ /** Result of a computer use execution; the value `Desktop.geminiComputerUse` resolves to. */
129
+ export interface ComputerUseResult {
130
+ /** Outcome status, one of: "success", "failed", "needs_confirmation", "max_steps_reached". */
131
+ status: string
132
+ /** The goal that was attempted. */
133
+ goal: string
134
+ /** Number of steps that were executed. */
135
+ stepsExecuted: number
136
+ /** The last action that was performed. */
137
+ finalAction: string
138
+ /** Final text response from the model (optional field). */
139
+ finalText?: string
140
+ /** History of all steps, one `ComputerUseStep` per entry. */
141
+ steps: Array<ComputerUseStep>
142
+ /** Pending confirmation details, provided when status is "needs_confirmation". */
143
+ pendingConfirmation?: ComputerUsePendingConfirmation
144
+ }
104
145
  /** Main entry point for desktop automation. */
105
146
  export declare class Desktop {
106
147
  /**
@@ -330,6 +371,19 @@ export declare class Desktop {
330
371
  * @param {number} percentage - The zoom percentage (e.g., 100 for 100%, 150 for 150%, 50 for 50%).
331
372
  */
332
373
  setZoom(percentage: number): Promise<void>
374
+ /**
375
+ * (async) Runs the Gemini Computer Use agentic loop.
376
+ *
377
+ * Given a target process and a goal, this autonomously takes actions
378
+ * (click, type, scroll, etc.) until the goal is achieved or `maxSteps` is reached.
379
+ * Uses Gemini's vision model to analyze screenshots and decide actions.
380
+ *
381
+ * @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
382
+ * @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
383
+ * @param {number} [maxSteps=20] - Maximum number of steps before stopping
384
+ * @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
385
+ */
386
+ geminiComputerUse(process: string, goal: string, maxSteps?: number | undefined | null): Promise<ComputerUseResult>
333
387
  }
334
388
  /** A UI element in the accessibility tree. */
335
389
  export declare class Element {
@@ -535,6 +589,12 @@ export declare class Element {
535
589
  * @returns {number} The process ID.
536
590
  */
537
591
  processId(): number
592
+ /**
593
+ * Returns the process name of the application that contains this element (name counterpart of `processId()`).
594
+ *
595
+ * @returns {string} The process name (e.g., "chrome", "notepad").
596
+ */
597
+ processName(): string
538
598
  toString(): string
539
599
  /**
540
600
  * Sets the transparency of the window.
package/package.json CHANGED
@@ -41,11 +41,11 @@
41
41
  }
42
42
  },
43
43
  "optionalDependencies": {
44
- "@mediar-ai/terminator-darwin-arm64": "0.23.34",
45
- "@mediar-ai/terminator-darwin-x64": "0.23.34",
46
- "@mediar-ai/terminator-linux-x64-gnu": "0.23.34",
47
- "@mediar-ai/terminator-win32-arm64-msvc": "0.23.34",
48
- "@mediar-ai/terminator-win32-x64-msvc": "0.23.34"
44
+ "@mediar-ai/terminator-darwin-arm64": "0.23.36",
45
+ "@mediar-ai/terminator-darwin-x64": "0.23.36",
46
+ "@mediar-ai/terminator-linux-x64-gnu": "0.23.36",
47
+ "@mediar-ai/terminator-win32-arm64-msvc": "0.23.36",
48
+ "@mediar-ai/terminator-win32-x64-msvc": "0.23.36"
49
49
  },
50
50
  "repository": {
51
51
  "type": "git",
@@ -63,5 +63,5 @@
63
63
  "test-hook": "powershell.exe -ExecutionPolicy Bypass -File \"../../.git/hooks/pre-push.ps1\""
64
64
  },
65
65
  "types": "wrapper.d.ts",
66
- "version": "0.23.34"
66
+ "version": "0.23.36"
67
67
  }
package/src/desktop.rs CHANGED
@@ -1,4 +1,4 @@
1
- use crate::types::{Monitor, MonitorScreenshotPair};
1
+ use crate::types::{ComputerUseResult, Monitor, MonitorScreenshotPair};
2
2
  use crate::Selector;
3
3
  use crate::{
4
4
  map_error, CommandOutput, Element, Locator, ScreenshotResult, TreeBuildConfig, UINode,
@@ -524,4 +524,28 @@ impl Desktop {
524
524
  pub async fn set_zoom(&self, percentage: u32) -> napi::Result<()> {
525
525
  self.inner.set_zoom(percentage).await.map_err(map_error)
526
526
  }
527
+
528
+ /// (async) Runs the Gemini Computer Use agentic loop.
529
+ ///
530
+ /// Given a target process and a goal, this autonomously takes actions
531
+ /// (click, type, scroll, etc.) until the goal is achieved or `max_steps` is reached.
532
+ /// Uses Gemini's vision model to analyze screenshots and decide actions.
533
+ ///
534
+ /// @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
535
+ /// @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
536
+ /// @param {number} [maxSteps=20] - Maximum number of steps before stopping
537
+ /// @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
538
+ #[napi]
539
+ pub async fn gemini_computer_use(
540
+ &self,
541
+ process: String,
542
+ goal: String,
543
+ max_steps: Option<u32>, // forwarded unchanged; NOTE(review): the documented default of 20 is not applied in this wrapper - confirm it is applied by the inner call
544
+ ) -> napi::Result<ComputerUseResult> {
545
+ self.inner
546
+ .gemini_computer_use(&process, &goal, max_steps, None) // the fourth argument is always `None` from this binding
547
+ .await
548
+ .map(ComputerUseResult::from) // convert the inner result into the napi-facing `ComputerUseResult`
549
+ .map_err(|e| napi::Error::from_reason(e.to_string())) // error is reported through its `to_string()` text; NOTE(review): sibling methods use `map_error` - confirm the difference is intentional
550
+ }
527
551
  }
package/src/element.rs CHANGED
@@ -427,6 +427,14 @@ impl Element {
427
427
  self.inner.process_id().map_err(map_error)
428
428
  }
429
429
 
430
+ /// Returns the process name of the application that contains this element.
431
+ ///
432
+ /// @returns {string} The process name (e.g., "chrome", "notepad").
433
+ #[napi]
434
+ pub fn process_name(&self) -> napi::Result<String> {
435
+ self.inner.process_name().map_err(map_error) // delegate to the wrapped element; failures are converted with `map_error`
436
+ }
437
+
430
438
  #[napi]
431
439
  pub fn to_string(&self) -> napi::Result<String> {
432
440
  let id_part = self.inner.id().map_or("null".to_string(), |id| id);
package/src/types.rs CHANGED
@@ -307,3 +307,99 @@ pub(crate) fn serializable_to_ui_node(elem: &terminator::SerializableUIElement)
307
307
  children,
308
308
  }
309
309
  }
310
+
311
+ // ===== Computer Use Types =====
312
+
313
+ /// One recorded step of a computer use run; `ComputerUseResult::steps` holds a list of these.
314
+ #[napi(object)]
315
+ pub struct ComputerUseStep {
316
+ /// Position of this step in the run, counted from 1.
317
+ pub step: u32,
318
+ /// Name of the action that was executed for this step.
319
+ pub action: String,
320
+ /// Arguments passed to the action, serialized as a JSON string.
321
+ pub args: String,
322
+ /// True when the action succeeded.
323
+ pub success: bool,
324
+ /// Error message if the action failed.
325
+ pub error: Option<String>,
326
+ /// The model's reasoning text for this step, when provided.
327
+ pub text: Option<String>,
328
+ }
329
+
330
+ /// Details of an action awaiting confirmation after a safety check triggers.
331
+ #[napi(object)]
332
+ pub struct ComputerUsePendingConfirmation {
333
+ /// Name of the action that needs confirmation.
334
+ pub action: String,
335
+ /// Arguments for that action, serialized as a JSON string.
336
+ pub args: String,
337
+ /// The model's explanation text, when provided.
338
+ pub text: Option<String>,
339
+ }
340
+
341
+ /// Result of a computer use execution, built from `terminator::ComputerUseResult`.
342
+ #[napi(object)]
343
+ pub struct ComputerUseResult {
344
+ /// Outcome status, one of: "success", "failed", "needs_confirmation", "max_steps_reached".
345
+ pub status: String,
346
+ /// The goal that was attempted.
347
+ pub goal: String,
348
+ /// Number of steps that were executed.
349
+ pub steps_executed: u32,
350
+ /// The last action that was performed.
351
+ pub final_action: String,
352
+ /// Final text response from the model, when provided.
353
+ pub final_text: Option<String>,
354
+ /// History of all steps, one `ComputerUseStep` per entry.
355
+ pub steps: Vec<ComputerUseStep>,
356
+ /// Pending confirmation details, provided when status is "needs_confirmation".
357
+ pub pending_confirmation: Option<ComputerUsePendingConfirmation>,
358
+ }
359
+
360
+ impl From<terminator::ComputerUseStep> for ComputerUseStep { // converts the `terminator` step into the napi-facing step
361
+ fn from(step: terminator::ComputerUseStep) -> Self {
362
+ ComputerUseStep {
363
+ step: step.step,
364
+ action: step.action,
365
+ args: step.args.to_string(), // the only field that is transformed: rendered with `to_string()`; NOTE(review): presumably a JSON value - confirm
366
+ success: step.success,
367
+ error: step.error,
368
+ text: step.text,
369
+ }
370
+ }
371
+ }
372
+
373
+ impl From<terminator::ComputerUseResult> for ComputerUseResult { // converts the `terminator` result into the napi-facing result
374
+ fn from(result: terminator::ComputerUseResult) -> Self {
375
+ let pending_confirmation = // stays `None` when the source has no pending confirmation
376
+ result
377
+ .pending_confirmation
378
+ .map(|pc| ComputerUsePendingConfirmation { // fields are looked up by key on `pc`
379
+ action: pc
380
+ .get("action")
381
+ .and_then(|v| v.as_str())
382
+ .unwrap_or("") // empty string when "action" is missing or `as_str()` yields `None`
383
+ .to_string(),
384
+ args: pc.get("args").map(|v| v.to_string()).unwrap_or_default(), // `to_string()` form of "args", or an empty string when the key is missing
385
+ text: pc
386
+ .get("text")
387
+ .and_then(|v| v.as_str())
388
+ .map(|s| s.to_string()), // `None` when "text" is missing or `as_str()` yields `None`
389
+ });
390
+
391
+ ComputerUseResult {
392
+ status: result.status,
393
+ goal: result.goal,
394
+ steps_executed: result.steps_executed,
395
+ final_action: result.final_action,
396
+ final_text: result.final_text,
397
+ steps: result
398
+ .steps
399
+ .into_iter()
400
+ .map(ComputerUseStep::from) // each step goes through the `From<terminator::ComputerUseStep>` conversion
401
+ .collect(),
402
+ pending_confirmation,
403
+ }
404
+ }
405
+ }