npm - @mediar-ai/terminator - Versions diffs - 0.23.34 → 0.23.36 - Mend

@mediar-ai/terminator 0.23.34 → 0.23.36

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/index.d.ts CHANGED Viewed

@@ -101,6 +101,47 @@ export interface FontStyle {
   bold: boolean
   color: number
 }
+/** A single step in the computer use execution */
+export interface ComputerUseStep {
+  /** Step number (1-indexed) */
+  step: number
+  /** Action that was executed */
+  action: string
+  /** Arguments passed to the action (as JSON string) */
+  args: string
+  /** Whether the action succeeded */
+  success: boolean
+  /** Error message if action failed */
+  error?: string
+  /** Model's reasoning text for this step */
+  text?: string
+}
+/** Pending confirmation info when safety check triggers */
+export interface ComputerUsePendingConfirmation {
+  /** Action that needs confirmation */
+  action: string
+  /** Arguments for the action (as JSON string) */
+  args: string
+  /** Model's explanation text */
+  text?: string
+}
+/** Result of the computer use execution */
+export interface ComputerUseResult {
+  /** Status: "success", "failed", "needs_confirmation", "max_steps_reached" */
+  status: string
+  /** The goal that was attempted */
+  goal: string
+  /** Number of steps executed */
+  stepsExecuted: number
+  /** Last action performed */
+  finalAction: string
+  /** Final text response from model */
+  finalText?: string
+  /** History of all steps */
+  steps: Array<ComputerUseStep>
+  /** Pending confirmation info if status is "needs_confirmation" */
+  pendingConfirmation?: ComputerUsePendingConfirmation
+}
 /** Main entry point for desktop automation. */
 export declare class Desktop {
   /**
@@ -330,6 +371,19 @@ export declare class Desktop {
    * @param {number} percentage - The zoom percentage (e.g., 100 for 100%, 150 for 150%, 50 for 50%).
    */
   setZoom(percentage: number): Promise<void>
+  /**
+   * (async) Run Gemini Computer Use agentic loop.
+   *
+   * Provide a goal and target process, and this will autonomously take actions
+   * (click, type, scroll, etc.) until the goal is achieved or max_steps is reached.
+   * Uses Gemini's vision model to analyze screenshots and decide actions.
+   *
+   * @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
+   * @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
+   * @param {number} [maxSteps=20] - Maximum number of steps before stopping
+   * @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
+   */
+  geminiComputerUse(process: string, goal: string, maxSteps?: number | undefined | null): Promise<ComputerUseResult>
 }
 /** A UI element in the accessibility tree. */
 export declare class Element {
@@ -535,6 +589,12 @@ export declare class Element {
    * @returns {number} The process ID.
    */
   processId(): number
+  /**
+   * Get the process name of the application containing this element.
+   *
+   * @returns {string} The process name (e.g., "chrome", "notepad").
+   */
+  processName(): string
   toString(): string
   /**
    * Sets the transparency of the window.

package/package.json CHANGED Viewed

@@ -41,11 +41,11 @@
     }
   },
   "optionalDependencies": {
-    "@mediar-ai/terminator-darwin-arm64": "0.23.34",
-    "@mediar-ai/terminator-darwin-x64": "0.23.34",
-    "@mediar-ai/terminator-linux-x64-gnu": "0.23.34",
-    "@mediar-ai/terminator-win32-arm64-msvc": "0.23.34",
-    "@mediar-ai/terminator-win32-x64-msvc": "0.23.34"
+    "@mediar-ai/terminator-darwin-arm64": "0.23.36",
+    "@mediar-ai/terminator-darwin-x64": "0.23.36",
+    "@mediar-ai/terminator-linux-x64-gnu": "0.23.36",
+    "@mediar-ai/terminator-win32-arm64-msvc": "0.23.36",
+    "@mediar-ai/terminator-win32-x64-msvc": "0.23.36"
   },
   "repository": {
     "type": "git",
@@ -63,5 +63,5 @@
     "test-hook": "powershell.exe -ExecutionPolicy Bypass -File \"../../.git/hooks/pre-push.ps1\""
   },
   "types": "wrapper.d.ts",
-  "version": "0.23.34"
+  "version": "0.23.36"
 }

package/src/desktop.rs CHANGED Viewed

@@ -1,4 +1,4 @@
-use crate::types::{Monitor, MonitorScreenshotPair};
+use crate::types::{ComputerUseResult, Monitor, MonitorScreenshotPair};
 use crate::Selector;
 use crate::{
     map_error, CommandOutput, Element, Locator, ScreenshotResult, TreeBuildConfig, UINode,
@@ -524,4 +524,28 @@ impl Desktop {
     pub async fn set_zoom(&self, percentage: u32) -> napi::Result<()> {
         self.inner.set_zoom(percentage).await.map_err(map_error)
     }
+    /// (async) Run Gemini Computer Use agentic loop.
+    ///
+    /// Provide a goal and target process, and this will autonomously take actions
+    /// (click, type, scroll, etc.) until the goal is achieved or max_steps is reached.
+    /// Uses Gemini's vision model to analyze screenshots and decide actions.
+    ///
+    /// @param {string} process - Process name of the target application (e.g., "chrome", "notepad")
+    /// @param {string} goal - What to achieve (e.g., "Open Notepad and type Hello World")
+    /// @param {number} [maxSteps=20] - Maximum number of steps before stopping
+    /// @returns {Promise<ComputerUseResult>} Result with status, steps executed, and history
+    #[napi]
+    pub async fn gemini_computer_use(
+        &self,
+        process: String,
+        goal: String,
+        max_steps: Option<u32>,
+    ) -> napi::Result<ComputerUseResult> {
+        self.inner
+            .gemini_computer_use(&process, &goal, max_steps, None)
+            .await
+            .map(ComputerUseResult::from)
+            .map_err(|e| napi::Error::from_reason(e.to_string()))
+    }
 }

package/src/element.rs CHANGED Viewed

@@ -427,6 +427,14 @@ impl Element {
         self.inner.process_id().map_err(map_error)
     }
+    /// Get the process name of the application containing this element.
+    ///
+    /// @returns {string} The process name (e.g., "chrome", "notepad").
+    #[napi]
+    pub fn process_name(&self) -> napi::Result<String> {
+        self.inner.process_name().map_err(map_error)
+    }
     #[napi]
     pub fn to_string(&self) -> napi::Result<String> {
         let id_part = self.inner.id().map_or("null".to_string(), |id| id);

package/src/types.rs CHANGED Viewed

@@ -307,3 +307,99 @@ pub(crate) fn serializable_to_ui_node(elem: &terminator::SerializableUIElement)
         children,
     }
 }
+// ===== Computer Use Types =====
+/// A single step in the computer use execution
+#[napi(object)]
+pub struct ComputerUseStep {
+    /// Step number (1-indexed)
+    pub step: u32,
+    /// Action that was executed
+    pub action: String,
+    /// Arguments passed to the action (as JSON string)
+    pub args: String,
+    /// Whether the action succeeded
+    pub success: bool,
+    /// Error message if action failed
+    pub error: Option<String>,
+    /// Model's reasoning text for this step
+    pub text: Option<String>,
+}
+/// Pending confirmation info when safety check triggers
+#[napi(object)]
+pub struct ComputerUsePendingConfirmation {
+    /// Action that needs confirmation
+    pub action: String,
+    /// Arguments for the action (as JSON string)
+    pub args: String,
+    /// Model's explanation text
+    pub text: Option<String>,
+}
+/// Result of the computer use execution
+#[napi(object)]
+pub struct ComputerUseResult {
+    /// Status: "success", "failed", "needs_confirmation", "max_steps_reached"
+    pub status: String,
+    /// The goal that was attempted
+    pub goal: String,
+    /// Number of steps executed
+    pub steps_executed: u32,
+    /// Last action performed
+    pub final_action: String,
+    /// Final text response from model
+    pub final_text: Option<String>,
+    /// History of all steps
+    pub steps: Vec<ComputerUseStep>,
+    /// Pending confirmation info if status is "needs_confirmation"
+    pub pending_confirmation: Option<ComputerUsePendingConfirmation>,
+}
+impl From<terminator::ComputerUseStep> for ComputerUseStep {
+    fn from(step: terminator::ComputerUseStep) -> Self {
+        ComputerUseStep {
+            step: step.step,
+            action: step.action,
+            args: step.args.to_string(),
+            success: step.success,
+            error: step.error,
+            text: step.text,
+        }
+    }
+}
+impl From<terminator::ComputerUseResult> for ComputerUseResult {
+    fn from(result: terminator::ComputerUseResult) -> Self {
+        let pending_confirmation =
+            result
+                .pending_confirmation
+                .map(|pc| ComputerUsePendingConfirmation {
+                    action: pc
+                        .get("action")
+                        .and_then(|v| v.as_str())
+                        .unwrap_or("")
+                        .to_string(),
+                    args: pc.get("args").map(|v| v.to_string()).unwrap_or_default(),
+                    text: pc
+                        .get("text")
+                        .and_then(|v| v.as_str())
+                        .map(|s| s.to_string()),
+                });
+        ComputerUseResult {
+            status: result.status,
+            goal: result.goal,
+            steps_executed: result.steps_executed,
+            final_action: result.final_action,
+            final_text: result.final_text,
+            steps: result
+                .steps
+                .into_iter()
+                .map(ComputerUseStep::from)
+                .collect(),
+            pending_confirmation,
+        }
+    }
+}