npm - windows-use - Versions diffs - 0.1.0 → 0.2.0 - Mend

windows-use 0.1.0 → 0.2.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (9)

package/dist/index.d.ts — CHANGED

@@ -7,7 +7,7 @@ declare const ConfigSchema: z.ZodObject<{
     baseURL: z.ZodString;
     model: z.ZodString;
     maxSteps: z.ZodDefault<z.ZodNumber>;
-    contextWindowSize: z.ZodDefault<z.ZodNumber>;
+    maxRounds: z.ZodDefault<z.ZodNumber>;
     cdpUrl: z.ZodDefault<z.ZodString>;
     timeoutMs: z.ZodDefault<z.ZodNumber>;
 }, "strip", z.ZodTypeAny, {
@@ -15,7 +15,7 @@ declare const ConfigSchema: z.ZodObject<{
     baseURL: string;
     model: string;
     maxSteps: number;
-    contextWindowSize: number;
+    maxRounds: number;
     cdpUrl: string;
     timeoutMs: number;
 }, {
@@ -23,25 +23,31 @@ declare const ConfigSchema: z.ZodObject<{
     baseURL: string;
     model: string;
     maxSteps?: number | undefined;
-    contextWindowSize?: number | undefined;
+    maxRounds?: number | undefined;
     cdpUrl?: string | undefined;
     timeoutMs?: number | undefined;
 }>;
 type Config = z.infer<typeof ConfigSchema>;
+/**
+ * Load config with priority: overrides > env vars > config file > defaults
+ */
 declare function loadConfig(overrides?: Partial<Config>): Config;
 /**
  * Manages a Playwright CDP connection to the user's Chrome.
- * Lazy-initialized: only connects when first browser tool is called.
+ * Auto-launches Chrome with --remote-debugging-port if not already running.
+ * Syncs user's Chrome profile to preserve cookies/login state.
  */
 declare class BrowserClient {
     private browser;
     private context;
     private _page;
     private cdpUrl;
+    private chromeProcess;
     constructor(cdpUrl: string);
     connect(): Promise<void>;
+    private launchChrome;
     getPage(): Promise<Page>;
     /** Create a new tab and switch to it. */
     newPage(): Promise<Page>;
@@ -49,11 +55,49 @@ declare class BrowserClient {
     get connected(): boolean;
 }
+interface StoredScreenshot {
+    id: string;
+    base64: string;
+    mimeType: 'image/png' | 'image/jpeg';
+    label: string;
+}
+/**
+ * Simple in-memory screenshot store.
+ * Screenshot tools save images here with auto-incrementing IDs.
+ * Report content references them via [Image:img_1] markers.
+ */
+declare class ScreenshotStore {
+    private counter;
+    private store;
+    save(base64: string, mimeType: 'image/png' | 'image/jpeg', label: string): string;
+    get(id: string): StoredScreenshot | undefined;
+    listIds(): string[];
+}
+/** A block in parsed report content */
+type ContentBlock = {
+    type: 'text';
+    text: string;
+} | {
+    type: 'image';
+    id: string;
+    base64: string;
+    mimeType: 'image/png' | 'image/jpeg';
+    label: string;
+};
+/**
+ * Parse report content string, expanding [Image:img_X] markers into image blocks.
+ * Returns an array of text and image content blocks.
+ */
+declare function parseReportContent(content: string, store: ScreenshotStore): ContentBlock[];
+/** Strip [Image:...] markers, returning text-only content */
+declare function stripImageMarkers(content: string): string;
 interface ToolContext {
     sessionId: string;
     cdpUrl: string;
     /** Lazy browser client getter — only connects on first call */
     getBrowser: () => Promise<BrowserClient>;
+    /** Screenshot store — tools save screenshots here, report references by [Image:id] */
+    screenshots: ScreenshotStore;
 }
 type ToolResult = {
     type: 'text';
@@ -62,11 +106,11 @@ type ToolResult = {
     type: 'image';
     base64: string;
     mimeType: 'image/png' | 'image/jpeg';
+    screenshotId: string;
 } | {
     type: 'report';
     status: 'completed' | 'blocked' | 'need_guidance';
-    summary: string;
-    screenshot?: string;
+    content: string;
     data?: unknown;
 };
 interface ToolDefinition {
@@ -86,17 +130,15 @@ declare class ToolRegistry {
 type Message = OpenAI.Chat.Completions.ChatCompletionMessageParam;
 /**
- * Sliding window message history.
- * Always keeps: system prompt (index 0) + most recent N messages.
+ * Simple message history — stores all messages without windowing.
+ * Small models are cheap, no need to truncate context.
  */
 declare class ContextManager {
     private messages;
-    private readonly maxMessages;
-    constructor(maxMessages: number);
     append(message: Message): void;
-    /** Returns the system prompt + the most recent messages within the window. */
-    getWindow(): Message[];
-    /** Total messages stored (before windowing). */
+    /** Returns all messages. */
+    getMessages(): Message[];
+    /** Total messages stored. */
     get length(): number;
 }
@@ -109,11 +151,31 @@ declare class LLMClient {
 interface RunResult {
     status: 'completed' | 'blocked' | 'need_guidance';
-    summary: string;
-    screenshot?: string;
+    /** Rich content with [Image:img_X] markers. Use parseReportContent() to expand. */
+    content: string;
     data?: unknown;
     stepsUsed: number;
 }
+type StepEvent = {
+    type: 'thinking';
+    step: number;
+    content: string;
+} | {
+    type: 'tool_call';
+    step: number;
+    name: string;
+    args: unknown;
+} | {
+    type: 'tool_result';
+    step: number;
+    name: string;
+    result: string;
+} | {
+    type: 'error';
+    step: number;
+    message: string;
+};
+type OnStepCallback = (event: StepEvent) => void;
 declare class AgentRunner {
     private llmClient;
     private contextManager;
@@ -121,7 +183,16 @@ declare class AgentRunner {
     private config;
     private toolContext;
     private initialized;
+    private onStep;
+    private roundsUsed;
     constructor(llmClient: LLMClient, contextManager: ContextManager, toolRegistry: ToolRegistry, config: Config, toolContext: ToolContext);
+    /** Register a callback to receive step-by-step progress events */
+    setOnStep(cb: OnStepCallback): void;
+    private emit;
+    /** How many instruction rounds have been used in this session */
+    get currentRound(): number;
+    /** Whether this session has exhausted its max rounds */
+    get roundsExhausted(): boolean;
     run(instruction: string): Promise<RunResult>;
 }
@@ -132,6 +203,7 @@ interface Session {
     config: Config;
     runner: AgentRunner;
     browserClient: BrowserClient;
+    screenshots: ScreenshotStore;
     timeoutHandle: ReturnType<typeof setTimeout>;
 }
 declare class SessionRegistry {
@@ -145,4 +217,4 @@ declare class SessionRegistry {
 declare function createToolRegistry(): ToolRegistry;
-export { AgentRunner, BrowserClient, type Config, ContextManager, LLMClient, type RunResult, type Session, SessionRegistry, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig };
+export { AgentRunner, BrowserClient, type Config, type ContentBlock, ContextManager, LLMClient, type OnStepCallback, type RunResult, ScreenshotStore, type Session, SessionRegistry, type StepEvent, type StoredScreenshot, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig, parseReportContent, stripImageMarkers };