windows-use 0.1.0 → 0.2.0

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -7,7 +7,7 @@ declare const ConfigSchema: z.ZodObject<{
7
7
  baseURL: z.ZodString;
8
8
  model: z.ZodString;
9
9
  maxSteps: z.ZodDefault<z.ZodNumber>;
10
- contextWindowSize: z.ZodDefault<z.ZodNumber>;
10
+ maxRounds: z.ZodDefault<z.ZodNumber>;
11
11
  cdpUrl: z.ZodDefault<z.ZodString>;
12
12
  timeoutMs: z.ZodDefault<z.ZodNumber>;
13
13
  }, "strip", z.ZodTypeAny, {
@@ -15,7 +15,7 @@ declare const ConfigSchema: z.ZodObject<{
15
15
  baseURL: string;
16
16
  model: string;
17
17
  maxSteps: number;
18
- contextWindowSize: number;
18
+ maxRounds: number;
19
19
  cdpUrl: string;
20
20
  timeoutMs: number;
21
21
  }, {
@@ -23,25 +23,31 @@ declare const ConfigSchema: z.ZodObject<{
23
23
  baseURL: string;
24
24
  model: string;
25
25
  maxSteps?: number | undefined;
26
- contextWindowSize?: number | undefined;
26
+ maxRounds?: number | undefined;
27
27
  cdpUrl?: string | undefined;
28
28
  timeoutMs?: number | undefined;
29
29
  }>;
30
30
  type Config = z.infer<typeof ConfigSchema>;
31
31
 
32
+ /**
33
+ * Load config with priority: overrides > env vars > config file > defaults
34
+ */
32
35
  declare function loadConfig(overrides?: Partial<Config>): Config;
33
36
 
34
37
  /**
35
38
  * Manages a Playwright CDP connection to the user's Chrome.
36
- * Lazy-initialized: only connects when first browser tool is called.
39
+ * Auto-launches Chrome with --remote-debugging-port if not already running.
40
+ * Syncs user's Chrome profile to preserve cookies/login state.
37
41
  */
38
42
  declare class BrowserClient {
39
43
  private browser;
40
44
  private context;
41
45
  private _page;
42
46
  private cdpUrl;
47
+ private chromeProcess;
43
48
  constructor(cdpUrl: string);
44
49
  connect(): Promise<void>;
50
+ private launchChrome;
45
51
  getPage(): Promise<Page>;
46
52
  /** Create a new tab and switch to it. */
47
53
  newPage(): Promise<Page>;
@@ -49,11 +55,49 @@ declare class BrowserClient {
49
55
  get connected(): boolean;
50
56
  }
51
57
 
58
+ interface StoredScreenshot {
59
+ id: string;
60
+ base64: string;
61
+ mimeType: 'image/png' | 'image/jpeg';
62
+ label: string;
63
+ }
64
+ /**
65
+ * Simple in-memory screenshot store.
66
+ * Screenshot tools save images here with auto-incrementing IDs.
67
+ * Report content references them via [Image:img_1] markers.
68
+ */
69
+ declare class ScreenshotStore {
70
+ private counter;
71
+ private store;
72
+ save(base64: string, mimeType: 'image/png' | 'image/jpeg', label: string): string;
73
+ get(id: string): StoredScreenshot | undefined;
74
+ listIds(): string[];
75
+ }
76
+ /** A block in parsed report content */
77
+ type ContentBlock = {
78
+ type: 'text';
79
+ text: string;
80
+ } | {
81
+ type: 'image';
82
+ id: string;
83
+ base64: string;
84
+ mimeType: 'image/png' | 'image/jpeg';
85
+ label: string;
86
+ };
87
+ /**
88
+ * Parse report content string, expanding [Image:img_X] markers into image blocks.
89
+ * Returns an array of text and image content blocks.
90
+ */
91
+ declare function parseReportContent(content: string, store: ScreenshotStore): ContentBlock[];
92
+ /** Strip [Image:...] markers, returning text-only content */
93
+ declare function stripImageMarkers(content: string): string;
52
94
  interface ToolContext {
53
95
  sessionId: string;
54
96
  cdpUrl: string;
55
97
  /** Lazy browser client getter — only connects on first call */
56
98
  getBrowser: () => Promise<BrowserClient>;
99
+ /** Screenshot store — tools save screenshots here, report references by [Image:id] */
100
+ screenshots: ScreenshotStore;
57
101
  }
58
102
  type ToolResult = {
59
103
  type: 'text';
@@ -62,11 +106,11 @@ type ToolResult = {
62
106
  type: 'image';
63
107
  base64: string;
64
108
  mimeType: 'image/png' | 'image/jpeg';
109
+ screenshotId: string;
65
110
  } | {
66
111
  type: 'report';
67
112
  status: 'completed' | 'blocked' | 'need_guidance';
68
- summary: string;
69
- screenshot?: string;
113
+ content: string;
70
114
  data?: unknown;
71
115
  };
72
116
  interface ToolDefinition {
@@ -86,17 +130,15 @@ declare class ToolRegistry {
86
130
 
87
131
  type Message = OpenAI.Chat.Completions.ChatCompletionMessageParam;
88
132
  /**
89
- * Sliding window message history.
90
- * Always keeps: system prompt (index 0) + most recent N messages.
133
+ * Simple message history — stores all messages without windowing.
134
+ * Small models are cheap, no need to truncate context.
91
135
  */
92
136
  declare class ContextManager {
93
137
  private messages;
94
- private readonly maxMessages;
95
- constructor(maxMessages: number);
96
138
  append(message: Message): void;
97
- /** Returns the system prompt + the most recent messages within the window. */
98
- getWindow(): Message[];
99
- /** Total messages stored (before windowing). */
139
+ /** Returns all messages. */
140
+ getMessages(): Message[];
141
+ /** Total messages stored. */
100
142
  get length(): number;
101
143
  }
102
144
 
@@ -109,11 +151,31 @@ declare class LLMClient {
109
151
 
110
152
  interface RunResult {
111
153
  status: 'completed' | 'blocked' | 'need_guidance';
112
- summary: string;
113
- screenshot?: string;
154
+ /** Rich content with [Image:img_X] markers. Use parseReportContent() to expand. */
155
+ content: string;
114
156
  data?: unknown;
115
157
  stepsUsed: number;
116
158
  }
159
+ type StepEvent = {
160
+ type: 'thinking';
161
+ step: number;
162
+ content: string;
163
+ } | {
164
+ type: 'tool_call';
165
+ step: number;
166
+ name: string;
167
+ args: unknown;
168
+ } | {
169
+ type: 'tool_result';
170
+ step: number;
171
+ name: string;
172
+ result: string;
173
+ } | {
174
+ type: 'error';
175
+ step: number;
176
+ message: string;
177
+ };
178
+ type OnStepCallback = (event: StepEvent) => void;
117
179
  declare class AgentRunner {
118
180
  private llmClient;
119
181
  private contextManager;
@@ -121,7 +183,16 @@ declare class AgentRunner {
121
183
  private config;
122
184
  private toolContext;
123
185
  private initialized;
186
+ private onStep;
187
+ private roundsUsed;
124
188
  constructor(llmClient: LLMClient, contextManager: ContextManager, toolRegistry: ToolRegistry, config: Config, toolContext: ToolContext);
189
+ /** Register a callback to receive step-by-step progress events */
190
+ setOnStep(cb: OnStepCallback): void;
191
+ private emit;
192
+ /** How many instruction rounds have been used in this session */
193
+ get currentRound(): number;
194
+ /** Whether this session has exhausted its max rounds */
195
+ get roundsExhausted(): boolean;
125
196
  run(instruction: string): Promise<RunResult>;
126
197
  }
127
198
 
@@ -132,6 +203,7 @@ interface Session {
132
203
  config: Config;
133
204
  runner: AgentRunner;
134
205
  browserClient: BrowserClient;
206
+ screenshots: ScreenshotStore;
135
207
  timeoutHandle: ReturnType<typeof setTimeout>;
136
208
  }
137
209
  declare class SessionRegistry {
@@ -145,4 +217,4 @@ declare class SessionRegistry {
145
217
 
146
218
  declare function createToolRegistry(): ToolRegistry;
147
219
 
148
- export { AgentRunner, BrowserClient, type Config, ContextManager, LLMClient, type RunResult, type Session, SessionRegistry, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig };
220
+ export { AgentRunner, BrowserClient, type Config, type ContentBlock, ContextManager, LLMClient, type OnStepCallback, type RunResult, ScreenshotStore, type Session, SessionRegistry, type StepEvent, type StoredScreenshot, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig, parseReportContent, stripImageMarkers };