windows-use 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +188 -70
- package/dist/cli.js +662 -129
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +88 -16
- package/dist/index.js +467 -88
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +504 -124
- package/dist/mcp/server.js.map +1 -1
- package/package.json +10 -1
package/dist/index.d.ts
CHANGED
|
@@ -7,7 +7,7 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
7
7
|
baseURL: z.ZodString;
|
|
8
8
|
model: z.ZodString;
|
|
9
9
|
maxSteps: z.ZodDefault<z.ZodNumber>;
|
|
10
|
-
|
|
10
|
+
maxRounds: z.ZodDefault<z.ZodNumber>;
|
|
11
11
|
cdpUrl: z.ZodDefault<z.ZodString>;
|
|
12
12
|
timeoutMs: z.ZodDefault<z.ZodNumber>;
|
|
13
13
|
}, "strip", z.ZodTypeAny, {
|
|
@@ -15,7 +15,7 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
15
15
|
baseURL: string;
|
|
16
16
|
model: string;
|
|
17
17
|
maxSteps: number;
|
|
18
|
-
|
|
18
|
+
maxRounds: number;
|
|
19
19
|
cdpUrl: string;
|
|
20
20
|
timeoutMs: number;
|
|
21
21
|
}, {
|
|
@@ -23,25 +23,31 @@ declare const ConfigSchema: z.ZodObject<{
|
|
|
23
23
|
baseURL: string;
|
|
24
24
|
model: string;
|
|
25
25
|
maxSteps?: number | undefined;
|
|
26
|
-
|
|
26
|
+
maxRounds?: number | undefined;
|
|
27
27
|
cdpUrl?: string | undefined;
|
|
28
28
|
timeoutMs?: number | undefined;
|
|
29
29
|
}>;
|
|
30
30
|
type Config = z.infer<typeof ConfigSchema>;
|
|
31
31
|
|
|
32
|
+
/**
|
|
33
|
+
* Load config with priority: overrides > env vars > config file > defaults
|
|
34
|
+
*/
|
|
32
35
|
declare function loadConfig(overrides?: Partial<Config>): Config;
|
|
33
36
|
|
|
34
37
|
/**
|
|
35
38
|
* Manages a Playwright CDP connection to the user's Chrome.
|
|
36
|
-
*
|
|
39
|
+
* Auto-launches Chrome with --remote-debugging-port if not already running.
|
|
40
|
+
* Syncs user's Chrome profile to preserve cookies/login state.
|
|
37
41
|
*/
|
|
38
42
|
declare class BrowserClient {
|
|
39
43
|
private browser;
|
|
40
44
|
private context;
|
|
41
45
|
private _page;
|
|
42
46
|
private cdpUrl;
|
|
47
|
+
private chromeProcess;
|
|
43
48
|
constructor(cdpUrl: string);
|
|
44
49
|
connect(): Promise<void>;
|
|
50
|
+
private launchChrome;
|
|
45
51
|
getPage(): Promise<Page>;
|
|
46
52
|
/** Create a new tab and switch to it. */
|
|
47
53
|
newPage(): Promise<Page>;
|
|
@@ -49,11 +55,49 @@ declare class BrowserClient {
|
|
|
49
55
|
get connected(): boolean;
|
|
50
56
|
}
|
|
51
57
|
|
|
58
|
+
interface StoredScreenshot {
|
|
59
|
+
id: string;
|
|
60
|
+
base64: string;
|
|
61
|
+
mimeType: 'image/png' | 'image/jpeg';
|
|
62
|
+
label: string;
|
|
63
|
+
}
|
|
64
|
+
/**
|
|
65
|
+
* Simple in-memory screenshot store.
|
|
66
|
+
* Screenshot tools save images here with auto-incrementing IDs.
|
|
67
|
+
* Report content references them via [Image:img_1] markers.
|
|
68
|
+
*/
|
|
69
|
+
declare class ScreenshotStore {
|
|
70
|
+
private counter;
|
|
71
|
+
private store;
|
|
72
|
+
save(base64: string, mimeType: 'image/png' | 'image/jpeg', label: string): string;
|
|
73
|
+
get(id: string): StoredScreenshot | undefined;
|
|
74
|
+
listIds(): string[];
|
|
75
|
+
}
|
|
76
|
+
/** A block in parsed report content */
|
|
77
|
+
type ContentBlock = {
|
|
78
|
+
type: 'text';
|
|
79
|
+
text: string;
|
|
80
|
+
} | {
|
|
81
|
+
type: 'image';
|
|
82
|
+
id: string;
|
|
83
|
+
base64: string;
|
|
84
|
+
mimeType: 'image/png' | 'image/jpeg';
|
|
85
|
+
label: string;
|
|
86
|
+
};
|
|
87
|
+
/**
|
|
88
|
+
* Parse report content string, expanding [Image:img_X] markers into image blocks.
|
|
89
|
+
* Returns an array of text and image content blocks.
|
|
90
|
+
*/
|
|
91
|
+
declare function parseReportContent(content: string, store: ScreenshotStore): ContentBlock[];
|
|
92
|
+
/** Strip [Image:...] markers, returning text-only content */
|
|
93
|
+
declare function stripImageMarkers(content: string): string;
|
|
52
94
|
interface ToolContext {
|
|
53
95
|
sessionId: string;
|
|
54
96
|
cdpUrl: string;
|
|
55
97
|
/** Lazy browser client getter — only connects on first call */
|
|
56
98
|
getBrowser: () => Promise<BrowserClient>;
|
|
99
|
+
/** Screenshot store — tools save screenshots here, report references by [Image:id] */
|
|
100
|
+
screenshots: ScreenshotStore;
|
|
57
101
|
}
|
|
58
102
|
type ToolResult = {
|
|
59
103
|
type: 'text';
|
|
@@ -62,11 +106,11 @@ type ToolResult = {
|
|
|
62
106
|
type: 'image';
|
|
63
107
|
base64: string;
|
|
64
108
|
mimeType: 'image/png' | 'image/jpeg';
|
|
109
|
+
screenshotId: string;
|
|
65
110
|
} | {
|
|
66
111
|
type: 'report';
|
|
67
112
|
status: 'completed' | 'blocked' | 'need_guidance';
|
|
68
|
-
|
|
69
|
-
screenshot?: string;
|
|
113
|
+
content: string;
|
|
70
114
|
data?: unknown;
|
|
71
115
|
};
|
|
72
116
|
interface ToolDefinition {
|
|
@@ -86,17 +130,15 @@ declare class ToolRegistry {
|
|
|
86
130
|
|
|
87
131
|
type Message = OpenAI.Chat.Completions.ChatCompletionMessageParam;
|
|
88
132
|
/**
|
|
89
|
-
*
|
|
90
|
-
*
|
|
133
|
+
* Simple message history — stores all messages without windowing.
|
|
134
|
+
* Small models are cheap, no need to truncate context.
|
|
91
135
|
*/
|
|
92
136
|
declare class ContextManager {
|
|
93
137
|
private messages;
|
|
94
|
-
private readonly maxMessages;
|
|
95
|
-
constructor(maxMessages: number);
|
|
96
138
|
append(message: Message): void;
|
|
97
|
-
/** Returns
|
|
98
|
-
|
|
99
|
-
/** Total messages stored
|
|
139
|
+
/** Returns all messages. */
|
|
140
|
+
getMessages(): Message[];
|
|
141
|
+
/** Total messages stored. */
|
|
100
142
|
get length(): number;
|
|
101
143
|
}
|
|
102
144
|
|
|
@@ -109,11 +151,31 @@ declare class LLMClient {
|
|
|
109
151
|
|
|
110
152
|
interface RunResult {
|
|
111
153
|
status: 'completed' | 'blocked' | 'need_guidance';
|
|
112
|
-
|
|
113
|
-
|
|
154
|
+
/** Rich content with [Image:img_X] markers. Use parseReportContent() to expand. */
|
|
155
|
+
content: string;
|
|
114
156
|
data?: unknown;
|
|
115
157
|
stepsUsed: number;
|
|
116
158
|
}
|
|
159
|
+
type StepEvent = {
|
|
160
|
+
type: 'thinking';
|
|
161
|
+
step: number;
|
|
162
|
+
content: string;
|
|
163
|
+
} | {
|
|
164
|
+
type: 'tool_call';
|
|
165
|
+
step: number;
|
|
166
|
+
name: string;
|
|
167
|
+
args: unknown;
|
|
168
|
+
} | {
|
|
169
|
+
type: 'tool_result';
|
|
170
|
+
step: number;
|
|
171
|
+
name: string;
|
|
172
|
+
result: string;
|
|
173
|
+
} | {
|
|
174
|
+
type: 'error';
|
|
175
|
+
step: number;
|
|
176
|
+
message: string;
|
|
177
|
+
};
|
|
178
|
+
type OnStepCallback = (event: StepEvent) => void;
|
|
117
179
|
declare class AgentRunner {
|
|
118
180
|
private llmClient;
|
|
119
181
|
private contextManager;
|
|
@@ -121,7 +183,16 @@ declare class AgentRunner {
|
|
|
121
183
|
private config;
|
|
122
184
|
private toolContext;
|
|
123
185
|
private initialized;
|
|
186
|
+
private onStep;
|
|
187
|
+
private roundsUsed;
|
|
124
188
|
constructor(llmClient: LLMClient, contextManager: ContextManager, toolRegistry: ToolRegistry, config: Config, toolContext: ToolContext);
|
|
189
|
+
/** Register a callback to receive step-by-step progress events */
|
|
190
|
+
setOnStep(cb: OnStepCallback): void;
|
|
191
|
+
private emit;
|
|
192
|
+
/** How many instruction rounds have been used in this session */
|
|
193
|
+
get currentRound(): number;
|
|
194
|
+
/** Whether this session has exhausted its max rounds */
|
|
195
|
+
get roundsExhausted(): boolean;
|
|
125
196
|
run(instruction: string): Promise<RunResult>;
|
|
126
197
|
}
|
|
127
198
|
|
|
@@ -132,6 +203,7 @@ interface Session {
|
|
|
132
203
|
config: Config;
|
|
133
204
|
runner: AgentRunner;
|
|
134
205
|
browserClient: BrowserClient;
|
|
206
|
+
screenshots: ScreenshotStore;
|
|
135
207
|
timeoutHandle: ReturnType<typeof setTimeout>;
|
|
136
208
|
}
|
|
137
209
|
declare class SessionRegistry {
|
|
@@ -145,4 +217,4 @@ declare class SessionRegistry {
|
|
|
145
217
|
|
|
146
218
|
declare function createToolRegistry(): ToolRegistry;
|
|
147
219
|
|
|
148
|
-
export { AgentRunner, BrowserClient, type Config, ContextManager, LLMClient, type RunResult, type Session, SessionRegistry, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig };
|
|
220
|
+
export { AgentRunner, BrowserClient, type Config, type ContentBlock, ContextManager, LLMClient, type OnStepCallback, type RunResult, ScreenshotStore, type Session, SessionRegistry, type StepEvent, type StoredScreenshot, type ToolContext, type ToolDefinition, ToolRegistry, type ToolResult, createToolRegistry, loadConfig, parseReportContent, stripImageMarkers };
|