screenpipe-mcp 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -20
- package/dist/index.js +158 -489
- package/manifest.json +3 -31
- package/package.json +2 -2
- package/src/index.ts +159 -521
package/src/index.ts
CHANGED
|
@@ -30,12 +30,6 @@ function getCurrentDateInfo(): { isoDate: string; localDate: string } {
|
|
|
30
30
|
};
|
|
31
31
|
}
|
|
32
32
|
|
|
33
|
-
// Detect OS
|
|
34
|
-
const CURRENT_OS = process.platform;
|
|
35
|
-
const IS_MACOS = CURRENT_OS === "darwin";
|
|
36
|
-
const IS_WINDOWS = CURRENT_OS === "win32";
|
|
37
|
-
const IS_LINUX = CURRENT_OS === "linux";
|
|
38
|
-
|
|
39
33
|
// Parse command line arguments
|
|
40
34
|
const args = process.argv.slice(2);
|
|
41
35
|
let port = 3030;
|
|
@@ -51,7 +45,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
51
45
|
const server = new Server(
|
|
52
46
|
{
|
|
53
47
|
name: "screenpipe",
|
|
54
|
-
version: "0.
|
|
48
|
+
version: "0.7.0",
|
|
55
49
|
},
|
|
56
50
|
{
|
|
57
51
|
capabilities: {
|
|
@@ -85,7 +79,7 @@ const BASE_TOOLS: Tool[] = [
|
|
|
85
79
|
content_type: {
|
|
86
80
|
type: "string",
|
|
87
81
|
enum: ["all", "ocr", "audio", "ui"],
|
|
88
|
-
description: "Content type filter. Default: 'all'",
|
|
82
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
89
83
|
default: "all",
|
|
90
84
|
},
|
|
91
85
|
limit: {
|
|
@@ -129,57 +123,15 @@ const BASE_TOOLS: Tool[] = [
|
|
|
129
123
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
130
124
|
default: false,
|
|
131
125
|
},
|
|
132
|
-
|
|
133
|
-
},
|
|
134
|
-
},
|
|
135
|
-
{
|
|
136
|
-
name: "pixel-control",
|
|
137
|
-
description:
|
|
138
|
-
"Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
|
|
139
|
-
"Use this to type text, press keys, move the mouse, and click buttons.",
|
|
140
|
-
annotations: {
|
|
141
|
-
title: "Pixel Control",
|
|
142
|
-
destructiveHint: true,
|
|
143
|
-
},
|
|
144
|
-
inputSchema: {
|
|
145
|
-
type: "object",
|
|
146
|
-
properties: {
|
|
147
|
-
action_type: {
|
|
126
|
+
speaker_ids: {
|
|
148
127
|
type: "string",
|
|
149
|
-
|
|
150
|
-
description: "Type of input action to perform",
|
|
128
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
151
129
|
},
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
type: "string",
|
|
156
|
-
description:
|
|
157
|
-
"Text to type or key to press (for WriteText and KeyPress)",
|
|
158
|
-
},
|
|
159
|
-
{
|
|
160
|
-
type: "object",
|
|
161
|
-
properties: {
|
|
162
|
-
x: {
|
|
163
|
-
type: "integer",
|
|
164
|
-
description: "X coordinate for mouse movement",
|
|
165
|
-
},
|
|
166
|
-
y: {
|
|
167
|
-
type: "integer",
|
|
168
|
-
description: "Y coordinate for mouse movement",
|
|
169
|
-
},
|
|
170
|
-
},
|
|
171
|
-
description: "Coordinates for MouseMove",
|
|
172
|
-
},
|
|
173
|
-
{
|
|
174
|
-
type: "string",
|
|
175
|
-
enum: ["left", "right", "middle"],
|
|
176
|
-
description: "Button to click for MouseClick",
|
|
177
|
-
},
|
|
178
|
-
],
|
|
179
|
-
description: "Action-specific data",
|
|
130
|
+
speaker_name: {
|
|
131
|
+
type: "string",
|
|
132
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
180
133
|
},
|
|
181
134
|
},
|
|
182
|
-
required: ["action_type", "data"],
|
|
183
135
|
},
|
|
184
136
|
},
|
|
185
137
|
{
|
|
@@ -220,236 +172,91 @@ const BASE_TOOLS: Tool[] = [
|
|
|
220
172
|
required: ["start_time", "end_time"],
|
|
221
173
|
},
|
|
222
174
|
},
|
|
223
|
-
];
|
|
224
|
-
|
|
225
|
-
const MACOS_TOOLS: Tool[] = [
|
|
226
175
|
{
|
|
227
|
-
name: "
|
|
176
|
+
name: "search-ui-events",
|
|
228
177
|
description:
|
|
229
|
-
"
|
|
230
|
-
"This
|
|
231
|
-
"
|
|
232
|
-
"
|
|
233
|
-
"
|
|
234
|
-
"- Text inputs can be: 'AXTextField', 'AXTextArea', 'AXComboBox', 'AXSearchField'\n" +
|
|
235
|
-
"- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
|
|
236
|
-
"- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
|
|
237
|
-
"Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
|
|
178
|
+
"Search UI input events captured via accessibility APIs (macOS). " +
|
|
179
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
180
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
181
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
182
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
238
183
|
annotations: {
|
|
239
|
-
title: "
|
|
184
|
+
title: "Search UI Events (Accessibility)",
|
|
240
185
|
readOnlyHint: true,
|
|
241
186
|
},
|
|
242
187
|
inputSchema: {
|
|
243
188
|
type: "object",
|
|
244
189
|
properties: {
|
|
245
|
-
|
|
246
|
-
type: "string",
|
|
247
|
-
description:
|
|
248
|
-
"The name of the application (e.g., 'Chrome', 'Finder', 'Terminal')",
|
|
249
|
-
},
|
|
250
|
-
window: {
|
|
251
|
-
type: "string",
|
|
252
|
-
description: "The window name or title (optional)",
|
|
253
|
-
},
|
|
254
|
-
role: {
|
|
255
|
-
type: "string",
|
|
256
|
-
description:
|
|
257
|
-
"The role to search for (e.g., 'button', 'textfield', 'AXButton', 'AXTextField'). For best results, use MacOS AX prefixed roles.",
|
|
258
|
-
},
|
|
259
|
-
max_results: {
|
|
260
|
-
type: "integer",
|
|
261
|
-
description: "Maximum number of elements to return",
|
|
262
|
-
default: 10,
|
|
263
|
-
},
|
|
264
|
-
max_depth: {
|
|
265
|
-
type: "integer",
|
|
266
|
-
description: "Maximum depth of element tree to search",
|
|
267
|
-
},
|
|
268
|
-
use_background_apps: {
|
|
269
|
-
type: "boolean",
|
|
270
|
-
description: "Whether to look in background apps",
|
|
271
|
-
default: true,
|
|
272
|
-
},
|
|
273
|
-
activate_app: {
|
|
274
|
-
type: "boolean",
|
|
275
|
-
description: "Whether to activate the app before searching",
|
|
276
|
-
default: true,
|
|
277
|
-
},
|
|
278
|
-
},
|
|
279
|
-
required: ["app", "role"],
|
|
280
|
-
},
|
|
281
|
-
},
|
|
282
|
-
{
|
|
283
|
-
name: "click-element",
|
|
284
|
-
description:
|
|
285
|
-
"Click an element in an application using its id (MacOS only)",
|
|
286
|
-
annotations: {
|
|
287
|
-
title: "Click Element",
|
|
288
|
-
destructiveHint: true,
|
|
289
|
-
},
|
|
290
|
-
inputSchema: {
|
|
291
|
-
type: "object",
|
|
292
|
-
properties: {
|
|
293
|
-
app: {
|
|
294
|
-
type: "string",
|
|
295
|
-
description: "The name of the application",
|
|
296
|
-
},
|
|
297
|
-
window: {
|
|
298
|
-
type: "string",
|
|
299
|
-
description: "The window name (optional)",
|
|
300
|
-
},
|
|
301
|
-
id: {
|
|
302
|
-
type: "string",
|
|
303
|
-
description: "The id of the element to click",
|
|
304
|
-
},
|
|
305
|
-
use_background_apps: {
|
|
306
|
-
type: "boolean",
|
|
307
|
-
description: "Whether to look in background apps",
|
|
308
|
-
default: true,
|
|
309
|
-
},
|
|
310
|
-
activate_app: {
|
|
311
|
-
type: "boolean",
|
|
312
|
-
description: "Whether to activate the app before clicking",
|
|
313
|
-
default: true,
|
|
314
|
-
},
|
|
315
|
-
},
|
|
316
|
-
required: ["app", "id"],
|
|
317
|
-
},
|
|
318
|
-
},
|
|
319
|
-
{
|
|
320
|
-
name: "fill-element",
|
|
321
|
-
description: "Type text into an element in an application (MacOS only)",
|
|
322
|
-
annotations: {
|
|
323
|
-
title: "Fill Element",
|
|
324
|
-
destructiveHint: true,
|
|
325
|
-
},
|
|
326
|
-
inputSchema: {
|
|
327
|
-
type: "object",
|
|
328
|
-
properties: {
|
|
329
|
-
app: {
|
|
330
|
-
type: "string",
|
|
331
|
-
description: "The name of the application",
|
|
332
|
-
},
|
|
333
|
-
window: {
|
|
334
|
-
type: "string",
|
|
335
|
-
description: "The window name (optional)",
|
|
336
|
-
},
|
|
337
|
-
id: {
|
|
190
|
+
q: {
|
|
338
191
|
type: "string",
|
|
339
|
-
description: "
|
|
192
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
340
193
|
},
|
|
341
|
-
|
|
194
|
+
event_type: {
|
|
342
195
|
type: "string",
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
use_background_apps: {
|
|
346
|
-
type: "boolean",
|
|
347
|
-
description: "Whether to look in background apps",
|
|
348
|
-
default: true,
|
|
196
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
197
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
349
198
|
},
|
|
350
|
-
|
|
351
|
-
type: "boolean",
|
|
352
|
-
description: "Whether to activate the app before typing",
|
|
353
|
-
default: true,
|
|
354
|
-
},
|
|
355
|
-
},
|
|
356
|
-
required: ["app", "id", "text"],
|
|
357
|
-
},
|
|
358
|
-
},
|
|
359
|
-
{
|
|
360
|
-
name: "scroll-element",
|
|
361
|
-
description: "Scroll an element in a specific direction (MacOS only)",
|
|
362
|
-
annotations: {
|
|
363
|
-
title: "Scroll Element",
|
|
364
|
-
destructiveHint: true,
|
|
365
|
-
},
|
|
366
|
-
inputSchema: {
|
|
367
|
-
type: "object",
|
|
368
|
-
properties: {
|
|
369
|
-
app: {
|
|
199
|
+
app_name: {
|
|
370
200
|
type: "string",
|
|
371
|
-
description: "
|
|
201
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
372
202
|
},
|
|
373
|
-
|
|
203
|
+
window_name: {
|
|
374
204
|
type: "string",
|
|
375
|
-
description: "
|
|
205
|
+
description: "Filter by window title",
|
|
376
206
|
},
|
|
377
|
-
|
|
207
|
+
start_time: {
|
|
378
208
|
type: "string",
|
|
379
|
-
|
|
209
|
+
format: "date-time",
|
|
210
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
380
211
|
},
|
|
381
|
-
|
|
212
|
+
end_time: {
|
|
382
213
|
type: "string",
|
|
383
|
-
|
|
384
|
-
description: "
|
|
214
|
+
format: "date-time",
|
|
215
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
385
216
|
},
|
|
386
|
-
|
|
217
|
+
limit: {
|
|
387
218
|
type: "integer",
|
|
388
|
-
description: "
|
|
219
|
+
description: "Max results. Default: 50",
|
|
220
|
+
default: 50,
|
|
389
221
|
},
|
|
390
|
-
|
|
391
|
-
type: "
|
|
392
|
-
description: "
|
|
393
|
-
default:
|
|
394
|
-
},
|
|
395
|
-
activate_app: {
|
|
396
|
-
type: "boolean",
|
|
397
|
-
description: "Whether to activate the app before scrolling",
|
|
398
|
-
default: true,
|
|
399
|
-
},
|
|
400
|
-
},
|
|
401
|
-
required: ["app", "id", "direction", "amount"],
|
|
402
|
-
},
|
|
403
|
-
},
|
|
404
|
-
{
|
|
405
|
-
name: "open-application",
|
|
406
|
-
description: "Open an application by name",
|
|
407
|
-
annotations: {
|
|
408
|
-
title: "Open Application",
|
|
409
|
-
destructiveHint: true,
|
|
410
|
-
},
|
|
411
|
-
inputSchema: {
|
|
412
|
-
type: "object",
|
|
413
|
-
properties: {
|
|
414
|
-
app_name: {
|
|
415
|
-
type: "string",
|
|
416
|
-
description: "The name of the application to open",
|
|
222
|
+
offset: {
|
|
223
|
+
type: "integer",
|
|
224
|
+
description: "Skip N results for pagination. Default: 0",
|
|
225
|
+
default: 0,
|
|
417
226
|
},
|
|
418
227
|
},
|
|
419
|
-
required: ["app_name"],
|
|
420
228
|
},
|
|
421
229
|
},
|
|
422
230
|
{
|
|
423
|
-
name: "
|
|
424
|
-
description:
|
|
231
|
+
name: "get-ui-event-stats",
|
|
232
|
+
description:
|
|
233
|
+
"Get aggregated statistics of UI events by app and event type. " +
|
|
234
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
425
235
|
annotations: {
|
|
426
|
-
title: "
|
|
427
|
-
|
|
236
|
+
title: "UI Event Statistics",
|
|
237
|
+
readOnlyHint: true,
|
|
428
238
|
},
|
|
429
239
|
inputSchema: {
|
|
430
240
|
type: "object",
|
|
431
241
|
properties: {
|
|
432
|
-
|
|
242
|
+
start_time: {
|
|
433
243
|
type: "string",
|
|
434
|
-
|
|
244
|
+
format: "date-time",
|
|
245
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
435
246
|
},
|
|
436
|
-
|
|
247
|
+
end_time: {
|
|
437
248
|
type: "string",
|
|
438
|
-
|
|
249
|
+
format: "date-time",
|
|
250
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
439
251
|
},
|
|
440
252
|
},
|
|
441
|
-
required: ["url"],
|
|
442
253
|
},
|
|
443
254
|
},
|
|
444
255
|
];
|
|
445
256
|
|
|
446
257
|
// List tools handler
|
|
447
258
|
server.setRequestHandler(ListToolsRequestSchema, async () => {
|
|
448
|
-
|
|
449
|
-
if (IS_MACOS) {
|
|
450
|
-
tools.push(...MACOS_TOOLS);
|
|
451
|
-
}
|
|
452
|
-
return { tools };
|
|
259
|
+
return { tools: BASE_TOOLS };
|
|
453
260
|
});
|
|
454
261
|
|
|
455
262
|
// MCP Resources - provide dynamic context data
|
|
@@ -518,18 +325,20 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
518
325
|
mimeType: "text/markdown",
|
|
519
326
|
text: `# Screenpipe Search Guide
|
|
520
327
|
|
|
328
|
+
## Three Data Modalities
|
|
329
|
+
|
|
330
|
+
Screenpipe captures three types of data:
|
|
331
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
332
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
333
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
334
|
+
|
|
521
335
|
## Quick Start
|
|
522
336
|
- **Get recent activity**: Call search-content with no parameters
|
|
523
337
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
524
|
-
- **
|
|
338
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
339
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
525
340
|
|
|
526
|
-
##
|
|
527
|
-
- \`ocr\`: Screen text (what you see)
|
|
528
|
-
- \`audio\`: Transcribed speech
|
|
529
|
-
- \`ui\`: UI element interactions
|
|
530
|
-
- \`all\`: Everything (default)
|
|
531
|
-
|
|
532
|
-
## Key Parameters
|
|
341
|
+
## search-content (Vision + Audio)
|
|
533
342
|
| Parameter | Description | Default |
|
|
534
343
|
|-----------|-------------|---------|
|
|
535
344
|
| q | Search query | (none - returns all) |
|
|
@@ -540,11 +349,27 @@ server.setRequestHandler(ReadResourceRequestSchema, async (request) => {
|
|
|
540
349
|
| app_name | Filter by app | (no filter) |
|
|
541
350
|
| include_frames | Include screenshots | false |
|
|
542
351
|
|
|
352
|
+
## search-ui-events (Accessibility Data)
|
|
353
|
+
| Parameter | Description | Default |
|
|
354
|
+
|-----------|-------------|---------|
|
|
355
|
+
| q | Search text content, app, window | (none) |
|
|
356
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
357
|
+
| app_name | Filter by application | (no filter) |
|
|
358
|
+
| limit | Max results | 50 |
|
|
359
|
+
|
|
360
|
+
### Event Types
|
|
361
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
362
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
363
|
+
- \`app_switch\`: When user switched applications
|
|
364
|
+
- \`window_focus\`: When window focus changed
|
|
365
|
+
- \`clipboard\`: Copy/paste operations
|
|
366
|
+
- \`scroll\`: Scroll events with delta values
|
|
367
|
+
|
|
543
368
|
## Tips
|
|
544
369
|
1. Read screenpipe://context first to get current timestamps
|
|
545
|
-
2.
|
|
546
|
-
3. Use
|
|
547
|
-
4. Combine
|
|
370
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
371
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
372
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)`,
|
|
548
373
|
},
|
|
549
374
|
],
|
|
550
375
|
};
|
|
@@ -750,27 +575,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
750
575
|
throw new Error("Missing arguments");
|
|
751
576
|
}
|
|
752
577
|
|
|
753
|
-
// Check if the tool is MacOS-only and we're not on MacOS
|
|
754
|
-
const macosOnlyTools = [
|
|
755
|
-
"click-element",
|
|
756
|
-
"fill-element",
|
|
757
|
-
"find-elements",
|
|
758
|
-
"scroll-element",
|
|
759
|
-
"open-application",
|
|
760
|
-
"open-url",
|
|
761
|
-
];
|
|
762
|
-
|
|
763
|
-
if (macosOnlyTools.includes(name) && !IS_MACOS) {
|
|
764
|
-
return {
|
|
765
|
-
content: [
|
|
766
|
-
{
|
|
767
|
-
type: "text",
|
|
768
|
-
text: `The '${name}' tool is only available on MacOS. Current platform: ${CURRENT_OS}`,
|
|
769
|
-
},
|
|
770
|
-
],
|
|
771
|
-
};
|
|
772
|
-
}
|
|
773
|
-
|
|
774
578
|
try {
|
|
775
579
|
switch (name) {
|
|
776
580
|
case "search-content": {
|
|
@@ -860,50 +664,6 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
860
664
|
return { content: contentItems };
|
|
861
665
|
}
|
|
862
666
|
|
|
863
|
-
case "pixel-control": {
|
|
864
|
-
const action = {
|
|
865
|
-
type: args.action_type,
|
|
866
|
-
data: args.data,
|
|
867
|
-
};
|
|
868
|
-
|
|
869
|
-
const response = await fetchAPI("/experimental/operator/pixel", {
|
|
870
|
-
method: "POST",
|
|
871
|
-
body: JSON.stringify({ action }),
|
|
872
|
-
});
|
|
873
|
-
|
|
874
|
-
if (!response.ok) {
|
|
875
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
876
|
-
}
|
|
877
|
-
|
|
878
|
-
const data = await response.json();
|
|
879
|
-
if (!data.success) {
|
|
880
|
-
return {
|
|
881
|
-
content: [
|
|
882
|
-
{
|
|
883
|
-
type: "text",
|
|
884
|
-
text: `Failed to perform input control: ${data.error || "unknown error"}`,
|
|
885
|
-
},
|
|
886
|
-
],
|
|
887
|
-
};
|
|
888
|
-
}
|
|
889
|
-
|
|
890
|
-
let resultText = "Successfully performed input control action";
|
|
891
|
-
if (args.action_type === "WriteText") {
|
|
892
|
-
resultText = `Successfully typed text: '${args.data}'`;
|
|
893
|
-
} else if (args.action_type === "KeyPress") {
|
|
894
|
-
resultText = `Successfully pressed key: '${args.data}'`;
|
|
895
|
-
} else if (args.action_type === "MouseMove") {
|
|
896
|
-
const coords = args.data as { x: number; y: number };
|
|
897
|
-
resultText = `Successfully moved mouse to coordinates: x=${coords.x}, y=${coords.y}`;
|
|
898
|
-
} else if (args.action_type === "MouseClick") {
|
|
899
|
-
resultText = `Successfully clicked ${args.data} mouse button`;
|
|
900
|
-
}
|
|
901
|
-
|
|
902
|
-
return {
|
|
903
|
-
content: [{ type: "text", text: resultText }],
|
|
904
|
-
};
|
|
905
|
-
}
|
|
906
|
-
|
|
907
667
|
case "export-video": {
|
|
908
668
|
const startTime = args.start_time as string;
|
|
909
669
|
const endTime = args.end_time as string;
|
|
@@ -976,7 +736,8 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
976
736
|
frameIds.sort((a, b) => a - b);
|
|
977
737
|
|
|
978
738
|
// Step 2: Connect to WebSocket and export video
|
|
979
|
-
|
|
739
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
740
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
980
741
|
|
|
981
742
|
const exportResult = await new Promise<{
|
|
982
743
|
success: boolean;
|
|
@@ -995,6 +756,11 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
995
756
|
}
|
|
996
757
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
997
758
|
|
|
759
|
+
ws.on("open", () => {
|
|
760
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
761
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
762
|
+
});
|
|
763
|
+
|
|
998
764
|
ws.on("error", (error) => {
|
|
999
765
|
if (!resolved) {
|
|
1000
766
|
resolved = true;
|
|
@@ -1070,259 +836,131 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
|
|
|
1070
836
|
}
|
|
1071
837
|
}
|
|
1072
838
|
|
|
1073
|
-
case "
|
|
1074
|
-
const
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
|
|
1080
|
-
};
|
|
1081
|
-
|
|
1082
|
-
const response = await fetchAPI("/experimental/operator/click", {
|
|
1083
|
-
method: "POST",
|
|
1084
|
-
body: JSON.stringify({ selector }),
|
|
1085
|
-
});
|
|
1086
|
-
|
|
1087
|
-
if (!response.ok) {
|
|
1088
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1089
|
-
}
|
|
1090
|
-
|
|
1091
|
-
const data = await response.json();
|
|
1092
|
-
if (!data.success) {
|
|
1093
|
-
return {
|
|
1094
|
-
content: [
|
|
1095
|
-
{
|
|
1096
|
-
type: "text",
|
|
1097
|
-
text: `Failed to click element: ${data.error || "unknown error"}`,
|
|
1098
|
-
},
|
|
1099
|
-
],
|
|
1100
|
-
};
|
|
1101
|
-
}
|
|
1102
|
-
|
|
1103
|
-
const result = data.result || {};
|
|
1104
|
-
const method = result.method || "unknown";
|
|
1105
|
-
const details = result.details || "click operation completed";
|
|
1106
|
-
|
|
1107
|
-
return {
|
|
1108
|
-
content: [
|
|
1109
|
-
{
|
|
1110
|
-
type: "text",
|
|
1111
|
-
text: `Successfully clicked element using ${method}. ${details}`,
|
|
1112
|
-
},
|
|
1113
|
-
],
|
|
1114
|
-
};
|
|
1115
|
-
}
|
|
1116
|
-
|
|
1117
|
-
case "fill-element": {
|
|
1118
|
-
const selector = {
|
|
1119
|
-
app_name: args.app,
|
|
1120
|
-
window_name: args.window,
|
|
1121
|
-
locator: `#${args.id}`,
|
|
1122
|
-
use_background_apps: args.use_background_apps ?? true,
|
|
1123
|
-
activate_app: args.activate_app ?? true,
|
|
1124
|
-
};
|
|
1125
|
-
|
|
1126
|
-
const response = await fetchAPI("/experimental/operator/type", {
|
|
1127
|
-
method: "POST",
|
|
1128
|
-
body: JSON.stringify({ selector, text: args.text || "" }),
|
|
1129
|
-
});
|
|
1130
|
-
|
|
1131
|
-
if (!response.ok) {
|
|
1132
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1133
|
-
}
|
|
1134
|
-
|
|
1135
|
-
const data = await response.json();
|
|
1136
|
-
if (!data.success) {
|
|
1137
|
-
return {
|
|
1138
|
-
content: [
|
|
1139
|
-
{
|
|
1140
|
-
type: "text",
|
|
1141
|
-
text: `Failed to fill element: ${data.error || "unknown error"}`,
|
|
1142
|
-
},
|
|
1143
|
-
],
|
|
1144
|
-
};
|
|
839
|
+
case "search-ui-events": {
|
|
840
|
+
const params = new URLSearchParams();
|
|
841
|
+
for (const [key, value] of Object.entries(args)) {
|
|
842
|
+
if (value !== null && value !== undefined) {
|
|
843
|
+
// Map event_type to the API parameter
|
|
844
|
+
params.append(key, String(value));
|
|
845
|
+
}
|
|
1145
846
|
}
|
|
1146
847
|
|
|
1147
|
-
|
|
1148
|
-
content: [
|
|
1149
|
-
{ type: "text", text: "Successfully filled element with text" },
|
|
1150
|
-
],
|
|
1151
|
-
};
|
|
1152
|
-
}
|
|
1153
|
-
|
|
1154
|
-
case "find-elements": {
|
|
1155
|
-
const selector = {
|
|
1156
|
-
app_name: args.app,
|
|
1157
|
-
window_name: args.window,
|
|
1158
|
-
locator: args.role || "",
|
|
1159
|
-
use_background_apps: args.use_background_apps ?? true,
|
|
1160
|
-
activate_app: args.activate_app ?? true,
|
|
1161
|
-
};
|
|
1162
|
-
|
|
1163
|
-
const response = await fetchAPI("/experimental/operator", {
|
|
1164
|
-
method: "POST",
|
|
1165
|
-
body: JSON.stringify({
|
|
1166
|
-
selector,
|
|
1167
|
-
max_results: args.max_results || 10,
|
|
1168
|
-
max_depth: args.max_depth,
|
|
1169
|
-
}),
|
|
1170
|
-
});
|
|
1171
|
-
|
|
848
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
1172
849
|
if (!response.ok) {
|
|
1173
850
|
throw new Error(`HTTP error: ${response.status}`);
|
|
1174
851
|
}
|
|
1175
852
|
|
|
1176
853
|
const data = await response.json();
|
|
1177
|
-
|
|
1178
|
-
|
|
1179
|
-
content: [
|
|
1180
|
-
{
|
|
1181
|
-
type: "text",
|
|
1182
|
-
text: `Failed to find elements: ${data.error || "unknown error"}`,
|
|
1183
|
-
},
|
|
1184
|
-
],
|
|
1185
|
-
};
|
|
1186
|
-
}
|
|
854
|
+
const events = data.data || [];
|
|
855
|
+
const pagination = data.pagination || {};
|
|
1187
856
|
|
|
1188
|
-
|
|
1189
|
-
if (elements.length === 0) {
|
|
857
|
+
if (events.length === 0) {
|
|
1190
858
|
return {
|
|
1191
859
|
content: [
|
|
1192
860
|
{
|
|
1193
861
|
type: "text",
|
|
1194
|
-
text:
|
|
862
|
+
text: "No UI events found. This feature requires:\n" +
|
|
863
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
864
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
865
|
+
"Try: broader time range or different event_type filter.",
|
|
1195
866
|
},
|
|
1196
867
|
],
|
|
1197
868
|
};
|
|
1198
869
|
}
|
|
1199
870
|
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
|
|
1203
|
-
`
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
-
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1221
|
-
|
|
1222
|
-
|
|
1223
|
-
};
|
|
1224
|
-
|
|
1225
|
-
const response = await fetchAPI("/experimental/operator/scroll", {
|
|
1226
|
-
method: "POST",
|
|
1227
|
-
body: JSON.stringify({
|
|
1228
|
-
selector,
|
|
1229
|
-
direction: args.direction,
|
|
1230
|
-
amount: args.amount,
|
|
1231
|
-
}),
|
|
1232
|
-
});
|
|
871
|
+
const formattedEvents: string[] = [];
|
|
872
|
+
for (const event of events) {
|
|
873
|
+
const parts = [
|
|
874
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
875
|
+
event.app_name || "?",
|
|
876
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
877
|
+
];
|
|
878
|
+
|
|
879
|
+
let details = "";
|
|
880
|
+
if (event.event_type === "text" && event.text_content) {
|
|
881
|
+
details = `Text: "${event.text_content}"`;
|
|
882
|
+
} else if (event.event_type === "click") {
|
|
883
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
884
|
+
if (event.element?.label) {
|
|
885
|
+
details += ` on "${event.element.label}"`;
|
|
886
|
+
}
|
|
887
|
+
} else if (event.event_type === "clipboard" && event.text_content) {
|
|
888
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
889
|
+
} else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
890
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
891
|
+
} else if (event.event_type === "scroll") {
|
|
892
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
893
|
+
}
|
|
1233
894
|
|
|
1234
|
-
|
|
1235
|
-
|
|
895
|
+
formattedEvents.push(
|
|
896
|
+
`${parts.join(" ")}\n` +
|
|
897
|
+
`${event.timestamp || ""}\n` +
|
|
898
|
+
`${details}`
|
|
899
|
+
);
|
|
1236
900
|
}
|
|
1237
901
|
|
|
1238
|
-
const
|
|
1239
|
-
|
|
1240
|
-
return {
|
|
1241
|
-
content: [
|
|
1242
|
-
{
|
|
1243
|
-
type: "text",
|
|
1244
|
-
text: `Failed to scroll element: ${data.error || "unknown error"}`,
|
|
1245
|
-
},
|
|
1246
|
-
],
|
|
1247
|
-
};
|
|
1248
|
-
}
|
|
902
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
903
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
1249
904
|
|
|
1250
905
|
return {
|
|
1251
906
|
content: [
|
|
1252
907
|
{
|
|
1253
908
|
type: "text",
|
|
1254
|
-
text:
|
|
909
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
1255
910
|
},
|
|
1256
911
|
],
|
|
1257
912
|
};
|
|
1258
913
|
}
|
|
1259
914
|
|
|
1260
|
-
case "
|
|
1261
|
-
const
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
method: "POST",
|
|
1265
|
-
body: JSON.stringify({ app_name: args.app_name || "" }),
|
|
1266
|
-
}
|
|
1267
|
-
);
|
|
915
|
+
case "get-ui-event-stats": {
|
|
916
|
+
const params = new URLSearchParams();
|
|
917
|
+
if (args.start_time) params.append("start_time", String(args.start_time));
|
|
918
|
+
if (args.end_time) params.append("end_time", String(args.end_time));
|
|
1268
919
|
|
|
920
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
1269
921
|
if (!response.ok) {
|
|
1270
922
|
throw new Error(`HTTP error: ${response.status}`);
|
|
1271
923
|
}
|
|
1272
924
|
|
|
1273
|
-
const
|
|
1274
|
-
|
|
925
|
+
const stats = await response.json();
|
|
926
|
+
|
|
927
|
+
if (!stats || stats.length === 0) {
|
|
1275
928
|
return {
|
|
1276
929
|
content: [
|
|
1277
930
|
{
|
|
1278
931
|
type: "text",
|
|
1279
|
-
text:
|
|
932
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
1280
933
|
},
|
|
1281
934
|
],
|
|
1282
935
|
};
|
|
1283
936
|
}
|
|
1284
937
|
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
},
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
|
|
1294
|
-
|
|
1295
|
-
case "open-url": {
|
|
1296
|
-
const response = await fetchAPI("/experimental/operator/open-url", {
|
|
1297
|
-
method: "POST",
|
|
1298
|
-
body: JSON.stringify({
|
|
1299
|
-
url: args.url || "",
|
|
1300
|
-
browser: args.browser,
|
|
1301
|
-
}),
|
|
1302
|
-
});
|
|
1303
|
-
|
|
1304
|
-
if (!response.ok) {
|
|
1305
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
938
|
+
// Group by app
|
|
939
|
+
const byApp: Record<string, { app: string; events: Record<string, number>; total: number }> = {};
|
|
940
|
+
for (const stat of stats) {
|
|
941
|
+
const app = stat.app_name || "Unknown";
|
|
942
|
+
if (!byApp[app]) {
|
|
943
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
944
|
+
}
|
|
945
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
946
|
+
byApp[app].total += stat.count;
|
|
1306
947
|
}
|
|
1307
948
|
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1311
|
-
|
|
1312
|
-
|
|
1313
|
-
|
|
1314
|
-
|
|
1315
|
-
|
|
1316
|
-
|
|
1317
|
-
};
|
|
1318
|
-
}
|
|
949
|
+
// Sort by total events
|
|
950
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
951
|
+
|
|
952
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
953
|
+
const eventDetails = Object.entries(events)
|
|
954
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
955
|
+
.join(", ");
|
|
956
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
957
|
+
});
|
|
1319
958
|
|
|
1320
|
-
const browserInfo = args.browser ? ` using ${args.browser}` : "";
|
|
1321
959
|
return {
|
|
1322
960
|
content: [
|
|
1323
961
|
{
|
|
1324
962
|
type: "text",
|
|
1325
|
-
text: `
|
|
963
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
1326
964
|
},
|
|
1327
965
|
],
|
|
1328
966
|
};
|