screenpipe-mcp 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -20
- package/dist/index.js +158 -489
- package/manifest.json +3 -31
- package/package.json +2 -2
- package/src/index.ts +159 -521
package/dist/index.js
CHANGED
|
@@ -54,11 +54,6 @@ function getCurrentDateInfo() {
|
|
|
54
54
|
}),
|
|
55
55
|
};
|
|
56
56
|
}
|
|
57
|
-
// Detect OS
|
|
58
|
-
const CURRENT_OS = process.platform;
|
|
59
|
-
const IS_MACOS = CURRENT_OS === "darwin";
|
|
60
|
-
const IS_WINDOWS = CURRENT_OS === "win32";
|
|
61
|
-
const IS_LINUX = CURRENT_OS === "linux";
|
|
62
57
|
// Parse command line arguments
|
|
63
58
|
const args = process.argv.slice(2);
|
|
64
59
|
let port = 3030;
|
|
@@ -71,7 +66,7 @@ const SCREENPIPE_API = `http://localhost:${port}`;
|
|
|
71
66
|
// Initialize server
|
|
72
67
|
const server = new index_js_1.Server({
|
|
73
68
|
name: "screenpipe",
|
|
74
|
-
version: "0.
|
|
69
|
+
version: "0.7.0",
|
|
75
70
|
}, {
|
|
76
71
|
capabilities: {
|
|
77
72
|
tools: {},
|
|
@@ -101,7 +96,7 @@ const BASE_TOOLS = [
|
|
|
101
96
|
content_type: {
|
|
102
97
|
type: "string",
|
|
103
98
|
enum: ["all", "ocr", "audio", "ui"],
|
|
104
|
-
description: "Content type filter. Default: 'all'",
|
|
99
|
+
description: "Content type filter: 'ocr' (screen text), 'audio' (transcriptions), 'ui' (legacy UI monitoring), 'all'. Default: 'all'. For keyboard/mouse/accessibility events, use search-ui-events tool instead.",
|
|
105
100
|
default: "all",
|
|
106
101
|
},
|
|
107
102
|
limit: {
|
|
@@ -145,55 +140,15 @@ const BASE_TOOLS = [
|
|
|
145
140
|
description: "Include base64 screenshots (OCR only). Default: false",
|
|
146
141
|
default: false,
|
|
147
142
|
},
|
|
148
|
-
|
|
149
|
-
},
|
|
150
|
-
},
|
|
151
|
-
{
|
|
152
|
-
name: "pixel-control",
|
|
153
|
-
description: "Control mouse and keyboard at the pixel level. This is a cross-platform tool that works on all operating systems. " +
|
|
154
|
-
"Use this to type text, press keys, move the mouse, and click buttons.",
|
|
155
|
-
annotations: {
|
|
156
|
-
title: "Pixel Control",
|
|
157
|
-
destructiveHint: true,
|
|
158
|
-
},
|
|
159
|
-
inputSchema: {
|
|
160
|
-
type: "object",
|
|
161
|
-
properties: {
|
|
162
|
-
action_type: {
|
|
143
|
+
speaker_ids: {
|
|
163
144
|
type: "string",
|
|
164
|
-
|
|
165
|
-
description: "Type of input action to perform",
|
|
145
|
+
description: "Comma-separated speaker IDs to filter audio results (e.g., '1,2,3')",
|
|
166
146
|
},
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
type: "string",
|
|
171
|
-
description: "Text to type or key to press (for WriteText and KeyPress)",
|
|
172
|
-
},
|
|
173
|
-
{
|
|
174
|
-
type: "object",
|
|
175
|
-
properties: {
|
|
176
|
-
x: {
|
|
177
|
-
type: "integer",
|
|
178
|
-
description: "X coordinate for mouse movement",
|
|
179
|
-
},
|
|
180
|
-
y: {
|
|
181
|
-
type: "integer",
|
|
182
|
-
description: "Y coordinate for mouse movement",
|
|
183
|
-
},
|
|
184
|
-
},
|
|
185
|
-
description: "Coordinates for MouseMove",
|
|
186
|
-
},
|
|
187
|
-
{
|
|
188
|
-
type: "string",
|
|
189
|
-
enum: ["left", "right", "middle"],
|
|
190
|
-
description: "Button to click for MouseClick",
|
|
191
|
-
},
|
|
192
|
-
],
|
|
193
|
-
description: "Action-specific data",
|
|
147
|
+
speaker_name: {
|
|
148
|
+
type: "string",
|
|
149
|
+
description: "Filter audio by speaker name (case-insensitive partial match)",
|
|
194
150
|
},
|
|
195
151
|
},
|
|
196
|
-
required: ["action_type", "data"],
|
|
197
152
|
},
|
|
198
153
|
},
|
|
199
154
|
{
|
|
@@ -230,230 +185,88 @@ const BASE_TOOLS = [
|
|
|
230
185
|
required: ["start_time", "end_time"],
|
|
231
186
|
},
|
|
232
187
|
},
|
|
233
|
-
];
|
|
234
|
-
const MACOS_TOOLS = [
|
|
235
188
|
{
|
|
236
|
-
name: "
|
|
237
|
-
description: "
|
|
238
|
-
"This
|
|
239
|
-
"
|
|
240
|
-
"
|
|
241
|
-
"
|
|
242
|
-
"- Text inputs can be: 'AXTextField', 'AXTextArea', 'AXComboBox', 'AXSearchField'\n" +
|
|
243
|
-
"- Clickable items: 'AXButton', 'AXMenuItem', 'AXMenuBarItem', 'AXImage', 'AXStaticText'\n" +
|
|
244
|
-
"- Web content may use: 'AXWebArea', 'AXLink', 'AXHeading', 'AXRadioButton'\n\n" +
|
|
245
|
-
"Use MacOS Accessibility Inspector app to identify the exact roles in your target application.",
|
|
189
|
+
name: "search-ui-events",
|
|
190
|
+
description: "Search UI input events captured via accessibility APIs (macOS). " +
|
|
191
|
+
"This is the third modality alongside vision (OCR) and audio. " +
|
|
192
|
+
"Captures: mouse clicks, keyboard text input, scroll events, app/window switches, clipboard operations. " +
|
|
193
|
+
"Events include app context, element info (accessibility labels), and precise timestamps. " +
|
|
194
|
+
"Great for understanding user workflow, what was typed, clicked, or copied.",
|
|
246
195
|
annotations: {
|
|
247
|
-
title: "
|
|
196
|
+
title: "Search UI Events (Accessibility)",
|
|
248
197
|
readOnlyHint: true,
|
|
249
198
|
},
|
|
250
199
|
inputSchema: {
|
|
251
200
|
type: "object",
|
|
252
201
|
properties: {
|
|
253
|
-
|
|
254
|
-
type: "string",
|
|
255
|
-
description: "The name of the application (e.g., 'Chrome', 'Finder', 'Terminal')",
|
|
256
|
-
},
|
|
257
|
-
window: {
|
|
258
|
-
type: "string",
|
|
259
|
-
description: "The window name or title (optional)",
|
|
260
|
-
},
|
|
261
|
-
role: {
|
|
262
|
-
type: "string",
|
|
263
|
-
description: "The role to search for (e.g., 'button', 'textfield', 'AXButton', 'AXTextField'). For best results, use MacOS AX prefixed roles.",
|
|
264
|
-
},
|
|
265
|
-
max_results: {
|
|
266
|
-
type: "integer",
|
|
267
|
-
description: "Maximum number of elements to return",
|
|
268
|
-
default: 10,
|
|
269
|
-
},
|
|
270
|
-
max_depth: {
|
|
271
|
-
type: "integer",
|
|
272
|
-
description: "Maximum depth of element tree to search",
|
|
273
|
-
},
|
|
274
|
-
use_background_apps: {
|
|
275
|
-
type: "boolean",
|
|
276
|
-
description: "Whether to look in background apps",
|
|
277
|
-
default: true,
|
|
278
|
-
},
|
|
279
|
-
activate_app: {
|
|
280
|
-
type: "boolean",
|
|
281
|
-
description: "Whether to activate the app before searching",
|
|
282
|
-
default: true,
|
|
283
|
-
},
|
|
284
|
-
},
|
|
285
|
-
required: ["app", "role"],
|
|
286
|
-
},
|
|
287
|
-
},
|
|
288
|
-
{
|
|
289
|
-
name: "click-element",
|
|
290
|
-
description: "Click an element in an application using its id (MacOS only)",
|
|
291
|
-
annotations: {
|
|
292
|
-
title: "Click Element",
|
|
293
|
-
destructiveHint: true,
|
|
294
|
-
},
|
|
295
|
-
inputSchema: {
|
|
296
|
-
type: "object",
|
|
297
|
-
properties: {
|
|
298
|
-
app: {
|
|
299
|
-
type: "string",
|
|
300
|
-
description: "The name of the application",
|
|
301
|
-
},
|
|
302
|
-
window: {
|
|
303
|
-
type: "string",
|
|
304
|
-
description: "The window name (optional)",
|
|
305
|
-
},
|
|
306
|
-
id: {
|
|
307
|
-
type: "string",
|
|
308
|
-
description: "The id of the element to click",
|
|
309
|
-
},
|
|
310
|
-
use_background_apps: {
|
|
311
|
-
type: "boolean",
|
|
312
|
-
description: "Whether to look in background apps",
|
|
313
|
-
default: true,
|
|
314
|
-
},
|
|
315
|
-
activate_app: {
|
|
316
|
-
type: "boolean",
|
|
317
|
-
description: "Whether to activate the app before clicking",
|
|
318
|
-
default: true,
|
|
319
|
-
},
|
|
320
|
-
},
|
|
321
|
-
required: ["app", "id"],
|
|
322
|
-
},
|
|
323
|
-
},
|
|
324
|
-
{
|
|
325
|
-
name: "fill-element",
|
|
326
|
-
description: "Type text into an element in an application (MacOS only)",
|
|
327
|
-
annotations: {
|
|
328
|
-
title: "Fill Element",
|
|
329
|
-
destructiveHint: true,
|
|
330
|
-
},
|
|
331
|
-
inputSchema: {
|
|
332
|
-
type: "object",
|
|
333
|
-
properties: {
|
|
334
|
-
app: {
|
|
335
|
-
type: "string",
|
|
336
|
-
description: "The name of the application",
|
|
337
|
-
},
|
|
338
|
-
window: {
|
|
339
|
-
type: "string",
|
|
340
|
-
description: "The window name (optional)",
|
|
341
|
-
},
|
|
342
|
-
id: {
|
|
202
|
+
q: {
|
|
343
203
|
type: "string",
|
|
344
|
-
description: "
|
|
204
|
+
description: "Search query for text content, app name, window title. Optional - omit to return recent events.",
|
|
345
205
|
},
|
|
346
|
-
|
|
206
|
+
event_type: {
|
|
347
207
|
type: "string",
|
|
348
|
-
|
|
208
|
+
enum: ["click", "text", "scroll", "key", "app_switch", "window_focus", "clipboard"],
|
|
209
|
+
description: "Filter by event type. 'text' = aggregated keyboard input, 'click' = mouse clicks with element context, 'app_switch'/'window_focus' = app usage tracking, 'clipboard' = copy/paste events.",
|
|
349
210
|
},
|
|
350
|
-
|
|
351
|
-
type: "boolean",
|
|
352
|
-
description: "Whether to look in background apps",
|
|
353
|
-
default: true,
|
|
354
|
-
},
|
|
355
|
-
activate_app: {
|
|
356
|
-
type: "boolean",
|
|
357
|
-
description: "Whether to activate the app before typing",
|
|
358
|
-
default: true,
|
|
359
|
-
},
|
|
360
|
-
},
|
|
361
|
-
required: ["app", "id", "text"],
|
|
362
|
-
},
|
|
363
|
-
},
|
|
364
|
-
{
|
|
365
|
-
name: "scroll-element",
|
|
366
|
-
description: "Scroll an element in a specific direction (MacOS only)",
|
|
367
|
-
annotations: {
|
|
368
|
-
title: "Scroll Element",
|
|
369
|
-
destructiveHint: true,
|
|
370
|
-
},
|
|
371
|
-
inputSchema: {
|
|
372
|
-
type: "object",
|
|
373
|
-
properties: {
|
|
374
|
-
app: {
|
|
211
|
+
app_name: {
|
|
375
212
|
type: "string",
|
|
376
|
-
description: "
|
|
213
|
+
description: "Filter by application name (e.g., 'Google Chrome', 'Slack', 'Code')",
|
|
377
214
|
},
|
|
378
|
-
|
|
215
|
+
window_name: {
|
|
379
216
|
type: "string",
|
|
380
|
-
description: "
|
|
217
|
+
description: "Filter by window title",
|
|
381
218
|
},
|
|
382
|
-
|
|
219
|
+
start_time: {
|
|
383
220
|
type: "string",
|
|
384
|
-
|
|
221
|
+
format: "date-time",
|
|
222
|
+
description: "ISO 8601 UTC start time (e.g., 2024-01-15T10:00:00Z)",
|
|
385
223
|
},
|
|
386
|
-
|
|
224
|
+
end_time: {
|
|
387
225
|
type: "string",
|
|
388
|
-
|
|
389
|
-
description: "
|
|
226
|
+
format: "date-time",
|
|
227
|
+
description: "ISO 8601 UTC end time (e.g., 2024-01-15T18:00:00Z)",
|
|
390
228
|
},
|
|
391
|
-
|
|
229
|
+
limit: {
|
|
392
230
|
type: "integer",
|
|
393
|
-
description: "
|
|
394
|
-
|
|
395
|
-
use_background_apps: {
|
|
396
|
-
type: "boolean",
|
|
397
|
-
description: "Whether to look in background apps",
|
|
398
|
-
default: true,
|
|
399
|
-
},
|
|
400
|
-
activate_app: {
|
|
401
|
-
type: "boolean",
|
|
402
|
-
description: "Whether to activate the app before scrolling",
|
|
403
|
-
default: true,
|
|
231
|
+
description: "Max results. Default: 50",
|
|
232
|
+
default: 50,
|
|
404
233
|
},
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
{
|
|
410
|
-
name: "open-application",
|
|
411
|
-
description: "Open an application by name",
|
|
412
|
-
annotations: {
|
|
413
|
-
title: "Open Application",
|
|
414
|
-
destructiveHint: true,
|
|
415
|
-
},
|
|
416
|
-
inputSchema: {
|
|
417
|
-
type: "object",
|
|
418
|
-
properties: {
|
|
419
|
-
app_name: {
|
|
420
|
-
type: "string",
|
|
421
|
-
description: "The name of the application to open",
|
|
234
|
+
offset: {
|
|
235
|
+
type: "integer",
|
|
236
|
+
description: "Skip N results for pagination. Default: 0",
|
|
237
|
+
default: 0,
|
|
422
238
|
},
|
|
423
239
|
},
|
|
424
|
-
required: ["app_name"],
|
|
425
240
|
},
|
|
426
241
|
},
|
|
427
242
|
{
|
|
428
|
-
name: "
|
|
429
|
-
description: "
|
|
243
|
+
name: "get-ui-event-stats",
|
|
244
|
+
description: "Get aggregated statistics of UI events by app and event type. " +
|
|
245
|
+
"Useful for understanding app usage patterns, productivity analysis, or finding which apps were used most.",
|
|
430
246
|
annotations: {
|
|
431
|
-
title: "
|
|
432
|
-
|
|
247
|
+
title: "UI Event Statistics",
|
|
248
|
+
readOnlyHint: true,
|
|
433
249
|
},
|
|
434
250
|
inputSchema: {
|
|
435
251
|
type: "object",
|
|
436
252
|
properties: {
|
|
437
|
-
|
|
253
|
+
start_time: {
|
|
438
254
|
type: "string",
|
|
439
|
-
|
|
255
|
+
format: "date-time",
|
|
256
|
+
description: "ISO 8601 UTC start time for stats period",
|
|
440
257
|
},
|
|
441
|
-
|
|
258
|
+
end_time: {
|
|
442
259
|
type: "string",
|
|
443
|
-
|
|
260
|
+
format: "date-time",
|
|
261
|
+
description: "ISO 8601 UTC end time for stats period",
|
|
444
262
|
},
|
|
445
263
|
},
|
|
446
|
-
required: ["url"],
|
|
447
264
|
},
|
|
448
265
|
},
|
|
449
266
|
];
|
|
450
267
|
// List tools handler
|
|
451
268
|
server.setRequestHandler(types_js_1.ListToolsRequestSchema, async () => {
|
|
452
|
-
|
|
453
|
-
if (IS_MACOS) {
|
|
454
|
-
tools.push(...MACOS_TOOLS);
|
|
455
|
-
}
|
|
456
|
-
return { tools };
|
|
269
|
+
return { tools: BASE_TOOLS };
|
|
457
270
|
});
|
|
458
271
|
// MCP Resources - provide dynamic context data
|
|
459
272
|
const RESOURCES = [
|
|
@@ -517,18 +330,20 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
517
330
|
mimeType: "text/markdown",
|
|
518
331
|
text: `# Screenpipe Search Guide
|
|
519
332
|
|
|
333
|
+
## Three Data Modalities
|
|
334
|
+
|
|
335
|
+
Screenpipe captures three types of data:
|
|
336
|
+
1. **Vision (OCR)** - Screen text from screenshots
|
|
337
|
+
2. **Audio** - Transcribed speech from microphone/system audio
|
|
338
|
+
3. **UI Events (Accessibility)** - Keyboard input, mouse clicks, app switches, clipboard (macOS)
|
|
339
|
+
|
|
520
340
|
## Quick Start
|
|
521
341
|
- **Get recent activity**: Call search-content with no parameters
|
|
522
342
|
- **Search text**: \`{"q": "search term", "content_type": "ocr"}\`
|
|
523
|
-
- **
|
|
343
|
+
- **Get keyboard input**: Use search-ui-events with \`event_type: "text"\`
|
|
344
|
+
- **Track app usage**: Use get-ui-event-stats for aggregated data
|
|
524
345
|
|
|
525
|
-
##
|
|
526
|
-
- \`ocr\`: Screen text (what you see)
|
|
527
|
-
- \`audio\`: Transcribed speech
|
|
528
|
-
- \`ui\`: UI element interactions
|
|
529
|
-
- \`all\`: Everything (default)
|
|
530
|
-
|
|
531
|
-
## Key Parameters
|
|
346
|
+
## search-content (Vision + Audio)
|
|
532
347
|
| Parameter | Description | Default |
|
|
533
348
|
|-----------|-------------|---------|
|
|
534
349
|
| q | Search query | (none - returns all) |
|
|
@@ -539,11 +354,27 @@ server.setRequestHandler(types_js_1.ReadResourceRequestSchema, async (request) =
|
|
|
539
354
|
| app_name | Filter by app | (no filter) |
|
|
540
355
|
| include_frames | Include screenshots | false |
|
|
541
356
|
|
|
357
|
+
## search-ui-events (Accessibility Data)
|
|
358
|
+
| Parameter | Description | Default |
|
|
359
|
+
|-----------|-------------|---------|
|
|
360
|
+
| q | Search text content, app, window | (none) |
|
|
361
|
+
| event_type | click/text/scroll/key/app_switch/window_focus/clipboard | (all types) |
|
|
362
|
+
| app_name | Filter by application | (no filter) |
|
|
363
|
+
| limit | Max results | 50 |
|
|
364
|
+
|
|
365
|
+
### Event Types
|
|
366
|
+
- \`text\`: Aggregated keyboard input (what was typed)
|
|
367
|
+
- \`click\`: Mouse clicks with element context (accessibility labels)
|
|
368
|
+
- \`app_switch\`: When user switched applications
|
|
369
|
+
- \`window_focus\`: When window focus changed
|
|
370
|
+
- \`clipboard\`: Copy/paste operations
|
|
371
|
+
- \`scroll\`: Scroll events with delta values
|
|
372
|
+
|
|
542
373
|
## Tips
|
|
543
374
|
1. Read screenpipe://context first to get current timestamps
|
|
544
|
-
2.
|
|
545
|
-
3. Use
|
|
546
|
-
4. Combine
|
|
375
|
+
2. Use search-ui-events for "what did I type?" queries
|
|
376
|
+
3. Use get-ui-event-stats to understand app usage patterns
|
|
377
|
+
4. Combine search-content (what was on screen) with search-ui-events (what was done)`,
|
|
547
378
|
},
|
|
548
379
|
],
|
|
549
380
|
};
|
|
@@ -731,25 +562,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
731
562
|
if (!args) {
|
|
732
563
|
throw new Error("Missing arguments");
|
|
733
564
|
}
|
|
734
|
-
// Check if the tool is MacOS-only and we're not on MacOS
|
|
735
|
-
const macosOnlyTools = [
|
|
736
|
-
"click-element",
|
|
737
|
-
"fill-element",
|
|
738
|
-
"find-elements",
|
|
739
|
-
"scroll-element",
|
|
740
|
-
"open-application",
|
|
741
|
-
"open-url",
|
|
742
|
-
];
|
|
743
|
-
if (macosOnlyTools.includes(name) && !IS_MACOS) {
|
|
744
|
-
return {
|
|
745
|
-
content: [
|
|
746
|
-
{
|
|
747
|
-
type: "text",
|
|
748
|
-
text: `The '${name}' tool is only available on MacOS. Current platform: ${CURRENT_OS}`,
|
|
749
|
-
},
|
|
750
|
-
],
|
|
751
|
-
};
|
|
752
|
-
}
|
|
753
565
|
try {
|
|
754
566
|
switch (name) {
|
|
755
567
|
case "search-content": {
|
|
@@ -821,47 +633,6 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
821
633
|
}
|
|
822
634
|
return { content: contentItems };
|
|
823
635
|
}
|
|
824
|
-
case "pixel-control": {
|
|
825
|
-
const action = {
|
|
826
|
-
type: args.action_type,
|
|
827
|
-
data: args.data,
|
|
828
|
-
};
|
|
829
|
-
const response = await fetchAPI("/experimental/operator/pixel", {
|
|
830
|
-
method: "POST",
|
|
831
|
-
body: JSON.stringify({ action }),
|
|
832
|
-
});
|
|
833
|
-
if (!response.ok) {
|
|
834
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
835
|
-
}
|
|
836
|
-
const data = await response.json();
|
|
837
|
-
if (!data.success) {
|
|
838
|
-
return {
|
|
839
|
-
content: [
|
|
840
|
-
{
|
|
841
|
-
type: "text",
|
|
842
|
-
text: `Failed to perform input control: ${data.error || "unknown error"}`,
|
|
843
|
-
},
|
|
844
|
-
],
|
|
845
|
-
};
|
|
846
|
-
}
|
|
847
|
-
let resultText = "Successfully performed input control action";
|
|
848
|
-
if (args.action_type === "WriteText") {
|
|
849
|
-
resultText = `Successfully typed text: '${args.data}'`;
|
|
850
|
-
}
|
|
851
|
-
else if (args.action_type === "KeyPress") {
|
|
852
|
-
resultText = `Successfully pressed key: '${args.data}'`;
|
|
853
|
-
}
|
|
854
|
-
else if (args.action_type === "MouseMove") {
|
|
855
|
-
const coords = args.data;
|
|
856
|
-
resultText = `Successfully moved mouse to coordinates: x=${coords.x}, y=${coords.y}`;
|
|
857
|
-
}
|
|
858
|
-
else if (args.action_type === "MouseClick") {
|
|
859
|
-
resultText = `Successfully clicked ${args.data} mouse button`;
|
|
860
|
-
}
|
|
861
|
-
return {
|
|
862
|
-
content: [{ type: "text", text: resultText }],
|
|
863
|
-
};
|
|
864
|
-
}
|
|
865
636
|
case "export-video": {
|
|
866
637
|
const startTime = args.start_time;
|
|
867
638
|
const endTime = args.end_time;
|
|
@@ -925,7 +696,8 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
925
696
|
// Sort frame IDs
|
|
926
697
|
frameIds.sort((a, b) => a - b);
|
|
927
698
|
// Step 2: Connect to WebSocket and export video
|
|
928
|
-
|
|
699
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
700
|
+
const wsUrl = `ws://localhost:${port}/frames/export?fps=${fps}`;
|
|
929
701
|
const exportResult = await new Promise((resolve) => {
|
|
930
702
|
const ws = new ws_1.WebSocket(wsUrl);
|
|
931
703
|
let resolved = false;
|
|
@@ -936,6 +708,10 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
936
708
|
resolve({ success: false, error: "Export timed out after 5 minutes" });
|
|
937
709
|
}
|
|
938
710
|
}, 5 * 60 * 1000); // 5 minute timeout
|
|
711
|
+
ws.on("open", () => {
|
|
712
|
+
// Send frame_ids in message body to avoid URL length limits
|
|
713
|
+
ws.send(JSON.stringify({ frame_ids: frameIds }));
|
|
714
|
+
});
|
|
939
715
|
ws.on("error", (error) => {
|
|
940
716
|
if (!resolved) {
|
|
941
717
|
resolved = true;
|
|
@@ -1007,226 +783,119 @@ server.setRequestHandler(types_js_1.CallToolRequestSchema, async (request) => {
|
|
|
1007
783
|
};
|
|
1008
784
|
}
|
|
1009
785
|
}
|
|
1010
|
-
case "
|
|
1011
|
-
const
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
};
|
|
1018
|
-
const response = await fetchAPI("/experimental/operator/click", {
|
|
1019
|
-
method: "POST",
|
|
1020
|
-
body: JSON.stringify({ selector }),
|
|
1021
|
-
});
|
|
1022
|
-
if (!response.ok) {
|
|
1023
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1024
|
-
}
|
|
1025
|
-
const data = await response.json();
|
|
1026
|
-
if (!data.success) {
|
|
1027
|
-
return {
|
|
1028
|
-
content: [
|
|
1029
|
-
{
|
|
1030
|
-
type: "text",
|
|
1031
|
-
text: `Failed to click element: ${data.error || "unknown error"}`,
|
|
1032
|
-
},
|
|
1033
|
-
],
|
|
1034
|
-
};
|
|
1035
|
-
}
|
|
1036
|
-
const result = data.result || {};
|
|
1037
|
-
const method = result.method || "unknown";
|
|
1038
|
-
const details = result.details || "click operation completed";
|
|
1039
|
-
return {
|
|
1040
|
-
content: [
|
|
1041
|
-
{
|
|
1042
|
-
type: "text",
|
|
1043
|
-
text: `Successfully clicked element using ${method}. ${details}`,
|
|
1044
|
-
},
|
|
1045
|
-
],
|
|
1046
|
-
};
|
|
1047
|
-
}
|
|
1048
|
-
case "fill-element": {
|
|
1049
|
-
const selector = {
|
|
1050
|
-
app_name: args.app,
|
|
1051
|
-
window_name: args.window,
|
|
1052
|
-
locator: `#${args.id}`,
|
|
1053
|
-
use_background_apps: args.use_background_apps ?? true,
|
|
1054
|
-
activate_app: args.activate_app ?? true,
|
|
1055
|
-
};
|
|
1056
|
-
const response = await fetchAPI("/experimental/operator/type", {
|
|
1057
|
-
method: "POST",
|
|
1058
|
-
body: JSON.stringify({ selector, text: args.text || "" }),
|
|
1059
|
-
});
|
|
1060
|
-
if (!response.ok) {
|
|
1061
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1062
|
-
}
|
|
1063
|
-
const data = await response.json();
|
|
1064
|
-
if (!data.success) {
|
|
1065
|
-
return {
|
|
1066
|
-
content: [
|
|
1067
|
-
{
|
|
1068
|
-
type: "text",
|
|
1069
|
-
text: `Failed to fill element: ${data.error || "unknown error"}`,
|
|
1070
|
-
},
|
|
1071
|
-
],
|
|
1072
|
-
};
|
|
786
|
+
case "search-ui-events": {
|
|
787
|
+
const params = new URLSearchParams();
|
|
788
|
+
for (const [key, value] of Object.entries(args)) {
|
|
789
|
+
if (value !== null && value !== undefined) {
|
|
790
|
+
// Map event_type to the API parameter
|
|
791
|
+
params.append(key, String(value));
|
|
792
|
+
}
|
|
1073
793
|
}
|
|
1074
|
-
|
|
1075
|
-
content: [
|
|
1076
|
-
{ type: "text", text: "Successfully filled element with text" },
|
|
1077
|
-
],
|
|
1078
|
-
};
|
|
1079
|
-
}
|
|
1080
|
-
case "find-elements": {
|
|
1081
|
-
const selector = {
|
|
1082
|
-
app_name: args.app,
|
|
1083
|
-
window_name: args.window,
|
|
1084
|
-
locator: args.role || "",
|
|
1085
|
-
use_background_apps: args.use_background_apps ?? true,
|
|
1086
|
-
activate_app: args.activate_app ?? true,
|
|
1087
|
-
};
|
|
1088
|
-
const response = await fetchAPI("/experimental/operator", {
|
|
1089
|
-
method: "POST",
|
|
1090
|
-
body: JSON.stringify({
|
|
1091
|
-
selector,
|
|
1092
|
-
max_results: args.max_results || 10,
|
|
1093
|
-
max_depth: args.max_depth,
|
|
1094
|
-
}),
|
|
1095
|
-
});
|
|
794
|
+
const response = await fetchAPI(`/ui-events?${params.toString()}`);
|
|
1096
795
|
if (!response.ok) {
|
|
1097
796
|
throw new Error(`HTTP error: ${response.status}`);
|
|
1098
797
|
}
|
|
1099
798
|
const data = await response.json();
|
|
1100
|
-
|
|
1101
|
-
|
|
1102
|
-
|
|
1103
|
-
{
|
|
1104
|
-
type: "text",
|
|
1105
|
-
text: `Failed to find elements: ${data.error || "unknown error"}`,
|
|
1106
|
-
},
|
|
1107
|
-
],
|
|
1108
|
-
};
|
|
1109
|
-
}
|
|
1110
|
-
const elements = data.data || [];
|
|
1111
|
-
if (elements.length === 0) {
|
|
799
|
+
const events = data.data || [];
|
|
800
|
+
const pagination = data.pagination || {};
|
|
801
|
+
if (events.length === 0) {
|
|
1112
802
|
return {
|
|
1113
803
|
content: [
|
|
1114
804
|
{
|
|
1115
805
|
type: "text",
|
|
1116
|
-
text:
|
|
806
|
+
text: "No UI events found. This feature requires:\n" +
|
|
807
|
+
"1. macOS with Accessibility permissions granted\n" +
|
|
808
|
+
"2. UI Events enabled in screenpipe settings\n" +
|
|
809
|
+
"Try: broader time range or different event_type filter.",
|
|
1117
810
|
},
|
|
1118
811
|
],
|
|
1119
812
|
};
|
|
1120
813
|
}
|
|
1121
|
-
|
|
1122
|
-
|
|
1123
|
-
|
|
1124
|
-
`
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1132
|
-
|
|
1133
|
-
|
|
1134
|
-
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
|
|
1150
|
-
});
|
|
1151
|
-
if (!response.ok) {
|
|
1152
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1153
|
-
}
|
|
1154
|
-
const data = await response.json();
|
|
1155
|
-
if (!data.success) {
|
|
1156
|
-
return {
|
|
1157
|
-
content: [
|
|
1158
|
-
{
|
|
1159
|
-
type: "text",
|
|
1160
|
-
text: `Failed to scroll element: ${data.error || "unknown error"}`,
|
|
1161
|
-
},
|
|
1162
|
-
],
|
|
1163
|
-
};
|
|
814
|
+
const formattedEvents = [];
|
|
815
|
+
for (const event of events) {
|
|
816
|
+
const parts = [
|
|
817
|
+
`[${event.event_type?.toUpperCase() || "?"}]`,
|
|
818
|
+
event.app_name || "?",
|
|
819
|
+
event.window_title ? `| ${event.window_title}` : "",
|
|
820
|
+
];
|
|
821
|
+
let details = "";
|
|
822
|
+
if (event.event_type === "text" && event.text_content) {
|
|
823
|
+
details = `Text: "${event.text_content}"`;
|
|
824
|
+
}
|
|
825
|
+
else if (event.event_type === "click") {
|
|
826
|
+
details = `Click at (${event.x || 0}, ${event.y || 0})`;
|
|
827
|
+
if (event.element?.label) {
|
|
828
|
+
details += ` on "${event.element.label}"`;
|
|
829
|
+
}
|
|
830
|
+
}
|
|
831
|
+
else if (event.event_type === "clipboard" && event.text_content) {
|
|
832
|
+
details = `Clipboard: "${event.text_content.substring(0, 100)}${event.text_content.length > 100 ? "..." : ""}"`;
|
|
833
|
+
}
|
|
834
|
+
else if (event.event_type === "app_switch" || event.event_type === "window_focus") {
|
|
835
|
+
details = `Switched to: ${event.app_name}${event.window_title ? ` - ${event.window_title}` : ""}`;
|
|
836
|
+
}
|
|
837
|
+
else if (event.event_type === "scroll") {
|
|
838
|
+
details = `Scroll: dx=${event.delta_x || 0}, dy=${event.delta_y || 0}`;
|
|
839
|
+
}
|
|
840
|
+
formattedEvents.push(`${parts.join(" ")}\n` +
|
|
841
|
+
`${event.timestamp || ""}\n` +
|
|
842
|
+
`${details}`);
|
|
1164
843
|
}
|
|
844
|
+
const header = `UI Events: ${events.length}/${pagination.total || "?"}` +
|
|
845
|
+
(pagination.total > events.length ? ` (use offset=${(pagination.offset || 0) + events.length} for more)` : "");
|
|
1165
846
|
return {
|
|
1166
847
|
content: [
|
|
1167
848
|
{
|
|
1168
849
|
type: "text",
|
|
1169
|
-
text:
|
|
850
|
+
text: header + "\n\n" + formattedEvents.join("\n---\n"),
|
|
1170
851
|
},
|
|
1171
852
|
],
|
|
1172
853
|
};
|
|
1173
854
|
}
|
|
1174
|
-
case "
|
|
1175
|
-
const
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
855
|
+
case "get-ui-event-stats": {
|
|
856
|
+
const params = new URLSearchParams();
|
|
857
|
+
if (args.start_time)
|
|
858
|
+
params.append("start_time", String(args.start_time));
|
|
859
|
+
if (args.end_time)
|
|
860
|
+
params.append("end_time", String(args.end_time));
|
|
861
|
+
const response = await fetchAPI(`/ui-events/stats?${params.toString()}`);
|
|
1179
862
|
if (!response.ok) {
|
|
1180
863
|
throw new Error(`HTTP error: ${response.status}`);
|
|
1181
864
|
}
|
|
1182
|
-
const
|
|
1183
|
-
if (!
|
|
865
|
+
const stats = await response.json();
|
|
866
|
+
if (!stats || stats.length === 0) {
|
|
1184
867
|
return {
|
|
1185
868
|
content: [
|
|
1186
869
|
{
|
|
1187
870
|
type: "text",
|
|
1188
|
-
text:
|
|
871
|
+
text: "No UI event statistics available. UI Events may not be enabled or no events have been captured yet.",
|
|
1189
872
|
},
|
|
1190
873
|
],
|
|
1191
874
|
};
|
|
1192
875
|
}
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
},
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
case "open-url": {
|
|
1203
|
-
const response = await fetchAPI("/experimental/operator/open-url", {
|
|
1204
|
-
method: "POST",
|
|
1205
|
-
body: JSON.stringify({
|
|
1206
|
-
url: args.url || "",
|
|
1207
|
-
browser: args.browser,
|
|
1208
|
-
}),
|
|
1209
|
-
});
|
|
1210
|
-
if (!response.ok) {
|
|
1211
|
-
throw new Error(`HTTP error: ${response.status}`);
|
|
1212
|
-
}
|
|
1213
|
-
const data = await response.json();
|
|
1214
|
-
if (!data.success) {
|
|
1215
|
-
return {
|
|
1216
|
-
content: [
|
|
1217
|
-
{
|
|
1218
|
-
type: "text",
|
|
1219
|
-
text: `Failed to open URL: ${data.error || "unknown error"}`,
|
|
1220
|
-
},
|
|
1221
|
-
],
|
|
1222
|
-
};
|
|
876
|
+
// Group by app
|
|
877
|
+
const byApp = {};
|
|
878
|
+
for (const stat of stats) {
|
|
879
|
+
const app = stat.app_name || "Unknown";
|
|
880
|
+
if (!byApp[app]) {
|
|
881
|
+
byApp[app] = { app, events: {}, total: 0 };
|
|
882
|
+
}
|
|
883
|
+
byApp[app].events[stat.event_type] = stat.count;
|
|
884
|
+
byApp[app].total += stat.count;
|
|
1223
885
|
}
|
|
1224
|
-
|
|
886
|
+
// Sort by total events
|
|
887
|
+
const sorted = Object.values(byApp).sort((a, b) => b.total - a.total);
|
|
888
|
+
const lines = sorted.map(({ app, events, total }) => {
|
|
889
|
+
const eventDetails = Object.entries(events)
|
|
890
|
+
.map(([type, count]) => `${type}: ${count}`)
|
|
891
|
+
.join(", ");
|
|
892
|
+
return `${app}: ${total} events (${eventDetails})`;
|
|
893
|
+
});
|
|
1225
894
|
return {
|
|
1226
895
|
content: [
|
|
1227
896
|
{
|
|
1228
897
|
type: "text",
|
|
1229
|
-
text: `
|
|
898
|
+
text: `UI Event Statistics:\n\n${lines.join("\n")}`,
|
|
1230
899
|
},
|
|
1231
900
|
],
|
|
1232
901
|
};
|