@vortex-os/computer-use 0.7.0 → 0.7.2

This diff compares the contents of two publicly released versions of this package, exactly as they appear in the supported public registry they were published to. It is provided for informational purposes only.
package/package.json CHANGED
@@ -1,73 +1,74 @@
1
- {
2
- "name": "@vortex-os/computer-use",
3
- "version": "0.7.0",
4
- "description": "Add-on — read-only screen perception (structured UIA tree + pixel fallback + noise-filtered background watch with an event buffer + sub-second reflex alerts: beep / fixed-phrase / OCR or optional local-VLM description spoken locally, optional higher-quality Supertonic neural TTS with per-app audio ducking, adaptive companion that classifies the on-screen activity and branches its help) exposed as an MCP server, layered on @vortex-os/base. Windows-first. Control (mouse/keyboard) is intentionally out of scope.",
5
- "license": "MIT",
6
- "author": "vortex-os-project",
7
- "homepage": "https://github.com/vortex-os-project/vortex#readme",
8
- "repository": {
9
- "type": "git",
10
- "url": "git+https://github.com/vortex-os-project/vortex.git",
11
- "directory": "modules/computer-use"
12
- },
13
- "type": "module",
14
- "files": [
15
- "scripts/mcp-stdio.mjs",
16
- "scripts/noise-filter.mjs",
17
- "scripts/speech-safety.mjs",
18
- "scripts/vlm.mjs",
19
- "scripts/ocr.ps1",
20
- "scripts/speak.ps1",
21
- "scripts/speak-supertonic.mjs",
22
- "scripts/audio-duck.ps1",
23
- "scripts/fetch-supertonic.mjs",
24
- "scripts/worker.ps1",
25
- "scripts/lib.ps1",
26
- "scripts/probe.ps1",
27
- "scripts/read-ui.ps1",
28
- "scripts/classify.ps1",
29
- "scripts/activity.mjs",
30
- "scripts/point-to-ask.ps1",
31
- "computer-use.config.example.json",
32
- "README.md"
33
- ],
34
- "bin": {
35
- "vortex-mcp-computer-use": "scripts/mcp-stdio.mjs"
36
- },
37
- "vortex": {
38
- "mcpServers": {
39
- "vortex-computer-use": {
40
- "command": "node",
41
- "args": ["node_modules/@vortex-os/computer-use/scripts/mcp-stdio.mjs"]
42
- }
43
- }
44
- },
45
- "scripts": {
46
- "verify": "node scripts/verify.mjs",
47
- "test:filter": "node scripts/test-noise-filter.mjs",
48
- "test:speech": "node scripts/test-speech-safety.mjs",
49
- "test:vlm": "node scripts/test-vlm.mjs",
50
- "test:activity": "node scripts/test-activity.mjs"
51
- },
52
- "peerDependencies": {
53
- "@vortex-os/base": ">=0.3.0 <1.0.0"
54
- },
55
- "peerDependenciesMeta": {
56
- "@vortex-os/base": {
57
- "optional": false
58
- }
59
- },
60
- "optionalDependencies": {
61
- "@modelcontextprotocol/sdk": "^1.21.0",
62
- "onnxruntime-node": "^1.19.2"
63
- },
64
- "engines": {
65
- "node": ">=22"
66
- },
67
- "os": [
68
- "win32"
69
- ],
70
- "publishConfig": {
71
- "access": "public"
72
- }
73
- }
1
+ {
2
+ "name": "@vortex-os/computer-use",
3
+ "version": "0.7.2",
4
+ "description": "Add-on — read-only screen perception (structured UIA tree + pixel fallback + noise-filtered background watch with an event buffer + sub-second reflex alerts: beep / fixed-phrase / OCR or optional local-VLM description spoken locally, optional higher-quality Supertonic neural TTS with per-app audio ducking, adaptive companion that classifies the on-screen activity and branches its help) exposed as an MCP server, layered on @vortex-os/base. Windows-first. Control (mouse/keyboard) is intentionally out of scope.",
5
+ "license": "MIT",
6
+ "author": "vortex-os-project",
7
+ "homepage": "https://github.com/vortex-os-project/vortex#readme",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "git+https://github.com/vortex-os-project/vortex.git",
11
+ "directory": "modules/computer-use"
12
+ },
13
+ "type": "module",
14
+ "files": [
15
+ "scripts/mcp-stdio.mjs",
16
+ "scripts/noise-filter.mjs",
17
+ "scripts/speech-safety.mjs",
18
+ "scripts/vlm.mjs",
19
+ "scripts/ocr.ps1",
20
+ "scripts/speak.ps1",
21
+ "scripts/speak-supertonic.mjs",
22
+ "scripts/audio-duck.ps1",
23
+ "scripts/fetch-supertonic.mjs",
24
+ "scripts/worker.ps1",
25
+ "scripts/lib.ps1",
26
+ "scripts/probe.ps1",
27
+ "scripts/read-ui.ps1",
28
+ "scripts/classify.ps1",
29
+ "scripts/activity.mjs",
30
+ "scripts/point-to-ask.ps1",
31
+ "computer-use.config.example.json",
32
+ "README.md"
33
+ ],
34
+ "bin": {
35
+ "vortex-mcp-computer-use": "scripts/mcp-stdio.mjs"
36
+ },
37
+ "vortex": {
38
+ "mcpServers": {
39
+ "vortex-computer-use": {
40
+ "command": "node",
41
+ "args": ["node_modules/@vortex-os/computer-use/scripts/mcp-stdio.mjs"]
42
+ }
43
+ }
44
+ },
45
+ "scripts": {
46
+ "verify": "node scripts/verify.mjs",
47
+ "test:filter": "node scripts/test-noise-filter.mjs",
48
+ "test:speech": "node scripts/test-speech-safety.mjs",
49
+ "test:vlm": "node scripts/test-vlm.mjs",
50
+ "test:activity": "node scripts/test-activity.mjs",
51
+ "prepublishOnly": "node scripts/require-changelog.mjs && node scripts/require-docs-current.mjs && node scripts/scan-leaks.mjs"
52
+ },
53
+ "peerDependencies": {
54
+ "@vortex-os/base": ">=0.3.0 <1.0.0"
55
+ },
56
+ "peerDependenciesMeta": {
57
+ "@vortex-os/base": {
58
+ "optional": false
59
+ }
60
+ },
61
+ "optionalDependencies": {
62
+ "@modelcontextprotocol/sdk": "^1.21.0",
63
+ "onnxruntime-node": "^1.19.2"
64
+ },
65
+ "engines": {
66
+ "node": ">=22"
67
+ },
68
+ "os": [
69
+ "win32"
70
+ ],
71
+ "publishConfig": {
72
+ "access": "public"
73
+ }
74
+ }
package/scripts/activity.mjs CHANGED
@@ -1,92 +1,92 @@
1
- // computer-use — activity classifier (pure, testable). Turns the raw signals from classify.ps1
2
- // (foreground process/title, notification state, UIA count, fullscreen) into an activity CLASS and a help
3
- // PROFILE the companion uses to pick its cadence/triggers/style. See docs/adaptive-companion.md.
4
- //
5
- // Deliberately conservative: process name is the strongest prior; UIA-emptiness marks a GPU canvas
6
- // (game/video) only when the process isn't a known browser/player; everything is overridable by config and by
7
- // explicit user requests. All thresholds are starting points to calibrate, surfaced via opts.
8
-
9
- // Known-app tables — lowercase process names, no ".exe". Extend via opts.apps.{dev,media,browser,productivity}.
10
- const DEV = ['code', 'cursor', 'devenv', 'rider64', 'idea64', 'pycharm64', 'webstorm64', 'clion64', 'goland64',
11
- 'sublime_text', 'notepad++', 'windowsterminal', 'wt', 'powershell', 'pwsh', 'cmd', 'conhost', 'alacritty',
12
- 'wezterm', 'hx', 'nvim', 'vim', 'emacs'];
13
- const MEDIA = ['mpv', 'vlc', 'mpc-hc64', 'mpc-be64', 'potplayermini64', 'potplayer', 'wmplayer', 'smplayer',
14
- 'kodi', 'plex', 'plexmediaplayer', 'netflix'];
15
- const BROWSER = ['chrome', 'msedge', 'firefox', 'brave', 'opera', 'vivaldi', 'arc', 'librewolf'];
16
- const PRODUCTIVITY = ['winword', 'excel', 'powerpnt', 'onenote', 'acrobat', 'acrord32', 'sumatrapdf', 'foxitpdfreader',
17
- 'notepad', 'obsidian', 'notion', 'hwp', 'soffice'];
18
-
19
- // Window-title hints that a browser tab is actually video (so a browser → MEDIA, not BROWSING).
20
- const VIDEO_TITLE = /youtube|netflix|twitch|vimeo|prime\s*video|disney\+?|wavve|tving|watcha|\bwatch\b/i;
21
-
22
- // SHQueryUserNotificationState values.
23
- const NS_NAME = { 1: 'NOT_PRESENT', 2: 'BUSY', 3: 'D3D_FULLSCREEN', 4: 'PRESENTATION', 5: 'ACCEPTS', 6: 'QUIET_TIME', 7: 'APP' };
24
-
25
- // Per-class help profile defaults (overridable via config `companion` section).
26
- export const PROFILES = {
27
- GAME: { proactive: true, cadenceSec: 30, mode: 'periodic',
28
- note: 'sample change-rate (poll_change) to split fast-action vs strategy; fast-action = break-gated only, announce the limit once' },
29
- DEV: { proactive: false, cadenceSec: 0, mode: 'event-error',
30
- note: 'silent until an error/build failure/stack trace; scaffolding help on request' },
31
- MEDIA: { proactive: false, cadenceSec: 0, mode: 'silent',
32
- note: 'on explicit request only; do not talk over it; DRM audio cannot be ducked' },
33
- BROWSING: { proactive: false, cadenceSec: 0, mode: 'silent',
34
- note: 'on request: summarize / translate / explain' },
35
- PRODUCTIVITY: { proactive: false, cadenceSec: 0, mode: 'quiet',
36
- note: 'on request; optional gentle risk flags' },
37
- UNKNOWN: { proactive: false, cadenceSec: 0, mode: 'low-intrusion',
38
- note: 'offer help once, then stay quiet until asked' },
39
- };
40
-
41
- const has = (list, p) => list.includes(p);
42
-
43
- /**
44
- * Derive activity class + profile from raw signals.
45
- * @param {object} raw { process, title, notificationState, uiaCount, uiaCapped, fullscreen, ... }
46
- * @param {object} opts { uiaCanvasMax=5, apps?:{dev,media,browser,productivity}, profiles?:{<CLASS>:{cadenceSec?,proactive?,mode?}} }
47
- */
48
- export function classifyActivity(raw = {}, opts = {}) {
49
- const uiaCanvasMax = opts.uiaCanvasMax ?? 5;
50
- const apps = opts.apps || {};
51
- const dev = apps.dev || DEV, media = apps.media || MEDIA, browser = apps.browser || BROWSER, prod = apps.productivity || PRODUCTIVITY;
52
-
53
- const proc = String(raw.process || '').toLowerCase();
54
- const title = String(raw.title || '');
55
- const ns = Number(raw.notificationState || 0);
56
- const nsName = NS_NAME[ns] || 'UNKNOWN';
57
- const interruptible = ns === 5; // only ACCEPTS_NOTIFICATIONS is unconditionally clear to speak
58
- // uiaCount is null when the UIA walk FAILED (≠ "found 0") — so a UIA error never reads as an empty canvas.
59
- const hasUia = typeof raw.uiaCount === 'number';
60
- // A capped count means there were MORE elements than the backend cap → never a sparse canvas, even if a large
61
- // uiaCanvasMax is configured. So canvas requires a real, uncapped, below-threshold count.
62
- const canvas = hasUia && !raw.uiaCapped && raw.uiaCount < uiaCanvasMax;
63
-
64
- let cls;
65
- if (has(dev, proc)) cls = 'DEV';
66
- else if (has(media, proc)) cls = 'MEDIA';
67
- else if (has(browser, proc)) cls = VIDEO_TITLE.test(title) ? 'MEDIA' : 'BROWSING';
68
- else if (has(prod, proc)) cls = 'PRODUCTIVITY';
69
- else if (canvas && proc) cls = 'GAME'; // screen-canvas app that isn't a known player/browser
70
- else if (hasUia && raw.uiaCount >= uiaCanvasMax) cls = 'PRODUCTIVITY'; // rich tree, unknown app → reading/productivity
71
- else cls = 'UNKNOWN';
72
-
73
- // Profile defaults, with optional per-class overrides from the `companion` config (cadence/proactive/mode).
74
- const profile = { ...PROFILES[cls], ...((opts.profiles && opts.profiles[cls]) || {}) };
75
-
76
- return {
77
- class: cls,
78
- process: raw.process || '',
79
- title,
80
- notificationState: nsName,
81
- interruptible,
82
- canvas,
83
- uiaCount: hasUia ? raw.uiaCount : null,
84
- fullscreen: !!raw.fullscreen,
85
- profile,
86
- // GAME needs a change-rate sample to split fast-action (break-gated) vs strategy (periodic) — the agent
87
- // takes a couple of poll_change reads and applies the thresholds in docs/adaptive-companion.md.
88
- needsChangeRate: cls === 'GAME',
89
- };
90
- }
91
-
92
- export default classifyActivity;
1
+ // computer-use — activity classifier (pure, testable). Turns the raw signals from classify.ps1
2
+ // (foreground process/title, notification state, UIA count, fullscreen) into an activity CLASS and a help
3
+ // PROFILE the companion uses to pick its cadence/triggers/style. See docs/adaptive-companion.md.
4
+ //
5
+ // Deliberately conservative: process name is the strongest prior; UIA-emptiness marks a GPU canvas
6
+ // (game/video) only when the process isn't a known browser/player; everything is overridable by config and by
7
+ // explicit user requests. All thresholds are starting points to calibrate, surfaced via opts.
8
+
9
+ // Known-app tables — lowercase process names, no ".exe". Extend via opts.apps.{dev,media,browser,productivity}.
10
+ const DEV = ['code', 'cursor', 'devenv', 'rider64', 'idea64', 'pycharm64', 'webstorm64', 'clion64', 'goland64',
11
+ 'sublime_text', 'notepad++', 'windowsterminal', 'wt', 'powershell', 'pwsh', 'cmd', 'conhost', 'alacritty',
12
+ 'wezterm', 'hx', 'nvim', 'vim', 'emacs'];
13
+ const MEDIA = ['mpv', 'vlc', 'mpc-hc64', 'mpc-be64', 'potplayermini64', 'potplayer', 'wmplayer', 'smplayer',
14
+ 'kodi', 'plex', 'plexmediaplayer', 'netflix'];
15
+ const BROWSER = ['chrome', 'msedge', 'firefox', 'brave', 'opera', 'vivaldi', 'arc', 'librewolf'];
16
+ const PRODUCTIVITY = ['winword', 'excel', 'powerpnt', 'onenote', 'acrobat', 'acrord32', 'sumatrapdf', 'foxitpdfreader',
17
+ 'notepad', 'obsidian', 'notion', 'hwp', 'soffice'];
18
+
19
+ // Window-title hints that a browser tab is actually video (so a browser → MEDIA, not BROWSING).
20
+ const VIDEO_TITLE = /youtube|netflix|twitch|vimeo|prime\s*video|disney\+?|wavve|tving|watcha|\bwatch\b/i;
21
+
22
+ // SHQueryUserNotificationState values.
23
+ const NS_NAME = { 1: 'NOT_PRESENT', 2: 'BUSY', 3: 'D3D_FULLSCREEN', 4: 'PRESENTATION', 5: 'ACCEPTS', 6: 'QUIET_TIME', 7: 'APP' };
24
+
25
+ // Per-class help profile defaults (overridable via config `companion` section).
26
+ export const PROFILES = {
27
+ GAME: { proactive: true, cadenceSec: 30, mode: 'periodic',
28
+ note: 'sample change-rate (poll_change) to split fast-action vs strategy; fast-action = break-gated only, announce the limit once' },
29
+ DEV: { proactive: false, cadenceSec: 0, mode: 'event-error',
30
+ note: 'silent until an error/build failure/stack trace; scaffolding help on request' },
31
+ MEDIA: { proactive: false, cadenceSec: 0, mode: 'silent',
32
+ note: 'on explicit request only; do not talk over it; DRM audio cannot be ducked' },
33
+ BROWSING: { proactive: false, cadenceSec: 0, mode: 'silent',
34
+ note: 'on request: summarize / translate / explain' },
35
+ PRODUCTIVITY: { proactive: false, cadenceSec: 0, mode: 'quiet',
36
+ note: 'on request; optional gentle risk flags' },
37
+ UNKNOWN: { proactive: false, cadenceSec: 0, mode: 'low-intrusion',
38
+ note: 'offer help once, then stay quiet until asked' },
39
+ };
40
+
41
+ const has = (list, p) => list.includes(p);
42
+
43
+ /**
44
+ * Derive activity class + profile from raw signals.
45
+ * @param {object} raw { process, title, notificationState, uiaCount, uiaCapped, fullscreen, ... }
46
+ * @param {object} opts { uiaCanvasMax=5, apps?:{dev,media,browser,productivity}, profiles?:{<CLASS>:{cadenceSec?,proactive?,mode?}} }
47
+ */
48
+ export function classifyActivity(raw = {}, opts = {}) {
49
+ const uiaCanvasMax = opts.uiaCanvasMax ?? 5;
50
+ const apps = opts.apps || {};
51
+ const dev = apps.dev || DEV, media = apps.media || MEDIA, browser = apps.browser || BROWSER, prod = apps.productivity || PRODUCTIVITY;
52
+
53
+ const proc = String(raw.process || '').toLowerCase();
54
+ const title = String(raw.title || '');
55
+ const ns = Number(raw.notificationState || 0);
56
+ const nsName = NS_NAME[ns] || 'UNKNOWN';
57
+ const interruptible = ns === 5; // only ACCEPTS_NOTIFICATIONS is unconditionally clear to speak
58
+ // uiaCount is null when the UIA walk FAILED (≠ "found 0") — so a UIA error never reads as an empty canvas.
59
+ const hasUia = typeof raw.uiaCount === 'number';
60
+ // A capped count means there were MORE elements than the backend cap → never a sparse canvas, even if a large
61
+ // uiaCanvasMax is configured. So canvas requires a real, uncapped, below-threshold count.
62
+ const canvas = hasUia && !raw.uiaCapped && raw.uiaCount < uiaCanvasMax;
63
+
64
+ let cls;
65
+ if (has(dev, proc)) cls = 'DEV';
66
+ else if (has(media, proc)) cls = 'MEDIA';
67
+ else if (has(browser, proc)) cls = VIDEO_TITLE.test(title) ? 'MEDIA' : 'BROWSING';
68
+ else if (has(prod, proc)) cls = 'PRODUCTIVITY';
69
+ else if (canvas && proc) cls = 'GAME'; // screen-canvas app that isn't a known player/browser
70
+ else if (hasUia && raw.uiaCount >= uiaCanvasMax) cls = 'PRODUCTIVITY'; // rich tree, unknown app → reading/productivity
71
+ else cls = 'UNKNOWN';
72
+
73
+ // Profile defaults, with optional per-class overrides from the `companion` config (cadence/proactive/mode).
74
+ const profile = { ...PROFILES[cls], ...((opts.profiles && opts.profiles[cls]) || {}) };
75
+
76
+ return {
77
+ class: cls,
78
+ process: raw.process || '',
79
+ title,
80
+ notificationState: nsName,
81
+ interruptible,
82
+ canvas,
83
+ uiaCount: hasUia ? raw.uiaCount : null,
84
+ fullscreen: !!raw.fullscreen,
85
+ profile,
86
+ // GAME needs a change-rate sample to split fast-action (break-gated) vs strategy (periodic) — the agent
87
+ // takes a couple of poll_change reads and applies the thresholds in docs/adaptive-companion.md.
88
+ needsChangeRate: cls === 'GAME',
89
+ };
90
+ }
91
+
92
+ export default classifyActivity;