screenhand 0.3.8 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -2
- package/dist/mcp-desktop.js +132 -0
- package/dist/src/ingestion/coverage-auditor.js +11 -0
- package/dist/src/ingestion/feature-extractor.js +366 -0
- package/dist/src/ingestion/reference-merger.js +17 -0
- package/dist/src/learning/engine.js +20 -0
- package/dist/src/perception/coordinator.js +23 -8
- package/dist/src/perception/types.js +1 -0
- package/dist/src/runtime/execution-contract.js +14 -1
- package/dist/src/state/ladder-generator.js +60 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -128,7 +128,7 @@ On Windows, use `npm run build:native:windows` instead.
|
|
|
128
128
|
|
|
129
129
|
## What It Does
|
|
130
130
|
|
|
131
|
-
ScreenHand gives AI agents
|
|
131
|
+
ScreenHand gives AI agents eight capabilities:
|
|
132
132
|
|
|
133
133
|
### Desktop Control — 19 tools
|
|
134
134
|
Click buttons, type text, read UI trees, navigate menus, drag, scroll — all via native Accessibility APIs in ~50ms. Works with any app: Finder, Notes, VS Code, Xcode, System Settings, etc.
|
|
@@ -145,6 +145,9 @@ Gets smarter every session. Logs tool calls, saves winning strategies, tracks er
|
|
|
145
145
|
### App Mastery Map — automatic per-app spatial understanding
|
|
146
146
|
Builds a persistent reverse-engineered blueprint of every app from normal tool usage. 8 features record automatically: page zones, navigation graph (BFS pathfinding), hierarchy, I/O contracts, state machine, element visibility, timing profiles, and ready signals. Mastery levels (beginner → pro → expert → grandmaster) honestly reflect how well ScreenHand knows each app. Maps stored at `~/.screenhand/app-maps/`.
|
|
147
147
|
|
|
148
|
+
### Website Feature Discovery — real features, not generic ladders
|
|
149
|
+
`discover_features` fetches an app's official website and extracts real product features (headings, feature cards, definition lists). Assigns difficulty tiers automatically and generates value-add features only ScreenHand can provide: bulk operations, cross-app export, content summarization, auto-organize, and change monitoring. No LLM calls needed — pure rule-based extraction. Features merge into the reference file and enrich the mastery ladder.
|
|
150
|
+
|
|
148
151
|
### Jobs & Orchestration — 34 tools
|
|
149
152
|
Queue multi-step jobs, run them via background worker daemon, coordinate multiple AI agents with session leases, detect stalls, auto-recover. Survives client restarts.
|
|
150
153
|
|
|
@@ -294,7 +297,7 @@ Accessibility: ~50ms. Chrome CDP: ~10ms (background, no focus needed). OCR: ~600
|
|
|
294
297
|
```bash
|
|
295
298
|
git clone https://github.com/manushi4/screenhand.git
|
|
296
299
|
cd screenhand && npm install && npm run build:native
|
|
297
|
-
npm test #
|
|
300
|
+
npm test # 1405 tests, 56 files
|
|
298
301
|
```
|
|
299
302
|
|
|
300
303
|
## Contact
|
package/dist/mcp-desktop.js
CHANGED
|
@@ -69,6 +69,7 @@ import os from "node:os";
|
|
|
69
69
|
import { MenuScanner } from "./src/ingestion/menu-scanner.js";
|
|
70
70
|
import { DocParser } from "./src/ingestion/doc-parser.js";
|
|
71
71
|
import { TutorialExtractor } from "./src/ingestion/tutorial-extractor.js";
|
|
72
|
+
import { extractFeaturesFromHTML } from "./src/ingestion/feature-extractor.js";
|
|
72
73
|
import { CoverageAuditor } from "./src/ingestion/coverage-auditor.js";
|
|
73
74
|
import { ReferenceMerger } from "./src/ingestion/reference-merger.js";
|
|
74
75
|
import { PlaybookPublisher } from "./src/community/publisher.js";
|
|
@@ -280,6 +281,7 @@ coverage_report(bundleId, appName) → tells you exactly what ScreenHand knows
|
|
|
280
281
|
- "0 selectors, 0 flows" → LEARN FIRST (Step 0a)
|
|
281
282
|
- "Has selectors + flows" → GO (skip to Step 1)
|
|
282
283
|
- "Has error patterns for your tool" → use *_with_fallback tools
|
|
284
|
+
- "Website features: 0" → run discover_features first (Step 0b)
|
|
283
285
|
|
|
284
286
|
learning_status(bundleId) → tells you WHICH tools to use
|
|
285
287
|
- AX score > 0.9 → use ui_press/ui_tree (fastest, ~50ms)
|
|
@@ -294,6 +296,16 @@ platform_guide("platform") → load curated selectors/flows/errors
|
|
|
294
296
|
memory_recall("task description") → reuse past strategies
|
|
295
297
|
Then go to Step 1.
|
|
296
298
|
|
|
299
|
+
### Step 0b: DISCOVER FEATURES (if website features = 0)
|
|
300
|
+
discover_features(url, bundleId, appName) → fetch official app website, extract real features
|
|
301
|
+
→ parses headings, feature cards, definition lists from HTML
|
|
302
|
+
→ assigns levels: beginner/pro/expert/grandmaster
|
|
303
|
+
→ generates value-add features: bulk ops, cross-app, summarize, organize, monitor
|
|
304
|
+
→ merges into reference file, enriches the feature ladder
|
|
305
|
+
→ coverage_report will now show real feature count
|
|
306
|
+
Priority: discover_features BEFORE scan_menu_bar (features give meaningful ladder)
|
|
307
|
+
Then continue to Step 0a or Step 1.
|
|
308
|
+
|
|
297
309
|
### Step 1: SEE
|
|
298
310
|
perception_start() → turns on continuous monitoring (3 rates: AX 100ms, CDP 300ms, Vision 1s)
|
|
299
311
|
world_state() → verify windows + controls are tracked
|
|
@@ -4219,6 +4231,27 @@ server.tool("click_with_fallback", "Click a target by text using the canonical f
|
|
|
4219
4231
|
}
|
|
4220
4232
|
throw new Error("Target not found via OCR");
|
|
4221
4233
|
}
|
|
4234
|
+
case "window_buffer": {
|
|
4235
|
+
// Last resort: capture GPU window buffer (works even when window is hidden),
|
|
4236
|
+
// OCR it, find target text, translate window-relative to screen-absolute coords
|
|
4237
|
+
const wbWindowId = await resolveWindowId(targetPid);
|
|
4238
|
+
if (!wbWindowId)
|
|
4239
|
+
throw new Error("No window found for window_buffer capture");
|
|
4240
|
+
const wbShot = await bridge.call("cg.captureWindow", { windowId: wbWindowId });
|
|
4241
|
+
const wbMatches = await bridge.call("vision.findText", { imagePath: wbShot.path, searchText: target });
|
|
4242
|
+
const wbMatch = Array.isArray(wbMatches) ? wbMatches[0] : null;
|
|
4243
|
+
if (!wbMatch?.bounds)
|
|
4244
|
+
throw new Error("Target not found via window buffer OCR");
|
|
4245
|
+
// Translate window-relative coords to screen-absolute
|
|
4246
|
+
const allWins = await bridge.call("app.windows");
|
|
4247
|
+
const winInfo = allWins.find((w) => w.windowId === wbWindowId);
|
|
4248
|
+
const winX = winInfo?.bounds?.x ?? 0;
|
|
4249
|
+
const winY = winInfo?.bounds?.y ?? 0;
|
|
4250
|
+
const absX = winX + wbMatch.bounds.x + wbMatch.bounds.width / 2;
|
|
4251
|
+
const absY = winY + wbMatch.bounds.y + wbMatch.bounds.height / 2;
|
|
4252
|
+
await bridge.call("cg.mouseClick", { x: absX, y: absY });
|
|
4253
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${target} at (${Math.round(absX)},${Math.round(absY)}) [window_buffer]` };
|
|
4254
|
+
}
|
|
4222
4255
|
}
|
|
4223
4256
|
throw new Error(`Unknown method: ${method}`);
|
|
4224
4257
|
}
|
|
@@ -4553,6 +4586,22 @@ server.tool("read_with_fallback", "Read text content from the screen or a specif
|
|
|
4553
4586
|
const ocr = await bridge.call("vision.ocr", { imagePath: shot.path });
|
|
4554
4587
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: ocr.text?.slice(0, 4000) ?? "" };
|
|
4555
4588
|
}
|
|
4589
|
+
case "window_buffer": {
|
|
4590
|
+
// GPU window buffer capture — reads content even when window is behind other apps
|
|
4591
|
+
const rbWindowId = await resolveWindowId(targetPid);
|
|
4592
|
+
if (!rbWindowId)
|
|
4593
|
+
throw new Error("No window found for window_buffer read");
|
|
4594
|
+
const rbShot = await bridge.call("cg.captureWindow", { windowId: rbWindowId });
|
|
4595
|
+
if (target) {
|
|
4596
|
+
const rbMatches = await bridge.call("vision.findText", { imagePath: rbShot.path, searchText: target });
|
|
4597
|
+
const rbMatch = Array.isArray(rbMatches) ? rbMatches[0] : null;
|
|
4598
|
+
if (!rbMatch)
|
|
4599
|
+
throw new Error("Text not found via window buffer OCR");
|
|
4600
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: rbMatch.text };
|
|
4601
|
+
}
|
|
4602
|
+
const rbOcr = await bridge.call("vision.ocr", { imagePath: rbShot.path });
|
|
4603
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: rbOcr.text?.slice(0, 4000) ?? "" };
|
|
4604
|
+
}
|
|
4556
4605
|
}
|
|
4557
4606
|
throw new Error(`Method ${method} does not support read`);
|
|
4558
4607
|
}
|
|
@@ -4668,6 +4717,29 @@ server.tool("locate_with_fallback", "Find an element's position on screen using
|
|
|
4668
4717
|
const b = match.bounds;
|
|
4669
4718
|
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${target} at (${b.x},${b.y} ${b.width}x${b.height})` };
|
|
4670
4719
|
}
|
|
4720
|
+
case "window_buffer": {
|
|
4721
|
+
// GPU window buffer capture + OCR — works even when window is hidden
|
|
4722
|
+
const lbWindowId = await resolveWindowId(targetPid);
|
|
4723
|
+
if (!lbWindowId)
|
|
4724
|
+
throw new Error("No window found for window_buffer locate");
|
|
4725
|
+
const lbShot = await bridge.call("cg.captureWindow", { windowId: lbWindowId });
|
|
4726
|
+
const lbMatches = await bridge.call("vision.findText", { imagePath: lbShot.path, searchText: target });
|
|
4727
|
+
const lbMatch = Array.isArray(lbMatches) ? lbMatches[0] : null;
|
|
4728
|
+
if (!lbMatch?.bounds)
|
|
4729
|
+
throw new Error("Target not found via window buffer OCR");
|
|
4730
|
+
// Translate window-relative to screen-absolute bounds
|
|
4731
|
+
const lbWins = await bridge.call("app.windows");
|
|
4732
|
+
const lbWinInfo = lbWins.find((w) => w.windowId === lbWindowId);
|
|
4733
|
+
const lbOffX = lbWinInfo?.bounds?.x ?? 0;
|
|
4734
|
+
const lbOffY = lbWinInfo?.bounds?.y ?? 0;
|
|
4735
|
+
const lbBounds = {
|
|
4736
|
+
x: lbOffX + lbMatch.bounds.x,
|
|
4737
|
+
y: lbOffY + lbMatch.bounds.y,
|
|
4738
|
+
width: lbMatch.bounds.width,
|
|
4739
|
+
height: lbMatch.bounds.height,
|
|
4740
|
+
};
|
|
4741
|
+
return { ok: true, method, durationMs: Date.now() - start, fallbackFrom: null, retries: attempt, error: null, target: `${target} at (${lbBounds.x},${lbBounds.y} ${lbBounds.width}x${lbBounds.height}) [window_buffer]` };
|
|
4742
|
+
}
|
|
4671
4743
|
}
|
|
4672
4744
|
throw new Error(`Method ${method} does not support locate`);
|
|
4673
4745
|
}
|
|
@@ -6353,6 +6425,65 @@ server.tool("ingest_tutorial", "Extract structured playbook steps from a video t
|
|
|
6353
6425
|
}],
|
|
6354
6426
|
};
|
|
6355
6427
|
});
|
|
6428
|
+
/**
 * MCP tool: discover_features
 *
 * Fetches an app's official website, extracts product features from the HTML,
 * merges them into the app's reference file and returns a text summary.
 *
 * Security notes:
 *  - Every URL that is actually requested (the initial one AND each redirect
 *    target) is validated against the internal/private host block-list.
 *    Redirects are followed manually so a public URL cannot bounce the
 *    request to an internal address after the check.
 *  - The body is read incrementally and aborted as soon as it exceeds
 *    MAX_HTML_BYTES, so a missing or lying Content-Length cannot force the
 *    process to buffer an arbitrarily large response.
 *  - NOTE(review): host checks are name-based only; a public DNS name that
 *    resolves to a private address is not detected here.
 */
server.tool("discover_features", "Extract features from an app's official website and generate ScreenHand value-add features. Fetches the page, parses feature headings/cards/lists, assigns difficulty levels, and generates bulk/cross-app/intelligence/organization/monitoring value-adds. Merges into the reference file and enriches the feature ladder.", {
    url: z.string().url().describe("Official app website URL (e.g. https://www.apple.com/notes)"),
    bundleId: z.string().describe("macOS bundle ID (e.g. com.apple.Notes)"),
    appName: z.string().describe("Human-readable app name (e.g. Notes)"),
}, async ({ url, bundleId, appName }) => {
    const MAX_HTML_BYTES = 5 * 1024 * 1024; // 5MB
    const MAX_REDIRECTS = 5;
    const REDIRECT_STATUSES = new Set([301, 302, 303, 307, 308]);
    // SSRF protection: only allow http/https to public hosts.
    // Returns the parsed URL so callers request exactly what was validated.
    const assertPublicHttpUrl = (candidate) => {
        const parsed = new URL(candidate);
        if (!["http:", "https:"].includes(parsed.protocol)) {
            throw new Error("Only http/https URLs are allowed");
        }
        const hostname = parsed.hostname.toLowerCase();
        if (hostname === "localhost" ||
            hostname === "metadata.google.internal" ||
            /^(127\.|10\.|172\.(1[6-9]|2\d|3[01])\.|192\.168\.|169\.254\.|0\.|0x|::1|\[::1\])/.test(hostname) ||
            // URL.hostname keeps IPv6 literals bracketed: block loopback/unspecified,
            // unique-local (fc00::/7), link-local (fe80::/10) and IPv4-mapped addresses.
            /^\[(?:::1?\]|f[cd][0-9a-f]{2}:|fe[89ab][0-9a-f]:|::ffff:)/.test(hostname) ||
            /^\d+$/.test(hostname)) {
            throw new Error("URL points to internal/private network — blocked for security");
        }
        return parsed;
    };
    // One deadline for the whole operation (all redirect hops + body download).
    const signal = AbortSignal.timeout(15000);
    let target = assertPublicHttpUrl(url);
    let resp;
    for (let hop = 0;; hop++) {
        resp = await fetch(target, {
            headers: { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" },
            signal,
            redirect: "manual",
        });
        const location = resp.headers.get("location");
        if (!REDIRECT_STATUSES.has(resp.status) || !location)
            break;
        if (hop >= MAX_REDIRECTS) {
            throw new Error(`Too many redirects fetching ${url}`);
        }
        // Discard the redirect body, then validate the next hop before requesting it.
        await resp.body?.cancel();
        target = assertPublicHttpUrl(new URL(location, target).href);
    }
    if (!resp.ok)
        throw new Error(`Failed to fetch ${url}: ${resp.status}`);
    // Check Content-Length before buffering (cheap early exit when the header is honest)
    const contentLength = resp.headers.get("content-length");
    if (contentLength && Number.parseInt(contentLength, 10) > MAX_HTML_BYTES) {
        throw new Error(`Response too large: ${contentLength} bytes (max ${MAX_HTML_BYTES})`);
    }
    // Stream the body so the byte cap is enforced while downloading.
    const chunks = [];
    let received = 0;
    if (resp.body) {
        for await (const chunk of resp.body) {
            received += chunk.byteLength;
            if (received > MAX_HTML_BYTES) {
                throw new Error(`Response body too large: exceeds ${MAX_HTML_BYTES} bytes`);
            }
            chunks.push(chunk);
        }
    }
    // TextDecoder decodes as UTF-8 and strips a leading BOM, like Response.text().
    const html = new TextDecoder().decode(Buffer.concat(chunks));
    const result = extractFeaturesFromHTML(html, appName, url);
    const mergeResult = referenceMerger.mergeWebsiteFeatures(result, bundleId, appName);
    const lines = [
        `Feature discovery: ${appName} (${bundleId})`,
        `Source: ${url}`,
        `Website features: ${result.websiteFeatures.length}`,
        `Value-add features: ${result.valueAddFeatures.length}`,
        `Reference updated: ${mergeResult.filePath} (${mergeResult.added} new features added)`,
        "",
    ];
    if (result.websiteFeatures.length > 0) {
        lines.push("Website Features:");
        for (const f of result.websiteFeatures) {
            lines.push(`  [${f.level}] ${f.name}: ${f.description.slice(0, 80)}`);
        }
        lines.push("");
    }
    if (result.valueAddFeatures.length > 0) {
        lines.push("ScreenHand Value-Adds:");
        for (const f of result.valueAddFeatures) {
            lines.push(`  [${f.category}] ${f.name}: ${f.description}`);
        }
    }
    return { content: [{ type: "text", text: lines.join("\n") }] };
});
|
|
6356
6487
|
server.tool("coverage_report", "Check what ScreenHand knows about an app: shortcuts, selectors, flows, playbooks, error patterns, and stability %. Useful before complex workflows to decide strategy: learn first (if empty), go fast (if high coverage), or use fallback tools (if error patterns exist). Optional for quick actions.", {
|
|
6357
6488
|
bundleId: z.string().describe("macOS bundle ID (e.g. com.blackmagic-design.DaVinciResolveLite)"),
|
|
6358
6489
|
appName: z.string().describe("Human-readable app name"),
|
|
@@ -6375,6 +6506,7 @@ server.tool("coverage_report", "Check what ScreenHand knows about an app: shortc
|
|
|
6375
6506
|
` Flows: ${report.flowsKnown}`,
|
|
6376
6507
|
` Playbooks: ${report.playbooksAvailable}`,
|
|
6377
6508
|
` Error patterns: ${report.errorsDocumented}`,
|
|
6509
|
+
` Website features: ${report.websiteFeaturesKnown}`,
|
|
6378
6510
|
];
|
|
6379
6511
|
if (report.selectorStabilityScore > 0) {
|
|
6380
6512
|
lines.push(` Selector stability: ${(report.selectorStabilityScore * 100).toFixed(0)}%`);
|
|
@@ -63,6 +63,13 @@ export class CoverageAuditor {
|
|
|
63
63
|
errorsDocumented += ref.errors.length;
|
|
64
64
|
}
|
|
65
65
|
}
|
|
66
|
+
// Count website features
|
|
67
|
+
let websiteFeaturesKnown = 0;
|
|
68
|
+
for (const ref of refs) {
|
|
69
|
+
const wf = ref.websiteFeatures;
|
|
70
|
+
if (Array.isArray(wf))
|
|
71
|
+
websiteFeaturesKnown += wf.length;
|
|
72
|
+
}
|
|
66
73
|
// Compare menu scan against reference shortcuts
|
|
67
74
|
const menuPathsNotCovered = [];
|
|
68
75
|
const shortcutsNotInReference = [];
|
|
@@ -153,6 +160,9 @@ export class CoverageAuditor {
|
|
|
153
160
|
if (errorsDocumented === 0) {
|
|
154
161
|
highValueGaps.push("No error patterns documented — errors will be learned automatically over time");
|
|
155
162
|
}
|
|
163
|
+
if (websiteFeaturesKnown === 0) {
|
|
164
|
+
highValueGaps.push("No website features extracted — run discover_features to learn app capabilities from official website");
|
|
165
|
+
}
|
|
156
166
|
if (workflowsWithNoPlaybook.length > 0) {
|
|
157
167
|
highValueGaps.push(`Common workflows without playbooks: ${workflowsWithNoPlaybook.join(", ")}`);
|
|
158
168
|
}
|
|
@@ -167,6 +177,7 @@ export class CoverageAuditor {
|
|
|
167
177
|
flowsKnown,
|
|
168
178
|
playbooksAvailable: playbooks.length,
|
|
169
179
|
errorsDocumented,
|
|
180
|
+
websiteFeaturesKnown,
|
|
170
181
|
menuPathsNotCovered: menuPathsNotCovered.slice(0, 50),
|
|
171
182
|
shortcutsNotInReference: shortcutsNotInReference.slice(0, 50),
|
|
172
183
|
workflowsWithNoPlaybook,
|
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
// Copyright (C) 2025 Clazro Technology Private Limited
|
|
2
|
+
// SPDX-License-Identifier: AGPL-3.0-only
|
|
3
|
+
// ── Level assignment keywords ─────────────────────────────────────
|
|
4
|
+
/** Single-word keywords checked individually */
|
|
5
|
+
const BEGINNER_WORDS = new Set([
|
|
6
|
+
"basic", "create", "view", "share", "read",
|
|
7
|
+
"browse", "search", "home", "start", "launch", "write",
|
|
8
|
+
]);
|
|
9
|
+
const PRO_WORDS = new Set([
|
|
10
|
+
"organize", "format", "customize", "template", "tag", "folder",
|
|
11
|
+
"sort", "filter", "pin", "archive", "move", "rename", "duplicate",
|
|
12
|
+
"favorites", "bookmark", "list", "table", "style", "font",
|
|
13
|
+
]);
|
|
14
|
+
const EXPERT_WORDS = new Set([
|
|
15
|
+
"automate", "shortcut", "export", "import", "collaborate", "sync",
|
|
16
|
+
"scan", "link", "mention", "embed", "attachment", "password",
|
|
17
|
+
"encrypt", "lock", "version", "history", "recover", "backup",
|
|
18
|
+
]);
|
|
19
|
+
const GRANDMASTER_WORDS = new Set([
|
|
20
|
+
"api", "integrate", "plugin", "advanced", "workflow", "script",
|
|
21
|
+
"extension", "developer", "sdk", "automation", "pipeline", "webhook",
|
|
22
|
+
]);
|
|
23
|
+
/** Multi-word phrases checked via substring match */
|
|
24
|
+
const GRANDMASTER_PHRASES = [
|
|
25
|
+
"custom action", "get started",
|
|
26
|
+
];
|
|
27
|
+
// ── HTML entity decoding ──────────────────────────────────────────
|
|
28
|
+
/**
 * Named entities decoded by decodeHTMLEntities, keyed by their full source
 * form. Numeric forms such as &#39;, &#x27; and &#x2F; are handled by the
 * numeric path and therefore need no entry here.
 */
const HTML_ENTITIES = {
    "&amp;": "&", "&lt;": "<", "&gt;": ">", "&quot;": '"',
    "&apos;": "'", "&nbsp;": " ",
};
/**
 * Convert a numeric character reference to text.
 * Returns "" for control characters (< 0x20, 0x7F), lone surrogates and
 * values outside the Unicode range, so they are dropped from the output.
 */
function decodeCodePoint(n) {
    const printable = n >= 0x20 && n !== 0x7F;
    const validScalar = n <= 0x10FFFF && !(n >= 0xD800 && n <= 0xDFFF);
    // fromCodePoint (not fromCharCode) so astral characters such as emoji survive.
    return printable && validScalar ? String.fromCodePoint(n) : "";
}
/**
 * Decode HTML character references in `text`.
 *
 * Works in a single pass so that the output of one replacement is never
 * re-scanned: "&amp;lt;" becomes "&lt;", not "<".
 *
 * @param {string} text - Text possibly containing entities.
 * @returns {string} Text with known named entities and all decimal/hex
 *   numeric references decoded. Unknown named entities are left unchanged.
 */
function decodeHTMLEntities(text) {
    return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|[a-zA-Z]+);/g, (orig, dec, hex) => {
        if (dec !== undefined)
            return decodeCodePoint(Number(dec));
        if (hex !== undefined)
            return decodeCodePoint(Number.parseInt(hex, 16));
        return Object.hasOwn(HTML_ENTITIES, orig) ? HTML_ENTITIES[orig] : orig;
    });
}
|
|
50
|
+
// ── Strip control characters ──────────────────────────────────────
|
|
51
|
+
function stripControlChars(text) {
|
|
52
|
+
// Remove ASCII control chars (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F, 0x7F)
|
|
53
|
+
// Preserve \t (0x09), \n (0x0A), \r (0x0D)
|
|
54
|
+
return text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/g, "");
|
|
55
|
+
}
|
|
56
|
+
// ── Strip HTML tags — O(n) single-pass state machine ──────────────
|
|
57
|
+
/**
 * Remove HTML tags from `html` in a single linear pass.
 *
 * Everything from a "<" up to and including the next ">" is discarded.
 * A ">" that appears outside of a tag is ordinary text and is preserved.
 *
 * @param {string} html - Markup fragment.
 * @returns {string} The text content, trimmed of surrounding whitespace.
 */
function stripTags(html) {
    let result = "";
    let inTag = false;
    for (let i = 0; i < html.length; i++) {
        const ch = html[i];
        if (inTag) {
            // Inside a tag: swallow everything, leave on the closing bracket.
            if (ch === ">")
                inTag = false;
            continue;
        }
        if (ch === "<") {
            inTag = true;
            continue;
        }
        result += ch;
    }
    return result.trim();
}
|
|
75
|
+
// ── Strip script and style blocks before processing ───────────────
|
|
76
|
+
/**
 * Remove every region that starts at a match of `openRe` and ends at the
 * next match of `closeRe`. Both patterns must carry the `g` flag.
 *
 * An opener without any later closer stops the scan: if no closer exists
 * after this opener, none exists after a later one either, so the rest of
 * the text is kept as-is.
 */
function removeDelimited(text, openRe, closeRe) {
    let out = "";
    let cursor = 0;
    for (;;) {
        openRe.lastIndex = cursor;
        const opener = openRe.exec(text);
        if (opener === null)
            break;
        closeRe.lastIndex = openRe.lastIndex;
        if (closeRe.exec(text) === null)
            break;
        out += text.slice(cursor, opener.index);
        cursor = closeRe.lastIndex;
    }
    return out + text.slice(cursor);
}
/**
 * Strip <script>…</script>, <style>…</style> and <!-- … --> blocks so their
 * contents cannot leak into feature extraction.
 *
 * Blocks of any size are removed; opener and closer are located with two
 * separate forward scans instead of one bounded lazy match, so large inline
 * bundles are no longer left in place.
 *
 * @param {string} html - Raw page markup.
 * @returns {string} Markup without script, style and comment blocks.
 *   Unterminated blocks are left untouched.
 */
function stripScriptsAndStyles(html) {
    const withoutScripts = removeDelimited(html, /<script\b[^>]*>/gi, /<\/script>/gi);
    const withoutStyles = removeDelimited(withoutScripts, /<style\b[^>]*>/gi, /<\/style>/gi);
    return removeDelimited(withoutStyles, /<!--/g, /-->/g);
}
|
|
84
|
+
// ── Clean extracted text ──────────────────────────────────────────
|
|
85
|
+
function cleanText(rawHtml) {
|
|
86
|
+
return stripControlChars(decodeHTMLEntities(stripTags(rawHtml)));
|
|
87
|
+
}
|
|
88
|
+
// ── Normalize feature name to ID ──────────────────────────────────
|
|
89
|
+
/**
 * Turn a human-readable feature name into a stable identifier:
 * lower-case, runs of non-alphanumerics collapsed to "_", no leading or
 * trailing "_", at most 80 characters.
 *
 * @param {string} name - Feature name.
 * @returns {string} Identifier; "" when the name has no ASCII letters or digits.
 */
function nameToId(name) {
    return name
        .toLowerCase()
        .replace(/[^a-z0-9]+/g, "_")
        .replace(/^_|_$/g, "")
        .slice(0, 80)
        // Truncation can cut right after a separator; drop it so ids never end in "_".
        .replace(/_$/, "");
}
|
|
96
|
+
// ── Assign level based on keywords ────────────────────────────────
|
|
97
|
+
/**
 * Pick a difficulty tier for a feature from keywords in its name and
 * description.
 *
 * Precedence: grandmaster phrases, then grandmaster, expert, pro and
 * beginner single-word keywords. When nothing matches, a description
 * longer than 80 characters yields "pro", otherwise "beginner".
 *
 * @param {string} name - Feature name.
 * @param {string} description - Feature description (may be "").
 * @returns {"beginner"|"pro"|"expert"|"grandmaster"} The assigned tier.
 */
function assignLevel(name, description) {
    const text = `${name} ${description}`.toLowerCase();
    // Multi-word phrases are matched against the whole text.
    if (GRANDMASTER_PHRASES.some((phrase) => text.includes(phrase)))
        return "grandmaster";
    // Tokenize on anything that is not a letter or digit so punctuation
    // attached to a word ("export," / "API.") does not hide the keyword.
    const words = text.split(/[^a-z0-9]+/).filter(Boolean);
    const tiers = [
        [GRANDMASTER_WORDS, "grandmaster"],
        [EXPERT_WORDS, "expert"],
        [PRO_WORDS, "pro"],
        [BEGINNER_WORDS, "beginner"],
    ];
    for (const [vocabulary, level] of tiers) {
        if (words.some((w) => vocabulary.has(w)))
            return level;
    }
    // Fallback: longer descriptions suggest complexity
    return description.length > 80 ? "pro" : "beginner";
}
|
|
127
|
+
// ── Main: Extract features from HTML ──────────────────────────────
|
|
128
|
+
/**
 * Extract product features from a web page using rule-based HTML matching.
 *
 * Four passes run in order, and the first pass to claim an id wins:
 *   1. h2–h4 headings (description = first <p> within the next 500 chars)
 *   2. <div class="…feature…"> cards containing an h2–h5 heading
 *   3. <li> items preceded (within 500 chars) by "feature", "capability"
 *      or "what you can"
 *   4. <dt>/<dd> pairs
 *
 * Candidates whose name normalizes to an empty id are skipped in every
 * pass, so no feature is ever stored under the id "".
 *
 * @param {string} html - Raw page markup.
 * @param {string} appName - App name, used for value-add descriptions.
 * @param {string} url - Source URL, echoed back in the result.
 * @returns {{appName: string, url: string, websiteFeatures: object[], valueAddFeatures: object[], extractedAt: string}}
 */
export function extractFeaturesFromHTML(html, appName, url) {
    const seen = new Map();
    // Pre-process: strip scripts, styles, and comments to avoid content leakage
    const cleanHtml = stripScriptsAndStyles(html);
    // Text of the first <p> in a fragment (max 200 chars), or null when there is none.
    const firstParagraph = (fragment) => {
        const p = fragment.match(/<p[^>]*>([\s\S]*?)<\/p>/i);
        return p ? cleanText(p[1]).trim().slice(0, 200) : null;
    };
    // ── 1. Extract from headings (h2, h3, h4) ─────────────────────
    for (const m of cleanHtml.matchAll(/<h([2-4])[^>]*>([\s\S]*?)<\/h\1>/gi)) {
        const text = cleanText(m[2]).trim();
        if (!text || text.length < 3 || text.length > 120)
            continue;
        // Skip navigation/generic headings
        if (/^(menu|nav|footer|header|copyright|legal|privacy)$/i.test(text))
            continue;
        const id = nameToId(text);
        if (!id || seen.has(id))
            continue;
        // Try to find a nearby paragraph for description
        const tailStart = m.index + m[0].length;
        const description = firstParagraph(cleanHtml.slice(tailStart, tailStart + 500)) ?? text;
        seen.set(id, {
            id,
            name: text,
            description,
            sourceHeading: text,
            level: assignLevel(text, description),
        });
    }
    // ── 2. Extract from feature cards ──────────────────────────────
    // Pattern: <div class="...feature..."> with a heading inside
    // Bounded to 3000 chars to prevent backtracking on deeply nested divs
    for (const m of cleanHtml.matchAll(/<div[^>]*class="[^"]*feature[^"]*"[^>]*>([\s\S]{0,3000}?)<\/div>/gi)) {
        const cardHtml = m[1];
        const innerHeading = cardHtml.match(/<h[2-5][^>]*>([\s\S]*?)<\/h[2-5]>/i);
        if (!innerHeading)
            continue;
        const name = cleanText(innerHeading[1]).trim();
        if (!name || name.length < 3)
            continue;
        const id = nameToId(name);
        if (!id || seen.has(id))
            continue;
        const description = firstParagraph(cardHtml) ?? name;
        seen.set(id, {
            id,
            name,
            description,
            sourceHeading: name,
            level: assignLevel(name, description),
        });
    }
    // ── 3. Extract from list items (feature lists) ─────────────────
    for (const m of cleanHtml.matchAll(/<li[^>]*>([\s\S]*?)<\/li>/gi)) {
        const rawText = cleanText(m[1]).trim();
        // Only accept list items that look like feature names (not too long, not too short)
        if (!rawText || rawText.length < 5 || rawText.length > 100)
            continue;
        // Skip items that look like navigation
        if (/^(home|about|contact|blog|pricing|sign up|log in|download)$/i.test(rawText))
            continue;
        // Skip items with too many sentences (likely paragraphs, not feature names)
        if ((rawText.match(/\./g) ?? []).length > 2)
            continue;
        const id = nameToId(rawText);
        if (!id || seen.has(id))
            continue;
        // Only add if the surrounding context mentions "feature" (within 500 chars before)
        const contextBefore = cleanHtml.slice(Math.max(0, m.index - 500), m.index).toLowerCase();
        if (!contextBefore.includes("feature") && !contextBefore.includes("capability") && !contextBefore.includes("what you can"))
            continue;
        seen.set(id, {
            id,
            name: rawText,
            description: rawText,
            sourceHeading: rawText,
            level: assignLevel(rawText, ""),
        });
    }
    // ── 4. Extract from definition lists ───────────────────────────
    for (const m of cleanHtml.matchAll(/<dt[^>]*>([\s\S]*?)<\/dt>\s*<dd[^>]*>([\s\S]*?)<\/dd>/gi)) {
        const name = cleanText(m[1]).trim();
        const desc = cleanText(m[2]).trim().slice(0, 200);
        if (!name || name.length < 3)
            continue;
        const id = nameToId(name);
        if (!id || seen.has(id))
            continue;
        seen.set(id, {
            id,
            name,
            description: desc || name,
            sourceHeading: name,
            level: assignLevel(name, desc),
        });
    }
    const websiteFeatures = [...seen.values()];
    // ── Generate value-add features ────────────────────────────────
    const valueAddFeatures = generateValueAddFeatures(appName, websiteFeatures);
    return {
        appName,
        url,
        websiteFeatures,
        valueAddFeatures,
        extractedAt: new Date().toISOString(),
    };
}
|
|
246
|
+
const VALUE_ADD_RULES = [
|
|
247
|
+
{
|
|
248
|
+
category: "bulk",
|
|
249
|
+
trigger: (fs) => fs.some((f) => /\b(creates?|adds?)\b/i.test(f.name)),
|
|
250
|
+
generate: (app) => ({
|
|
251
|
+
id: "bulk_create",
|
|
252
|
+
name: "Bulk Create",
|
|
253
|
+
description: `Create multiple ${app} items from a list or template`,
|
|
254
|
+
category: "bulk",
|
|
255
|
+
level: "pro",
|
|
256
|
+
}),
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
category: "bulk",
|
|
260
|
+
trigger: (fs) => fs.some((f) => /\b(deletes?|removes?|trash)\b/i.test(f.name)),
|
|
261
|
+
generate: (app) => ({
|
|
262
|
+
id: "bulk_delete",
|
|
263
|
+
name: "Bulk Delete",
|
|
264
|
+
description: `Delete multiple ${app} items matching criteria`,
|
|
265
|
+
category: "bulk",
|
|
266
|
+
level: "pro",
|
|
267
|
+
}),
|
|
268
|
+
},
|
|
269
|
+
{
|
|
270
|
+
category: "bulk",
|
|
271
|
+
trigger: (fs) => fs.some((f) => /\b(exports?|downloads?)\b/i.test(f.name)),
|
|
272
|
+
generate: (app) => ({
|
|
273
|
+
id: "bulk_export",
|
|
274
|
+
name: "Bulk Export",
|
|
275
|
+
description: `Export all ${app} items at once`,
|
|
276
|
+
category: "bulk",
|
|
277
|
+
level: "pro",
|
|
278
|
+
}),
|
|
279
|
+
},
|
|
280
|
+
{
|
|
281
|
+
category: "organization",
|
|
282
|
+
trigger: (fs) => fs.some((f) => /\b(folders?|tags?|labels?|categor(?:y|ies))\b/i.test(f.name)),
|
|
283
|
+
generate: (app) => ({
|
|
284
|
+
id: "auto_organize",
|
|
285
|
+
name: "Auto-Organize",
|
|
286
|
+
description: `Sort and organize ${app} items by content, date, or type`,
|
|
287
|
+
category: "organization",
|
|
288
|
+
level: "expert",
|
|
289
|
+
}),
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
category: "organization",
|
|
293
|
+
trigger: (fs) => fs.some((f) => /\b(search|finds?)\b/i.test(f.name)),
|
|
294
|
+
generate: (app) => ({
|
|
295
|
+
id: "smart_search",
|
|
296
|
+
name: "Smart Search",
|
|
297
|
+
description: `Search across all ${app} content with pattern matching`,
|
|
298
|
+
category: "organization",
|
|
299
|
+
level: "pro",
|
|
300
|
+
}),
|
|
301
|
+
},
|
|
302
|
+
{
|
|
303
|
+
category: "intelligence",
|
|
304
|
+
trigger: () => true, // Always available
|
|
305
|
+
generate: (app) => ({
|
|
306
|
+
id: "summarize_all",
|
|
307
|
+
name: "Summarize",
|
|
308
|
+
description: `Read and summarize all ${app} content`,
|
|
309
|
+
category: "intelligence",
|
|
310
|
+
level: "expert",
|
|
311
|
+
}),
|
|
312
|
+
},
|
|
313
|
+
{
|
|
314
|
+
category: "intelligence",
|
|
315
|
+
trigger: (fs) => fs.some((f) => /\b(duplicates?|similar)\b/i.test(f.name)),
|
|
316
|
+
generate: (app) => ({
|
|
317
|
+
id: "find_duplicates",
|
|
318
|
+
name: "Find Duplicates",
|
|
319
|
+
description: `Identify duplicate or near-duplicate ${app} items`,
|
|
320
|
+
category: "intelligence",
|
|
321
|
+
level: "expert",
|
|
322
|
+
}),
|
|
323
|
+
},
|
|
324
|
+
{
|
|
325
|
+
category: "cross_app",
|
|
326
|
+
trigger: (fs) => fs.some((f) => /\b(shares?|exports?|sends?)\b/i.test(f.name)),
|
|
327
|
+
generate: (app) => ({
|
|
328
|
+
id: "cross_app_export",
|
|
329
|
+
name: "Cross-App Export",
|
|
330
|
+
description: `Export ${app} content to other apps automatically`,
|
|
331
|
+
category: "cross_app",
|
|
332
|
+
level: "expert",
|
|
333
|
+
}),
|
|
334
|
+
},
|
|
335
|
+
{
|
|
336
|
+
category: "cross_app",
|
|
337
|
+
trigger: (fs) => fs.some((f) => /\bimports?\b/i.test(f.name)),
|
|
338
|
+
generate: (app) => ({
|
|
339
|
+
id: "cross_app_import",
|
|
340
|
+
name: "Cross-App Import",
|
|
341
|
+
description: `Import content from other apps into ${app}`,
|
|
342
|
+
category: "cross_app",
|
|
343
|
+
level: "expert",
|
|
344
|
+
}),
|
|
345
|
+
},
|
|
346
|
+
{
|
|
347
|
+
category: "monitoring",
|
|
348
|
+
trigger: () => true, // Always available
|
|
349
|
+
generate: (app) => ({
|
|
350
|
+
id: "change_monitor",
|
|
351
|
+
name: "Change Monitor",
|
|
352
|
+
description: `Monitor ${app} for changes and notify`,
|
|
353
|
+
category: "monitoring",
|
|
354
|
+
level: "grandmaster",
|
|
355
|
+
}),
|
|
356
|
+
},
|
|
357
|
+
];
|
|
358
|
+
/**
 * Build the ScreenHand value-add features that apply to an app.
 *
 * Each rule in VALUE_ADD_RULES is asked, in order, whether it applies to the
 * supplied website features (`rule.trigger`); every rule that does contributes
 * the feature returned by `rule.generate(appName)`.
 *
 * @param {string} appName - App name handed to each rule's `generate`.
 * @param {Array<{name: string}>} [websiteFeatures] - Features extracted from
 *   the app's website. A missing or non-array value is treated as an empty
 *   list so that unconditional rules still produce their features.
 * @returns {Array<object>} Generated features, in rule order.
 */
export function generateValueAddFeatures(appName, websiteFeatures) {
    // Triggers call `.some()` on this argument; normalize it once here so a
    // caller without website data does not crash every conditional rule.
    const features = Array.isArray(websiteFeatures) ? websiteFeatures : [];
    const results = [];
    for (const rule of VALUE_ADD_RULES) {
        if (rule.trigger(features)) {
            results.push(rule.generate(appName));
        }
    }
    return results;
}
|
|
@@ -95,6 +95,23 @@ export class ReferenceMerger {
|
|
|
95
95
|
const filePath = this.save(ref);
|
|
96
96
|
return { filePath, added };
|
|
97
97
|
}
|
|
98
|
+
/**
|
|
99
|
+
* Merge website-extracted features into the reference file.
|
|
100
|
+
*/
|
|
101
|
+
mergeWebsiteFeatures(result, bundleId, appName) {
|
|
102
|
+
const ref = this.loadOrCreate(bundleId, appName);
|
|
103
|
+
const existing = ref.websiteFeatures;
|
|
104
|
+
const existingIds = new Set((existing ?? []).map((f) => f.id));
|
|
105
|
+
const newFeatures = result.websiteFeatures.filter((f) => !existingIds.has(f.id));
|
|
106
|
+
ref.websiteFeatures = [...(existing ?? []), ...newFeatures];
|
|
107
|
+
// Merge value-add features by id (don't overwrite existing)
|
|
108
|
+
const existingVA = ref.valueAddFeatures;
|
|
109
|
+
const existingVAIds = new Set((existingVA ?? []).map((f) => f.id));
|
|
110
|
+
const newVA = result.valueAddFeatures.filter((f) => !existingVAIds.has(f.id));
|
|
111
|
+
ref.valueAddFeatures = [...(existingVA ?? []), ...newVA];
|
|
112
|
+
const filePath = this.save(ref);
|
|
113
|
+
return { filePath, added: newFeatures.length };
|
|
114
|
+
}
|
|
98
115
|
/**
|
|
99
116
|
* Merge errors/solutions into reference.
|
|
100
117
|
*/
|
|
@@ -126,6 +126,26 @@ export class LearningEngine {
|
|
|
126
126
|
rankSensors(bundleId) {
|
|
127
127
|
return this.sensors.rank(bundleId);
|
|
128
128
|
}
|
|
129
|
+
/**
|
|
130
|
+
* Detect whether an app is "vision-only" — AX can't see its content,
|
|
131
|
+
* so window buffer capture + OCR is the only viable perception source.
|
|
132
|
+
* Returns true when AX has failed enough times with a low score and
|
|
133
|
+
* at least one other source (vision/ocr) has succeeded.
|
|
134
|
+
*/
|
|
135
|
+
isVisionOnlyApp(bundleId) {
|
|
136
|
+
const ranked = this.sensors.rank(bundleId);
|
|
137
|
+
if (ranked.length < 2)
|
|
138
|
+
return false;
|
|
139
|
+
const ax = ranked.find(r => r.sourceType === "ax");
|
|
140
|
+
const vision = ranked.find(r => r.sourceType === "vision" || r.sourceType === "ocr");
|
|
141
|
+
// AX score near zero + vision/ocr has some success
|
|
142
|
+
if (ax && ax.score < 0.15 && vision && vision.score > 0.3)
|
|
143
|
+
return true;
|
|
144
|
+
// No AX entry at all but vision works
|
|
145
|
+
if (!ax && vision && vision.score > 0.3)
|
|
146
|
+
return true;
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
129
149
|
/**
|
|
130
150
|
* Query verified UI patterns for a given app, optionally filtered by tool.
|
|
131
151
|
*/
|
|
@@ -768,13 +768,18 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
768
768
|
// Safe CLI mode is already enabled via setSafeCLI() in start().
|
|
769
769
|
// This allows vision/OCR for canvas-heavy apps like Canva in Chrome.
|
|
770
770
|
// Skip vision if learning engine shows it consistently fails for this app,
|
|
771
|
-
// but retry every 20th cycle to re-evaluate (apps may gain windows later)
|
|
771
|
+
// but retry every 20th cycle to re-evaluate (apps may gain windows later).
|
|
772
|
+
// Exception: vision-only apps (AX blind) — vision/OCR is their ONLY perception
|
|
773
|
+
// source, so never skip it. Window buffer capture works even when window is hidden.
|
|
772
774
|
if (this.learningEngine && this.activeAppContext) {
|
|
773
|
-
const
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
775
|
+
const isVisionOnly = this.learningEngine.isVisionOnlyApp(this.activeAppContext.bundleId);
|
|
776
|
+
if (!isVisionOnly) {
|
|
777
|
+
const ranked = this.learningEngine.rankSensors(this.activeAppContext.bundleId);
|
|
778
|
+
const visionRank = ranked.find(r => r.sourceType === "vision");
|
|
779
|
+
if (visionRank && visionRank.score < 0.1 && ranked.length >= 2 && this.stats.slowCycles % 20 !== 0) {
|
|
780
|
+
this.stats.slowCycles++;
|
|
781
|
+
return; // Vision consistently fails for this app — skip (retry every 20th cycle)
|
|
782
|
+
}
|
|
778
783
|
}
|
|
779
784
|
}
|
|
780
785
|
const timestamp = new Date().toISOString();
|
|
@@ -860,14 +865,24 @@ export class PerceptionCoordinator extends EventEmitter {
|
|
|
860
865
|
},
|
|
861
866
|
});
|
|
862
867
|
}
|
|
863
|
-
// Record vision sensor outcome
|
|
868
|
+
// Record vision sensor outcome — also record as window_buffer for vision-only apps
|
|
869
|
+
// so the fallback chain knows this source works for element location
|
|
864
870
|
if (this.learningEngine && this.activeAppContext) {
|
|
871
|
+
const latencyMs = Date.now() - new Date(timestamp).getTime();
|
|
865
872
|
this.learningEngine.recordSensorOutcome({
|
|
866
873
|
bundleId: this.activeAppContext.bundleId,
|
|
867
874
|
sourceType: "vision",
|
|
868
875
|
success: !!diffEvent,
|
|
869
|
-
latencyMs
|
|
876
|
+
latencyMs,
|
|
870
877
|
});
|
|
878
|
+
if (this.learningEngine.isVisionOnlyApp(this.activeAppContext.bundleId) && ocrEvent) {
|
|
879
|
+
this.learningEngine.recordSensorOutcome({
|
|
880
|
+
bundleId: this.activeAppContext.bundleId,
|
|
881
|
+
sourceType: "window_buffer",
|
|
882
|
+
success: true,
|
|
883
|
+
latencyMs,
|
|
884
|
+
});
|
|
885
|
+
}
|
|
871
886
|
}
|
|
872
887
|
}
|
|
873
888
|
catch {
|
|
@@ -23,7 +23,7 @@
|
|
|
23
23
|
*/
|
|
24
24
|
// ── 1. Fallback Chain ──────────────────────────────────────────────────
|
|
25
25
|
/** Ordered list of execution methods, from fastest/most reliable to slowest/least reliable */
|
|
26
|
-
const EXECUTION_METHODS = ["ax", "cdp", "ocr", "coordinates"];
|
|
26
|
+
const EXECUTION_METHODS = ["ax", "cdp", "ocr", "window_buffer", "coordinates"];
|
|
27
27
|
const METHOD_CAPABILITIES = {
|
|
28
28
|
ax: {
|
|
29
29
|
method: "ax",
|
|
@@ -61,6 +61,18 @@ const METHOD_CAPABILITIES = {
|
|
|
61
61
|
requiresBridge: true,
|
|
62
62
|
requiresCDP: false,
|
|
63
63
|
},
|
|
64
|
+
window_buffer: {
|
|
65
|
+
method: "window_buffer",
|
|
66
|
+
canClick: true,
|
|
67
|
+
canType: false,
|
|
68
|
+
canRead: true,
|
|
69
|
+
canLocate: true,
|
|
70
|
+
canSelect: false,
|
|
71
|
+
canScroll: false,
|
|
72
|
+
avgLatencyMs: 350,
|
|
73
|
+
requiresBridge: true,
|
|
74
|
+
requiresCDP: false,
|
|
75
|
+
},
|
|
64
76
|
coordinates: {
|
|
65
77
|
method: "coordinates",
|
|
66
78
|
canClick: true,
|
|
@@ -90,6 +102,7 @@ const SENSOR_TO_METHOD = {
|
|
|
90
102
|
chrome: "cdp",
|
|
91
103
|
ocr: "ocr",
|
|
92
104
|
vision: "ocr",
|
|
105
|
+
window_buffer: "window_buffer",
|
|
93
106
|
coordinates: "coordinates",
|
|
94
107
|
};
|
|
95
108
|
/**
|
|
@@ -32,8 +32,10 @@ export function generateLadderFromReference(ref) {
|
|
|
32
32
|
const selectorGroups = ref.selectors ?? {};
|
|
33
33
|
const flows = ref.flows ?? {};
|
|
34
34
|
// Minimum threshold: need at least 2 meaningful selector groups
|
|
35
|
+
// (but website features can stand alone — they come from official sources)
|
|
35
36
|
const meaningfulGroups = Object.keys(selectorGroups).filter(k => !SKIP_GROUPS.has(k));
|
|
36
|
-
|
|
37
|
+
const hasWebsiteFeatures = (ref.websiteFeatures?.length ?? 0) > 0 || (ref.valueAddFeatures?.length ?? 0) > 0;
|
|
38
|
+
if (meaningfulGroups.length < 2 && Object.keys(flows).length < 2 && !hasWebsiteFeatures) {
|
|
37
39
|
return { ladder: [], signals: {}, hash: computeHash(ref) };
|
|
38
40
|
}
|
|
39
41
|
// Track which flow names are already covered by selector groups
|
|
@@ -76,6 +78,50 @@ export function generateLadderFromReference(ref) {
|
|
|
76
78
|
const keywords = extractKeywordsFromFlow(flowName, flow);
|
|
77
79
|
signals[featureId] = keywords;
|
|
78
80
|
}
|
|
81
|
+
// ── Step 2.5: Features from website extraction ─────────────────
|
|
82
|
+
if (ref.websiteFeatures) {
|
|
83
|
+
for (const wf of ref.websiteFeatures) {
|
|
84
|
+
const featureId = `web_${wf.id}`;
|
|
85
|
+
if (features.some((f) => f.id === featureId))
|
|
86
|
+
continue;
|
|
87
|
+
// Skip if an exact selector group or flow already covers this feature
|
|
88
|
+
// (use exact key match, not fuzzy flowNamesRelated — avoids false positives
|
|
89
|
+
// where a short website feature id like "export" matches "export_pdf_flow")
|
|
90
|
+
if (selectorGroups[wf.id] !== undefined || flows[wf.id] !== undefined)
|
|
91
|
+
continue;
|
|
92
|
+
const level = (["beginner", "pro", "expert", "grandmaster"].includes(wf.level)
|
|
93
|
+
? wf.level
|
|
94
|
+
: "beginner");
|
|
95
|
+
const weight = assignWeight(wf.id, 0, level);
|
|
96
|
+
features.push({
|
|
97
|
+
id: featureId,
|
|
98
|
+
description: wf.description || wf.name,
|
|
99
|
+
level,
|
|
100
|
+
weight,
|
|
101
|
+
critical: false,
|
|
102
|
+
});
|
|
103
|
+
signals[featureId] = extractKeywordsFromName(wf.name);
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
// ── Step 2.6: ScreenHand value-add features ───────────────────
|
|
107
|
+
if (ref.valueAddFeatures) {
|
|
108
|
+
for (const va of ref.valueAddFeatures) {
|
|
109
|
+
const featureId = `va_${va.id}`;
|
|
110
|
+
if (features.some((f) => f.id === featureId))
|
|
111
|
+
continue;
|
|
112
|
+
const level = (["pro", "expert", "grandmaster"].includes(va.level)
|
|
113
|
+
? va.level
|
|
114
|
+
: "expert");
|
|
115
|
+
features.push({
|
|
116
|
+
id: featureId,
|
|
117
|
+
description: va.description,
|
|
118
|
+
level,
|
|
119
|
+
weight: 2,
|
|
120
|
+
critical: false,
|
|
121
|
+
});
|
|
122
|
+
signals[featureId] = extractKeywordsFromName(va.name);
|
|
123
|
+
}
|
|
124
|
+
}
|
|
79
125
|
// ── Step 3: Sort by level progression ──────────────────────────
|
|
80
126
|
const levelOrder = {
|
|
81
127
|
beginner: 0, pro: 1, expert: 2, grandmaster: 3,
|
|
@@ -204,6 +250,17 @@ function extractKeywordsFromFlow(flowName, flow) {
|
|
|
204
250
|
}
|
|
205
251
|
return deduplicateArray(keywords);
|
|
206
252
|
}
|
|
253
|
+
// ── Keyword extraction from feature name ──────────────────────
/**
 * Derive lowercase signal keywords from a feature's display name.
 *
 * The name is split on whitespace, underscores and hyphens; each part is
 * lowercased and stripped of everything except `a-z0-9`. Parts of two
 * characters or fewer and parts listed in STOP_WORDS are discarded, and the
 * remainder is passed through `deduplicateArray`.
 *
 * @param {string} name - Feature name, e.g. "Cross-App Export".
 * @returns {string[]} Keywords; empty when `name` is not a string.
 */
function extractKeywordsFromName(name) {
    // Reference entries may lack a name; yield no keywords instead of
    // throwing on `.split`.
    if (typeof name !== "string") {
        return [];
    }
    const keywords = [];
    for (const part of name.split(/[\s_-]+/)) {
        const lower = part.toLowerCase().replace(/[^a-z0-9]/g, "");
        if (lower.length > 2 && !STOP_WORDS.has(lower)) {
            keywords.push(lower);
        }
    }
    return deduplicateArray(keywords);
}
|
|
207
264
|
// ── Flow-to-selector group name matching ─────────────────────────
|
|
208
265
|
function flowNamesRelated(groupName, flowName) {
|
|
209
266
|
const gParts = new Set(groupName.split("_"));
|
|
@@ -222,6 +279,8 @@ function computeHash(ref) {
|
|
|
222
279
|
const keys = [
|
|
223
280
|
...Object.keys(ref.selectors ?? {}).sort(),
|
|
224
281
|
...Object.keys(ref.flows ?? {}).sort(),
|
|
282
|
+
...(ref.websiteFeatures ?? []).map((f) => f.id).sort(),
|
|
283
|
+
...(ref.valueAddFeatures ?? []).map((f) => f.id).sort(),
|
|
225
284
|
].join("|");
|
|
226
285
|
// Simple string hash (djb2)
|
|
227
286
|
let hash = 5381;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "screenhand",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.4.0",
|
|
4
4
|
"mcpName": "io.github.manushi4/screenhand",
|
|
5
5
|
"description": "Give AI eyes and hands on your desktop. ScreenHand is an open-source MCP server that lets Claude and other AI agents see your screen, click buttons, type text, and control any app on macOS and Windows.",
|
|
6
6
|
"homepage": "https://screenhand.com",
|