computeruseprotocol 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cup/search.py ADDED
@@ -0,0 +1,583 @@
1
+ """Semantic search engine for CUP accessibility trees.
2
+
3
+ Searches the full (unpruned) tree with:
4
+ - Semantic role matching (natural-language role synonyms)
5
+ - Fuzzy name matching (token overlap, prefix matching)
6
+ - Relevance-ranked results (role + name + context scoring)
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+ import unicodedata
13
+ from dataclasses import dataclass
14
+
15
+ # ---------------------------------------------------------------------------
16
+ # All canonical CUP roles
17
+ # ---------------------------------------------------------------------------
18
+
19
# The complete vocabulary of role names used by CUP nodes.  Stored as a
# frozenset so membership checks are cheap and the set cannot be mutated.
ALL_ROLES: frozenset[str] = frozenset(
    (
        # a - c
        "alert", "alertdialog", "application",
        "banner", "blockquote", "button",
        "caption", "cell", "checkbox", "code", "columnheader", "combobox",
        "complementary", "contentinfo",
        # d - l
        "deletion", "dialog", "document",
        "emphasis",
        "figure", "form",
        "generic", "grid", "group",
        "heading",
        "img", "insertion",
        "link", "list", "listitem", "log",
        # m - r
        "main", "marquee", "math", "menu", "menubar", "menuitem",
        "menuitemcheckbox", "menuitemradio",
        "navigation", "none", "note",
        "option",
        "paragraph", "progressbar",
        "radio", "region", "row", "rowheader",
        # s - w
        "scrollbar", "search", "searchbox", "separator", "slider",
        "spinbutton", "status", "strong", "subscript", "superscript", "switch",
        "tab", "table", "tablist", "tabpanel", "text", "textbox", "timer",
        "titlebar", "toolbar", "tooltip", "tree", "treeitem",
        "window",
    )
)
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Semantic role synonyms
98
+ # ---------------------------------------------------------------------------
99
+
100
# Maps a lowercase, space-separated natural-language phrase to the set of CUP
# roles it may refer to.  Lookups elsewhere in this module are exact-key
# (`phrase in ROLE_SYNONYMS`), so multi-word keys must use single spaces.
ROLE_SYNONYMS: dict[str, frozenset[str]] = {
    # -- text input --
    "input": frozenset({"textbox", "combobox", "searchbox", "spinbutton", "slider"}),
    "text input": frozenset({"textbox", "searchbox", "combobox"}),
    "text field": frozenset({"textbox", "searchbox", "combobox"}),
    "text box": frozenset({"textbox", "searchbox"}),
    "textarea": frozenset({"textbox", "document"}),
    "edit": frozenset({"textbox", "searchbox", "combobox", "document"}),
    "editor": frozenset({"textbox", "document"}),
    # -- search --
    # NOTE: "search" is also a canonical role; this broader entry takes
    # precedence over the identity mapping added below.
    "search": frozenset({"search", "searchbox", "textbox", "combobox"}),
    "search bar": frozenset({"search", "searchbox", "textbox", "combobox"}),
    "search box": frozenset({"search", "searchbox", "textbox", "combobox"}),
    "search field": frozenset({"search", "searchbox", "textbox", "combobox"}),
    "search input": frozenset({"search", "searchbox", "textbox", "combobox"}),
    # -- buttons --
    "btn": frozenset({"button"}),
    "clickable": frozenset({"button", "link", "menuitem", "tab", "treeitem", "listitem"}),
    # -- links --
    "hyperlink": frozenset({"link"}),
    "anchor": frozenset({"link"}),
    # -- dropdowns / selects --
    "dropdown": frozenset({"combobox", "menu", "list"}),
    "select": frozenset({"combobox", "list", "listitem"}),
    "combo": frozenset({"combobox"}),
    "combo box": frozenset({"combobox"}),
    # -- toggles --
    "check": frozenset({"checkbox", "switch", "menuitemcheckbox"}),
    "toggle": frozenset({"switch", "checkbox"}),
    "radio button": frozenset({"radio", "menuitemradio"}),
    # NOTE: "option" is also a canonical role; this broader entry wins.
    "option": frozenset({"option", "radio", "listitem", "menuitemradio"}),
    # -- sliders / ranges --
    "range": frozenset({"slider", "progressbar", "spinbutton"}),
    "progress": frozenset({"progressbar"}),
    "progress bar": frozenset({"progressbar"}),
    "spinner": frozenset({"spinbutton"}),
    # -- tabs --
    "tab bar": frozenset({"tablist"}),
    "tab list": frozenset({"tablist"}),
    "tabs": frozenset({"tablist", "tab"}),
    "tab panel": frozenset({"tabpanel"}),
    # -- menus --
    "menu bar": frozenset({"menubar"}),
    "menu item": frozenset({"menuitem", "menuitemcheckbox", "menuitemradio"}),
    # -- dialogs --
    "modal": frozenset({"dialog", "alertdialog"}),
    "popup": frozenset({"dialog", "alertdialog", "tooltip", "menu"}),
    "notification": frozenset({"alert", "status", "log"}),
    "message": frozenset({"alert", "status", "log"}),
    # -- headings / titles --
    "title": frozenset({"heading", "titlebar"}),
    "header": frozenset({"heading", "banner", "columnheader", "rowheader"}),
    # -- images --
    "image": frozenset({"img"}),
    "picture": frozenset({"img"}),
    "icon": frozenset({"img", "button"}),
    # -- trees / lists --
    "tree item": frozenset({"treeitem"}),
    "list item": frozenset({"listitem"}),
    # -- tables / grids --
    # NOTE: "table" is also a canonical role; this broader entry wins.
    "table": frozenset({"table", "grid"}),
    # -- navigation --
    "nav": frozenset({"navigation"}),
    "sidebar": frozenset({"complementary", "navigation"}),
    # -- containers --
    "panel": frozenset({"region", "group", "tabpanel"}),
    "section": frozenset({"region", "group", "main"}),
    "container": frozenset({"region", "group", "generic"}),
    # -- misc --
    "divider": frozenset({"separator"}),
    "scroll": frozenset({"scrollbar"}),
    "status bar": frozenset({"status"}),
    "tool bar": frozenset({"toolbar"}),
}

# Add identity mappings: every CUP role maps to itself.
# `setdefault` (rather than plain assignment) keeps the hand-written entries
# above whose key is itself a role name ("search", "option", "table").
# The loop variable `_r` remains defined at module level after the loop.
for _r in ALL_ROLES:
    ROLE_SYNONYMS.setdefault(_r, frozenset({_r}))
178
+
179
+
180
+ # ---------------------------------------------------------------------------
181
+ # Noise words filtered from freeform queries
182
+ # ---------------------------------------------------------------------------
183
+
184
+ _NOISE_WORDS: frozenset[str] = frozenset(
185
+ {
186
+ "the",
187
+ "a",
188
+ "an",
189
+ "this",
190
+ "that",
191
+ "for",
192
+ "in",
193
+ "on",
194
+ "of",
195
+ "with",
196
+ "to",
197
+ "and",
198
+ "or",
199
+ "is",
200
+ "it",
201
+ "its",
202
+ "my",
203
+ "your",
204
+ }
205
+ )
206
+
207
+
208
+ # ---------------------------------------------------------------------------
209
+ # Tokenization
210
+ # ---------------------------------------------------------------------------
211
+
212
+ _SPLIT_RE = re.compile(r"[^a-z0-9]+")
213
+
214
+
215
+ def _tokenize(text: str) -> list[str]:
216
+ """Split text into lowercase tokens, stripping accents and punctuation."""
217
+ normalized = unicodedata.normalize("NFD", text.lower())
218
+ stripped = "".join(c for c in normalized if unicodedata.category(c) != "Mn")
219
+ return [t for t in _SPLIT_RE.split(stripped) if t]
220
+
221
+
222
+ # ---------------------------------------------------------------------------
223
+ # Role resolution
224
+ # ---------------------------------------------------------------------------
225
+
226
+
227
def resolve_roles(role_query: str) -> frozenset[str] | None:
    """Resolve a role query to a set of matching CUP roles.

    Resolution is attempted in this order, returning on the first hit:

    1. Exact lookup of the stripped, lowercased query in ``ROLE_SYNONYMS``
       (covers canonical roles and natural-language synonyms).
    2. Lookup of the query re-joined from its tokens with single spaces, so
       that punctuation or irregular spacing ("menu-item", "search  bar")
       still reaches the multi-word synonym.
    3. Lookup of each individual token, in order.
    4. For queries of three or more characters, every canonical role that
       contains the query as a substring.

    Args:
        role_query: A canonical role name or a natural-language description.

    Returns:
        The matching roles, or None if the query doesn't constrain roles
        at all.
    """
    q = role_query.strip().lower()

    # Direct synonym lookup (covers exact CUP roles + natural language)
    if q in ROLE_SYNONYMS:
        return ROLE_SYNONYMS[q]

    tokens = _tokenize(q)

    # Normalized phrase lookup: must run before the per-token fallback,
    # otherwise "menu-item" would resolve through its first token ("menu")
    # instead of the intended "menu item" entry.
    phrase = " ".join(tokens)
    if phrase in ROLE_SYNONYMS:
        return ROLE_SYNONYMS[phrase]

    # Token-based fallback: first token that is a known key wins.
    for token in tokens:
        if token in ROLE_SYNONYMS:
            return ROLE_SYNONYMS[token]

    # Last resort: check if the query IS a substring of a role name.
    # Don't check the reverse (role in query) — too many false positives
    # (e.g., "none" found inside "xyznonexistent").
    if len(q) >= 3:
        matches = frozenset(role for role in ALL_ROLES if q in role)
        if matches:
            return matches

    return None  # don't filter by role
252
+
253
+
254
+ # ---------------------------------------------------------------------------
255
+ # Query parsing
256
+ # ---------------------------------------------------------------------------
257
+
258
+
259
def _parse_query(query: str) -> tuple[str | None, list[str]]:
    """Split a freeform query into ``(role_hint, name_tokens)``.

    Contiguous runs of up to three tokens are tested against
    ``ROLE_SYNONYMS``, longest runs first and, within one length, leftmost
    first.  The first run that is a known key becomes the role hint; every
    other token, minus noise words, forms the name query.

    Examples:
        "the play button" -> ("button", ["play"])
        "search input"    -> ("search input", [])
        "Submit"          -> (None, ["submit"])
        "volume slider"   -> ("slider", ["volume"])
    """
    words = _tokenize(query)
    if not words:
        return None, []

    role_hint: str | None = None
    span_start = span_end = 0  # empty span: nothing is removed when no role is found

    # Candidate (start, end) windows, longest first, then left to right.
    windows = (
        (begin, begin + size)
        for size in range(min(len(words), 3), 0, -1)
        for begin in range(len(words) - size + 1)
    )
    for begin, end in windows:
        phrase = " ".join(words[begin:end])
        if phrase in ROLE_SYNONYMS:
            role_hint, span_start, span_end = phrase, begin, end
            break

    # Everything outside the role span is the name query (noise removed).
    leftovers = words[:span_start] + words[span_end:]
    return role_hint, [word for word in leftovers if word not in _NOISE_WORDS]
294
+
295
+
296
+ # ---------------------------------------------------------------------------
297
+ # Name scoring
298
+ # ---------------------------------------------------------------------------
299
+
300
+
301
+ def _score_name(
302
+ query_tokens: list[str],
303
+ node_name: str,
304
+ node_description: str = "",
305
+ node_value: str = "",
306
+ placeholder: str = "",
307
+ ) -> float:
308
+ """Score how well a node's text fields match the query tokens.
309
+
310
+ Returns a score in [0.0, 1.0].
311
+ """
312
+ if not query_tokens:
313
+ return 1.0 # no name filter = everything matches
314
+
315
+ query_joined = " ".join(query_tokens)
316
+ name_lower = node_name.lower()
317
+
318
+ # Signal 1: full substring match in name
319
+ full_substr = 0.0
320
+ if query_joined in name_lower:
321
+ full_substr = 1.0 if query_joined == name_lower else 0.85
322
+
323
+ # Signal 2: token-level matching
324
+ name_tokens = set(_tokenize(node_name))
325
+ token_score = 0.0
326
+
327
+ if name_tokens:
328
+ matched = 0.0
329
+ for qt in query_tokens:
330
+ if qt in name_tokens:
331
+ matched += 1.0
332
+ elif any(nt.startswith(qt) for nt in name_tokens):
333
+ matched += 0.7 # prefix: "sub" matches "submit"
334
+ elif any(qt.startswith(nt) for nt in name_tokens):
335
+ matched += 0.5 # reverse prefix
336
+ elif any(qt in nt for nt in name_tokens):
337
+ matched += 0.6 # substring within token
338
+ token_score = matched / len(query_tokens)
339
+
340
+ name_score = max(full_substr, token_score)
341
+
342
+ # Exactness bonus: prefer tighter matches (fewer extra tokens in name)
343
+ if name_tokens and name_score > 0:
344
+ overlap = len(set(query_tokens) & name_tokens) / max(len(name_tokens), 1)
345
+ name_score = name_score * (0.85 + 0.15 * overlap)
346
+
347
+ # Boost from secondary fields
348
+ secondary = _score_secondary(query_tokens, node_description, node_value, placeholder)
349
+
350
+ return min(1.0, name_score + secondary * 0.15)
351
+
352
+
353
+ def _score_secondary(
354
+ query_tokens: list[str],
355
+ description: str,
356
+ value: str,
357
+ placeholder: str,
358
+ ) -> float:
359
+ """Score secondary text fields (description, value, placeholder)."""
360
+ best = 0.0
361
+ for field in (description, value, placeholder):
362
+ if not field:
363
+ continue
364
+ field_tokens = set(_tokenize(field))
365
+ if not field_tokens:
366
+ continue
367
+ matched = sum(1 for qt in query_tokens if qt in field_tokens)
368
+ best = max(best, matched / len(query_tokens))
369
+ return best
370
+
371
+
372
+ # ---------------------------------------------------------------------------
373
+ # Context scoring
374
+ # ---------------------------------------------------------------------------
375
+
376
+
377
+ def _score_context(
378
+ node: dict,
379
+ parent_chain: list[dict],
380
+ query_tokens: list[str],
381
+ target_roles: frozenset[str] | None,
382
+ ) -> float:
383
+ """Score contextual relevance of a node."""
384
+ score = 0.0
385
+
386
+ # Ancestor name matches query tokens
387
+ if query_tokens:
388
+ qt_set = set(query_tokens)
389
+ for ancestor in parent_chain:
390
+ if set(_tokenize(ancestor.get("name", ""))) & qt_set:
391
+ score += 0.1
392
+ break
393
+
394
+ # Ancestor role matches target roles
395
+ if target_roles:
396
+ for ancestor in parent_chain:
397
+ if ancestor.get("role") in target_roles:
398
+ score += 0.1
399
+ break
400
+
401
+ # Interactive bonus
402
+ actions = node.get("actions", [])
403
+ if any(a != "focus" for a in actions):
404
+ score += 0.05
405
+
406
+ # Visibility bonus
407
+ states = node.get("states", [])
408
+ if "offscreen" not in states:
409
+ score += 0.05
410
+
411
+ # Focused bonus
412
+ if "focused" in states:
413
+ score += 0.02
414
+
415
+ return score
416
+
417
+
418
+ # ---------------------------------------------------------------------------
419
+ # Per-node scoring
420
+ # ---------------------------------------------------------------------------
421
+
422
+
423
+ def _score_node(
424
+ node: dict,
425
+ parent_chain: list[dict],
426
+ target_roles: frozenset[str] | None,
427
+ name_tokens: list[str],
428
+ state: str | None,
429
+ ) -> float:
430
+ """Score a single node. Returns 0.0 if hard-filtered out.
431
+
432
+ Weight budget: role=0.35, name=0.50, state=0.10, context≤0.25
433
+ """
434
+ # State: hard filter
435
+ if state is not None and state not in node.get("states", []):
436
+ return 0.0
437
+
438
+ # Role: hard filter when specified
439
+ node_role = node.get("role", "")
440
+ role_score = 0.0
441
+ if target_roles is not None:
442
+ if node_role in target_roles:
443
+ role_score = 0.35
444
+ else:
445
+ return 0.0
446
+
447
+ # Name scoring
448
+ if name_tokens:
449
+ raw = _score_name(
450
+ name_tokens,
451
+ node.get("name", ""),
452
+ node.get("description", ""),
453
+ node.get("value", ""),
454
+ (node.get("attributes") or {}).get("placeholder", ""),
455
+ )
456
+ if raw == 0.0:
457
+ return 0.0 # hard filter: name specified but no match at all
458
+ name_score = raw * 0.50
459
+ else:
460
+ # No name filter: partial credit if role matched
461
+ name_score = 0.15 if target_roles else 0.0
462
+
463
+ # State bonus
464
+ state_score = 0.10 if state is not None else 0.0
465
+
466
+ # Context
467
+ context_score = _score_context(node, parent_chain, name_tokens, target_roles)
468
+
469
+ return role_score + name_score + state_score + context_score
470
+
471
+
472
+ # ---------------------------------------------------------------------------
473
+ # Tree walking
474
+ # ---------------------------------------------------------------------------
475
+
476
+
477
def _walk_and_score(
    nodes: list[dict],
    parent_chain: list[dict],
    target_roles: frozenset[str] | None,
    name_tokens: list[str],
    state: str | None,
    results: list[SearchResult],
    threshold: float,
) -> None:
    """Walk the tree depth-first (pre-order), scoring each node.

    Nodes whose score is at least ``threshold`` are appended to ``results``
    in tree order, as a shallow copy without the "children" key.  The walk
    uses an explicit stack rather than recursion so that very deep trees
    cannot hit the interpreter's recursion limit.

    Args:
        nodes: Sibling nodes to start from.
        parent_chain: Ancestors of ``nodes``.
        target_roles: Acceptable roles, or None for no role filter.
        name_tokens: Query name tokens; empty for no name filter.
        state: Required state, or None for no state filter.
        results: Output list, extended in place.
        threshold: Minimum score for a node to be recorded.
    """
    exhausted = object()  # sentinel: distinguishes "no more siblings" from any node value

    # Each frame is (iterator over a sibling list, ancestors of those siblings).
    pending = [(iter(nodes), parent_chain)]
    while pending:
        siblings, ancestors = pending[-1]
        node = next(siblings, exhausted)
        if node is exhausted:
            pending.pop()
            continue

        score = _score_node(node, ancestors, target_roles, name_tokens, state)

        # NOTE(review): a hard-filtered node scores exactly 0.0, so a
        # threshold of 0.0 or below lets filtered nodes through — confirm
        # whether callers rely on that.
        if score >= threshold:
            without_children = {key: value for key, value in node.items() if key != "children"}
            results.append(SearchResult(node=without_children, score=score))

        children = node.get("children", [])
        if children:
            # Descend before visiting the remaining siblings (pre-order).
            pending.append((iter(children), ancestors + [node]))
505
+
506
+
507
+ # ---------------------------------------------------------------------------
508
+ # Result type
509
+ # ---------------------------------------------------------------------------
510
+
511
+
512
@dataclass
class SearchResult:
    """A scored search result.

    Instances are created while walking the tree and returned by
    ``search_tree`` sorted by descending ``score``.
    """

    # The matched node as a shallow copy with its "children" key removed.
    node: dict
    # Relevance score; higher means a better match.
    score: float
518
+
519
+
520
+ # ---------------------------------------------------------------------------
521
+ # Main entry point
522
+ # ---------------------------------------------------------------------------
523
+
524
+
525
def search_tree(
    tree: list[dict],
    *,
    query: str | None = None,
    role: str | None = None,
    name: str | None = None,
    state: str | None = None,
    limit: int = 5,
    threshold: float = 0.15,
) -> list[SearchResult]:
    """Find the nodes of a CUP tree that best match the given criteria.

    The full (unpruned) tree is searched.  An explicit ``role`` or ``name``
    takes precedence over the corresponding part parsed out of ``query``.

    Args:
        tree: Raw CUP tree nodes.
        query: Freeform semantic query ("play button", "search input"),
            automatically split into a role hint and name tokens.
        role: Role filter (exact CUP role or synonym like "search bar").
        name: Name filter (fuzzy token matching).
        state: State filter (exact match).
        limit: Max results to return.
        threshold: Minimum score to include.

    Returns:
        At most ``limit`` SearchResult objects, best score first; results
        with equal scores keep their tree order.
    """
    # Derive the role hint and the name tokens from the inputs.
    query_role, query_terms = _parse_query(query) if query else (None, [])
    role_hint = role or query_role
    name_terms = _tokenize(name) if name else query_terms

    # Translate the role hint into concrete CUP roles (None = no role filter).
    wanted_roles = resolve_roles(role_hint) if role_hint else None

    # Collect every node that clears the threshold.
    hits: list[SearchResult] = []
    _walk_and_score(
        tree,
        parent_chain=[],
        target_roles=wanted_roles,
        name_tokens=name_terms,
        state=state,
        results=hits,
        threshold=threshold,
    )

    # Stable descending sort: ties stay in tree order.
    ranked = sorted(hits, key=lambda hit: hit.score, reverse=True)
    return ranked[:limit]