imgl 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imgl/__init__.py ADDED
@@ -0,0 +1,51 @@
1
"""
imgl - Image to Layout

Convert screenshots into semantic UI models (JSON/HTML/SVG) with OCR text
and element bounding boxes.
"""

# Package metadata.
__version__ = "0.7.1"
__author__ = "Tom Sapletta"
__email__ = "tom@sapletta.com"

# Re-export the public API from the submodules so that callers can import
# every name listed in ``__all__`` directly from ``imgl``.
from imgl.actions import ActionTarget, ElementNotFoundError, SceneActions, TypeAction, actions
from imgl.config import ImglConfig
from imgl.diagnose import BlankImageError, diagnose_content, worth_analyzing
from imgl.export import (
    scene_from_json,
    scene_to_html,
    scene_to_json,
    scene_to_svg,
    scene_to_vql,
    scene_to_vql_json,
    write_vql_program,
)
from imgl.pipeline import analyze
from imgl.types import BBox, Element, OcrBox, Scene, Window

# Names exported by ``from imgl import *``; grouped by originating module.
__all__ = [
    "__version__",
    # imgl.pipeline
    "analyze",
    # imgl.export
    "scene_to_json",
    "scene_from_json",
    "scene_to_html",
    "scene_to_svg",
    "scene_to_vql",
    "scene_to_vql_json",
    "write_vql_program",
    # imgl.actions
    "actions",
    "SceneActions",
    "ActionTarget",
    "TypeAction",
    "ElementNotFoundError",
    # imgl.diagnose
    "BlankImageError",
    "diagnose_content",
    "worth_analyzing",
    # imgl.config
    "ImglConfig",
    # imgl.types
    "BBox",
    "OcrBox",
    "Element",
    "Window",
    "Scene",
]
imgl/__main__.py ADDED
@@ -0,0 +1,6 @@
1
"""Main entry point: python -m imgl"""

from imgl.cli import main

if __name__ == "__main__":
    # Hand main()'s return value to SystemExit so it becomes the exit status.
    raise SystemExit(main())
imgl/actions.py ADDED
@@ -0,0 +1,259 @@
1
+ """Text-based UI actions on analyzed scenes."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from typing import Any, Iterable
7
+
8
+ from imgl.geometry import bbox_distance
9
+ from imgl.types import Element, Scene, Window
10
+
11
+
12
@dataclass
class ActionTarget:
    """An element, plus its owning window when known, resolved as an action target."""

    element: Element
    window: Window | None = None

    def center(self) -> tuple[int, int]:
        """Return the integer midpoint of the element's bounding box."""
        box = self.element.bbox
        mid_x = box.x + box.w // 2
        mid_y = box.y + box.h // 2
        return (mid_x, mid_y)

    def click_coords(self) -> tuple[int, int]:
        """Return the pixel position to click, i.e. the element center."""
        return self.center()

    def to_click_action(self) -> dict[str, Any]:
        """Describe a click on this target as a plain dictionary."""
        click_x, click_y = self.click_coords()
        target = self.element
        owner = self.window
        return dict(
            action="click",
            x=click_x,
            y=click_y,
            element_id=target.id,
            element_type=target.type,
            text=target.text,
            # Targets without a window (e.g. orphan elements) report None.
            window_id=owner.id if owner else None,
            bbox=target.bbox.to_dict(),
        )
39
+
40
+
41
@dataclass
class TypeAction:
    """Typing of ``value`` into the input element held by ``target``."""

    target: ActionTarget
    value: str
    label: str | None = None

    def coords(self) -> tuple[int, int]:
        """Return the target's click coordinates, where the text is typed."""
        return self.target.click_coords()

    def to_dict(self) -> dict[str, Any]:
        """Describe this type action as a plain dictionary."""
        pos_x, pos_y = self.coords()
        input_el = self.target.element
        owner = self.target.window
        return dict(
            action="type",
            x=pos_x,
            y=pos_y,
            text=self.value,
            element_id=input_el.id,
            element_type=input_el.type,
            label=self.label,
            # Targets without a window report None.
            window_id=owner.id if owner else None,
            bbox=input_el.bbox.to_dict(),
        )
65
+
66
+
67
@dataclass
class SceneActions:
    """Find and interact with elements in a Scene.

    ``find``/``find_one`` return ``ActionTarget`` objects; ``click``,
    ``type_into`` and ``list_actions`` return plain action dictionaries.
    """

    scene: Scene

    def find(
        self,
        element_type: str | None = None,
        *,
        text: str | None = None,
        label: str | None = None,
        window: str | None = None,
        contains: bool = True,
    ) -> list[ActionTarget]:
        """Find elements matching type, text, label, or window.

        Args:
            element_type: Exact element type to keep; falsy means any type.
            text: Query matched against the element's own text.
            label: Query matched against an input's ``metadata["label"]`` or
                a label element's text. Elements of other types never match
                a label query. Inputs are additionally matched through the
                label element associated with them.
            window: Restrict the search to windows matching this id/title.
            contains: Substring match when true, whole-string match when
                false (both case-insensitive).

        Returns:
            Matching targets; direct matches first, then inputs matched
            through an associated label element.
        """
        targets: list[ActionTarget] = []

        for win, element in _iter_elements(self.scene, window=window):
            if element_type and element.type != element_type:
                continue
            if text is not None and not _text_matches(element.text, text, contains=contains):
                continue
            if label is not None:
                if element.type == "input":
                    input_label = element.metadata.get("label") or ""
                    if not _text_matches(str(input_label), label, contains=contains):
                        continue
                elif element.type == "label":
                    if not _text_matches(element.text, label, contains=contains):
                        continue
                else:
                    continue
            targets.append(ActionTarget(element=element, window=win))

        # Second pass: inputs whose own metadata did not match may still be
        # identified through the label element associated with them.
        if label is not None and element_type in {None, "input"}:
            for win, element in _iter_elements(self.scene, window=window):
                if element.type != "input":
                    continue
                if any(target.element.id == element.id for target in targets):
                    continue
                # The text filter applies to this pass as well; without it
                # an input with non-matching text could be returned.
                if text is not None and not _text_matches(element.text, text, contains=contains):
                    continue
                matched_label = _find_label_for_input(self.scene, element, win)
                if matched_label and _text_matches(matched_label.text, label, contains=contains):
                    targets.append(ActionTarget(element=element, window=win))

        return targets

    def find_one(
        self,
        element_type: str | None = None,
        *,
        text: str | None = None,
        label: str | None = None,
        window: str | None = None,
        contains: bool = True,
    ) -> ActionTarget | None:
        """Return the first result of ``find`` for the same query, or None."""
        matches = self.find(
            element_type,
            text=text,
            label=label,
            window=window,
            contains=contains,
        )
        return matches[0] if matches else None

    def click(
        self,
        element_type: str | None = None,
        *,
        text: str | None = None,
        label: str | None = None,
        window: str | None = None,
    ) -> dict[str, Any]:
        """Resolve a click action for the first matching element.

        Raises:
            ElementNotFoundError: If nothing matches the query.
        """
        target = self.find_one(element_type, text=text, label=label, window=window)
        if target is None:
            raise ElementNotFoundError(
                _format_query(element_type, text=text, label=label, window=window)
            )
        return target.to_click_action()

    def type_into(
        self,
        value: str,
        *,
        label: str | None = None,
        text: str | None = None,
        window: str | None = None,
    ) -> dict[str, Any]:
        """Resolve a type action for an input field.

        The input is looked up by label and text together; if that finds
        nothing and ``text`` was given, the lookup is retried by text alone.

        Raises:
            ElementNotFoundError: If no input matches.
        """
        target = self.find_one("input", label=label, text=text, window=window)
        if target is None and text is not None:
            target = self.find_one("input", text=text, window=window)
        if target is None:
            raise ElementNotFoundError(
                _format_query("input", text=text, label=label, window=window)
            )
        # Prefer the caller's label; otherwise report the input's own one.
        resolved_label = label or target.element.metadata.get("label")
        return TypeAction(target=target, value=value, label=resolved_label).to_dict()

    def list_actions(self) -> list[dict[str, Any]]:
        """List available click/type actions for interactive elements.

        Buttons and icon buttons yield click actions; inputs yield type
        actions pre-filled with the input's current text.
        """
        available: list[dict[str, Any]] = []
        for win, element in _iter_elements(self.scene):
            # Keep the owning window so that ``window_id`` is populated.
            target = ActionTarget(element=element, window=win)
            if element.type in {"button", "icon_button"}:
                available.append(target.to_click_action())
            elif element.type == "input":
                available.append(
                    TypeAction(
                        target=target,
                        value=element.text or "",
                        label=element.metadata.get("label"),
                    ).to_dict()
                )
        return available
182
+
183
+
184
class ElementNotFoundError(LookupError):
    """Raised when no element matches the query.

    ``SceneActions.click`` and ``SceneActions.type_into`` raise it with a
    message produced by ``_format_query``.
    """
186
+
187
+
188
def actions(scene: Scene) -> SceneActions:
    """Wrap ``scene`` in a SceneActions helper for querying its elements."""
    helper = SceneActions(scene)
    return helper
191
+
192
+
193
+ def _format_query(
194
+ element_type: str | None,
195
+ *,
196
+ text: str | None,
197
+ label: str | None,
198
+ window: str | None,
199
+ ) -> str:
200
+ parts = []
201
+ if element_type:
202
+ parts.append(f"type={element_type}")
203
+ if text:
204
+ parts.append(f"text={text!r}")
205
+ if label:
206
+ parts.append(f"label={label!r}")
207
+ if window:
208
+ parts.append(f"window={window!r}")
209
+ return "element not found: " + ", ".join(parts)
210
+
211
+
212
+ def _text_matches(value: str | None, query: str, *, contains: bool) -> bool:
213
+ if value is None:
214
+ return False
215
+ left = value.casefold()
216
+ right = query.casefold()
217
+ return right in left if contains else left == right
218
+
219
+
220
+ def _iter_elements(
221
+ scene: Scene,
222
+ *,
223
+ window: str | None = None,
224
+ ) -> Iterable[tuple[Window | None, Element]]:
225
+ for win in scene.windows:
226
+ if window is not None and not _window_matches(win, window):
227
+ continue
228
+ for element in win.elements:
229
+ yield win, element
230
+ if window is None:
231
+ for element in scene.orphan_elements:
232
+ yield None, element
233
+
234
+
235
+ def _window_matches(window: Window, query: str) -> bool:
236
+ query_cf = query.casefold()
237
+ if window.id.casefold() == query_cf:
238
+ return True
239
+ if window.title and query_cf in window.title.casefold():
240
+ return True
241
+ return False
242
+
243
+
244
+ def _find_label_for_input(
245
+ scene: Scene,
246
+ input_element: Element,
247
+ window: Window | None,
248
+ ) -> Element | None:
249
+ candidates: list[Element] = []
250
+ for win, element in _iter_elements(scene, window=window.id if window else None):
251
+ if element.type != "label":
252
+ continue
253
+ if element.metadata.get("for_input") == input_element.id:
254
+ return element
255
+ candidates.append(element)
256
+
257
+ if not candidates:
258
+ return None
259
+ return min(candidates, key=lambda label: bbox_distance(label.bbox, input_element.bbox))
imgl/capture.py ADDED
@@ -0,0 +1,204 @@
1
+ """Screenshot capture helpers."""
2
+
3
from __future__ import annotations

import os
import shutil
import subprocess
from collections.abc import Callable
from datetime import UTC, datetime
from pathlib import Path
10
+
11
+
12
class CaptureError(RuntimeError):
    """Raised when screen capture fails.

    Base class for this module's capture errors; ``BlankCaptureError``
    derives from it.
    """
14
+
15
+
16
class BlankCaptureError(CaptureError):
    """Raised when capture succeeded but image is empty/black.

    ``capture_screen`` raises it once every backend has been tried, whether
    the backends failed outright or only produced blank images.
    """
18
+
19
+
20
+ def default_capture_path(out: str | Path | None = None) -> Path:
21
+ if out:
22
+ path = Path(out).expanduser()
23
+ path.parent.mkdir(parents=True, exist_ok=True)
24
+ return path
25
+ ts = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
26
+ path = Path.home() / ".imgl" / "captures" / f"screen_{ts}.png"
27
+ path.parent.mkdir(parents=True, exist_ok=True)
28
+ return path
29
+
30
+
31
+ def _is_wayland() -> bool:
32
+ session = (os.environ.get("XDG_SESSION_TYPE") or "").lower()
33
+ return session == "wayland" or bool(os.environ.get("WAYLAND_DISPLAY"))
34
+
35
+
36
def capture_screen(
    out: str | Path | None = None,
    *,
    monitor: int = 1,
    interactive: bool = False,
    allow_blank: bool = False,
) -> Path:
    """
    Capture the desktop to a PNG file.

    Tries vql capture (if installed), then grim/gnome-screenshot/scrot.
    On Wayland, mss is avoided (usually returns a black frame).

    Args:
        out: Destination file; a default path is chosen when falsy.
        monitor: Monitor number passed to the vql and mss backends.
        interactive: Passed to vql; also enables the portal backend.
        allow_blank: Accept a capture even if it looks blank.

    Returns:
        The path the capture was written to.

    Raises:
        BlankCaptureError: If no backend produced an acceptable image. The
            message lists what each attempted backend reported.
    """
    path = default_capture_path(out)
    errors: list[str] = []

    if _try_vql_capture(path, monitor=monitor, interactive=interactive, allow_blank=allow_blank):
        return path

    for name, runner in _native_backends(interactive=interactive):
        # A failing backend must not abort the chain, so its error is only
        # recorded and the next backend is tried.
        try:
            if not runner(path):
                errors.append(f"{name}: command failed")
            elif allow_blank or not _is_blank_image(path):
                return path
            else:
                errors.append(f"{name}: captured but image is blank")
        except Exception as exc:
            errors.append(f"{name}: {exc}")

    if not _is_wayland():
        try:
            if not _capture_with_mss(path, monitor=monitor):
                # Record the attempt so it shows up in the diagnostics.
                errors.append("mss: capture failed")
            elif allow_blank or not _is_blank_image(path):
                return path
            else:
                errors.append("mss: captured but image is blank")
        except Exception as exc:
            errors.append(f"mss: {exc}")

    hint = (
        "Screen capture failed or produced a blank image (common on GNOME/Wayland). "
        "Try: imgl capture --interactive OR use an existing PNG:\n"
        " imgl vql /tmp/screen.png -o layout.vql.json\n"
        "Install vql for portal capture: pip install -e ~/github/oqlos/vql"
    )
    raise BlankCaptureError(f"{hint}\nTried: {'; '.join(errors) or 'no backends'}")
82
+
83
+
84
+ def _try_vql_capture(
85
+ path: Path,
86
+ *,
87
+ monitor: int,
88
+ interactive: bool,
89
+ allow_blank: bool,
90
+ ) -> bool:
91
+ try:
92
+ from vql.adopt.window import capture_screen as vql_capture
93
+ except ImportError:
94
+ return False
95
+
96
+ try:
97
+ info = vql_capture(path, monitor=monitor, interactive=interactive)
98
+ captured = Path(info.path)
99
+ if allow_blank or not _is_blank_image(captured):
100
+ return True
101
+ except Exception:
102
+ pass
103
+ return False
104
+
105
+
106
def _native_backends(*, interactive: bool) -> list[tuple[str, Callable[[Path], bool]]]:
    """Return the ``(name, runner)`` capture backends in the order to try.

    Each runner takes the output path and returns whether it captured.
    With ``interactive`` the portal backend comes first. The command-line
    tools follow, with gnome-screenshot leading on Wayland and scrot
    leading otherwise.
    """
    backends: list[tuple[str, Callable[[Path], bool]]] = []

    if interactive:
        portal = _capture_with_portal
        backends.append(("portal-interactive", lambda p: portal(p, interactive=True)))

    if _is_wayland():
        order = (
            ("gnome-screenshot", _capture_with_gnome_screenshot),
            ("scrot", _capture_with_scrot),
            ("grim", _capture_with_grim),
        )
    else:
        order = (
            ("scrot", _capture_with_scrot),
            ("gnome-screenshot", _capture_with_gnome_screenshot),
            ("grim", _capture_with_grim),
        )
    backends.extend(order)
    return backends
127
+
128
+
129
+ def _run_command(cmd: list[str], path: Path, *, timeout: int = 20) -> bool:
130
+ path.parent.mkdir(parents=True, exist_ok=True)
131
+ proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, check=False)
132
+ return proc.returncode == 0 and path.is_file() and path.stat().st_size > 0
133
+
134
+
135
+ def _capture_with_grim(path: Path) -> bool:
136
+ if not shutil.which("grim"):
137
+ return False
138
+ return _run_command(["grim", str(path)], path)
139
+
140
+
141
+ def _capture_with_gnome_screenshot(path: Path) -> bool:
142
+ if not shutil.which("gnome-screenshot"):
143
+ return False
144
+ return _run_command(["gnome-screenshot", "-f", str(path)], path, timeout=25)
145
+
146
+
147
+ def _capture_with_scrot(path: Path) -> bool:
148
+ if not shutil.which("scrot"):
149
+ return False
150
+ return _run_command(["scrot", str(path)], path)
151
+
152
+
153
+ def _capture_with_portal(path: Path, *, interactive: bool) -> bool:
154
+ """xdg-desktop-portal screenshot via vql helper script when available."""
155
+ try:
156
+ from vql.adopt.portal_capture import capture_via_portal
157
+ except ImportError:
158
+ return False
159
+
160
+ result = capture_via_portal(str(path), interactive=interactive)
161
+ return bool(result.get("ok")) and path.is_file()
162
+
163
+
164
def _capture_with_mss(path: Path, *, monitor: int) -> bool:
    """Grab one monitor with ``mss`` and save it to ``path`` via Pillow.

    ``monitor`` is raised to at least 1 and then capped at the last index
    of the monitor list reported by mss. Returns whether ``path`` is a file
    once the image has been saved.
    """
    import mss
    from PIL import Image

    with mss.mss() as grabber:
        available = grabber.monitors
        last_index = len(available) - 1
        index = monitor if monitor >= 1 else 1
        if index > last_index:
            index = last_index
        shot = grabber.grab(available[index])
        frame = Image.frombytes("RGB", shot.size, shot.bgra, "raw", "BGRX")
        frame.save(path)
    return path.is_file()
175
+
176
+
177
def _is_blank_image(path: Path) -> bool:
    """Tell whether the image at ``path`` looks blank.

    The verdict normally comes from ``imgl.diagnose``: the image is blank
    when the diagnosis reports ``"ok"`` and is not worth analyzing. If that
    path raises for any reason, a Pillow-based heuristic is used instead:
    the image is reduced to 32x32 and counts as blank when all pixels are
    identical or the brightest pixel has a luma below 8.
    """
    try:
        from imgl.diagnose import diagnose_content, worth_analyzing

        diag = diagnose_content(path)
        return bool(diag.get("ok")) and not worth_analyzing(diag)
    except Exception:
        # Best-effort fallback so that blank detection still works when the
        # diagnose module is unavailable or fails on this image.
        from PIL import Image

        # Close the file handle as soon as the thumbnail has been built.
        with Image.open(path) as source:
            small = source.convert("RGB").resize((32, 32))

        # Use get_flattened_data() where the installed Pillow provides it,
        # and fall back to getdata() on versions that do not.
        flatten = getattr(small, "get_flattened_data", None)
        pixels = list(flatten() if flatten is not None else small.getdata())
        if not pixels:
            return True
        if len(set(pixels)) <= 1:
            return True
        brightest = max(int(0.299 * r + 0.587 * g + 0.114 * b) for r, g, b in pixels)
        return brightest < 8
195
+
196
+
197
def capture_status_message(path: Path) -> str | None:
    """Return a warning for a capture that looks blank, or None otherwise."""
    if not _is_blank_image(path):
        return None
    warning = (
        "Capture looks empty or low-content. "
        "Use an existing screenshot, e.g. imgl vql /tmp/screen.png -o layout.vql.json"
    )
    return warning