PyPI - imgl - Versions diffs - 0.7.1__py3-none-any.whl - Mend

imgl 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47)

imgl/__init__.py +51 -0
imgl/__main__.py +6 -0
imgl/actions.py +259 -0
imgl/capture.py +204 -0
imgl/catalog.py +334 -0
imgl/catalog_filter.py +138 -0
imgl/classify/__init__.py +5 -0
imgl/classify/gui_heuristics.py +261 -0
imgl/cli.py +603 -0
imgl/config.py +23 -0
imgl/coords.py +70 -0
imgl/detect/__init__.py +11 -0
imgl/detect/img2vql_bridge.py +64 -0
imgl/detect/local.py +278 -0
imgl/detect/rectangles.py +96 -0
imgl/diagnose.py +247 -0
imgl/execute.py +103 -0
imgl/export/__init__.py +30 -0
imgl/export/_escape.py +19 -0
imgl/export/annotate_export.py +299 -0
imgl/export/html_export.py +149 -0
imgl/export/json_export.py +22 -0
imgl/export/svg_export.py +137 -0
imgl/export/vql_adapter.py +244 -0
imgl/geometry.py +37 -0
imgl/interact.py +511 -0
imgl/layout.py +108 -0
imgl/llm_catalog.py +422 -0
imgl/nlp2uri.py +248 -0
imgl/ocr/__init__.py +12 -0
imgl/ocr/base.py +14 -0
imgl/ocr/lang.py +32 -0
imgl/ocr/tesseract.py +94 -0
imgl/paths.py +41 -0
imgl/pipeline.py +116 -0
imgl/preprocess.py +63 -0
imgl/py.typed +0 -0
imgl/scene_cache.py +63 -0
imgl/types.py +170 -0
imgl/uri.py +118 -0
imgl/window_scope.py +408 -0
imgl-0.7.1.dist-info/METADATA +264 -0
imgl-0.7.1.dist-info/RECORD +47 -0
imgl-0.7.1.dist-info/WHEEL +5 -0
imgl-0.7.1.dist-info/entry_points.txt +2 -0
imgl-0.7.1.dist-info/licenses/LICENSE +201 -0
imgl-0.7.1.dist-info/top_level.txt +1 -0

imgl/__init__.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""
+imgl - Image to Layout
+Convert screenshots into semantic UI models (JSON/HTML/SVG) with OCR text
+and element bounding boxes.
+"""
+__version__ = "0.7.1"
+__author__ = "Tom Sapletta"
+__email__ = "tom@sapletta.com"
+from imgl.actions import ActionTarget, ElementNotFoundError, SceneActions, TypeAction, actions
+from imgl.config import ImglConfig
+from imgl.diagnose import BlankImageError, diagnose_content, worth_analyzing
+from imgl.export import (
+    scene_from_json,
+    scene_to_html,
+    scene_to_json,
+    scene_to_svg,
+    scene_to_vql,
+    scene_to_vql_json,
+    write_vql_program,
+)
+from imgl.pipeline import analyze
+from imgl.types import BBox, Element, OcrBox, Scene, Window
+# Public API of the package: every name listed here is either defined in this
+# module (__version__) or imported above from an imgl submodule.
+__all__ = [
+    "__version__",
+    "analyze",
+    "scene_to_json",
+    "scene_from_json",
+    "scene_to_html",
+    "scene_to_svg",
+    "scene_to_vql",
+    "scene_to_vql_json",
+    "write_vql_program",
+    "actions",
+    "SceneActions",
+    "ActionTarget",
+    "TypeAction",
+    "ElementNotFoundError",
+    "BlankImageError",
+    "diagnose_content",
+    "worth_analyzing",
+    "ImglConfig",
+    "BBox",
+    "OcrBox",
+    "Element",
+    "Window",
+    "Scene",
+]

imgl/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Main entry point: python -m imgl"""
+from imgl.cli import main
+# Run the CLI only when this module is executed directly; main()'s return
+# value is handed to SystemExit so it becomes the process exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

imgl/actions.py ADDED Viewed

@@ -0,0 +1,259 @@
+"""Text-based UI actions on analyzed scenes."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any, Iterable
+from imgl.geometry import bbox_distance
+from imgl.types import Element, Scene, Window
+@dataclass
+class ActionTarget:
+    """An element, with its owning window when known, that can be clicked or typed into."""
+    element: Element
+    window: Window | None = None
+    def center(self) -> tuple[int, int]:
+        """Return the midpoint of the element's bounding box (integer division)."""
+        box = self.element.bbox
+        mid_x = box.x + box.w // 2
+        mid_y = box.y + box.h // 2
+        return (mid_x, mid_y)
+    def click_coords(self) -> tuple[int, int]:
+        """Return the pixel coordinates for a click, i.e. the element center."""
+        return self.center()
+    def to_click_action(self) -> dict[str, Any]:
+        """Build the dictionary describing a click on this target."""
+        click_x, click_y = self.click_coords()
+        target = self.element
+        payload: dict[str, Any] = {"action": "click", "x": click_x, "y": click_y}
+        payload["element_id"] = target.id
+        payload["element_type"] = target.type
+        payload["text"] = target.text
+        # The window is optional, so its id is reported as None when absent.
+        payload["window_id"] = None if not self.window else self.window.id
+        payload["bbox"] = target.bbox.to_dict()
+        return payload
+@dataclass
+class TypeAction:
+    """A request to type ``value`` into the element identified by ``target``."""
+    target: ActionTarget
+    value: str
+    label: str | None = None
+    def coords(self) -> tuple[int, int]:
+        """Return the click coordinates of the target element."""
+        return self.target.click_coords()
+    def to_dict(self) -> dict[str, Any]:
+        """Build the dictionary describing this type action."""
+        pos_x, pos_y = self.coords()
+        payload: dict[str, Any] = {"action": "type", "x": pos_x, "y": pos_y}
+        payload["text"] = self.value
+        payload["element_id"] = self.target.element.id
+        payload["element_type"] = self.target.element.type
+        payload["label"] = self.label
+        # The target's window is optional, so its id is reported as None when absent.
+        payload["window_id"] = None if not self.target.window else self.target.window.id
+        payload["bbox"] = self.target.element.bbox.to_dict()
+        return payload
+@dataclass
+class SceneActions:
+    """Find and interact with elements in a Scene.
+    Methods only resolve actions into plain dictionaries; nothing is clicked
+    or typed here.
+    """
+    scene: Scene
+    def find(
+        self,
+        element_type: str | None = None,
+        *,
+        text: str | None = None,
+        label: str | None = None,
+        window: str | None = None,
+        contains: bool = True,
+    ) -> list[ActionTarget]:
+        """Find elements matching every given criterion (type, text, label, window).
+        Args:
+            element_type: Required element type, or None for any type.
+            text: Text to match against the element's own text.
+            label: Label to match. Inputs are matched through their "label"
+                metadata, or failing that through an associated/nearest label
+                element; label elements are matched through their text; other
+                element types never match a label query.
+            window: Window id (exact) or title fragment restricting the search.
+            contains: Substring matching when true, exact (case-folded) otherwise.
+        Returns:
+            Matching targets in scene iteration order; label-proximity matches
+            come after direct matches.
+        """
+        targets: list[ActionTarget] = []
+        for win, element in _iter_elements(self.scene, window=window):
+            if element_type and element.type != element_type:
+                continue
+            if text is not None and not _text_matches(element.text, text, contains=contains):
+                continue
+            if label is not None:
+                if element.type == "input":
+                    input_label = element.metadata.get("label") or ""
+                    if not _text_matches(str(input_label), label, contains=contains):
+                        continue
+                elif element.type == "label":
+                    if not _text_matches(element.text, label, contains=contains):
+                        continue
+                else:
+                    continue
+            targets.append(ActionTarget(element=element, window=win))
+        if label is not None and element_type in {None, "input"}:
+            # Second pass: inputs whose own metadata did not match may still be
+            # described by a separate label element.
+            for win, element in _iter_elements(self.scene, window=window):
+                if element.type != "input":
+                    continue
+                if any(target.element.id == element.id for target in targets):
+                    continue
+                # The text criterion must hold here as well; otherwise a
+                # text+label query could return inputs that fail the text filter.
+                if text is not None and not _text_matches(element.text, text, contains=contains):
+                    continue
+                matched_label = _find_label_for_input(self.scene, element, win)
+                if matched_label and _text_matches(matched_label.text, label, contains=contains):
+                    targets.append(ActionTarget(element=element, window=win))
+        return targets
+    def find_one(
+        self,
+        element_type: str | None = None,
+        *,
+        text: str | None = None,
+        label: str | None = None,
+        window: str | None = None,
+        contains: bool = True,
+    ) -> ActionTarget | None:
+        """Return the first result of find() for the same arguments, or None."""
+        matches = self.find(
+            element_type,
+            text=text,
+            label=label,
+            window=window,
+            contains=contains,
+        )
+        return matches[0] if matches else None
+    def click(
+        self,
+        element_type: str | None = None,
+        *,
+        text: str | None = None,
+        label: str | None = None,
+        window: str | None = None,
+    ) -> dict[str, Any]:
+        """Resolve a click action for the first matching element.
+        Raises:
+            ElementNotFoundError: when no element matches the query.
+        """
+        target = self.find_one(element_type, text=text, label=label, window=window)
+        if target is None:
+            raise ElementNotFoundError(
+                _format_query(element_type, text=text, label=label, window=window)
+            )
+        return target.to_click_action()
+    def type_into(
+        self,
+        value: str,
+        *,
+        label: str | None = None,
+        text: str | None = None,
+        window: str | None = None,
+    ) -> dict[str, Any]:
+        """Resolve a type action for an input field.
+        The input is looked up by label and text together; if that fails and
+        ``text`` was given, the lookup is retried by text alone.
+        Raises:
+            ElementNotFoundError: when no input matches the query.
+        """
+        target = self.find_one("input", label=label, text=text, window=window)
+        if target is None and text is not None:
+            target = self.find_one("input", text=text, window=window)
+        if target is None:
+            raise ElementNotFoundError(
+                _format_query("input", text=text, label=label, window=window)
+            )
+        # Prefer the caller's label; otherwise report the input's own metadata label.
+        resolved_label = label or target.element.metadata.get("label")
+        return TypeAction(target=target, value=value, label=resolved_label).to_dict()
+    def list_actions(self) -> list[dict[str, Any]]:
+        """List available click/type actions for interactive elements.
+        Buttons and icon buttons yield click actions; inputs yield type actions
+        pre-filled with the input's current text.
+        """
+        actions: list[dict[str, Any]] = []
+        for win, element in _iter_elements(self.scene):
+            # Keep the owning window so "window_id" is populated, as it is for
+            # targets returned by find().
+            target = ActionTarget(element=element, window=win)
+            if element.type in {"button", "icon_button"}:
+                actions.append(target.to_click_action())
+            elif element.type == "input":
+                actions.append(
+                    TypeAction(
+                        target=target,
+                        value=element.text or "",
+                        label=element.metadata.get("label"),
+                    ).to_dict()
+                )
+        return actions
+class ElementNotFoundError(LookupError):
+    """Raised when no element matches the query (see SceneActions.click / type_into)."""
+def actions(scene: Scene) -> SceneActions:
+    """Wrap ``scene`` in a SceneActions helper."""
+    helper = SceneActions(scene=scene)
+    return helper
+def _format_query(
+    element_type: str | None,
+    *,
+    text: str | None,
+    label: str | None,
+    window: str | None,
+) -> str:
+    """Render the non-empty query criteria as an "element not found" message."""
+    described = [f"type={element_type}"] if element_type else []
+    for key, criterion in (("text", text), ("label", label), ("window", window)):
+        if criterion:
+            described.append(f"{key}={criterion!r}")
+    return "element not found: " + ", ".join(described)
+def _text_matches(value: str | None, query: str, *, contains: bool) -> bool:
+    """Case-insensitively compare ``value`` with ``query``; None never matches."""
+    if value is None:
+        return False
+    haystack, needle = value.casefold(), query.casefold()
+    if contains:
+        return needle in haystack
+    return haystack == needle
+def _iter_elements(
+    scene: Scene,
+    *,
+    window: str | None = None,
+) -> Iterable[tuple[Window | None, Element]]:
+    """Yield (window, element) pairs for the scene.
+    With a ``window`` query only matching windows are visited; without one,
+    all windows are visited and orphan elements follow, paired with None.
+    """
+    unfiltered = window is None
+    for candidate in scene.windows:
+        if unfiltered or _window_matches(candidate, window):
+            yield from ((candidate, item) for item in candidate.elements)
+    if unfiltered:
+        yield from ((None, item) for item in scene.orphan_elements)
+def _window_matches(window: Window, query: str) -> bool:
+    """Return True when ``query`` equals the window id or occurs in its title (case-insensitive)."""
+    needle = query.casefold()
+    if window.id.casefold() == needle:
+        return True
+    title = window.title
+    # A missing or empty title can only match through the id check above.
+    return bool(title) and needle in title.casefold()
+def _find_label_for_input(
+    scene: Scene,
+    input_element: Element,
+    window: Window | None,
+) -> Element | None:
+    """Pick the label element that describes ``input_element``.
+    A label whose "for_input" metadata names the input wins immediately;
+    otherwise the label with the smallest bbox_distance to the input is
+    returned, or None when the searched scope has no labels.
+    """
+    scope = window.id if window else None
+    nearby: list[Element] = []
+    for _, candidate in _iter_elements(scene, window=scope):
+        if candidate.type == "label":
+            if candidate.metadata.get("for_input") == input_element.id:
+                return candidate
+            nearby.append(candidate)
+    if nearby:
+        return min(nearby, key=lambda lab: bbox_distance(lab.bbox, input_element.bbox))
+    return None

imgl/capture.py ADDED Viewed

@@ -0,0 +1,204 @@
+"""Screenshot capture helpers."""
+from __future__ import annotations
+import os
+import shutil
+import subprocess
+from collections.abc import Callable
+from datetime import UTC, datetime
+from pathlib import Path
+class CaptureError(RuntimeError):
+    """Raised when screen capture fails; base class of BlankCaptureError."""
+class BlankCaptureError(CaptureError):
+    """Raised when capture succeeded but image is empty/black.
+    capture_screen() also raises it when every backend failed outright.
+    """
+def default_capture_path(out: str | Path | None = None) -> Path:
+    """Resolve the capture output path and make sure its parent directory exists.
+    A truthy ``out`` is used as given (with ``~`` expanded); otherwise a
+    UTC-timestamped file under ``~/.imgl/captures`` is chosen.
+    """
+    if not out:
+        stamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
+        target = Path.home() / ".imgl" / "captures" / f"screen_{stamp}.png"
+    else:
+        target = Path(out).expanduser()
+    target.parent.mkdir(parents=True, exist_ok=True)
+    return target
+def _is_wayland() -> bool:
+    """Detect a Wayland session from XDG_SESSION_TYPE or WAYLAND_DISPLAY."""
+    env = os.environ
+    session_type = (env.get("XDG_SESSION_TYPE") or "").lower()
+    if session_type == "wayland":
+        return True
+    return bool(env.get("WAYLAND_DISPLAY"))
+def capture_screen(
+    out: str | Path | None = None,
+    *,
+    monitor: int = 1,
+    interactive: bool = False,
+    allow_blank: bool = False,
+) -> Path:
+    """
+    Capture the desktop to a PNG file.
+    Tries vql capture (if installed), then the native backends from
+    _native_backends() in order, and finally mss.
+    On Wayland, mss is avoided (usually returns a black frame).
+    Args:
+        out: Destination file; a timestamped default path is used when omitted.
+        monitor: Monitor index passed to the vql and mss backends.
+        interactive: Request an interactive capture (tries the portal backend first).
+        allow_blank: Accept a capture even if it looks blank.
+    Returns:
+        The resolved output path.
+    Raises:
+        BlankCaptureError: when every backend failed or produced a blank image;
+            the message lists what was tried.
+    """
+    path = default_capture_path(out)
+    errors: list[str] = []
+    if _try_vql_capture(path, monitor=monitor, interactive=interactive, allow_blank=allow_blank):
+        return path
+    for name, runner in _native_backends(interactive=interactive):
+        # Best effort per backend: a failure is recorded and the next one is tried.
+        try:
+            if runner(path):
+                if allow_blank or not _is_blank_image(path):
+                    return path
+                errors.append(f"{name}: captured but image is blank")
+            else:
+                errors.append(f"{name}: command failed")
+        except Exception as exc:
+            errors.append(f"{name}: {exc}")
+    if not _is_wayland():
+        try:
+            if _capture_with_mss(path, monitor=monitor):
+                if allow_blank or not _is_blank_image(path):
+                    return path
+                errors.append("mss: captured but image is blank")
+            else:
+                # Record the failure so the "Tried:" summary is complete.
+                errors.append("mss: capture failed")
+        except Exception as exc:
+            errors.append(f"mss: {exc}")
+    hint = (
+        "Screen capture failed or produced a blank image (common on GNOME/Wayland). "
+        "Try: imgl capture --interactive  OR use an existing PNG:\n"
+        "  imgl vql /tmp/screen.png -o layout.vql.json\n"
+        "Install vql for portal capture: pip install -e ~/github/oqlos/vql"
+    )
+    raise BlankCaptureError(f"{hint}\nTried: {'; '.join(errors) or 'no backends'}")
+def _try_vql_capture(
+    path: Path,
+    *,
+    monitor: int,
+    interactive: bool,
+    allow_blank: bool,
+) -> bool:
+    """Attempt a capture through the optional vql package.
+    Returns False when vql cannot be imported, when the capture raises, or
+    when the captured image is blank and ``allow_blank`` is false.
+    """
+    try:
+        from vql.adopt.window import capture_screen as vql_capture
+    except ImportError:
+        return False
+    try:
+        result = vql_capture(path, monitor=monitor, interactive=interactive)
+        captured = Path(result.path)
+        usable = bool(allow_blank or not _is_blank_image(captured))
+    except Exception:
+        # Best effort: any vql failure is reported as "no capture".
+        usable = False
+    return usable
+def _native_backends(*, interactive: bool) -> list[tuple[str, Callable[[Path], bool]]]:
+    """Return (name, runner) pairs for the native capture backends, in try order.
+    Each runner takes the output path and returns True on success. With
+    ``interactive`` the portal backend comes first; the order of the
+    command-line tools depends on whether a Wayland session is detected.
+    """
+    # ``Callable[[Path], bool]`` replaces the builtin ``callable``, which is a
+    # function and not a valid type annotation.
+    backends: list[tuple[str, Callable[[Path], bool]]] = []
+    if interactive:
+        portal = _capture_with_portal
+        backends.append(("portal-interactive", lambda p: portal(p, interactive=True)))
+    if _is_wayland():
+        order = (
+            ("gnome-screenshot", _capture_with_gnome_screenshot),
+            ("scrot", _capture_with_scrot),
+            ("grim", _capture_with_grim),
+        )
+    else:
+        order = (
+            ("scrot", _capture_with_scrot),
+            ("gnome-screenshot", _capture_with_gnome_screenshot),
+            ("grim", _capture_with_grim),
+        )
+    backends.extend(order)
+    return backends
+def _run_command(cmd: list[str], path: Path, *, timeout: int = 20) -> bool:
+    """Run ``cmd`` and report whether it exited with 0 and left a non-empty file at ``path``."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    completed = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout, check=False)
+    if completed.returncode != 0:
+        return False
+    if not path.is_file():
+        return False
+    return path.stat().st_size > 0
+def _capture_with_grim(path: Path) -> bool:
+    """Capture with ``grim`` when it is on PATH; False when unavailable or failed."""
+    if shutil.which("grim") is None:
+        return False
+    command = ["grim", str(path)]
+    return _run_command(command, path)
+def _capture_with_gnome_screenshot(path: Path) -> bool:
+    """Capture with ``gnome-screenshot -f`` (25 s timeout) when it is on PATH."""
+    if shutil.which("gnome-screenshot") is None:
+        return False
+    command = ["gnome-screenshot", "-f", str(path)]
+    return _run_command(command, path, timeout=25)
+def _capture_with_scrot(path: Path) -> bool:
+    """Capture with ``scrot`` when it is on PATH; False when unavailable or failed."""
+    if shutil.which("scrot") is None:
+        return False
+    command = ["scrot", str(path)]
+    return _run_command(command, path)
+def _capture_with_portal(path: Path, *, interactive: bool) -> bool:
+    """Capture through vql's portal helper; False when vql is not installed.
+    Succeeds only if the helper's result has a truthy "ok" entry and the
+    output file exists.
+    """
+    try:
+        from vql.adopt.portal_capture import capture_via_portal
+    except ImportError:
+        return False
+    outcome = capture_via_portal(str(path), interactive=interactive)
+    if not outcome.get("ok"):
+        return False
+    return path.is_file()
+def _capture_with_mss(path: Path, *, monitor: int) -> bool:
+    """Grab one monitor with mss and save it to ``path``; True if the file exists afterwards.
+    The requested index is clamped to at least 1 and at most the last entry
+    of the monitor list.
+    """
+    import mss
+    from PIL import Image
+    with mss.mss() as sct:
+        available = sct.monitors
+        last = len(available) - 1
+        chosen = max(monitor, 1)
+        if chosen > last:
+            chosen = last
+        frame = sct.grab(available[chosen])
+        # The BGRA buffer is decoded as BGRX, i.e. the fourth byte is ignored.
+        Image.frombytes("RGB", frame.size, frame.bgra, "raw", "BGRX").save(path)
+    return path.is_file()
+def _is_blank_image(path: Path) -> bool:
+    """Return True when the image at ``path`` looks empty.
+    Primary check: imgl.diagnose, where an image is blank if the diagnosis is
+    "ok" but not worth analyzing. If that raises for any reason, fall back to
+    a coarse test on a 32x32 thumbnail: blank when all pixels are identical or
+    the brightest pixel has a luma below 8.
+    """
+    try:
+        from imgl.diagnose import diagnose_content, worth_analyzing
+        diag = diagnose_content(path)
+        return bool(diag.get("ok")) and not worth_analyzing(diag)
+    except Exception:
+        from PIL import Image
+        # The context manager closes the file handle instead of leaving it to
+        # the garbage collector; convert() returns an independent image.
+        with Image.open(path) as source:
+            small = source.convert("RGB").resize((32, 32))
+        # get_flattened_data() is only available in newer Pillow releases;
+        # fall back to getdata() so older installations do not raise here.
+        read_pixels = getattr(small, "get_flattened_data", None) or small.getdata
+        pixels = list(read_pixels())
+        if not pixels:
+            return True
+        if len(set(pixels)) <= 1:
+            return True
+        brightness = [int(0.299 * r + 0.587 * g + 0.114 * b) for r, g, b in pixels]
+        return max(brightness) < 8
+def capture_status_message(path: Path) -> str | None:
+    """Return a warning for a capture that looks blank, or None when it looks fine."""
+    if not _is_blank_image(path):
+        return None
+    warning = (
+        "Capture looks empty or low-content. "
+        "Use an existing screenshot, e.g. imgl vql /tmp/screen.png -o layout.vql.json"
+    )
+    return warning