pyautoscene 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
pyautoscene/__init__.py CHANGED
@@ -1,5 +1,6 @@
 from .references import ImageElement, TextElement
+from .region import Region, RegionSpec
 from .scene import Scene
 from .session import Session
 
-__all__ = ["Scene", "Session", "ImageElement", "TextElement"]
+__all__ = ["Scene", "Session", "ImageElement", "TextElement", "RegionSpec", "Region"]
pyautoscene/_types.py ADDED
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from typing import Literal
+
+type MouseButton = Literal["left", "right"]
+type TowardsDirection = Literal["top", "left", "bottom", "right", None]
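The `type` statement is PEP 695 alias syntax, so this module requires Python 3.12+. A sketch of annotating a caller against the new aliases (the function itself is hypothetical):

    from pyautoscene._types import MouseButton, TowardsDirection

    def press(button: MouseButton = "left", towards: TowardsDirection = None) -> None:
        # A static type checker rejects values outside the Literal sets,
        # e.g. button="middle" fails to check.
        ...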
@@ -0,0 +1,6 @@
+import os
+
+LOCATE_AND_CLICK_DELAY = float(os.getenv("PYAUTOSCENE_LOCATE_AND_CLICK_DELAY", 0.3))
+
+# pixels per second, used for calculating move duration
+POINTER_SPEED = int(os.getenv("PYAUTOSCENE_POINTER_SPEED", 1000))
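Both constants are read from the environment once, at import time, so overrides must be in place before pyautoscene is imported; a sketch with illustrative values:

    import os

    # Set these before importing pyautoscene; they are read at import time.
    os.environ["PYAUTOSCENE_LOCATE_AND_CLICK_DELAY"] = "0.5"  # seconds
    os.environ["PYAUTOSCENE_POINTER_SPEED"] = "1500"  # pixels per second

    import pyautoscene  # noqa: E402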
pyautoscene/ocr.py CHANGED
@@ -1,70 +1,73 @@
-import logging
-from hashlib import sha256
-from pathlib import Path
-
-import numpy as np
-from PIL import Image
-
-from .screen import Region
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-try:
-    from rapidocr import RapidOCR
-    from rapidocr.utils.output import RapidOCROutput
-except ImportError:
-    raise ImportError(
-        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
-    )
-
-ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
-
-
-def hash_image(img: Image.Image) -> str:
-    return sha256(img.tobytes()).hexdigest()
-
-
-def convert_points_to_ltwh(points: np.ndarray) -> Region:
-    if points.shape[0] == 0:
-        raise ValueError("Points array is empty")
-
-    x_min = np.min(points[:, 0])
-    y_min = np.min(points[:, 1])
-    x_max = np.max(points[:, 0])
-    y_max = np.max(points[:, 1])
-
-    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
-
-
-class OCR:
-    engine: RapidOCR | None = None
-    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
-
-    def __new__(cls):
-        if cls.engine is None:
-            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
-        return super().__new__(cls)
-
-    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
-        img_gray = img.convert("L")
-        img_hash = hash_image(img_gray)
-        if img_hash in self.img_cache:
-            logger.debug(f"Using cached result for image hash: {img_hash}")
-            return self.img_cache[img_hash]
-
-        assert self.engine is not None, "Engine should be initialized in __new__"
-        result = self.engine(np.array(img_gray))
-        assert isinstance(result, RapidOCROutput), (
-            "Result should be of type RapidOCROutput"
-        )
-        assert result.txts is not None and result.boxes is not None, (
-            "Text recognition failed, txts and boxes should not be None"
-        )
-
-        detections = tuple(
-            (txt, convert_points_to_ltwh(box))
-            for txt, box in zip(result.txts, result.boxes)
-        )
-        self.img_cache[img_hash] = detections
-        return detections
+import logging
+import os
+from hashlib import sha256
+from pathlib import Path
+
+import numpy as np
+from PIL import Image
+
+from .region import Region
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+try:
+    from rapidocr import RapidOCR
+    from rapidocr.utils.output import RapidOCROutput
+except ImportError:
+    raise ImportError(
+        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
+    )
+
+default_ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
+ocr_config_path = Path(os.getenv("PYAUTOSCENE_OCR_CONFIG", default_ocr_config_path))
+logger.info(f"OCR config path: {ocr_config_path}")
+
+
+def hash_image(img: Image.Image) -> str:
+    return sha256(img.tobytes()).hexdigest()
+
+
+def convert_points_to_ltwh(points: np.ndarray) -> Region:
+    if points.shape[0] == 0:
+        raise ValueError("Points array is empty")
+
+    x_min = np.min(points[:, 0])
+    y_min = np.min(points[:, 1])
+    x_max = np.max(points[:, 0])
+    y_max = np.max(points[:, 1])
+
+    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
+
+
+class OCR:
+    engine: RapidOCR | None = None
+    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
+
+    def __new__(cls):
+        if cls.engine is None:
+            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
+        return super().__new__(cls)
+
+    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
+        img_gray = img.convert("L")
+        img_hash = hash_image(img_gray)
+        if img_hash in self.img_cache:
+            logger.debug(f"Using cached result for image hash: {img_hash}")
+            return self.img_cache[img_hash]
+
+        assert self.engine is not None, "Engine should be initialized in __new__"
+        result = self.engine(np.array(img_gray))
+        assert isinstance(result, RapidOCROutput), (
+            "Result should be of type RapidOCROutput"
+        )
+        assert result.txts is not None and result.boxes is not None, (
+            "Text recognition failed, txts and boxes should not be None"
+        )
+
+        detections = tuple(
+            (txt, convert_points_to_ltwh(box))
+            for txt, box in zip(result.txts, result.boxes)
+        )
+        self.img_cache[img_hash] = detections
+        return detections
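The engine is created once and shared across OCR instances, and results are memoized by a SHA-256 hash of the grayscaled image, so recognizing a pixel-identical screenshot twice skips the second OCR pass. A minimal usage sketch (requires the OCR extra):

    import pyautogui as gui

    from pyautoscene.ocr import OCR  # pip install pyautoscene[ocr]

    ocr = OCR()  # first construction builds the shared RapidOCR engine
    shot = gui.screenshot()
    for text, region in ocr.recognize_text(shot):  # (str, Region) pairs
        print(text, region.center)

    ocr.recognize_text(shot)  # identical image: served from img_cache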
pyautoscene/ocr_config.yaml CHANGED
@@ -1,112 +1,112 @@
-Global:
-  text_score: 0.5
-
-  use_det: true
-  use_cls: false
-  use_angle_cls: false
-  use_rec: true
-
-  min_height: 30
-  width_height_ratio: 8
-  max_side_len: 2000
-  min_side_len: 30
-
-  return_word_box: false
-  return_single_char_box: false
-
-  font_path: null
-
-EngineConfig:
-  onnxruntime:
-    intra_op_num_threads: -1
-    inter_op_num_threads: -1
-    enable_cpu_mem_arena: false
-
-    cpu_ep_cfg:
-      arena_extend_strategy: "kSameAsRequested"
-
-    use_cuda: false
-    cuda_ep_cfg:
-      device_id: 0
-      arena_extend_strategy: "kNextPowerOfTwo"
-      cudnn_conv_algo_search: "EXHAUSTIVE"
-      do_copy_in_default_stream: true
-
-    use_dml: false
-    dm_ep_cfg: null
-
-    use_cann: false
-    cann_ep_cfg:
-      device_id: 0
-      arena_extend_strategy: "kNextPowerOfTwo"
-      npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
-      op_select_impl_mode: "high_performance"
-      optypelist_for_implmode: "Gelu"
-      enable_cann_graph: true
-
-  openvino:
-    inference_num_threads: -1
-
-  paddle:
-    cpu_math_library_num_threads: -1
-    use_cuda: false
-    gpu_id: 0
-    gpu_mem: 500
-
-  torch:
-    use_cuda: false
-    gpu_id: 0
-
-Det:
-  engine_type: "onnxruntime"
-  lang_type: "en"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "det"
-
-  model_path: null
-  model_dir: null
-
-  limit_side_len: 736
-  limit_type: min
-  std: [ 0.5, 0.5, 0.5 ]
-  mean: [ 0.5, 0.5, 0.5 ]
-
-  thresh: 0.3
-  box_thresh: 0.5
-  max_candidates: 1000
-  unclip_ratio: 1.6
-  use_dilation: true
-  score_mode: fast
-
-Cls:
-  engine_type: "onnxruntime"
-  lang_type: "ch"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "cls"
-
-  model_path: null
-  model_dir: null
-
-  cls_image_shape: [3, 48, 192]
-  cls_batch_num: 6
-  cls_thresh: 0.9
-  label_list: ["0", "180"]
-
-Rec:
-  engine_type: "onnxruntime"
-  lang_type: "en"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "rec"
-
-  model_path: null
-  model_dir: null
-
-  rec_keys_path: null
-  rec_img_shape: [3, 48, 320]
-  rec_batch_num: 6
+Global:
+  text_score: 0.5
+
+  use_det: true
+  use_cls: false
+  use_angle_cls: false
+  use_rec: true
+
+  min_height: 30
+  width_height_ratio: 8
+  max_side_len: 2000
+  min_side_len: 30
+
+  return_word_box: false
+  return_single_char_box: false
+
+  font_path: null
+
+EngineConfig:
+  onnxruntime:
+    intra_op_num_threads: -1
+    inter_op_num_threads: -1
+    enable_cpu_mem_arena: false
+
+    cpu_ep_cfg:
+      arena_extend_strategy: "kSameAsRequested"
+
+    use_cuda: false
+    cuda_ep_cfg:
+      device_id: 0
+      arena_extend_strategy: "kNextPowerOfTwo"
+      cudnn_conv_algo_search: "EXHAUSTIVE"
+      do_copy_in_default_stream: true
+
+    use_dml: false
+    dm_ep_cfg: null
+
+    use_cann: false
+    cann_ep_cfg:
+      device_id: 0
+      arena_extend_strategy: "kNextPowerOfTwo"
+      npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
+      op_select_impl_mode: "high_performance"
+      optypelist_for_implmode: "Gelu"
+      enable_cann_graph: true
+
+  openvino:
+    inference_num_threads: -1
+
+  paddle:
+    cpu_math_library_num_threads: -1
+    use_cuda: false
+    gpu_id: 0
+    gpu_mem: 500
+
+  torch:
+    use_cuda: false
+    gpu_id: 0
+
+Det:
+  engine_type: "onnxruntime"
+  lang_type: "en"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "det"
+
+  model_path: null
+  model_dir: null
+
+  limit_side_len: 736
+  limit_type: min
+  std: [ 0.5, 0.5, 0.5 ]
+  mean: [ 0.5, 0.5, 0.5 ]
+
+  thresh: 0.3
+  box_thresh: 0.5
+  max_candidates: 1000
+  unclip_ratio: 1.6
+  use_dilation: true
+  score_mode: fast
+
+Cls:
+  engine_type: "onnxruntime"
+  lang_type: "ch"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "cls"
+
+  model_path: null
+  model_dir: null
+
+  cls_image_shape: [3, 48, 192]
+  cls_batch_num: 6
+  cls_thresh: 0.9
+  label_list: ["0", "180"]
+
+Rec:
+  engine_type: "onnxruntime"
+  lang_type: "en"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "rec"
+
+  model_path: null
+  model_dir: null
+
+  rec_keys_path: null
+  rec_img_shape: [3, 48, 320]
+  rec_batch_num: 6
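The removed and re-added lines are content-identical; what changed in 0.2.2 is how the file is located, via the new PYAUTOSCENE_OCR_CONFIG variable in ocr.py. A sketch of customizing a threshold such as Global.text_score by copying the bundled defaults (assumes PyYAML is installed; the output path is illustrative):

    import os
    from importlib import resources

    import yaml  # assumption: PyYAML is available

    # Load the bundled defaults and lower the detection threshold.
    bundled = resources.files("pyautoscene") / "ocr_config.yaml"
    config = yaml.safe_load(bundled.read_text())
    config["Global"]["text_score"] = 0.4  # illustrative tweak

    with open("my_ocr_config.yaml", "w") as f:
        yaml.safe_dump(config, f)

    # Must be set before pyautoscene.ocr is first imported.
    os.environ["PYAUTOSCENE_OCR_CONFIG"] = os.path.abspath("my_ocr_config.yaml")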
pyautoscene/references.py CHANGED
@@ -1,78 +1,100 @@
-from abc import ABC, abstractmethod
-from typing import override
-
-import pyautogui as gui
-
-from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
-
-
-class ReferenceElement(ABC):
-    """Base class for reference elements used to identify scenes."""
-
-    @abstractmethod
-    def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
-        """Detect the presence of the reference element."""
-        raise NotImplementedError("Subclasses must implement this method")
-
-
-class ImageElement(ReferenceElement):
-    """Reference element that identifies a scene by an image."""
-
-    def __init__(
-        self,
-        path: str | list[str],
-        confidence: float = 0.999,
-        region: RegionSpec | None = None,
-    ):
-        self.path = path
-        self.confidence = confidence
-        self.region = region
-
-    @override
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the image in the current screen."""
-        if isinstance(self.path, str):
-            path = [self.path]  # Ensure path is a list for consistency
-        else:
-            path = self.path
-        for image_path in path:
-            try:
-                location = locate_on_screen(
-                    image_path, region=region or self.region, confidence=self.confidence
-                )
-                return location
-            except gui.ImageNotFoundException:
-                continue
-
-
-class TextElement(ReferenceElement):
-    """Reference element that identifies a scene by text."""
-
-    def __init__(
-        self,
-        text: str,
-        region: RegionSpec | None = None,
-        case_sensitive: bool = False,
-    ):
-        self.text = text
-        self.region = region
-        self.case_sensitive = case_sensitive
-        if not case_sensitive:
-            self.text = self.text.lower()
-
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the text in the current screen."""
-        from .ocr import OCR
-
-        ocr = OCR()
-        region = region or self.region
-        for text, detected_region in ocr.recognize_text(
-            gui.screenshot(
-                region=generate_region_from_spec(region).to_box() if region else None
-            )
-        ):
-            if not self.case_sensitive:
-                text = text.lower()
-            if text.strip() == self.text.strip():
-                return detected_region
-        return None
+from abc import ABC, abstractmethod
+from typing import Callable, override
+
+import pyautogui as gui
+from PIL import Image
+
+from ._types import MouseButton, TowardsDirection
+from .region import Region, RegionSpec
+from .utils import locate_on_screen, move_and_click
+
+
+class ReferenceElement(ABC):
+    """Base class for reference elements used to identify scenes."""
+
+    @abstractmethod
+    def locate(self, region: RegionSpec | None = None) -> Region | None:
+        """Detect the presence of the reference element."""
+        raise NotImplementedError("Subclasses must implement this method")
+
+    def locate_and_click(
+        self,
+        offset: tuple[int, int] = (0, 0),
+        region: RegionSpec | None = None,
+        clicks: int = 1,
+        button: MouseButton = "left",
+        towards: TowardsDirection = None,
+    ):
+        """Locate the reference element and click on it."""
+        region = self.locate(region=region)
+        assert region is not None, f"Element {self} not found on screen"
+        move_and_click(
+            target_region=region,
+            clicks=clicks,
+            button=button,
+            offset=offset,
+            towards=towards,
+        )
+
+
+class ImageElement(ReferenceElement):
+    """Reference element that identifies a scene by an image."""
+
+    def __init__(
+        self,
+        path: str | list[str],
+        confidence: float = 0.999,
+        region: RegionSpec | None = None,
+        locator: Callable[[Image.Image, Image.Image], list[Region]] | None = None,
+    ):
+        self.path = path
+        self.confidence = confidence
+        self.region = region
+        self.locator = locator
+
+    @override
+    def locate(self, region: RegionSpec | None = None) -> Region | None:
+        """Method to detect the presence of the image in the current screen."""
+        if isinstance(self.path, str):
+            path = [self.path]  # Ensure path is a list for consistency
+        else:
+            path = self.path
+        for image_path in path:
+            try:
+                location = locate_on_screen(
+                    image_path,
+                    region=region if region else self.region,
+                    confidence=self.confidence,
+                    locator=self.locator,
+                )
+                return location
+            except gui.ImageNotFoundException:
+                continue
+
+
+class TextElement(ReferenceElement):
+    """Reference element that identifies a scene by text."""
+
+    def __init__(
+        self, text: str, region: RegionSpec | None = None, case_sensitive: bool = False
+    ):
+        self.text = text
+        self.region = region
+        self.case_sensitive = case_sensitive
+        if not case_sensitive:
+            self.text = self.text.lower()
+
+    def locate(self, region: RegionSpec | None = None):
+        """Method to detect the presence of the text in the current screen."""
+        from .ocr import OCR
+
+        ocr = OCR()
+        region = region or self.region
+        for text, detected_region in ocr.recognize_text(
+            gui.screenshot(region=Region.from_spec(region).to_box() if region else None)
+        ):
+            if not self.case_sensitive:
+                text = text.lower()
+            if text.strip() == self.text.strip():
+                return detected_region.resolve(base=region)
+        return None
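is_visible is renamed to locate, TextElement matches now resolve back into absolute screen coordinates, and ReferenceElement gains a concrete locate_and_click helper. A usage sketch (the asset path and spec string are hypothetical):

    from pyautoscene import ImageElement, TextElement

    save = ImageElement("assets/save.png", confidence=0.95)
    # Search only the left half of the screen, then double-click the match;
    # offset nudges the click point (exact semantics live in utils.move_and_click,
    # which is not part of this diff).
    save.locate_and_click(offset=(10, 0), clicks=2, region="x:1/2")

    title = TextElement("Untitled document")
    hit = title.locate()  # Region or None
    if hit is not None:
        print(hit.center)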
pyautoscene/region.py ADDED
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+import numpy as np
+import pyautogui as gui
+from pyscreeze import Box
+
+type RegionSpec = Region | str
+
+axis_pattern = re.compile(r"(?P<d>[xy]):\(?(?P<i>\d+)(?:-(?P<j>\d+))?\)?/(?P<n>\d+)")
+
+
+@dataclass(frozen=True, slots=True)
+class Region:
+    left: int
+    top: int
+    width: int
+    height: int
+
+    def to_box(self) -> Box:
+        """Convert to a pyscreeze Box."""
+        return Box(self.left, self.top, self.width, self.height)
+
+    @classmethod
+    def from_box(cls, box: Box) -> Region:
+        """Create a Region from a pyscreeze Box."""
+        return cls(left=box.left, top=box.top, width=box.width, height=box.height)
+
+    @property
+    def center(self) -> tuple[int, int]:
+        """Get the center coordinates of the region."""
+        return (self.left + self.width // 2, self.top + self.height // 2)
+
+    @classmethod
+    def from_spec(
+        cls, spec: RegionSpec, shape: tuple[int, int] | None = None
+    ) -> Region:
+        if isinstance(spec, Region):
+            return spec
+        if shape is None:
+            img = np.array(gui.screenshot())
+            shape = (img.shape[0]), (img.shape[1])
+
+        default_region = {"left": 0, "top": 0, "width": shape[1], "height": shape[0]}
+
+        axis_mapping = {"x": ("left", "width", 1), "y": ("top", "height", 0)}
+        for axis, i, j, n in axis_pattern.findall(spec):
+            alignment, size_attr, dim_index = axis_mapping[axis]
+            size = shape[dim_index] // int(n)
+            i, j = int(i), int(j) if j else int(i)
+            default_region.update({
+                alignment: (i - 1) * size,
+                size_attr: (j - i + 1) * size,
+            })
+
+        return cls(**default_region)
+
+    def resolve(self, base: RegionSpec | None) -> Region:
+        if base is None:
+            return self
+        if isinstance(base, str):
+            base = Region.from_spec(base)
+        return Region(
+            left=self.left + base.left,
+            top=self.top + base.top,
+            width=self.width,
+            height=self.height,
+        )
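from_spec divides each axis into n equal bands and selects band i, or the inclusive span i-j; shape is (height, width), and when omitted a screenshot is taken to measure the screen. A sketch on an assumed 1920x1080 display:

    from pyautoscene import Region

    shape = (1080, 1920)  # (height, width); passing it avoids a screenshot

    Region.from_spec("x:1/2", shape=shape)
    # Region(left=0, top=0, width=960, height=1080) -- left half

    Region.from_spec("x:(2-3)/3 y:1/4", shape=shape)
    # Region(left=640, top=0, width=1280, height=270) -- right two thirds, top quarter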