PyPI - pyautoscene - Versions diffs - 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl - Mend

pyautoscene 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

pyautoscene/ocr.py +70 -70
pyautoscene/ocr_config.yaml +112 -112
pyautoscene/references.py +78 -78
pyautoscene/scene.py +61 -61
pyautoscene/screen.py +79 -79
pyautoscene/session.py +140 -140
pyautoscene/utils.py +25 -25
{pyautoscene-0.2.0.dist-info → pyautoscene-0.2.1.dist-info}/METADATA +5 -1
pyautoscene-0.2.1.dist-info/RECORD +13 -0
{pyautoscene-0.2.0.dist-info → pyautoscene-0.2.1.dist-info}/licenses/LICENSE +201 -201
pyautoscene-0.2.0.dist-info/RECORD +0 -13
{pyautoscene-0.2.0.dist-info → pyautoscene-0.2.1.dist-info}/WHEEL +0 -0
{pyautoscene-0.2.0.dist-info → pyautoscene-0.2.1.dist-info}/entry_points.txt +0 -0

pyautoscene/ocr.py CHANGED Viewed

@@ -1,70 +1,70 @@
-import logging
-from hashlib import sha256
-from pathlib import Path
-import numpy as np
-from PIL import Image
-from .screen import Region
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-try:
-    from rapidocr import RapidOCR
-    from rapidocr.utils.output import RapidOCROutput
-except ImportError:
-    raise ImportError(
-        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
-    )
-ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
-def hash_image(img: Image.Image) -> str:
-    return sha256(img.tobytes()).hexdigest()
-def convert_points_to_ltwh(points: np.ndarray) -> Region:
-    if points.shape[0] == 0:
-        raise ValueError("Points array is empty")
-    x_min = np.min(points[:, 0])
-    y_min = np.min(points[:, 1])
-    x_max = np.max(points[:, 0])
-    y_max = np.max(points[:, 1])
-    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
-class OCR:
-    engine: RapidOCR | None = None
-    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
-    def __new__(cls):
-        if cls.engine is None:
-            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
-        return super().__new__(cls)
-    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
-        img_gray = img.convert("L")
-        img_hash = hash_image(img_gray)
-        if img_hash in self.img_cache:
-            logger.debug(f"Using cached result for image hash: {img_hash}")
-            return self.img_cache[img_hash]
-        assert self.engine is not None, "Engine should be initialized in __new__"
-        result = self.engine(np.array(img_gray))
-        assert isinstance(result, RapidOCROutput), (
-            "Result should be of type RapidOCROutput"
-        )
-        assert result.txts is not None and result.boxes is not None, (
-            "Text recognition failed, txts and boxes should not be None"
-        )
-        detections = tuple(
-            (txt, convert_points_to_ltwh(box))
-            for txt, box in zip(result.txts, result.boxes)
-        )
-        self.img_cache[img_hash] = detections
-        return detections
+import logging
+from hashlib import sha256
+from pathlib import Path
+import numpy as np
+from PIL import Image
+from .screen import Region
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+try:
+    from rapidocr import RapidOCR
+    from rapidocr.utils.output import RapidOCROutput
+except ImportError:
+    raise ImportError(
+        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
+    )
+ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
+def hash_image(img: Image.Image) -> str:
+    return sha256(img.tobytes()).hexdigest()
+def convert_points_to_ltwh(points: np.ndarray) -> Region:
+    if points.shape[0] == 0:
+        raise ValueError("Points array is empty")
+    x_min = np.min(points[:, 0])
+    y_min = np.min(points[:, 1])
+    x_max = np.max(points[:, 0])
+    y_max = np.max(points[:, 1])
+    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
+class OCR:
+    engine: RapidOCR | None = None
+    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
+    def __new__(cls):
+        if cls.engine is None:
+            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
+        return super().__new__(cls)
+    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
+        img_gray = img.convert("L")
+        img_hash = hash_image(img_gray)
+        if img_hash in self.img_cache:
+            logger.debug(f"Using cached result for image hash: {img_hash}")
+            return self.img_cache[img_hash]
+        assert self.engine is not None, "Engine should be initialized in __new__"
+        result = self.engine(np.array(img_gray))
+        assert isinstance(result, RapidOCROutput), (
+            "Result should be of type RapidOCROutput"
+        )
+        assert result.txts is not None and result.boxes is not None, (
+            "Text recognition failed, txts and boxes should not be None"
+        )
+        detections = tuple(
+            (txt, convert_points_to_ltwh(box))
+            for txt, box in zip(result.txts, result.boxes)
+        )
+        self.img_cache[img_hash] = detections
+        return detections

pyautoscene/ocr_config.yaml CHANGED Viewed

@@ -1,112 +1,112 @@
-Global:
-    text_score: 0.5
-    use_det: true
-    use_cls: false
-    use_angle_cls: false
-    use_rec: true
-    min_height: 30
-    width_height_ratio: 8
-    max_side_len: 2000
-    min_side_len: 30
-    return_word_box: false
-    return_single_char_box: false
-    font_path: null
-EngineConfig:
-    onnxruntime:
-        intra_op_num_threads: -1
-        inter_op_num_threads: -1
-        enable_cpu_mem_arena: false
-        cpu_ep_cfg:
-            arena_extend_strategy: "kSameAsRequested"
-        use_cuda: false
-        cuda_ep_cfg:
-            device_id: 0
-            arena_extend_strategy: "kNextPowerOfTwo"
-            cudnn_conv_algo_search: "EXHAUSTIVE"
-            do_copy_in_default_stream: true
-        use_dml: false
-        dm_ep_cfg: null
-        use_cann: false
-        cann_ep_cfg:
-            device_id: 0
-            arena_extend_strategy: "kNextPowerOfTwo"
-            npu_mem_limit:  21474836480 # 20 * 1024 * 1024 * 1024
-            op_select_impl_mode: "high_performance"
-            optypelist_for_implmode: "Gelu"
-            enable_cann_graph: true
-    openvino:
-        inference_num_threads: -1
-    paddle:
-        cpu_math_library_num_threads: -1
-        use_cuda: false
-        gpu_id: 0
-        gpu_mem: 500
-    torch:
-        use_cuda: false
-        gpu_id: 0
-Det:
-    engine_type: "onnxruntime"
-    lang_type: "en"
-    model_type: "mobile"
-    ocr_version: "PP-OCRv4"
-    task_type: "det"
-    model_path: null
-    model_dir: null
-    limit_side_len: 736
-    limit_type: min
-    std: [ 0.5, 0.5, 0.5 ]
-    mean: [ 0.5, 0.5, 0.5 ]
-    thresh: 0.3
-    box_thresh: 0.5
-    max_candidates: 1000
-    unclip_ratio: 1.6
-    use_dilation: true
-    score_mode: fast
-Cls:
-    engine_type: "onnxruntime"
-    lang_type: "ch"
-    model_type: "mobile"
-    ocr_version: "PP-OCRv4"
-    task_type: "cls"
-    model_path: null
-    model_dir: null
-    cls_image_shape: [3, 48, 192]
-    cls_batch_num: 6
-    cls_thresh: 0.9
-    label_list: ["0", "180"]
-Rec:
-    engine_type: "onnxruntime"
-    lang_type: "en"
-    model_type: "mobile"
-    ocr_version: "PP-OCRv4"
-    task_type: "rec"
-    model_path: null
-    model_dir: null
-    rec_keys_path: null
-    rec_img_shape: [3, 48, 320]
-    rec_batch_num: 6
+Global:
+    text_score: 0.5
+    use_det: true
+    use_cls: false
+    use_angle_cls: false
+    use_rec: true
+    min_height: 30
+    width_height_ratio: 8
+    max_side_len: 2000
+    min_side_len: 30
+    return_word_box: false
+    return_single_char_box: false
+    font_path: null
+EngineConfig:
+    onnxruntime:
+        intra_op_num_threads: -1
+        inter_op_num_threads: -1
+        enable_cpu_mem_arena: false
+        cpu_ep_cfg:
+            arena_extend_strategy: "kSameAsRequested"
+        use_cuda: false
+        cuda_ep_cfg:
+            device_id: 0
+            arena_extend_strategy: "kNextPowerOfTwo"
+            cudnn_conv_algo_search: "EXHAUSTIVE"
+            do_copy_in_default_stream: true
+        use_dml: false
+        dm_ep_cfg: null
+        use_cann: false
+        cann_ep_cfg:
+            device_id: 0
+            arena_extend_strategy: "kNextPowerOfTwo"
+            npu_mem_limit:  21474836480 # 20 * 1024 * 1024 * 1024
+            op_select_impl_mode: "high_performance"
+            optypelist_for_implmode: "Gelu"
+            enable_cann_graph: true
+    openvino:
+        inference_num_threads: -1
+    paddle:
+        cpu_math_library_num_threads: -1
+        use_cuda: false
+        gpu_id: 0
+        gpu_mem: 500
+    torch:
+        use_cuda: false
+        gpu_id: 0
+Det:
+    engine_type: "onnxruntime"
+    lang_type: "en"
+    model_type: "mobile"
+    ocr_version: "PP-OCRv4"
+    task_type: "det"
+    model_path: null
+    model_dir: null
+    limit_side_len: 736
+    limit_type: min
+    std: [ 0.5, 0.5, 0.5 ]
+    mean: [ 0.5, 0.5, 0.5 ]
+    thresh: 0.3
+    box_thresh: 0.5
+    max_candidates: 1000
+    unclip_ratio: 1.6
+    use_dilation: true
+    score_mode: fast
+Cls:
+    engine_type: "onnxruntime"
+    lang_type: "ch"
+    model_type: "mobile"
+    ocr_version: "PP-OCRv4"
+    task_type: "cls"
+    model_path: null
+    model_dir: null
+    cls_image_shape: [3, 48, 192]
+    cls_batch_num: 6
+    cls_thresh: 0.9
+    label_list: ["0", "180"]
+Rec:
+    engine_type: "onnxruntime"
+    lang_type: "en"
+    model_type: "mobile"
+    ocr_version: "PP-OCRv4"
+    task_type: "rec"
+    model_path: null
+    model_dir: null
+    rec_keys_path: null
+    rec_img_shape: [3, 48, 320]
+    rec_batch_num: 6

pyautoscene/references.py CHANGED Viewed

@@ -1,78 +1,78 @@
-from abc import ABC, abstractmethod
-from typing import override
-import pyautogui as gui
-from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
-class ReferenceElement(ABC):
-    """Base class for reference elements used to identify scenes."""
-    @abstractmethod
-    def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
-        """Detect the presence of the reference element."""
-        raise NotImplementedError("Subclasses must implement this method")
-class ImageElement(ReferenceElement):
-    """Reference element that identifies a scene by an image."""
-    def __init__(
-        self,
-        path: str | list[str],
-        confidence: float = 0.999,
-        region: RegionSpec | None = None,
-    ):
-        self.path = path
-        self.confidence = confidence
-        self.region = region
-    @override
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the image in the current screen."""
-        if isinstance(self.path, str):
-            path = [self.path]  # Ensure path is a list for consistency
-        else:
-            path = self.path
-        for image_path in path:
-            try:
-                location = locate_on_screen(
-                    image_path, region=region or self.region, confidence=self.confidence
-                )
-                return location
-            except gui.ImageNotFoundException:
-                continue
-class TextElement(ReferenceElement):
-    """Reference element that identifies a scene by text."""
-    def __init__(
-        self,
-        text: str,
-        region: RegionSpec | None = None,
-        case_sensitive: bool = False,
-    ):
-        self.text = text
-        self.region = region
-        self.case_sensitive = case_sensitive
-        if not case_sensitive:
-            self.text = self.text.lower()
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the text in the current screen."""
-        from .ocr import OCR
-        ocr = OCR()
-        region = region or self.region
-        for text, detected_region in ocr.recognize_text(
-            gui.screenshot(
-                region=generate_region_from_spec(region).to_box() if region else None
-            )
-        ):
-            if not self.case_sensitive:
-                text = text.lower()
-            if text.strip() == self.text.strip():
-                return detected_region
-        return None
+from abc import ABC, abstractmethod
+from typing import override
+import pyautogui as gui
+from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
+class ReferenceElement(ABC):
+    """Base class for reference elements used to identify scenes."""
+    @abstractmethod
+    def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
+        """Detect the presence of the reference element."""
+        raise NotImplementedError("Subclasses must implement this method")
+class ImageElement(ReferenceElement):
+    """Reference element that identifies a scene by an image."""
+    def __init__(
+        self,
+        path: str | list[str],
+        confidence: float = 0.999,
+        region: RegionSpec | None = None,
+    ):
+        self.path = path
+        self.confidence = confidence
+        self.region = region
+    @override
+    def is_visible(self, region: RegionSpec | None = None):
+        """Method to detect the presence of the image in the current screen."""
+        if isinstance(self.path, str):
+            path = [self.path]  # Ensure path is a list for consistency
+        else:
+            path = self.path
+        for image_path in path:
+            try:
+                location = locate_on_screen(
+                    image_path, region=region or self.region, confidence=self.confidence
+                )
+                return location
+            except gui.ImageNotFoundException:
+                continue
+class TextElement(ReferenceElement):
+    """Reference element that identifies a scene by text."""
+    def __init__(
+        self,
+        text: str,
+        region: RegionSpec | None = None,
+        case_sensitive: bool = False,
+    ):
+        self.text = text
+        self.region = region
+        self.case_sensitive = case_sensitive
+        if not case_sensitive:
+            self.text = self.text.lower()
+    def is_visible(self, region: RegionSpec | None = None):
+        """Method to detect the presence of the text in the current screen."""
+        from .ocr import OCR
+        ocr = OCR()
+        region = region or self.region
+        for text, detected_region in ocr.recognize_text(
+            gui.screenshot(
+                region=generate_region_from_spec(region).to_box() if region else None
+            )
+        ):
+            if not self.case_sensitive:
+                text = text.lower()
+            if text.strip() == self.text.strip():
+                return detected_region
+        return None

pyautoscene/scene.py CHANGED Viewed

@@ -1,61 +1,61 @@
-from __future__ import annotations
-from typing import Callable, TypedDict
-from statemachine import State
-from pyautoscene.utils import is_valid_variable_name
-from .references import ReferenceElement
-from .screen import Region
-class ActionInfo(TypedDict):
-    """Type definition for action information in a scene."""
-    action: Callable[..., None]
-    transitions_to: Scene | None
-class Scene(State):
-    """A scene represents a state in the GUI automation state machine."""
-    def __init__(
-        self,
-        name: str,
-        elements: list[ReferenceElement] | None = None,
-        initial: bool = False,
-    ):
-        assert is_valid_variable_name(name), (
-            f"Invalid scene name: {name}, must be a valid Python identifier."
-        )
-        super().__init__(name, initial=initial)
-        self.elements = elements or []
-        self.actions: dict[str, ActionInfo] = {}
-    def action(self, transitions_to: Scene | None = None):
-        """Decorator to register an action for this scene."""
-        def decorator(func: Callable[..., None]) -> Callable[..., None]:
-            if func.__name__ not in self.actions:
-                action_name = func.__name__
-                self.actions[action_name] = {
-                    "action": func,
-                    "transitions_to": transitions_to,
-                }
-            return func
-        return decorator
-    def get_action(self, action_name: str) -> ActionInfo | None:
-        """Get an action by name."""
-        return self.actions.get(action_name)
-    def is_on_screen(self, region: Region | None = None) -> bool:
-        """Check if any reference element is currently on screen."""
-        # TODO: Refactor after text recognition is implemented
-        # elements = (elem for elem in self.elements if isinstance(elem, ReferenceImage))
-        return all(elem.is_visible(region) for elem in self.elements)
-    def __repr__(self):
-        return f"Scene({self.name!r}, elements={len(self.elements)})"
+from __future__ import annotations
+from typing import Callable, TypedDict
+from statemachine import State
+from pyautoscene.utils import is_valid_variable_name
+from .references import ReferenceElement
+from .screen import Region
+class ActionInfo(TypedDict):
+    """Type definition for action information in a scene."""
+    action: Callable[..., None]
+    transitions_to: Scene | None
+class Scene(State):
+    """A scene represents a state in the GUI automation state machine."""
+    def __init__(
+        self,
+        name: str,
+        elements: list[ReferenceElement] | None = None,
+        initial: bool = False,
+    ):
+        assert is_valid_variable_name(name), (
+            f"Invalid scene name: {name}, must be a valid Python identifier."
+        )
+        super().__init__(name, initial=initial)
+        self.elements = elements or []
+        self.actions: dict[str, ActionInfo] = {}
+    def action(self, transitions_to: Scene | None = None):
+        """Decorator to register an action for this scene."""
+        def decorator(func: Callable[..., None]) -> Callable[..., None]:
+            if func.__name__ not in self.actions:
+                action_name = func.__name__
+                self.actions[action_name] = {
+                    "action": func,
+                    "transitions_to": transitions_to,
+                }
+            return func
+        return decorator
+    def get_action(self, action_name: str) -> ActionInfo | None:
+        """Get an action by name."""
+        return self.actions.get(action_name)
+    def is_on_screen(self, region: Region | None = None) -> bool:
+        """Check if any reference element is currently on screen."""
+        # TODO: Refactor after text recognition is implemented
+        # elements = (elem for elem in self.elements if isinstance(elem, ReferenceImage))
+        return all(elem.is_visible(region) for elem in self.elements)
+    def __repr__(self):
+        return f"Scene({self.name!r}, elements={len(self.elements)})"

pyautoscene 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

pyautoscene 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl