pyautoscene 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
pyautoscene/__init__.py CHANGED
@@ -1,5 +1,6 @@
 from .references import ImageElement, TextElement
+from .region import Region, RegionSpec
 from .scene import Scene
 from .session import Session
 
-__all__ = ["Scene", "Session", "ImageElement", "TextElement"]
+__all__ = ["Scene", "Session", "ImageElement", "TextElement", "RegionSpec", "Region"]
pyautoscene/_types.py ADDED
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from typing import Literal
+
+type MouseButton = Literal["left", "right"]
+type TowardsDirection = Literal["top", "left", "bottom", "right", None]
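The `type` statement is PEP 695 alias syntax, so this module requires Python 3.12+. A sketch of annotating a caller against the new aliases (the function itself is hypothetical):

    from pyautoscene._types import MouseButton, TowardsDirection

    def press(button: MouseButton = "left", towards: TowardsDirection = None) -> None:
        # A static type checker rejects values outside the Literal sets,
        # e.g. button="middle" fails to check.
        ...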
@@ -0,0 +1,6 @@
+import os
+
+LOCATE_AND_CLICK_DELAY = float(os.getenv("PYAUTOSCENE_LOCATE_AND_CLICK_DELAY", 0.3))
+
+# pixels per second, used for calculating move duration
+POINTER_SPEED = int(os.getenv("PYAUTOSCENE_POINTER_SPEED", 1000))
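Both constants are read from the environment once, at import time, so overrides must be in place before pyautoscene is imported; a sketch with illustrative values:

    import os

    # Set these before importing pyautoscene; they are read at import time.
    os.environ["PYAUTOSCENE_LOCATE_AND_CLICK_DELAY"] = "0.5"  # seconds
    os.environ["PYAUTOSCENE_POINTER_SPEED"] = "1500"  # pixels per second

    import pyautoscene  # noqa: E402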
pyautoscene/ocr.py CHANGED
@@ -1,70 +1,73 @@
-import logging
-from hashlib import sha256
-from pathlib import Path
-
-import numpy as np
-from PIL import Image
-
-from .screen import Region
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-try:
-    from rapidocr import RapidOCR
-    from rapidocr.utils.output import RapidOCROutput
-except ImportError:
-    raise ImportError(
-        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
-    )
-
-ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
-
-
-def hash_image(img: Image.Image) -> str:
-    return sha256(img.tobytes()).hexdigest()
-
-
-def convert_points_to_ltwh(points: np.ndarray) -> Region:
-    if points.shape[0] == 0:
-        raise ValueError("Points array is empty")
-
-    x_min = np.min(points[:, 0])
-    y_min = np.min(points[:, 1])
-    x_max = np.max(points[:, 0])
-    y_max = np.max(points[:, 1])
-
-    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
-
-
-class OCR:
-    engine: RapidOCR | None = None
-    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
-
-    def __new__(cls):
-        if cls.engine is None:
-            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
-        return super().__new__(cls)
-
-    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
-        img_gray = img.convert("L")
-        img_hash = hash_image(img_gray)
-        if img_hash in self.img_cache:
-            logger.debug(f"Using cached result for image hash: {img_hash}")
-            return self.img_cache[img_hash]
-
-        assert self.engine is not None, "Engine should be initialized in __new__"
-        result = self.engine(np.array(img_gray))
-        assert isinstance(result, RapidOCROutput), (
-            "Result should be of type RapidOCROutput"
-        )
-        assert result.txts is not None and result.boxes is not None, (
-            "Text recognition failed, txts and boxes should not be None"
-        )
-
-        detections = tuple(
-            (txt, convert_points_to_ltwh(box))
-            for txt, box in zip(result.txts, result.boxes)
-        )
-        self.img_cache[img_hash] = detections
-        return detections
+import logging
+import os
+from hashlib import sha256
+from pathlib import Path
+
+import numpy as np
+from PIL import Image
+
+from .region import Region
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+try:
+    from rapidocr import RapidOCR
+    from rapidocr.utils.output import RapidOCROutput
+except ImportError:
+    raise ImportError(
+        "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
+    )
+
+default_ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
+ocr_config_path = Path(os.getenv("PYAUTOSCENE_OCR_CONFIG", default_ocr_config_path))
+logger.info(f"OCR config path: {ocr_config_path}")
+
+
+def hash_image(img: Image.Image) -> str:
+    return sha256(img.tobytes()).hexdigest()
+
+
+def convert_points_to_ltwh(points: np.ndarray) -> Region:
+    if points.shape[0] == 0:
+        raise ValueError("Points array is empty")
+
+    x_min = np.min(points[:, 0])
+    y_min = np.min(points[:, 1])
+    x_max = np.max(points[:, 0])
+    y_max = np.max(points[:, 1])
+
+    return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
+
+
+class OCR:
+    engine: RapidOCR | None = None
+    img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
+
+    def __new__(cls):
+        if cls.engine is None:
+            cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
+        return super().__new__(cls)
+
+    def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
+        img_gray = img.convert("L")
+        img_hash = hash_image(img_gray)
+        if img_hash in self.img_cache:
+            logger.debug(f"Using cached result for image hash: {img_hash}")
+            return self.img_cache[img_hash]
+
+        assert self.engine is not None, "Engine should be initialized in __new__"
+        result = self.engine(np.array(img_gray))
+        assert isinstance(result, RapidOCROutput), (
+            "Result should be of type RapidOCROutput"
+        )
+        assert result.txts is not None and result.boxes is not None, (
+            "Text recognition failed, txts and boxes should not be None"
+        )
+
+        detections = tuple(
+            (txt, convert_points_to_ltwh(box))
+            for txt, box in zip(result.txts, result.boxes)
+        )
+        self.img_cache[img_hash] = detections
+        return detections
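The engine is created once and shared across OCR instances, and results are memoized by a SHA-256 hash of the grayscaled image, so recognizing a pixel-identical screenshot twice skips the second OCR pass. A minimal usage sketch (requires the OCR extra):

    import pyautogui as gui

    from pyautoscene.ocr import OCR  # pip install pyautoscene[ocr]

    ocr = OCR()  # first construction builds the shared RapidOCR engine
    shot = gui.screenshot()
    for text, region in ocr.recognize_text(shot):  # (str, Region) pairs
        print(text, region.center)

    ocr.recognize_text(shot)  # identical image: served from img_cache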
pyautoscene/ocr_config.yaml CHANGED
@@ -1,112 +1,112 @@
-Global:
-  text_score: 0.5
-
-  use_det: true
-  use_cls: false
-  use_angle_cls: false
-  use_rec: true
-
-  min_height: 30
-  width_height_ratio: 8
-  max_side_len: 2000
-  min_side_len: 30
-
-  return_word_box: false
-  return_single_char_box: false
-
-  font_path: null
-
-EngineConfig:
-  onnxruntime:
-    intra_op_num_threads: -1
-    inter_op_num_threads: -1
-    enable_cpu_mem_arena: false
-
-    cpu_ep_cfg:
-      arena_extend_strategy: "kSameAsRequested"
-
-    use_cuda: false
-    cuda_ep_cfg:
-      device_id: 0
-      arena_extend_strategy: "kNextPowerOfTwo"
-      cudnn_conv_algo_search: "EXHAUSTIVE"
-      do_copy_in_default_stream: true
-
-    use_dml: false
-    dm_ep_cfg: null
-
-    use_cann: false
-    cann_ep_cfg:
-      device_id: 0
-      arena_extend_strategy: "kNextPowerOfTwo"
-      npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
-      op_select_impl_mode: "high_performance"
-      optypelist_for_implmode: "Gelu"
-      enable_cann_graph: true
-
-  openvino:
-    inference_num_threads: -1
-
-  paddle:
-    cpu_math_library_num_threads: -1
-    use_cuda: false
-    gpu_id: 0
-    gpu_mem: 500
-
-  torch:
-    use_cuda: false
-    gpu_id: 0
-
-Det:
-  engine_type: "onnxruntime"
-  lang_type: "en"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "det"
-
-  model_path: null
-  model_dir: null
-
-  limit_side_len: 736
-  limit_type: min
-  std: [ 0.5, 0.5, 0.5 ]
-  mean: [ 0.5, 0.5, 0.5 ]
-
-  thresh: 0.3
-  box_thresh: 0.5
-  max_candidates: 1000
-  unclip_ratio: 1.6
-  use_dilation: true
-  score_mode: fast
-
-Cls:
-  engine_type: "onnxruntime"
-  lang_type: "ch"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "cls"
-
-  model_path: null
-  model_dir: null
-
-  cls_image_shape: [3, 48, 192]
-  cls_batch_num: 6
-  cls_thresh: 0.9
-  label_list: ["0", "180"]
-
-Rec:
-  engine_type: "onnxruntime"
-  lang_type: "en"
-  model_type: "mobile"
-  ocr_version: "PP-OCRv4"
-
-  task_type: "rec"
-
-  model_path: null
-  model_dir: null
-
-  rec_keys_path: null
-  rec_img_shape: [3, 48, 320]
-  rec_batch_num: 6
+Global:
+  text_score: 0.5
+
+  use_det: true
+  use_cls: false
+  use_angle_cls: false
+  use_rec: true
+
+  min_height: 30
+  width_height_ratio: 8
+  max_side_len: 2000
+  min_side_len: 30
+
+  return_word_box: false
+  return_single_char_box: false
+
+  font_path: null
+
+EngineConfig:
+  onnxruntime:
+    intra_op_num_threads: -1
+    inter_op_num_threads: -1
+    enable_cpu_mem_arena: false
+
+    cpu_ep_cfg:
+      arena_extend_strategy: "kSameAsRequested"
+
+    use_cuda: false
+    cuda_ep_cfg:
+      device_id: 0
+      arena_extend_strategy: "kNextPowerOfTwo"
+      cudnn_conv_algo_search: "EXHAUSTIVE"
+      do_copy_in_default_stream: true
+
+    use_dml: false
+    dm_ep_cfg: null
+
+    use_cann: false
+    cann_ep_cfg:
+      device_id: 0
+      arena_extend_strategy: "kNextPowerOfTwo"
+      npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
+      op_select_impl_mode: "high_performance"
+      optypelist_for_implmode: "Gelu"
+      enable_cann_graph: true
+
+  openvino:
+    inference_num_threads: -1
+
+  paddle:
+    cpu_math_library_num_threads: -1
+    use_cuda: false
+    gpu_id: 0
+    gpu_mem: 500
+
+  torch:
+    use_cuda: false
+    gpu_id: 0
+
+Det:
+  engine_type: "onnxruntime"
+  lang_type: "en"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "det"
+
+  model_path: null
+  model_dir: null
+
+  limit_side_len: 736
+  limit_type: min
+  std: [ 0.5, 0.5, 0.5 ]
+  mean: [ 0.5, 0.5, 0.5 ]
+
+  thresh: 0.3
+  box_thresh: 0.5
+  max_candidates: 1000
+  unclip_ratio: 1.6
+  use_dilation: true
+  score_mode: fast
+
+Cls:
+  engine_type: "onnxruntime"
+  lang_type: "ch"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "cls"
+
+  model_path: null
+  model_dir: null
+
+  cls_image_shape: [3, 48, 192]
+  cls_batch_num: 6
+  cls_thresh: 0.9
+  label_list: ["0", "180"]
+
+Rec:
+  engine_type: "onnxruntime"
+  lang_type: "en"
+  model_type: "mobile"
+  ocr_version: "PP-OCRv4"
+
+  task_type: "rec"
+
+  model_path: null
+  model_dir: null
+
+  rec_keys_path: null
+  rec_img_shape: [3, 48, 320]
+  rec_batch_num: 6
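The removed and re-added lines are content-identical; what changed in 0.2.2 is how the file is located, via the new PYAUTOSCENE_OCR_CONFIG variable in ocr.py. A sketch of customizing a threshold such as Global.text_score by copying the bundled defaults (assumes PyYAML is installed; the output path is illustrative):

    import os
    from importlib import resources

    import yaml  # assumption: PyYAML is available

    # Load the bundled defaults and lower the detection threshold.
    bundled = resources.files("pyautoscene") / "ocr_config.yaml"
    config = yaml.safe_load(bundled.read_text())
    config["Global"]["text_score"] = 0.4  # illustrative tweak

    with open("my_ocr_config.yaml", "w") as f:
        yaml.safe_dump(config, f)

    # Must be set before pyautoscene.ocr is first imported.
    os.environ["PYAUTOSCENE_OCR_CONFIG"] = os.path.abspath("my_ocr_config.yaml")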
pyautoscene/references.py CHANGED
@@ -1,78 +1,100 @@
-from abc import ABC, abstractmethod
-from typing import override
-
-import pyautogui as gui
-
-from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
-
-
-class ReferenceElement(ABC):
-    """Base class for reference elements used to identify scenes."""
-
-    @abstractmethod
-    def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
-        """Detect the presence of the reference element."""
-        raise NotImplementedError("Subclasses must implement this method")
-
-
-class ImageElement(ReferenceElement):
-    """Reference element that identifies a scene by an image."""
-
-    def __init__(
-        self,
-        path: str | list[str],
-        confidence: float = 0.999,
-        region: RegionSpec | None = None,
-    ):
-        self.path = path
-        self.confidence = confidence
-        self.region = region
-
-    @override
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the image in the current screen."""
-        if isinstance(self.path, str):
-            path = [self.path]  # Ensure path is a list for consistency
-        else:
-            path = self.path
-        for image_path in path:
-            try:
-                location = locate_on_screen(
-                    image_path, region=region or self.region, confidence=self.confidence
-                )
-                return location
-            except gui.ImageNotFoundException:
-                continue
-
-
-class TextElement(ReferenceElement):
-    """Reference element that identifies a scene by text."""
-
-    def __init__(
-        self,
-        text: str,
-        region: RegionSpec | None = None,
-        case_sensitive: bool = False,
-    ):
-        self.text = text
-        self.region = region
-        self.case_sensitive = case_sensitive
-        if not case_sensitive:
-            self.text = self.text.lower()
-
-    def is_visible(self, region: RegionSpec | None = None):
-        """Method to detect the presence of the text in the current screen."""
-        from .ocr import OCR
-
-        ocr = OCR()
-        region = region or self.region
-        for text, detected_region in ocr.recognize_text(
-            gui.screenshot(
-                region=generate_region_from_spec(region).to_box() if region else None
-            )
-        ):
-            if not self.case_sensitive:
-                text = text.lower()
-            if text.strip() == self.text.strip():
-                return detected_region
-        return None
+from abc import ABC, abstractmethod
+from typing import Callable, override
+
+import pyautogui as gui
+from PIL import Image
+
+from ._types import MouseButton, TowardsDirection
+from .region import Region, RegionSpec
+from .utils import locate_on_screen, move_and_click
+
+
+class ReferenceElement(ABC):
+    """Base class for reference elements used to identify scenes."""
+
+    @abstractmethod
+    def locate(self, region: RegionSpec | None = None) -> Region | None:
+        """Detect the presence of the reference element."""
+        raise NotImplementedError("Subclasses must implement this method")
+
+    def locate_and_click(
+        self,
+        offset: tuple[int, int] = (0, 0),
+        region: RegionSpec | None = None,
+        clicks: int = 1,
+        button: MouseButton = "left",
+        towards: TowardsDirection = None,
+    ):
+        """Locate the reference element and click on it."""
+        region = self.locate(region=region)
+        assert region is not None, f"Element {self} not found on screen"
+        move_and_click(
+            target_region=region,
+            clicks=clicks,
+            button=button,
+            offset=offset,
+            towards=towards,
+        )
+
+
+class ImageElement(ReferenceElement):
+    """Reference element that identifies a scene by an image."""
+
+    def __init__(
+        self,
+        path: str | list[str],
+        confidence: float = 0.999,
+        region: RegionSpec | None = None,
+        locator: Callable[[Image.Image, Image.Image], list[Region]] | None = None,
+    ):
+        self.path = path
+        self.confidence = confidence
+        self.region = region
+        self.locator = locator
+
+    @override
+    def locate(self, region: RegionSpec | None = None) -> Region | None:
+        """Method to detect the presence of the image in the current screen."""
+        if isinstance(self.path, str):
+            path = [self.path]  # Ensure path is a list for consistency
+        else:
+            path = self.path
+        for image_path in path:
+            try:
+                location = locate_on_screen(
+                    image_path,
+                    region=region if region else self.region,
+                    confidence=self.confidence,
+                    locator=self.locator,
+                )
+                return location
+            except gui.ImageNotFoundException:
+                continue
+
+
+class TextElement(ReferenceElement):
+    """Reference element that identifies a scene by text."""
+
+    def __init__(
+        self, text: str, region: RegionSpec | None = None, case_sensitive: bool = False
+    ):
+        self.text = text
+        self.region = region
+        self.case_sensitive = case_sensitive
+        if not case_sensitive:
+            self.text = self.text.lower()
+
+    def locate(self, region: RegionSpec | None = None):
+        """Method to detect the presence of the text in the current screen."""
+        from .ocr import OCR
+
+        ocr = OCR()
+        region = region or self.region
+        for text, detected_region in ocr.recognize_text(
+            gui.screenshot(region=Region.from_spec(region).to_box() if region else None)
+        ):
+            if not self.case_sensitive:
+                text = text.lower()
+            if text.strip() == self.text.strip():
+                return detected_region.resolve(base=region)
+        return None
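is_visible is renamed to locate, TextElement matches now resolve back into absolute screen coordinates, and ReferenceElement gains a concrete locate_and_click helper. A usage sketch (the asset path and spec string are hypothetical):

    from pyautoscene import ImageElement, TextElement

    save = ImageElement("assets/save.png", confidence=0.95)
    # Search only the left half of the screen, then double-click the match;
    # offset nudges the click point (exact semantics live in utils.move_and_click,
    # which is not part of this diff).
    save.locate_and_click(offset=(10, 0), clicks=2, region="x:1/2")

    title = TextElement("Untitled document")
    hit = title.locate()  # Region or None
    if hit is not None:
        print(hit.center)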
pyautoscene/region.py ADDED
@@ -0,0 +1,70 @@
+from __future__ import annotations
+
+import re
+from dataclasses import dataclass
+
+import numpy as np
+import pyautogui as gui
+from pyscreeze import Box
+
+type RegionSpec = Region | str
+
+axis_pattern = re.compile(r"(?P<d>[xy]):\(?(?P<i>\d+)(?:-(?P<j>\d+))?\)?/(?P<n>\d+)")
+
+
+@dataclass(frozen=True, slots=True)
+class Region:
+    left: int
+    top: int
+    width: int
+    height: int
+
+    def to_box(self) -> Box:
+        """Convert to a pyscreeze Box."""
+        return Box(self.left, self.top, self.width, self.height)
+
+    @classmethod
+    def from_box(cls, box: Box) -> Region:
+        """Create a Region from a pyscreeze Box."""
+        return cls(left=box.left, top=box.top, width=box.width, height=box.height)
+
+    @property
+    def center(self) -> tuple[int, int]:
+        """Get the center coordinates of the region."""
+        return (self.left + self.width // 2, self.top + self.height // 2)
+
+    @classmethod
+    def from_spec(
+        cls, spec: RegionSpec, shape: tuple[int, int] | None = None
+    ) -> Region:
+        if isinstance(spec, Region):
+            return spec
+        if shape is None:
+            img = np.array(gui.screenshot())
+            shape = (img.shape[0]), (img.shape[1])
+
+        default_region = {"left": 0, "top": 0, "width": shape[1], "height": shape[0]}
+
+        axis_mapping = {"x": ("left", "width", 1), "y": ("top", "height", 0)}
+        for axis, i, j, n in axis_pattern.findall(spec):
+            alignment, size_attr, dim_index = axis_mapping[axis]
+            size = shape[dim_index] // int(n)
+            i, j = int(i), int(j) if j else int(i)
+            default_region.update({
+                alignment: (i - 1) * size,
+                size_attr: (j - i + 1) * size,
+            })
+
+        return cls(**default_region)
+
+    def resolve(self, base: RegionSpec | None) -> Region:
+        if base is None:
+            return self
+        if isinstance(base, str):
+            base = Region.from_spec(base)
+        return Region(
+            left=self.left + base.left,
+            top=self.top + base.top,
+            width=self.width,
+            height=self.height,
+        )
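from_spec divides each axis into n equal bands and selects band i, or the inclusive span i-j; shape is (height, width), and when omitted a screenshot is taken to measure the screen. A sketch on an assumed 1920x1080 display:

    from pyautoscene import Region

    shape = (1080, 1920)  # (height, width); passing it avoids a screenshot

    Region.from_spec("x:1/2", shape=shape)
    # Region(left=0, top=0, width=960, height=1080) -- left half

    Region.from_spec("x:(2-3)/3 y:1/4", shape=shape)
    # Region(left=640, top=0, width=1280, height=270) -- right two thirds, top quarter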