pyautoscene 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pyautoscene/ocr.py CHANGED
@@ -1,70 +1,70 @@
1
- import logging
2
- from hashlib import sha256
3
- from pathlib import Path
4
-
5
- import numpy as np
6
- from PIL import Image
7
-
8
- from .screen import Region
9
-
10
- logging.basicConfig(level=logging.INFO)
11
- logger = logging.getLogger(__name__)
12
-
13
- try:
14
- from rapidocr import RapidOCR
15
- from rapidocr.utils.output import RapidOCROutput
16
- except ImportError:
17
- raise ImportError(
18
- "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
19
- )
20
-
21
- ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
22
-
23
-
24
- def hash_image(img: Image.Image) -> str:
25
- return sha256(img.tobytes()).hexdigest()
26
-
27
-
28
- def convert_points_to_ltwh(points: np.ndarray) -> Region:
29
- if points.shape[0] == 0:
30
- raise ValueError("Points array is empty")
31
-
32
- x_min = np.min(points[:, 0])
33
- y_min = np.min(points[:, 1])
34
- x_max = np.max(points[:, 0])
35
- y_max = np.max(points[:, 1])
36
-
37
- return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
38
-
39
-
40
- class OCR:
41
- engine: RapidOCR | None = None
42
- img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
43
-
44
- def __new__(cls):
45
- if cls.engine is None:
46
- cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
47
- return super().__new__(cls)
48
-
49
- def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
50
- img_gray = img.convert("L")
51
- img_hash = hash_image(img_gray)
52
- if img_hash in self.img_cache:
53
- logger.debug(f"Using cached result for image hash: {img_hash}")
54
- return self.img_cache[img_hash]
55
-
56
- assert self.engine is not None, "Engine should be initialized in __new__"
57
- result = self.engine(np.array(img_gray))
58
- assert isinstance(result, RapidOCROutput), (
59
- "Result should be of type RapidOCROutput"
60
- )
61
- assert result.txts is not None and result.boxes is not None, (
62
- "Text recognition failed, txts and boxes should not be None"
63
- )
64
-
65
- detections = tuple(
66
- (txt, convert_points_to_ltwh(box))
67
- for txt, box in zip(result.txts, result.boxes)
68
- )
69
- self.img_cache[img_hash] = detections
70
- return detections
1
+ import logging
2
+ from hashlib import sha256
3
+ from pathlib import Path
4
+
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+ from .screen import Region
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+ try:
14
+ from rapidocr import RapidOCR
15
+ from rapidocr.utils.output import RapidOCROutput
16
+ except ImportError:
17
+ raise ImportError(
18
+ "RapidOCR is not installed. Please install it using 'pip install pyautoscene[ocr]'."
19
+ )
20
+
21
+ ocr_config_path = Path(__file__).parent / "ocr_config.yaml"
22
+
23
+
24
+ def hash_image(img: Image.Image) -> str:
25
+ return sha256(img.tobytes()).hexdigest()
26
+
27
+
28
+ def convert_points_to_ltwh(points: np.ndarray) -> Region:
29
+ if points.shape[0] == 0:
30
+ raise ValueError("Points array is empty")
31
+
32
+ x_min = np.min(points[:, 0])
33
+ y_min = np.min(points[:, 1])
34
+ x_max = np.max(points[:, 0])
35
+ y_max = np.max(points[:, 1])
36
+
37
+ return Region(left=x_min, top=y_min, width=x_max - x_min, height=y_max - y_min)
38
+
39
+
40
+ class OCR:
41
+ engine: RapidOCR | None = None
42
+ img_cache: dict[str, tuple[tuple[str, Region], ...]] = {}
43
+
44
+ def __new__(cls):
45
+ if cls.engine is None:
46
+ cls.engine = RapidOCR(config_path=ocr_config_path.as_posix())
47
+ return super().__new__(cls)
48
+
49
+ def recognize_text(self, img: Image.Image) -> tuple[tuple[str, Region], ...]:
50
+ img_gray = img.convert("L")
51
+ img_hash = hash_image(img_gray)
52
+ if img_hash in self.img_cache:
53
+ logger.debug(f"Using cached result for image hash: {img_hash}")
54
+ return self.img_cache[img_hash]
55
+
56
+ assert self.engine is not None, "Engine should be initialized in __new__"
57
+ result = self.engine(np.array(img_gray))
58
+ assert isinstance(result, RapidOCROutput), (
59
+ "Result should be of type RapidOCROutput"
60
+ )
61
+ assert result.txts is not None and result.boxes is not None, (
62
+ "Text recognition failed, txts and boxes should not be None"
63
+ )
64
+
65
+ detections = tuple(
66
+ (txt, convert_points_to_ltwh(box))
67
+ for txt, box in zip(result.txts, result.boxes)
68
+ )
69
+ self.img_cache[img_hash] = detections
70
+ return detections
pyautoscene/ocr_config.yaml CHANGED
@@ -1,112 +1,112 @@
1
- Global:
2
- text_score: 0.5
3
-
4
- use_det: true
5
- use_cls: false
6
- use_angle_cls: false
7
- use_rec: true
8
-
9
- min_height: 30
10
- width_height_ratio: 8
11
- max_side_len: 2000
12
- min_side_len: 30
13
-
14
- return_word_box: false
15
- return_single_char_box: false
16
-
17
- font_path: null
18
-
19
- EngineConfig:
20
- onnxruntime:
21
- intra_op_num_threads: -1
22
- inter_op_num_threads: -1
23
- enable_cpu_mem_arena: false
24
-
25
- cpu_ep_cfg:
26
- arena_extend_strategy: "kSameAsRequested"
27
-
28
- use_cuda: false
29
- cuda_ep_cfg:
30
- device_id: 0
31
- arena_extend_strategy: "kNextPowerOfTwo"
32
- cudnn_conv_algo_search: "EXHAUSTIVE"
33
- do_copy_in_default_stream: true
34
-
35
- use_dml: false
36
- dm_ep_cfg: null
37
-
38
- use_cann: false
39
- cann_ep_cfg:
40
- device_id: 0
41
- arena_extend_strategy: "kNextPowerOfTwo"
42
- npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
43
- op_select_impl_mode: "high_performance"
44
- optypelist_for_implmode: "Gelu"
45
- enable_cann_graph: true
46
-
47
- openvino:
48
- inference_num_threads: -1
49
-
50
- paddle:
51
- cpu_math_library_num_threads: -1
52
- use_cuda: false
53
- gpu_id: 0
54
- gpu_mem: 500
55
-
56
- torch:
57
- use_cuda: false
58
- gpu_id: 0
59
-
60
- Det:
61
- engine_type: "onnxruntime"
62
- lang_type: "en"
63
- model_type: "mobile"
64
- ocr_version: "PP-OCRv4"
65
-
66
- task_type: "det"
67
-
68
- model_path: null
69
- model_dir: null
70
-
71
- limit_side_len: 736
72
- limit_type: min
73
- std: [ 0.5, 0.5, 0.5 ]
74
- mean: [ 0.5, 0.5, 0.5 ]
75
-
76
- thresh: 0.3
77
- box_thresh: 0.5
78
- max_candidates: 1000
79
- unclip_ratio: 1.6
80
- use_dilation: true
81
- score_mode: fast
82
-
83
- Cls:
84
- engine_type: "onnxruntime"
85
- lang_type: "ch"
86
- model_type: "mobile"
87
- ocr_version: "PP-OCRv4"
88
-
89
- task_type: "cls"
90
-
91
- model_path: null
92
- model_dir: null
93
-
94
- cls_image_shape: [3, 48, 192]
95
- cls_batch_num: 6
96
- cls_thresh: 0.9
97
- label_list: ["0", "180"]
98
-
99
- Rec:
100
- engine_type: "onnxruntime"
101
- lang_type: "en"
102
- model_type: "mobile"
103
- ocr_version: "PP-OCRv4"
104
-
105
- task_type: "rec"
106
-
107
- model_path: null
108
- model_dir: null
109
-
110
- rec_keys_path: null
111
- rec_img_shape: [3, 48, 320]
112
- rec_batch_num: 6
1
+ Global:
2
+ text_score: 0.5
3
+
4
+ use_det: true
5
+ use_cls: false
6
+ use_angle_cls: false
7
+ use_rec: true
8
+
9
+ min_height: 30
10
+ width_height_ratio: 8
11
+ max_side_len: 2000
12
+ min_side_len: 30
13
+
14
+ return_word_box: false
15
+ return_single_char_box: false
16
+
17
+ font_path: null
18
+
19
+ EngineConfig:
20
+ onnxruntime:
21
+ intra_op_num_threads: -1
22
+ inter_op_num_threads: -1
23
+ enable_cpu_mem_arena: false
24
+
25
+ cpu_ep_cfg:
26
+ arena_extend_strategy: "kSameAsRequested"
27
+
28
+ use_cuda: false
29
+ cuda_ep_cfg:
30
+ device_id: 0
31
+ arena_extend_strategy: "kNextPowerOfTwo"
32
+ cudnn_conv_algo_search: "EXHAUSTIVE"
33
+ do_copy_in_default_stream: true
34
+
35
+ use_dml: false
36
+ dm_ep_cfg: null
37
+
38
+ use_cann: false
39
+ cann_ep_cfg:
40
+ device_id: 0
41
+ arena_extend_strategy: "kNextPowerOfTwo"
42
+ npu_mem_limit: 21474836480 # 20 * 1024 * 1024 * 1024
43
+ op_select_impl_mode: "high_performance"
44
+ optypelist_for_implmode: "Gelu"
45
+ enable_cann_graph: true
46
+
47
+ openvino:
48
+ inference_num_threads: -1
49
+
50
+ paddle:
51
+ cpu_math_library_num_threads: -1
52
+ use_cuda: false
53
+ gpu_id: 0
54
+ gpu_mem: 500
55
+
56
+ torch:
57
+ use_cuda: false
58
+ gpu_id: 0
59
+
60
+ Det:
61
+ engine_type: "onnxruntime"
62
+ lang_type: "en"
63
+ model_type: "mobile"
64
+ ocr_version: "PP-OCRv4"
65
+
66
+ task_type: "det"
67
+
68
+ model_path: null
69
+ model_dir: null
70
+
71
+ limit_side_len: 736
72
+ limit_type: min
73
+ std: [ 0.5, 0.5, 0.5 ]
74
+ mean: [ 0.5, 0.5, 0.5 ]
75
+
76
+ thresh: 0.3
77
+ box_thresh: 0.5
78
+ max_candidates: 1000
79
+ unclip_ratio: 1.6
80
+ use_dilation: true
81
+ score_mode: fast
82
+
83
+ Cls:
84
+ engine_type: "onnxruntime"
85
+ lang_type: "ch"
86
+ model_type: "mobile"
87
+ ocr_version: "PP-OCRv4"
88
+
89
+ task_type: "cls"
90
+
91
+ model_path: null
92
+ model_dir: null
93
+
94
+ cls_image_shape: [3, 48, 192]
95
+ cls_batch_num: 6
96
+ cls_thresh: 0.9
97
+ label_list: ["0", "180"]
98
+
99
+ Rec:
100
+ engine_type: "onnxruntime"
101
+ lang_type: "en"
102
+ model_type: "mobile"
103
+ ocr_version: "PP-OCRv4"
104
+
105
+ task_type: "rec"
106
+
107
+ model_path: null
108
+ model_dir: null
109
+
110
+ rec_keys_path: null
111
+ rec_img_shape: [3, 48, 320]
112
+ rec_batch_num: 6
pyautoscene/references.py CHANGED
@@ -1,78 +1,78 @@
1
- from abc import ABC, abstractmethod
2
- from typing import override
3
-
4
- import pyautogui as gui
5
-
6
- from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
7
-
8
-
9
- class ReferenceElement(ABC):
10
- """Base class for reference elements used to identify scenes."""
11
-
12
- @abstractmethod
13
- def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
14
- """Detect the presence of the reference element."""
15
- raise NotImplementedError("Subclasses must implement this method")
16
-
17
-
18
- class ImageElement(ReferenceElement):
19
- """Reference element that identifies a scene by an image."""
20
-
21
- def __init__(
22
- self,
23
- path: str | list[str],
24
- confidence: float = 0.999,
25
- region: RegionSpec | None = None,
26
- ):
27
- self.path = path
28
- self.confidence = confidence
29
- self.region = region
30
-
31
- @override
32
- def is_visible(self, region: RegionSpec | None = None):
33
- """Method to detect the presence of the image in the current screen."""
34
- if isinstance(self.path, str):
35
- path = [self.path] # Ensure path is a list for consistency
36
- else:
37
- path = self.path
38
- for image_path in path:
39
- try:
40
- location = locate_on_screen(
41
- image_path, region=region or self.region, confidence=self.confidence
42
- )
43
- return location
44
- except gui.ImageNotFoundException:
45
- continue
46
-
47
-
48
- class TextElement(ReferenceElement):
49
- """Reference element that identifies a scene by text."""
50
-
51
- def __init__(
52
- self,
53
- text: str,
54
- region: RegionSpec | None = None,
55
- case_sensitive: bool = False,
56
- ):
57
- self.text = text
58
- self.region = region
59
- self.case_sensitive = case_sensitive
60
- if not case_sensitive:
61
- self.text = self.text.lower()
62
-
63
- def is_visible(self, region: RegionSpec | None = None):
64
- """Method to detect the presence of the text in the current screen."""
65
- from .ocr import OCR
66
-
67
- ocr = OCR()
68
- region = region or self.region
69
- for text, detected_region in ocr.recognize_text(
70
- gui.screenshot(
71
- region=generate_region_from_spec(region).to_box() if region else None
72
- )
73
- ):
74
- if not self.case_sensitive:
75
- text = text.lower()
76
- if text.strip() == self.text.strip():
77
- return detected_region
78
- return None
1
+ from abc import ABC, abstractmethod
2
+ from typing import override
3
+
4
+ import pyautogui as gui
5
+
6
+ from .screen import RegionSpec, generate_region_from_spec, locate_on_screen
7
+
8
+
9
+ class ReferenceElement(ABC):
10
+ """Base class for reference elements used to identify scenes."""
11
+
12
+ @abstractmethod
13
+ def is_visible(self, region: RegionSpec | None = None) -> RegionSpec | None:
14
+ """Detect the presence of the reference element."""
15
+ raise NotImplementedError("Subclasses must implement this method")
16
+
17
+
18
+ class ImageElement(ReferenceElement):
19
+ """Reference element that identifies a scene by an image."""
20
+
21
+ def __init__(
22
+ self,
23
+ path: str | list[str],
24
+ confidence: float = 0.999,
25
+ region: RegionSpec | None = None,
26
+ ):
27
+ self.path = path
28
+ self.confidence = confidence
29
+ self.region = region
30
+
31
+ @override
32
+ def is_visible(self, region: RegionSpec | None = None):
33
+ """Method to detect the presence of the image in the current screen."""
34
+ if isinstance(self.path, str):
35
+ path = [self.path] # Ensure path is a list for consistency
36
+ else:
37
+ path = self.path
38
+ for image_path in path:
39
+ try:
40
+ location = locate_on_screen(
41
+ image_path, region=region or self.region, confidence=self.confidence
42
+ )
43
+ return location
44
+ except gui.ImageNotFoundException:
45
+ continue
46
+
47
+
48
+ class TextElement(ReferenceElement):
49
+ """Reference element that identifies a scene by text."""
50
+
51
+ def __init__(
52
+ self,
53
+ text: str,
54
+ region: RegionSpec | None = None,
55
+ case_sensitive: bool = False,
56
+ ):
57
+ self.text = text
58
+ self.region = region
59
+ self.case_sensitive = case_sensitive
60
+ if not case_sensitive:
61
+ self.text = self.text.lower()
62
+
63
+ def is_visible(self, region: RegionSpec | None = None):
64
+ """Method to detect the presence of the text in the current screen."""
65
+ from .ocr import OCR
66
+
67
+ ocr = OCR()
68
+ region = region or self.region
69
+ for text, detected_region in ocr.recognize_text(
70
+ gui.screenshot(
71
+ region=generate_region_from_spec(region).to_box() if region else None
72
+ )
73
+ ):
74
+ if not self.case_sensitive:
75
+ text = text.lower()
76
+ if text.strip() == self.text.strip():
77
+ return detected_region
78
+ return None
pyautoscene/scene.py CHANGED
@@ -1,61 +1,61 @@
1
- from __future__ import annotations
2
-
3
- from typing import Callable, TypedDict
4
-
5
- from statemachine import State
6
-
7
- from pyautoscene.utils import is_valid_variable_name
8
-
9
- from .references import ReferenceElement
10
- from .screen import Region
11
-
12
-
13
- class ActionInfo(TypedDict):
14
- """Type definition for action information in a scene."""
15
-
16
- action: Callable[..., None]
17
- transitions_to: Scene | None
18
-
19
-
20
- class Scene(State):
21
- """A scene represents a state in the GUI automation state machine."""
22
-
23
- def __init__(
24
- self,
25
- name: str,
26
- elements: list[ReferenceElement] | None = None,
27
- initial: bool = False,
28
- ):
29
- assert is_valid_variable_name(name), (
30
- f"Invalid scene name: {name}, must be a valid Python identifier."
31
- )
32
- super().__init__(name, initial=initial)
33
- self.elements = elements or []
34
- self.actions: dict[str, ActionInfo] = {}
35
-
36
- def action(self, transitions_to: Scene | None = None):
37
- """Decorator to register an action for this scene."""
38
-
39
- def decorator(func: Callable[..., None]) -> Callable[..., None]:
40
- if func.__name__ not in self.actions:
41
- action_name = func.__name__
42
- self.actions[action_name] = {
43
- "action": func,
44
- "transitions_to": transitions_to,
45
- }
46
- return func
47
-
48
- return decorator
49
-
50
- def get_action(self, action_name: str) -> ActionInfo | None:
51
- """Get an action by name."""
52
- return self.actions.get(action_name)
53
-
54
- def is_on_screen(self, region: Region | None = None) -> bool:
55
- """Check if any reference element is currently on screen."""
56
- # TODO: Refactor after text recognition is implemented
57
- # elements = (elem for elem in self.elements if isinstance(elem, ReferenceImage))
58
- return all(elem.is_visible(region) for elem in self.elements)
59
-
60
- def __repr__(self):
61
- return f"Scene({self.name!r}, elements={len(self.elements)})"
1
+ from __future__ import annotations
2
+
3
+ from typing import Callable, TypedDict
4
+
5
+ from statemachine import State
6
+
7
+ from pyautoscene.utils import is_valid_variable_name
8
+
9
+ from .references import ReferenceElement
10
+ from .screen import Region
11
+
12
+
13
+ class ActionInfo(TypedDict):
14
+ """Type definition for action information in a scene."""
15
+
16
+ action: Callable[..., None]
17
+ transitions_to: Scene | None
18
+
19
+
20
+ class Scene(State):
21
+ """A scene represents a state in the GUI automation state machine."""
22
+
23
+ def __init__(
24
+ self,
25
+ name: str,
26
+ elements: list[ReferenceElement] | None = None,
27
+ initial: bool = False,
28
+ ):
29
+ assert is_valid_variable_name(name), (
30
+ f"Invalid scene name: {name}, must be a valid Python identifier."
31
+ )
32
+ super().__init__(name, initial=initial)
33
+ self.elements = elements or []
34
+ self.actions: dict[str, ActionInfo] = {}
35
+
36
+ def action(self, transitions_to: Scene | None = None):
37
+ """Decorator to register an action for this scene."""
38
+
39
+ def decorator(func: Callable[..., None]) -> Callable[..., None]:
40
+ if func.__name__ not in self.actions:
41
+ action_name = func.__name__
42
+ self.actions[action_name] = {
43
+ "action": func,
44
+ "transitions_to": transitions_to,
45
+ }
46
+ return func
47
+
48
+ return decorator
49
+
50
+ def get_action(self, action_name: str) -> ActionInfo | None:
51
+ """Get an action by name."""
52
+ return self.actions.get(action_name)
53
+
54
+ def is_on_screen(self, region: Region | None = None) -> bool:
55
+ """Check if any reference element is currently on screen."""
56
+ # TODO: Refactor after text recognition is implemented
57
+ # elements = (elem for elem in self.elements if isinstance(elem, ReferenceImage))
58
+ return all(elem.is_visible(region) for elem in self.elements)
59
+
60
+ def __repr__(self):
61
+ return f"Scene({self.name!r}, elements={len(self.elements)})"