juham-watermeter 0.0.4__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,273 +1,273 @@
1
- """Optical Character Recognition based water meter
2
- Note: tested and works, but needs more work to be reliable
3
-
4
- """
5
-
6
- import json
7
- import time
8
- import cv2
9
- import numpy as np
10
- from typing import Any, Optional, Union, cast
11
- from typing_extensions import override
12
- import pytesseract # type: ignore
13
- from PIL import Image
14
- from masterpiece.mqtt import Mqtt
15
- from juham_core.timeutils import timestamp
16
- from .webcamera import WebCameraThread, WebCamera
17
-
18
-
19
class WaterMeterThreadOCR(WebCameraThread):
    """Asynchronous thread that reads a water meter from web camera images.

    Each update captures an image, runs ``process_image()`` on it, recognizes
    the meter digits with Tesseract OCR and publishes the reading as a JSON
    message to the configured MQTT topic.
    """

    # class attributes; topic, crop box, image saving flag and digit count are
    # overridden per instance by init_watermeter_ocr()
    _watermeter_topic: str = ""
    _expected_image_size: int = 640 * 480  # smallest accepted ndarray.size
    _crop_x: int = 195
    _crop_y: int = 157
    _crop_width: int = 640
    _crop_height: int = 480
    # NOTE(review): stored but not consulted anywhere in this class
    _save_images: bool = True
    _num_digits: int = 5

    def __init__(self, client: Optional[Mqtt] = None):
        """Construct with the given mqtt client.

        Args:
            client (object, optional): MQTT client. Defaults to None.
        """
        super().__init__(client)
        self.mqtt_client: Optional[Mqtt] = client
        self.total_liter: float = 0.0
        self.active_liter_lpm: float = 0.0
        # time.time() of the previous accepted reading, used for computing the
        # momentary consumption (liters per minute); 0.0 means "no reading yet"
        self._prev_time: float = 0.0

    def init_watermeter_ocr(
        self,
        topic: str,
        interval: float,
        location: str,
        camera: int,
        crop_x: int,
        crop_y: int,
        crop_width: int,
        crop_height: int,
        save_images: bool,
        num_digits: int,
    ) -> None:
        """Initialize the data acquisition thread.

        Args:
            topic (str): mqtt topic to publish the watermeter readings to
            interval (float): update interval in seconds
            location (str): geographic location
            camera (int): ordinal specifying the camera to be used (0, 1)
            crop_x, crop_y, crop_width, crop_height (int): crop box
            save_images (bool): true to enable saving of captured images,
                for debugging
            num_digits (int): number of digits in the watermeter
        """
        super().init(interval, location, camera)
        self._watermeter_topic = topic
        self._crop_x = crop_x
        self._crop_y = crop_y
        self._crop_width = crop_width
        self._crop_height = crop_height
        self._save_images = save_images
        self._num_digits = num_digits

    @override
    def update_interval(self) -> float:
        """Return the update interval stored in ``self._interval``."""
        return self._interval

    @override
    def update(self) -> bool:
        """Capture an image, read the meter value and publish it.

        Returns:
            bool: True when a reading was published. False when the captured
            or processed image is smaller than the expected size, or when the
            recognized value is lower than the previously accepted total.
        """
        captured_image = self.capture_image()
        if captured_image.size < self._expected_image_size:
            return False
        processed_image = self.process_image(captured_image)
        if processed_image.size < self._expected_image_size:
            return False

        value: float = self.recognize_text(processed_image)
        if value < self.total_liter:
            # A total that decreases cannot be right (evaluate_text() also
            # returns 0.0 on OCR failure), so really skip the reading instead
            # of publishing it.
            self.warning(f"Invalid watermeter reading {value} skipped")
            return False

        current_time: float = time.time()
        elapsed_seconds: float = current_time - self._prev_time
        # Flow is the consumed volume since the previous accepted reading
        # divided by the elapsed minutes. Without a previous reading (or with
        # a zero baseline) there is nothing meaningful to compare against.
        if self._prev_time > 0.0 and self.total_liter > 0.0 and elapsed_seconds > 0.0:
            liters_per_minute: float = (
                (value - self.total_liter) * 60.0 / elapsed_seconds
            )
        else:
            liters_per_minute = 0.0

        self.total_liter = value
        self.active_liter_lpm = liters_per_minute
        self._prev_time = current_time

        watermeter: dict[str, Union[float, str]] = {
            "location": self._location,
            "sensor": self.name,
            "total_liter": self.total_liter,
            "active_lpm": liters_per_minute,
            "ts": timestamp(),
        }

        msg = json.dumps(watermeter)
        self.publish(self._watermeter_topic, msg, qos=0, retain=False)
        self.debug(f"Watermeter published to {self._watermeter_topic}", msg)
        return True

    def evaluate_text(self, text: str) -> float:
        """Convert the raw OCR output into a meter reading.

        Args:
            text (str): text returned by the OCR engine

        Returns:
            float: the value of the first line, or 0.0 when the text does not
            consist of exactly two lines, the first line does not have the
            configured number of digits, or it cannot be parsed as a number.
        """
        # make sure we got all the digits
        # NOTE(review): exactly two lines are required; presumably the digit
        # line plus a trailing page separator emitted by Tesseract - confirm
        lines = text.splitlines()
        num_lines: int = len(lines)
        if num_lines != 2:
            print(f"{text} has invalid number of lines {num_lines}")
            return 0.0
        first_line = lines[0]
        num_digits: int = len(first_line)

        if num_digits != self._num_digits:
            print(
                f"{text} has invalid number of digits {num_digits}, expected {self._num_digits}"
            )
            return 0.0
        try:
            num = float(first_line)
        except ValueError:
            self.warning(f"Cannot evaluate string {first_line}")
            print(f"Cannot evaluated string {first_line}")
            return 0.0

        self.debug(f"Evaluated string {first_line} as {num}")
        print(f"Evaluated string {first_line} as {num}")
        return num

    def recognize_text(self, greyscale_image: np.ndarray) -> float:
        """Recognize numerical digits from the given greyscale image.

        Args:
            greyscale_image (np.ndarray): image to be recognized

        Returns:
            float: recognized value, 0.0 if nothing valid was recognized.
        """
        # Crop the region of interest (ROI) holding the digits. Cropping alone
        # is enough: masking everything outside the very same rectangle first
        # would not change any pixel inside the crop.
        cropped_roi = greyscale_image[
            self._crop_y : self._crop_y + self._crop_height,
            self._crop_x : self._crop_x + self._crop_width,
        ]
        if cropped_roi.size == 0:
            # crop box lies outside the image; OpenCV would fail on an empty ROI
            self.warning(
                f"Crop box ({self._crop_x}, {self._crop_y}, {self._crop_width}, "
                f"{self._crop_height}) is outside the image"
            )
            return 0.0

        # Step 1: Apply a Gaussian blur to smooth out small details
        blurred_image = cv2.GaussianBlur(cropped_roi, (5, 5), 0)

        # Step 2: Apply adaptive thresholding for better OCR
        thresholded_image = cv2.adaptiveThreshold(
            blurred_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        # Step 3: Use morphological closing (2x2 kernel) to remove thin lines
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        morph_image = cv2.morphologyEx(thresholded_image, cv2.MORPH_CLOSE, kernel)

        # Convert to PIL image for pytesseract
        pil_image = Image.fromarray(morph_image)

        # Perform OCR with digits only
        text = pytesseract.image_to_string(
            pil_image, config="--psm 6 -c tessedit_char_whitelist=0123456789"
        )

        return self.evaluate_text(text)
193
-
194
-
195
class WaterMeterOCR(WebCamera):
    """Water meter that is read with a web camera and OCR.

    Creates a ``WaterMeterThreadOCR`` worker thread, configures it with the
    topic, update interval, location, camera and crop box of this object, and
    lets the worker publish the readings to the watermeter topic.
    """

    # key under which the attributes below are stored by to_dict()/from_dict()
    _WATERMETER: str = "watermeter_ocr"
    # NOTE(review): "camera" is not listed here; presumably it is serialized
    # by the WebCamera base class - confirm
    _WATERMETER_ATTRS: list[str] = [
        "topic",
        "update_interval",
        "location",
        "crop_x",
        "crop_y",
        "crop_width",
        "crop_height",
        "save_images",
        "num_digits",
    ]

    # class id used by run() to instantiate the worker thread
    _workerThreadId: str = WaterMeterThreadOCR.get_class_id()
    update_interval: float = 60
    topic: str = "watermeter"
    location: str = "home"
    camera: int = 0
    crop_x: int = 0
    crop_y: int = 0
    crop_width: int = 640
    crop_height: int = 480
    save_images: bool = True
    num_digits: int = 5

    def __init__(self, name: str = "watermeter_ocr") -> None:
        """Construct the OCR watermeter object.

        Args:
            name (str, optional): name of the object.
        """
        super().__init__(name)
        self.worker: Optional[WaterMeterThreadOCR] = None
        # NOTE(review): derived from the topic once, here; a later change of
        # ``topic`` (e.g. through from_dict()) does not update it
        self.watermeter_topic: str = self.make_topic_name(self.topic)

    @override
    def run(self) -> None:
        """Create and configure the worker thread, then run the base class."""
        # create, initialize and start the asynchronous thread for reading
        # the watermeter
        self.worker = cast(
            WaterMeterThreadOCR, self.instantiate(WaterMeterOCR._workerThreadId)
        )
        self.worker.name = self.name

        self.worker.init_watermeter_ocr(
            self.watermeter_topic,
            self.update_interval,
            self.location,
            self.camera,
            self.crop_x,
            self.crop_y,
            self.crop_width,
            self.crop_height,
            self.save_images,
            self.num_digits,
        )
        super().run()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the object, adding the watermeter attributes.

        Returns:
            dict[str, Any]: the base class dictionary extended with the
            attributes listed in ``_WATERMETER_ATTRS`` under ``_WATERMETER``.
        """
        data = super().to_dict()  # Call parent class method
        data[self._WATERMETER] = {
            attr: getattr(self, attr) for attr in self._WATERMETER_ATTRS
        }
        return data

    def from_dict(self, data: dict[str, Any]) -> None:
        """Restore the object from the dictionary created by ``to_dict()``.

        Attributes missing from the dictionary keep their current values
        instead of being overwritten with None.

        Args:
            data (dict[str, Any]): serialized object
        """
        super().from_dict(data)  # Call parent class method
        if self._WATERMETER in data:
            watermeter_data = data[self._WATERMETER]
            for attr in self._WATERMETER_ATTRS:
                if attr in watermeter_data:
                    setattr(self, attr, watermeter_data[attr])
1
+ """Optical Character Recognition based water meter
2
+ Note: tested and works, but needs more work to be reliable
3
+
4
+ """
5
+
6
+ import json
7
+ import time
8
+ import cv2
9
+ import numpy as np
10
+ from typing import Any, Optional, Union, cast
11
+ from typing_extensions import override
12
+ import pytesseract # type: ignore
13
+ from PIL import Image
14
+ from masterpiece.mqtt import Mqtt
15
+ from juham_core.timeutils import timestamp
16
+ from .webcamera import WebCameraThread, WebCamera
17
+
18
+
19
class WaterMeterThreadOCR(WebCameraThread):
    """Asynchronous thread that reads a water meter from web camera images.

    Each update captures an image, runs ``process_image()`` on it, recognizes
    the meter digits with Tesseract OCR and publishes the reading as a JSON
    message to the configured MQTT topic.
    """

    # class attributes; topic, crop box, image saving flag and digit count are
    # overridden per instance by init_watermeter_ocr()
    _watermeter_topic: str = ""
    _expected_image_size: int = 640 * 480  # smallest accepted ndarray.size
    _crop_x: int = 195
    _crop_y: int = 157
    _crop_width: int = 640
    _crop_height: int = 480
    # NOTE(review): stored but not consulted anywhere in this class
    _save_images: bool = True
    _num_digits: int = 5

    def __init__(self, client: Optional[Mqtt] = None):
        """Construct with the given mqtt client.

        Args:
            client (object, optional): MQTT client. Defaults to None.
        """
        super().__init__(client)
        self.mqtt_client: Optional[Mqtt] = client
        self.total_liter: float = 0.0
        self.active_liter_lpm: float = 0.0
        # time.time() of the previous accepted reading, used for computing the
        # momentary consumption (liters per minute); 0.0 means "no reading yet"
        self._prev_time: float = 0.0

    def init_watermeter_ocr(
        self,
        topic: str,
        interval: float,
        location: str,
        camera: int,
        crop_x: int,
        crop_y: int,
        crop_width: int,
        crop_height: int,
        save_images: bool,
        num_digits: int,
    ) -> None:
        """Initialize the data acquisition thread.

        Args:
            topic (str): mqtt topic to publish the watermeter readings to
            interval (float): update interval in seconds
            location (str): geographic location
            camera (int): ordinal specifying the camera to be used (0, 1)
            crop_x, crop_y, crop_width, crop_height (int): crop box
            save_images (bool): true to enable saving of captured images,
                for debugging
            num_digits (int): number of digits in the watermeter
        """
        super().init(interval, location, camera)
        self._watermeter_topic = topic
        self._crop_x = crop_x
        self._crop_y = crop_y
        self._crop_width = crop_width
        self._crop_height = crop_height
        self._save_images = save_images
        self._num_digits = num_digits

    @override
    def update_interval(self) -> float:
        """Return the update interval stored in ``self._interval``."""
        return self._interval

    @override
    def update(self) -> bool:
        """Capture an image, read the meter value and publish it.

        Returns:
            bool: True when a reading was published. False when the captured
            or processed image is smaller than the expected size, or when the
            recognized value is lower than the previously accepted total.
        """
        captured_image = self.capture_image()
        if captured_image.size < self._expected_image_size:
            return False
        processed_image = self.process_image(captured_image)
        if processed_image.size < self._expected_image_size:
            return False

        value: float = self.recognize_text(processed_image)
        if value < self.total_liter:
            # A total that decreases cannot be right (evaluate_text() also
            # returns 0.0 on OCR failure), so really skip the reading instead
            # of publishing it.
            self.warning(f"Invalid watermeter reading {value} skipped")
            return False

        current_time: float = time.time()
        elapsed_seconds: float = current_time - self._prev_time
        # Flow is the consumed volume since the previous accepted reading
        # divided by the elapsed minutes. Without a previous reading (or with
        # a zero baseline) there is nothing meaningful to compare against.
        if self._prev_time > 0.0 and self.total_liter > 0.0 and elapsed_seconds > 0.0:
            liters_per_minute: float = (
                (value - self.total_liter) * 60.0 / elapsed_seconds
            )
        else:
            liters_per_minute = 0.0

        self.total_liter = value
        self.active_liter_lpm = liters_per_minute
        self._prev_time = current_time

        watermeter: dict[str, Union[float, str]] = {
            "location": self._location,
            "sensor": self.name,
            "total_liter": self.total_liter,
            "active_lpm": liters_per_minute,
            "ts": timestamp(),
        }

        msg = json.dumps(watermeter)
        self.publish(self._watermeter_topic, msg, qos=0, retain=False)
        self.debug(f"Watermeter published to {self._watermeter_topic}", msg)
        return True

    def evaluate_text(self, text: str) -> float:
        """Convert the raw OCR output into a meter reading.

        Args:
            text (str): text returned by the OCR engine

        Returns:
            float: the value of the first line, or 0.0 when the text does not
            consist of exactly two lines, the first line does not have the
            configured number of digits, or it cannot be parsed as a number.
        """
        # make sure we got all the digits
        # NOTE(review): exactly two lines are required; presumably the digit
        # line plus a trailing page separator emitted by Tesseract - confirm
        lines = text.splitlines()
        num_lines: int = len(lines)
        if num_lines != 2:
            print(f"{text} has invalid number of lines {num_lines}")
            return 0.0
        first_line = lines[0]
        num_digits: int = len(first_line)

        if num_digits != self._num_digits:
            print(
                f"{text} has invalid number of digits {num_digits}, expected {self._num_digits}"
            )
            return 0.0
        try:
            num = float(first_line)
        except ValueError:
            self.warning(f"Cannot evaluate string {first_line}")
            print(f"Cannot evaluated string {first_line}")
            return 0.0

        self.debug(f"Evaluated string {first_line} as {num}")
        print(f"Evaluated string {first_line} as {num}")
        return num

    def recognize_text(self, greyscale_image: np.ndarray) -> float:
        """Recognize numerical digits from the given greyscale image.

        Args:
            greyscale_image (np.ndarray): image to be recognized

        Returns:
            float: recognized value, 0.0 if nothing valid was recognized.
        """
        # Crop the region of interest (ROI) holding the digits. Cropping alone
        # is enough: masking everything outside the very same rectangle first
        # would not change any pixel inside the crop.
        cropped_roi = greyscale_image[
            self._crop_y : self._crop_y + self._crop_height,
            self._crop_x : self._crop_x + self._crop_width,
        ]
        if cropped_roi.size == 0:
            # crop box lies outside the image; OpenCV would fail on an empty ROI
            self.warning(
                f"Crop box ({self._crop_x}, {self._crop_y}, {self._crop_width}, "
                f"{self._crop_height}) is outside the image"
            )
            return 0.0

        # Step 1: Apply a Gaussian blur to smooth out small details
        blurred_image = cv2.GaussianBlur(cropped_roi, (5, 5), 0)

        # Step 2: Apply adaptive thresholding for better OCR
        thresholded_image = cv2.adaptiveThreshold(
            blurred_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
        )

        # Step 3: Use morphological closing (2x2 kernel) to remove thin lines
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
        morph_image = cv2.morphologyEx(thresholded_image, cv2.MORPH_CLOSE, kernel)

        # Convert to PIL image for pytesseract
        pil_image = Image.fromarray(morph_image)

        # Perform OCR with digits only
        text = pytesseract.image_to_string(
            pil_image, config="--psm 6 -c tessedit_char_whitelist=0123456789"
        )

        return self.evaluate_text(text)
193
+
194
+
195
class WaterMeterOCR(WebCamera):
    """Water meter that is read with a web camera and OCR.

    Creates a ``WaterMeterThreadOCR`` worker thread, configures it with the
    topic, update interval, location, camera and crop box of this object, and
    lets the worker publish the readings to the watermeter topic.
    """

    # key under which the attributes below are stored by to_dict()/from_dict()
    _WATERMETER: str = "watermeter_ocr"
    # NOTE(review): "camera" is not listed here; presumably it is serialized
    # by the WebCamera base class - confirm
    _WATERMETER_ATTRS: list[str] = [
        "topic",
        "update_interval",
        "location",
        "crop_x",
        "crop_y",
        "crop_width",
        "crop_height",
        "save_images",
        "num_digits",
    ]

    # class id used by run() to instantiate the worker thread
    _workerThreadId: str = WaterMeterThreadOCR.get_class_id()
    update_interval: float = 60
    topic: str = "watermeter"
    location: str = "home"
    camera: int = 0
    crop_x: int = 0
    crop_y: int = 0
    crop_width: int = 640
    crop_height: int = 480
    save_images: bool = True
    num_digits: int = 5

    def __init__(self, name: str = "watermeter_ocr") -> None:
        """Construct the OCR watermeter object.

        Args:
            name (str, optional): name of the object.
        """
        super().__init__(name)
        self.worker: Optional[WaterMeterThreadOCR] = None
        # NOTE(review): derived from the topic once, here; a later change of
        # ``topic`` (e.g. through from_dict()) does not update it
        self.watermeter_topic: str = self.make_topic_name(self.topic)

    @override
    def run(self) -> None:
        """Create and configure the worker thread, then run the base class."""
        # create, initialize and start the asynchronous thread for reading
        # the watermeter
        self.worker = cast(
            WaterMeterThreadOCR, self.instantiate(WaterMeterOCR._workerThreadId)
        )
        self.worker.name = self.name

        self.worker.init_watermeter_ocr(
            self.watermeter_topic,
            self.update_interval,
            self.location,
            self.camera,
            self.crop_x,
            self.crop_y,
            self.crop_width,
            self.crop_height,
            self.save_images,
            self.num_digits,
        )
        super().run()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the object, adding the watermeter attributes.

        Returns:
            dict[str, Any]: the base class dictionary extended with the
            attributes listed in ``_WATERMETER_ATTRS`` under ``_WATERMETER``.
        """
        data = super().to_dict()  # Call parent class method
        data[self._WATERMETER] = {
            attr: getattr(self, attr) for attr in self._WATERMETER_ATTRS
        }
        return data

    def from_dict(self, data: dict[str, Any]) -> None:
        """Restore the object from the dictionary created by ``to_dict()``.

        Attributes missing from the dictionary keep their current values
        instead of being overwritten with None.

        Args:
            data (dict[str, Any]): serialized object
        """
        super().from_dict(data)  # Call parent class method
        if self._WATERMETER in data:
            watermeter_data = data[self._WATERMETER]
            for attr in self._WATERMETER_ATTRS:
                if attr in watermeter_data:
                    setattr(self, attr, watermeter_data[attr])