GameSentenceMiner 2.8.26-py3-none-any.whl → 2.8.28-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- GameSentenceMiner/ai/ai_prompting.py +3 -3
- GameSentenceMiner/anki.py +14 -7
- GameSentenceMiner/ffmpeg.py +17 -25
- GameSentenceMiner/gsm.py +25 -17
- GameSentenceMiner/ocr/owocr_helper.py +1 -1
- GameSentenceMiner/owocr/owocr/config.py +25 -13
- GameSentenceMiner/owocr/owocr/ocr.py +103 -95
- GameSentenceMiner/owocr/owocr/run.py +602 -598
- GameSentenceMiner/owocr/owocr/screen_coordinate_picker.py +3 -2
- GameSentenceMiner/vad/result.py +8 -0
- GameSentenceMiner/vad/silero_trim.py +3 -4
- GameSentenceMiner/vad/vosk_helper.py +5 -5
- GameSentenceMiner/vad/whisper_helper.py +5 -5
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/METADATA +1 -1
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/RECORD +19 -18
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/WHEEL +0 -0
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/entry_points.txt +0 -0
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/licenses/LICENSE +0 -0
- {gamesentenceminer-2.8.26.dist-info → gamesentenceminer-2.8.28.dist-info}/top_level.txt +0 -0
GameSentenceMiner/owocr/owocr/ocr.py

```diff
@@ -96,6 +96,22 @@ def post_process(text):
     return text
 
 
+def input_to_pil_image(img):
+    if isinstance(img, Image.Image):
+        pil_image = img
+    elif isinstance(img, (bytes, bytearray)):
+        pil_image = Image.open(io.BytesIO(img))
+    elif isinstance(img, Path):
+        try:
+            pil_image = Image.open(img)
+            pil_image.load()
+        except (UnidentifiedImageError, OSError) as e:
+            return None
+    else:
+        raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
+    return pil_image
+
+
 def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
     if img_format == 'png' and optimized_png_encode and not optimize:
         raw_data = img.convert('RGBA').tobytes()
```
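The new `input_to_pil_image` helper centralizes the input handling that every engine's `__call__` previously duplicated: it accepts a `PIL.Image.Image`, raw `bytes`/`bytearray`, or a `pathlib.Path`, returns `None` when a file cannot be decoded, and raises `ValueError` for any other type. Below is a minimal, self-contained sketch of how it behaves; the helper body mirrors the hunk above, while the demo inputs are illustrative and not from the package:

```python
import io
from pathlib import Path

from PIL import Image, UnidentifiedImageError


def input_to_pil_image(img):
    # Mirrors the helper added in the hunk above: normalize supported inputs to a PIL image.
    if isinstance(img, Image.Image):
        pil_image = img
    elif isinstance(img, (bytes, bytearray)):
        pil_image = Image.open(io.BytesIO(img))
    elif isinstance(img, Path):
        try:
            pil_image = Image.open(img)
            pil_image.load()  # force decoding so corrupt or missing files fail here
        except (UnidentifiedImageError, OSError):
            return None  # engines translate this into (False, 'Invalid image provided')
    else:
        raise ValueError(f'img must be a path, PIL.Image or bytes object, instead got: {img}')
    return pil_image


# All three supported input forms normalize to a PIL image; a bad path yields None.
in_memory = Image.new('RGB', (32, 32))
print(input_to_pil_image(in_memory))                   # the same Image object, passed through

buffer = io.BytesIO()
in_memory.save(buffer, format='PNG')
print(input_to_pil_image(buffer.getvalue()))           # Image decoded from PNG bytes

print(input_to_pil_image(Path('does_not_exist.png')))  # None (FileNotFoundError is an OSError)
```

Note that plain `str` paths are not among the accepted types; callers now pass a `pathlib.Path`, a `PIL.Image.Image`, or encoded bytes, and each engine converts a `None` result into `(False, 'Invalid image provided')`.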
```diff
@@ -157,15 +173,14 @@ class MangaOcr:
         self.available = True
         logger.info('Manga OCR ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         x = (True, self.model(img))
+
+        # img.close()
         return x
 
 class GoogleVision:
```
```diff
@@ -188,13 +203,10 @@ class GoogleVision:
         except:
             logger.warning('Error parsing Google credentials, Google Vision will not work!')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         image_bytes = self._preprocess(img)
         image = vision.Image(content=image_bytes)
@@ -207,6 +219,8 @@ class GoogleVision:
         texts = response.text_annotations
         res = texts[0].description if len(texts) > 0 else ''
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
```diff
@@ -225,13 +239,10 @@ class GoogleLens:
         self.available = True
         logger.info('Google Lens ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         request = LensOverlayServerRequest()
 
@@ -298,6 +309,8 @@ class GoogleLens:
                 res += '\n'
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
@@ -305,7 +318,9 @@ class GoogleLens:
         aspect_ratio = img.width / img.height
         new_w = int(sqrt(3000000 * aspect_ratio))
         new_h = int(new_w / aspect_ratio)
-        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        # img.close()
+        img = img_resized
 
         return (pil_image_to_bytes(img), img.width, img.height)
 
```
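Both Lens engines' `_preprocess` pick a target size whose area is roughly 3,000,000 pixels at the source aspect ratio before encoding; the change here only splits the old in-place `img.resize(...)` into an explicit `img_resized` temporary, with the `img.close()` call left commented out. A quick check of that arithmetic, using an illustrative 1920×1080 frame (any surrounding size guard lies outside the hunk and is not shown):

```python
from math import sqrt

# Target-size math used by the GoogleLens/GoogleLensWeb _preprocess hunks (sample input only).
width, height = 1920, 1080
aspect_ratio = width / height
new_w = int(sqrt(3000000 * aspect_ratio))  # width chosen so new_w * new_h ≈ 3,000,000 px
new_h = int(new_w / aspect_ratio)
print(new_w, new_h, new_w * new_h)         # 2309 1298 2997082 — about 3.0 MP
```

`Image.Resampling.LANCZOS`, used for the resize, requires Pillow 9.1 or newer.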
```diff
@@ -323,13 +338,10 @@ class GoogleLensWeb:
         self.available = True
         logger.info('Google Lens (web) ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         url = 'https://lens.google.com/v3/upload'
         files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
@@ -393,6 +405,8 @@ class GoogleLensWeb:
                 res += '\n'
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
@@ -400,7 +414,9 @@ class GoogleLensWeb:
         aspect_ratio = img.width / img.height
         new_w = int(sqrt(3000000 * aspect_ratio))
         new_h = int(new_w / aspect_ratio)
-        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        # img.close()
+        img = img_resized
 
         return pil_image_to_bytes(img)
 
```
```diff
@@ -415,13 +431,10 @@ class Bing:
         self.available = True
         logger.info('Bing ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         img_bytes = self._preprocess(img)
         if not img_bytes:
@@ -515,6 +528,8 @@ class Bing:
                 res += line['text'] + '\n'
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
@@ -526,7 +541,9 @@ class Bing:
         resize_factor = max(max_pixel_size / img.width, max_pixel_size / img.height)
         new_w = int(img.width * resize_factor)
         new_h = int(img.height * resize_factor)
-        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        # img.close()
+        img = img_resized
 
         img_bytes, _ = limit_image_size(img, max_byte_size)
 
```
```diff
@@ -550,13 +567,10 @@ class AppleVision:
         self.available = True
         logger.info('Apple Vision ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         with objc.autorelease_pool():
             req = Vision.VNRecognizeTextRequest.alloc().init()
@@ -579,6 +593,7 @@ class AppleVision:
             else:
                 x = (False, 'Unknown error!')
 
+            # img.close()
             return x
 
     def _preprocess(self, img):
```
```diff
@@ -631,13 +646,10 @@ class AppleLiveText:
         self.available = True
         logger.info('Apple Live Text ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         with objc.autorelease_pool():
             analyzer = self.VKCImageAnalyzer.alloc().init()
```
```diff
@@ -691,13 +703,10 @@ class WinRTOCR:
         except:
             logger.warning('Error reading URL from config, WinRT OCR will not work!')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         if sys.platform == 'win32':
             res = winocr.recognize_pil_sync(img, lang='ja')['text']
@@ -716,6 +725,8 @@ class WinRTOCR:
             res = res.json()['text']
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
```diff
@@ -749,13 +760,10 @@ class OneOCR:
         except:
             logger.warning('Error reading URL from config, OneOCR will not work!')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         if sys.platform == 'win32':
             try:
@@ -779,6 +787,8 @@ class OneOCR:
             res = res.json()['text']
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
```diff
@@ -802,13 +812,10 @@ class AzureImageAnalysis:
         except:
             logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         try:
             read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
@@ -826,6 +833,8 @@ class AzureImageAnalysis:
             return (False, 'Unknown error!')
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
@@ -833,7 +842,9 @@ class AzureImageAnalysis:
         resize_factor = max(50 / img.width, 50 / img.height)
         new_w = int(img.width * resize_factor)
         new_h = int(img.height * resize_factor)
-        img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        img_resized = img.resize((new_w, new_h), Image.Resampling.LANCZOS)
+        # img.close()
+        img = img_resized
 
         return pil_image_to_bytes(img)
 
```
```diff
@@ -853,13 +864,10 @@ class EasyOCR:
         self.available = True
         logger.info('EasyOCR ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         res = ''
         read_result = self.model.readtext(self._preprocess(img), detail=0)
@@ -867,6 +875,8 @@ class EasyOCR:
             res += text + '\n'
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
```diff
@@ -900,13 +910,10 @@ class RapidOCR:
         self.available = True
         logger.info('RapidOCR ready')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         res = ''
         read_results, elapsed = self.model(self._preprocess(img))
@@ -915,6 +922,8 @@ class RapidOCR:
                 res += read_result[1] + '\n'
 
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
```diff
@@ -935,13 +944,10 @@ class OCRSpace:
         except:
             logger.warning('Error reading API key from config, OCRSpace will not work!')
 
-    def __call__(self, img_or_path):
-        if isinstance(img_or_path, (str, Path)):
-            img = Image.open(img_or_path)
-        elif isinstance(img_or_path, Image.Image):
-            img = img_or_path
-        else:
-            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')
+    def __call__(self, img):
+        img = input_to_pil_image(img)
+        if not img:
+            return (False, 'Invalid image provided')
 
         img_bytes, img_extension = self._preprocess(img)
         if not img_bytes:
@@ -972,6 +978,8 @@ class OCRSpace:
 
         res = res['ParsedResults'][0]['ParsedText']
         x = (True, res)
+
+        # img.close()
         return x
 
     def _preprocess(self, img):
```
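Across all of these hunks the engines keep the same return contract: `__call__` yields a `(success, text_or_error)` tuple, and invalid input is now reported as `(False, 'Invalid image provided')` rather than raising from inside each engine. A hypothetical caller sketch; `DummyEngine` and `screenshot.png` are illustrative stand-ins, not part of the package:

```python
from pathlib import Path


class DummyEngine:
    # Stand-in that only mimics the (success, text) tuple contract used by the real engines.
    def __call__(self, img):
        if img is None:
            return (False, 'Invalid image provided')
        return (True, 'recognized text')


engine = DummyEngine()
success, result = engine(Path('screenshot.png'))
print(result if success else f'OCR failed: {result}')
```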