GameSentenceMiner 2.6.4__py3-none-any.whl → 2.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,975 @@
1
+ import re
2
+ import os
3
+ import io
4
+ from pathlib import Path
5
+ import sys
6
+ import platform
7
+ import logging
8
+ from math import sqrt
9
+ import json
10
+ import base64
11
+ from urllib.parse import urlparse, parse_qs
12
+
13
+ import jaconv
14
+ import numpy as np
15
+ from PIL import Image
16
+ from loguru import logger
17
+ import requests
18
+
19
+ try:
20
+ from manga_ocr import MangaOcr as MOCR
21
+ except ImportError:
22
+ pass
23
+
24
+ try:
25
+ import Vision
26
+ import objc
27
+ from AppKit import NSData, NSImage, NSBundle
28
+ from CoreFoundation import CFRunLoopRunInMode, kCFRunLoopDefaultMode, CFRunLoopStop, CFRunLoopGetCurrent
29
+ except ImportError:
30
+ pass
31
+
32
+ try:
33
+ from google.cloud import vision
34
+ from google.oauth2 import service_account
35
+ from google.api_core.exceptions import ServiceUnavailable
36
+ except ImportError:
37
+ pass
38
+
39
+ try:
40
+ from azure.ai.vision.imageanalysis import ImageAnalysisClient
41
+ from azure.ai.vision.imageanalysis.models import VisualFeatures
42
+ from azure.core.credentials import AzureKeyCredential
43
+ from azure.core.exceptions import ServiceRequestError
44
+ except ImportError:
45
+ pass
46
+
47
+ try:
48
+ import easyocr
49
+ except ImportError:
50
+ pass
51
+
52
+ try:
53
+ from rapidocr_onnxruntime import RapidOCR as ROCR
54
+ import urllib.request
55
+ except ImportError:
56
+ pass
57
+
58
+ try:
59
+ import winocr
60
+ except ImportError:
61
+ pass
62
+
63
+ try:
64
+ import oneocr
65
+ except ImportError:
66
+ pass
67
+
68
+ try:
69
+ import pyjson5
70
+ except ImportError:
71
+ pass
72
+
73
+ try:
74
+ import betterproto
75
+ from .lens_betterproto import *
76
+ import random
77
+ except ImportError:
78
+ pass
79
+
80
+ try:
81
+ import fpng_py
82
+ optimized_png_encode = True
83
+ except:
84
+ optimized_png_encode = False
85
+
86
+
87
def empty_post_process(text):
    """Identity post-processor: hand back the OCR output untouched.

    Installed as a replacement for manga_ocr's built-in post-processing so
    this module can apply its own normalization instead.
    """
    return text
89
+
90
+
91
def post_process(text):
    """Normalize raw OCR output for Japanese text.

    Strips all intra-line whitespace (joining the pieces of each line),
    joins lines with single spaces, normalizes ellipses and dot runs to
    ASCII periods, and converts half-width characters to full-width.
    """
    squeezed_lines = [''.join(line.split()) for line in text.splitlines()]
    joined = ' '.join(squeezed_lines).replace('…', '...')
    # Collapse runs of 2+ middle dots / periods into the same number of '.'
    joined = re.sub('[・.]{2,}', lambda m: '.' * (m.end() - m.start()), joined)
    return jaconv.h2z(joined, ascii=True, digit=True)
97
+
98
+
99
def pil_image_to_bytes(img, img_format='png', png_compression=6, jpeg_quality=80, optimize=False):
    """Encode a PIL image to raw bytes.

    Uses the fast fpng encoder for PNG when fpng_py imported successfully
    and no size optimization was requested; otherwise falls back to PIL's
    own encoders. JPEG output is converted to RGB first (JPEG has no alpha)
    and saved with 4:4:4 subsampling to preserve text edges.
    """
    fast_png = img_format == 'png' and optimized_png_encode and not optimize
    if fast_png:
        rgba_data = img.convert('RGBA').tobytes()
        return fpng_py.fpng_encode_image_to_memory(rgba_data, img.width, img.height)

    buffer = io.BytesIO()
    if img_format == 'jpeg':
        img = img.convert('RGB')
    # Format-irrelevant kwargs (e.g. compress_level for JPEG) are ignored by PIL.
    img.save(buffer, format=img_format, compress_level=png_compression,
             quality=jpeg_quality, optimize=optimize, subsampling=0)
    return buffer.getvalue()
110
+
111
+
112
def pil_image_to_numpy_array(img):
    """Return the image as an RGBA numpy array (H x W x 4, uint8)."""
    rgba = img.convert('RGBA')
    return np.array(rgba)
114
+
115
+
116
def limit_image_size(img, max_size):
    """Encode *img* so the result fits within *max_size* bytes.

    Strategy, in order: PNG as-is; PNG after one downscale (40% off for
    very large images, else 25% off); JPEG at decreasing quality (80..60,
    step 5) first on the original, then on the downscaled image.

    Returns (bytes, 'png'|'jpeg') on success, or (False, '') if nothing fits.
    """
    encoded = pil_image_to_bytes(img)
    if len(encoded) <= max_size:
        return encoded, 'png'

    # Shrink harder when either dimension is very large.
    factor = 0.60 if any(dim > 2000 for dim in img.size) else 0.75
    shrunk = img.resize((int(img.width * factor), int(img.height * factor)),
                        Image.Resampling.LANCZOS)
    shrunk_encoded = pil_image_to_bytes(shrunk)
    if len(shrunk_encoded) <= max_size:
        return shrunk_encoded, 'png'

    # Fall back to JPEG: try the full-size image first, then the shrunk one.
    for candidate in (img, shrunk):
        quality = 80
        while quality >= 60:
            jpeg_bytes = pil_image_to_bytes(candidate, 'jpeg', jpeg_quality=quality, optimize=True)
            if len(jpeg_bytes) <= max_size:
                return jpeg_bytes, 'jpeg'
            quality -= 5

    return False, ''
139
+
140
+
141
class MangaOcr:
    """OCR engine backed by the manga-ocr model (kha-white/manga-ocr-base)."""

    name = 'mangaocr'
    readable_name = 'Manga OCR'
    key = 'm'
    available = False

    def __init__(self, config=None):
        """Load the Manga OCR model.

        config: optional dict with 'pretrained_model_name_or_path' and
        'force_cpu' keys. Fix: the default was a shared mutable dict
        argument; use a None sentinel instead.
        """
        if config is None:
            config = {'pretrained_model_name_or_path': 'kha-white/manga-ocr-base', 'force_cpu': False}
        if 'manga_ocr' not in sys.modules:
            logger.warning('manga-ocr not available, Manga OCR will not work!')
        else:
            logger.disable('manga_ocr')
            logging.getLogger('transformers').setLevel(logging.ERROR)  # silence transformers >=4.46 warnings
            from manga_ocr import ocr
            # Disable manga_ocr's own post-processing; this module applies its own.
            ocr.post_process = empty_post_process
            logger.info('Loading Manga OCR model')
            self.model = MOCR(config['pretrained_model_name_or_path'], config['force_cpu'])
            self.available = True
            logger.info('Manga OCR ready')

    def __call__(self, img_or_path):
        """Recognize text in an image.

        img_or_path: filesystem path (str or Path) or a PIL.Image.
        Returns (True, recognized_text). Raises ValueError on bad input type.
        """
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        return (True, self.model(img))
170
+
171
class GoogleVision:
    """OCR via the Google Cloud Vision text_detection API.

    Service-account credentials are read from ~/.config/google_vision.json.
    """

    name = 'gvision'
    readable_name = 'Google Vision'
    key = 'g'
    available = False

    def __init__(self):
        if 'google.cloud' not in sys.modules:
            logger.warning('google-cloud-vision not available, Google Vision will not work!')
        else:
            logger.info('Parsing Google credentials')
            google_credentials_file = os.path.join(os.path.expanduser('~'), '.config', 'google_vision.json')
            try:
                google_credentials = service_account.Credentials.from_service_account_file(google_credentials_file)
                self.client = vision.ImageAnnotatorClient(credentials=google_credentials)
                self.available = True
                logger.info('Google Vision ready')
            # Fix: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit.
            except Exception:
                logger.warning('Error parsing Google credentials, Google Vision will not work!')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        image_bytes = self._preprocess(img)
        image = vision.Image(content=image_bytes)
        try:
            response = self.client.text_detection(image=image)
        except ServiceUnavailable:
            return (False, 'Connection error!')
        except Exception:  # keep the boundary broad but exclude system-exiting exceptions
            return (False, 'Unknown error!')
        texts = response.text_annotations
        # The first annotation, when present, is the full detected text block.
        res = texts[0].description if len(texts) > 0 else ''
        return (True, res)

    def _preprocess(self, img):
        # The API accepts PNG bytes directly.
        return pil_image_to_bytes(img)
214
+
215
class GoogleLens:
    """OCR via the Google Lens protobuf endpoint (lensfrontend-pa.googleapis.com).

    Requires the optional betterproto dependency and the generated
    lens_betterproto message classes imported at module level. The request
    is built to mimic what Chromium sends, so the header values and
    payload fields below must not be altered casually.
    """

    name = 'glens'
    readable_name = 'Google Lens'
    key = 'l'
    available = False

    def __init__(self):
        # betterproto is only present in sys.modules if the optional import
        # at the top of the file succeeded.
        if 'betterproto' not in sys.modules:
            logger.warning('betterproto not available, Google Lens will not work!')
        else:
            self.available = True
            logger.info('Google Lens ready')

    def __call__(self, img_or_path):
        """Recognize text in an image.

        img_or_path: filesystem path (str/Path) or a PIL.Image.
        Returns (True, text) on success or (False, error_message) on failure.
        Raises ValueError for unsupported input types.
        """
        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        # Build a LensOverlayServerRequest with randomized request ids,
        # mirroring Chromium's Lens overlay client.
        request = LensOverlayServerRequest()

        request.objects_request.request_context.request_id.uuid = random.randint(0, 2**64 - 1)
        request.objects_request.request_context.request_id.sequence_id = 0
        request.objects_request.request_context.request_id.image_sequence_id = 0
        request.objects_request.request_context.request_id.analytics_id = random.randbytes(16)
        request.objects_request.request_context.request_id.routing_info = LensOverlayRoutingInfo()

        request.objects_request.request_context.client_context.platform = Platform.WEB
        request.objects_request.request_context.client_context.surface = Surface.CHROMIUM

        request.objects_request.request_context.client_context.locale_context.language = 'ja'
        request.objects_request.request_context.client_context.locale_context.region = 'Asia/Tokyo'
        request.objects_request.request_context.client_context.locale_context.time_zone = '' # not set by chromium

        request.objects_request.request_context.client_context.app_id = '' # not set by chromium

        filter = AppliedFilter()
        filter.filter_type = LensOverlayFilterType.AUTO_FILTER
        request.objects_request.request_context.client_context.client_filters.filter.append(filter)

        # _preprocess returns (png_bytes, width, height).
        image_data = self._preprocess(img)
        request.objects_request.image_data.payload.image_bytes = image_data[0]
        request.objects_request.image_data.image_metadata.width = image_data[1]
        request.objects_request.image_data.image_metadata.height = image_data[2]

        payload = request.SerializeToString()

        # Headers copied from a Chromium capture; the API key is Chromium's
        # public Lens key, not a user secret.
        headers = {
            'Host': 'lensfrontend-pa.googleapis.com',
            'Connection': 'keep-alive',
            'Content-Type': 'application/x-protobuf',
            'X-Goog-Api-Key': 'AIzaSyDr2UxVnv_U85AbhhY8XSHSIavUW0DC-sY',
            'Sec-Fetch-Site': 'none',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Dest': 'empty',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Accept-Language': 'ja-JP;q=0.6,ja;q=0.5'
        }

        try:
            res = requests.post('https://lensfrontend-pa.googleapis.com/v1/crupload', data=payload, headers=headers, timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        if res.status_code != 200:
            return (False, 'Unknown error!')

        response_proto = LensOverlayServerResponse().FromString(res.content)
        response_dict = response_proto.to_dict(betterproto.Casing.SNAKE)

        # Flatten the paragraphs -> lines -> words hierarchy into plain text;
        # text_separator carries the inter-word spacing Lens chose.
        res = ''
        text = response_dict['objects_response']['text']
        if 'text_layout' in text:
            paragraphs = text['text_layout']['paragraphs']
            for paragraph in paragraphs:
                for line in paragraph['lines']:
                    for word in line['words']:
                        res += word['plain_text'] + word['text_separator']
                res += '\n'

        x = (True, res)
        return x

    def _preprocess(self, img):
        # Downscale to at most ~3 megapixels while preserving aspect ratio,
        # then return PNG bytes plus the (possibly new) dimensions.
        if img.width * img.height > 3000000:
            aspect_ratio = img.width / img.height
            new_w = int(sqrt(3000000 * aspect_ratio))
            new_h = int(new_w / aspect_ratio)
            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

        return (pil_image_to_bytes(img), img.width, img.height)
311
+
312
class GoogleLensWeb:
    """OCR via the Google Lens web upload flow (lens.google.com/v3/upload).

    Scrapes the undocumented web endpoints, so the headers, cookie and the
    positional indexing into the JSON5 metadata response are all coupled to
    what the site currently serves. Requires the optional pyjson5 dependency.
    """

    name = 'glensweb'
    readable_name = 'Google Lens (web)'
    key = 'k'
    available = False

    def __init__(self):
        if 'pyjson5' not in sys.modules:
            logger.warning('pyjson5 not available, Google Lens (web) will not work!')
        else:
            # A session keeps cookies/connection reuse across the two requests.
            self.requests_session = requests.Session()
            self.available = True
            logger.info('Google Lens (web) ready')

    def __call__(self, img_or_path):
        """Recognize text in an image.

        img_or_path: filesystem path (str/Path) or a PIL.Image.
        Returns (True, text) on success or (False, error_message) on failure.
        """
        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        url = 'https://lens.google.com/v3/upload'
        files = {'encoded_image': ('image.png', self._preprocess(img), 'image/png')}
        # Browser-like headers captured from Firefox; kept verbatim so the
        # endpoint treats us as a regular web client.
        headers = {
            'Host': 'lens.google.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ja-JP;q=0.6,ja;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br, zstd',
            'Referer': 'https://www.google.com/',
            'Origin': 'https://www.google.com',
            'Alt-Used': 'lens.google.com',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-site',
            'Priority': 'u=0, i',
            'TE': 'trailers'
        }
        # SOCS cookie pre-accepts the consent dialog.
        cookies = {'SOCS': 'CAESEwgDEgk0ODE3Nzk3MjQaAmVuIAEaBgiA_LyaBg'}

        try:
            res = self.requests_session.post(url, files=files, headers=headers, cookies=cookies, timeout=20, allow_redirects=False)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        # A successful upload answers with a 303 redirect to the results page.
        if res.status_code != 303:
            return (False, 'Unknown error!')

        redirect_url = res.headers.get('Location')
        if not redirect_url:
            return (False, 'Error getting redirect URL!')

        parsed_url = urlparse(redirect_url)
        query_params = parse_qs(parsed_url.query)

        # vsrid/gsessionid identify the upload session for the metadata fetch.
        if ('vsrid' not in query_params) or ('gsessionid' not in query_params):
            return (False, 'Unknown error!')

        try:
            res = self.requests_session.get(f"https://lens.google.com/qfmetadata?vsrid={query_params['vsrid'][0]}&gsessionid={query_params['gsessionid'][0]}", timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        # The metadata response is expected to be exactly three lines, the
        # third being a JSON5 payload.
        if (len(res.text.splitlines()) != 3):
            return (False, 'Unknown error!')

        lens_object = pyjson5.loads(res.text.splitlines()[2])

        # Positional indexing into the undocumented structure:
        # paragraphs -> lines -> words, word[1] = text, word[2] = separator.
        res = ''
        text = lens_object[0][2][0][0]
        for paragraph in text:
            for line in paragraph[1]:
                for word in line[0]:
                    res += word[1] + word[2]
            res += '\n'

        x = (True, res)
        return x

    def _preprocess(self, img):
        # Downscale to at most ~3 megapixels (aspect ratio preserved), then
        # encode as PNG bytes.
        if img.width * img.height > 3000000:
            aspect_ratio = img.width / img.height
            new_w = int(sqrt(3000000 * aspect_ratio))
            new_h = int(new_w / aspect_ratio)
            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

        return pil_image_to_bytes(img)
406
+
407
class Bing:
    """OCR via Bing visual search (images/api/custom/knowledge, OCR skill).

    Uploads a base64-encoded image, follows the insightsToken redirect and
    asks the knowledge API to run its text-recognition skill.
    """

    name = 'bing'
    readable_name = 'Bing'
    key = 'b'
    available = False

    def __init__(self):
        self.requests_session = requests.Session()
        self.available = True
        logger.info('Bing ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        img_bytes = self._preprocess(img)
        if not img_bytes:
            return (False, 'Image is too big!')

        upload_url = 'https://www.bing.com/images/search?view=detailv2&iss=sbiupload'
        upload_headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
            'cache-control': 'max-age=0',
            'origin': 'https://www.bing.com',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
        }
        files = {
            'imgurl': (None, ''),
            'cbir': (None, 'sbi'),
            'imageBin': (None, img_bytes)
        }

        # Bing may answer with an absolute redirect to another host; follow
        # at most one hop, remembering the host for the knowledge API call.
        for _ in range(2):
            api_host = urlparse(upload_url).netloc
            try:
                res = self.requests_session.post(upload_url, headers=upload_headers, files=files, timeout=20, allow_redirects=False)
            except requests.exceptions.Timeout:
                return (False, 'Request timeout!')
            except requests.exceptions.ConnectionError:
                return (False, 'Connection error!')

            if res.status_code != 302:
                return (False, 'Unknown error!')

            redirect_url = res.headers.get('Location')
            if not redirect_url:
                return (False, 'Error getting redirect URL!')
            # A relative redirect means we reached the results page.
            if not redirect_url.startswith('https://'):
                break
            upload_url = redirect_url

        parsed_url = urlparse(redirect_url)
        query_params = parse_qs(parsed_url.query)

        image_insights_token = query_params.get('insightsToken')
        if not image_insights_token:
            return (False, 'Error getting token!')
        image_insights_token = image_insights_token[0]

        api_url = f'https://{api_host}/images/api/custom/knowledge'
        api_headers = {
            'accept': '*/*',
            'accept-language': 'ja-JP;q=0.6,ja;q=0.5',
            'origin': 'https://www.bing.com',
            'referer': f'https://www.bing.com/images/search?view=detailV2&insightstoken={image_insights_token}',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:136.0) Gecko/20100101 Firefox/136.0',
        }
        api_data_json = {
            'imageInfo': {'imageInsightsToken': image_insights_token, 'source': 'Url'},
            'knowledgeRequest': {'invokedSkills': ['OCR'], 'index': 1}
        }
        files = {
            'knowledgeRequest': (None, json.dumps(api_data_json), 'application/json')
        }

        try:
            res = self.requests_session.post(api_url, headers=api_headers, files=files, timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        if res.status_code != 200:
            return (False, 'Unknown error!')

        data = res.json()

        # Find the '##TextRecognition' tag, then its TextRecognitionAction,
        # and concatenate the recognized lines region by region.
        res = ''
        text_tag = None
        for tag in data['tags']:
            if tag.get('displayName') == '##TextRecognition':
                text_tag = tag
                break
        if text_tag:
            text_action = None
            for action in text_tag['actions']:
                if action.get('_type') == 'ImageKnowledge/TextRecognitionAction':
                    text_action = action
                    break
            if text_action:
                regions = text_action['data'].get('regions', [])
                for region in regions:
                    for line in region.get('lines', []):
                        res += line['text'] + '\n'

        x = (True, res)
        return x

    def _preprocess(self, img):
        """Downscale/encode the image to Bing's limits; returns base64 str or None."""
        max_pixel_size = 4000
        max_byte_size = 767772
        res = None

        if any(x > max_pixel_size for x in img.size):
            # Fix: was max(), which picks the *larger* ratio and can upscale
            # (e.g. 5000x100 -> factor 40). min() guarantees both dimensions
            # end up <= max_pixel_size.
            resize_factor = min(max_pixel_size / img.width, max_pixel_size / img.height)
            new_w = int(img.width * resize_factor)
            new_h = int(img.height * resize_factor)
            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

        img_bytes, _ = limit_image_size(img, max_byte_size)

        if img_bytes:
            res = base64.b64encode(img_bytes).decode('utf-8')

        return res
537
+
538
class AppleVision:
    """OCR via Apple's Vision framework (VNRecognizeTextRequest).

    macOS 13+ only; relies on the pyobjc Vision bindings imported at module
    level.
    """

    name = 'avision'
    readable_name = 'Apple Vision'
    key = 'a'
    available = False

    def __init__(self):
        if sys.platform != 'darwin':
            logger.warning('Apple Vision is not supported on non-macOS platforms!')
        elif int(platform.mac_ver()[0].split('.')[0]) < 13:
            logger.warning('Apple Vision is not supported on macOS older than Ventura/13.0!')
        else:
            self.available = True
            logger.info('Apple Vision ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        # Autorelease pool keeps ObjC temporaries from accumulating across calls.
        with objc.autorelease_pool():
            req = Vision.VNRecognizeTextRequest.alloc().init()

            # Revision 3 recognizer, accurate (not fast) mode, ja/en languages.
            req.setRevision_(Vision.VNRecognizeTextRequestRevision3)
            req.setRecognitionLevel_(Vision.VNRequestTextRecognitionLevelAccurate)
            req.setUsesLanguageCorrection_(True)
            req.setRecognitionLanguages_(['ja','en'])

            handler = Vision.VNImageRequestHandler.alloc().initWithData_options_(
                self._preprocess(img), None
            )

            # performRequests_error_ returns an (ok, error) pair.
            success = handler.performRequests_error_([req], None)
            res = ''
            if success[0]:
                for result in req.results():
                    res += result.text() + '\n'
                x = (True, res)
            else:
                x = (False, 'Unknown error!')

        return x

    def _preprocess(self, img):
        # Vision accepts TIFF data directly via NSData.
        return pil_image_to_bytes(img, 'tiff')
586
+
587
+
588
class AppleLiveText:
    """OCR via the private VisionKit 'Live Text' analyzer (macOS 13+).

    Uses objc.lookUpClass to reach the private VKCImageAnalyzer API and a
    CFRunLoop to wait for its asynchronous completion handler.
    """

    name = 'alivetext'
    readable_name = 'Apple Live Text'
    key = 'd'
    available = False

    def __init__(self):
        if sys.platform != 'darwin':
            logger.warning('Apple Live Text is not supported on non-macOS platforms!')
        elif int(platform.mac_ver()[0].split('.')[0]) < 13:
            logger.warning('Apple Live Text is not supported on macOS older than Ventura/13.0!')
        else:
            # Mark the process background-only so no Dock icon appears.
            app_info = NSBundle.mainBundle().infoDictionary()
            app_info['LSBackgroundOnly'] = '1'
            self.VKCImageAnalyzer = objc.lookUpClass('VKCImageAnalyzer')
            self.VKCImageAnalyzerRequest = objc.lookUpClass('VKCImageAnalyzerRequest')
            # Private API has no bridge metadata; declare the block signatures
            # for the progress (double) and completion (analysis, error) handlers.
            objc.registerMetaDataForSelector(
                b'VKCImageAnalyzer',
                b'processRequest:progressHandler:completionHandler:',
                {
                    'arguments': {
                        3: {
                            'callable': {
                                'retval': {'type': b'v'},
                                'arguments': {
                                    0: {'type': b'^v'},
                                    1: {'type': b'd'},
                                }
                            }
                        },
                        4: {
                            'callable': {
                                'retval': {'type': b'v'},
                                'arguments': {
                                    0: {'type': b'^v'},
                                    1: {'type': b'@'},
                                    2: {'type': b'@'},
                                }
                            }
                        }
                    }
                }
            )
            self.available = True
            logger.info('Apple Live Text ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, str) or isinstance(img_or_path, Path):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        with objc.autorelease_pool():
            analyzer = self.VKCImageAnalyzer.alloc().init()
            req = self.VKCImageAnalyzerRequest.alloc().initWithImage_requestType_(self._preprocess(img), 1) #VKAnalysisTypeText
            req.setLocales_(['ja','en'])
            self.result = None
            analyzer.processRequest_progressHandler_completionHandler_(req, lambda progress: None, self._process)

            # Block until _process stops the run loop, or give up after 10 s.
            CFRunLoopRunInMode(kCFRunLoopDefaultMode, 10.0, False)

        # Fix: identity comparison with None (`is None`), not `== None`.
        if self.result is None:
            return (False, 'Unknown error!')
        return (True, self.result)

    def _process(self, analysis, error):
        # Completion handler: collect recognized lines, then wake __call__.
        res = ''
        lines = analysis.allLines()
        if lines:
            for line in lines:
                res += line.string() + '\n'
        self.result = res
        CFRunLoopStop(CFRunLoopGetCurrent())

    def _preprocess(self, img):
        # Convert to an NSImage via TIFF-encoded NSData for the analyzer.
        image_bytes = pil_image_to_bytes(img, 'tiff')
        ns_data = NSData.dataWithBytes_length_(image_bytes, len(image_bytes))
        ns_image = NSImage.alloc().initWithData_(ns_data)
        return ns_image
669
+
670
+
671
class WinRTOCR:
    """OCR via the Windows 10+ built-in OCR engine (winocr).

    On non-Windows hosts it proxies to a remote winocr HTTP server whose
    address comes from config['url'].
    """

    name = 'winrtocr'
    readable_name = 'WinRT OCR'
    key = 'w'
    available = False

    def __init__(self, config=None):
        # Fix: avoid a shared mutable default argument.
        if config is None:
            config = {}
        if sys.platform == 'win32':
            if int(platform.release()) < 10:
                logger.warning('WinRT OCR is not supported on Windows older than 10!')
            elif 'winocr' not in sys.modules:
                logger.warning('winocr not available, WinRT OCR will not work!')
            else:
                self.available = True
                logger.info('WinRT OCR ready')
        else:
            try:
                self.url = config['url']
            # Fix: was a bare `except:`; only a missing key is expected here.
            except KeyError:
                logger.warning('Error reading URL from config, WinRT OCR will not work!')
            else:
                self.available = True
                logger.info('WinRT OCR ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        if sys.platform == 'win32':
            res = winocr.recognize_pil_sync(img, lang='ja')['text']
        else:
            params = {'lang': 'ja'}
            try:
                res = requests.post(self.url, params=params, data=self._preprocess(img), timeout=3)
            except requests.exceptions.Timeout:
                return (False, 'Request timeout!')
            except requests.exceptions.ConnectionError:
                return (False, 'Connection error!')

            if res.status_code != 200:
                return (False, 'Unknown error!')

            res = res.json()['text']

        return (True, res)

    def _preprocess(self, img):
        # Fast PNG (compression level 1) since the payload goes over localhost.
        return pil_image_to_bytes(img, png_compression=1)
723
+
724
class OneOCR:
    """OCR via the Windows 10+ OneOCR engine (oneocr).

    On non-Windows hosts it proxies to a remote oneocr HTTP server whose
    address comes from config['url'].
    """

    name = 'oneocr'
    readable_name = 'OneOCR'
    key = 'z'
    available = False

    def __init__(self, config=None):
        # Fix: avoid a shared mutable default argument.
        if config is None:
            config = {}
        if sys.platform == 'win32':
            if int(platform.release()) < 10:
                logger.warning('OneOCR is not supported on Windows older than 10!')
            elif 'oneocr' not in sys.modules:
                logger.warning('oneocr not available, OneOCR will not work!')
            else:
                try:
                    self.model = oneocr.OcrEngine()
                except RuntimeError as e:
                    # Fix: `e + ', ...'` raised TypeError (RuntimeError + str);
                    # format the exception into the message instead.
                    logger.warning(f'{e}, OneOCR will not work!')
                else:
                    self.available = True
                    logger.info('OneOCR ready')
        else:
            try:
                self.url = config['url']
            # Fix: was a bare `except:`; only a missing key is expected here.
            except KeyError:
                logger.warning('Error reading URL from config, OneOCR will not work!')
            else:
                self.available = True
                logger.info('OneOCR ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        if sys.platform == 'win32':
            try:
                res = self.model.recognize_pil(img)['text']
            except RuntimeError as e:
                # Fix: return a string like every other engine, not the
                # exception object itself.
                return (False, str(e))
        else:
            try:
                res = requests.post(self.url, data=self._preprocess(img), timeout=3)
            except requests.exceptions.Timeout:
                return (False, 'Request timeout!')
            except requests.exceptions.ConnectionError:
                return (False, 'Connection error!')

            if res.status_code != 200:
                return (False, 'Unknown error!')

            res = res.json()['text']

        return (True, res)

    def _preprocess(self, img):
        # Fast PNG (compression level 1) since the payload goes over localhost.
        return pil_image_to_bytes(img, png_compression=1)
783
+
784
class AzureImageAnalysis:
    """OCR via the Azure AI Image Analysis READ feature.

    config must provide 'endpoint' and 'api_key'.
    """

    name = 'azure'
    readable_name = 'Azure Image Analysis'
    key = 'v'
    available = False

    def __init__(self, config=None):
        # Fix: avoid a shared mutable default argument.
        if config is None:
            config = {}
        if 'azure.ai.vision.imageanalysis' not in sys.modules:
            logger.warning('azure-ai-vision-imageanalysis not available, Azure Image Analysis will not work!')
        else:
            logger.info('Parsing Azure credentials')
            try:
                self.client = ImageAnalysisClient(config['endpoint'], AzureKeyCredential(config['api_key']))
                self.available = True
                logger.info('Azure Image Analysis ready')
            # Fix: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit.
            except Exception:
                logger.warning('Error parsing Azure credentials, Azure Image Analysis will not work!')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        try:
            read_result = self.client.analyze(image_data=self._preprocess(img), visual_features=[VisualFeatures.READ])
        except ServiceRequestError:
            return (False, 'Connection error!')
        except Exception:  # broad boundary, but no longer a bare except
            return (False, 'Unknown error!')

        res = ''
        if read_result.read:
            for block in read_result.read.blocks:
                for line in block.lines:
                    res += line.text + '\n'
        else:
            return (False, 'Unknown error!')

        return (True, res)

    def _preprocess(self, img):
        # Azure rejects images smaller than 50px on a side; upscale so both
        # dimensions reach at least 50 (max() picks the larger scale factor).
        if any(x < 50 for x in img.size):
            resize_factor = max(50 / img.width, 50 / img.height)
            new_w = int(img.width * resize_factor)
            new_h = int(img.height * resize_factor)
            img = img.resize((new_w, new_h), Image.Resampling.LANCZOS)

        return pil_image_to_bytes(img)
836
+
837
class EasyOCR:
    """OCR via the local EasyOCR model (ja + en)."""

    name = 'easyocr'
    readable_name = 'EasyOCR'
    key = 'e'
    available = False

    def __init__(self, config=None):
        # Fix: the default was a shared mutable dict argument; use a None
        # sentinel with the same effective default.
        if config is None:
            config = {'gpu': True}
        if 'easyocr' not in sys.modules:
            logger.warning('easyocr not available, EasyOCR will not work!')
        else:
            logger.info('Loading EasyOCR model')
            logging.getLogger('easyocr.easyocr').setLevel(logging.ERROR)
            self.model = easyocr.Reader(['ja','en'], gpu=config['gpu'])
            self.available = True
            logger.info('EasyOCR ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        # detail=0 makes readtext return plain strings only.
        read_result = self.model.readtext(self._preprocess(img), detail=0)
        res = ''
        for text in read_result:
            res += text + '\n'

        return (True, res)

    def _preprocess(self, img):
        return pil_image_to_numpy_array(img)
871
+
872
class RapidOCR:
    """OCR via RapidOCR (ONNX runtime) with a Japanese PP-OCRv4 rec model.

    The recognition model is downloaded to ~/.cache on first use.
    """

    name = 'rapidocr'
    readable_name = 'RapidOCR'
    key = 'r'
    available = False

    def __init__(self):
        if 'rapidocr_onnxruntime' not in sys.modules:
            logger.warning('rapidocr_onnxruntime not available, RapidOCR will not work!')
        else:
            rapidocr_model_file = os.path.join(os.path.expanduser('~'),'.cache','rapidocr_japan_PP-OCRv4_rec_infer.onnx')
            if not os.path.isfile(rapidocr_model_file):
                logger.info('Downloading RapidOCR model ' + rapidocr_model_file)
                try:
                    cache_folder = os.path.join(os.path.expanduser('~'),'.cache')
                    # exist_ok avoids the racy isdir()-then-makedirs() pattern.
                    os.makedirs(cache_folder, exist_ok=True)
                    urllib.request.urlretrieve('https://github.com/AuroraWright/owocr/raw/master/rapidocr_japan_PP-OCRv4_rec_infer.onnx', rapidocr_model_file)
                # Fix: was a bare `except:` which also swallowed KeyboardInterrupt/SystemExit.
                except Exception:
                    logger.warning('Download failed. RapidOCR will not work!')
                    return

            logger.info('Loading RapidOCR model')
            self.model = ROCR(rec_model_path=rapidocr_model_file)
            # NOTE(review): this raises the *root* logger level to ERROR to
            # silence rapidocr's chatter — it affects all logging module users.
            logging.getLogger().setLevel(logging.ERROR)
            self.available = True
            logger.info('RapidOCR ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        res = ''
        # Each result is (box, text, confidence); index 1 is the text.
        read_results, elapsed = self.model(self._preprocess(img))
        if read_results:
            for read_result in read_results:
                res += read_result[1] + '\n'

        return (True, res)

    def _preprocess(self, img):
        return pil_image_to_numpy_array(img)
919
+
920
class OCRSpace:
    """OCR via the OCRSpace web API (api.ocr.space).

    config must provide 'api_key'; 'file_size_limit' (bytes, default 1 MB)
    caps the uploaded image size.
    """

    name = 'ocrspace'
    readable_name = 'OCRSpace'
    key = 'o'
    available = False

    def __init__(self, config=None):
        # Fix: avoid a shared mutable default argument.
        if config is None:
            config = {}
        try:
            self.api_key = config['api_key']
        # Fix: was a bare `except:`; only a missing key is expected here.
        except KeyError:
            logger.warning('Error reading API key from config, OCRSpace will not work!')
        else:
            self.max_byte_size = config.get('file_size_limit', 1000000)
            self.available = True
            logger.info('OCRSpace ready')

    def __call__(self, img_or_path):
        """Recognize text; returns (True, text) or (False, error_message)."""
        if isinstance(img_or_path, (str, Path)):
            img = Image.open(img_or_path)
        elif isinstance(img_or_path, Image.Image):
            img = img_or_path
        else:
            raise ValueError(f'img_or_path must be a path or PIL.Image, instead got: {img_or_path}')

        img_bytes, img_extension = self._preprocess(img)
        if not img_bytes:
            return (False, 'Image is too big!')

        data = {
            'apikey': self.api_key,
            'language': 'jpn'
        }
        files = {'file': ('image.' + img_extension, img_bytes, 'image/' + img_extension)}

        try:
            res = requests.post('https://api.ocr.space/parse/image', data=data, files=files, timeout=20)
        except requests.exceptions.Timeout:
            return (False, 'Request timeout!')
        except requests.exceptions.ConnectionError:
            return (False, 'Connection error!')

        if res.status_code != 200:
            return (False, 'Unknown error!')

        res = res.json()

        # The API returns a bare string body on some failures.
        if isinstance(res, str):
            return (False, 'Unknown error!')
        if res['IsErroredOnProcessing']:
            return (False, res['ErrorMessage'])

        return (True, res['ParsedResults'][0]['ParsedText'])

    def _preprocess(self, img):
        # Returns (encoded_bytes, 'png'|'jpeg'), or (False, '') if the image
        # cannot be fit under the configured size limit.
        return limit_image_size(img, self.max_byte_size)