flask-Humanify 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,836 @@
1
+ import json
2
+ import logging
3
+ import socket
4
+ import time
5
+ import threading
6
+ import os
7
+ import importlib.metadata
8
+ import importlib.resources
9
+ import urllib.request
10
+ import gzip
11
+ import pickle
12
+ import random
13
+ import secrets
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ from datetime import datetime, timedelta
17
+ from netaddr import IPNetwork, IPAddress
18
+
19
+
20
logger = logging.getLogger(__name__)


# Locate the package directory. When the distribution is installed we ask
# importlib.resources for the package root; otherwise (e.g. running from a
# source checkout) we fall back to the directory containing this file.
try:
    importlib.metadata.distribution("flask-humanify")
    BASE_DIR = importlib.resources.files("flask_humanify")
except importlib.metadata.PackageNotFoundError:
    BASE_DIR = Path(__file__).parent

# importlib.resources.files() may return a non-Path traversable; the rest of
# the module relies on pathlib semantics, so normalise it.
if not isinstance(BASE_DIR, Path):
    BASE_DIR = Path(str(BASE_DIR))

# Directory holding the downloaded IP set and captcha datasets.
# exist_ok=True avoids the check-then-create race when several processes
# import this module at the same time.
DATASET_DIR = BASE_DIR / "datasets"
DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Default location of the cached IP set (kept as str for callers using os.path).
IPSET_DATA_PATH = str(DATASET_DIR / "ipset.json")
# File in which the persistent secret key is stored.
SECRET_KEY_FILE = BASE_DIR / "secret_key.bin"

# Image captcha dataset name -> download URL of the pickled dataset.
IMAGES_CAPTCHA_DATASETS = {
    "keys": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/keys.pkl"
    ),
    "animals": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/animals.pkl"
    ),
    "ai_dogs": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/ai-dogs.pkl"
    ),
}

# Audio captcha dataset name -> download URL of the pickled dataset.
AUDIO_CAPTCHA_DATASETS = {
    "characters": (
        "https://raw.githubusercontent.com/librecap/audiocaptcha/"
        "refs/heads/main/characters/characters.pkl"
    )
}
60
+
61
+
62
class MemoryServer:
    """A singleton memory server that manages IP sets and provides lookup functionality.

    The instance keeps in memory:

    * exact-IP and CIDR -> group-name mappings loaded from ``data_path``;
    * one image captcha dataset and one audio captcha dataset;
    * a secret key persisted in ``SECRET_KEY_FILE``.

    ``run_server`` exposes them over a small line-based TCP protocol handled
    by ``handle_client``. Only one instance exists per process; constructor
    arguments passed after the first instantiation are ignored.
    """

    _instance = None
    _lock = threading.Lock()

    # Minimum delay between two background refresh attempts, so that a failing
    # download is not retried on every single lookup.
    _UPDATE_RETRY_INTERVAL = timedelta(minutes=10)

    def __new__(cls, port: int = 9876, data_path: Optional[str] = None):
        """Return the process-wide instance, creating it on first use."""
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                # __init__ runs on every MemoryServer(...) call; this flag lets
                # it skip re-initialisation of the shared instance.
                cls._instance.initialized = False
            return cls._instance

    def __init__(self, port: int = 9876, data_path: Optional[str] = None):
        """Initialise the singleton once.

        Args:
            port: TCP port the server listens on.
            data_path: Path of the cached IP set JSON; defaults to
                ``IPSET_DATA_PATH``.
        """
        if getattr(self, "initialized", False):
            return

        if data_path is None:
            data_path = IPSET_DATA_PATH

        self.port = port
        self.data_path = data_path
        self.ip_to_groups: Dict[str, List[str]] = {}
        self.cidrs_to_ips: Dict[IPNetwork, List[str]] = {}
        self.last_update: Optional[datetime] = None
        self.server_socket = None
        self.server_thread = None
        self.running = False

        # Serialises background refreshes and remembers the last attempt.
        self._update_lock = threading.Lock()
        self._last_update_attempt: Optional[datetime] = None

        self.captcha_image_data: Dict[str, Dict[str, List[bytes]]] = {}
        self.captcha_audio_data: Dict[str, Dict[str, Dict[str, List[bytes]]]] = {}
        self.current_image_dataset: Optional[str] = None
        self.current_audio_dataset: Optional[str] = None
        self.secret_key: bytes = self._load_or_create_secret_key()

        self.initialized = True

    def _load_or_create_secret_key(self) -> bytes:
        """Load the secret key from file or create a new one if it doesn't exist.

        An existing but empty key file is treated as missing, so the server
        never runs with an empty key.
        """
        if SECRET_KEY_FILE.exists():
            logger.info("Loading secret key from %s", SECRET_KEY_FILE)
            with open(SECRET_KEY_FILE, "rb") as f:
                existing_key = f.read()
            if existing_key:
                return existing_key
            logger.warning("Secret key file %s is empty", SECRET_KEY_FILE)

        logger.info("Generating new secret key")
        secret_key = secrets.token_bytes(32)
        # Create the file readable/writable by the owner only (the mode is
        # applied when the file is created; it is ignored on platforms
        # without POSIX permissions).
        flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | getattr(os, "O_BINARY", 0)
        fd = os.open(SECRET_KEY_FILE, flags, 0o600)
        with os.fdopen(fd, "wb") as f:
            f.write(secret_key)

        return secret_key

    def get_secret_key(self) -> bytes:
        """Return the secret key."""
        return self.secret_key

    def is_server_running(self) -> bool:
        """Check if a server is already accepting connections on ``self.port``.

        The check is a plain TCP connect to 127.0.0.1, so it is also true when
        another process owns the port.
        """
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.connect(("127.0.0.1", self.port))
                return True
        except OSError:  # includes ConnectionRefusedError
            return False

    def download_data(self, force: bool = False) -> bool:
        """Download IP set data from GitHub and stamp it with the download time.

        Args:
            force: Download even if the cached file is younger than 7 days.

        Returns:
            True if a fresh-enough file is available afterwards, False on error.
        """
        try:
            if not force and os.path.exists(self.data_path):
                with open(self.data_path, "r", encoding="utf-8") as f:
                    try:
                        data = json.load(f)
                        if isinstance(data, dict) and "_timestamp" in data:
                            timestamp = datetime.fromisoformat(data["_timestamp"])
                            if datetime.now() - timestamp < timedelta(days=7):
                                return True
                    except (json.JSONDecodeError, KeyError, ValueError, TypeError):
                        # Unreadable cache: fall through and download again.
                        pass

            url = "https://raw.githubusercontent.com/tn3w/IPSet/refs/heads/master/ipset.json"
            with urllib.request.urlopen(url, timeout=30) as response:
                response_data = response.read().decode("utf-8")

            data = json.loads(response_data)
            if not isinstance(data, dict):
                raise ValueError("unexpected IP set format")
            data["_timestamp"] = datetime.now().isoformat()

            # Write to a temporary file and rename, so readers never observe a
            # half-written cache.
            tmp_path = f"{self.data_path}.tmp"
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f)
            os.replace(tmp_path, self.data_path)

            return True
        except Exception as e:
            logger.error("Error downloading IP set data: %s", e)
            return False

    def download_captcha_dataset(self, dataset_url: str, dataset_name: str) -> str:
        """Download a captcha dataset unless it is already cached on disk.

        Returns:
            The local file path, or "" if the download failed.
        """
        filename = f"{dataset_name}.pkl"
        file_path = os.path.join(DATASET_DIR, filename)

        if os.path.exists(file_path):
            return file_path

        # Download next to the target and rename on success; otherwise an
        # interrupted download would be mistaken for a valid cached dataset.
        tmp_path = f"{file_path}.part"
        try:
            urllib.request.urlretrieve(dataset_url, tmp_path)
            os.replace(tmp_path, file_path)
            return file_path
        except Exception as e:
            logger.error("Failed to download captcha dataset %s: %s", dataset_name, e)
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            return ""

    def load_data(self) -> bool:
        """Load IP set data into memory.

        The file maps group names to lists of IPs / CIDR strings. New lookup
        tables are built aside and swapped in at the end, so concurrent
        lookups never see partially loaded data.
        """
        try:
            with open(self.data_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            last_update = None
            if "_timestamp" in data:
                last_update = datetime.fromisoformat(data.pop("_timestamp"))

            ip_to_groups: Dict[str, List[str]] = {}
            cidrs_to_ips: Dict[IPNetwork, List[str]] = {}

            for group, ips in data.items():
                for ip in ips:
                    if "/" not in ip:
                        ip_to_groups.setdefault(ip, []).append(group)
                        continue
                    try:
                        network = IPNetwork(ip)
                    except Exception:
                        # Skip entries netaddr cannot parse.
                        continue
                    cidrs_to_ips.setdefault(network, []).append(group)

            self.ip_to_groups = ip_to_groups
            self.cidrs_to_ips = cidrs_to_ips
            if last_update is not None:
                self.last_update = last_update

            return True
        except Exception as e:
            logger.error("Error loading IP set data: %s", e)
            return False

    def _load_image_dataset(self, name: Optional[str]) -> bool:
        """Load the image dataset *name*, leaving the audio dataset untouched.

        Unknown names are ignored (whatever is loaded stays loaded). A dataset
        that is already loaded is not read again.
        """
        if name not in IMAGES_CAPTCHA_DATASETS:
            return True
        if name == self.current_image_dataset and self.captcha_image_data:
            return True

        try:
            dataset_path = self.download_captcha_dataset(
                IMAGES_CAPTCHA_DATASETS[name], name
            )
            if not dataset_path:
                return False

            # NOTE(security): pickle.load can execute arbitrary code from the
            # file. The file comes from the fixed URLs in
            # IMAGES_CAPTCHA_DATASETS; its integrity is not verified here.
            with open(dataset_path, "rb") as f:
                data = pickle.load(f)

            if data["type"] != "image":
                logger.error("Dataset %s is not an image captcha dataset", name)
                return False

            # Images are stored either as raw PNG or gzip-compressed; probe
            # the first one for the PNG signature to decide.
            first_image = data["keys"][next(iter(data["keys"]))][0]
            if not first_image.startswith(b"\x89PNG\r\n\x1a\n"):
                data["keys"] = {
                    k: [gzip.decompress(img) for img in v]
                    for k, v in data["keys"].items()
                }

            self.captcha_image_data = data
            self.current_image_dataset = name
            logger.info("Loaded %s image captcha dataset", name)
            return True
        except Exception as e:
            logger.error("Failed to load %s image captcha dataset: %s", name, e)
            return False

    def _load_audio_dataset(self, name: Optional[str]) -> bool:
        """Load the audio dataset *name*, leaving the image dataset untouched.

        ``None`` and unknown names are ignored. A dataset that is already
        loaded is not read again.
        """
        if name not in AUDIO_CAPTCHA_DATASETS:
            return True
        if name == self.current_audio_dataset and self.captcha_audio_data:
            return True

        try:
            dataset_path = self.download_captcha_dataset(
                AUDIO_CAPTCHA_DATASETS[name], name
            )
            if not dataset_path:
                return False

            # NOTE(security): see _load_image_dataset regarding pickle.load.
            with open(dataset_path, "rb") as f:
                data = pickle.load(f)

            self.captcha_audio_data = data
            self.current_audio_dataset = name
            logger.info("Loaded %s audio captcha dataset", name)
            return True
        except Exception as e:
            logger.error("Failed to load %s audio captcha dataset: %s", name, e)
            return False

    def load_captcha_datasets(
        self, image_dataset: str = "animals", audio_dataset: Optional[str] = None
    ) -> bool:
        """Load captcha datasets into memory.

        Each dataset is cached independently: asking for an image dataset
        that is already loaded does not reload it just because the audio
        dataset argument differs, and vice versa.

        Returns:
            True if every requested, known dataset is loaded; False otherwise.
        """
        try:
            image_ok = self._load_image_dataset(image_dataset)
            audio_ok = self._load_audio_dataset(audio_dataset)
            return image_ok and audio_ok
        except Exception as e:
            logger.error("Error loading captcha datasets: %s", e)
            return False

    def check_and_update_data(self) -> None:
        """Start a background refresh if the IP set is missing or older than 7 days.

        At most one refresh runs at a time, and a new attempt is started no
        sooner than ``_UPDATE_RETRY_INTERVAL`` after the previous one, so a
        failing download does not spawn one thread per lookup.
        """
        now = datetime.now()
        if self.last_update is not None and now - self.last_update <= timedelta(
            days=7
        ):
            return

        last_attempt = self._last_update_attempt
        if last_attempt is not None and now - last_attempt < self._UPDATE_RETRY_INTERVAL:
            return

        self._last_update_attempt = now
        threading.Thread(target=self._async_update, daemon=True).start()

    def _async_update(self) -> None:
        """Update data in the background without affecting current operations."""
        # Non-blocking: if another refresh is in flight, let it finish alone.
        if not self._update_lock.acquire(blocking=False):
            return
        try:
            if self.download_data(force=True):
                self.load_data()
        finally:
            self._update_lock.release()

    def find_matching_groups(self, ip: str) -> List[str]:
        """Find all groups matching the given IP.

        Returns the groups listing the IP exactly plus the groups of every
        CIDR range containing it. An IP that netaddr cannot parse yields [].
        """
        self.check_and_update_data()

        # Copy: the stored list must not be extended with CIDR matches.
        matching_groups = list(self.ip_to_groups.get(ip, []))
        cidrs_to_ips = self.cidrs_to_ips

        try:
            ip_obj = IPAddress(ip)
            ip_version = ip_obj.version

            for cidr, groups in cidrs_to_ips.items():
                if cidr.version != ip_version:
                    continue

                if ip_obj in cidr:
                    for group in groups:
                        if group not in matching_groups:
                            matching_groups.append(group)

        except Exception:
            return []

        return matching_groups

    def get_captcha_images(
        self,
        image_dataset: Optional[str] = None,
        correct_index_range: Union[Tuple[int, int], int] = (2, 3),
        num_images: int = 9,
        preview_image: bool = False,
    ) -> Tuple[List[bytes], str, str]:
        """
        Get captcha images for verification.

        Args:
            image_dataset: The image dataset to use. If None, uses the current dataset.
            correct_index_range: The number of correct images, or an inclusive
                (low, high) range from which that number is drawn.
            num_images: The number of images to select.
            preview_image: If True, add an additional correct image at the beginning of the list.

        Returns:
            A tuple containing:
                - List of images
                - A string containing indexes of correct images. (e.g., "034").
                  Indexes refer to the grid without the preview image and are
                  concatenated without separator, so they are only unambiguous
                  for up to 10 images.
                - The subject that represents the correct images (e.g., "smiling dog")
            On failure ``([], "", "")`` is returned.
        """
        if image_dataset:
            self._load_image_dataset(image_dataset)
        elif not self.captcha_image_data:
            self._load_image_dataset("animals")

        dataset = self.captcha_image_data
        if not dataset or dataset.get("type") != "image" or not dataset.get("keys"):
            logger.error("Image captcha dataset not loaded or invalid")
            return [], "", ""

        keys = dataset["keys"]
        all_keys = list(keys)
        # With one or two categories the first one is always the subject.
        if len(all_keys) <= 2:
            correct_key = all_keys[0]
        else:
            correct_key = random.choice(all_keys)

        correct_images = keys.get(correct_key, [])
        incorrect_images = [
            img for k in all_keys if k != correct_key for img in keys.get(k, [])
        ]

        if not correct_images or not incorrect_images:
            logger.error("Empty image lists in captcha dataset")
            return [], "", ""

        if isinstance(correct_index_range, int):
            num_correct = correct_index_range
        else:
            low, high = correct_index_range[0], correct_index_range[1]
            num_correct = random.randint(min(low, high), max(low, high))

        # Clamp so random.sample never receives a negative or oversized count.
        num_correct = max(0, min(num_correct, num_images, len(correct_images)))
        num_incorrect = max(0, min(num_images - num_correct, len(incorrect_images)))

        tagged = [(img, True) for img in random.sample(correct_images, num_correct)]
        tagged += [
            (img, False) for img in random.sample(incorrect_images, num_incorrect)
        ]
        if not tagged:
            logger.error("No captcha images selected")
            return [], "", ""

        random.shuffle(tagged)
        correct_indexes = [i for i, (_, is_correct) in enumerate(tagged) if is_correct]
        all_images = [img for img, _ in tagged]

        if preview_image:
            all_images.insert(0, random.choice(correct_images))

        correct_indexes_str = "".join(str(i) for i in correct_indexes)

        return all_images, correct_indexes_str, correct_key

    def get_captcha_audio(
        self,
        audio_dataset: Optional[str] = None,
        num_chars: int = 6,
        language: str = "en",
    ) -> Tuple[List[bytes], str]:
        """
        Get captcha audio for verification.

        Args:
            audio_dataset: The audio dataset to use. If None, uses the current dataset.
            num_chars: The number of characters to include in the audio captcha.
            language: The language code for the audio files.

        Returns:
            A tuple containing:
                - List of audio file bytes
                - The correct characters string (one character per audio file,
                  in the same order)
            On failure ``([], "")`` is returned.
        """
        if audio_dataset:
            self._load_audio_dataset(audio_dataset)
        elif not self.captcha_audio_data:
            self._load_audio_dataset("characters")

        dataset = self.captcha_audio_data
        if not dataset or dataset.get("type") != "audio" or not dataset.get("keys"):
            logger.error("Audio captcha dataset not loaded or invalid")
            return [], ""

        keys = dataset["keys"]

        # Only draw characters that have a recording in the requested
        # language; otherwise the answer string would contain characters the
        # user never hears.
        available_chars = [
            char for char, recordings in keys.items() if language in recordings
        ]
        if not available_chars:
            logger.error("No audio available in language %s", language)
            return [], ""

        selected_chars = random.choices(available_chars, k=num_chars)
        audio_files = [keys[char][language] for char in selected_chars]

        if not audio_files:
            logger.error("No audio files selected")
            return [], ""

        return audio_files, "".join(selected_chars)

    @staticmethod
    def _parse_correct_range(text: str) -> Union[Tuple[int, int], int]:
        """Parse the "number of correct images" field of an IMAGE_CAPTCHA command.

        Accepts a plain integer ("3") or a pair such as "(2, 3)" (the form
        produced by formatting a tuple); anything else yields the default
        range (2, 3).
        """
        text = text.strip()
        if text.isdecimal():
            return int(text)

        bounds = [part.strip() for part in text.strip("()[] ").split(",")]
        if len(bounds) == 2 and all(bound.isdecimal() for bound in bounds):
            return int(bounds[0]), int(bounds[1])

        return (2, 3)

    @staticmethod
    def _send_payload(
        client_socket: socket.socket, header: dict, blobs: List[bytes]
    ) -> None:
        """Send a JSON header line followed by length-prefixed binary blobs.

        Each blob is preceded by its size as a 4-byte big-endian integer.
        sendall is used because send may transmit only part of a buffer.
        """
        client_socket.sendall(f"{json.dumps(header)}\n".encode("utf-8"))
        for blob in blobs:
            client_socket.sendall(len(blob).to_bytes(4, byteorder="big"))
            client_socket.sendall(blob)

    def _serve_image_captcha(self, client_socket: socket.socket, data: str) -> None:
        """Answer ``IMAGE_CAPTCHA:<dataset>:<num_images>:<num_correct>:<preview>``."""
        parts = data.split(":")
        dataset_name = parts[1] if len(parts) > 1 else None
        num_images = int(parts[2]) if len(parts) > 2 and parts[2].isdecimal() else 9
        correct_range = (
            self._parse_correct_range(parts[3]) if len(parts) > 3 else (2, 3)
        )
        preview = parts[4].lower() == "true" if len(parts) > 4 else False

        images, correct_indexes, subject = self.get_captcha_images(
            image_dataset=dataset_name,
            num_images=num_images,
            correct_index_range=correct_range,
            preview_image=preview,
        )

        header = {
            "status": "success" if images else "error",
            "correct_indexes": correct_indexes,
            "subject": subject,
            "num_images": len(images),
        }
        self._send_payload(client_socket, header, images)

    def _serve_audio_captcha(self, client_socket: socket.socket, data: str) -> None:
        """Answer ``AUDIO_CAPTCHA:<dataset>:<num_chars>:<language>``."""
        parts = data.split(":")
        dataset_name = parts[1] if len(parts) > 1 else None
        num_chars = int(parts[2]) if len(parts) > 2 and parts[2].isdecimal() else 6
        language = parts[3] if len(parts) > 3 else "en"

        audio_files, correct_chars = self.get_captcha_audio(
            audio_dataset=dataset_name,
            num_chars=num_chars,
            language=language,
        )

        header = {
            "status": "success" if audio_files else "error",
            "correct_chars": correct_chars,
            "num_files": len(audio_files),
        }
        self._send_payload(client_socket, header, audio_files)

    def handle_client(
        self, client_socket: socket.socket, addr: Tuple[str, int]
    ) -> None:
        """Handle client connection and queries.

        Each received chunk (up to 1024 bytes) is treated as one command:

        * ``IPSET:<ip>`` or a bare ``<ip>`` -> JSON list of group names;
        * ``SECRET_KEY:`` -> JSON string with the hex-encoded secret key;
        * ``IMAGE_CAPTCHA:...`` / ``AUDIO_CAPTCHA:...`` -> JSON header line
          followed by length-prefixed binary blobs.

        The connection is served until the peer closes it, sends an empty
        line, or an error occurs; the socket is always closed on return.
        """
        try:
            while True:
                data = client_socket.recv(1024).decode("utf-8").strip()
                if not data:
                    break

                if data.startswith("IMAGE_CAPTCHA:"):
                    self._serve_image_captcha(client_socket, data)
                    continue

                if data.startswith("AUDIO_CAPTCHA:"):
                    self._serve_audio_captcha(client_socket, data)
                    continue

                if data.startswith("IPSET:"):
                    response = json.dumps(self.find_matching_groups(data[6:]))
                elif data.startswith("SECRET_KEY:"):
                    response = json.dumps(self.get_secret_key().hex())
                else:
                    response = json.dumps(self.find_matching_groups(data))

                client_socket.sendall(f"{response}\n".encode("utf-8"))
        except Exception as e:
            logger.error("Error handling client %s: %s", addr, e)
        finally:
            client_socket.close()

    def run_server(self) -> None:
        """Run the memory server (blocking) until ``stop`` is called.

        Returns immediately if something already listens on the port, or if
        the IP set can neither be downloaded nor loaded.
        """
        if self.is_server_running():
            logger.info("Server already running on port %s", self.port)
            return

        if not os.path.exists(self.data_path):
            logger.info("IP data file not found at %s, downloading...", self.data_path)
            if not self.download_data():
                logger.error("Failed to download data, cannot start server")
                return

        if not self.load_data():
            logger.error("Failed to load data, cannot start server")
            return

        self.check_and_update_data()
        # Keep datasets that were selected before start(); fall back to the
        # defaults only when nothing has been loaded yet.
        self.load_captcha_datasets(
            image_dataset=self.current_image_dataset or "animals",
            audio_dataset=self.current_audio_dataset or "characters",
        )

        try:
            self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            # NOTE(security): this binds to all interfaces, and the protocol
            # (including SECRET_KEY:) is unauthenticated. Restrict access to
            # the port at the network level unless remote clients are intended.
            self.server_socket.bind(("0.0.0.0", self.port))
            self.server_socket.listen(10)
            self.running = True

            logger.info(
                "Memory server started on port %s with data from %s",
                self.port,
                self.data_path,
            )

            while self.running:
                try:
                    client_socket, addr = self.server_socket.accept()
                    client_thread = threading.Thread(
                        target=self.handle_client, args=(client_socket, addr)
                    )
                    client_thread.daemon = True
                    client_thread.start()
                except Exception as e:
                    # accept() failing after stop() closed the socket is the
                    # normal shutdown path and is not reported.
                    if self.running:
                        logger.error("Error accepting connection: %s", e)
                        # Avoid a tight error loop on persistent failures.
                        time.sleep(0.1)

        except Exception as e:
            logger.error("Server error: %s", e)
        finally:
            self.running = False
            if self.server_socket:
                self.server_socket.close()

    def start(self) -> None:
        """Start the server in a background daemon thread (no-op if already started)."""
        if self.server_thread and self.server_thread.is_alive():
            return

        self.server_thread = threading.Thread(target=self.run_server)
        self.server_thread.daemon = True
        self.server_thread.start()

    def stop(self) -> None:
        """Stop the server by clearing the run flag and closing the listening socket."""
        self.running = False
        if self.server_socket:
            self.server_socket.close()
610
+
611
+
612
class MemoryClient:
    """Client to connect to the MemoryServer.

    Commands are newline-terminated text; every response starts with one
    JSON line. Captcha responses are followed by binary blobs, each prefixed
    with its size as a 4-byte big-endian integer.

    The client holds a single socket and performs no locking around it.
    After any error in the middle of an exchange the socket is closed, so the
    next call starts on a fresh connection instead of a desynchronised stream.
    """

    def __init__(self, host: str = "127.0.0.1", port: int = 9876):
        self.host = host
        self.port = port
        self.socket = None

    def connect(self) -> bool:
        """Connect to the memory server, replacing any existing connection.

        Returns:
            True on success. On failure ``self.socket`` is left as None.
        """
        self.close()

        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((self.host, self.port))
        except Exception as e:
            if sock is not None:
                sock.close()
            logger.error("Failed to connect to memory server: %s", e)
            return False

        self.socket = sock
        return True

    def _recv_exact(self, size: int) -> bytes:
        """Read exactly *size* bytes, raising ConnectionError on early EOF."""
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = self.socket.recv(min(remaining, 65536))
            if not chunk:
                raise ConnectionError("memory server closed the connection")
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    def _read_line(self) -> bytes:
        """Read one line (without the trailing newline).

        Reads byte by byte on purpose: binary data may follow the line and
        must stay in the socket for ``_recv_exact``.
        """
        line = bytearray()
        while True:
            byte = self.socket.recv(1)
            if not byte:
                raise ConnectionError("memory server closed the connection")
            if byte == b"\n":
                return bytes(line)
            line += byte

    def _request_line(self, command: str) -> str:
        """Send *command* and return the first response line, stripped."""
        self.socket.sendall(f"{command}\n".encode("utf-8"))
        return self._read_line().decode("utf-8").strip()

    def _read_blobs(self, count: int) -> List[bytes]:
        """Read *count* length-prefixed binary blobs."""
        blobs = []
        for _ in range(count):
            size = int.from_bytes(self._recv_exact(4), byteorder="big")
            blobs.append(self._recv_exact(size))
        return blobs

    def lookup_ip(self, ip: str) -> List[str]:
        """Look up an IP in the memory server.

        If the exchange fails, the connection is re-established and the
        lookup retried once.

        Returns:
            The group names reported by the server, or [] on failure.
        """
        for attempt in range(2):
            if not self.socket and not self.connect():
                return []

            try:
                return json.loads(self._request_line(f"IPSET:{ip}"))
            except Exception as e:
                if attempt == 0:
                    logger.error("Error looking up IP: %s", e)
                self.close()

        return []

    def get_captcha_images(
        self,
        dataset_name: Optional[str] = None,
        num_images: int = 9,
        num_correct: Union[int, Tuple[int, int]] = (2, 3),
        preview_image: bool = False,
    ) -> Tuple[List[bytes], str, str]:
        """
        Get captcha images from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_images: Number of images to return
            num_correct: Number or range of correct images
            preview_image: Whether to include a preview image

        Returns:
            Tuple of (images list, correct indexes string, subject);
            ``([], "", "")`` on any failure.
        """
        if not self.socket and not self.connect():
            return [], "", ""

        try:
            command = (
                f"IMAGE_CAPTCHA:{dataset_name or ''}:"
                f"{num_images}:{num_correct}:{preview_image}"
            )
            response_data = json.loads(self._request_line(command))

            if response_data.get("status") != "success":
                return [], "", ""

            images = self._read_blobs(response_data.get("num_images", 0))

            return (
                images,
                response_data.get("correct_indexes", ""),
                response_data.get("subject", ""),
            )
        except Exception as e:
            logger.error("Error getting captcha images: %s", e)
            self.close()
            return [], "", ""

    def get_captcha_audio(
        self,
        dataset_name: Optional[str] = None,
        num_chars: int = 6,
        language: str = "en",
    ) -> Tuple[List[bytes], str]:
        """
        Get captcha audio from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_chars: Number of characters in the audio captcha
            language: Language code for the audio

        Returns:
            Tuple of (audio files list, correct characters string);
            ``([], "")`` on any failure.
        """
        if not self.socket and not self.connect():
            return [], ""

        try:
            command = f"AUDIO_CAPTCHA:{dataset_name or ''}:{num_chars}:{language}"
            response_data = json.loads(self._request_line(command))

            if response_data.get("status") != "success":
                return [], ""

            audio_files = self._read_blobs(response_data.get("num_files", 0))

            return audio_files, response_data.get("correct_chars", "")
        except Exception as e:
            logger.error("Error getting captcha audio: %s", e)
            self.close()
            return [], ""

    def get_secret_key(self) -> bytes:
        """Get the secret key from the memory server.

        Returns:
            The key bytes, or b"" if the server is unreachable or the response
            is not a JSON string of hex digits.
        """
        if not self.socket and not self.connect():
            return b""

        try:
            response = self._request_line("SECRET_KEY:")
        except Exception as e:
            logger.error("Error getting secret key: %s", e)
            self.close()
            return b""

        try:
            json_response = json.loads(response)
            if isinstance(json_response, str):
                return bytes.fromhex(json_response)
            return b""
        except (json.JSONDecodeError, ValueError):
            return b""

    def close(self) -> None:
        """Close the connection to the memory server."""
        if self.socket:
            try:
                self.socket.close()
            except Exception:
                pass
            self.socket = None
817
+
818
+
819
def ensure_server_running(
    port: int = 9876,
    data_path: Optional[str] = None,
    image_dataset: str = "animals",
    audio_dataset: str = "characters",
) -> None:
    """Ensure that the memory server is running.

    Loads the requested captcha datasets into the process-wide MemoryServer,
    starts its background thread and waits until something accepts
    connections on *port*.

    Args:
        port: TCP port of the memory server.
        data_path: Path of the cached IP set; defaults to ``IPSET_DATA_PATH``.
        image_dataset: Image captcha dataset to load.
        audio_dataset: Audio captcha dataset to load.
    """
    if data_path is None:
        data_path = IPSET_DATA_PATH

    server = MemoryServer(port=port, data_path=data_path)
    server.load_captcha_datasets(
        image_dataset=image_dataset, audio_dataset=audio_dataset
    )
    server.start()

    while not server.is_server_running():
        # The server thread exits early when it cannot start (for example
        # when the IP set cannot be downloaded). Waiting on a dead thread
        # would block the caller forever, so give up in that case.
        thread = server.server_thread
        if thread is None or not thread.is_alive():
            if not server.is_server_running():
                logger.error("Memory server failed to start on port %s", port)
            return
        time.sleep(0.1)