flask-Humanify 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,838 @@
1
+ import json
2
+ import logging
3
+ import socket
4
+ import time
5
+ import threading
6
+ import os
7
+ import importlib.metadata
8
+ import importlib.resources
9
+ import urllib.request
10
+ import gzip
11
+ import pickle
12
+ import random
13
+ import secrets
14
+ from pathlib import Path
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ from datetime import datetime, timedelta
17
+ from netaddr import IPNetwork, IPAddress
18
+
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
# Locate the package directory. When the "flask-humanify" distribution is
# installed its resource root is used; otherwise fall back to the directory
# that contains this source file (e.g. when running from a checkout).
try:
    importlib.metadata.distribution("flask-humanify")
    BASE_DIR = importlib.resources.files("flask_humanify")
except importlib.metadata.PackageNotFoundError:
    BASE_DIR = Path(__file__).parent

# importlib.resources.files() may hand back a non-Path object; the rest of the
# module relies on pathlib semantics, so normalise it here.
if not isinstance(BASE_DIR, Path):
    BASE_DIR = Path(str(BASE_DIR))

# Directory that caches the downloaded IP set and captcha datasets.
# exist_ok=True avoids the check-then-create race when several worker
# processes import this module at the same time.
DATASET_DIR = BASE_DIR / "datasets"
DATASET_DIR.mkdir(parents=True, exist_ok=True)

IPSET_DATA_PATH = str(DATASET_DIR / "ipset.json")
SECRET_KEY_FILE = BASE_DIR / "secret_key.bin"

# Image captcha dataset name -> download URL of the pickled dataset.
IMAGES_CAPTCHA_DATASETS = {
    "keys": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/keys.pkl"
    ),
    "animals": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/animals.pkl"
    ),
    "ai_dogs": (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/ai-dogs.pkl"
    ),
}

# Audio captcha dataset name -> download URL of the pickled dataset.
AUDIO_CAPTCHA_DATASETS = {
    "characters": (
        "https://raw.githubusercontent.com/librecap/audiocaptcha/"
        "refs/heads/main/characters/characters.pkl"
    )
}
60
+
61
+
62
class MemoryServer:
    """Singleton TCP server that keeps IP sets and captcha datasets in memory.

    Wire protocol (one request per ``recv``; replies end with a newline):

    * ``IPSET:<ip>`` or a bare ``<ip>`` -> JSON list of matching group names.
    * ``SECRET_KEY:`` -> JSON string holding the hex-encoded secret key.
    * ``IMAGE_CAPTCHA:<dataset>:<num_images>:<num_correct>:<preview>`` and
      ``AUDIO_CAPTCHA:<dataset>:<num_chars>:<language>`` -> a JSON header
      line followed by one frame per blob, each frame being a 4-byte
      big-endian length and then the raw bytes.

    SECURITY: the protocol is unauthenticated and exposes the secret key, so
    the server binds to the loopback interface unless ``host`` says otherwise.
    """

    _instance = None
    _lock = threading.Lock()

    # Minimum delay between two background refresh attempts, so that a failing
    # download (e.g. no network) is not retried on every single lookup.
    _UPDATE_RETRY_INTERVAL = timedelta(minutes=10)

    # Leading bytes of a PNG file; used to detect already-decompressed images.
    _PNG_MAGIC = b"\x89PNG\r\n\x1a\n"

    def __new__(
        cls,
        port: int = 9876,
        data_path: Optional[str] = None,
        host: str = "127.0.0.1",
    ):
        # The arguments are consumed by __init__; __new__ only enforces that a
        # single instance exists per process.
        with cls._lock:
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance.initialized = False
        return cls._instance

    def __init__(
        self,
        port: int = 9876,
        data_path: Optional[str] = None,
        host: str = "127.0.0.1",
    ):
        """Initialise the singleton on first construction.

        Args:
            port: TCP port to listen on.
            data_path: Path of the cached IP set JSON; defaults to
                ``IPSET_DATA_PATH``.
            host: Interface to bind to. Defaults to loopback; pass
                ``"0.0.0.0"`` only on a trusted network.

        Later constructions return the already initialised instance and
        ignore their arguments.
        """
        if getattr(self, "initialized", False):
            return

        if data_path is None:
            data_path = IPSET_DATA_PATH

        self.host = host
        self.port = port
        self.data_path = data_path
        self.ip_to_groups: Dict[str, List[str]] = {}
        self.cidrs_to_ips: Dict[IPNetwork, List[str]] = {}
        self.last_update: Optional[datetime] = None
        self.server_socket = None
        self.server_thread = None
        self.running = False

        # Single-flight guard and back-off state for background refreshes.
        self._update_lock = threading.Lock()
        self._last_update_attempt: Optional[datetime] = None

        self.captcha_image_data: Dict[str, Dict[str, List[bytes]]] = {}
        self.captcha_audio_data: Dict[str, Dict[str, Dict[str, List[bytes]]]] = {}
        self.current_image_dataset: Optional[str] = None
        self.current_audio_dataset: Optional[str] = None
        self.secret_key: bytes = self._load_or_create_secret_key()

        self.initialized = True

    def _load_or_create_secret_key(self) -> bytes:
        """Return the persisted secret key, generating and saving one if needed.

        An existing but empty key file (e.g. left behind by an interrupted
        write) is treated as missing, so an empty key is never returned.
        """
        if SECRET_KEY_FILE.exists():
            logger.info("Loading secret key from %s", SECRET_KEY_FILE)
            with open(SECRET_KEY_FILE, "rb") as f:
                stored_key = f.read()
            if stored_key:
                return stored_key
            logger.warning("Secret key file %s is empty", SECRET_KEY_FILE)

        logger.info("Generating new secret key")
        secret_key = secrets.token_bytes(32)
        with open(SECRET_KEY_FILE, "wb") as f:
            f.write(secret_key)

        # Best effort: keep the key readable by the owning user only.
        try:
            os.chmod(SECRET_KEY_FILE, 0o600)
        except OSError as e:
            logger.warning("Could not restrict permissions of secret key: %s", e)

        return secret_key

    def get_secret_key(self) -> bytes:
        """Return the secret key."""
        return self.secret_key

    def is_server_running(self) -> bool:
        """Return True if something accepts TCP connections on the server port."""
        # A wildcard bind address is not connectable; probe loopback instead.
        probe_host = "127.0.0.1" if self.host in ("", "0.0.0.0") else self.host
        try:
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.connect((probe_host, self.port))
                return True
        except OSError:
            return False

    def _is_data_fresh(self) -> bool:
        """Return True if the cached IP set file is younger than seven days."""
        if not os.path.exists(self.data_path):
            return False

        with open(self.data_path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                if isinstance(data, dict) and "_timestamp" in data:
                    timestamp = datetime.fromisoformat(data["_timestamp"])
                    return datetime.now() - timestamp < timedelta(days=7)
            except (json.JSONDecodeError, KeyError, ValueError, TypeError):
                # Unreadable cache: fall through and download a new copy.
                pass
        return False

    def download_data(self, force: bool = False) -> bool:
        """Download the IP set from GitHub and stamp it with the current time.

        Args:
            force: Download even if the cached file is less than 7 days old.

        Returns:
            True if fresh data is available on disk, False on any error.
        """
        try:
            if not force and self._is_data_fresh():
                return True

            url = "https://raw.githubusercontent.com/tn3w/IPSet/refs/heads/master/ipset.json"
            with urllib.request.urlopen(url, timeout=30) as response:
                data = json.loads(response.read().decode("utf-8"))

            data["_timestamp"] = datetime.now().isoformat()

            # Write to a temporary file and rename it so that readers never
            # observe a half-written JSON document.
            tmp_path = f"{self.data_path}.{os.getpid()}.tmp"
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(data, f)
            os.replace(tmp_path, self.data_path)

            return True
        except Exception as e:
            logger.error("Error downloading IP set data: %s", e)
            return False

    def download_captcha_dataset(self, dataset_url: str, dataset_name: str) -> str:
        """Download a captcha dataset unless it is already cached.

        Args:
            dataset_url: URL of the pickled dataset.
            dataset_name: Name used for the cached ``<name>.pkl`` file.

        Returns:
            Path of the cached file, or an empty string if the download failed.
        """
        file_path = os.path.join(DATASET_DIR, f"{dataset_name}.pkl")
        if os.path.exists(file_path):
            return file_path

        # Download next to the target and rename on success: an interrupted
        # download must not leave a truncated file that later passes the
        # os.path.exists() check above.
        tmp_path = f"{file_path}.{os.getpid()}.part"
        try:
            urllib.request.urlretrieve(dataset_url, tmp_path)
            os.replace(tmp_path, file_path)
            return file_path
        except Exception as e:
            logger.error("Failed to download captcha dataset %s: %s", dataset_name, e)
            try:
                os.remove(tmp_path)
            except OSError:
                pass
            return ""

    def load_data(self) -> bool:
        """Load the cached IP set into memory.

        The lookup tables are built off to the side and swapped in at the end,
        so lookups running during a background refresh never see a partially
        filled table.

        Returns:
            True on success, False if the file could not be read or parsed.
        """
        try:
            with open(self.data_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            last_update = None
            if "_timestamp" in data:
                last_update = datetime.fromisoformat(data.pop("_timestamp"))

            ip_to_groups: Dict[str, List[str]] = {}
            cidrs_to_ips: Dict[IPNetwork, List[str]] = {}

            for group, ips in data.items():
                for ip in ips:
                    if "/" not in ip:
                        ip_to_groups.setdefault(ip, []).append(group)
                        continue
                    try:
                        network = IPNetwork(ip)
                    except Exception:
                        # Skip entries netaddr cannot parse as a network.
                        continue
                    cidrs_to_ips.setdefault(network, []).append(group)

            if last_update is not None:
                self.last_update = last_update
            self.ip_to_groups = ip_to_groups
            self.cidrs_to_ips = cidrs_to_ips

            return True
        except Exception as e:
            logger.error("Error loading IP set data: %s", e)
            return False

    def _load_image_dataset(self, name: str) -> bool:
        """Load the named image dataset; return True only if it was loaded."""
        dataset_url = IMAGES_CAPTCHA_DATASETS.get(name)
        if dataset_url is None:
            logger.warning("Unknown image captcha dataset: %s", name)
            return False

        try:
            dataset_path = self.download_captcha_dataset(dataset_url, name)
            if not dataset_path:
                return False

            with open(dataset_path, "rb") as f:
                # SECURITY: pickle.load executes code embedded in the file, and
                # this file was downloaded from the internet. Only acceptable
                # while the dataset source is fully trusted.
                data = pickle.load(f)

            if data["type"] == "image":
                first_image = data["keys"][next(iter(data["keys"]))][0]
                # Images that do not start with the PNG signature are stored
                # gzip-compressed; decompress them once at load time.
                if not first_image.startswith(self._PNG_MAGIC):
                    data["keys"] = {
                        k: [gzip.decompress(img) for img in v]
                        for k, v in data["keys"].items()
                    }

            self.captcha_image_data = data
            self.current_image_dataset = name
            logger.info("Loaded %s image captcha dataset", name)
            return True
        except Exception as e:
            logger.error("Failed to load %s image captcha dataset: %s", name, e)
            return False

    def _load_audio_dataset(self, name: str) -> bool:
        """Load the named audio dataset; return True only if it was loaded."""
        dataset_url = AUDIO_CAPTCHA_DATASETS.get(name)
        if dataset_url is None:
            logger.warning("Unknown audio captcha dataset: %s", name)
            return False

        try:
            dataset_path = self.download_captcha_dataset(dataset_url, name)
            if not dataset_path:
                return False

            with open(dataset_path, "rb") as f:
                # SECURITY: see _load_image_dataset - unpickling downloaded
                # data requires a fully trusted source.
                data = pickle.load(f)

            self.captcha_audio_data = data
            self.current_audio_dataset = name
            logger.info("Loaded %s audio captcha dataset", name)
            return True
        except Exception as e:
            logger.error("Failed to load %s audio captcha dataset: %s", name, e)
            return False

    def load_captcha_datasets(
        self,
        image_dataset: Optional[str] = None,
        audio_dataset: Optional[str] = None,
    ) -> bool:
        """Load captcha datasets into memory.

        Each kind is handled independently: ``None`` keeps whatever is
        currently loaded for that kind, and a dataset that is already loaded
        under the requested name is not read from disk again. The
        ``current_*_dataset`` names are only updated after a successful load.

        Args:
            image_dataset: Name of the image dataset to load, or None.
            audio_dataset: Name of the audio dataset to load, or None.

        Returns:
            True if every requested dataset is loaded, False otherwise.
        """
        try:
            loaded = True

            if image_dataset is not None and (
                image_dataset != self.current_image_dataset
                or not self.captcha_image_data
            ):
                loaded = self._load_image_dataset(image_dataset) and loaded

            if audio_dataset is not None and (
                audio_dataset != self.current_audio_dataset
                or not self.captcha_audio_data
            ):
                loaded = self._load_audio_dataset(audio_dataset) and loaded

            return loaded
        except Exception as e:
            logger.error("Error loading captcha datasets: %s", e)
            return False

    def check_and_update_data(self) -> None:
        """Start a background refresh if the IP set is older than seven days.

        At most one refresh runs at a time, and a new attempt is not started
        until ``_UPDATE_RETRY_INTERVAL`` has passed since the previous one.
        """
        now = datetime.now()
        if self.last_update is not None and now - self.last_update <= timedelta(
            days=7
        ):
            return

        if (
            self._last_update_attempt is not None
            and now - self._last_update_attempt < self._UPDATE_RETRY_INTERVAL
        ):
            return

        # Non-blocking: if another thread is already refreshing, do nothing.
        if not self._update_lock.acquire(blocking=False):
            return

        self._last_update_attempt = now
        try:
            threading.Thread(target=self._locked_update, daemon=True).start()
        except Exception:
            self._update_lock.release()
            raise

    def _locked_update(self) -> None:
        """Run one refresh and release the single-flight guard afterwards."""
        try:
            self._async_update()
        finally:
            self._update_lock.release()

    def _async_update(self) -> None:
        """Update data in the background without affecting current operations."""
        if self.download_data(force=True):
            self.load_data()

    def find_matching_groups(self, ip: str) -> List[str]:
        """Return the names of all groups that contain the given IP.

        Args:
            ip: IP address in textual form.

        Returns:
            Groups matched exactly or through a CIDR range. An empty list is
            returned if the address cannot be parsed.
        """
        self.check_and_update_data()

        # Copy: the stored list must not grow with CIDR matches.
        matching_groups = list(self.ip_to_groups.get(ip, []))

        try:
            ip_obj = IPAddress(ip)
            ip_version = ip_obj.version

            for cidr, groups in self.cidrs_to_ips.items():
                if cidr.version != ip_version or ip_obj not in cidr:
                    continue
                for group in groups:
                    if group not in matching_groups:
                        matching_groups.append(group)
        except Exception:
            return []

        return matching_groups

    def get_captcha_images(
        self,
        image_dataset: Optional[str] = None,
        correct_index_range: Union[Tuple[int, int], int] = (2, 3),
        num_images: int = 9,
        preview_image: bool = False,
    ) -> Tuple[List[bytes], str, str]:
        """
        Get captcha images for verification.

        Args:
            image_dataset: The image dataset to use. If None, uses the current
                dataset, loading "animals" when nothing is loaded yet.
            correct_index_range: Exact number of correct images, or an
                inclusive (low, high) range to draw that number from.
            num_images: The number of images in the grid. The number of
                correct images is capped at this value.
            preview_image: If True, add an additional correct image at the
                beginning of the list.

        Returns:
            A tuple containing:
            - List of images (preview first, if requested)
            - A string of concatenated decimal indexes of the correct images
              (e.g., "034"). Indexes are positions within the shuffled grid
              and do not count the preview image.
            - The subject that represents the correct images
            On failure returns ``([], "", "")``.
        """
        if image_dataset:
            self.load_captcha_datasets(image_dataset=image_dataset)
        elif not self.captcha_image_data:
            self.load_captcha_datasets(image_dataset="animals")

        dataset = self.captcha_image_data
        if not dataset or dataset.get("type") != "image" or not dataset.get("keys"):
            logger.error("Image captcha dataset not loaded or invalid")
            return [], "", ""

        keys = dataset["keys"]
        all_keys = list(keys)

        # With one or two subjects the first key is always the "correct" one.
        if len(all_keys) <= 2:
            correct_key = all_keys[0]
        else:
            correct_key = random.choice(all_keys)

        correct_images = keys.get(correct_key, [])
        incorrect_images = [
            img for k in all_keys if k != correct_key for img in keys.get(k, [])
        ]

        if not correct_images or not incorrect_images:
            logger.error("Empty image lists in captcha dataset")
            return [], "", ""

        if isinstance(correct_index_range, int):
            num_correct = correct_index_range
        else:
            low, high = sorted(correct_index_range[:2])
            num_correct = random.randint(low, high)

        # Clamp so that random.sample never receives a negative or oversized
        # count (e.g. when num_images is smaller than num_correct).
        num_images = max(0, num_images)
        num_correct = max(0, min(num_correct, num_images, len(correct_images)))
        num_incorrect = min(num_images - num_correct, len(incorrect_images))

        preview_correct_image = []
        if preview_image:
            preview_correct_image = [random.choice(correct_images)]

        tagged = [(img, True) for img in random.sample(correct_images, num_correct)]
        tagged += [
            (img, False) for img in random.sample(incorrect_images, num_incorrect)
        ]
        if not tagged:
            logger.error("No captcha images selected")
            return [], "", ""

        random.shuffle(tagged)

        correct_indexes_str = "".join(
            str(i) for i, (_, is_correct) in enumerate(tagged) if is_correct
        )
        all_images = preview_correct_image + [img for img, _ in tagged]

        return all_images, correct_indexes_str, correct_key

    def get_captcha_audio(
        self,
        audio_dataset: Optional[str] = None,
        num_chars: int = 6,
        language: str = "en",
    ) -> Tuple[List[bytes], str]:
        """
        Get captcha audio for verification.

        Only characters that have a recording in ``language`` are drawn, so
        the returned string always matches the returned audio one-to-one.

        Args:
            audio_dataset: The audio dataset to use. If None, uses the current
                dataset, loading "characters" when nothing is loaded yet.
            num_chars: The number of characters to include in the audio captcha.
            language: The language code for the audio files.

        Returns:
            A tuple containing:
            - List of audio file bytes
            - The correct characters string
            On failure returns ``([], "")``.
        """
        if audio_dataset:
            self.load_captcha_datasets(audio_dataset=audio_dataset)
        elif not self.captcha_audio_data:
            self.load_captcha_datasets(audio_dataset="characters")

        dataset = self.captcha_audio_data
        if not dataset or dataset.get("type") != "audio" or not dataset.get("keys"):
            logger.error("Audio captcha dataset not loaded or invalid")
            return [], ""

        keys = dataset["keys"]
        available_chars = [
            char for char, by_language in keys.items() if language in by_language
        ]

        if not available_chars or num_chars <= 0:
            logger.error("No audio files selected")
            return [], ""

        selected_chars = random.choices(available_chars, k=num_chars)
        audio_files = [keys[char][language] for char in selected_chars]

        return audio_files, "".join(selected_chars)

    @staticmethod
    def _parse_count(raw: str, default: int) -> int:
        """Parse a non-negative decimal request field, or return ``default``."""
        return int(raw) if raw.isdecimal() else default

    @staticmethod
    def _parse_correct_range(raw: str) -> Union[Tuple[int, int], int]:
        """Parse the ``num_correct`` request field.

        Accepts a single number ("3") or a pair written as "(2, 4)", "2,4"
        or "2-4". Anything else yields the default range ``(2, 3)``.
        """
        tokens = raw.translate(str.maketrans("(),-", "    ")).split()
        if tokens and all(token.isdecimal() for token in tokens):
            if len(tokens) == 1:
                return int(tokens[0])
            if len(tokens) == 2:
                low, high = sorted(int(token) for token in tokens)
                return (low, high)
        return (2, 3)

    @staticmethod
    def _send_framed(client_socket: socket.socket, header: dict, blobs) -> None:
        """Send a JSON header line followed by length-prefixed binary frames."""
        # sendall: plain send() may transmit only part of a large buffer.
        client_socket.sendall(f"{json.dumps(header)}\n".encode("utf-8"))
        for blob in blobs:
            client_socket.sendall(len(blob).to_bytes(4, byteorder="big"))
            client_socket.sendall(blob)

    def _serve_image_captcha(self, client_socket: socket.socket, data: str) -> None:
        """Answer one ``IMAGE_CAPTCHA:`` request."""
        parts = data.split(":")
        dataset_name = parts[1] if len(parts) > 1 else None
        num_images = self._parse_count(parts[2], 9) if len(parts) > 2 else 9
        correct_range = (
            self._parse_correct_range(parts[3]) if len(parts) > 3 else (2, 3)
        )
        preview = len(parts) > 4 and parts[4].lower() == "true"

        images, correct_indexes, subject = self.get_captcha_images(
            image_dataset=dataset_name,
            num_images=num_images,
            correct_index_range=correct_range,
            preview_image=preview,
        )

        header = {
            "status": "success" if images else "error",
            "correct_indexes": correct_indexes,
            "subject": subject,
            "num_images": len(images),
        }
        self._send_framed(client_socket, header, images)

    def _serve_audio_captcha(self, client_socket: socket.socket, data: str) -> None:
        """Answer one ``AUDIO_CAPTCHA:`` request."""
        parts = data.split(":")
        dataset_name = parts[1] if len(parts) > 1 else None
        num_chars = self._parse_count(parts[2], 6) if len(parts) > 2 else 6
        language = parts[3] if len(parts) > 3 else "en"

        audio_files, correct_chars = self.get_captcha_audio(
            audio_dataset=dataset_name,
            num_chars=num_chars,
            language=language,
        )

        header = {
            "status": "success" if audio_files else "error",
            "correct_chars": correct_chars,
            "num_files": len(audio_files),
        }
        self._send_framed(client_socket, header, audio_files)

    def handle_client(
        self, client_socket: socket.socket, addr: Tuple[str, int]
    ) -> None:
        """Serve requests from one client until it disconnects.

        Each ``recv`` of up to 1024 bytes is treated as one request. Any error
        is logged and ends the connection; the socket is always closed.
        """
        try:
            while True:
                data = client_socket.recv(1024).decode("utf-8").strip()
                if not data:
                    break

                if data.startswith("IMAGE_CAPTCHA:"):
                    self._serve_image_captcha(client_socket, data)
                    continue

                if data.startswith("AUDIO_CAPTCHA:"):
                    self._serve_audio_captcha(client_socket, data)
                    continue

                if data.startswith("SECRET_KEY:"):
                    payload = self.get_secret_key().hex()
                elif data.startswith("IPSET:"):
                    payload = self.find_matching_groups(data[6:])
                else:
                    # Legacy form: the request is the bare IP address.
                    payload = self.find_matching_groups(data)

                client_socket.sendall(f"{json.dumps(payload)}\n".encode("utf-8"))
        except Exception as e:
            logger.error("Error handling client %s: %s", addr, e)
        finally:
            client_socket.close()

    def run_server(self) -> None:
        """Run the memory server (blocking) until ``stop()`` is called.

        Returns immediately if a server already answers on the port, or if
        the IP set can neither be downloaded nor loaded.
        """
        if self.is_server_running():
            logger.info("Server already running on port %s", self.port)
            return

        if not os.path.exists(self.data_path):
            logger.info("IP data file not found at %s, downloading...", self.data_path)
            if not self.download_data():
                logger.error("Failed to download data, cannot start server")
                return

        if not self.load_data():
            logger.error("Failed to load data, cannot start server")
            return

        self.check_and_update_data()
        # Keep datasets that were selected before start-up; fall back to the
        # defaults only for kinds that have not been chosen yet.
        self.load_captcha_datasets(
            image_dataset=self.current_image_dataset or "animals",
            audio_dataset=self.current_audio_dataset or "characters",
        )

        try:
            self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            self.server_socket.bind((self.host, self.port))
            self.server_socket.listen(10)
            self.running = True

            logger.info(
                "Memory server started on port %s with data from %s",
                self.port,
                self.data_path,
            )

            while self.running:
                try:
                    client_socket, addr = self.server_socket.accept()
                    client_thread = threading.Thread(
                        target=self.handle_client, args=(client_socket, addr)
                    )
                    client_thread.daemon = True
                    client_thread.start()
                except Exception as e:
                    # stop() closes the socket to unblock accept(); only
                    # report errors that happen while still running.
                    if self.running:
                        logger.error("Error accepting connection: %s", e)

        except Exception as e:
            logger.error("Server error: %s", e)
        finally:
            self.running = False
            if self.server_socket:
                self.server_socket.close()

    def start(self) -> None:
        """Start the server in a background daemon thread (no-op if alive)."""
        if self.server_thread and self.server_thread.is_alive():
            return

        self.server_thread = threading.Thread(target=self.run_server)
        self.server_thread.daemon = True
        self.server_thread.start()

    def stop(self) -> None:
        """Stop the server by clearing the run flag and closing the socket."""
        self.running = False
        if self.server_socket:
            self.server_socket.close()
612
+
613
+
614
class MemoryClient:
    """Client to connect to the MemoryServer.

    Replies are read as newline-terminated JSON; captcha replies are followed
    by binary frames, each a 4-byte big-endian length and then the payload.
    After any communication error the connection is dropped so that the next
    call starts from a clean, freshly connected stream. Instances are not
    safe for concurrent use from several threads.
    """

    def __init__(self, host: str = "127.0.0.1", port: int = 9876):
        self.host = host
        self.port = port
        self.socket = None

    def connect(self) -> bool:
        """Connect to the memory server, replacing any existing connection.

        Returns:
            True if connected. On failure ``self.socket`` is left as None so
            that later calls retry instead of using a dead socket.
        """
        self.close()
        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((self.host, self.port))
        except Exception as e:
            logger.error("Failed to connect to memory server: %s", e)
            if sock is not None:
                sock.close()
            return False

        self.socket = sock
        return True

    def _ensure_connected(self) -> bool:
        """Return True if a connection exists or could be established."""
        return self.socket is not None or self.connect()

    def _recv_line(self) -> bytes:
        """Read one newline-terminated line, without the newline.

        Reads byte by byte on purpose: binary frames may follow the line and
        must stay in the socket buffer.

        Raises:
            ConnectionError: If the server closes the connection first.
        """
        line = bytearray()
        while True:
            chunk = self.socket.recv(1)
            if not chunk:
                raise ConnectionError("connection closed while reading response")
            if chunk == b"\n":
                return bytes(line)
            line += chunk

    def _recv_exact(self, size: int) -> bytes:
        """Read exactly ``size`` bytes.

        Raises:
            ConnectionError: If the stream ends before ``size`` bytes arrived.
        """
        buffer = bytearray()
        while len(buffer) < size:
            chunk = self.socket.recv(min(size - len(buffer), 65536))
            if not chunk:
                raise ConnectionError("connection closed while reading frame")
            buffer += chunk
        return bytes(buffer)

    def _recv_frame(self) -> bytes:
        """Read one length-prefixed binary frame."""
        size = int.from_bytes(self._recv_exact(4), byteorder="big")
        return self._recv_exact(size)

    def _request_json(self, command: str):
        """Send one command line and return the decoded JSON reply line."""
        self.socket.sendall(f"{command}\n".encode("utf-8"))
        return json.loads(self._recv_line().decode("utf-8").strip())

    def lookup_ip(self, ip: str) -> List[str]:
        """Look up an IP in the memory server.

        The request is retried once over a fresh connection if it fails.

        Returns:
            The list of matching group names, or an empty list on failure.
        """
        for _ in range(2):
            if not self._ensure_connected():
                return []
            try:
                return self._request_json(f"IPSET:{ip}")
            except Exception as e:
                logger.error("Error looking up IP: %s", e)
                self.close()
        return []

    def get_captcha_images(
        self,
        dataset_name: Optional[str] = None,
        num_images: int = 9,
        num_correct: Union[int, Tuple[int, int]] = (2, 3),
        preview_image: bool = False,
    ) -> Tuple[List[bytes], str, str]:
        """
        Get captcha images from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_images: Number of images to return
            num_correct: Number or range of correct images
            preview_image: Whether to include a preview image

        Returns:
            Tuple of (images list, correct indexes string, subject);
            ``([], "", "")`` on any failure.
        """
        if not self._ensure_connected():
            return [], "", ""

        try:
            command = (
                f"IMAGE_CAPTCHA:{dataset_name or ''}:"
                f"{num_images}:{num_correct}:{preview_image}"
            )
            header = self._request_json(command)

            if header.get("status") != "success":
                return [], "", ""

            images = [self._recv_frame() for _ in range(header.get("num_images", 0))]
            return (
                images,
                header.get("correct_indexes", ""),
                header.get("subject", ""),
            )
        except Exception as e:
            logger.error("Error getting captcha images: %s", e)
            # The stream may be mid-frame; drop it so the next call reconnects.
            self.close()
            return [], "", ""

    def get_captcha_audio(
        self,
        dataset_name: Optional[str] = None,
        num_chars: int = 6,
        language: str = "en",
    ) -> Tuple[List[bytes], str]:
        """
        Get captcha audio from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_chars: Number of characters in the audio captcha
            language: Language code for the audio

        Returns:
            Tuple of (audio files list, correct characters string);
            ``([], "")`` on any failure.
        """
        if not self._ensure_connected():
            return [], ""

        try:
            command = f"AUDIO_CAPTCHA:{dataset_name or ''}:{num_chars}:{language}"
            header = self._request_json(command)

            if header.get("status") != "success":
                return [], ""

            audio_files = [
                self._recv_frame() for _ in range(header.get("num_files", 0))
            ]
            return audio_files, header.get("correct_chars", "")
        except Exception as e:
            logger.error("Error getting captcha audio: %s", e)
            # The stream may be mid-frame; drop it so the next call reconnects.
            self.close()
            return [], ""

    def get_secret_key(self) -> bytes:
        """Get the secret key from the memory server.

        Returns:
            The key bytes, or ``b""`` if the server is unreachable or the
            reply is not a hex string.
        """
        if not self._ensure_connected():
            return b""

        try:
            reply = self._request_json("SECRET_KEY:")
        except json.JSONDecodeError:
            # A complete line was read, so the stream is still in sync.
            return b""
        except Exception as e:
            logger.error("Error getting secret key: %s", e)
            self.close()
            return b""

        if not isinstance(reply, str):
            return b""
        try:
            return bytes.fromhex(reply)
        except ValueError:
            return b""

    def close(self) -> None:
        """Close the connection to the memory server."""
        if self.socket:
            try:
                self.socket.close()
            except Exception:
                pass
            self.socket = None
819
+
820
+
821
def ensure_server_running(
    port: int = 9876,
    data_path: Optional[str] = None,
    image_dataset: Optional[str] = None,
    audio_dataset: Optional[str] = None,
) -> None:
    """Ensure that the memory server is running.

    Loads the requested captcha datasets into this process's MemoryServer,
    starts its background thread and waits until the port accepts
    connections.

    Args:
        port: TCP port of the memory server.
        data_path: Path of the cached IP set; defaults to ``IPSET_DATA_PATH``.
        image_dataset: Image captcha dataset to load, if any.
        audio_dataset: Audio captcha dataset to load, if any.

    The wait ends early, with an error logged, if the server thread has
    exited and nothing is listening on the port (for example because the IP
    set could not be downloaded); waiting longer could never succeed.
    """
    if data_path is None:
        data_path = IPSET_DATA_PATH

    server = MemoryServer(port=port, data_path=data_path)
    server.load_captcha_datasets(
        image_dataset=image_dataset, audio_dataset=audio_dataset
    )
    server.start()

    while not server.is_server_running():
        thread = server.server_thread
        if thread is None or not thread.is_alive():
            # The thread also exits when another process already serves the
            # port, so probe once more before reporting a failure.
            if not server.is_server_running():
                logger.error(
                    "Memory server thread exited before port %s became reachable",
                    port,
                )
            return
        time.sleep(0.1)