flask-Humanify 0.1.4__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flask_humanify/__init__.py +1 -1
- flask_humanify/datasets/ai_dogs.pkl +0 -0
- flask_humanify/datasets/animals.pkl +0 -0
- flask_humanify/datasets/characters.pkl +0 -0
- flask_humanify/features/rate_limiter.py +1 -1
- flask_humanify/humanify.py +392 -15
- flask_humanify/memory_server.py +836 -0
- flask_humanify/secret_key.bin +0 -0
- flask_humanify/templates/audio_challenge.html +208 -0
- flask_humanify/templates/grid_challenge.html +232 -0
- flask_humanify/templates/{oneclick_captcha.html → one_click_challenge.html} +4 -9
- flask_humanify/utils.py +422 -2
- {flask_humanify-0.1.4.dist-info → flask_humanify-0.2.0.dist-info}/METADATA +9 -4
- flask_humanify-0.2.0.dist-info/RECORD +20 -0
- flask_humanify/ipset.py +0 -315
- flask_humanify-0.1.4.dist-info/RECORD +0 -14
- {flask_humanify-0.1.4.dist-info → flask_humanify-0.2.0.dist-info}/WHEEL +0 -0
- {flask_humanify-0.1.4.dist-info → flask_humanify-0.2.0.dist-info}/licenses/LICENSE +0 -0
- {flask_humanify-0.1.4.dist-info → flask_humanify-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,836 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
import socket
|
4
|
+
import time
|
5
|
+
import threading
|
6
|
+
import os
|
7
|
+
import importlib.metadata
|
8
|
+
import importlib.resources
|
9
|
+
import urllib.request
|
10
|
+
import gzip
|
11
|
+
import pickle
|
12
|
+
import random
|
13
|
+
import secrets
|
14
|
+
from pathlib import Path
|
15
|
+
from typing import Dict, List, Optional, Tuple, Union
|
16
|
+
from datetime import datetime, timedelta
|
17
|
+
from netaddr import IPNetwork, IPAddress
|
18
|
+
|
19
|
+
|
20
|
+
# Module-wide logger; handlers and levels are left to the embedding application.
logger = logging.getLogger(__name__)


# Locate the package directory. When the "flask-humanify" distribution is
# installed it is resolved through importlib.resources; otherwise (for example
# a plain source checkout) the directory containing this file is used.
try:
    importlib.metadata.distribution("flask-humanify")
    BASE_DIR = importlib.resources.files("flask_humanify")
except importlib.metadata.PackageNotFoundError:
    BASE_DIR = Path(__file__).parent

# importlib.resources.files() is not guaranteed to return a pathlib.Path;
# normalise so the "/" joins and filesystem calls below always work.
if not isinstance(BASE_DIR, Path):
    BASE_DIR = Path(str(BASE_DIR))

# Directory holding the downloaded IP set and the captcha dataset pickles.
DATASET_DIR = BASE_DIR / "datasets"
if not DATASET_DIR.exists():
    # exist_ok=True closes the check-then-create race: another process that
    # imports this module at the same moment may create the directory between
    # the exists() test and mkdir(), which used to raise FileExistsError.
    DATASET_DIR.mkdir(parents=True, exist_ok=True)

# Kept as a str because it is passed to open()/os.path helpers as a data path.
IPSET_DATA_PATH = str(DATASET_DIR / "ipset.json")
SECRET_KEY_FILE = BASE_DIR / "secret_key.bin"
|
38
|
+
|
39
|
+
# Image captcha datasets: dataset name -> URL of the pickle file to download.
# All of them live in the same folder of the tn3w/Captcha_Datasets repository.
IMAGES_CAPTCHA_DATASETS = {
    dataset: (
        "https://raw.githubusercontent.com/tn3w/Captcha_Datasets/"
        "refs/heads/master/datasets/" + file_name
    )
    for dataset, file_name in (
        ("keys", "keys.pkl"),
        ("animals", "animals.pkl"),
        ("ai_dogs", "ai-dogs.pkl"),
    )
}

# Audio captcha datasets: dataset name -> URL of the pickle file to download.
AUDIO_CAPTCHA_DATASETS = dict(
    characters=(
        "https://raw.githubusercontent.com/librecap/audiocaptcha/"
        "refs/heads/main/characters/characters.pkl"
    ),
)
|
60
|
+
|
61
|
+
|
62
|
+
class MemoryServer:
|
63
|
+
"""A singleton memory server that manages IP sets and provides lookup functionality."""
|
64
|
+
|
65
|
+
_instance = None
|
66
|
+
_lock = threading.Lock()
|
67
|
+
|
68
|
+
def __new__(cls, port: int = 9876, data_path: Optional[str] = None):
|
69
|
+
if data_path is None:
|
70
|
+
data_path = IPSET_DATA_PATH
|
71
|
+
|
72
|
+
with cls._lock:
|
73
|
+
if cls._instance is None:
|
74
|
+
cls._instance = super(MemoryServer, cls).__new__(cls)
|
75
|
+
cls._instance.initialized = False
|
76
|
+
return cls._instance
|
77
|
+
|
78
|
+
def __init__(self, port: int = 9876, data_path: Optional[str] = None):
|
79
|
+
if data_path is None:
|
80
|
+
data_path = IPSET_DATA_PATH
|
81
|
+
|
82
|
+
if getattr(self, "initialized", False):
|
83
|
+
return
|
84
|
+
|
85
|
+
self.port = port
|
86
|
+
self.data_path = data_path
|
87
|
+
self.ip_to_groups: Dict[str, List[str]] = {}
|
88
|
+
self.cidrs_to_ips: Dict[IPNetwork, List[str]] = {}
|
89
|
+
self.last_update: Optional[datetime] = None
|
90
|
+
self.server_socket = None
|
91
|
+
self.server_thread = None
|
92
|
+
self.running = False
|
93
|
+
|
94
|
+
self.captcha_image_data: Dict[str, Dict[str, List[bytes]]] = {}
|
95
|
+
self.captcha_audio_data: Dict[str, Dict[str, Dict[str, List[bytes]]]] = {}
|
96
|
+
self.current_image_dataset: Optional[str] = None
|
97
|
+
self.current_audio_dataset: Optional[str] = None
|
98
|
+
self.secret_key: bytes = self._load_or_create_secret_key()
|
99
|
+
|
100
|
+
self.initialized = True
|
101
|
+
|
102
|
+
def _load_or_create_secret_key(self) -> bytes:
|
103
|
+
"""Load the secret key from file or create a new one if it doesn't exist."""
|
104
|
+
if SECRET_KEY_FILE.exists():
|
105
|
+
logger.info("Loading secret key from %s", SECRET_KEY_FILE)
|
106
|
+
with open(SECRET_KEY_FILE, "rb") as f:
|
107
|
+
return f.read()
|
108
|
+
|
109
|
+
logger.info("Generating new secret key")
|
110
|
+
secret_key = secrets.token_bytes(32)
|
111
|
+
with open(SECRET_KEY_FILE, "wb") as f:
|
112
|
+
f.write(secret_key)
|
113
|
+
|
114
|
+
return secret_key
|
115
|
+
|
116
|
+
def get_secret_key(self) -> bytes:
|
117
|
+
"""Return the secret key."""
|
118
|
+
return self.secret_key
|
119
|
+
|
120
|
+
def is_server_running(self) -> bool:
|
121
|
+
"""Check if the server is already running on the specified port."""
|
122
|
+
try:
|
123
|
+
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
124
|
+
s.connect(("127.0.0.1", self.port))
|
125
|
+
return True
|
126
|
+
except (ConnectionRefusedError, socket.error):
|
127
|
+
return False
|
128
|
+
|
129
|
+
def download_data(self, force: bool = False) -> bool:
|
130
|
+
"""Download IP set data from GitHub and update the timestamp."""
|
131
|
+
try:
|
132
|
+
if not force and os.path.exists(self.data_path):
|
133
|
+
with open(self.data_path, "r", encoding="utf-8") as f:
|
134
|
+
try:
|
135
|
+
data = json.load(f)
|
136
|
+
if isinstance(data, dict) and "_timestamp" in data:
|
137
|
+
timestamp = datetime.fromisoformat(data["_timestamp"])
|
138
|
+
if datetime.now() - timestamp < timedelta(days=7):
|
139
|
+
return True
|
140
|
+
except (json.JSONDecodeError, KeyError, ValueError):
|
141
|
+
pass
|
142
|
+
|
143
|
+
url = "https://raw.githubusercontent.com/tn3w/IPSet/refs/heads/master/ipset.json"
|
144
|
+
with urllib.request.urlopen(url, timeout=30) as response:
|
145
|
+
response_data = response.read().decode("utf-8")
|
146
|
+
|
147
|
+
data = json.loads(response_data)
|
148
|
+
data["_timestamp"] = datetime.now().isoformat()
|
149
|
+
|
150
|
+
with open(self.data_path, "w", encoding="utf-8") as f:
|
151
|
+
json.dump(data, f)
|
152
|
+
|
153
|
+
return True
|
154
|
+
except Exception as e:
|
155
|
+
logger.error("Error downloading IP set data: %s", e)
|
156
|
+
return False
|
157
|
+
|
158
|
+
def download_captcha_dataset(self, dataset_url: str, dataset_name: str) -> str:
|
159
|
+
"""Download a captcha dataset from the internet."""
|
160
|
+
filename = f"{dataset_name}.pkl"
|
161
|
+
file_path = os.path.join(DATASET_DIR, filename)
|
162
|
+
|
163
|
+
if os.path.exists(file_path):
|
164
|
+
return file_path
|
165
|
+
|
166
|
+
try:
|
167
|
+
urllib.request.urlretrieve(dataset_url, file_path)
|
168
|
+
return file_path
|
169
|
+
except Exception as e:
|
170
|
+
logger.error("Failed to download captcha dataset %s: %s", dataset_name, e)
|
171
|
+
return ""
|
172
|
+
|
173
|
+
def load_data(self) -> bool:
|
174
|
+
"""Load IP set data into memory."""
|
175
|
+
try:
|
176
|
+
with open(self.data_path, "r", encoding="utf-8") as f:
|
177
|
+
data = json.load(f)
|
178
|
+
|
179
|
+
if "_timestamp" in data:
|
180
|
+
self.last_update = datetime.fromisoformat(data.pop("_timestamp"))
|
181
|
+
|
182
|
+
self.ip_to_groups = {}
|
183
|
+
self.cidrs_to_ips = {}
|
184
|
+
|
185
|
+
for group, ips in data.items():
|
186
|
+
for ip in ips:
|
187
|
+
if "/" in ip:
|
188
|
+
try:
|
189
|
+
ip_obj = IPNetwork(ip)
|
190
|
+
if ip_obj not in self.cidrs_to_ips:
|
191
|
+
self.cidrs_to_ips[ip_obj] = []
|
192
|
+
self.cidrs_to_ips[ip_obj].append(group)
|
193
|
+
except Exception:
|
194
|
+
continue
|
195
|
+
continue
|
196
|
+
|
197
|
+
if ip not in self.ip_to_groups:
|
198
|
+
self.ip_to_groups[ip] = []
|
199
|
+
self.ip_to_groups[ip].append(group)
|
200
|
+
|
201
|
+
return True
|
202
|
+
except Exception as e:
|
203
|
+
logger.error("Error loading IP set data: %s", e)
|
204
|
+
return False
|
205
|
+
|
206
|
+
def load_captcha_datasets(
|
207
|
+
self, image_dataset: str = "animals", audio_dataset: Optional[str] = None
|
208
|
+
) -> bool:
|
209
|
+
"""Load captcha datasets into memory."""
|
210
|
+
try:
|
211
|
+
if (
|
212
|
+
self.current_image_dataset == image_dataset
|
213
|
+
and self.current_audio_dataset == audio_dataset
|
214
|
+
and (self.captcha_image_data or self.captcha_audio_data)
|
215
|
+
):
|
216
|
+
return True
|
217
|
+
|
218
|
+
self.current_image_dataset = image_dataset
|
219
|
+
self.current_audio_dataset = audio_dataset
|
220
|
+
|
221
|
+
if image_dataset in IMAGES_CAPTCHA_DATASETS:
|
222
|
+
dataset_url = IMAGES_CAPTCHA_DATASETS[image_dataset]
|
223
|
+
try:
|
224
|
+
dataset_path = self.download_captcha_dataset(
|
225
|
+
dataset_url, image_dataset
|
226
|
+
)
|
227
|
+
if dataset_path:
|
228
|
+
with open(dataset_path, "rb") as f:
|
229
|
+
data = pickle.load(f)
|
230
|
+
if data["type"] == "image":
|
231
|
+
first_image = data["keys"][next(iter(data["keys"]))][0]
|
232
|
+
if not first_image.startswith(b"\x89PNG\r\n\x1a\n"):
|
233
|
+
data["keys"] = {
|
234
|
+
k: [gzip.decompress(img) for img in v]
|
235
|
+
for k, v in data["keys"].items()
|
236
|
+
}
|
237
|
+
self.captcha_image_data = data
|
238
|
+
logger.info("Loaded %s image captcha dataset", image_dataset)
|
239
|
+
except Exception as e:
|
240
|
+
logger.error(
|
241
|
+
"Failed to load %s image captcha dataset: %s",
|
242
|
+
image_dataset,
|
243
|
+
e,
|
244
|
+
)
|
245
|
+
return False
|
246
|
+
|
247
|
+
if audio_dataset in AUDIO_CAPTCHA_DATASETS:
|
248
|
+
dataset_url = AUDIO_CAPTCHA_DATASETS[audio_dataset]
|
249
|
+
try:
|
250
|
+
dataset_path = self.download_captcha_dataset(
|
251
|
+
dataset_url, audio_dataset
|
252
|
+
)
|
253
|
+
if dataset_path:
|
254
|
+
with open(dataset_path, "rb") as f:
|
255
|
+
data = pickle.load(f)
|
256
|
+
self.captcha_audio_data = data
|
257
|
+
logger.info("Loaded %s audio captcha dataset", audio_dataset)
|
258
|
+
except Exception as e:
|
259
|
+
logger.error(
|
260
|
+
"Failed to load %s audio captcha dataset: %s",
|
261
|
+
audio_dataset,
|
262
|
+
e,
|
263
|
+
)
|
264
|
+
return False
|
265
|
+
|
266
|
+
return True
|
267
|
+
except Exception as e:
|
268
|
+
logger.error("Error loading captcha datasets: %s", e)
|
269
|
+
return False
|
270
|
+
|
271
|
+
def check_and_update_data(self) -> None:
|
272
|
+
"""Check if data needs updating and update if necessary."""
|
273
|
+
if self.last_update is None or datetime.now() - self.last_update > timedelta(
|
274
|
+
days=7
|
275
|
+
):
|
276
|
+
threading.Thread(target=self._async_update).start()
|
277
|
+
|
278
|
+
def _async_update(self) -> None:
|
279
|
+
"""Update data in the background without affecting current operations."""
|
280
|
+
if self.download_data(force=True):
|
281
|
+
self.load_data()
|
282
|
+
|
283
|
+
def find_matching_groups(self, ip: str) -> List[str]:
|
284
|
+
"""Find all groups matching the given IP."""
|
285
|
+
self.check_and_update_data()
|
286
|
+
|
287
|
+
matching_groups = self.ip_to_groups.get(ip, [])
|
288
|
+
|
289
|
+
try:
|
290
|
+
ip_obj = IPAddress(ip)
|
291
|
+
ip_version = ip_obj.version
|
292
|
+
|
293
|
+
for cidr, groups in self.cidrs_to_ips.items():
|
294
|
+
if cidr.version != ip_version:
|
295
|
+
continue
|
296
|
+
|
297
|
+
if ip_obj in cidr:
|
298
|
+
for group in groups:
|
299
|
+
if group not in matching_groups:
|
300
|
+
matching_groups.append(group)
|
301
|
+
|
302
|
+
except Exception:
|
303
|
+
return []
|
304
|
+
|
305
|
+
return matching_groups
|
306
|
+
|
307
|
+
def get_captcha_images(
|
308
|
+
self,
|
309
|
+
image_dataset: Optional[str] = None,
|
310
|
+
correct_index_range: Union[Tuple[int, int], int] = (2, 3),
|
311
|
+
num_images: int = 9,
|
312
|
+
preview_image: bool = False,
|
313
|
+
) -> Tuple[List[bytes], str, str]:
|
314
|
+
"""
|
315
|
+
Get captcha images for verification.
|
316
|
+
|
317
|
+
Args:
|
318
|
+
image_dataset: The image dataset to use. If None, uses the current dataset.
|
319
|
+
correct_index_range: The range of correct indexes to select.
|
320
|
+
num_images: The number of images to select.
|
321
|
+
preview_image: If True, add an additional correct image at the beginning of the list.
|
322
|
+
|
323
|
+
Returns:
|
324
|
+
A tuple containing:
|
325
|
+
- List of images
|
326
|
+
- A string containing indexes of correct images. (e.g., "034")
|
327
|
+
- The subject that represents the correct images (e.g., "smiling dog")
|
328
|
+
"""
|
329
|
+
if image_dataset:
|
330
|
+
self.load_captcha_datasets(image_dataset=image_dataset)
|
331
|
+
elif not self.captcha_image_data:
|
332
|
+
self.load_captcha_datasets()
|
333
|
+
|
334
|
+
if (
|
335
|
+
not self.captcha_image_data
|
336
|
+
or self.captcha_image_data.get("type") != "image"
|
337
|
+
or not self.captcha_image_data.get("keys")
|
338
|
+
):
|
339
|
+
logger.error("Image captcha dataset not loaded or invalid")
|
340
|
+
return [], "", ""
|
341
|
+
|
342
|
+
keys = self.captcha_image_data.get("keys", {})
|
343
|
+
if not keys:
|
344
|
+
logger.error("Invalid image captcha dataset structure")
|
345
|
+
return [], "", ""
|
346
|
+
|
347
|
+
all_keys = list(keys.keys())
|
348
|
+
if len(all_keys) <= 2:
|
349
|
+
correct_key = all_keys[0]
|
350
|
+
else:
|
351
|
+
correct_key = random.choice(all_keys)
|
352
|
+
|
353
|
+
correct_images = keys.get(correct_key, [])
|
354
|
+
|
355
|
+
incorrect_keys = [k for k in all_keys if k != correct_key]
|
356
|
+
incorrect_images = []
|
357
|
+
for k in incorrect_keys:
|
358
|
+
incorrect_images.extend(keys.get(k, []))
|
359
|
+
|
360
|
+
if not correct_images or not incorrect_images:
|
361
|
+
logger.error("Empty image lists in captcha dataset")
|
362
|
+
return [], "", ""
|
363
|
+
|
364
|
+
if isinstance(correct_index_range, int):
|
365
|
+
num_correct = correct_index_range
|
366
|
+
else:
|
367
|
+
num_correct = random.randint(correct_index_range[0], correct_index_range[1])
|
368
|
+
|
369
|
+
preview_correct_image = []
|
370
|
+
if preview_image:
|
371
|
+
preview_correct_image = [random.choice(correct_images)]
|
372
|
+
|
373
|
+
selected_correct = random.sample(
|
374
|
+
correct_images, min(num_correct, len(correct_images))
|
375
|
+
)
|
376
|
+
|
377
|
+
num_incorrect = num_images - len(selected_correct)
|
378
|
+
selected_incorrect = random.sample(
|
379
|
+
incorrect_images, min(num_incorrect, len(incorrect_images))
|
380
|
+
)
|
381
|
+
|
382
|
+
all_images = selected_correct + selected_incorrect
|
383
|
+
|
384
|
+
combined = list(
|
385
|
+
zip(all_images, [i < len(selected_correct) for i in range(len(all_images))])
|
386
|
+
)
|
387
|
+
random.shuffle(combined)
|
388
|
+
all_images, is_correct = zip(*combined)
|
389
|
+
correct_indexes = [i for i, correct in enumerate(is_correct) if correct]
|
390
|
+
|
391
|
+
all_images = preview_correct_image + list(all_images)
|
392
|
+
|
393
|
+
correct_indexes_str = "".join(str(i) for i in correct_indexes)
|
394
|
+
|
395
|
+
return list(all_images), correct_indexes_str, correct_key
|
396
|
+
|
397
|
+
def get_captcha_audio(
|
398
|
+
self,
|
399
|
+
audio_dataset: Optional[str] = None,
|
400
|
+
num_chars: int = 6,
|
401
|
+
language: str = "en",
|
402
|
+
) -> Tuple[List[bytes], str]:
|
403
|
+
"""
|
404
|
+
Get captcha audio for verification.
|
405
|
+
|
406
|
+
Args:
|
407
|
+
audio_dataset: The audio dataset to use. If None, uses the current dataset.
|
408
|
+
num_chars: The number of characters to include in the audio captcha.
|
409
|
+
language: The language code for the audio files.
|
410
|
+
|
411
|
+
Returns:
|
412
|
+
A tuple containing:
|
413
|
+
- List of audio file bytes
|
414
|
+
- The correct characters string
|
415
|
+
"""
|
416
|
+
if audio_dataset:
|
417
|
+
self.load_captcha_datasets(audio_dataset=audio_dataset)
|
418
|
+
elif not self.captcha_audio_data:
|
419
|
+
self.load_captcha_datasets(audio_dataset="characters")
|
420
|
+
|
421
|
+
if (
|
422
|
+
not self.captcha_audio_data
|
423
|
+
or self.captcha_audio_data.get("type") != "audio"
|
424
|
+
or not self.captcha_audio_data.get("keys")
|
425
|
+
):
|
426
|
+
logger.error("Audio captcha dataset not loaded or invalid")
|
427
|
+
return [], ""
|
428
|
+
|
429
|
+
keys = self.captcha_audio_data.get("keys", {})
|
430
|
+
if not keys:
|
431
|
+
logger.error("Invalid audio captcha dataset structure")
|
432
|
+
return [], ""
|
433
|
+
|
434
|
+
available_chars = list(keys.keys())
|
435
|
+
|
436
|
+
selected_chars = random.choices(available_chars, k=num_chars)
|
437
|
+
correct_chars_str = "".join(selected_chars)
|
438
|
+
|
439
|
+
audio_files = []
|
440
|
+
for char in selected_chars:
|
441
|
+
try:
|
442
|
+
audio_files.append(keys[char][language])
|
443
|
+
except KeyError:
|
444
|
+
logger.error(
|
445
|
+
"Error getting audio for character %s in language %s",
|
446
|
+
char,
|
447
|
+
language,
|
448
|
+
)
|
449
|
+
|
450
|
+
if not audio_files:
|
451
|
+
logger.error("No audio files selected")
|
452
|
+
return [], ""
|
453
|
+
|
454
|
+
return audio_files, correct_chars_str
|
455
|
+
|
456
|
+
def handle_client(
|
457
|
+
self, client_socket: socket.socket, addr: Tuple[str, int]
|
458
|
+
) -> None:
|
459
|
+
"""Handle client connection and queries."""
|
460
|
+
try:
|
461
|
+
while True:
|
462
|
+
data = client_socket.recv(1024).decode("utf-8").strip()
|
463
|
+
if not data:
|
464
|
+
break
|
465
|
+
|
466
|
+
if data.startswith("IPSET:"):
|
467
|
+
ip = data[6:]
|
468
|
+
result = self.find_matching_groups(ip)
|
469
|
+
response = json.dumps(result)
|
470
|
+
elif data.startswith("IMAGE_CAPTCHA:"):
|
471
|
+
parts = data.split(":")
|
472
|
+
dataset_name = parts[1] if len(parts) > 1 else None
|
473
|
+
num_images = (
|
474
|
+
int(parts[2]) if len(parts) > 2 and parts[2].isdigit() else 9
|
475
|
+
)
|
476
|
+
correct_range = (
|
477
|
+
int(parts[3])
|
478
|
+
if len(parts) > 3 and parts[3].isdigit()
|
479
|
+
else (2, 3)
|
480
|
+
)
|
481
|
+
preview = parts[4].lower() == "true" if len(parts) > 4 else False
|
482
|
+
|
483
|
+
images, correct_indexes, subject = self.get_captcha_images(
|
484
|
+
image_dataset=dataset_name,
|
485
|
+
num_images=num_images,
|
486
|
+
correct_index_range=correct_range,
|
487
|
+
preview_image=preview,
|
488
|
+
)
|
489
|
+
|
490
|
+
response_data = {
|
491
|
+
"status": "success" if images else "error",
|
492
|
+
"correct_indexes": correct_indexes,
|
493
|
+
"subject": subject,
|
494
|
+
"num_images": len(images),
|
495
|
+
}
|
496
|
+
response = json.dumps(response_data)
|
497
|
+
client_socket.send(f"{response}\n".encode("utf-8"))
|
498
|
+
|
499
|
+
for img in images:
|
500
|
+
size_bytes = len(img).to_bytes(4, byteorder="big")
|
501
|
+
client_socket.send(size_bytes)
|
502
|
+
client_socket.send(img)
|
503
|
+
continue
|
504
|
+
|
505
|
+
elif data.startswith("AUDIO_CAPTCHA:"):
|
506
|
+
parts = data.split(":")
|
507
|
+
dataset_name = parts[1] if len(parts) > 1 else None
|
508
|
+
num_chars = (
|
509
|
+
int(parts[2]) if len(parts) > 2 and parts[2].isdigit() else 6
|
510
|
+
)
|
511
|
+
language = parts[3] if len(parts) > 3 else "en"
|
512
|
+
|
513
|
+
audio_files, correct_chars = self.get_captcha_audio(
|
514
|
+
audio_dataset=dataset_name,
|
515
|
+
num_chars=num_chars,
|
516
|
+
language=language,
|
517
|
+
)
|
518
|
+
|
519
|
+
response_data = {
|
520
|
+
"status": "success" if audio_files else "error",
|
521
|
+
"correct_chars": correct_chars,
|
522
|
+
"num_files": len(audio_files),
|
523
|
+
}
|
524
|
+
response = json.dumps(response_data)
|
525
|
+
client_socket.send(f"{response}\n".encode("utf-8"))
|
526
|
+
|
527
|
+
for audio in audio_files:
|
528
|
+
size_bytes = len(audio).to_bytes(4, byteorder="big")
|
529
|
+
client_socket.send(size_bytes)
|
530
|
+
client_socket.send(audio)
|
531
|
+
continue
|
532
|
+
elif data.startswith("SECRET_KEY:"):
|
533
|
+
secret_key = self.get_secret_key()
|
534
|
+
hex_key = secret_key.hex()
|
535
|
+
response = json.dumps(hex_key)
|
536
|
+
else:
|
537
|
+
result = self.find_matching_groups(data)
|
538
|
+
response = json.dumps(result)
|
539
|
+
|
540
|
+
client_socket.send(f"{response}\n".encode("utf-8"))
|
541
|
+
except Exception as e:
|
542
|
+
logger.error("Error handling client %s: %s", addr, e)
|
543
|
+
finally:
|
544
|
+
client_socket.close()
|
545
|
+
|
546
|
+
def run_server(self) -> None:
|
547
|
+
"""Run the memory server."""
|
548
|
+
if self.is_server_running():
|
549
|
+
logger.info("Server already running on port %s", self.port)
|
550
|
+
return
|
551
|
+
|
552
|
+
if not os.path.exists(self.data_path):
|
553
|
+
logger.info("IP data file not found at %s, downloading...", self.data_path)
|
554
|
+
if not self.download_data():
|
555
|
+
logger.error("Failed to download data, cannot start server")
|
556
|
+
return
|
557
|
+
|
558
|
+
if not self.load_data():
|
559
|
+
logger.error("Failed to load data, cannot start server")
|
560
|
+
return
|
561
|
+
|
562
|
+
self.check_and_update_data()
|
563
|
+
self.load_captcha_datasets(image_dataset="animals", audio_dataset="characters")
|
564
|
+
|
565
|
+
try:
|
566
|
+
self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
567
|
+
self.server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
568
|
+
self.server_socket.bind(("0.0.0.0", self.port))
|
569
|
+
self.server_socket.listen(10)
|
570
|
+
self.running = True
|
571
|
+
|
572
|
+
logger.info(
|
573
|
+
"Memory server started on port %s with data from %s",
|
574
|
+
self.port,
|
575
|
+
self.data_path,
|
576
|
+
)
|
577
|
+
|
578
|
+
while self.running:
|
579
|
+
try:
|
580
|
+
client_socket, addr = self.server_socket.accept()
|
581
|
+
client_thread = threading.Thread(
|
582
|
+
target=self.handle_client, args=(client_socket, addr)
|
583
|
+
)
|
584
|
+
client_thread.daemon = True
|
585
|
+
client_thread.start()
|
586
|
+
except Exception as e:
|
587
|
+
if self.running:
|
588
|
+
logger.error("Error accepting connection: %s", e)
|
589
|
+
|
590
|
+
except Exception as e:
|
591
|
+
logger.error("Server error: %s", e)
|
592
|
+
finally:
|
593
|
+
if self.server_socket:
|
594
|
+
self.server_socket.close()
|
595
|
+
|
596
|
+
def start(self) -> None:
|
597
|
+
"""Start the server in a background thread."""
|
598
|
+
if self.server_thread and self.server_thread.is_alive():
|
599
|
+
return
|
600
|
+
|
601
|
+
self.server_thread = threading.Thread(target=self.run_server)
|
602
|
+
self.server_thread.daemon = True
|
603
|
+
self.server_thread.start()
|
604
|
+
|
605
|
+
def stop(self) -> None:
|
606
|
+
"""Stop the server."""
|
607
|
+
self.running = False
|
608
|
+
if self.server_socket:
|
609
|
+
self.server_socket.close()
|
610
|
+
|
611
|
+
|
612
|
+
class MemoryClient:
    """Client for the text/binary protocol spoken by ``MemoryServer``.

    A single TCP connection is opened lazily and reused across calls. When a
    request fails the connection is dropped and the request is retried once on
    a fresh connection, so a restarted server or a half-read response does not
    poison later calls.
    """

    def __init__(self, host: str = "127.0.0.1", port: int = 9876):
        """Remember the server address; no connection is opened yet."""
        self.host = host
        self.port = port
        self.socket = None

    def connect(self) -> bool:
        """Open a new connection to the memory server.

        Any previous connection is closed first. ``self.socket`` is set only
        when the connection succeeded; leaving an unconnected socket behind
        made every later call skip reconnecting and fail.

        Returns:
            True if connected, False otherwise (the error is logged).
        """
        self.close()

        sock = None
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            sock.connect((self.host, self.port))
        except Exception as e:
            logger.error("Failed to connect to memory server: %s", e)
            if sock is not None:
                sock.close()
            return False

        self.socket = sock
        return True

    def _recv_exact(self, size: int) -> bytes:
        """Read exactly ``size`` bytes; raise ConnectionError on early EOF."""
        chunks = []
        remaining = size
        while remaining > 0:
            chunk = self.socket.recv(min(remaining, 4096))
            if not chunk:
                raise ConnectionError("memory server closed the connection mid-message")
            chunks.append(chunk)
            remaining -= len(chunk)
        return b"".join(chunks)

    def _recv_json_line(self):
        """Read one newline-terminated line and decode it as JSON.

        Bytes are read one at a time on purpose: binary frames may follow the
        line directly and must not be consumed here.
        """
        line = bytearray()
        while True:
            byte = self.socket.recv(1)
            if not byte:
                raise ConnectionError("memory server closed the connection mid-message")
            if byte == b"\n":
                break
            line += byte
        return json.loads(line.decode("utf-8").strip())

    def _recv_frames(self, count: int) -> List[bytes]:
        """Read ``count`` blobs, each prefixed by a 4-byte big-endian size."""
        frames = []
        for _ in range(count):
            size = int.from_bytes(self._recv_exact(4), byteorder="big")
            frames.append(self._recv_exact(size))
        return frames

    def _request(self, command: str, read_reply, action: str):
        """Send ``command`` and return ``read_reply()``, retrying once.

        Args:
            command: Request line without the trailing newline.
            read_reply: Zero-argument callable that reads and parses the
                response from ``self.socket``.
            action: Short description used in the error log message.

        Returns:
            The value returned by ``read_reply``, or None if the server could
            not be reached or both attempts failed.
        """
        for _ in range(2):
            if not self.socket and not self.connect():
                return None
            try:
                self.socket.sendall(f"{command}\n".encode("utf-8"))
                return read_reply()
            except Exception as e:
                logger.error("Error %s: %s", action, e)
                # The stream position is unknown after a failure; start over
                # on a new connection rather than reading leftover bytes.
                self.close()
        return None

    def lookup_ip(self, ip: str) -> List[str]:
        """Look up an IP in the memory server.

        Returns:
            The groups reported by the server, or an empty list on any failure.
        """
        reply = self._request(f"IPSET:{ip}", self._recv_json_line, "looking up IP")
        return reply if isinstance(reply, list) else []

    def get_captcha_images(
        self,
        dataset_name: Optional[str] = None,
        num_images: int = 9,
        num_correct: Union[int, Tuple[int, int]] = (2, 3),
        preview_image: bool = False,
    ) -> Tuple[List[bytes], str, str]:
        """
        Get captcha images from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_images: Number of images to return
            num_correct: Number or range of correct images
            preview_image: Whether to include a preview image

        Returns:
            Tuple of (images list, correct indexes string, subject);
            ``([], "", "")`` on any failure or when the server reports an error.
        """
        command = (
            f"IMAGE_CAPTCHA:{dataset_name or ''}:"
            f"{num_images}:{num_correct}:{preview_image}"
        )

        def read_reply():
            header = self._recv_json_line()
            if header.get("status") != "success":
                return [], "", ""
            images = self._recv_frames(header.get("num_images", 0))
            return images, header.get("correct_indexes", ""), header.get("subject", "")

        reply = self._request(command, read_reply, "getting captcha images")
        return reply if reply is not None else ([], "", "")

    def get_captcha_audio(
        self,
        dataset_name: Optional[str] = None,
        num_chars: int = 6,
        language: str = "en",
    ) -> Tuple[List[bytes], str]:
        """
        Get captcha audio from the memory server.

        Args:
            dataset_name: The name of the dataset to use
            num_chars: Number of characters in the audio captcha
            language: Language code for the audio

        Returns:
            Tuple of (audio files list, correct characters string);
            ``([], "")`` on any failure or when the server reports an error.
        """
        command = f"AUDIO_CAPTCHA:{dataset_name or ''}:{num_chars}:{language}"

        def read_reply():
            header = self._recv_json_line()
            if header.get("status") != "success":
                return [], ""
            audio_files = self._recv_frames(header.get("num_files", 0))
            return audio_files, header.get("correct_chars", "")

        reply = self._request(command, read_reply, "getting captcha audio")
        return reply if reply is not None else ([], "")

    def get_secret_key(self) -> bytes:
        """Get the secret key from the memory server.

        Returns:
            The key bytes, or ``b""`` if it could not be obtained.
        """

        def read_reply():
            payload = self._recv_json_line()
            # The server sends the key as a JSON string of hex digits.
            return bytes.fromhex(payload) if isinstance(payload, str) else b""

        reply = self._request("SECRET_KEY:", read_reply, "getting secret key")
        return reply if reply is not None else b""

    def close(self) -> None:
        """Close the connection to the memory server (safe to call repeatedly)."""
        if self.socket:
            try:
                self.socket.close()
            except Exception:
                pass
            self.socket = None
|
817
|
+
|
818
|
+
|
819
|
+
def ensure_server_running(
    port: int = 9876,
    data_path: Optional[str] = None,
    image_dataset: str = "animals",
    audio_dataset: str = "characters",
) -> None:
    """Ensure that the memory server is running.

    Loads the requested captcha datasets into the shared ``MemoryServer``,
    starts its background thread and waits until something accepts
    connections on ``port`` (this process or another one).

    Args:
        port: TCP port of the memory server.
        data_path: Path of the IP-set JSON file; defaults to ``IPSET_DATA_PATH``.
        image_dataset: Image captcha dataset to load.
        audio_dataset: Audio captcha dataset to load.

    Raises:
        RuntimeError: If the server thread has exited and nothing is
            listening on the port. Previously this case waited forever.
    """
    if data_path is None:
        data_path = IPSET_DATA_PATH

    server = MemoryServer(port=port, data_path=data_path)
    server.load_captcha_datasets(
        image_dataset=image_dataset, audio_dataset=audio_dataset
    )
    server.start()

    # Polls still allowed after the server thread has died. The thread also
    # exits when another process owns the port, and that process may be a
    # moment away from listening, so allow a short grace period.
    grace_polls = 10

    while not server.is_server_running():
        thread = server.server_thread
        if thread is None or not thread.is_alive():
            if grace_polls <= 0:
                raise RuntimeError(
                    f"Memory server failed to start on port {port}; see log for details"
                )
            grace_polls -= 1
        time.sleep(0.1)
|