flask-Humanify 0.1.4__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -4,7 +4,7 @@ Flask-Humanify
4
4
  A Flask extension that protects against bots and DDoS attacks.
5
5
  """
6
6
 
7
- __version__ = "0.1.4"
7
+ __version__ = "0.2.1"
8
8
 
9
9
  from . import utils
10
10
  from .humanify import Humanify
Binary file
Binary file
Binary file
Binary file
@@ -11,7 +11,7 @@ class RateLimiter:
11
11
  Rate limiter.
12
12
  """
13
13
 
14
- def __init__(self, app=None, max_requests: int = 2, time_window: int = 10):
14
+ def __init__(self, app=None, max_requests: int = 10, time_window: int = 10):
15
15
  """
16
16
  Initialize the rate limiter.
17
17
  """
@@ -1,11 +1,35 @@
1
1
  from dataclasses import dataclass
2
2
  import logging
3
- from typing import List, Optional
3
+ import random
4
+ from typing import List, Optional, Union, Dict, Any, Pattern
5
+ import re
6
+ import fnmatch
4
7
 
5
8
  from werkzeug.wrappers import Response
6
- from flask import Blueprint, request, render_template, redirect, url_for, current_app
7
- from .ipset import IPSetClient, ensure_server_running
8
- from .utils import get_client_ip, get_return_url
9
+ from flask import (
10
+ Blueprint,
11
+ request,
12
+ render_template,
13
+ redirect,
14
+ url_for,
15
+ current_app,
16
+ g,
17
+ abort,
18
+ )
19
+ from .memory_server import MemoryClient, ensure_server_running
20
+ from .utils import (
21
+ get_client_ip,
22
+ get_return_url,
23
+ validate_clearance_token,
24
+ generate_user_hash,
25
+ manipulate_image_bytes,
26
+ image_bytes_to_data_url,
27
+ generate_captcha_token,
28
+ validate_captcha_token,
29
+ generate_clearance_token,
30
+ combine_audio_files,
31
+ audio_bytes_to_data_url,
32
+ )
9
33
 
10
34
 
11
35
  VPN_PROVIDERS = [
@@ -19,6 +43,26 @@ VPN_PROVIDERS = [
19
43
  "Mullvad",
20
44
  ]
21
45
 
46
+ IMAGE_CAPTCHA_MAPPING = {
47
+ "grid": {
48
+ "num_correct": (2, 3),
49
+ "num_images": 9,
50
+ "preview_image": False,
51
+ "hardness_range": (1, 3),
52
+ },
53
+ "one_click": {
54
+ "num_correct": 1,
55
+ "num_images": 6,
56
+ "preview_image": True,
57
+ "hardness_range": (1, 3),
58
+ },
59
+ }
60
+
61
+ AUDIO_CAPTCHA_CONFIG = {
62
+ "num_chars": 6,
63
+ "language": "en",
64
+ }
65
+
22
66
  logger = logging.getLogger(__name__)
23
67
 
24
68
 
@@ -84,8 +128,17 @@ class Humanify:
84
128
  Protect against bots and DDoS attacks.
85
129
  """
86
130
 
87
- def __init__(self, app=None):
131
+ def __init__(
132
+ self,
133
+ app=None,
134
+ challenge_type: str = "one_click",
135
+ image_dataset: Optional[str] = "ai_dogs",
136
+ audio_dataset: Optional[str] = None,
137
+ ):
88
138
  self.app = app
139
+ self.challenge_type = challenge_type
140
+ self.image_dataset = image_dataset
141
+ self.audio_dataset = audio_dataset
89
142
  if app is not None:
90
143
  self.init_app(app)
91
144
 
@@ -95,9 +148,13 @@ class Humanify:
95
148
  """
96
149
  self.app = app
97
150
 
98
- ensure_server_running()
99
- self.ipset_client = IPSetClient()
100
- self.ipset_client.connect()
151
+ ensure_server_running(
152
+ image_dataset=self.image_dataset,
153
+ audio_dataset=self.audio_dataset,
154
+ )
155
+ self.memory_client = MemoryClient()
156
+ self.memory_client.connect()
157
+ self._secret_key = self.memory_client.get_secret_key()
101
158
 
102
159
  self.blueprint = Blueprint(
103
160
  "humanify", __name__, template_folder="templates", static_folder=None
@@ -108,6 +165,48 @@ class Humanify:
108
165
  def _register_routes(self) -> None:
109
166
  """Register the humanify routes."""
110
167
 
168
+ @self.blueprint.route("/humanify/challenge", methods=["GET"])
169
+ def challenge():
170
+ """
171
+ Challenge route.
172
+ """
173
+ if self.image_dataset is None:
174
+ return self._render_challenge(is_audio=True)
175
+
176
+ return self._render_challenge()
177
+
178
+ @self.blueprint.route("/humanify/audio_challenge", methods=["GET"])
179
+ def audio_challenge():
180
+ """
181
+ Audio challenge route.
182
+ """
183
+ if self.audio_dataset is None:
184
+ return redirect(
185
+ url_for("humanify.challenge", return_url=request.full_path)
186
+ )
187
+
188
+ return self._render_challenge(is_audio=True)
189
+
190
+ @self.blueprint.route("/humanify/verify", methods=["POST"])
191
+ def verify():
192
+ """
193
+ Verify route.
194
+ """
195
+ if self.image_dataset is None:
196
+ abort(404)
197
+
198
+ return self._verify_captcha()
199
+
200
+ @self.blueprint.route("/humanify/verify_audio", methods=["POST"])
201
+ def verify_audio():
202
+ """
203
+ Verify audio route.
204
+ """
205
+ if self.audio_dataset is None:
206
+ abort(404)
207
+
208
+ return self._verify_audio_captcha()
209
+
111
210
  @self.blueprint.route("/humanify/access_denied", methods=["GET"])
112
211
  def access_denied():
113
212
  """
@@ -121,38 +220,478 @@ class Humanify:
121
220
  {"Cache-Control": "public, max-age=15552000"},
122
221
  )
123
222
 
124
- def register_middleware(self, action: str = "deny_access"):
125
- """
126
- Register the middleware.
223
+ def register_middleware(
224
+ self,
225
+ action: str = "challenge",
226
+ endpoint_patterns: Union[str, List[str], None] = None,
227
+ url_patterns: Union[str, List[str], None] = None,
228
+ exclude_patterns: Union[str, List[str], None] = None,
229
+ request_filters: Optional[Dict[str, Any]] = None,
230
+ ):
127
231
  """
232
+ Register the middleware with advanced filtering options.
128
233
 
234
+ Args:
235
+ action: The action to take when a bot is detected ('challenge' or 'deny_access')
236
+ endpoint_patterns: Endpoint patterns to match (regex or glob patterns)
237
+ url_patterns: URL patterns to match (regex or glob patterns)
238
+ exclude_patterns: Patterns to exclude from protection (regex or glob patterns)
239
+ request_filters: Dict of request attributes and values to filter by
240
+ """
129
241
  self.app = self.app or current_app
130
242
 
243
+ if isinstance(endpoint_patterns, str):
244
+ endpoint_patterns = [endpoint_patterns]
245
+ if isinstance(url_patterns, str):
246
+ url_patterns = [url_patterns]
247
+ if isinstance(exclude_patterns, str):
248
+ exclude_patterns = [exclude_patterns]
249
+
250
+ compiled_endpoint_patterns = (
251
+ self._compile_patterns(endpoint_patterns) if endpoint_patterns else None
252
+ )
253
+ compiled_url_patterns = (
254
+ self._compile_patterns(url_patterns) if url_patterns else None
255
+ )
256
+ compiled_exclude_patterns = (
257
+ self._compile_patterns(exclude_patterns) if exclude_patterns else None
258
+ )
259
+
131
260
  @self.app.before_request
132
261
  def before_request():
133
262
  """
134
- Before request hook.
263
+ Before request hook with advanced filtering.
135
264
  """
136
- if request.endpoint in ["humanify.rate_limited", "humanify.access_denied"]:
265
+ if request.endpoint and request.endpoint.startswith("humanify."):
266
+ return
267
+
268
+ current_endpoint = request.endpoint or ""
269
+ current_path = request.path
270
+
271
+ if compiled_exclude_patterns and self._matches_any_pattern(
272
+ current_endpoint, current_path, compiled_exclude_patterns
273
+ ):
137
274
  return
138
275
 
139
- if self.is_bot:
276
+ patterns_specified = (
277
+ compiled_endpoint_patterns is not None
278
+ or compiled_url_patterns is not None
279
+ )
280
+
281
+ matches_endpoint = not patterns_specified or (
282
+ compiled_endpoint_patterns
283
+ and self._matches_any_pattern(
284
+ current_endpoint, None, compiled_endpoint_patterns
285
+ )
286
+ )
287
+
288
+ matches_url = not patterns_specified or (
289
+ compiled_url_patterns
290
+ and self._matches_any_pattern(None, current_path, compiled_url_patterns)
291
+ )
292
+
293
+ matches_request_filters = (
294
+ not request_filters or self._matches_request_filters(request_filters)
295
+ )
296
+
297
+ if (
298
+ (matches_endpoint or matches_url)
299
+ and matches_request_filters
300
+ and self.is_bot
301
+ ):
302
+ if action == "challenge":
303
+ return self.challenge()
140
304
  if action == "deny_access":
141
305
  return self.deny_access()
142
306
 
307
+ def _compile_patterns(self, patterns):
308
+ """
309
+ Compile a list of patterns into regex patterns.
310
+ Handles glob patterns like * and ? by converting them to regex.
311
+ """
312
+ compiled = []
313
+ for pattern in patterns:
314
+ if pattern is None:
315
+ continue
316
+
317
+ if "*" in pattern or "?" in pattern:
318
+ regex_pattern = fnmatch.translate(pattern)
319
+ compiled.append(re.compile(regex_pattern))
320
+ else:
321
+ try:
322
+ compiled.append(re.compile(pattern))
323
+ except re.error:
324
+ compiled.append(re.compile(re.escape(pattern)))
325
+
326
+ return compiled
327
+
328
+ def _matches_any_pattern(
329
+ self,
330
+ endpoint: Optional[str],
331
+ path: Optional[str],
332
+ compiled_patterns: List[Pattern],
333
+ ):
334
+ """
335
+ Check if the current endpoint or path matches any of the compiled patterns.
336
+ """
337
+ for pattern in compiled_patterns:
338
+ if endpoint is not None and pattern.search(endpoint):
339
+ return True
340
+ if path is not None and pattern.search(path):
341
+ return True
342
+ return False
343
+
344
+ def _matches_request_filters(self, request_filters: Dict[str, Any]) -> bool:
345
+ """
346
+ Check if the current request matches all the specified filters.
347
+ Filters can target any attribute of the request object or its nested properties.
348
+ """
349
+ for key, value in request_filters.items():
350
+ parts = key.split(".")
351
+ obj = request
352
+
353
+ for part in parts[:-1]:
354
+ if hasattr(obj, part):
355
+ obj = getattr(obj, part)
356
+ elif isinstance(obj, dict) and part in obj:
357
+ obj = obj[part]
358
+ else:
359
+ return False
360
+
361
+ final_attr = parts[-1]
362
+
363
+ if hasattr(obj, final_attr):
364
+ attr_value = getattr(obj, final_attr)
365
+ elif isinstance(obj, dict) and final_attr in obj:
366
+ attr_value = obj[final_attr]
367
+ else:
368
+ return False
369
+
370
+ if isinstance(value, str) and value.startswith("regex:"):
371
+ regex_pattern = value[6:]
372
+ try:
373
+ if not re.search(regex_pattern, str(attr_value)):
374
+ return False
375
+ except (re.error, TypeError):
376
+ return False
377
+ elif isinstance(value, list):
378
+ if attr_value not in value:
379
+ return False
380
+ elif attr_value != value:
381
+ return False
382
+
383
+ return True
384
+
385
+ @property
386
+ def client_ip(self) -> Optional[str]:
387
+ """Get the client IP address."""
388
+ if hasattr(g, "humanify_client_ip"):
389
+ return g.humanify_client_ip
390
+
391
+ client_ip = get_client_ip(request)
392
+ g.humanify_client_ip = client_ip
393
+ return client_ip
394
+
143
395
  @property
144
- def is_bot(self) -> HumanifyResult:
396
+ def check_result(self) -> HumanifyResult:
145
397
  """
146
398
  Check if the IP is a bot.
147
399
  """
148
- ip = get_client_ip(request)
149
- if ip is None:
150
- return HumanifyResult(ip=ip, is_invalid_ip=True)
151
- ip_groups = self.ipset_client.lookup_ip(ip)
152
- return HumanifyResult.from_ip_groups(ip, ip_groups)
400
+ if self.client_ip is None:
401
+ return HumanifyResult(ip=self.client_ip, is_invalid_ip=True)
402
+
403
+ if hasattr(g, "humanify_ip_groups"):
404
+ humanify_ip_groups = g.humanify_ip_groups
405
+ if isinstance(humanify_ip_groups, list):
406
+ return HumanifyResult.from_ip_groups(self.client_ip, humanify_ip_groups)
407
+
408
+ ip_groups = self.memory_client.lookup_ip(self.client_ip)
409
+ g.humanify_ip_groups = ip_groups
410
+ return HumanifyResult.from_ip_groups(self.client_ip, ip_groups)
411
+
412
+ @property
413
+ def has_valid_clearance_token(self) -> bool:
414
+ """Check if the current client has a valid clearance token."""
415
+ return validate_clearance_token(
416
+ request.cookies.get("clearance_token", ""),
417
+ self._secret_key,
418
+ generate_user_hash(
419
+ self.client_ip or "127.0.0.1",
420
+ request.user_agent.string or "",
421
+ ),
422
+ )
423
+
424
+ @property
425
+ def is_bot(self) -> bool:
426
+ """Check if the current client is a bot."""
427
+ return not self.has_valid_clearance_token and self.check_result.is_bot
153
428
 
154
429
  def deny_access(self) -> Response:
155
430
  """
156
431
  Redirect to the access denied page.
157
432
  """
158
433
  return redirect(url_for("humanify.access_denied", return_url=request.full_path))
434
+
435
+ def challenge(self) -> Response:
436
+ """
437
+ Challenge the client.
438
+ """
439
+ return redirect(url_for("humanify.challenge", return_url=request.full_path))
440
+
441
+ def _render_challenge(self, is_audio: bool = False) -> Response:
442
+ return_url = get_return_url(request)
443
+ if self.has_valid_clearance_token:
444
+ return redirect(return_url)
445
+
446
+ error = request.args.get("error", None)
447
+ if error not in [
448
+ "Invalid captcha token",
449
+ "Wrong selection. Try again.",
450
+ "Wrong response. Try again.",
451
+ ]:
452
+ error = None
453
+
454
+ if is_audio:
455
+ return self._render_audio_challenge(return_url, error)
456
+
457
+ if self.challenge_type in ["grid", "one_click"]:
458
+ return self._render_image_challenge(return_url, error)
459
+
460
+ abort(404, "Invalid challenge type")
461
+
462
+ def _render_image_challenge(
463
+ self, return_url: str, error: Optional[str]
464
+ ) -> Response:
465
+ """
466
+ Render the image challenge.
467
+ """
468
+
469
+ captcha_config = IMAGE_CAPTCHA_MAPPING[self.challenge_type]
470
+ use_preview_image = captcha_config["preview_image"]
471
+
472
+ images_bytes, correct_indexes, subject = self.memory_client.get_captcha_images(
473
+ num_correct=captcha_config["num_correct"],
474
+ num_images=captcha_config["num_images"],
475
+ preview_image=use_preview_image,
476
+ dataset_name=self.image_dataset,
477
+ )
478
+
479
+ if not images_bytes:
480
+ abort(500, "Could not load captcha images")
481
+
482
+ processed_images = []
483
+ for i, img_bytes in enumerate(images_bytes):
484
+ try:
485
+ distorted_img_bytes = manipulate_image_bytes(
486
+ img_bytes,
487
+ is_small=not (i == 0 and use_preview_image),
488
+ hardness=random.randint(
489
+ captcha_config["hardness_range"][0],
490
+ captcha_config["hardness_range"][1],
491
+ ),
492
+ )
493
+ processed_images.append(image_bytes_to_data_url(distorted_img_bytes))
494
+ except Exception as e:
495
+ current_app.logger.error(f"Error processing image: {e}")
496
+ processed_images.append(
497
+ (
498
+ ""
499
+ "CAQAAAC1HAwCAAAAC0lEQVR42mNk+A8AAQUBAScY42YAAAAASUVORK5CYII="
500
+ )
501
+ )
502
+
503
+ preview_image = None
504
+ if use_preview_image:
505
+ preview_image = processed_images[0]
506
+ processed_images = processed_images[1:]
507
+
508
+ user_hash = generate_user_hash(
509
+ self.client_ip or "127.0.0.1", request.user_agent.string or ""
510
+ )
511
+ captcha_data = generate_captcha_token(
512
+ user_hash, correct_indexes, self._secret_key
513
+ )
514
+
515
+ return Response(
516
+ render_template(
517
+ f"{self.challenge_type}_challenge.html",
518
+ images=processed_images,
519
+ preview_image=preview_image,
520
+ subject=subject,
521
+ captcha_data=captcha_data,
522
+ return_url=return_url or "/",
523
+ error=error,
524
+ audio_challenge_available=self.audio_dataset is not None,
525
+ ),
526
+ mimetype="text/html",
527
+ )
528
+
529
+ def _render_audio_challenge(
530
+ self, return_url: str, error: Optional[str]
531
+ ) -> Response:
532
+ """
533
+ Render the audio challenge.
534
+ """
535
+ num_chars = AUDIO_CAPTCHA_CONFIG["num_chars"]
536
+ language = AUDIO_CAPTCHA_CONFIG["language"]
537
+
538
+ audio_files, correct_chars = self.memory_client.get_captcha_audio(
539
+ num_chars=num_chars, language=language
540
+ )
541
+
542
+ if not audio_files:
543
+ abort(500, "Could not load captcha audio")
544
+
545
+ combined_audio = combine_audio_files(audio_files)
546
+ if not combined_audio:
547
+ abort(500, "Could not process audio files")
548
+
549
+ audio_data_url = audio_bytes_to_data_url(combined_audio, "mp3")
550
+
551
+ user_hash = generate_user_hash(
552
+ self.client_ip or "127.0.0.1", request.user_agent.string or ""
553
+ )
554
+ captcha_data = generate_captcha_token(
555
+ user_hash, correct_chars, self._secret_key
556
+ )
557
+
558
+ return Response(
559
+ render_template(
560
+ "audio_challenge.html",
561
+ audio_file=audio_data_url,
562
+ captcha_data=captcha_data,
563
+ return_url=return_url or "/",
564
+ error=error,
565
+ image_challenge_available=self.image_dataset is not None,
566
+ ),
567
+ mimetype="text/html",
568
+ )
569
+
570
+ def _verify_captcha(self) -> Response:
571
+ """Verify the captcha solution."""
572
+ return_url = get_return_url(request)
573
+ if self.has_valid_clearance_token:
574
+ return redirect(return_url)
575
+
576
+ captcha_data = request.form.get("captcha_data", "")
577
+ if not captcha_data:
578
+ return redirect(
579
+ url_for(
580
+ "humanify.challenge",
581
+ error="Invalid captcha token",
582
+ return_url=return_url,
583
+ )
584
+ )
585
+
586
+ user_hash = generate_user_hash(
587
+ self.client_ip or "127.0.0.1", request.user_agent.string or ""
588
+ )
589
+ decrypted_data = validate_captcha_token(
590
+ captcha_data, self._secret_key, user_hash
591
+ )
592
+
593
+ if decrypted_data is None:
594
+ return redirect(
595
+ url_for(
596
+ "humanify.challenge",
597
+ error="Invalid captcha token",
598
+ return_url=return_url,
599
+ )
600
+ )
601
+
602
+ verify_functions = {
603
+ "grid": self._verify_image_captcha,
604
+ "one_click": self._verify_image_captcha,
605
+ }
606
+
607
+ verify_function = verify_functions[self.challenge_type]
608
+ if not verify_function(decrypted_data):
609
+ return redirect(
610
+ url_for(
611
+ "humanify.challenge",
612
+ error="Wrong selection. Try again.",
613
+ return_url=return_url,
614
+ )
615
+ )
616
+
617
+ clearance_token = generate_clearance_token(user_hash, self._secret_key)
618
+
619
+ response = redirect(return_url or "/")
620
+ response.set_cookie(
621
+ "clearance_token",
622
+ clearance_token,
623
+ max_age=14400,
624
+ httponly=True,
625
+ samesite="Strict",
626
+ )
627
+
628
+ return response
629
+
630
+ def _verify_audio_captcha(self) -> Response:
631
+ """Verify the audio captcha solution."""
632
+ return_url = get_return_url(request)
633
+ if self.has_valid_clearance_token:
634
+ return redirect(return_url)
635
+
636
+ captcha_data = request.form.get("captcha_data", "")
637
+ if not captcha_data:
638
+ return redirect(
639
+ url_for(
640
+ "humanify.audio_challenge",
641
+ error="Invalid captcha token",
642
+ return_url=return_url,
643
+ )
644
+ )
645
+
646
+ user_hash = generate_user_hash(
647
+ self.client_ip or "127.0.0.1", request.user_agent.string or ""
648
+ )
649
+ correct_chars = validate_captcha_token(
650
+ captcha_data, self._secret_key, user_hash, valid_lengths=[197]
651
+ )
652
+
653
+ if correct_chars is None:
654
+ return redirect(
655
+ url_for(
656
+ "humanify.audio_challenge",
657
+ error="Invalid captcha token",
658
+ return_url=return_url,
659
+ )
660
+ )
661
+
662
+ audio_response = request.form.get("audio_response", "").lower().strip()
663
+ if not audio_response or audio_response != correct_chars:
664
+ return redirect(
665
+ url_for(
666
+ "humanify.audio_challenge",
667
+ error="Wrong response. Try again.",
668
+ return_url=return_url,
669
+ )
670
+ )
671
+
672
+ clearance_token = generate_clearance_token(user_hash, self._secret_key)
673
+
674
+ response = redirect(return_url or "/")
675
+ response.set_cookie(
676
+ "clearance_token",
677
+ clearance_token,
678
+ max_age=14400,
679
+ httponly=True,
680
+ samesite="Strict",
681
+ )
682
+
683
+ return response
684
+
685
+ def _verify_image_captcha(self, decrypted_data: str) -> bool:
686
+ """Verify the image captcha."""
687
+ captcha_config = IMAGE_CAPTCHA_MAPPING[self.challenge_type]
688
+
689
+ selected_indexes = []
690
+ for i in range(1, captcha_config["num_images"] + 1):
691
+ if request.form.get(str(i), None) == "1":
692
+ selected_indexes.append(str(i - 1))
693
+
694
+ selected_str = "".join(sorted(selected_indexes))
695
+ correct_str = "".join(sorted(list(decrypted_data)))
696
+
697
+ return selected_str == correct_str