crawler-user-agents 1.43.0 → 1.44.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/__init__.py +22 -2
  2. package/package.json +1 -1
package/__init__.py CHANGED
@@ -4,6 +4,9 @@ from functools import cached_property
4
4
  from pathlib import Path
5
5
 
6
6
 
7
+ CHUNK_SIZE = 25
8
+
9
+
7
10
  class CrawlerPatterns:
8
11
  def __init__(self):
9
12
  pass
@@ -19,6 +22,22 @@ class CrawlerPatterns:
19
22
  def case_sensitive(self):
20
23
  return re.compile("|".join(i["pattern"] for i in CRAWLER_USER_AGENTS_DATA))
21
24
 
25
+ @cached_property
26
+ def _chunks_case_sensitive(self):
27
+ patterns = [i["pattern"] for i in CRAWLER_USER_AGENTS_DATA]
28
+ return [
29
+ re.compile("|".join(patterns[i:i + CHUNK_SIZE]))
30
+ for i in range(0, len(patterns), CHUNK_SIZE)
31
+ ]
32
+
33
+ @cached_property
34
+ def _chunks_case_insensitive(self):
35
+ patterns = [i["pattern"].lower() for i in CRAWLER_USER_AGENTS_DATA]
36
+ return [
37
+ re.compile("|".join(patterns[i:i + CHUNK_SIZE]))
38
+ for i in range(0, len(patterns), CHUNK_SIZE)
39
+ ]
40
+
22
41
 
23
42
  def load_json():
24
43
  cwd = Path(__file__).parent
@@ -34,8 +53,9 @@ CRAWLER_PATTERNS = CrawlerPatterns()
34
53
  def is_crawler(user_agent: str, case_sensitive: bool = True) -> bool:
35
54
  """Return True if the given User-Agent matches a known crawler."""
36
55
  if case_sensitive:
37
- return bool(re.search(CRAWLER_PATTERNS.case_sensitive, user_agent))
38
- return bool(re.search(CRAWLER_PATTERNS.case_insensitive, user_agent))
56
+ return any(p.search(user_agent) for p in CRAWLER_PATTERNS._chunks_case_sensitive)
57
+ ua = user_agent.lower()
58
+ return any(p.search(ua) for p in CRAWLER_PATTERNS._chunks_case_insensitive)
39
59
 
40
60
 
41
61
  def matching_crawlers(user_agent: str, case_sensitive: bool = True) -> list[int]:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawler-user-agents",
3
- "version": "1.43.0",
3
+ "version": "1.44.0",
4
4
  "main": "crawler-user-agents.json",
5
5
  "typings": "./index.d.ts",
6
6
  "exports": {