crawler-user-agents 1.0.156 → 1.0.157

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/__init__.py CHANGED
@@ -1,8 +1,25 @@
1
1
  import re
2
2
  import json
3
+ from functools import cached_property
3
4
  from pathlib import Path
4
5
 
5
6
 
7
+ class CrawlerPatterns:
8
+ def __init__(self):
9
+ pass
10
+
11
+ @cached_property
12
+ def case_insensitive(self):
13
+ return re.compile(
14
+ "|".join(i["pattern"] for i in CRAWLER_USER_AGENTS_DATA),
15
+ re.IGNORECASE
16
+ )
17
+
18
+ @cached_property
19
+ def case_sensitive(self):
20
+ return re.compile("|".join(i["pattern"] for i in CRAWLER_USER_AGENTS_DATA))
21
+
22
+
6
23
  def load_json():
7
24
  cwd = Path(__file__).parent
8
25
  user_agents_file_path = cwd / "crawler-user-agents.json"
@@ -11,24 +28,24 @@ def load_json():
11
28
 
12
29
 
13
30
  CRAWLER_USER_AGENTS_DATA = load_json()
14
- CRAWLER_USER_AGENTS_REGEXP = re.compile(
15
- "|".join(i["pattern"] for i in CRAWLER_USER_AGENTS_DATA)
16
- )
31
+ CRAWLER_PATTERNS = CrawlerPatterns()
17
32
 
18
33
 
19
- def is_crawler(user_agent: str) -> bool:
34
+ def is_crawler(user_agent: str, case_sensitive: bool = True) -> bool:
20
35
  """Return True if the given User-Agent matches a known crawler."""
21
- return bool(CRAWLER_USER_AGENTS_REGEXP.search(user_agent))
36
+ if case_sensitive:
37
+ return bool(re.search(CRAWLER_PATTERNS.case_sensitive, user_agent))
38
+ return bool(re.search(CRAWLER_PATTERNS.case_insensitive, user_agent))
22
39
 
23
40
 
24
- def matching_crawlers(user_agent: str) -> list[int]:
41
+ def matching_crawlers(user_agent: str, case_sensitive: bool = True) -> list[int]:
25
42
  """
26
43
  Return a list of the indices in CRAWLER_USER_AGENTS_DATA of any crawlers
27
44
  matching the given User-Agent.
28
45
  """
29
46
  result = []
30
- if is_crawler(user_agent):
47
+ if is_crawler(user_agent, case_sensitive):
31
48
  for num, crawler_user_agent in enumerate(CRAWLER_USER_AGENTS_DATA):
32
- if re.search(crawler_user_agent["pattern"], user_agent):
49
+ if re.search(crawler_user_agent["pattern"], user_agent, 0 if case_sensitive else re.IGNORECASE):
33
50
  result.append(num)
34
51
  return result
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawler-user-agents",
3
- "version": "1.0.156",
3
+ "version": "1.0.157",
4
4
  "main": "crawler-user-agents.json",
5
5
  "typings": "./index.d.ts",
6
6
  "author": "Martin Monperrus <martin.monperrus@gnieh.org>",
package/test_harness.py CHANGED
@@ -5,28 +5,42 @@ Usage:
5
5
  $ pytest test_harness.py
6
6
 
7
7
  """
8
- from crawleruseragents import is_crawler, matching_crawlers
9
8
 
9
+ from crawleruseragents import is_crawler, matching_crawlers
10
10
 
11
- def test_match():
12
- assert is_crawler("test Googlebot/2.0 test") is True
13
11
 
14
12
 
15
13
  def test_nomatch():
16
14
  assert is_crawler("!!!!!!!!!!!!") is False
17
15
 
18
16
 
19
- def test_case():
17
+ def test_case_sensitive():
20
18
  assert is_crawler("test googlebot/2.0 test") is False
21
19
 
22
20
 
23
- def test_matching_crawlers_match():
21
+ def test_case_insensitive():
22
+ assert is_crawler("test googlebot/2.0 test", case_sensitive=False) is True
23
+
24
+
25
+ def test_matching_crawlers_match_case_sensitive():
24
26
  result = matching_crawlers("test Googlebot/2.0 test")
25
27
  assert isinstance(result, list)
26
28
  assert len(result) > 0
27
29
  assert all(isinstance(val, int) for val in result)
28
30
 
29
31
 
32
+ def test_matching_crawlers_match_case_insensitive():
33
+ result = matching_crawlers("test googlebot/2.0 test", False)
34
+ assert isinstance(result, list)
35
+ assert len(result) > 0
36
+ assert all(isinstance(val, int) for val in result)
37
+
38
+ def test_matching_crawlers_match_lower_case_agent():
39
+ result = matching_crawlers("test googlebot/2.0 test")
40
+ assert isinstance(result, list)
41
+ assert len(result) == 0
42
+
43
+
30
44
  def test_matching_crawlers_nomatch():
31
45
  result = matching_crawlers("!!!!!!!!!!!!")
32
46
  assert isinstance(result, list)