crawler-user-agents 1.0.141 → 1.0.143

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/__init__.py CHANGED
@@ -1,22 +1,25 @@
1
- import crawleruseragents
2
1
  import re
3
2
  import json
4
- from importlib import resources
5
-
3
+ from pathlib import Path
4
+
5
+
6
6
  def load_json():
7
- return json.loads(resources.read_text(crawleruseragents,"crawler-user-agents.json"))
7
+ cwd = Path(__file__).parent
8
+ user_agents_file_path = cwd / "crawler-user-agents.json"
9
+ with user_agents_file_path.open() as patterns_file:
10
+ return json.load(patterns_file)
8
11
 
9
- DATA = load_json()
10
12
 
11
- def is_crawler(s):
12
- # print(s)
13
- for i in DATA:
14
- test=re.search(i["pattern"],s,re.IGNORECASE)
15
- if test:
16
- return True
17
- return False
13
+ CRAWLER_USER_AGENTS_DATA = load_json()
18
14
 
19
- def is_crawler2(s):
20
- regexp = re.compile("|".join([i["pattern"] for i in DATA]))
21
- return regexp.search(s) != None
22
15
 
16
+ def is_crawler(user_agent: str) -> bool:
17
+ for crawler_user_agent in CRAWLER_USER_AGENTS_DATA:
18
+ if re.search(crawler_user_agent["pattern"], user_agent, re.IGNORECASE):
19
+ return True
20
+ return False
21
+
22
+
23
+ def is_crawler2(s):
24
+ regexp = re.compile("|".join([i["pattern"] for i in CRAWLER_USER_AGENTS_DATA]))
25
+ return regexp.search(s) is not None
package/crawler-user-agents.json CHANGED
@@ -12,8 +12,7 @@
12
12
  "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.96 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
13
13
  "Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Safari/537.36"
14
14
  ]
15
- }
16
- ,
15
+ },
17
16
  {
18
17
  "pattern": "Googlebot-Mobile",
19
18
  "instances": [
@@ -3958,6 +3957,7 @@
3958
3957
  "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Mobile Safari/537.36 Chrome-Lighthouse",
3959
3958
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36(KHTML, like Gecko) Chrome/69.0.3464.0 Safari/537.36 Chrome-Lighthouse",
3960
3959
  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3694.0 Safari/537.36 Chrome-Lighthouse",
3960
+ "Mozilla/5.0 (Linux; Android 7.0; Moto G (4)) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4695.0 Mobile Safari/537.36 Chrome-Lighthouse",
3961
3961
  "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3694.0 Mobile Safari/537.36 Chrome-Lighthouse"
3962
3962
  ],
3963
3963
  "url": "https://developers.google.com/speed/pagespeed/insights"
@@ -5314,5 +5314,11 @@
5314
5314
  "addition_date": "2024/05/14",
5315
5315
  "instances": ["Mozilla/5.0 (compatible; Monsidobot/2.2; +http://monsido.com/bot.html; info@monsido.com)"],
5316
5316
  "url": "http://monsido.com/bot.html"
5317
+ },
5318
+ {
5319
+ "pattern": "GroupMeBot",
5320
+ "addition_date": "2024/05/19",
5321
+ "instances": ["GroupMeBot/1.0"],
5322
+ "url": "https://groupme.com/"
5317
5323
  }
5318
5324
  ]
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "crawler-user-agents",
3
- "version": "1.0.141",
3
+ "version": "1.0.143",
4
4
  "main": "crawler-user-agents.json",
5
5
  "typings": "./index.d.ts",
6
6
  "author": "Martin Monperrus <martin.monperrus@gnieh.org>",