crawler-user-agents 1.0.126 → 1.0.128
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawler-user-agents.json +2 -2
- package/package.json +1 -1
- package/validate.py +8 -0
package/crawler-user-agents.json
CHANGED
|
@@ -220,7 +220,7 @@
|
|
|
220
220
|
,
|
|
221
221
|
{
|
|
222
222
|
"pattern": "httpx",
|
|
223
|
-
"addition_date":"
|
|
223
|
+
"addition_date": "2019/12/23",
|
|
224
224
|
"instances": [
|
|
225
225
|
"python-httpx/0.16.1",
|
|
226
226
|
"python-httpx/0.13.0.dev1"
|
|
@@ -265,7 +265,7 @@
|
|
|
265
265
|
,
|
|
266
266
|
{
|
|
267
267
|
"pattern": "phpcrawl",
|
|
268
|
-
"addition_date": "2012
|
|
268
|
+
"addition_date": "2012/09/17",
|
|
269
269
|
"url": "http://phpcrawl.cuab.de/",
|
|
270
270
|
"instances": [
|
|
271
271
|
"phpcrawl"
|
package/package.json
CHANGED
package/validate.py
CHANGED
|
@@ -6,6 +6,7 @@ from __future__ import print_function
|
|
|
6
6
|
import json
|
|
7
7
|
import re
|
|
8
8
|
from collections import Counter
|
|
9
|
+
import datetime
|
|
9
10
|
|
|
10
11
|
from jsonschema import validate
|
|
11
12
|
|
|
@@ -59,6 +60,13 @@ def main():
|
|
|
59
60
|
for entry in json_data:
|
|
60
61
|
pattern = entry['pattern']
|
|
61
62
|
|
|
63
|
+
# assert that field "addition_date" has format "2019/12/23",
|
|
64
|
+
if 'addition_date' in entry:
|
|
65
|
+
if not re.match(r'\d{4}/\d{2}/\d{2}', entry['addition_date']):
|
|
66
|
+
raise ValueError('addition_date {!r} has invalid format'.format(entry['addition_date']))
|
|
67
|
+
# parse the date with datetime
|
|
68
|
+
datetime.datetime.strptime(entry['addition_date'], '%Y/%m/%d')
|
|
69
|
+
|
|
62
70
|
# canonicalize entry
|
|
63
71
|
if 'depends_on' not in entry: entry['depends_on'] = []
|
|
64
72
|
|