python-epo-ops-client 4.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- epo_ops/__init__.py +25 -0
- epo_ops/__version__.py +6 -0
- epo_ops/api.py +470 -0
- epo_ops/exceptions.py +30 -0
- epo_ops/middlewares/__init__.py +9 -0
- epo_ops/middlewares/cache/__init__.py +1 -0
- epo_ops/middlewares/cache/dogpile/__init__.py +1 -0
- epo_ops/middlewares/cache/dogpile/dogpile.py +78 -0
- epo_ops/middlewares/cache/dogpile/helpers.py +14 -0
- epo_ops/middlewares/middleware.py +19 -0
- epo_ops/middlewares/throttle/__init__.py +3 -0
- epo_ops/middlewares/throttle/storages/__init__.py +4 -0
- epo_ops/middlewares/throttle/storages/sqlite.py +156 -0
- epo_ops/middlewares/throttle/storages/storage.py +24 -0
- epo_ops/middlewares/throttle/throttler.py +26 -0
- epo_ops/middlewares/throttle/utils.py +25 -0
- epo_ops/models.py +119 -0
- epo_ops/utils.py +41 -0
- python_epo_ops_client-4.2.0.dist-info/METADATA +253 -0
- python_epo_ops_client-4.2.0.dist-info/RECORD +23 -0
- python_epo_ops_client-4.2.0.dist-info/WHEEL +5 -0
- python_epo_ops_client-4.2.0.dist-info/licenses/LICENSE +176 -0
- python_epo_ops_client-4.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
from __future__ import division
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import sqlite3
|
|
9
|
+
from datetime import timedelta
|
|
10
|
+
from itertools import cycle
|
|
11
|
+
|
|
12
|
+
from dateutil.parser import parse
|
|
13
|
+
|
|
14
|
+
from ....utils import makedirs, now
|
|
15
|
+
from .storage import Storage
|
|
16
|
+
|
|
17
|
+
log = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def convert_timestamp(ts):
    """sqlite3 converter: parse a stored timestamp value back into a datetime."""
    return parse(ts)


# Make sqlite3 hand back real datetime objects for columns declared "timestamp".
sqlite3.register_converter("timestamp", convert_timestamp)

# FIXME: S108 Probable insecure usage of temporary file or directory
DEFAULT_DB_PATH = "/var/tmp/python-epo-ops-client/throttle_history.db"  # noqa: S108
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SQLite(Storage):
    """Throttle-history storage backed by a SQLite database.

    Keeps one row per OPS response inside the one-minute sliding window,
    recording the overall system status plus a (status, limit, retry_after)
    triple per service, and derives the delay before the next request from
    that history.
    """

    # Known OPS service buckets; one column triple is created per service.
    SERVICES = ("images", "inpadoc", "other", "retrieval", "search")

    def __init__(self, db_path=DEFAULT_DB_PATH):
        self.db_path = db_path
        makedirs(os.path.dirname(db_path))

        # Making SQLite more threadsafe, as described in
        # https://ricardoanderegg.com/posts/python-sqlite-thread-safety/#conclusion
        if sqlite3.threadsafety == 3:
            check_same_thread = False
        else:
            check_same_thread = True

        self.db = sqlite3.connect(
            db_path,
            detect_types=sqlite3.PARSE_DECLTYPES,
            check_same_thread=check_same_thread,
        )
        self.db.row_factory = sqlite3.Row
        self.prepare()

    def service_columns(self, include_type=False):
        """Return the flat list of per-service column names.

        With include_type=True each name is suffixed with its SQL type
        (status is text, limit and retry_after are integers), suitable for
        a CREATE TABLE statement.
        """
        columns = []
        for service in self.SERVICES:
            columns.extend(
                [
                    "{0}_status".format(service),
                    "{0}_limit".format(service),
                    "{0}_retry_after".format(service),
                ]
            )
        if include_type:
            for i, pair in enumerate(
                zip(columns, cycle(["text", "integer", "integer"]))
            ):
                columns[i] = "{0} {1}".format(*pair)

        return columns

    def prepare(self):
        """Create the throttle_history table if it does not exist yet.

        IF NOT EXISTS makes creation idempotent, so genuine operational
        errors (corrupt/locked database) are no longer silenced by the
        previous blanket except clause.
        """
        sql = """\
        CREATE TABLE IF NOT EXISTS throttle_history(
          timestamp timestamp primary key,
          system_status text, {0}
        )
        """
        with self.db:
            self.db.execute(sql.format(", ".join(self.service_columns(True))))

    def prune(self):
        """Drop history rows older than the one-minute sliding window."""
        sql = """\
        DELETE FROM throttle_history
        WHERE timestamp < datetime('now', '-1 minute')
        """
        with self.db:
            self.db.execute(sql)

    def parse_throttle(self, throttle):
        """Parse an OPS ``x-throttling-control`` header value.

        Returns a dict with the overall 'system_status' plus a 'services'
        mapping of service name -> {'status': str, 'limit': int}.
        """
        re_str = r"{0}=(\w+):(\d+)"
        status = {"services": {}}
        status["system_status"] = re.search(r"^(\w+) \(", throttle).group(1)
        for service in self.SERVICES:
            match = re.search(re_str.format(service), throttle)
            status["services"][service] = {
                "status": match.group(1),
                "limit": int(match.group(2)),
            }
        return status

    def convert(self, status, retry):
        """Build the INSERT statement and value list for one history row.

        ``retry`` (milliseconds) is only recorded for services whose status
        is 'black', i.e. currently blocked.
        """
        values = [now(), status["system_status"]]
        for service in self.SERVICES:
            service_status = status["services"][service]["status"]
            service_limit = status["services"][service]["limit"]
            service_retry = 0
            if service_status.lower() == "black":
                service_retry = retry
            values.extend([service_status, service_limit, service_retry])
        # The placeholder count is derived from the row itself instead of a
        # hard-coded 17, so extending SERVICES cannot desynchronize the
        # statement from its values.
        sql = (
            "INSERT INTO throttle_history(timestamp, system_status, {0}) VALUES ({1})"
        ).format(", ".join(self.service_columns()), ", ".join(["?"] * len(values)))
        return sql, values

    def delay_for(self, service):
        """Return seconds to wait before the next request to *service*.

        This method is a public interface for a throttle storage class.
        """
        _now = now()
        limit = "{0}_limit".format(service)
        self.prune()
        # Ascending sort on the limit column yields the most restrictive
        # (lowest-limit) row inside the window.
        sql = ("SELECT * FROM throttle_history ORDER BY {0} limit 1").format(
            limit
        )
        with self.db:
            r = self.db.execute(sql).fetchone()

        if not r:  # If there are no rows
            next_run = _now
        elif r[limit] == 0:
            # Service is blocked: wait out the server-supplied retry-after.
            next_run = r["timestamp"] + timedelta(
                milliseconds=r["{0}_retry_after".format(service)]
            )
        else:
            # Spread requests evenly across the one-minute window.
            next_run = _now + timedelta(seconds=60.0 / r[limit])

        if next_run < _now:
            return 0.0
        # total_seconds() replaces the manual microsecond arithmetic; the
        # computed value is identical.
        return (next_run - _now).total_seconds()

    def update(self, headers):
        """Record the throttling data carried by a response's headers.

        This method is a public interface for a throttle storage class.
        """
        self.prune()
        if "x-throttling-control" not in headers:
            return
        status = self.parse_throttle(headers["x-throttling-control"])
        retry_after = int(headers.get("retry-after", 0))
        sql, values = self.convert(status, retry_after)
        with self.db:
            self.db.execute(sql, values)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
log = logging.getLogger(__name__)
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Storage(object):
    """Abstract interface every throttle-history backend must implement."""

    def delay_for(self, service):
        """
        Given the name of a service, return the number of seconds Throttler
        should wait before processing the request. Take care to observe the
        one minute sliding window.
        """
        raise NotImplementedError

    def update(self, headers):
        """
        Given a requests.Response.headers object, or any other
        dictionary-like object that contains the keys
        'x-throttling-control' and (optionally) 'retry-after', update the
        throttle history storage backend appropriately.
        """
        raise NotImplementedError
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
from ..middleware import Middleware
|
|
7
|
+
from .storages import SQLite
|
|
8
|
+
from .utils import service_for_url
|
|
9
|
+
|
|
10
|
+
log = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class Throttler(Middleware):
    """Middleware that paces outgoing requests per the OPS throttling rules."""

    def __init__(self, history_storage=None):
        # Default to the SQLite-backed history when none is supplied.
        self.history = history_storage or SQLite()

    def process_request(self, env, url, data, **kwargs):
        # Cached responses never hit the network, so no delay is needed.
        if not env["from-cache"]:
            time.sleep(self.history.delay_for(service_for_url(url)))
        return url, data, kwargs

    def process_response(self, env, response):
        # Only genuine upstream responses carry fresh throttling headers.
        if not env["from-cache"]:
            self.history.update(response.headers)
        return response
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
log = logging.getLogger(__name__)
|
|
7
|
+
|
|
8
|
+
# Since patterns are searched in order, we need to specify from most to
# least specific.
URLPATTERNS = (
    ("classification/cpc/media", "images"),
    ("family", "inpadoc"),
    ("legal", "inpadoc"),
    ("published-data/images", "images"),
    ("published-data/search", "search"),
    ("published-data", "retrieval"),
)


def service_for_url(url):
    """Map an OPS REST URL to the throttling service bucket it belongs to."""
    hits = (
        service
        for pattern, service in URLPATTERNS
        if re.search("rest-services/{0}".format(pattern), url)
    )
    # First (most specific) pattern wins; anything unrecognized is "other".
    return next(hits, "other")
|
epo_ops/models.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from datetime import datetime, timedelta
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
|
|
8
|
+
from .exceptions import MissingRequiredValue
|
|
9
|
+
from .utils import quote, validate_date
|
|
10
|
+
|
|
11
|
+
log = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
NETWORK_TIMEOUT = 10.0
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _prepare_part(part):
    """Percent-encode one input component and wrap it in parentheses."""
    return f"({quote(part)})"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BaseInput(object):
    """Common holder for a patent-reference input (number/country/kind/date)."""

    def __init__(self, number, country_code, kind_code, date):
        # The number is the one universally required component.
        if not number:
            raise MissingRequiredValue("number must be present")
        self.number = number
        self.country_code = country_code
        self.kind_code = kind_code
        # validate_date returns '' when absent and raises on a bad format.
        self.date = validate_date(date)

    def as_api_input(self):
        """Serialize as the dotted, parenthesized form the OPS API expects."""
        present = [
            part
            for part in (self.country_code, self.number, self.kind_code, self.date)
            if part
        ]
        return ".".join(_prepare_part(part) for part in present)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class Original(BaseInput):
    """Input in the 'original' number format; only the number is required."""

    def __init__(self, number, country_code=None, kind_code=None, date=None):
        super().__init__(number, country_code, kind_code, date)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class Docdb(BaseInput):
    """DOCDB-format input: number, country_code and kind_code are mandatory."""

    def __init__(self, number, country_code, kind_code, date=None):
        if not (number and country_code and kind_code):
            raise MissingRequiredValue(
                "number, country_code, and kind_code must be present"
            )
        super().__init__(number, country_code, kind_code, date)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class Epodoc(BaseInput):
    """EPODOC-format input: a single number with optional kind code and date."""

    def __init__(self, number, kind_code=None, date=None):
        super().__init__(number, None, kind_code, date)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class AccessToken(object):
    """Wraps an OPS OAuth token response and tracks its local expiry."""

    def __init__(self, response):
        self.response = response
        self._content = response.json()
        self.token = self._content["access_token"]
        # Expiry is computed locally from the advertised token lifetime.
        self.expiration = datetime.now() + timedelta(
            seconds=int(self._content["expires_in"])
        )

    @property
    def is_expired(self):
        """True once the locally computed expiration time has passed."""
        return datetime.now() >= self.expiration
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class Request(object):
    """Runs an HTTP call through a middleware pipeline.

    Middlewares see requests in listed order and responses in reverse
    order; a middleware may short-circuit the network call by placing a
    response in the shared env (e.g. a cache hit).
    """

    def __init__(self, middlewares):
        self.middlewares = middlewares
        self.reset_env()

    @property
    def default_env(self):
        return {
            "cache-key": None,
            "from-cache": False,
            "is-cached": False,
            "response": None,
        }

    def reset_env(self):
        # Fresh copy each time so middlewares never share state across calls.
        self.env = dict(self.default_env)

    def post(self, url, data=None, **kwargs):
        return self._request(_post_callback, url, data, **kwargs)

    def get(self, url, data=None, **kwargs):
        return self._request(_get_callback, url, data, **kwargs)

    def _request(self, callback, url, data=None, **kwargs):
        self.reset_env()

        for mw in self.middlewares:
            url, data, kwargs = mw.process_request(self.env, url, data, **kwargs)

        # Either get response from cache environment or request from upstream.
        # Note: bool(<Response [200]>) is True, bool(<Response [404]>) is False.
        response = self.env["response"] or callback(url, data, **kwargs)

        for mw in reversed(self.middlewares):
            response = mw.process_response(self.env, response)

        self.reset_env()
        return response
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _post_callback(url, data, **kwargs):
    # Always bound the request with a network timeout so a stalled endpoint
    # cannot hang the client indefinitely.
    return requests.post(url, data=data, timeout=NETWORK_TIMEOUT, **kwargs)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _get_callback(url, data, **kwargs):
    # NOTE(review): *data* is accepted for signature symmetry with
    # _post_callback but deliberately not sent -- GET parameters are
    # presumably passed via kwargs (params/headers); confirm against callers.
    return requests.get(url, timeout=NETWORK_TIMEOUT, **kwargs)
|
epo_ops/utils.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
8
|
+
from dateutil.tz import tzutc
|
|
9
|
+
from six.moves import urllib
|
|
10
|
+
|
|
11
|
+
from .exceptions import InvalidDate
|
|
12
|
+
|
|
13
|
+
log = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def makedirs(path, mode=0o777):
    """Best-effort recursive directory creation.

    Any OSError (typically: the directory already exists) is deliberately
    swallowed; callers that truly need the directory will fail later with
    a more specific error.
    """
    try:
        os.makedirs(path, mode)
    except OSError:
        pass
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def now():
    """Return the current time as a timezone-aware UTC datetime."""
    return datetime.now(tz=tzutc())
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def quote(string):
    """Percent-encode *string*, keeping '/' and '\\' literal.

    '~' is additionally escaped as %7E even though urllib treats it as
    unreserved on modern Pythons.
    """
    encoded = urllib.parse.quote(string, safe="/\\")
    return encoded.replace("~", "%7E")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def validate_date(date):
    """Return *date* if it is a valid YYYYMMDD string, or '' when absent.

    Raises InvalidDate (chained from the underlying ValueError) for any
    other value.
    """
    if date in (None, ""):
        return ""
    try:
        datetime.strptime(date, "%Y%m%d")
    except ValueError as exc:
        raise InvalidDate(
            "{0} is not a valid YYYYMMDD date.".format(date)
        ) from exc
    return date
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: python-epo-ops-client
|
|
3
|
+
Version: 4.2.0
|
|
4
|
+
Summary: Python client for EPO OPS, the European Patent Office's Open Patent Services API.
|
|
5
|
+
Home-page: https://github.com/ip-tools/python-epo-ops-client
|
|
6
|
+
Download-URL: https://pypi.org/project/python-epo-ops-client/#files
|
|
7
|
+
Author: George Song
|
|
8
|
+
Author-email: george@monozuku.com
|
|
9
|
+
Maintainer: Andreas Motl
|
|
10
|
+
Maintainer-email: andreas.motl@ip-tools.org
|
|
11
|
+
Keywords: ops,epo,epo-ops,patent-data,patent-office,patent-data-api,european patent office,open patent services
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Natural Language :: English
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Programming Language :: Python
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.6
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.7
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
License-File: LICENSE
|
|
29
|
+
Requires-Dist: dogpile.cache<1.5
|
|
30
|
+
Requires-Dist: importlib-metadata; python_version < "3.8"
|
|
31
|
+
Requires-Dist: python-dateutil<2.10
|
|
32
|
+
Requires-Dist: requests<3,>=2.27
|
|
33
|
+
Requires-Dist: six<2
|
|
34
|
+
Provides-Extra: develop
|
|
35
|
+
Requires-Dist: ruff<0.13; python_version >= "3.7" and extra == "develop"
|
|
36
|
+
Requires-Dist: twine<7; extra == "develop"
|
|
37
|
+
Requires-Dist: wheel<1; extra == "develop"
|
|
38
|
+
Provides-Extra: test
|
|
39
|
+
Requires-Dist: pytest<9; extra == "test"
|
|
40
|
+
Requires-Dist: pytest-cache<2; extra == "test"
|
|
41
|
+
Requires-Dist: pytest-cov<6.3; extra == "test"
|
|
42
|
+
Requires-Dist: python-dotenv<1.2; extra == "test"
|
|
43
|
+
Requires-Dist: responses<0.26; extra == "test"
|
|
44
|
+
Dynamic: author
|
|
45
|
+
Dynamic: author-email
|
|
46
|
+
Dynamic: classifier
|
|
47
|
+
Dynamic: description
|
|
48
|
+
Dynamic: description-content-type
|
|
49
|
+
Dynamic: download-url
|
|
50
|
+
Dynamic: home-page
|
|
51
|
+
Dynamic: keywords
|
|
52
|
+
Dynamic: license-file
|
|
53
|
+
Dynamic: maintainer
|
|
54
|
+
Dynamic: maintainer-email
|
|
55
|
+
Dynamic: provides-extra
|
|
56
|
+
Dynamic: requires-dist
|
|
57
|
+
Dynamic: summary
|
|
58
|
+
|
|
59
|
+
# python-epo-ops-client
|
|
60
|
+
|
|
61
|
+
[](https://pypi.org/project/python-epo-ops-client/)
|
|
62
|
+
[](https://pypi.org/project/python-epo-ops-client/)
|
|
63
|
+
[](https://github.com/ip-tools/python-epo-ops-client/actions/workflows/main.yml)
|
|
64
|
+
[](https://codecov.io/gh/ip-tools/python-epo-ops-client)
|
|
65
|
+
|
|
66
|
+
python-epo-ops-client is an [Apache2 licensed][apache license] client library
|
|
67
|
+
for accessing the [European Patent Office][epo]'s ("EPO") [Open Patent
|
|
68
|
+
Services][ops] ("OPS") v3.2 (based on [v1.3.16 of the reference guide][ops guide]).
|
|
69
|
+
|
|
70
|
+
```python
|
|
71
|
+
import epo_ops
|
|
72
|
+
|
|
73
|
+
# Instantiate client.
|
|
74
|
+
client = epo_ops.Client(key="abc", secret="xyz")
|
|
75
|
+
|
|
76
|
+
# Retrieve bibliography data.
|
|
77
|
+
response = client.published_data(
|
|
78
|
+
|
|
79
|
+
# publication, application, priority
|
|
80
|
+
reference_type="publication",
|
|
81
|
+
|
|
82
|
+
# docdb, epodoc
|
|
83
|
+
input=epo_ops.models.Docdb("1000000", "EP", "A1"),
|
|
84
|
+
|
|
85
|
+
# optional, defaults to biblio in case of published_data
|
|
86
|
+
endpoint="biblio",
|
|
87
|
+
|
|
88
|
+
# optional, list of constituents
|
|
89
|
+
constituents=[],
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# Retrieve description.
|
|
93
|
+
response = client.published_data(
|
|
94
|
+
reference_type="publication",
|
|
95
|
+
input=epo_ops.models.Docdb("1000000", "EP", "A1"),
|
|
96
|
+
endpoint="description",
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
# Retrieve claims.
|
|
100
|
+
response = client.published_data(
|
|
101
|
+
reference_type="publication",
|
|
102
|
+
input=epo_ops.models.Docdb("1000000", "EP", "A1"),
|
|
103
|
+
endpoint="claims",
|
|
104
|
+
)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## Features
|
|
110
|
+
|
|
111
|
+
`python-epo-ops-client` abstracts away the complexities of accessing EPO OPS:
|
|
112
|
+
|
|
113
|
+
- Format the requests properly
|
|
114
|
+
- Bubble up quota problems as proper HTTP errors
|
|
115
|
+
- Handle token authentication and renewals automatically
|
|
116
|
+
- Handle throttling properly
|
|
117
|
+
- Add optional caching to minimize impact on the OPS servers
|
|
118
|
+
|
|
119
|
+
There are two main layers to `python-epo-ops-client`: Client and Middleware.
|
|
120
|
+
|
|
121
|
+
### Client
|
|
122
|
+
|
|
123
|
+
The Client contains all the formatting and token handling logic and is what
|
|
124
|
+
you'll interact with mostly.
|
|
125
|
+
|
|
126
|
+
When you issue a request, the response is a [requests.Response][] object. If
|
|
127
|
+
`response.status_code != 200` then a `requests.HTTPError` exception will be
|
|
128
|
+
raised — it's your responsibility to handle those exceptions if you want to. The
|
|
129
|
+
one case that's handled is when the access token has expired: in this case, the
|
|
130
|
+
client will automatically handle the HTTP 400 status and renew the token.
|
|
131
|
+
|
|
132
|
+
Note that the Client does not attempt to interpret the data supplied by OPS, so
|
|
133
|
+
it's your responsibility to parse the XML or JSON payload for your own purpose.
|
|
134
|
+
|
|
135
|
+
The following custom exceptions are raised for cases when OPS quotas are
|
|
136
|
+
exceeded, they are all in the `epo_ops.exceptions` module and are subclasses of
|
|
137
|
+
`requests.HTTPError`, and therefore offer the same behaviors:
|
|
138
|
+
|
|
139
|
+
- IndividualQuotaPerHourExceeded
|
|
140
|
+
- RegisteredQuotaPerWeekExceeded
|
|
141
|
+
|
|
142
|
+
Again, it's up to you to parse the response and decide what to do.
|
|
143
|
+
|
|
144
|
+
Currently the Client knows how to issue request for the following services:
|
|
145
|
+
|
|
146
|
+
| Client method | API end point | throttle |
|
|
147
|
+
| ----------------------------------------------------------------------------- | --------------------- | --------- |
|
|
148
|
+
| `family(reference_type, input, endpoint=None, constituents=None)` | family | inpadoc |
|
|
149
|
+
| `image(path, range=1, extension='tiff')` | published-data/images | images |
|
|
150
|
+
| `legal(reference_type, input)` | legal | other |
|
|
151
|
+
| `number(reference_type, input, output_format)` | number-service | other |
|
|
152
|
+
| `published_data(reference_type, input, endpoint='biblio', constituents=None)` | published-data | retrieval |
|
|
153
|
+
| `published_data_search(cql, range_begin=1, range_end=25, constituents=None)` | published-data/search | search |
|
|
154
|
+
| `register(reference_type, input, constituents=['biblio'])` | register | other |
|
|
155
|
+
| `register_search(cql, range_begin=1, range_end=25)` | register/search | other |
|
|
156
|
+
|
|
157
|
+
Bulk operations can be achieved by passing a list of valid models to the
|
|
158
|
+
published_data input field.
|
|
159
|
+
|
|
160
|
+
See the [OPS guide][] or use the [Developer's Area][] for more information on
|
|
161
|
+
how to use each service.
|
|
162
|
+
|
|
163
|
+
Please submit pull requests for the following services by enhancing the
|
|
164
|
+
`epo_ops.api.Client` class:
|
|
165
|
+
|
|
166
|
+
- Legal service
|
|
167
|
+
|
|
168
|
+
### Middleware
|
|
169
|
+
|
|
170
|
+
All requests and responses are passed through each middleware object listed in
|
|
171
|
+
`client.middlewares`. Requests are processed in the order listed, and responses
|
|
172
|
+
are processed in the _reverse_ order.
|
|
173
|
+
|
|
174
|
+
Each middleware should subclass `middlewares.Middleware` and implement the
|
|
175
|
+
`process_request` and `process_response` methods.
|
|
176
|
+
|
|
177
|
+
There are two middleware classes out of the box: Throttler and Dogpile.
|
|
178
|
+
Throttler is in charge of the OPS throttling rules and will delay requests
|
|
179
|
+
accordingly. Dogpile is an optional cache which will cache all HTTP status 200,
|
|
180
|
+
404, 405, and 413 responses.
|
|
181
|
+
|
|
182
|
+
By default, only the Throttler middleware is enabled, if you want to enable
|
|
183
|
+
caching:
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
import epo_ops
|
|
187
|
+
|
|
188
|
+
middlewares = [
|
|
189
|
+
epo_ops.middlewares.Dogpile(),
|
|
190
|
+
epo_ops.middlewares.Throttler(),
|
|
191
|
+
]
|
|
192
|
+
client = epo_ops.Client(
|
|
193
|
+
key='key',
|
|
194
|
+
secret='secret',
|
|
195
|
+
middlewares=middlewares,
|
|
196
|
+
)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
You'll also need to install caching dependencies in your projects, such as `pip install dogpile.cache`.
|
|
200
|
+
|
|
201
|
+
_Note that caching middleware should be first in most cases._
|
|
202
|
+
|
|
203
|
+
#### Dogpile
|
|
204
|
+
|
|
205
|
+
Dogpile is based on (surprise) [dogpile.cache][]. By default it is instantiated
|
|
206
|
+
with a DBMBackend region with timeout of 2 weeks.
|
|
207
|
+
|
|
208
|
+
Dogpile takes three optional instantiation parameters:
|
|
209
|
+
|
|
210
|
+
- `region`: You can pass whatever valid [dogpile.cache Region][] you want to
|
|
211
|
+
backend the cache
|
|
212
|
+
- `kwargs_handlers`: A list of keyword argument handlers, which it will use to
|
|
213
|
+
process the kwargs passed to the request object in order to extract elements
|
|
214
|
+
for generating the cache key. Currently one handler is implemented (and
|
|
215
|
+
instantiated by default) to make sure that the range request header is part of
|
|
216
|
+
the cache key.
|
|
217
|
+
- `http_status_codes`: A list of HTTP status codes that you would like to have
|
|
218
|
+
cached. By default 200, 404, 405, and 413 responses are cached.
|
|
219
|
+
|
|
220
|
+
**Note**: dogpile.cache is not installed by default, if you want to use it, `pip install dogpile.cache` in your project.
|
|
221
|
+
|
|
222
|
+
#### Throttler
|
|
223
|
+
|
|
224
|
+
Throttler contains all the logic for handling different throttling scenarios.
|
|
225
|
+
Since OPS throttling is based on a one minute rolling window, we must persist
|
|
226
|
+
historical (at least for the past minute) throttling data in order to know what
|
|
227
|
+
the proper request frequency is. Each Throttler must be instantiated with a
|
|
228
|
+
Storage object.
|
|
229
|
+
|
|
230
|
+
##### Storage
|
|
231
|
+
|
|
232
|
+
The Storage object is responsible for:
|
|
233
|
+
|
|
234
|
+
1. Knowing how to update the historical record with each request
|
|
235
|
+
(`Storage.update()`), making sure to observe the one minute rolling window
|
|
236
|
+
rule.
|
|
237
|
+
2. Calculating how long to wait before issuing the next request
|
|
238
|
+
(`Storage.delay_for()`).
|
|
239
|
+
|
|
240
|
+
Currently the only Storage backend provided is SQLite, but you can easily write
|
|
241
|
+
your own Storage backend (such as file, Redis, etc.). To use a custom Storage
|
|
242
|
+
type, just pass the Storage object when you're instantiating a Throttler object.
|
|
243
|
+
See `epo_ops.middlewares.throttle.storages.Storage` for more implementation
|
|
244
|
+
details.
|
|
245
|
+
|
|
246
|
+
[apache license]: http://www.apache.org/licenses/LICENSE-2.0
|
|
247
|
+
[developer's area]: https://developers.epo.org/ops-v3-2/apis
|
|
248
|
+
[dogpile.cache region]: https://dogpilecache.sqlalchemy.org/en/latest/api.html#module-dogpile.cache.region
|
|
249
|
+
[dogpile.cache]: https://github.com/sqlalchemy/dogpile.cache
|
|
250
|
+
[epo]: http://epo.org
|
|
251
|
+
[ops guide]: https://link.epo.org/web/ops_v3.2_documentation_-_version_1.3.19_en.pdf
|
|
252
|
+
[ops]: https://www.epo.org/searching-for-patents/data/web-services/ops.html
|
|
253
|
+
[requests.response]: http://requests.readthedocs.org/en/latest/user/advanced/#request-and-response-objects
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
epo_ops/__init__.py,sha256=FBeuePcy1kzPa9r8eVbJKuCTH7KraksI9RxpBcOqWXc,571
|
|
2
|
+
epo_ops/__version__.py,sha256=oPE9VFlJQnVOEwuJbON4aK7XhyF3CB86MEnkbhJ0NTU,204
|
|
3
|
+
epo_ops/api.py,sha256=3Oft8shAW-kh1r4DezcvoPBA-da1BvmI35n_uLJl6oQ,16621
|
|
4
|
+
epo_ops/exceptions.py,sha256=MYaRMNHBH1REggRn0Lmp8_acnHbFb9uq51HFS-qz6c4,620
|
|
5
|
+
epo_ops/models.py,sha256=RU2c4b6TzhyMj5ML4_XLN-N3YNZlna_kQ-hE1oNBIW0,3391
|
|
6
|
+
epo_ops/utils.py,sha256=mGy57QUSgpQG21_nbHEj6ZMqoxeAK8TbFwohfGDB6xw,776
|
|
7
|
+
epo_ops/middlewares/__init__.py,sha256=y6MT9llD5jdVduGHUghGr1tgssvoJ75-0L4WlFPe3pw,178
|
|
8
|
+
epo_ops/middlewares/middleware.py,sha256=kXo58_kju4IlM7vTAVwmn2Ynk5O-ZIdoWw2slonT_nY,518
|
|
9
|
+
epo_ops/middlewares/cache/__init__.py,sha256=3tN3n2RS-jxUqs5iwPHmdEIcor2c1bE3BBeLxfzYsQw,29
|
|
10
|
+
epo_ops/middlewares/cache/dogpile/__init__.py,sha256=3tN3n2RS-jxUqs5iwPHmdEIcor2c1bE3BBeLxfzYsQw,29
|
|
11
|
+
epo_ops/middlewares/cache/dogpile/dogpile.py,sha256=9kGAnoSm5nBH17A_kvmiZR1Myowx1MP10B0cnejq5to,2546
|
|
12
|
+
epo_ops/middlewares/cache/dogpile/helpers.py,sha256=2I83ocJeSsOmNkSflm7jTOXWNxiGWkVkQB3xmgf7elQ,370
|
|
13
|
+
epo_ops/middlewares/throttle/__init__.py,sha256=Tf7tlJnTxa9vE7y9b_4N8HYJ_fSUDOZMfqGDWSU7WUE,58
|
|
14
|
+
epo_ops/middlewares/throttle/throttler.py,sha256=-ATkg6DkDCfxu343hh85OCnwdm7Zyv6dfkIuMi9v1wE,696
|
|
15
|
+
epo_ops/middlewares/throttle/utils.py,sha256=DezWXXGJw9GoNk4lPtHVKQ0FZXJo98jgCzvS4gQlSrA,589
|
|
16
|
+
epo_ops/middlewares/throttle/storages/__init__.py,sha256=VtpaDmfxeQM2-tc9tad5-_DTFYp4n-Ec4QPPi6niglw,81
|
|
17
|
+
epo_ops/middlewares/throttle/storages/sqlite.py,sha256=frYBYpDyay4pTfGrVatqZVsIBATlvGqo-4zZJFddSO4,4934
|
|
18
|
+
epo_ops/middlewares/throttle/storages/storage.py,sha256=FLJ0kvUSDaI_fiHuXbT3UVNRmXcAPS5M2v3vYEYBgQY,757
|
|
19
|
+
python_epo_ops_client-4.2.0.dist-info/licenses/LICENSE,sha256=psuoW8kuDP96RQsdhzwOqi6fyWv0ct8CR6Jr7He_P_k,10173
|
|
20
|
+
python_epo_ops_client-4.2.0.dist-info/METADATA,sha256=rkkdiqIyKKasErPSPeH6oStlsRVhyDPR9H_0DDWpgr8,10419
|
|
21
|
+
python_epo_ops_client-4.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
22
|
+
python_epo_ops_client-4.2.0.dist-info/top_level.txt,sha256=0e54UaEWTAp3B6Xh_hZvJnuTK3iB0xtm57WxsW8xJkI,8
|
|
23
|
+
python_epo_ops_client-4.2.0.dist-info/RECORD,,
|