python-epo-ops-client 4.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
epo_ops/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import logging
4
+
5
+ from .__version__ import __version__
6
+ from .api import Client
7
+
8
+ __title__ = "python-epo-ops-client"
9
+ __ops_version__ = "3.2"
10
+ __author__ = "George Song"
11
+ __license__ = "Apache 2.0"
12
+ __copyright__ = "Copyright 2015 Monozuku"
13
+
14
+
15
# Set default logging handler to avoid "No handler found" warnings.
#
# ``logging.NullHandler`` ships with every Python 3 release, so the hand-rolled
# Python 2.6 fallback class is no longer needed. The name is still imported at
# module level so that ``epo_ops.NullHandler`` keeps resolving for any caller
# that relied on it.
from logging import NullHandler  # noqa: E402

logging.getLogger(__name__).addHandler(NullHandler())
epo_ops/__version__.py ADDED
@@ -0,0 +1,6 @@
1
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:  # pragma: nocover
    from importlib_metadata import (  # type: ignore[no-redef]
        PackageNotFoundError,
        version,
    )

try:
    __version__ = version("python-epo-ops-client")
except PackageNotFoundError:  # pragma: nocover
    # No installed distribution metadata (e.g. running from a source checkout
    # or a vendored copy). Fall back to a placeholder instead of making the
    # whole package un-importable.
    __version__ = "unknown"
epo_ops/api.py ADDED
@@ -0,0 +1,470 @@
1
+ # -*- coding: utf-8 -*-
2
+ import logging
3
+ import warnings
4
+ from base64 import b64encode
5
+ from typing import List, Optional, Union
6
+ from xml.etree import ElementTree as ET
7
+
8
+ import requests
9
+ from requests.exceptions import HTTPError
10
+
11
+ from . import exceptions
12
+ from .middlewares import Throttler
13
+ from .models import (
14
+ NETWORK_TIMEOUT,
15
+ AccessToken,
16
+ Docdb,
17
+ Epodoc,
18
+ Original,
19
+ Request,
20
+ )
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
+
25
class Client(object):
    """Client for the EPO Open Patent Services (OPS) v3.2 REST API.

    Every public service method builds the service URL, sends the request
    through ``models.Request`` (configured with the given middlewares) using a
    bearer token, and returns the raw ``requests.Response``. The access token
    is acquired lazily and re-acquired when it is reported as expired.

    Args:
        key: OPS consumer key, used for HTTP Basic auth on the token endpoint.
        secret: OPS consumer secret, used together with ``key``.
        accept_type (str, optional): Suffix of the default ``Accept`` header;
            ``"xml"`` yields ``"application/xml"``. Defaults to "xml".
        middlewares (list, optional): Middleware instances handed to
            ``Request``. Defaults to ``[Throttler()]`` when ``None``.
    """

    __auth_url__ = "https://ops.epo.org/3.2/auth/accesstoken"
    __service_url_prefix__ = "https://ops.epo.org/3.2/rest-services"

    __family_path__ = "family"
    __images_path__ = "published-data/images"
    __legal_path__ = "legal"
    __number_path__ = "number-service"
    __published_data_path__ = "published-data"
    __published_data_search_path__ = "published-data/search"
    __register_path__ = "register"
    __register_search_path__ = "register/search"

    def __init__(self, key, secret, accept_type="xml", middlewares=None):
        self.accept_type = "application/{0}".format(accept_type)
        self.middlewares = middlewares
        if middlewares is None:
            self.middlewares = [Throttler()]
        self.request = Request(self.middlewares)
        self.key = key
        self.secret = secret
        self._access_token = None

    def family(
        self,
        reference_type: str,
        input: Union[Docdb, Epodoc],
        endpoint=None,
        constituents: Optional[List[str]] = None,
    ) -> requests.Response:
        """
        Retrieves the patent numbers of the extended patent family related to the input (INPADOC family).

        Args:
            reference_type (str): Any of "publication", "application", or "priority".
            input (Epodoc or Docdb): The document number. Cannot be Original.
            endpoint (optional): None. Not applicable for family service.
                Passing any other value only triggers a DeprecationWarning.
            constituents (List[str], optional): List of "biblio", "legal" or both.
                Defaults to None.

        Returns:
            requests.Response: a requests.Response object.

        Examples:
            >>> response = client.family("publication", epo_ops.models.Epodoc("EP1000000"))
            >>> response
            <Response [200]>
            >>> len(response.text)
            8790

            >>> response_with_constituents = client.family("publication", epo_ops.models.Epodoc("EP1000000"), None, ["biblio", "legal"])
            >>> response_with_constituents
            <Response [200]>
            >>> len(response_with_constituents.text)
            160206
        """
        if endpoint is not None:
            warnings.warn(
                "The `endpoint` argument is not used in this context and will be removed.",
                DeprecationWarning,
                stacklevel=2,
            )

        # The family service is queried with GET, so the document number is
        # embedded in the URL path rather than sent as the request body.
        url = self._make_request_url(
            dict(
                service=self.__family_path__,
                reference_type=reference_type,
                input=input,
                endpoint=None,
                constituents=constituents,
                use_get=True,
            )
        )
        return self._make_request(
            url, None, params=input.as_api_input(), use_get=True
        )

    def image(
        self,
        path: str,
        range: int = 1,
        document_format: str = "application/tiff",
    ) -> requests.Response:
        """
        Retrieve the image page for a given path, one page at a time.
        The path needs to be retrieved from the xml resulting from a prior inquiry using
        the published_data() service with the 'endpoint="images"' argument.

        Args:
            path (str): contained in the 'link' attribute of the document instance element (inquiry xml).
            range (int, optional): the number of the image page to be fetched. Defaults to 1.
            document_format (str, optional): depends on the inquiry response. Defaults to "application/tiff".

        Returns:
            requests.Response: a requests.Response object.
        """
        return self._image_request(path, range, document_format)

    def legal(
        self,
        reference_type: str,
        input: Union[Original, Docdb, Epodoc],
    ) -> requests.Response:
        """
        Retrieval service for legal data.

        Args:
            reference_type (str): Any of "publication", "application", or "priority".
            input (Original, Epodoc, or Docdb): The document number as an Original, Epodoc, or Docdb data object.

        Returns:
            requests.Response: a requests.Response object.

        Examples:
            >>> response = client.legal("publication", epo_ops.models.Epodoc("EP1000000"))
            >>> response
            <Response [200]>
            >>> "ops:legal" in response.text
            True

        Note:
            This service provides access to legal status information for patents
            as documented in chapter 3.5 of the OPS v3.2 documentation.
        """
        return self._service_request(
            dict(
                service=self.__legal_path__,
                reference_type=reference_type,
                input=input,
            )
        )

    def number(
        self,
        reference_type: str,
        input: Union[Original, Docdb, Epodoc],
        output_format: str,
    ) -> requests.Response:
        """
        This service converts a patent number from one input format into another format.

        Args:
            reference_type (str): Any of "publication", "application", or "priority".
            input (Original, Epodoc or Docdb): The document number as a data object.
            output_format (str): Any of "original", "epodoc" or "docdb".

        Returns:
            requests.Response: a requests.Response object.

        Raises:
            exceptions.InvalidNumberConversion: if the conversion from the
                input's format to ``output_format`` is not supported.

        Examples:
            # from JP original to docdb
            >>> response = client.number(
                "application",
                Original(number="2006-147056", country_code="JP", kind_code="A", date="20060526"),
                "docdb",
            )

            # from US original to epodoc
            >>> response = client.number(
                "application",
                Original("08/921,321", "US", "A", "19970829"),
                "epodoc",
            )

            # from PCT original to docdb
            >>> response = client.number(
                "application",
                Original("PCT/GB02/04635", date="19970829"),
                "docdb",
            )

        Use-cases:
            Given that other OPS services use only the Epodoc or Docdb format,
            the general use-case of this method is to convert the Original format
            into either the Docdb or the Epodoc format.

        Note:
            It is especially important to include the date of publication in the input
            whenever possible because number formatting may vary depending on the date.
        """
        possible_conversions = {
            "docdb": ["original", "epodoc"],
            "epodoc": ["original"],
            "original": ["docdb", "epodoc"],
        }
        input_format = input.__class__.__name__.lower()

        # An unknown input class has no allowed targets, so it is reported as
        # an invalid conversion rather than leaking a bare KeyError.
        if output_format not in possible_conversions.get(input_format, ()):
            raise exceptions.InvalidNumberConversion(
                "Cannot convert from {0} to {1}".format(
                    input_format, output_format
                )
            )
        return self._service_request(
            dict(
                service=self.__number_path__,
                reference_type=reference_type,
                input=input,
                endpoint=output_format,
            )
        )

    def published_data(
        self,
        reference_type: str,
        input: Union[Docdb, Epodoc],
        endpoint="biblio",
        constituents: Optional[List[str]] = None,
    ) -> requests.Response:
        """
        Retrieval service for published data.

        Args:
            reference_type (str): Any of "publication", "application", or "priority".
            input (Epodoc or Docdb): The document number as a Epodoc or Docdb data object.
            endpoint (str, optional): "biblio", "equivalents", "abstract", "claims", "description",
                        "fulltext", "images". Defaults to "biblio".
            constituents (list[str], optional): List of "biblio", "abstract", "images", "full cycle".

        Returns:
            requests.Response: a requests.Response object.

        Note:
            1) input cannot be a models.Original
            2) only the endpoint "biblio" or "equivalents" use the constituents parameter.
            3) the images and fulltext retrieval require a two-step process: inquiry, then retrieval, e.g.
                - client.published_data(..., endpoint='images',...) to retrieve the image path, then
                - client.image(path=...)
        """
        return self._service_request(
            dict(
                service=self.__published_data_path__,
                reference_type=reference_type,
                input=input,
                endpoint=endpoint,
                constituents=constituents,
            )
        )

    def published_data_search(
        self,
        cql: str,
        range_begin: int = 1,
        range_end: int = 25,
        constituents: Optional[List[str]] = None,
    ) -> requests.Response:
        """
        Performs a bibliographic search using common query language (CQL) to retrieve the data.
        Possible constituents: "abstract", "biblio" and/or "full-cycle".

        The requested result window is sent as an ``X-OPS-Range`` header of
        the form ``"<range_begin>-<range_end>"``.
        """
        range = dict(key="X-OPS-Range", begin=range_begin, end=range_end)
        return self._search_request(
            dict(
                service=self.__published_data_search_path__,
                constituents=constituents,
            ),
            cql,
            range,
        )

    def register(
        self,
        reference_type: str,
        input: Epodoc,
        constituents: Optional[List[str]] = None,
    ) -> requests.Response:
        """
        Provides the interface for the European Patent Register online service for retrieving all
        the publicly available information on published European patent applications and
        international PCT applications designating the EPO as they pass through the grant procedure.

        Possible constituents: "biblio", "events", "procedural-steps" or "upp".

        Notes:
            1) Only the Epodoc input format is supported
            2) the default behaviour of the register retrieval is biblio, so you don't have to add the
                biblio constituent if you want to retrieve only bibliographic data.
        """
        # TODO: input can only be Epodoc, not Docdb
        constituents = constituents or ["biblio"]
        return self._service_request(
            dict(
                service=self.__register_path__,
                reference_type=reference_type,
                input=input,
                constituents=constituents,
            )
        )

    def register_search(
        self, cql: str, range_begin: int = 1, range_end: int = 25
    ) -> requests.Response:
        """
        Use this service to find specific register data
        that is part of the public aspect of the patent lifecycle.

        The requested result window is sent as a ``Range`` header of the form
        ``"<range_begin>-<range_end>"``.

        Example:
            >>> response = client.register_search(cql="pa=IBM", range_begin=1, range_end=25)
            >>> print(response.text)

        """
        range = dict(key="Range", begin=range_begin, end=range_end)
        return self._search_request(
            {"service": self.__register_search_path__}, cql, range
        )

    @property
    def access_token(self):
        """Return the current access token, acquiring a new one if it is missing or expired."""
        # TODO: Custom auth handler plugin to requests?
        if not self._access_token or self._access_token.is_expired:
            self._acquire_token()
        return self._access_token

    def _acquire_token(self):
        """Request a fresh token from the auth endpoint and store it on the instance.

        Authenticates with HTTP Basic auth built from ``key:secret`` and the
        ``client_credentials`` grant. Raises ``requests`` ``HTTPError`` (via
        ``raise_for_status``) when the auth endpoint answers with an error.
        """
        headers = {
            "Authorization": "Basic {0}".format(
                b64encode(
                    "{0}:{1}".format(self.key, self.secret).encode("ascii")
                ).decode("ascii")
            ),
            "Content-Type": "application/x-www-form-urlencoded",
        }
        payload = {"grant_type": "client_credentials"}
        response = requests.post(
            self.__auth_url__,
            headers=headers,
            data=payload,
            timeout=NETWORK_TIMEOUT,
        )
        response.raise_for_status()
        self._access_token = AccessToken(response)

    def _check_for_exceeded_quota(self, response):
        """Raise a quota-specific exception for 403 responses caused by an exceeded quota.

        Responses that are not 403, or that carry no ``X-Rejection-Reason``
        header, are returned unchanged.
        """
        if (response.status_code != requests.codes.forbidden) or (
            "X-Rejection-Reason" not in response.headers
        ):
            return response

        reasons = ("IndividualQuotaPerHour", "RegisteredQuotaPerWeek")

        rejection = response.headers["X-Rejection-Reason"].lower()

        for reason in [r for r in reasons if r.lower() in rejection]:
            try:
                response.raise_for_status()
            except HTTPError as e:
                # Re-type the HTTPError in place so it keeps its request and
                # response attributes while becoming the matching
                # ``<reason>Exceeded`` exception class.
                klass = getattr(exceptions, "{0}Exceeded".format(reason))
                e.__class__ = klass
                raise
        return response  # pragma: no cover

    def _make_request(
        self, url, data, extra_headers=None, params=None, use_get=False
    ):
        """Send an authenticated request and return the response.

        Args:
            url (str): Fully built service URL.
            data: Request body (may be None).
            extra_headers (dict, optional): Headers merged over the defaults
                (``Accept``, ``Content-Type``, ``Authorization``).
            params (optional): Query parameters passed through to the request.
            use_get (bool, optional): Use GET instead of POST. Defaults to False.

        Raises:
            requests.exceptions.HTTPError: for error responses, including the
                quota-specific subclasses from ``exceptions``.
        """
        token = "Bearer {0}".format(self.access_token.token)
        headers = {
            "Accept": self.accept_type,
            "Content-Type": "text/plain",
            "Authorization": token,
        }
        headers.update(extra_headers or {})
        request_method = self.request.post
        if use_get:
            request_method = self.request.get

        response = request_method(
            url, data=data, headers=headers, params=params
        )
        response = self._check_for_expired_token(response)
        response = self._check_for_exceeded_quota(response)
        response.raise_for_status()
        return response

    # info: {
    #   use_get?: boolean = False
    #   service?: string,
    #   reference_type?: string,
    #   input?: BaseInput | BaseInput[],
    #   endpoint?: string,
    #   constituents?: string[]
    # }
    def _make_request_url(self, info):
        """Join the URL parts described by ``info`` with "/", skipping empty parts.

        When ``info["use_get"]`` is true, the input's ``as_api_input()`` value
        is placed in the path between the input format and the endpoint.
        """
        _input = info.get("input", None)
        input_format = _input.__class__.__name__.lower() if _input else None
        constituents = info.get("constituents") or []

        parts_pre = [
            self.__service_url_prefix__,
            info.get("service", None),
            info.get("reference_type", None),
            input_format,
        ]
        parts_post = [info.get("endpoint", None), ",".join(constituents)]

        if info.get("use_get", False):
            parts = parts_pre + [_input.as_api_input()] + parts_post
        else:
            parts = parts_pre + parts_post

        return "/".join(filter(None, parts))

    # Service requests
    # info: {service, reference_type, input, endpoint, constituents}
    def _service_request(self, info):
        """POST the input(s) described by ``info`` to the matching service URL.

        A list of inputs is sent as one newline-separated body; the first
        element decides the input format used in the URL.
        """
        _input = info["input"]
        if isinstance(_input, list):
            data = "\n".join([i.as_api_input() for i in _input])
            # Work on a copy so the caller's dict is not modified.
            info = dict(info, input=_input[0])
        else:
            data = _input.as_api_input()

        url = self._make_request_url(info)
        return self._make_request(url, data)

    # info: {service, constituents}
    def _search_request(self, info, cql, range):
        """POST a CQL query, sending ``range`` as a ``"<begin>-<end>"`` header named ``range["key"]``."""
        url = self._make_request_url(info)
        return self._make_request(
            url, {"q": cql}, {range["key"]: "{begin}-{end}".format(**range)}
        )

    def _image_request(self, path, range, document_format):
        """POST an image retrieval request for ``path``.

        The images service prefix is stripped from ``path`` before it is sent
        as the body, ``range`` is sent as the ``Range`` query parameter, and
        ``document_format`` overrides the ``Accept`` header.
        """
        url = self._make_request_url({"service": self.__images_path__})
        params = {"Range": range}
        data = path.replace(self.__images_path__ + "/", "")
        return self._make_request(
            url,
            data=data,
            extra_headers={"Accept": document_format},
            params=params,
        )

    def _check_for_expired_token(self, response):
        """Re-acquire the token and replay the request when OPS reports an invalid token.

        Only 400 responses whose XML body has a ``message`` element equal to
        ``invalid_access_token`` trigger a retry; every other response is
        returned unchanged.
        """
        if response.status_code != requests.codes.bad:
            return response

        try:
            # FIXME: S314 Using `xml` to parse untrusted data is known to be vulnerable to XML attacks; use `defusedxml` equivalents
            message = ET.fromstring(response.content)  # noqa: S314
        except ET.ParseError:
            # Not an XML error document (e.g. a non-XML accept type). Leave the
            # response alone so the caller's raise_for_status() reports the 400.
            return response

        if message.findtext("message") != "invalid_access_token":
            return response

        self._acquire_token()

        # Replay the request as it was originally sent: same HTTP method and
        # the same non-default headers. These three are the only extra headers
        # this client ever adds; Authorization is rebuilt from the new token.
        original = response.request
        carried_headers = {
            name: original.headers[name]
            for name in ("Accept", "Range", "X-OPS-Range")
            if name in original.headers
        }
        return self._make_request(
            original.url,
            original.body,
            extra_headers=carried_headers,
            use_get=(original.method or "").upper() == "GET",
        )
epo_ops/exceptions.py ADDED
@@ -0,0 +1,30 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import logging
4
+
5
+ from requests.exceptions import HTTPError
6
+
7
+ log = logging.getLogger(__name__)
8
+
9
+
10
+ # Query errors
11
class InvalidDate(ValueError):
    """Raised when the caller supplies a date that is not valid."""
13
+
14
+
15
class MissingRequiredValue(ValueError):
    """Raised when the caller omits a value that is required."""
17
+
18
+
19
+ # Number service error
20
class InvalidNumberConversion(ValueError):
    """Raised when a number-service conversion between two formats is not supported."""
22
+
23
+
24
+ # OPS quota errors
25
class IndividualQuotaPerHourExceeded(HTTPError):
    """The individual hourly quota (approx 450MB) has been exceeded."""
27
+
28
+
29
class RegisteredQuotaPerWeekExceeded(HTTPError):
    """The registered weekly quota (2.5GB) has been exceeded."""
@@ -0,0 +1,9 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from .middleware import Middleware
4
+ from .throttle import Throttler
5
+
6
+ try:
7
+ from .cache import Dogpile
8
+ except ImportError: # pragma: no cover
9
+ pass
@@ -0,0 +1 @@
1
+ from .dogpile import Dogpile
@@ -0,0 +1 @@
1
+ from .dogpile import Dogpile
@@ -0,0 +1,78 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from __future__ import absolute_import
4
+
5
+ import logging
6
+ import os
7
+
8
+ import requests
9
+ from dogpile.cache import make_region
10
+ from dogpile.cache.api import NO_VALUE
11
+
12
+ from .... import __version__
13
+ from ...middleware import Middleware
14
+ from .helpers import kwarg_range_header_handler
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+ # FIXME: S108 Probable insecure usage of temporary file or directory: "/var/tmp/python-epo-ops-client/cache.dbm"
19
+ DEFAULT_DBM_PATH = "/var/tmp/python-epo-ops-client/cache.dbm" # noqa: S108
20
+ DEFAULT_TIMEOUT = 60 * 60 * 24 * 7 * 2 # 2 weeks in seconds
21
+
22
+
23
class Dogpile(Middleware):
    """Caching middleware backed by a dogpile.cache region.

    Args:
        region (optional): A configured dogpile cache region. When falsy, a
            ``dogpile.cache.dbm`` region stored at ``DEFAULT_DBM_PATH`` with
            an expiration of ``DEFAULT_TIMEOUT`` seconds is created.
        kwargs_handlers (list, optional): Callables invoked with the request
            keyword arguments; each non-empty string they return becomes part
            of the cache key. Defaults to ``[kwarg_range_header_handler]``.
        http_status_codes (optional): Status codes whose responses are
            cached. Defaults to 200, 404, 405 and 413.
    """

    def __init__(
        self, region=None, kwargs_handlers=None, http_status_codes=None
    ):
        if not region:
            # exist_ok avoids the check-then-create race when several
            # processes initialise the default cache at the same time.
            os.makedirs(os.path.dirname(DEFAULT_DBM_PATH), exist_ok=True)

            region = make_region().configure(
                "dogpile.cache.dbm",
                expiration_time=DEFAULT_TIMEOUT,
                arguments={"filename": DEFAULT_DBM_PATH},
            )
        self.region = region

        if not kwargs_handlers:
            kwargs_handlers = [kwarg_range_header_handler]
        self.kwargs_handlers = kwargs_handlers

        if not http_status_codes:
            http_status_codes = (
                requests.codes.ok,  # 200
                requests.codes.not_found,  # 404
                requests.codes.method_not_allowed,  # 405
                requests.codes.request_entity_too_large,  # 413
            )
        self.http_status_codes = http_status_codes

    def generate_key(self, *args, **kwargs):
        """Build the cache key from the package version, the positional args and the handler outputs.

        The parts are joined with "|"; handlers that return an empty value
        contribute nothing.
        """
        key = ["epo-ops-{0}".format(__version__)] + list(map(str, args))

        for handler in self.kwargs_handlers:
            s = handler(**kwargs)
            if s:
                key.append(s)

        return "|".join(key)

    def is_response_cacheable(self, response):
        """Return True when the response's status code is one of the cacheable codes."""
        return response.status_code in self.http_status_codes

    def process_request(self, env, url, data, **kwargs):
        """Look the request up in the cache and record the outcome in ``env``.

        Always stores the key under ``env["cache-key"]``. On a cache hit it
        also sets ``env["from-cache"]`` to True and ``env["response"]`` to the
        cached response. Returns ``(url, data, kwargs)`` unchanged.
        """
        key = self.generate_key(url, data, **kwargs)
        env["cache-key"] = key
        response = self.region.get(key)
        # NO_VALUE is dogpile's "cache miss" sentinel; compare by identity.
        if response is not NO_VALUE:
            env["from-cache"] = True
            env["response"] = response
        return url, data, kwargs

    def process_response(self, env, response):
        """Store a freshly fetched, cacheable response and flag it in ``env``.

        Responses that came from the cache are not written back. Returns the
        response unchanged.
        """
        if (not env["from-cache"]) and self.is_response_cacheable(response):
            self.region.set(env["cache-key"], response)
            env["is-cached"] = True
        return response
@@ -0,0 +1,14 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import logging
4
+
5
+ log = logging.getLogger(__name__)
6
+
7
+
8
def kwarg_range_header_handler(**kwargs):
    """Return a cache-key fragment describing the range headers of a request.

    Looks at the ``headers`` keyword argument and emits one
    ``headers.<name>=<value>`` entry for each of the ``Range`` and
    ``X-OPS-Range`` headers that is present, joined with "|". The entries are
    produced in a fixed order so that the same request always yields the same
    key, independent of set/hash ordering.

    Args:
        **kwargs: Request keyword arguments; only ``headers`` is inspected.
            A missing or ``None`` value is treated as "no headers".

    Returns:
        str: The key fragment, or an empty string when no range header is set.
    """
    headers = kwargs.get("headers") or {}
    return "|".join(
        "headers.{0}={1}".format(name, headers[name])
        for name in ("Range", "X-OPS-Range")
        if name in headers
    )
@@ -0,0 +1,19 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import logging
4
+
5
+ log = logging.getLogger(__name__)
6
+
7
+
8
class Middleware(object):
    """Base class for request/response middlewares.

    Subclasses must override both hooks; the base implementations only raise
    ``NotImplementedError``.
    """

    def process_request(self, env, url, data, **kwargs):
        """Inspect or rewrite an outgoing request.

        Args:
            env (dict): Mutable per-request state shared along the chain.
            url: The request URL.
            data: The request body.
            **kwargs: Remaining request keyword arguments (e.g. ``headers``).

        Returns:
            tuple: ``(url, data, kwargs)``, possibly modified. This is the
            shape the ``Dogpile`` middleware returns. To supply a ready-made
            response, ``Dogpile`` stores it in ``env["response"]`` rather than
            returning it.
            NOTE(review): how the request runner consumes ``env["response"]``
            is not visible here; confirm against ``models.Request``.
        """
        raise NotImplementedError

    def process_response(self, env, response):
        """Inspect or replace an incoming response.

        Args:
            env (dict): The same per-request state passed to ``process_request``.
            response: The response object being processed.

        Returns:
            The response to hand on (``Dogpile`` returns it unchanged).
        """
        raise NotImplementedError
@@ -0,0 +1,3 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from .throttler import Throttler
@@ -0,0 +1,4 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from .sqlite import SQLite
4
+ from .storage import Storage