crate 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
crate/client/http.py ADDED
@@ -0,0 +1,684 @@
1
+ # -*- coding: utf-8; -*-
2
+ #
3
+ # Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
4
+ # license agreements. See the NOTICE file distributed with this work for
5
+ # additional information regarding copyright ownership. Crate licenses
6
+ # this file to you under the Apache License, Version 2.0 (the "License");
7
+ # you may not use this file except in compliance with the License. You may
8
+ # obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing, software
13
+ # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
14
+ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
15
+ # License for the specific language governing permissions and limitations
16
+ # under the License.
17
+ #
18
+ # However, if you have executed another commercial license agreement
19
+ # with Crate these terms will supersede the license and you may use the
20
+ # software solely pursuant to the terms of the relevant commercial agreement.
21
+
22
+
23
+ import calendar
24
+ import heapq
25
+ import io
26
+ import json
27
+ import logging
28
+ import os
29
+ import re
30
+ import socket
31
+ import ssl
32
+ import threading
33
+ from base64 import b64encode
34
+ from datetime import date, datetime, timezone
35
+ from decimal import Decimal
36
+ from time import time
37
+ from urllib.parse import urlparse
38
+ from uuid import UUID
39
+
40
+ import urllib3
41
+ from urllib3 import connection_from_url
42
+ from urllib3.connection import HTTPConnection
43
+ from urllib3.exceptions import (
44
+ HTTPError,
45
+ MaxRetryError,
46
+ ProtocolError,
47
+ ProxyError,
48
+ ReadTimeoutError,
49
+ SSLError,
50
+ )
51
+ from urllib3.util.retry import Retry
52
+ from verlib2 import Version
53
+
54
+ from crate.client.exceptions import (
55
+ BlobLocationNotFoundException,
56
+ ConnectionError,
57
+ DigestNotFoundException,
58
+ IntegrityError,
59
+ ProgrammingError,
60
+ )
61
+
62
# Module-level logger, named after the importing module.
logger = logging.getLogger(__name__)


# Matches server strings that already carry an explicit http:// or https://
# scheme (case-insensitive). ``pat`` is kept as a second name for the same
# compiled pattern.
_HTTP_PAT = pat = re.compile("https?://.+", re.IGNORECASE)

# HTTP status codes for which a server is taken out of the active rotation.
SRV_UNAVAILABLE_STATUSES = {502, 503, 504, 509}

# Low-level errors that, when wrapped in a urllib3 ProtocolError, do not
# cause the server to be dropped from the active list.
PRESERVE_ACTIVE_SERVER_EXCEPTIONS = {ConnectionResetError, BrokenPipeError}

# Pool keyword arguments that are stripped for non-HTTPS servers.
SSL_ONLY_ARGS = {"ca_certs", "cert_reqs", "cert_file", "key_file"}
69
+
70
+
71
def super_len(o):
    """
    Best-effort size lookup for a request body.

    The following sources are probed in order and the first one that is
    available wins:

    1. ``len(o)`` for anything implementing ``__len__``
    2. an ``o.len`` attribute
    3. the size reported by ``os.fstat`` for ``o.fileno()``
    4. ``len(o.getvalue())``

    :param o: Any object, e.g. a string, bytes or a file-like object.
    :return: The detected size, or ``None`` if no probe applies.
    """
    if hasattr(o, "__len__"):
        return len(o)

    if hasattr(o, "len"):
        return o.len

    if hasattr(o, "fileno"):
        try:
            descriptor = o.fileno()
        except io.UnsupportedOperation:
            # The object advertises fileno() but has no real descriptor
            # (in-memory streams do this); fall through to the next probe.
            pass
        else:
            return os.fstat(descriptor).st_size

    # e.g. BytesIO, StringIO
    return len(o.getvalue()) if hasattr(o, "getvalue") else None
87
+
88
+
89
class CrateJsonEncoder(json.JSONEncoder):
    """
    JSON encoder that additionally understands a few non-JSON Python types.

    - ``Decimal`` and ``UUID`` are emitted as their string representation.
    - ``datetime`` is emitted as integer milliseconds since 1970-01-01;
      timezone-aware values are measured against the UTC epoch, naive
      values against a naive epoch.
    - ``date`` is emitted as milliseconds since epoch via ``calendar.timegm``.
    """

    # Reference points for aware and naive datetime values respectively.
    epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc)
    epoch_naive = datetime(1970, 1, 1)

    def default(self, o):
        """
        Convert ``o`` into a JSON-serializable value.

        Anything not handled here is delegated to the base class, which
        raises ``TypeError``.
        """
        if isinstance(o, (Decimal, UUID)):
            return str(o)

        # datetime must be tested before date: it is a subclass of date.
        if isinstance(o, datetime):
            epoch = self.epoch_naive if o.tzinfo is None else self.epoch_aware
            delta = o - epoch
            whole_seconds = delta.days * 24 * 3600 + delta.seconds
            return int(delta.microseconds / 1000.0 + whole_seconds * 1000.0)

        if isinstance(o, date):
            return calendar.timegm(o.timetuple()) * 1000

        return super().default(o)
108
+
109
+
110
class Server:
    """
    A single HTTP endpoint, backed by a urllib3 connection pool.
    """

    def __init__(self, server, **pool_kw):
        """
        Create the connection pool for ``server``.

        :param server: URL of the server, handed to ``connection_from_url``.
        :param pool_kw: Keyword arguments for the pool. The ``socket_*``
            entries are removed and translated into socket options; all
            remaining entries are passed through to urllib3 unchanged.
        """
        socket_options = _get_socket_opts(
            pool_kw.pop("socket_keepalive", False),
            pool_kw.pop("socket_tcp_keepidle", None),
            pool_kw.pop("socket_tcp_keepintvl", None),
            pool_kw.pop("socket_tcp_keepcnt", None),
        )
        self.pool = connection_from_url(
            server,
            socket_options=socket_options,
            **pool_kw,
        )

    def request(
        self,
        method,
        path,
        data=None,
        stream=False,
        headers=None,
        username=None,
        password=None,
        schema=None,
        backoff_factor=0,
        **kwargs,
    ):
        """Send a request

        The ``Accept`` and ``Content-Type`` headers are always set to
        ``application/json``. ``Content-Length`` is added when the caller did
        not provide it and the size of ``data`` can be determined.

        :param method: HTTP method.
        :param path: Request path on this server.
        :param data: Optional request body (string, bytes or file-like).
        :param stream: When true, the response body is not preloaded.
        :param headers: Optional extra headers. The mapping is copied, the
            caller's object is never modified.
        :param username: Optional user name; sent as HTTP basic auth and as
            ``X-User`` unless those headers are already present.
        :param password: Optional password for basic auth.
        :param schema: Optional value for the ``Default-Schema`` header.
        :param backoff_factor: Backoff factor handed to urllib3's ``Retry``.
        :param kwargs: Further arguments for ``urlopen``. The keys
            ``assert_same_host``, ``redirect`` and ``retries`` are overwritten.
        :return: The response object returned by the pool's ``urlopen``.
        """
        if headers is None:
            headers = {}
        else:
            # Work on a private copy: the header additions below must not
            # leak into a mapping the caller may reuse for other requests.
            # ``.copy()`` is preferred because it keeps the mapping type.
            headers = (
                headers.copy() if hasattr(headers, "copy") else dict(headers)
            )

        if "Content-Length" not in headers:
            length = super_len(data)
            if length is not None:
                headers["Content-Length"] = length

        # Authentication credentials
        if username is not None:
            if "Authorization" not in headers:
                credentials = username + ":"
                if password is not None:
                    credentials += password
                headers["Authorization"] = "Basic %s" % b64encode(
                    credentials.encode("utf-8")
                ).decode("utf-8")
            # For backwards compatibility with Crate <= 2.2
            if "X-User" not in headers:
                headers["X-User"] = username

        if schema is not None:
            headers["Default-Schema"] = schema
        headers["Accept"] = "application/json"
        headers["Content-Type"] = "application/json"

        # Redirects are not followed here; the response is handed back to
        # the caller as is.
        kwargs["assert_same_host"] = False
        kwargs["redirect"] = False
        kwargs["retries"] = Retry(read=0, backoff_factor=backoff_factor)
        return self.pool.urlopen(
            method,
            path,
            body=data,
            preload_content=not stream,
            headers=headers,
            **kwargs,
        )

    def close(self):
        """Close the underlying connection pool."""
        self.pool.close()
179
+
180
+
181
def _json_from_response(response):
    """
    Decode the body of ``response`` as UTF-8 encoded JSON.

    :param response: Object exposing the raw body as ``data`` (bytes) and
        the response headers as ``headers``.
    :return: The deserialized JSON document.
    :raises ProgrammingError: If the body is not valid UTF-8 or not valid
        JSON. The message contains the content type and the body.
    """
    try:
        return json.loads(response.data.decode("utf-8"))
    except ValueError as ex:
        # UnicodeDecodeError is a ValueError as well, so the body may not be
        # decodable at this point. Decode leniently for the error message,
        # otherwise a second UnicodeDecodeError would escape from here
        # instead of the intended ProgrammingError.
        body = response.data.decode("utf-8", errors="replace")
        raise ProgrammingError(
            "Invalid server response of content-type '{}':\n{}".format(
                response.headers.get("content-type", "unknown"),
                body,
            )
        ) from ex
191
+
192
+
193
def _blob_path(table, digest):
    """Return the HTTP path of the blob ``digest`` within ``table``."""
    return f"/_blobs/{table}/{digest}"
195
+
196
+
197
def _ex_to_message(ex):
    """
    Return a human-readable message for the exception ``ex``.

    Uses the first non-empty of: the ``message`` attribute, ``str(ex)``,
    and finally ``repr(ex)``.
    """
    message = getattr(ex, "message", None)
    if message:
        return message

    text = str(ex)
    if text:
        return text

    return repr(ex)
199
+
200
+
201
def _raise_for_status(response):
    """
    Raise `IntegrityError` exceptions for `DuplicateKeyException` errors.

    All other outcomes of `_raise_for_status_real` are passed through
    untouched, including its return value for non-error responses.
    """
    try:
        return _raise_for_status_real(response)
    except ProgrammingError as ex:
        if "DuplicateKeyException" not in ex.message:
            raise
        raise IntegrityError(ex.message, error_trace=ex.error_trace) from ex
211
+
212
+
213
def _raise_for_status_real(response):
    """
    Turn an HTTP error response into an exception.

    make sure that only crate.exceptions are raised that are defined in
    the DB-API specification

    :param response: Object exposing ``status``, ``reason``, ``headers``
        and the raw body as ``data``.
    :return: ``None`` for any status outside of 400..599.
    :raises ConnectionError: For status 503.
    :raises ProgrammingError: For every other 4xx/5xx status. For JSON
        responses the message is taken from the body, otherwise it is
        built from status code and reason.
    """
    status = response.status
    if 400 <= status < 500:
        message = "%s Client Error: %s" % (status, response.reason)
    elif 500 <= status < 600:
        message = "%s Server Error: %s" % (status, response.reason)
    else:
        return None

    if status == 503:
        raise ConnectionError(message)

    content_type = response.headers.get("content-type", "")
    if not content_type.startswith("application/json"):
        raise ProgrammingError(message)

    payload = json.loads(response.data.decode("utf-8"))
    error = payload.get("error", {})
    error_trace = payload.get("error_trace", None)

    # When a "results" list is present, per-result error messages take
    # precedence over the top-level error.
    if "results" in payload:
        result_errors = []
        for result in payload["results"]:
            if result.get("error_message"):
                result_errors.append(result["error_message"])
        if result_errors:
            raise ProgrammingError("\n".join(result_errors))

    if not isinstance(error, dict):
        raise ProgrammingError(error, error_trace=error_trace)
    raise ProgrammingError(error.get("message", ""), error_trace=error_trace)
243
+
244
+
245
def _server_url(server):
    """
    Normalizes a given server string to an url

    A missing scheme defaults to ``http``; only scheme and network location
    are kept, any path is dropped.

    >>> print(_server_url('a'))
    http://a
    >>> print(_server_url('a:9345'))
    http://a:9345
    >>> print(_server_url('https://a:9345'))
    https://a:9345
    >>> print(_server_url('https://a'))
    https://a
    >>> print(_server_url('demo.crate.io'))
    http://demo.crate.io
    """
    candidate = server if _HTTP_PAT.match(server) else "http://%s" % server
    parts = urlparse(candidate)
    return "%s://%s" % (parts.scheme, parts.netloc)
265
+
266
+
267
def _to_server_list(servers):
    """
    Return a list of normalized server URLs.

    :param servers: Either a whitespace-separated string of servers or an
        iterable of server strings.
    """
    hosts = servers.split() if isinstance(servers, str) else servers
    return list(map(_server_url, hosts))
271
+
272
+
273
def _pool_kw_args(
    verify_ssl_cert,
    ca_cert,
    client_cert,
    client_key,
    timeout=None,
    pool_size=None,
):
    """
    Assemble the keyword arguments for the connection pools.

    :param verify_ssl_cert: Selects ``ssl.CERT_REQUIRED`` when truthy,
        ``ssl.CERT_NONE`` otherwise.
    :param ca_cert: Path to a CA bundle. When empty, the environment
        variable ``REQUESTS_CA_BUNDLE`` is consulted instead.
    :param client_cert: Path to the client certificate file.
    :param client_key: Path to the client key file.
    :param timeout: Optional timeout; strings are converted to ``float``.
    :param pool_size: Optional pool size, converted to ``int``.
    :return: Dictionary with the pool keyword arguments.
    :raises IOError: If a CA bundle is configured but does not exist.
    """
    if not ca_cert:
        ca_cert = os.environ.get("REQUESTS_CA_BUNDLE", None)
    if ca_cert and not os.path.exists(ca_cert):
        # Sanity check
        raise IOError('CA bundle file "{}" does not exist.'.format(ca_cert))

    cert_reqs = ssl.CERT_REQUIRED if verify_ssl_cert else ssl.CERT_NONE
    kw = dict(
        ca_certs=ca_cert,
        cert_reqs=cert_reqs,
        cert_file=client_cert,
        key_file=client_key,
    )
    if timeout is not None:
        kw["timeout"] = float(timeout) if isinstance(timeout, str) else timeout
    if pool_size is not None:
        kw["maxsize"] = int(pool_size)
    return kw
299
+
300
+
301
def _remove_certs_for_non_https(server, kwargs):
    """
    Strip SSL-only pool arguments for servers that are not using HTTPS.

    :param server: Server URL.
    :param kwargs: Pool keyword arguments; never modified in place.
    :return: ``kwargs`` itself when nothing has to be removed, otherwise a
        copy without the keys listed in ``SSL_ONLY_ARGS``.
    """
    if server.lower().startswith("https"):
        return kwargs

    ssl_keys = SSL_ONLY_ARGS.intersection(kwargs)
    if not ssl_keys:
        return kwargs

    return {key: value for key, value in kwargs.items() if key not in ssl_keys}
310
+
311
+
312
def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs):
    """
    On urllib3 v2, re-add support for TLS 1.0 and TLS 1.1.

    For HTTPS servers, ``kwargs`` is updated in place with
    ``ssl_minimum_version``; with older urllib3 versions or other schemes
    it is left untouched.

    https://urllib3.readthedocs.io/en/latest/v2-migration-guide.html#https-requires-tls-1-2
    """
    if Version(urllib3.__version__) < Version("2"):
        return

    from urllib3.util import parse_url

    if parse_url(server).scheme == "https":
        kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED
324
+
325
+
326
def _create_sql_payload(stmt, args, bulk_args):
    """
    Serialize an SQL statement and its parameters into a JSON request body.

    :param stmt: The SQL statement; must be a string.
    :param args: Optional parameters, stored under ``args`` when truthy.
    :param bulk_args: Optional bulk parameters, stored under ``bulk_args``
        when truthy.
    :return: JSON string, encoded with ``CrateJsonEncoder``.
    :raises ValueError: If ``stmt`` is not a string, or if both ``args``
        and ``bulk_args`` are given.
    """
    if not isinstance(stmt, str):
        raise ValueError("stmt is not a string")
    if args and bulk_args:
        raise ValueError("Cannot provide both: args and bulk_args")

    payload = {"stmt": stmt}
    for key, value in (("args", args), ("bulk_args", bulk_args)):
        if value:
            payload[key] = value
    return json.dumps(payload, cls=CrateJsonEncoder)
338
+
339
+
340
def _get_socket_opts(
    keepalive=True, tcp_keepidle=None, tcp_keepintvl=None, tcp_keepcnt=None
):
    """
    Return an optional list of socket options for urllib3's HTTPConnection
    constructor.

    :param keepalive: When falsy, ``None`` is returned and no options are
        built at all.
    :param tcp_keepidle: Optional value for ``TCP_KEEPIDLE``.
    :param tcp_keepintvl: Optional value for ``TCP_KEEPINTVL``.
    :param tcp_keepcnt: Optional value for ``TCP_KEEPCNT``.
    :return: urllib3's default socket options followed by the keepalive
        options, or ``None``.
    """
    if not keepalive:
        return None

    # always use TCP keepalive
    opts = [(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)]

    # hasattr check because some options depend on system capabilities
    # see https://docs.python.org/3/library/socket.html#socket.SOMAXCONN
    tunables = (
        ("TCP_KEEPIDLE", tcp_keepidle),
        ("TCP_KEEPINTVL", tcp_keepintvl),
        ("TCP_KEEPCNT", tcp_keepcnt),
    )
    for option_name, value in tunables:
        if value is not None and hasattr(socket, option_name):
            opts.append(
                (socket.IPPROTO_TCP, getattr(socket, option_name), value)
            )

    # additionally use urllib3's default socket options
    return list(HTTPConnection.default_socket_options) + opts
364
+
365
+
366
class Client:
    """
    Crate connection client using CrateDB's HTTP API.

    One ``Server`` is kept per known URL. Requests are handed to the active
    servers in round-robin order; servers that fail are parked on a heap of
    inactive servers and re-activated after ``retry_interval`` seconds.
    """

    # Crate URI path for issuing SQL statements.
    SQL_PATH = "/_sql?types=true"

    # Retry interval for failed servers in seconds.
    retry_interval = 30

    # Default server to use if no servers are given on instantiation.
    default_server = "http://127.0.0.1:4200"

    def __init__(
        self,
        servers=None,
        timeout=None,
        backoff_factor=0,
        verify_ssl_cert=True,
        ca_cert=None,
        error_trace=False,
        cert_file=None,
        key_file=None,
        ssl_relax_minimum_version=False,
        username=None,
        password=None,
        schema=None,
        pool_size=None,
        socket_keepalive=True,
        socket_tcp_keepidle=None,
        socket_tcp_keepintvl=None,
        socket_tcp_keepcnt=None,
    ):
        """
        Set up one connection pool per server.

        :param servers: Whitespace-separated string or iterable of servers;
            falls back to ``default_server`` when empty.
        :param timeout: Pool timeout, see ``_pool_kw_args``.
        :param backoff_factor: Backoff factor used for every request.
        :param verify_ssl_cert: Whether server certificates are verified.
        :param ca_cert: Path to a CA bundle.
        :param error_trace: When truthy, ``error_trace=true`` is appended to
            the SQL path.
        :param cert_file: Path to the client certificate.
        :param key_file: Path to the client key.
        :param ssl_relax_minimum_version: When truthy, the minimum TLS
            version is relaxed for HTTPS servers on urllib3 v2.
        :param username: User name; derived from the first server URL when
            not given.
        :param password: Password; may also come from the first server URL.
        :param schema: Default schema sent with every request.
        :param pool_size: Maximum size of each connection pool.
        :param socket_keepalive: Enables TCP keepalive socket options.
        :param socket_tcp_keepidle: Optional ``TCP_KEEPIDLE`` value.
        :param socket_tcp_keepintvl: Optional ``TCP_KEEPINTVL`` value.
        :param socket_tcp_keepcnt: Optional ``TCP_KEEPCNT`` value.
        """
        servers = _to_server_list(servers) if servers else [self.default_server]

        # Try to derive credentials from first server argument if not
        # explicitly given.
        if servers and not username:
            try:
                first_url = urlparse(servers[0])
                if first_url.username is not None:
                    username = first_url.username
                if first_url.password is not None:
                    password = first_url.password
            except Exception as ex:
                logger.warning(
                    "Unable to decode credentials from database "
                    "URI, so connecting to CrateDB without "
                    "authentication: {ex}".format(ex=ex)
                )

        self._active_servers = servers
        self._inactive_servers = []

        pool_kw = _pool_kw_args(
            verify_ssl_cert,
            ca_cert,
            cert_file,
            key_file,
            timeout,
            pool_size,
        )
        pool_kw["socket_keepalive"] = socket_keepalive
        pool_kw["socket_tcp_keepidle"] = socket_tcp_keepidle
        pool_kw["socket_tcp_keepintvl"] = socket_tcp_keepintvl
        pool_kw["socket_tcp_keepcnt"] = socket_tcp_keepcnt

        # Both attributes are read while the server pool is being filled,
        # so they have to exist before _update_server_pool() runs.
        self.ssl_relax_minimum_version = ssl_relax_minimum_version
        self.backoff_factor = backoff_factor
        self.server_pool = {}
        self._update_server_pool(servers, **pool_kw)
        self._pool_kw = pool_kw
        self._lock = threading.RLock()
        self._local = threading.local()
        self.username = username
        self.password = password
        self.schema = schema

        trace_suffix = "&error_trace=true" if error_trace else ""
        self.path = self.SQL_PATH + trace_suffix

    def close(self):
        """Close the connection pools of all known servers."""
        for pooled_server in self.server_pool.values():
            pooled_server.close()

    def _create_server(self, server, **pool_kw):
        """Create a ``Server`` for ``server`` and register it in the pool."""
        server_kw = _remove_certs_for_non_https(server, pool_kw)
        # After updating to urllib3 v2, optionally retain support
        # for TLS 1.0 and TLS 1.1, in order to support connectivity
        # to older versions of CrateDB.
        if self.ssl_relax_minimum_version:
            _update_pool_kwargs_for_ssl_minimum_version(server, server_kw)
        self.server_pool[server] = Server(server, **server_kw)

    def _update_server_pool(self, servers, **pool_kw):
        """Register every server of ``servers`` in the pool."""
        for server_url in servers:
            self._create_server(server_url, **pool_kw)

    def sql(self, stmt, parameters=None, bulk_parameters=None):
        """
        Execute SQL stmt against the crate server.

        :return: The decoded JSON response, or ``None`` if ``stmt`` is None.
        """
        if stmt is None:
            return None

        payload = _create_sql_payload(stmt, parameters, bulk_parameters)
        logger.debug(
            "Sending request to %s with payload: %s", self.path, payload
        )
        content = self._json_request("POST", self.path, data=payload)
        logger.debug("JSON response for stmt(%s): %s", stmt, content)
        return content

    def server_infos(self, server):
        """
        Query the root endpoint of ``server``.

        :return: Tuple of server URL, node name and node version. The
            version defaults to ``"0.0.0"`` when the response has none.
        """
        response = self._request("GET", "/", server=server)
        _raise_for_status(response)
        info = _json_from_response(response)
        version_number = info.get("version", {}).get("number", "0.0.0")
        return server, info.get("name"), version_number

    def blob_put(self, table, digest, data) -> bool:
        """
        Stores the contents of the file like @data object in a blob under the
        given table and digest.

        :return: ``True`` if the blob was created (201), ``False`` if it
            already exists (409).
        :raises BlobLocationNotFoundException: For status 400 or 404.
        """
        response = self._request("PUT", _blob_path(table, digest), data=data)
        status = response.status
        if status == 201:
            # blob created
            return True
        if status == 409:
            # blob exists
            return False
        if status in (400, 404):
            raise BlobLocationNotFoundException(table, digest)
        _raise_for_status(response)
        return False

    def blob_del(self, table, digest) -> bool:
        """
        Deletes the blob with given digest under the given table.

        :return: ``True`` on status 204, ``False`` on status 404.
        """
        response = self._request("DELETE", _blob_path(table, digest))
        status = response.status
        if status == 204:
            return True
        if status == 404:
            return False
        _raise_for_status(response)
        return False

    def blob_get(self, table, digest, chunk_size=1024 * 128):
        """
        Returns a file like object representing the contents of the blob
        with the given digest.

        :raises DigestNotFoundException: For status 404.
        """
        response = self._request("GET", _blob_path(table, digest), stream=True)
        if response.status == 404:
            raise DigestNotFoundException(table, digest)
        _raise_for_status(response)
        return response.stream(amt=chunk_size)

    def blob_exists(self, table, digest) -> bool:
        """
        Returns true if the blob with the given digest exists
        under the given table.
        """
        response = self._request("HEAD", _blob_path(table, digest))
        status = response.status
        if status == 200:
            return True
        if status == 404:
            return False
        _raise_for_status(response)
        return False

    def _add_server(self, server):
        """Register ``server`` in the pool unless it is already known."""
        with self._lock:
            if server not in self.server_pool:
                self._create_server(server, **self._pool_kw)

    def _request(self, method, path, server=None, **kwargs):
        """Execute a request to the cluster

        A server is selected from the server pool, unless ``server`` names
        one explicitly. Redirects are followed by re-issuing the request
        against the redirect target. Without an explicit ``server``, a
        server that fails or reports an "unavailable" status is dropped
        from the active list and the next one is tried.

        :raises ConnectionError: If an explicitly given server fails with a
            urllib3 error.
        :raises ProgrammingError: For any other exception raised while the
            request is processed.
        """
        # NOTE(review): an exception raised by _drop_server() inside the
        # ``try`` body below also ends up in the generic handler and is
        # re-raised as ProgrammingError -- confirm that this is intended.
        while True:
            target = server or self._get_server()
            try:
                response = self.server_pool[target].request(
                    method,
                    path,
                    username=self.username,
                    password=self.password,
                    backoff_factor=self.backoff_factor,
                    schema=self.schema,
                    **kwargs,
                )
                location = response.get_redirect_location()
                if location and 300 <= response.status <= 308:
                    redirect_server = _server_url(location)
                    self._add_server(redirect_server)
                    return self._request(
                        method, path, server=redirect_server, **kwargs
                    )
                if server or response.status not in SRV_UNAVAILABLE_STATUSES:
                    return response
                with self._lock:
                    # drop server from active ones
                    self._drop_server(target, response.reason)
            except (
                MaxRetryError,
                ReadTimeoutError,
                SSLError,
                HTTPError,
                ProxyError,
            ) as ex:
                ex_message = _ex_to_message(ex)
                if server:
                    raise ConnectionError(
                        "Server not available, exception: %s" % ex_message
                    ) from ex
                # A protocol error caused by one of the listed low-level
                # exceptions keeps the server in the active list.
                keep_active = isinstance(
                    ex, ProtocolError
                ) and not PRESERVE_ACTIVE_SERVER_EXCEPTIONS.isdisjoint(
                    type(arg) for arg in ex.args
                )
                if not keep_active:
                    with self._lock:
                        # drop server from active ones
                        self._drop_server(target, ex_message)
            except Exception as e:
                raise ProgrammingError(_ex_to_message(e)) from e

    def _json_request(self, method, path, data):
        """
        Issue request against the crate HTTP API.

        :return: The decoded JSON document, or the raw (empty) body if the
            response carries no data.
        """
        response = self._request(method, path, data=data)
        _raise_for_status(response)
        if len(response.data) == 0:
            return response.data
        return _json_from_response(response)

    def _get_server(self):
        """
        Get server to use for request.
        Also process inactive server list, re-add them after given interval.
        """
        with self._lock:
            for _ in range(len(self._inactive_servers)):
                try:
                    entry = heapq.heappop(self._inactive_servers)
                except IndexError:
                    continue
                dropped_at, candidate, _message = entry
                if dropped_at + self.retry_interval > time():
                    # Not yet, put it back
                    heapq.heappush(self._inactive_servers, entry)
                    continue
                self._active_servers.append(candidate)
                logger.warning(
                    "Restored server %s into active pool", candidate
                )

            # if none is old enough, use oldest
            if not self._active_servers:
                _, oldest, _ = heapq.heappop(self._inactive_servers)
                self._active_servers.append(oldest)
                logger.info("Restored server %s into active pool", oldest)

            chosen = self._active_servers[0]
            self._roundrobin()

            return chosen

    @property
    def active_servers(self):
        """get the active servers for this client"""
        with self._lock:
            return list(self._active_servers)

    def _drop_server(self, server, message):
        """
        Drop server from active list and adds it to the inactive ones.

        :raises ConnectionError: If no active server is left afterwards.
        """
        try:
            self._active_servers.remove(server)
        except ValueError:
            # Not in the active list, nothing to move.
            pass
        else:
            heapq.heappush(self._inactive_servers, (time(), server, message))
            logger.warning("Removed server %s from active pool", server)

        # if this is the last server raise exception, otherwise try next
        if self._active_servers:
            return
        raise ConnectionError(
            "No more Servers available, exception from last server: %s"
            % message
        )

    def _roundrobin(self):
        """
        Very simple round-robin implementation
        """
        head = self._active_servers.pop(0)
        self._active_servers.append(head)

    def __repr__(self):
        return "<Client {0}>".format(self._active_servers)
File without changes