nucliadb-utils 6.8.1.post4926__py3-none-any.whl → 6.8.1.post4939__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.


nucliadb_utils/storages/exceptions.py

```diff
@@ -21,10 +21,6 @@ class InvalidCloudFile(Exception):
     pass
 
 
-class CouldNotCreateBucket(Exception):
-    pass
-
-
 class InvalidOffset(Exception):
     def __init__(self, range_header, offset):
         self.range_header = range_header
```
nucliadb_utils/storages/gcs.py

```diff
@@ -43,7 +43,6 @@ from nucliadb_utils import logger
 from nucliadb_utils.storages import CHUNK_SIZE
 from nucliadb_utils.storages.exceptions import (
     CouldNotCopyNotFound,
-    CouldNotCreateBucket,
     InvalidOffset,
     ResumableUploadGone,
 )
@@ -97,7 +96,6 @@ RETRIABLE_EXCEPTIONS = (
     aiohttp.client_exceptions.ClientOSError,
     aiohttp.client_exceptions.ServerConnectionError,
     aiohttp.client_exceptions.ServerDisconnectedError,
-    CouldNotCreateBucket,
     socket.gaierror,
 )
 
```
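`CouldNotCreateBucket` disappears from the exception module, the import list, and the retriable set; the bucket-creation path further down now raises `GoogleCloudException` instead. A minimal sketch of how a caller might migrate, assuming `GoogleCloudException` lives in `nucliadb_utils.storages.exceptions` (the `ensure_bucket` helper is hypothetical):

```python
# Hypothetical call site migrating off the removed exception class.
from nucliadb_utils.storages.exceptions import GoogleCloudException  # assumed location


async def ensure_bucket(storage, name: str) -> bool:
    try:
        await storage.create_bucket(name)
        return True
    except GoogleCloudException:  # previously: except CouldNotCreateBucket
        return False
```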
```diff
@@ -131,9 +129,6 @@ class GCSStorageField(StorageField):
         origin_bucket_name: str,
         destination_bucket_name: str,
     ):
-        if self.storage.session is None:
-            raise AttributeError()
-
         url = "{}/{}/o/{}/rewriteTo/b/{}/o/{}".format(
             self.storage.object_base_url,
             origin_bucket_name,
```
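This is the first of several hunks that delete the repeated `if self.storage.session is None: raise AttributeError()` guard; the check is consolidated into a single `session` property on `GCSStorage` (see the property hunk further down). A minimal sketch of the pattern and why it also narrows the type for checkers (names are illustrative, not the library's):

```python
from typing import Optional


class Holder:
    _session: Optional[str] = None

    @property
    def session(self) -> str:
        # One place narrows Optional[str] to str; callers skip the None check.
        if self._session is None:
            raise AttributeError("not initialized")
        return self._session


h = Holder()
h._session = "ready"
print(h.session.upper())  # type checkers see str here, never Optional[str]
```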
```diff
@@ -231,9 +226,6 @@ class GCSStorageField(StorageField):
 
         cf: New file to upload
         """
-        if self.storage.session is None:
-            raise AttributeError()
-
         if self.field is not None and self.field.upload_uri != "":
             # If there is a temporal url
             await self.storage.delete_upload(self.field.upload_uri, self.field.bucket_name)
@@ -308,10 +300,6 @@ class GCSStorageField(StorageField):
     async def _append(self, cf: CloudFile, data: bytes):
         if self.field is None:
             raise AttributeError()
-
-        if self.storage.session is None:
-            raise AttributeError()
-
         # size = 0 ==> size may be unset, as 0 is the default protobuffer value
         # Makes no sense to assume a file with size = 0 in upload
         if cf.size > 0:
@@ -394,8 +382,6 @@ class GCSStorageField(StorageField):
         Existence can be checked either with a CloudFile data in the field attribute
         or own StorageField key and bucket. Field takes precendece
         """
-        if self.storage.session is None:
-            raise AttributeError()
         key = None
         bucket = None
         if self.field is not None and self.field.uri != "":
@@ -439,7 +425,7 @@ class GCSStorageField(StorageField):
 
 class GCSStorage(Storage):
     field_klass = GCSStorageField
-    session: Optional[aiohttp.ClientSession] = None
+    _session: Optional[aiohttp.ClientSession] = None
     _credentials = None
     _json_credentials = None
     chunk_size = CHUNK_SIZE
```
```diff
@@ -484,7 +470,7 @@ class GCSStorage(Storage):
         self.source = CloudFile.GCS
         self.deadletter_bucket = deadletter_bucket
         self.indexing_bucket = indexing_bucket
-        self.bucket = bucket
+        self.bucket = bucket or "{kbid}"
         self._location = location
         self._project = project
         # https://cloud.google.com/storage/docs/bucket-locations
```
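The `bucket` argument now falls back to the literal template `"{kbid}"`, which `get_bucket_name` (further down) expands with `str.format`. A quick illustration of the template behavior:

```python
# The bucket name is a str.format template keyed by the knowledge-box id.
default_template = "{kbid}"              # new fallback value
configured_template = "nucliadb-{kbid}"  # hypothetical configured value

print(default_template.format(kbid="kb123"))     # -> kb123
print(configured_template.format(kbid="kb123"))  # -> nucliadb-kb123
```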
```diff
@@ -492,7 +478,7 @@ class GCSStorage(Storage):
         self._executor = executor
         self._upload_url = url + "/upload/storage/v1/b/{bucket}/o"
         self.object_base_url = url + "/storage/v1/b"
-        self._client = None
+        self._session = None
 
     def _get_access_token(self):
         if self._credentials.expired or self._credentials.valid is False:
```
```diff
@@ -501,11 +487,17 @@ class GCSStorage(Storage):
 
         return self._credentials.token
 
+    @property
+    def session(self) -> aiohttp.ClientSession:
+        if self._session is None:  # pragma: no cover
+            raise AttributeError("Session not initialized, call initialize first")
+        return self._session
+
     @storage_ops_observer.wrap({"type": "initialize"})
     async def initialize(self):
         loop = asyncio.get_event_loop()
 
-        self.session = aiohttp.ClientSession(
+        self._session = aiohttp.ClientSession(
             loop=loop, connector=aiohttp.TCPConnector(ttl_dns_cache=60 * 5), timeout=TIMEOUT
         )
         try:
```
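The new property is the single point that rejects use of the client before `initialize()`. A runnable sketch of the same lazy-session lifecycle outside nucliadb (the class and URL are illustrative, not the library's API):

```python
import asyncio
from typing import Optional

import aiohttp


class LazySessionClient:
    """Illustrative stand-in for GCSStorage's session handling."""

    _session: Optional[aiohttp.ClientSession] = None

    @property
    def session(self) -> aiohttp.ClientSession:
        if self._session is None:
            raise AttributeError("Session not initialized, call initialize first")
        return self._session

    async def initialize(self) -> None:
        self._session = aiohttp.ClientSession()

    async def finalize(self) -> None:
        # Mirrors the finalize() change below: safe even if initialize() never ran.
        if self._session is not None:
            await self._session.close()
        self._session = None


async def main() -> None:
    client = LazySessionClient()
    await client.initialize()
    try:
        async with client.session.get("https://example.org") as resp:
            print(resp.status)
    finally:
        await client.finalize()


asyncio.run(main())
```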
```diff
@@ -521,7 +513,9 @@ class GCSStorage(Storage):
             logger.exception(f"Could not create bucket {self.indexing_bucket}", exc_info=True)
 
     async def finalize(self):
-        await self.session.close()
+        if self._session is not None:
+            await self._session.close()
+        self._session = None
 
     async def get_access_headers(self):
         if self._credentials is None:
@@ -538,8 +532,6 @@ class GCSStorage(Storage):
     )
     @storage_ops_observer.wrap({"type": "delete"})
     async def delete_upload(self, uri: str, bucket_name: str):
-        if self.session is None:
-            raise AttributeError()
         if uri:
             url = "{}/{}/o/{}".format(self.object_base_url, bucket_name, quote_plus(uri))
             headers = await self.get_access_headers()
@@ -557,9 +549,6 @@ class GCSStorage(Storage):
 
     @storage_ops_observer.wrap({"type": "check_bucket_exists"})
     async def check_exists(self, bucket_name: str):
-        if self.session is None:
-            raise AttributeError()
-
         headers = await self.get_access_headers()
         # Using object access url instead of bucket access to avoid
         # giving admin permission to the SA, needed to GET a bucket
```
```diff
@@ -573,10 +562,14 @@ class GCSStorage(Storage):
             return True
         return False
 
+    @backoff.on_exception(
+        backoff.expo,
+        RETRIABLE_EXCEPTIONS,
+        jitter=backoff.random_jitter,
+        max_tries=MAX_TRIES,
+    )
+    @storage_ops_observer.wrap({"type": "create_bucket"})
     async def create_bucket(self, bucket_name: str, kbid: Optional[str] = None):
-        if self.session is None:
-            raise AttributeError()
-
         if await self.check_exists(bucket_name=bucket_name):
             return
 
```
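`create_bucket` now sits behind the same `backoff.on_exception` decorator used elsewhere in the module, so any exception in `RETRIABLE_EXCEPTIONS` triggers an exponential-backoff retry, up to `MAX_TRIES` attempts. A self-contained sketch of the decorator's behavior (the flaky function and its exception set are hypothetical):

```python
import backoff

attempts = 0


@backoff.on_exception(backoff.expo, (ConnectionError,), jitter=backoff.random_jitter, max_tries=4)
def flaky_create() -> str:
    global attempts
    attempts += 1
    if attempts < 3:
        raise ConnectionError("transient")  # retried by the decorator
    return "ok"


print(flaky_create(), "after", attempts, "attempts")  # -> ok after 3 attempts
```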
```diff
@@ -586,10 +579,7 @@ class GCSStorage(Storage):
         labels = deepcopy(self._bucket_labels)
         if kbid is not None:
             labels["kbid"] = kbid.lower()
-        await self._create_bucket(url, headers, bucket_name, labels)
 
-    @storage_ops_observer.wrap({"type": "create_bucket"})
-    async def _create_bucket(self, url, headers, bucket_name, labels):
         async with self.session.post(
             url,
             headers=headers,
@@ -605,20 +595,12 @@ class GCSStorage(Storage):
                 },
             },
         ) as resp:
-            if resp.status != 200:  # pragma: no cover
-                logger.info(f"Creation of bucket error: {resp.status}")
-                text = await resp.text()
-                logger.info(f"Bucket : {bucket_name}")
-                logger.info(f"Location : {self._location}")
-                logger.info(f"Labels : {labels}")
-                logger.info(f"URL : {url}")
-                logger.info(text)
-
-                raise CouldNotCreateBucket(text)
+            if resp.status == 200:
+                return
+            text = await resp.text()
+            raise GoogleCloudException(f"Bucket creation failed with status {resp.status}: {text}")
 
     def get_bucket_name(self, kbid: str):
-        if self.bucket is None:
-            raise AttributeError()
         bucket_name = self.bucket.format(
             kbid=kbid,
         )
```
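The `_create_bucket` helper is inlined into `create_bucket`, and the verbose per-field `logger.info` calls collapse into an early return on HTTP 200 plus a single exception that carries both the status and the response body. A sketch of the same aiohttp response-check pattern (`RuntimeError` stands in for `GoogleCloudException`, and the function is hypothetical):

```python
import aiohttp


async def post_and_check(session: aiohttp.ClientSession, url: str, payload: dict) -> None:
    async with session.post(url, json=payload) as resp:
        if resp.status == 200:
            return
        text = await resp.text()  # body is read once and attached to the error
        raise RuntimeError(f"Bucket creation failed with status {resp.status}: {text}")
```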
```diff
@@ -631,13 +613,12 @@ class GCSStorage(Storage):
             await self.create_bucket(bucket_name, kbid)
             created = True
         except Exception as e:
+            errors.capture_exception(e)
             logger.exception(f"Could not create bucket {kbid}", exc_info=e)
         return created
 
     @storage_ops_observer.wrap({"type": "schedule_delete"})
     async def schedule_delete_kb(self, kbid: str):
-        if self.session is None:
-            raise AttributeError()
         bucket_name = self.get_bucket_name(kbid)
         headers = await self.get_access_headers()
         url = f"{self.object_base_url}/{bucket_name}?fields=lifecycle"
```
```diff
@@ -664,8 +645,6 @@ class GCSStorage(Storage):
 
     @storage_ops_observer.wrap({"type": "delete"})
     async def delete_kb(self, kbid: str) -> tuple[bool, bool]:
-        if self.session is None:
-            raise AttributeError()
         bucket_name = self.get_bucket_name(kbid)
         headers = await self.get_access_headers()
         url = f"{self.object_base_url}/{bucket_name}"
@@ -694,8 +673,6 @@ class GCSStorage(Storage):
     async def iterate_objects(
         self, bucket: str, prefix: str, start: Optional[str] = None
     ) -> AsyncGenerator[ObjectInfo, None]:
-        if self.session is None:
-            raise AttributeError()
         url = "{}/{}/o".format(self.object_base_url, bucket)
         headers = await self.get_access_headers()
         params = {"prefix": prefix}
@@ -743,8 +720,6 @@ class GCSStorage(Storage):
         """
         Put an object in the storage without any metadata.
         """
-        if self.session is None:  # pragma: no cover
-            raise AttributeError()
         bucket_upload_url = self._upload_url.format(bucket=bucket_name)
         url = f"{bucket_upload_url}?uploadType=media&name={quote_plus(key)}"
         headers = await self.get_access_headers()
```
nucliadb_utils/storages/s3.py

```diff
@@ -522,6 +522,12 @@ async def bucket_exists(client: AioSession, bucket_name: str) -> bool:
     return exists
 
 
+@backoff.on_exception(
+    backoff.expo,
+    RETRIABLE_EXCEPTIONS,
+    jitter=backoff.random_jitter,
+    max_tries=MAX_TRIES,
+)
 async def create_bucket(
     client: AioSession,
     bucket_name: str,
```
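s3.py gains the same retry decorator on its module-level `create_bucket` helper; `backoff.on_exception` wraps coroutines as well, so the decorated helper retries transparently. A minimal async sketch with a hypothetical exception set:

```python
import asyncio

import backoff

calls: list[str] = []


@backoff.on_exception(backoff.expo, (TimeoutError,), jitter=backoff.random_jitter, max_tries=3)
async def create_bucket_sketch(name: str) -> str:
    calls.append(name)
    if len(calls) < 2:
        raise TimeoutError("transient")  # first attempt fails; the decorator retries
    return f"created {name}"


print(asyncio.run(create_bucket_sketch("my-bucket")))  # -> created my-bucket
```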
nucliadb_utils/tests/gcs.py

```diff
@@ -119,7 +119,7 @@ async def gcs_storage(gcs, gcs_storage_settings: dict[str, Any]):
     storage = GCSStorage(
         url=storage_settings.gcs_endpoint_url,
         account_credentials=storage_settings.gcs_base64_creds,
-        bucket=storage_settings.gcs_bucket,
+        bucket=storage_settings.gcs_bucket,  # type: ignore
         location=storage_settings.gcs_location,
         project=storage_settings.gcs_project,
         executor=ThreadPoolExecutor(1),
```
nucliadb_utils/utilities.py

```diff
@@ -155,6 +155,9 @@ async def _create_storage(gcs_scopes: Optional[List[str]] = None) -> Storage:
     elif storage_settings.file_backend == FileBackendConfig.GCS:
         from nucliadb_utils.storages.gcs import GCSStorage
 
+        if storage_settings.gcs_bucket is None:
+            raise ConfigurationError("GCS_BUCKET env var not configured")
+
         gcsutil = GCSStorage(
             url=storage_settings.gcs_endpoint_url,
             account_credentials=storage_settings.gcs_base64_creds,
```
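`_create_storage` now rejects a missing bucket setting up front instead of letting `GCSStorage` fail later on first use. A self-contained sketch of the fail-fast check (the `Settings` class is hypothetical, and `ConfigurationError` is re-declared locally for the example):

```python
from dataclasses import dataclass
from typing import Optional


class ConfigurationError(Exception):
    """Local stand-in for nucliadb's ConfigurationError."""


@dataclass
class Settings:
    gcs_bucket: Optional[str] = None


def resolve_bucket(settings: Settings) -> str:
    # Fail fast at startup rather than deep inside the storage layer.
    if settings.gcs_bucket is None:
        raise ConfigurationError("GCS_BUCKET env var not configured")
    return settings.gcs_bucket


print(resolve_bucket(Settings(gcs_bucket="{kbid}")))  # -> {kbid}
```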
nucliadb_utils-6.8.1.post4939.dist-info/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nucliadb_utils
-Version: 6.8.1.post4926
+Version: 6.8.1.post4939
 Summary: NucliaDB util library
 Author-email: Nuclia <nucliadb@nuclia.com>
 License-Expression: AGPL-3.0-or-later
@@ -27,8 +27,8 @@ Requires-Dist: nats-py[nkeys]>=2.6.0
 Requires-Dist: PyNaCl
 Requires-Dist: pyjwt>=2.4.0
 Requires-Dist: mrflagly>=0.2.9
-Requires-Dist: nucliadb-protos>=6.8.1.post4926
-Requires-Dist: nucliadb-telemetry>=6.8.1.post4926
+Requires-Dist: nucliadb-protos>=6.8.1.post4939
+Requires-Dist: nucliadb-telemetry>=6.8.1.post4939
 Provides-Extra: cache
 Requires-Dist: redis>=4.3.4; extra == "cache"
 Requires-Dist: orjson>=3.6.7; extra == "cache"
```
nucliadb_utils-6.8.1.post4939.dist-info/RECORD

```diff
@@ -15,7 +15,7 @@ nucliadb_utils/settings.py,sha256=lZUCliwNKYfk_Tt0KiYeHsT4jRBG0gLAompuHWu9fBI,82
 nucliadb_utils/signals.py,sha256=lo_Mk12NIX5Au--3H3WObvDOXq_OMurql2qiC2TnAao,2676
 nucliadb_utils/store.py,sha256=kQ35HemE0v4_Qg6xVqNIJi8vSFAYQtwI3rDtMsNy62Y,890
 nucliadb_utils/transaction.py,sha256=l3ZvrITYMnAs_fv1OOC-1nDZxWPG5qmbBhzvuC3DUzQ,8039
-nucliadb_utils/utilities.py,sha256=-7iNa3My7gpIrpJDgswgQ9rx8pumkv-wyWUiG6pJxVo,15659
+nucliadb_utils/utilities.py,sha256=D07dg5Ycm1sYkO65OqxHG9TqDfvBoy91143iln8pOhA,15782
 nucliadb_utils/aiopynecone/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZVV_MJn4bIXMa20ks,835
 nucliadb_utils/aiopynecone/client.py,sha256=MPyHnDXwhukJr7U3CJh7BpsekfSuOkyM4g5b9LLtzc8,22941
 nucliadb_utils/aiopynecone/exceptions.py,sha256=fUErx3ceKQK1MUbOnYcZhIzpNe8UVAptZE9JIRDLXDE,4000
@@ -40,12 +40,12 @@ nucliadb_utils/nuclia_usage/utils/__init__.py,sha256=cp15ZcFnHvpcu_5-aK2A4uUyvuZ
 nucliadb_utils/nuclia_usage/utils/kb_usage_report.py,sha256=6lLuxCCPQVn3dOuZNL5ThPjl2yws-1TJ_7duhQSWkPU,3934
 nucliadb_utils/storages/__init__.py,sha256=5Qc8AUWiJv9_JbGCBpAn88AIJhwDlm0OPQpg2ZdRL4U,872
 nucliadb_utils/storages/azure.py,sha256=EEUyi-2c69FQz8iPhKixkZDp8xVMgMFGEPaZDVuillc,17429
-nucliadb_utils/storages/exceptions.py,sha256=GOPKH-F3dPTfHEkwGNfVkSfF70eWJJXjI83yccw9WpA,2501
-nucliadb_utils/storages/gcs.py,sha256=XbtX0Lt3GO7kzuJ1E5CdazlpSqjU46Bhhezq32VQUok,29041
+nucliadb_utils/storages/exceptions.py,sha256=07Isip18qxkEynGz28AkO2BBC34b_zjL5dEUzeSC2OU,2451
+nucliadb_utils/storages/gcs.py,sha256=VyT72My34N4pEMmrQc5wdAMNLiuqpYl8OW3d50cJfSA,28222
 nucliadb_utils/storages/local.py,sha256=2aCHpZymORG_dUc1FDq0VFcgQulu0w2pZiUaj9dphFs,11686
 nucliadb_utils/storages/nuclia.py,sha256=vEv94xAT7QM2g80S25QyrOw2pzvP2BAX-ADgZLtuCVc,2097
 nucliadb_utils/storages/object_store.py,sha256=2PueRP5Q3XOuWgKhj6B9Kp2fyBql5np0T400YRUbqn4,4535
-nucliadb_utils/storages/s3.py,sha256=1mMXfC0hCJLlVnw-B_WWPWZrNyeYzW3bqQm3u-EE9T8,21707
+nucliadb_utils/storages/s3.py,sha256=eFFVRgNTIxTz1Hpmd6ofRz9KQhPJAmiyetW4EmWN8EM,21835
 nucliadb_utils/storages/settings.py,sha256=ugCPy1zxBOmA2KosT-4tsjpvP002kg5iQyi42yCGCJA,1285
 nucliadb_utils/storages/storage.py,sha256=2EIgnaCN5XzKpienounOjQ2AX3ANtQA2Xgl6hnMpHr4,21951
 nucliadb_utils/storages/utils.py,sha256=F4Iboa_0_bhDQr-JOKD9sGPld_-hKwJW5ptyZdn9Oag,1505
@@ -53,11 +53,11 @@ nucliadb_utils/tests/__init__.py,sha256=Oo9CAE7B0eW5VHn8sHd6o30SQzOWUhktLPRXdlDO
 nucliadb_utils/tests/asyncbenchmark.py,sha256=vrX_x9ifCXi18PfNShc23w9x_VUiB_Ph-2nuolh9z3Q,10707
 nucliadb_utils/tests/azure.py,sha256=NvMrPG6gfbpDE0m_aZgaa7eorbmA1r9rhAsAANhMlJk,4494
 nucliadb_utils/tests/fixtures.py,sha256=4lzz-khYvbGzdbT18IG6KKg40f7CVex2q3ho88I-jL8,3799
-nucliadb_utils/tests/gcs.py,sha256=MBMzn_UHU5SU6iILuCsB5zU4umhNcaCw_MKrxZhwvOc,4705
+nucliadb_utils/tests/gcs.py,sha256=JNqp5ymeNNU9Ci8rNYTh7-VqP4fjybElhyB3ap7EV1c,4721
 nucliadb_utils/tests/local.py,sha256=z9E11_ol1mu7N8Y6PkjKl-WMPPMl7JqQbDj3uhVa1A0,1933
 nucliadb_utils/tests/nats.py,sha256=RWHjwqq5esuO7OFbP24yYX1cXnpPLcWJwDUdmwCpH28,1897
 nucliadb_utils/tests/s3.py,sha256=kz9ULxrAYLVslZ59I8dtweZ9DJz5R8Ioy2XYrveZzHw,3829
-nucliadb_utils-6.8.1.post4926.dist-info/METADATA,sha256=knYfMAcN1DAT8940phgY66M4QnJy7uv1icsDc09IJkg,2180
-nucliadb_utils-6.8.1.post4926.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-nucliadb_utils-6.8.1.post4926.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
-nucliadb_utils-6.8.1.post4926.dist-info/RECORD,,
+nucliadb_utils-6.8.1.post4939.dist-info/METADATA,sha256=_Hc2_6N9wNVxBxy-GAJpiK4wZDy-1Xw5av_UvYeVBwI,2180
+nucliadb_utils-6.8.1.post4939.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+nucliadb_utils-6.8.1.post4939.dist-info/top_level.txt,sha256=fE3vJtALTfgh7bcAWcNhcfXkNPp_eVVpbKK-2IYua3E,15
+nucliadb_utils-6.8.1.post4939.dist-info/RECORD,,
```