lamindb_setup 0.72.1__py2.py3-none-any.whl → 0.73.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb_setup/__init__.py CHANGED
@@ -34,7 +34,7 @@ Modules & settings:
34
34
 
35
35
  """
36
36
 
37
- __version__ = "0.72.1" # denote a release candidate for 0.1.0 with 0.1rc1
37
+ __version__ = "0.73.0" # denote a release candidate for 0.1.0 with 0.1rc1
38
38
 
39
39
  import sys
40
40
  from os import name as _os_name
@@ -56,6 +56,7 @@ def register_storage_in_instance(ssettings: StorageSettings):
56
56
  "region": ssettings.region,
57
57
  "instance_uid": instance_uid,
58
58
  "created_by_id": current_user_id(),
59
+ "run": None,
59
60
  }
60
61
  if ssettings._uid is not None:
61
62
  defaults["uid"] = ssettings._uid
@@ -22,6 +22,10 @@ else:
22
22
  HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
23
23
 
24
24
 
25
+ def _keep_trailing_slash(path_str: str):
26
+ return path_str if path_str[-1] == "/" else path_str + "/"
27
+
28
+
25
29
  AWS_CREDENTIALS_EXPIRATION = 11 * 60 * 60 # refresh credentials after 11 hours
26
30
 
27
31
 
@@ -81,7 +85,10 @@ class AWSCredentialsManager:
81
85
  return S3Path(path, cache_regions=cache_regions, **connection_options)
82
86
 
83
87
  def enrich_path(self, path: S3Path, access_token: str | None = None) -> S3Path:
84
- path_str = path.as_posix().rstrip("/")
88
+ # trailing slash is needed to avoid returning incorrect results
89
+ # with .startswith
90
+ # for example s3://lamindata-eu should not receive cache for s3://lamindata
91
+ path_str = _keep_trailing_slash(path.as_posix())
85
92
  root = self._find_root(path_str)
86
93
 
87
94
  if root is not None:
@@ -127,7 +134,7 @@ class AWSCredentialsManager:
127
134
  # write the bucket for everything else
128
135
  root = path._url.netloc
129
136
  root = "s3://" + root
130
- self._set_cached_credentials(root, credentials)
137
+ self._set_cached_credentials(_keep_trailing_slash(root), credentials)
131
138
 
132
139
  return self._path_inject_options(path, credentials)
133
140
 
@@ -5,6 +5,7 @@ import shutil
5
5
  from pathlib import Path
6
6
  from typing import TYPE_CHECKING, Literal
7
7
 
8
+ from django.db.utils import ProgrammingError
8
9
  from lamin_utils import logger
9
10
 
10
11
  from ._hub_client import call_with_fallback
@@ -106,8 +107,15 @@ class InstanceSettings:
106
107
  local_records = Storage.objects.filter(root=local_root)
107
108
  else:
108
109
  local_records = Storage.objects.filter(type="local")
110
+ all_local_records = local_records.all()
111
+ try:
112
+ # trigger an error in case of a migration issue
113
+ all_local_records.first()
114
+ except ProgrammingError:
115
+ logger.error("not able to load Storage registry: please migrate")
116
+ return None
109
117
  found = False
110
- for record in local_records.all():
118
+ for record in all_local_records:
111
119
  root_path = Path(record.root)
112
120
  if root_path.exists():
113
121
  marker_path = root_path / ".lamindb/_is_initialized"
@@ -37,33 +37,36 @@ def base62(n_char: int) -> str:
37
37
  return id
38
38
 
39
39
 
40
- def get_storage_region(storage_root: UPathStr) -> str | None:
41
- storage_root_str = str(storage_root)
42
- if storage_root_str.startswith("s3://"):
43
- import botocore.session as session
40
+ def get_storage_region(path: UPathStr) -> str | None:
41
+ path_str = str(path)
42
+ if path_str.startswith("s3://"):
43
+ import botocore.session
44
44
  from botocore.config import Config
45
- from botocore.exceptions import NoCredentialsError
45
+ from botocore.exceptions import ClientError
46
46
 
47
47
  # strip the prefix and any suffixes of the bucket name
48
- bucket = storage_root_str.replace("s3://", "").split("/")[0]
49
- s3_session = session.get_session()
50
- s3_client = s3_session.create_client("s3")
48
+ bucket = path_str.replace("s3://", "").split("/")[0]
49
+ session = botocore.session.get_session()
50
+ credentials = session.get_credentials()
51
+ if credentials is None or credentials.access_key is None:
52
+ config = Config(signature_version=botocore.session.UNSIGNED)
53
+ else:
54
+ config = None
55
+ s3_client = session.create_client("s3", config=config)
51
56
  try:
52
57
  response = s3_client.head_bucket(Bucket=bucket)
53
- except NoCredentialsError: # deal with anonymous access
54
- s3_client = s3_session.create_client(
55
- "s3", config=Config(signature_version=session.UNSIGNED)
56
- )
57
- response = s3_client.head_bucket(Bucket=bucket)
58
- storage_region = response["ResponseMetadata"].get("HTTPHeaders", {})[
59
- "x-amz-bucket-region"
60
- ]
61
- # if we want to except botcore.exceptions.ClientError to reformat an
62
- # error message, this is how to do test for the "NoSuchBucket" error:
63
- # exc.response["Error"]["Code"] == "NoSuchBucket"
58
+ except ClientError as exc:
59
+ response = getattr(exc, "response", {})
60
+ if response.get("Error", {}).get("Code") == "404":
61
+ raise exc
62
+ region = (
63
+ response.get("ResponseMetadata", {})
64
+ .get("HTTPHeaders", {})
65
+ .get("x-amz-bucket-region")
66
+ )
64
67
  else:
65
- storage_region = None
66
- return storage_region
68
+ region = None
69
+ return region
67
70
 
68
71
 
69
72
  def mark_storage_root(root: UPathStr, uid: str):
@@ -53,6 +53,7 @@ def write_bionty_sources(isettings: InstanceSettings) -> None:
53
53
  kwargs["species"] = kwargs.pop("organism")
54
54
  elif hasattr(PublicSource, "organism") and "species" in kwargs:
55
55
  kwargs["organism"] = kwargs.pop("species")
56
+ kwargs["run"] = None # can't yet access tracking information
56
57
  record = PublicSource(**kwargs)
57
58
  all_records.append(record)
58
59
 
@@ -155,7 +155,10 @@ def create_mapper(
155
155
 
156
156
 
157
157
  def print_hook(size: int, value: int, objectname: str, action: str):
158
- progress_in_percent = (value / size) * 100
158
+ if size == 0:
159
+ progress_in_percent = 100.0
160
+ else:
161
+ progress_in_percent = (value / size) * 100
159
162
  out = f"... {action} {objectname}:" f" {min(progress_in_percent, 100):4.1f}%"
160
163
  if "NBPRJ_TEST_NBPATH" not in os.environ:
161
164
  end = "\n" if progress_in_percent >= 100 else "\r"
@@ -238,50 +241,75 @@ class ChildProgressCallback(fsspec.callbacks.Callback):
238
241
  self.parent.update_relative_value(inc)
239
242
 
240
243
  def relative_update(self, inc=1):
241
- self.parent_update(inc / self.size)
244
+ if self.size != 0:
245
+ self.parent_update(inc / self.size)
246
+ else:
247
+ self.parent_update(1)
242
248
 
243
249
 
244
- def download_to(self, path: UPathStr, print_progress: bool = False, **kwargs):
245
- """Download to a path."""
250
+ def download_to(self, local_path: UPathStr, print_progress: bool = True, **kwargs):
251
+ """Download from self (a destination in the cloud) to the local path."""
252
+ if "recursive" not in kwargs:
253
+ kwargs["recursive"] = True
246
254
  if print_progress and "callback" not in kwargs:
247
255
  callback = ProgressCallback(
248
- PurePosixPath(path).name, "downloading", adjust_size=True
256
+ PurePosixPath(local_path).name, "downloading", adjust_size=True
249
257
  )
250
258
  kwargs["callback"] = callback
251
259
 
252
- self.fs.download(str(self), str(path), **kwargs)
260
+ self.fs.download(str(self), str(local_path), **kwargs)
253
261
 
254
262
 
255
263
  def upload_from(
256
264
  self,
257
- path: UPathStr,
258
- dir_inplace: bool = False,
259
- print_progress: bool = False,
265
+ local_path: UPathStr,
266
+ create_folder: bool | None = None,
267
+ print_progress: bool = True,
260
268
  **kwargs,
261
- ):
262
- """Upload from a local path."""
263
- path = Path(path)
264
- path_is_dir = path.is_dir()
265
- if not path_is_dir:
266
- dir_inplace = False
269
+ ) -> UPath:
270
+ """Upload from the local path to `self` (a destination in the cloud).
271
+
272
+ If the local path is a directory, recursively upload its contents.
273
+
274
+ Args:
275
+ local_path: A local path of a file or directory.
276
+ create_folder: Only applies if `local_path` is a directory and then
277
+ defaults to `True`. If `True`, make a new folder in the destination
278
+ using the directory name of `local_path`. If `False`, upload the
279
+ contents of the directory to the root-level of the destination.
280
+ print_progress: Print progress.
281
+
282
+ Returns:
283
+ The destination path.
284
+ """
285
+ local_path = Path(local_path)
286
+ local_path_is_dir = local_path.is_dir()
287
+ if create_folder is None:
288
+ create_folder = local_path_is_dir
289
+ if create_folder and not local_path_is_dir:
290
+ raise ValueError("create_folder can only be True if local_path is a directory")
267
291
 
268
292
  if print_progress and "callback" not in kwargs:
269
- callback = ProgressCallback(path.name, "uploading")
293
+ callback = ProgressCallback(local_path.name, "uploading")
270
294
  kwargs["callback"] = callback
271
295
 
272
- if dir_inplace:
273
- source = [f for f in path.rglob("*") if f.is_file()]
274
- destination = [str(self / f.relative_to(path)) for f in source]
296
+ if local_path_is_dir and not create_folder:
297
+ source = [f for f in local_path.rglob("*") if f.is_file()]
298
+ # convert_pathlike is needed to remove the trailing slash because
299
+ # UPath("s3://some-bucket/some-folder/") / "some-key"
300
+ # results in UPath("s3://some-bucket/some-folder//some-key")
301
+ # for upath 0.1.4
302
+ dest_root = convert_pathlike(self) if self._parts[-1] == "" else self
303
+ destination = [str(dest_root / f.relative_to(local_path)) for f in source]
275
304
  source = [str(f) for f in source] # type: ignore
276
305
  else:
277
- source = str(path) # type: ignore
306
+ source = str(local_path) # type: ignore
278
307
  destination = str(self) # type: ignore
279
- # this weird thing is to avoid s3fs triggering create_bucket in upload
280
- # if dirs are present
281
- # it allows to avoid permission error
282
- if self.protocol != "s3" or not path_is_dir or dir_inplace:
283
- cleanup_cache = False
284
- else:
308
+
309
+ # the lines below keep s3fs from triggering create_bucket during upload when
310
+ # dirs are present, which avoids a permission error
311
+ # would be easier to just
312
+ if self.protocol == "s3" and local_path_is_dir and create_folder:
285
313
  bucket = self._url.netloc
286
314
  if bucket not in self.fs.dircache:
287
315
  self.fs.dircache[bucket] = [{}]
@@ -290,14 +318,23 @@ def upload_from(
290
318
  cleanup_cache = True
291
319
  else:
292
320
  cleanup_cache = False
321
+ else:
322
+ cleanup_cache = False
293
323
 
294
- self.fs.upload(source, destination, **kwargs)
324
+ self.fs.upload(source, destination, recursive=create_folder, **kwargs)
295
325
 
296
326
  if cleanup_cache:
297
327
  # normally this is invalidated after the upload but still better to check
298
328
  if bucket in self.fs.dircache:
299
329
  del self.fs.dircache[bucket]
300
330
 
331
+ if local_path_is_dir and create_folder:
332
+ # convert_pathlike is needed to remove the trailing slash
333
+ dest_root = convert_pathlike(self) if self._parts[-1] == "" else self
334
+ return dest_root / local_path.name
335
+ else:
336
+ return self
337
+
301
338
 
302
339
  def synchronize(
303
340
  self,
@@ -305,14 +342,14 @@ def synchronize(
305
342
  error_no_origin: bool = True,
306
343
  print_progress: bool = False,
307
344
  callback: fsspec.callbacks.Callback | None = None,
308
- **kwargs,
345
+ timestamp: float | None = None,
309
346
  ):
310
347
  """Sync to a local destination path."""
311
348
  # optimize the number of network requests
312
- if "timestamp" in kwargs:
349
+ if timestamp is not None:
313
350
  is_dir = False
314
351
  exists = True
315
- cloud_mts = kwargs.pop("timestamp")
352
+ cloud_mts = timestamp
316
353
  else:
317
354
  # perform only one network request to check existence, type and timestamp
318
355
  try:
@@ -379,7 +416,7 @@ def synchronize(
379
416
  destination = objectpath / file_key
380
417
  child = callback.branched(origin, destination.as_posix())
381
418
  UPath(origin, **self._kwargs).synchronize(
382
- destination, timestamp=timestamp, callback=child, **kwargs
419
+ destination, callback=child, timestamp=timestamp
383
420
  )
384
421
  child.close()
385
422
  if destination_exists:
@@ -400,15 +437,16 @@ def synchronize(
400
437
  callback = ProgressCallback.requires_progress(
401
438
  callback, print_progress, objectpath.name, "synchronizing"
402
439
  )
403
- kwargs["callback"] = callback
404
440
  if objectpath.exists():
405
- local_mts = objectpath.stat().st_mtime # type: ignore
406
- need_synchronize = cloud_mts > local_mts
441
+ local_mts_obj = objectpath.stat().st_mtime # type: ignore
442
+ need_synchronize = cloud_mts > local_mts_obj
407
443
  else:
408
444
  objectpath.parent.mkdir(parents=True, exist_ok=True)
409
445
  need_synchronize = True
410
446
  if need_synchronize:
411
- self.download_to(objectpath, **kwargs)
447
+ self.download_to(
448
+ objectpath, recursive=False, print_progress=False, callback=callback
449
+ )
412
450
  os.utime(objectpath, times=(cloud_mts, cloud_mts))
413
451
  else:
414
452
  # nothing happens if parent_update is not defined
@@ -477,7 +515,7 @@ def compute_file_tree(
477
515
  if child_path.is_dir():
478
516
  if include_dirs and child_path not in include_dirs:
479
517
  continue
480
- yield prefix + pointer + child_path.name
518
+ yield prefix + pointer + child_path.name + "/"
481
519
  n_directories += 1
482
520
  n_files_per_dir_and_type = defaultdict(lambda: 0)
483
521
  extension = branch if pointer == tee else space
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb_setup
3
- Version: 0.72.1
3
+ Version: 0.73.0
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <laminlabs@gmail.com>
6
6
  Description-Content-Type: text/markdown
@@ -1,4 +1,4 @@
1
- lamindb_setup/__init__.py,sha256=Uw5mXYU3C6xw4fCmayp05RMBoM0iSQgj2YtJmBbzvxc,1542
1
+ lamindb_setup/__init__.py,sha256=yI2JVyxdrqTPdboJL65uyR5-TzhBhh56tP278f4QY5s,1542
2
2
  lamindb_setup/_cache.py,sha256=wA7mbysANwe8hPNbjDo9bOmXJ0xIyaS5iyxIpxSWji4,846
3
3
  lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
4
4
  lamindb_setup/_check_setup.py,sha256=cNEL9Q4yPpmEkGKHH8JgullWl1VUZwALJ4RHn9wZypY,2613
@@ -8,7 +8,7 @@ lamindb_setup/_delete.py,sha256=Y8KSFYgY0UHAvjd7cCL6hZ_XiLeJwx50BguVATcj_Xo,5524
8
8
  lamindb_setup/_django.py,sha256=EoyWvFzH0i9wxjy4JZhcoXCTckztP_Mrl6FbYQnMmLE,1534
9
9
  lamindb_setup/_exportdb.py,sha256=uTIZjKKTB7arzEr1j0O6lONiT2pRBKeOFdLvOV8ZwzE,2120
10
10
  lamindb_setup/_importdb.py,sha256=yYYShzUajTsR-cTW4CZ-UNDWZY2uE5PAgNbp-wn8Ogc,1874
11
- lamindb_setup/_init_instance.py,sha256=lR-6txbf3Z2O7ki-DMZWFg36QNiUZ_B5lc6JXjScdus,11897
11
+ lamindb_setup/_init_instance.py,sha256=phw7HW2HHtcQRYXXcHq6O_LyQgSFnotLm1mdP3EvD1U,11918
12
12
  lamindb_setup/_migrate.py,sha256=4nBTFg5-BK4A2gH-D3_tcFf8EtvMnIo5Mq0e_C6_9-U,8815
13
13
  lamindb_setup/_register_instance.py,sha256=Jeu0wyvJVSVQ_n-A_7yn7xOZIP0ncJD92DRABqzPIjA,940
14
14
  lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
@@ -16,7 +16,7 @@ lamindb_setup/_set_managed_storage.py,sha256=mNZrANn-9rwZ0oGWxxg0wS0T0VOQCWyo4nS
16
16
  lamindb_setup/_setup_user.py,sha256=6Oc7Rke-yRQSZbuntdUAz8QbJ6UuPzYHI9FnYlf_q-A,3670
17
17
  lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
18
18
  lamindb_setup/core/__init__.py,sha256=dV9S-rQpNK9JcBn4hiEmiLnmNqfpPFJD9pqagMCaIew,416
19
- lamindb_setup/core/_aws_credentials.py,sha256=84kugTtELFACJ4SGeTqT-ul0FbOPTe07P0xptv2tOPo,4972
19
+ lamindb_setup/core/_aws_credentials.py,sha256=nK04-lNYz6MYDgD6Z56peoxCULZ82uFDRMzjwsPh25U,5293
20
20
  lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
21
21
  lamindb_setup/core/_deprecated.py,sha256=3qxUI1dnDlSeR0BYrv7ucjqRBEojbqotPgpShXs4KF8,2520
22
22
  lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
@@ -25,20 +25,20 @@ lamindb_setup/core/_hub_core.py,sha256=RGjTqf1owuWmkXAYy0EPaoHAaJ-0T0hAidkqa3cId
25
25
  lamindb_setup/core/_hub_crud.py,sha256=b1XF7AJpM9Q-ttm9nPG-r3OTRWHQaGzAGIyvmb83NTo,4859
26
26
  lamindb_setup/core/_hub_utils.py,sha256=b_M1LkdCjiMWm1EOlSb9GuPdLijwVgQDtATTpeZuXI0,1875
27
27
  lamindb_setup/core/_settings.py,sha256=jjZ_AxRXB3Y3UP6m04BAw_dhFbJbdg2-nZWmEv2LNZ8,3141
28
- lamindb_setup/core/_settings_instance.py,sha256=w5SBrp6nIJMegzNJSnfQl8HvqEtwgmR7OrayffVedLc,16612
28
+ lamindb_setup/core/_settings_instance.py,sha256=kda3kqUtwh-XZMDmbdFIp8RGGFw0Az8T8g2SKEU65mo,16949
29
29
  lamindb_setup/core/_settings_load.py,sha256=NGgCDpN85j1EqoKlrYFIlZBMlBJm33gx2-wc96CP_ZQ,3922
30
30
  lamindb_setup/core/_settings_save.py,sha256=d1A-Ex-7H08mb8l7I0Oe0j0GilrfaDuprh_NMxhQAsQ,2704
31
- lamindb_setup/core/_settings_storage.py,sha256=7f0jt1zcSltpOYDPQ5CVvbBon_d7aneKTte935-2REY,13236
31
+ lamindb_setup/core/_settings_storage.py,sha256=7IUZh0RJgVy9fQKwovXVFQbh8IkU5IJxaLzO2S-mIYk,13137
32
32
  lamindb_setup/core/_settings_store.py,sha256=dagS5c7wAMRnuZTRfCU4sKaIOyF_HwAP5Fnnn8vphno,2084
33
33
  lamindb_setup/core/_settings_user.py,sha256=P2lC4WDRAFfT-Xq3MlXJ-wMKIHCoGNhMTQfRGIAyUNQ,1344
34
- lamindb_setup/core/_setup_bionty_sources.py,sha256=OgPpZxN2_Wffy-ogEBz_97c_k8d2bD-DDVt89-u9GLY,3002
34
+ lamindb_setup/core/_setup_bionty_sources.py,sha256=h_pBANsSGK6ujAFsG21mtADHVJoMLKDR4eGgRP4Fgls,3072
35
35
  lamindb_setup/core/cloud_sqlite_locker.py,sha256=NIBNAGq7TTRrip9OzMdiQKj8QOuwhL9esyM0aehUqBA,6893
36
36
  lamindb_setup/core/django.py,sha256=QUQm3zt5QIiD8uv6o9vbSm_bshqiSWzKSkgD3z2eJCg,3542
37
37
  lamindb_setup/core/exceptions.py,sha256=eoI7AXgATgDVzgArtN7CUvpaMUC067vsBg5LHCsWzDM,305
38
38
  lamindb_setup/core/hashing.py,sha256=7r96h5JBzuwfOR_gNNqTyWNPKMuiOUfBYwn6sCbZkf8,2269
39
39
  lamindb_setup/core/types.py,sha256=bcYnZ0uM_2NXKJCl94Mmc-uYrQlRUUVKG3sK2N-F-N4,532
40
- lamindb_setup/core/upath.py,sha256=QnAiaOZgT1TLUaX0PEs9dSJ0E4ZDD431hCfKrJIbmqQ,26339
41
- lamindb_setup-0.72.1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
42
- lamindb_setup-0.72.1.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
43
- lamindb_setup-0.72.1.dist-info/METADATA,sha256=JPuCGY8Fy7wJzFXSh6xaBJVNBOpz2XeDvSLDzwTEXB8,1620
44
- lamindb_setup-0.72.1.dist-info/RECORD,,
40
+ lamindb_setup/core/upath.py,sha256=3rCqpcQY9m09Co2aOLqOAbgdfdJlVyc2Spmyr8fBKAk,28135
41
+ lamindb_setup-0.73.0.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
42
+ lamindb_setup-0.73.0.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
43
+ lamindb_setup-0.73.0.dist-info/METADATA,sha256=WFMMPhqXq6j9jl5NACjayz-MaN4ey4SVURRo-eb6Dlc,1620
44
+ lamindb_setup-0.73.0.dist-info/RECORD,,