lamindb_setup 0.81.3__py3-none-any.whl → 1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,25 +12,23 @@ def select_instance_by_owner_name(
12
12
  name: str,
13
13
  client: Client,
14
14
  ) -> dict | None:
15
- try:
16
- data = (
17
- client.table("instance")
18
- .select(
19
- "*, account!inner!instance_account_id_28936e8f_fk_account_id(*),"
20
- " storage!inner!storage_instance_id_359fca71_fk_instance_id(*)"
21
- )
22
- .eq("name", name)
23
- .eq("account.handle", owner)
24
- .eq("storage.is_default", True)
25
- .execute()
26
- .data
15
+ # this won't find an instance without the default storage
16
+ data = (
17
+ client.table("instance")
18
+ .select(
19
+ "*, account!inner!instance_account_id_28936e8f_fk_account_id(*),"
20
+ " storage!inner!storage_instance_id_359fca71_fk_instance_id(*)"
27
21
  )
28
- except Exception:
29
- return None
22
+ .eq("name", name)
23
+ .eq("account.handle", owner)
24
+ .eq("storage.is_default", True)
25
+ .execute()
26
+ .data
27
+ )
30
28
  if len(data) == 0:
31
29
  return None
32
30
  result = data[0]
33
- # this is now a list
31
+ # this is a list
34
32
  # assume only one default storage
35
33
  result["storage"] = result["storage"][0]
36
34
  return result
@@ -89,15 +87,22 @@ def select_instance_by_id_with_storage(
89
87
  instance_id: str,
90
88
  client: Client,
91
89
  ):
92
- response = (
90
+ # this won't find an instance without the default storage
91
+ data = (
93
92
  client.table("instance")
94
- .select("*, storage!instance_storage_id_87963cc8_fk_storage_id(*)")
93
+ .select("*, storage!inner!storage_instance_id_359fca71_fk_instance_id(*)")
95
94
  .eq("id", instance_id)
95
+ .eq("storage.is_default", True)
96
96
  .execute()
97
+ .data
97
98
  )
98
- if len(response.data) == 0:
99
+ if len(data) == 0:
99
100
  return None
100
- return response.data[0]
101
+ result = data[0]
102
+ # this is a list
103
+ # assume only one default storage
104
+ result["storage"] = result["storage"][0]
105
+ return result
101
106
 
102
107
 
103
108
  def update_instance(instance_id: str, instance_fields: dict, client: Client):
@@ -139,17 +144,14 @@ def select_collaborator(
139
144
  def select_default_storage_by_instance_id(
140
145
  instance_id: str, client: Client
141
146
  ) -> dict | None:
142
- try:
143
- data = (
144
- client.table("storage")
145
- .select("*")
146
- .eq("instance_id", instance_id)
147
- .eq("is_default", True)
148
- .execute()
149
- .data
150
- )
151
- except Exception:
152
- return None
147
+ data = (
148
+ client.table("storage")
149
+ .select("*")
150
+ .eq("instance_id", instance_id)
151
+ .eq("is_default", True)
152
+ .execute()
153
+ .data
154
+ )
153
155
  if len(data) == 0:
154
156
  return None
155
157
  return data[0]
@@ -164,7 +164,7 @@ class SetupSettings:
164
164
 
165
165
  @property
166
166
  def paths(self) -> type[SetupPaths]:
167
- """Convert cloud paths to lamidb local paths.
167
+ """Convert cloud paths to lamindb local paths.
168
168
 
169
169
  Use `settings.paths.cloud_to_local_no_update`
170
170
  or `settings.paths.cloud_to_local`.
@@ -179,7 +179,7 @@ class SetupSettings:
179
179
  repr = self.user.__repr__()
180
180
  repr += f"\nAuto-connect in Python: {self.auto_connect}\n"
181
181
  repr += f"Private Django API: {self.private_django_api}\n"
182
- repr += f"Cache directory: {self.cache_dir}\n"
182
+ repr += f"Cache directory: {self.cache_dir.as_posix()}\n"
183
183
  if self._instance_exists:
184
184
  repr += self.instance.__repr__()
185
185
  else:
@@ -200,9 +200,14 @@ class SetupPaths:
200
200
  # cache_key is ignored if filepath is a local path
201
201
  if not isinstance(filepath, LocalPathClasses):
202
202
  # settings is defined further in this file
203
- local_filepath = settings.cache_dir / (
204
- filepath.path if cache_key is None else cache_key # type: ignore
205
- )
203
+ if cache_key is None:
204
+ local_key = filepath.path # type: ignore
205
+ protocol = filepath.protocol # type: ignore
206
+ if protocol in {"http", "https"}:
207
+ local_key = local_key.removeprefix(protocol + "://")
208
+ else:
209
+ local_key = cache_key
210
+ local_filepath = settings.cache_dir / local_key
206
211
  else:
207
212
  local_filepath = filepath
208
213
  return local_filepath
@@ -121,7 +121,7 @@ class InstanceSettings:
121
121
  def _search_local_root(
122
122
  self, local_root: str | None = None, mute_warning: bool = False
123
123
  ) -> StorageSettings | None:
124
- from lnschema_core.models import Storage
124
+ from lamindb.models import Storage
125
125
 
126
126
  if local_root is not None:
127
127
  local_records = Storage.objects.filter(root=local_root)
@@ -358,7 +358,7 @@ class InstanceSettings:
358
358
  sqlite_filepath = self.storage.cloud_to_local(
359
359
  self._sqlite_file, error_no_origin=False
360
360
  )
361
- return f"sqlite:///{sqlite_filepath}"
361
+ return f"sqlite:///{sqlite_filepath.as_posix()}"
362
362
  else:
363
363
  return self._db
364
364
 
@@ -457,11 +457,24 @@ class InstanceSettings:
457
457
  settings._instance_settings = self
458
458
 
459
459
  def _init_db(self):
460
+ from lamindb_setup import _check_setup
461
+
460
462
  from .django import setup_django
461
463
 
464
+ _check_setup.IS_LOADING = True
462
465
  setup_django(self, init=True)
466
+ _check_setup.IS_LOADING = False
467
+
468
+ from lamindb.models import Space
469
+
470
+ Space.objects.get_or_create(
471
+ name="All",
472
+ description="Every team & user with access to the instance has access.",
473
+ )
463
474
 
464
475
  def _load_db(self) -> tuple[bool, str]:
476
+ from lamindb_setup import _check_setup
477
+
465
478
  # Is the database available and initialized as LaminDB?
466
479
  # returns a tuple of status code and message
467
480
  if self.dialect == "sqlite" and not self._sqlite_file.exists():
@@ -472,7 +485,6 @@ class InstanceSettings:
472
485
  f" {legacy_file} to {self._sqlite_file}"
473
486
  )
474
487
  return False, f"SQLite file {self._sqlite_file} does not exist"
475
- from lamindb_setup import settings # to check user
476
488
 
477
489
  from .django import setup_django
478
490
 
@@ -481,5 +493,7 @@ class InstanceSettings:
481
493
  # setting up django also performs a check for migrations & prints them
482
494
  # as warnings
483
495
  # this should fail, e.g., if the db is not reachable
496
+ _check_setup.IS_LOADING = True
484
497
  setup_django(self)
498
+ _check_setup.IS_LOADING = False
485
499
  return True, ""
@@ -7,6 +7,7 @@ import string
7
7
  from pathlib import Path
8
8
  from typing import TYPE_CHECKING, Any, Literal
9
9
 
10
+ import fsspec
10
11
  from lamin_utils import logger
11
12
 
12
13
  from ._aws_credentials import HOSTED_REGIONS, get_aws_credentials_manager
@@ -24,6 +25,10 @@ if TYPE_CHECKING:
24
25
 
25
26
  IS_INITIALIZED_KEY = ".lamindb/_is_initialized"
26
27
 
28
+ # a list of supported fsspec protocols
29
+ # rename file to local before showing to a user
30
+ VALID_PROTOCOLS = ("file", "gs", "s3", "hf", "http", "https")
31
+
27
32
 
28
33
  def base62(n_char: int) -> str:
29
34
  """Like nanoid without hyphen and underscore."""
@@ -114,16 +119,11 @@ def init_storage(
114
119
  root_str = f"s3://lamin-{region}/{uid}"
115
120
  else:
116
121
  root_str = f"s3://lamin-hosted-test/{uid}"
117
- elif root_str.startswith(("gs://", "s3://", "hf://")):
118
- pass
119
- else: # local path
120
- try:
121
- _ = Path(root_str)
122
- except Exception as e:
123
- logger.error(
124
- "`storage` is not a valid local, GCP storage, AWS S3 path or Hugging Face path"
125
- )
126
- raise e
122
+ elif (input_protocol := fsspec.utils.get_protocol(root_str)) not in VALID_PROTOCOLS:
123
+ valid_protocols = ("local",) + VALID_PROTOCOLS[1:] # show local instead of file
124
+ raise ValueError(
125
+ f"Protocol {input_protocol} is not supported, valid protocols are {', '.join(valid_protocols)}"
126
+ )
127
127
  ssettings = StorageSettings(
128
128
  uid=uid,
129
129
  root=root_str,
@@ -227,10 +227,10 @@ class StorageSettings:
227
227
 
228
228
  @property
229
229
  def record(self) -> Any:
230
- """Storage record in current instance."""
230
+ """Storage record in the current instance."""
231
231
  if self._record is None:
232
232
  # dynamic import because of import order
233
- from lnschema_core.models import Storage
233
+ from lamindb.models import Storage
234
234
 
235
235
  from ._settings import settings
236
236
 
@@ -299,14 +299,15 @@ class StorageSettings:
299
299
  return self._region
300
300
 
301
301
  @property
302
- def type(self) -> Literal["local", "s3", "gs"]:
302
+ def type(self) -> Literal["local", "s3", "gs", "hf", "http", "https"]:
303
303
  """AWS S3 vs. Google Cloud vs. local.
304
304
 
305
- Returns the protocol as a string: "local", "s3", "gs".
305
+ Returns the protocol as a string: "local", "s3", "gs", "hf", "http", "https".
306
306
  """
307
307
  import fsspec
308
308
 
309
309
  convert = {"file": "local"}
310
+ # init_storage checks that the root protocol belongs to VALID_PROTOCOLS
310
311
  protocol = fsspec.utils.get_protocol(self.root_as_str)
311
312
  return convert.get(protocol, protocol) # type: ignore
312
313
 
@@ -345,5 +346,5 @@ class StorageSettings:
345
346
  return self.root / filekey
346
347
 
347
348
  def local_filepath(self, filekey: UPathStr) -> UPath:
348
- """Local (cache) filepath from filekey: `local(filepath(...))`."""
349
+ """Local (cache) filepath from filekey."""
349
350
  return self.cloud_to_local(self.key_to_filepath(filekey))
@@ -48,7 +48,7 @@ class UserSettings:
48
48
  @property
49
49
  def id(self):
50
50
  """Integer id valid in current instance."""
51
- from lnschema_core.users import current_user_id
51
+ from lamindb.base.users import current_user_id
52
52
 
53
53
  # there is no cache needed here because current_user_id()
54
54
  # has its own cache
@@ -5,10 +5,8 @@ import builtins
5
5
  import os
6
6
  from pathlib import Path
7
7
  import time
8
- from lamin_utils import logger
9
- from ._settings_store import current_instance_settings_file
10
8
  from ._settings_instance import InstanceSettings
11
- import sys
9
+
12
10
 
13
11
  IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
14
12
  IS_SETUP = False
@@ -12,6 +12,7 @@ from __future__ import annotations
12
12
 
13
13
  import base64
14
14
  import hashlib
15
+ import json
15
16
  from concurrent.futures import ThreadPoolExecutor
16
17
  from typing import TYPE_CHECKING
17
18
 
@@ -40,11 +41,21 @@ def b16_to_b64(s: str):
40
41
  return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
41
42
 
42
43
 
44
+ def hash_string(string: str) -> str:
45
+ # as we're truncating (not here) at 22 b64, we choose md5 over sha512
46
+ return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())
47
+
48
+
43
49
  # a lot to read about this: lamin-notes/2022/hashing
44
50
  def hash_set(s: set[str]) -> str:
45
- bstr = ":".join(sorted(s)).encode("utf-8")
46
- # as we're truncating at 22 b64, we choose md5 over sha512
47
- return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
51
+ join_s = ":".join(sorted(s))
52
+ return hash_string(join_s)[:HASH_LENGTH]
53
+
54
+
55
+ def hash_dict(d: dict) -> str:
56
+ return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
57
+ :HASH_LENGTH
58
+ ]
48
59
 
49
60
 
50
61
  def hash_from_hashes_list(hashes: Iterable[str]) -> str:
@@ -111,6 +122,6 @@ def hash_dir(path: Path):
111
122
  hashes, sizes = zip(*hashes_sizes)
112
123
 
113
124
  hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
114
- n_objects = len(hashes)
125
+ n_files = len(hashes)
115
126
  size = sum(sizes)
116
- return size, hash, hash_type, n_objects
127
+ return size, hash, hash_type, n_files
@@ -12,14 +12,15 @@ from itertools import islice
12
12
  from pathlib import Path, PosixPath, PurePosixPath, WindowsPath
13
13
  from typing import TYPE_CHECKING, Any, Literal
14
14
 
15
+ import click
15
16
  import fsspec
16
17
  from lamin_utils import logger
17
18
  from upath import UPath
18
- from upath.implementations.cloud import CloudPath, S3Path # keep CloudPath!
19
+ from upath.implementations.cloud import CloudPath # keep CloudPath!
19
20
  from upath.implementations.local import LocalPath
20
21
 
21
22
  from ._aws_credentials import HOSTED_BUCKETS, get_aws_credentials_manager
22
- from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list
23
+ from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
23
24
 
24
25
  if TYPE_CHECKING:
25
26
  from .types import UPathStr
@@ -190,8 +191,17 @@ class ProgressCallback(fsspec.callbacks.Callback):
190
191
  pass
191
192
 
192
193
  def update_relative_value(self, inc=1):
193
- self.value += inc
194
- self.call()
194
+ if inc != 0:
195
+ self.value += inc
196
+ self.call()
197
+ else:
198
+ # this is specific to http filesystem
199
+ # for some reason the last update is 0 always
200
+ # sometimes the reported result is less that 100%
201
+ # here 100% is forced manually in this case
202
+ if self.value < 1.0 and self.value >= 0.999:
203
+ self.value = self.size
204
+ self.call()
195
205
 
196
206
  def branch(self, path_1, path_2, kwargs):
197
207
  if self.adjust_size:
@@ -258,7 +268,17 @@ def download_to(self, local_path: UPathStr, print_progress: bool = True, **kwarg
258
268
  )
259
269
  kwargs["callback"] = callback
260
270
 
261
- self.fs.download(str(self), str(local_path), **kwargs)
271
+ cloud_path_str = str(self)
272
+ local_path_str = str(local_path)
273
+ # needed due to https://github.com/fsspec/filesystem_spec/issues/1766
274
+ # otherwise fsspec calls fs._ls_real where it reads the body and parses links
275
+ # so the file is downloaded 2 times
276
+ # upath doesn't call fs.ls to infer type, so it is safe to call
277
+ if self.protocol in {"http", "https"} and self.stat().as_info()["type"] == "file":
278
+ self.fs.use_listings_cache = True
279
+ self.fs.dircache[cloud_path_str] = []
280
+
281
+ self.fs.download(cloud_path_str, local_path_str, **kwargs)
262
282
 
263
283
 
264
284
  def upload_from(
@@ -306,8 +326,7 @@ def upload_from(
306
326
  destination = self.as_posix()
307
327
 
308
328
  # the below lines are to avoid s3fs triggering create_bucket in upload if
309
- # dirs are present it allows to avoid permission error
310
- # would be easier to just
329
+ # dirs are present, it allows to avoid the permission error
311
330
  if self.protocol == "s3" and local_path_is_dir and create_folder:
312
331
  bucket = self.drive
313
332
  if bucket not in self.fs.dircache:
@@ -350,27 +369,19 @@ def synchronize(
350
369
  exists = True
351
370
  cloud_mts = timestamp
352
371
  else:
353
- # hf requires special treatment
354
- if protocol == "hf":
355
- try:
356
- stat_hf = self.stat().as_info()
357
- is_dir = stat_hf["type"] == "directory"
358
- exists = True
359
- if not is_dir:
360
- cloud_mts = stat_hf["last_commit"].date.timestamp()
361
- except FileNotFoundError:
362
- exists = False
363
- else:
364
- # perform only one network request to check existence, type and timestamp
365
- try:
366
- cloud_mts = self.modified.timestamp()
367
- is_dir = False
368
- exists = True
369
- except FileNotFoundError:
370
- exists = False
371
- except IsADirectoryError:
372
- is_dir = True
373
- exists = True
372
+ try:
373
+ cloud_stat = self.stat()
374
+ cloud_info = cloud_stat.as_info()
375
+ exists = True
376
+ is_dir = cloud_info["type"] == "directory"
377
+ if not is_dir:
378
+ # hf requires special treatment
379
+ if protocol == "hf":
380
+ cloud_mts = cloud_info["last_commit"].date.timestamp()
381
+ else:
382
+ cloud_mts = cloud_stat.st_mtime
383
+ except FileNotFoundError:
384
+ exists = False
374
385
 
375
386
  if not exists:
376
387
  warn_or_error = f"The original path {self} does not exist anymore."
@@ -386,6 +397,7 @@ def synchronize(
386
397
  return None
387
398
 
388
399
  # synchronization logic for directories
400
+ # to synchronize directories, it should be possible to get modification times
389
401
  if is_dir:
390
402
  files = self.fs.find(str(self), detail=True)
391
403
  if protocol == "s3":
@@ -451,8 +463,16 @@ def synchronize(
451
463
  callback, print_progress, objectpath.name, "synchronizing"
452
464
  )
453
465
  if objectpath.exists():
454
- local_mts_obj = objectpath.stat().st_mtime # type: ignore
455
- need_synchronize = cloud_mts > local_mts_obj
466
+ if cloud_mts != 0:
467
+ local_mts_obj = objectpath.stat().st_mtime
468
+ need_synchronize = cloud_mts > local_mts_obj
469
+ else:
470
+ # this is true for http for example
471
+ # where size is present but st_mtime is not
472
+ # we assume that any change without the change in size is unlikely
473
+ cloud_size = cloud_stat.st_size
474
+ local_size_obj = objectpath.stat().st_size
475
+ need_synchronize = cloud_size != local_size_obj
456
476
  else:
457
477
  objectpath.parent.mkdir(parents=True, exist_ok=True)
458
478
  need_synchronize = True
@@ -464,7 +484,8 @@ def synchronize(
464
484
  self.download_to(
465
485
  objectpath, recursive=False, print_progress=False, callback=callback
466
486
  )
467
- os.utime(objectpath, times=(cloud_mts, cloud_mts))
487
+ if cloud_mts != 0:
488
+ os.utime(objectpath, times=(cloud_mts, cloud_mts))
468
489
  else:
469
490
  # nothing happens if parent_update is not defined
470
491
  # because of Callback.no_op
@@ -497,7 +518,7 @@ def compute_file_tree(
497
518
  skip_suffixes_tuple = ()
498
519
  else:
499
520
  skip_suffixes_tuple = tuple(skip_suffixes) # type: ignore
500
- n_objects = 0
521
+ n_files = 0
501
522
  n_directories = 0
502
523
 
503
524
  # by default only including registered files
@@ -510,7 +531,7 @@ def compute_file_tree(
510
531
  include_paths = set()
511
532
 
512
533
  def inner(dir_path: Path, prefix: str = "", level: int = -1):
513
- nonlocal n_objects, n_directories, suffixes
534
+ nonlocal n_files, n_directories, suffixes
514
535
  if level == 0:
515
536
  return
516
537
  stripped_dir_path = dir_path.as_posix().rstrip("/")
@@ -543,7 +564,7 @@ def compute_file_tree(
543
564
  suffix = extract_suffix_from_path(child_path)
544
565
  suffixes.add(suffix)
545
566
  n_files_per_dir_and_type[suffix] += 1
546
- n_objects += 1
567
+ n_files += 1
547
568
  if n_files_per_dir_and_type[suffix] == n_max_files_per_dir_and_type:
548
569
  yield prefix + "..."
549
570
  elif n_files_per_dir_and_type[suffix] > n_max_files_per_dir_and_type:
@@ -556,15 +577,15 @@ def compute_file_tree(
556
577
  for line in islice(iterator, n_max_files):
557
578
  folder_tree += f"\n{line}"
558
579
  if next(iterator, None):
559
- folder_tree += f"\n... only showing {n_max_files} out of {n_objects} files"
580
+ folder_tree += f"\n... only showing {n_max_files} out of {n_files} files"
560
581
  directory_info = "directory" if n_directories == 1 else "directories"
561
582
  display_suffixes = ", ".join([f"{suffix!r}" for suffix in suffixes])
562
- suffix_message = f" with suffixes {display_suffixes}" if n_objects > 0 else ""
583
+ suffix_message = f" with suffixes {display_suffixes}" if n_files > 0 else ""
563
584
  message = (
564
585
  f"{n_directories} sub-{directory_info} &"
565
- f" {n_objects} files{suffix_message}\n{path.resolve()}{folder_tree}"
586
+ f" {n_files} files{suffix_message}\n{path.resolve()}{folder_tree}"
566
587
  )
567
- return message, n_objects
588
+ return message, n_files
568
589
 
569
590
 
570
591
  # adapted from: https://stackoverflow.com/questions/9727673
@@ -718,12 +739,26 @@ warnings.filterwarnings(
718
739
  )
719
740
 
720
741
 
721
- def create_path(path: UPath, access_token: str | None = None) -> UPath:
722
- path = UPath(path)
723
- # test whether we have an AWS S3 path
724
- if not isinstance(path, S3Path):
725
- return path
726
- return get_aws_credentials_manager().enrich_path(path, access_token)
742
+ def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
743
+ upath = UPath(path)
744
+
745
+ if upath.protocol == "s3":
746
+ # add managed credentials and other options for AWS s3 paths
747
+ return get_aws_credentials_manager().enrich_path(upath, access_token)
748
+
749
+ if upath.protocol in {"http", "https"}:
750
+ # this is needed because by default aiohttp drops a connection after 5 min
751
+ # so it is impossible to download large files
752
+ client_kwargs = upath.storage_options.get("client_kwargs", {})
753
+ if "timeout" not in client_kwargs:
754
+ from aiohttp import ClientTimeout
755
+
756
+ client_kwargs = {
757
+ **client_kwargs,
758
+ "timeout": ClientTimeout(sock_connect=30, sock_read=30),
759
+ }
760
+ return UPath(upath, client_kwargs=client_kwargs)
761
+ return upath
727
762
 
728
763
 
729
764
  def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
@@ -739,20 +774,28 @@ def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
739
774
  hash = b16_to_b64(stat["blob_id"])
740
775
  hash_type = "sha1"
741
776
  # s3
777
+ # the "mimetype" key is checked below to tell http apart from s3
778
+ # because http also has ETag
742
779
  elif "ETag" in stat:
743
780
  etag = stat["ETag"]
744
- # small files
745
- if "-" not in etag:
746
- # only store hash for non-multipart uploads
747
- # we can't rapidly validate multi-part uploaded files client-side
748
- # we can add more logic later down-the-road
749
- hash = b16_to_b64(etag)
750
- hash_type = "md5"
781
+ if "mimetype" in stat:
782
+ # http
783
+ hash = hash_string(etag.strip('"'))
784
+ hash_type = "md5-etag"
751
785
  else:
752
- stripped_etag, suffix = etag.split("-")
753
- suffix = suffix.strip('"')
754
- hash = b16_to_b64(stripped_etag)
755
- hash_type = f"md5-{suffix}" # this is the S3 chunk-hashing strategy
786
+ # s3
787
+ # small files
788
+ if "-" not in etag:
789
+ # only store hash for non-multipart uploads
790
+ # we can't rapidly validate multi-part uploaded files client-side
791
+ # we can add more logic later down-the-road
792
+ hash = b16_to_b64(etag)
793
+ hash_type = "md5"
794
+ else:
795
+ stripped_etag, suffix = etag.split("-")
796
+ suffix = suffix.strip('"')
797
+ hash = b16_to_b64(stripped_etag)
798
+ hash_type = f"md5-{suffix}" # this is the S3 chunk-hashing strategy
756
799
  if hash is not None:
757
800
  hash = hash[:HASH_LENGTH]
758
801
  return size, hash, hash_type
@@ -777,17 +820,18 @@ def get_stat_dir_cloud(path: UPath) -> tuple[int, str | None, str | None, int]:
777
820
  if compute_list_hash:
778
821
  hashes.append(object[accessor].strip('"='))
779
822
  size = sum(sizes)
780
- n_objects = len(sizes)
823
+ n_files = len(sizes)
781
824
  if compute_list_hash:
782
825
  hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
783
- return size, hash, hash_type, n_objects
826
+ return size, hash, hash_type, n_files
784
827
 
785
828
 
786
- class InstanceNotEmpty(Exception):
787
- pass
829
+ class InstanceNotEmpty(click.ClickException):
830
+ def show(self, file=None):
831
+ pass
788
832
 
789
833
 
790
- # is as fast as boto3: https://lamin.ai/laminlabs/lamindata/transform/krGp3hT1f78N5zKv
834
+ # is as fast as boto3: https://lamin.ai/laminlabs/lamin-site-assets/transform/krGp3hT1f78N5zKv
791
835
  def check_storage_is_empty(
792
836
  root: UPathStr, *, raise_error: bool = True, account_for_sqlite_file: bool = False
793
837
  ) -> int:
@@ -810,20 +854,20 @@ def check_storage_is_empty(
810
854
  root_string += "/"
811
855
  directory_string = root_string + ".lamindb"
812
856
  objects = root_upath.fs.find(directory_string)
813
- n_objects = len(objects)
814
- n_diff = n_objects - n_offset_objects
857
+ n_files = len(objects)
858
+ n_diff = n_files - n_offset_objects
815
859
  ask_for_deletion = (
816
860
  "delete them prior to deleting the instance"
817
861
  if raise_error
818
862
  else "consider deleting them"
819
863
  )
820
864
  message = (
821
- f"Storage '{directory_string}' contains {n_objects - n_offset_objects} objects"
865
+ f"Storage '{directory_string}' contains {n_files - n_offset_objects} objects"
822
866
  f" - {ask_for_deletion}"
823
867
  )
824
868
  if n_diff > 0:
825
869
  if raise_error:
826
- raise InstanceNotEmpty(message)
870
+ raise InstanceNotEmpty(message) from None
827
871
  else:
828
872
  logger.warning(message)
829
873
  return n_diff
@@ -1,20 +1,19 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.3
2
2
  Name: lamindb_setup
3
- Version: 0.81.3
3
+ Version: 1.0a1
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.9
7
7
  Description-Content-Type: text/markdown
8
- Requires-Dist: lnschema_core>=0.51.0
9
8
  Requires-Dist: lamin_utils>=0.3.3
10
- Requires-Dist: django>4.2,<5.3.0
9
+ Requires-Dist: django>=5,<5.2
11
10
  Requires-Dist: dj_database_url>=1.3.0,<3.0.0
12
11
  Requires-Dist: pydantic-settings
13
12
  Requires-Dist: appdirs<2.0.0
14
13
  Requires-Dist: requests
15
14
  Requires-Dist: universal_pathlib==0.2.5
16
15
  Requires-Dist: botocore<2.0.0
17
- Requires-Dist: supabase>=2.8.1,<=2.10.0
16
+ Requires-Dist: supabase>=2.8.1,<=2.11.0
18
17
  Requires-Dist: psutil
19
18
  Requires-Dist: urllib3<2 ; extra == "aws"
20
19
  Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"