lamindb_setup 0.72.2__py2.py3-none-any.whl → 0.73.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb_setup/__init__.py CHANGED
@@ -34,7 +34,7 @@ Modules & settings:
34
34
 
35
35
  """
36
36
 
37
- __version__ = "0.72.2" # denote a release candidate for 0.1.0 with 0.1rc1
37
+ __version__ = "0.73.1" # denote a release candidate for 0.1.0 with 0.1rc1
38
38
 
39
39
  import sys
40
40
  from os import name as _os_name
@@ -16,7 +16,7 @@ from ._silence_loggers import silence_loggers
16
16
  from .core import InstanceSettings
17
17
  from .core._settings import settings
18
18
  from .core._settings_storage import StorageSettings, init_storage
19
- from .core.upath import convert_pathlike
19
+ from .core.upath import UPath
20
20
 
21
21
  if TYPE_CHECKING:
22
22
  from pydantic import PostgresDsn
@@ -56,6 +56,7 @@ def register_storage_in_instance(ssettings: StorageSettings):
56
56
  "region": ssettings.region,
57
57
  "instance_uid": instance_uid,
58
58
  "created_by_id": current_user_id(),
59
+ "run": None,
59
60
  }
60
61
  if ssettings._uid is not None:
61
62
  defaults["uid"] = ssettings._uid
@@ -350,7 +351,7 @@ def infer_instance_name(
350
351
  return str(db).split("/")[-1]
351
352
  if storage == "create-s3":
352
353
  raise ValueError("pass name to init if storage = 'create-s3'")
353
- storage_path = convert_pathlike(storage)
354
+ storage_path = UPath(storage)
354
355
  if storage_path.name != "":
355
356
  name = storage_path.name
356
357
  else:
lamindb_setup/_migrate.py CHANGED
@@ -68,6 +68,8 @@ class migrate:
68
68
  @classmethod
69
69
  def deploy(cls) -> None:
70
70
  """Deploy a migration."""
71
+ from ._schema_metadata import update_schema_in_hub
72
+
71
73
  if _check_instance_setup():
72
74
  raise RuntimeError("Restart Python session to migrate or use CLI!")
73
75
  from lamindb_setup.core._hub_client import call_with_fallback_auth
@@ -104,6 +106,9 @@ class migrate:
104
106
  # this populates the hub
105
107
  if instance_is_on_hub:
106
108
  logger.important(f"updating lamindb version in hub: {lamindb.__version__}")
109
+ # TODO: integrate update of instance table within update_schema_in_hub & below
110
+ if settings.instance.dialect != "sqlite":
111
+ update_schema_in_hub()
107
112
  call_with_fallback_auth(
108
113
  update_instance,
109
114
  instance_id=settings.instance._id.hex,
@@ -0,0 +1,479 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ import importlib
5
+ import json
6
+ from dataclasses import dataclass
7
+ from typing import TYPE_CHECKING, Dict
8
+ from uuid import UUID
9
+
10
+ import sqlparse
11
+ from django.contrib.postgres.expressions import ArraySubquery
12
+ from django.db.models import (
13
+ Field,
14
+ ForeignObjectRel,
15
+ ManyToManyField,
16
+ ManyToManyRel,
17
+ OuterRef,
18
+ QuerySet,
19
+ Subquery,
20
+ )
21
+ from django.db.models.functions import JSONObject
22
+ from sqlparse.sql import Identifier, IdentifierList
23
+ from sqlparse.tokens import DML, Keyword
24
+
25
+ from lamindb_setup import settings
26
+ from lamindb_setup._init_instance import get_schema_module_name
27
+ from lamindb_setup.core._hub_client import call_with_fallback_auth
28
+
29
+ if TYPE_CHECKING:
30
+ from lnschema_core.models import Registry
31
+ from supabase import Client
32
+
33
+
34
def update_schema_in_hub() -> tuple[bool, UUID, dict]:
    """Push the current instance's schema metadata to the hub.

    Authenticates (with fallback) and delegates to `_synchronize_schema`.

    Returns:
        Tuple of (is_new, schema_uuid, schema_row).
    """
    return call_with_fallback_auth(_synchronize_schema)
36
+
37
+
38
def _synchronize_schema(client: Client) -> tuple[bool, UUID, dict]:
    """Ensure the hub has a `schema` record for this instance and link it.

    Computes a content-derived UUID for the instance's schema metadata,
    inserts a `schema` row if none exists yet, and points the instance's
    `schema_id` at it.

    Args:
        client: An authenticated supabase client.

    Returns:
        Tuple of (is_new, schema_uuid, schema_row): whether a new record was
        created, the schema's UUID, and the hub `schema` row.

    Raises:
        RuntimeError: If the instance row could not be updated.
    """
    schema_metadata = SchemaMetadata()
    schema_metadata_dict = schema_metadata.to_json()
    # identical schemas hash to the same UUID, so they share one hub record
    schema_uuid = _dict_to_uuid(schema_metadata_dict)
    schema = _get_schema_by_id(schema_uuid, client)

    is_new = schema is None
    if is_new:
        module_set_info = schema_metadata._get_module_set_info()
        module_ids = "-".join(str(module_info["id"]) for module_info in module_set_info)
        schema = (
            client.table("schema")
            .insert(
                {
                    "id": schema_uuid.hex,
                    "module_ids": module_ids,
                    "module_set_info": module_set_info,
                    "json": schema_metadata_dict,
                }
            )
            .execute()
            .data[0]
        )

    instance_response = (
        client.table("instance")
        .update({"schema_id": schema_uuid.hex})
        .eq("id", settings.instance._id.hex)
        .execute()
    )
    # explicit raise instead of `assert` so the check survives `python -O`
    if len(instance_response.data) != 1:
        raise RuntimeError(
            f"schema of instance {settings.instance._id.hex} could not be "
            f"updated with schema {schema_uuid.hex}"
        )

    return is_new, schema_uuid, schema
73
+
74
+
75
def get_schema_by_id(id: UUID):
    """Look up a hub `schema` record by UUID, authenticating first."""
    return call_with_fallback_auth(_get_schema_by_id, id=id)
77
+
78
+
79
def _get_schema_by_id(id: UUID, client: Client):
    """Return the hub `schema` row with the given UUID, or None if absent."""
    rows = client.table("schema").select("*").eq("id", id.hex).execute().data
    return rows[0] if rows else None
84
+
85
+
86
def _dict_to_uuid(dict: dict):
    """Derive a deterministic UUID from a JSON-serializable dict.

    Keys are sorted before hashing, so two dicts that compare equal always
    map to the same UUID regardless of insertion order.
    """
    # parameter name shadows the builtin `dict`; kept for interface stability
    encoded = json.dumps(dict, sort_keys=True).encode("utf-8")
    # an MD5 digest is exactly 16 bytes — precisely what UUID(bytes=...) needs,
    # so no slicing is required (the original sliced `[:16]` redundantly)
    digest = hashlib.md5(encoded).digest()
    return UUID(bytes=digest)
91
+
92
+
93
class SchemaMetadata:
    """Metadata for every model of the instance's included schema modules."""

    def __init__(self) -> None:
        # "core" is always included; instance settings contribute the rest
        self.included_modules = ["core"] + list(settings.instance.schema)
        self.modules = self._get_modules_metadata()

    def to_dict(
        self, include_django_objects: bool = True, include_select_terms: bool = True
    ):
        """Nested dict: module name -> model name -> model metadata dict."""
        return {
            module_name: {
                model_name: model.to_dict(include_django_objects, include_select_terms)
                for model_name, model in module.items()
            }
            for module_name, module in self.modules.items()
        }

    def to_json(self, include_select_terms: bool = True):
        """JSON-serializable variant of `to_dict` (no Django model objects)."""
        return self.to_dict(
            include_django_objects=False, include_select_terms=include_select_terms
        )

    def _get_modules_metadata(self):
        """Collect ModelMetadata for the concrete models of each module."""
        modules = {}
        for module_name in self.included_modules:
            models_namespace = self._get_schema_module(module_name).models.__dict__
            modules[module_name] = {
                model._meta.model_name: ModelMetadata(
                    model, module_name, self.included_modules
                )
                for model in models_namespace.values()
                # keep concrete Django models owned by this module, skipping
                # the base registries and abstract classes
                if model.__class__.__name__ == "ModelBase"
                and model.__name__ not in ["Registry", "ORM"]
                and not model._meta.abstract
                and model.__get_schema_name__() == module_name
            }
        return modules

    def _get_module_set_info(self):
        # TODO: rely on schemamodule table for this
        return [
            {
                "id": 0,
                "name": module_name,
                "version": self._get_schema_module(module_name).__version__,
            }
            for module_name in self.included_modules
        ]

    @staticmethod
    def _get_schema_module(module_name):
        """Import the Python package hosting this schema module's models."""
        return importlib.import_module(get_schema_module_name(module_name))
144
+
145
+
146
@dataclass
class FieldMetadata:
    """Serializable description of a single model field.

    Plain columns leave the `related_*` attributes as None; relational
    fields fill them in, and many-to-many relations additionally carry
    link-table info in `through`.
    """

    schema_name: str
    model_name: str
    field_name: str
    # Django's internal field type name (e.g. as per `get_internal_type`)
    type: str
    is_link_table: bool
    # database column; None for many-to-many / one-to-one / one-to-many
    column: str | None = None
    # "many-to-one" | "one-to-many" | "many-to-many" | "one-to-one" | None
    relation_type: str | None = None
    related_schema_name: str | None = None
    related_model_name: str | None = None
    related_field_name: str | None = None
    # link-table description, set only for many-to-many relations
    through: dict | None = None
159
+
160
+
161
class ModelRelations:
    """Bucket a model's relational fields by relation kind.

    Exposes `many_to_one`, `one_to_many`, `many_to_many` and `one_to_one`
    dicts keyed by field name, plus `all`, the merge of the four.
    """

    def __init__(self, fields: list[ForeignObjectRel]) -> None:
        self.many_to_one = {}
        self.one_to_many = {}
        self.many_to_many = {}
        self.one_to_one = {}

        # first matching kind wins, mirroring an if/elif cascade
        buckets = (
            ("many_to_one", self.many_to_one),
            ("one_to_many", self.one_to_many),
            ("many_to_many", self.many_to_many),
            ("one_to_one", self.one_to_one),
        )
        for field in fields:
            for kind, bucket in buckets:
                if getattr(field, kind):
                    bucket[field.name] = field
                    break

        self.all = {
            **self.many_to_one,
            **self.one_to_many,
            **self.many_to_many,
            **self.one_to_one,
        }
184
+
185
+
186
class ModelMetadata:
    """Introspected, serializable metadata for one Django model.

    Collects plain-column and relational field descriptions (as
    `FieldMetadata`) plus the model's relation buckets (`ModelRelations`).
    """

    def __init__(self, model, module_name: str, included_modules: list[str]) -> None:
        self.model = model
        self.class_name = model.__name__
        self.module_name = module_name
        self.model_name = model._meta.model_name
        self.table_name = model._meta.db_table
        self.included_modules = included_modules
        self.fields, self.relations = self._get_fields_metadata(self.model)

    def to_dict(
        self, include_django_objects: bool = True, include_select_terms: bool = True
    ):
        """Serialize the field metadata, optionally with SQL select terms
        and the Django model object itself."""
        result = {
            "fields": self.fields.copy(),
            "class_name": self.class_name,
            "table_name": self.table_name,
        }

        select_terms = self.select_terms if include_select_terms else []

        for field_name in self.fields.keys():
            # NOTE(review): `__dict__` aliases the FieldMetadata instance's
            # attribute dict, so the update below also mutates the instance —
            # confirm whether that side effect is intended
            result["fields"][field_name] = result["fields"][field_name].__dict__
            if field_name in select_terms:
                result["fields"][field_name].update(
                    {"select_term": select_terms[field_name]}
                )

        if include_django_objects:
            result.update({"model": self.model})

        return result

    @property
    def select_terms(self):
        # build an annotated queryset for this model and read back the
        # select terms its SQL produces
        builder = DjangoQueryBuilder(self.module_name, self.model_name)
        return builder.add_all_sub_queries().extract_select_terms()

    def _get_fields_metadata(self, model):
        """Split fields into plain-column metadata and relation buckets."""
        relational_fields = []
        fields_metadata: dict[str, FieldMetadata] = {}

        for field in model._meta.get_fields():
            metadata = self._get_field_metadata(model, field)
            if metadata.related_schema_name is None:
                fields_metadata[field.name] = metadata

            # keep only relations whose both ends live in included modules
            if (
                metadata.related_schema_name in self.included_modules
                and metadata.schema_name in self.included_modules
            ):
                relational_fields.append(field)

        relations = ModelRelations(relational_fields)
        fields_metadata = {
            **fields_metadata,
            **self._get_related_fields_metadata(model, relations),
        }
        return fields_metadata, relations

    def _get_related_fields_metadata(
        self, model, model_relations_metadata: ModelRelations
    ):
        """Build FieldMetadata entries for every relation of `model`."""
        related: dict[str, FieldMetadata] = {}

        # many-to-one (foreign key declared on this model): the link field
        # itself plus a flattened `link__column` entry per related column
        for name, link in model_relations_metadata.many_to_one.items():
            related[name] = self._get_field_metadata(model, link)
            for column_field in link.related_model._meta.fields:
                related[f"{name}__{column_field.name}"] = self._get_field_metadata(
                    model, column_field
                )

        # one-to-many (foreign key declared on the related model)
        for name, relation in model_relations_metadata.one_to_many.items():
            # self references are already covered by the many-to-one pass
            if relation.related_model == model:
                continue
            related[name] = self._get_field_metadata(model, relation.field)

        # one-to-one: same flattening as many-to-one
        for name, link in model_relations_metadata.one_to_one.items():
            related[name] = self._get_field_metadata(model, link)
            for column_field in link.related_model._meta.fields:
                related[f"{name}__{column_field.name}"] = self._get_field_metadata(
                    model, column_field
                )

        # many-to-many: just the link field itself
        for name, link in model_relations_metadata.many_to_many.items():
            related[name] = self._get_field_metadata(model, link)

        return related

    def _get_field_metadata(self, model, field: Field):
        """Describe a single field (column or relation) as FieldMetadata."""
        from lnschema_core.models import LinkORM

        internal_type = field.get_internal_type()
        model_name = field.model._meta.model_name
        relation_type = self._get_relation_type(model, field)

        if field.related_model is None:
            schema_name = field.model.__get_schema_name__()
            related_model_name = None
            related_schema_name = None
            related_field_name = None
            field_name = field.name
        else:
            related_model_name = field.related_model._meta.model_name
            related_schema_name = field.related_model.__get_schema_name__()
            schema_name = field.model.__get_schema_name__()
            related_field_name = field.remote_field.name
            field_name = field.name

        if relation_type == "one-to-many":
            # for one-to-many the field object lives on the other model (as a
            # foreign key); swap both ends so usage matches the other kinds
            schema_name, related_schema_name = related_schema_name, schema_name
            model_name, related_model_name = related_model_name, model_name
            field_name, related_field_name = related_field_name, field_name

        column = None
        if relation_type not in ("many-to-many", "one-to-one", "one-to-many"):
            column = field.column

        through = None
        if relation_type == "many-to-many":
            through = self._get_through(model, field)

        return FieldMetadata(
            schema_name=schema_name,
            model_name=model_name,
            field_name=field_name,
            type=internal_type,
            is_link_table=issubclass(field.model, LinkORM),
            column=column,
            relation_type=relation_type,
            related_schema_name=related_schema_name,
            related_model_name=related_model_name,
            related_field_name=related_field_name,
            through=through,
        )

    @staticmethod
    def _get_through(model, field_or_rel: ManyToManyField | ManyToManyRel):
        """Describe an m2m link table: its name plus the two join columns."""
        table_name = model._meta.db_table
        related_table_name = field_or_rel.related_model._meta.db_table

        if isinstance(field_or_rel, ManyToManyField):
            return {
                "link_table_name": field_or_rel.remote_field.through._meta.db_table,
                table_name: field_or_rel.m2m_column_name(),
                related_table_name: field_or_rel.m2m_reverse_name(),
            }

        if isinstance(field_or_rel, ManyToManyRel):
            return {
                "link_table_name": field_or_rel.through._meta.db_table,
                table_name: field_or_rel.field.m2m_column_name(),
                related_table_name: field_or_rel.field.m2m_reverse_name(),
            }

    @staticmethod
    def _get_relation_type(model, field: Field):
        """Classify a field's relation kind, or None for plain columns."""
        if field.many_to_one:
            # FK declared on this model vs. the reverse side of another
            # model's FK pointing here
            return "many-to-one" if model == field.model else "one-to-many"
        if field.one_to_many:
            return "one-to-many"
        if field.many_to_many:
            return "many-to-many"
        if field.one_to_one:
            return "one-to-one"
        return None
392
+
393
+
394
class DjangoQueryBuilder:
    """Annotates a model's queryset with relation subqueries and extracts
    the SQL select terms those annotations produce."""

    def __init__(
        self, module_name: str, model_name: str, query_set: QuerySet | None = None
    ) -> None:
        self.schema_metadata = SchemaMetadata()
        self.module_name = module_name
        self.model_name = model_name
        self.model_metadata = self.schema_metadata.modules[module_name][model_name]
        self.query_set = query_set if query_set else self.model_metadata.model.objects

    def add_all_sub_queries(self):
        """Annotate the queryset with a subquery for every relational field."""
        relation_names = [
            name
            for name, field in self.model_metadata.fields.items()
            if field.relation_type is not None
        ]
        self.add_sub_queries(relation_names)
        return self

    def add_sub_queries(self, included_relations: list[str]):
        """Annotate the queryset with one subquery per named relation."""
        annotations = {
            f"annotated_{name}": self._get_sub_query(self.model_metadata.fields[name])
            for name in included_relations
        }
        self.query_set = self.query_set.annotate(**annotations)
        return self

    def extract_select_terms(self):
        """Map select-clause names to their full SQL terms via sqlparse."""
        statement = sqlparse.parse(self.sql_query)[0]
        in_select = False
        select_terms = {}

        def get_name(identifier):
            name = identifier.get_name()
            # NOTE(review): the fallback returns a list (unhashable as a dict
            # key) — looks unintended; confirm whether `[-1]` was meant
            return name if name is not None else str(identifier).split(".")

        for token in statement.tokens:
            if token.ttype is DML and token.value.upper() == "SELECT":
                in_select = True
            elif in_select and isinstance(token, IdentifierList):
                for identifier in token.get_identifiers():
                    select_terms[get_name(identifier)] = str(identifier)
            elif in_select and isinstance(token, Identifier):
                select_terms[get_name(token)] = str(token)
            elif token.ttype is Keyword:
                # stop once the select clause is over
                if token.value.upper() in ("FROM", "WHERE", "GROUP BY", "ORDER BY"):
                    break

        return select_terms

    def _get_sub_query(self, field_metadata: FieldMetadata):
        """Build a JSON-object subquery fetching up to 5 related records."""
        module_name = field_metadata.related_schema_name
        model_name = field_metadata.related_model_name
        field_name = field_metadata.related_field_name
        related_metadata = self.schema_metadata.modules[module_name][model_name]
        query_set = related_metadata.model.objects.get_queryset()
        # only plain (non-relational, non-flattened) columns go into the JSON
        select = {
            name: name
            for name in related_metadata.fields.keys()
            if related_metadata.fields[name].relation_type is None
            and "__" not in name
        }

        # lazy queryset expression; shared by both relation branches
        inner = query_set.filter(**{field_name: OuterRef("pk")}).values(
            data=JSONObject(**select)
        )[:5]
        if field_metadata.relation_type in ("many-to-many", "one-to-many"):
            return ArraySubquery(Subquery(inner))
        if field_metadata.relation_type in ("many-to-one", "one-to-one"):
            return Subquery(inner)

    @property
    def sql_query(self):
        """The queryset's SQL with params inlined and the `annotated_`
        prefixes stripped."""
        template, params = self.query_set.query.sql_with_params()
        rendered = template % tuple(f"'{p}'" for p in params)
        return rendered.replace("annotated_", "")
@@ -67,8 +67,8 @@ class AWSCredentialsManager:
67
67
  def _path_inject_options(self, path: S3Path, credentials: dict) -> S3Path:
68
68
  if credentials == {}:
69
69
  # credentials were specified manually for the path
70
- if "anon" in path._kwargs:
71
- anon = path._kwargs["anon"]
70
+ if "anon" in path.storage_options:
71
+ anon = path.storage_options["anon"]
72
72
  elif path.fs.key is not None and path.fs.secret is not None:
73
73
  anon = False
74
74
  else:
@@ -77,8 +77,8 @@ class AWSCredentialsManager:
77
77
  else:
78
78
  connection_options = credentials
79
79
 
80
- if "cache_regions" in path._kwargs:
81
- cache_regions = path._kwargs["cache_regions"]
80
+ if "cache_regions" in path.storage_options:
81
+ cache_regions = path.storage_options["cache_regions"]
82
82
  else:
83
83
  cache_regions = True
84
84
 
@@ -18,7 +18,7 @@ from .cloud_sqlite_locker import (
18
18
  EXPIRATION_TIME,
19
19
  InstanceLockedException,
20
20
  )
21
- from .upath import LocalPathClasses, UPath, convert_pathlike
21
+ from .upath import LocalPathClasses, UPath
22
22
 
23
23
  if TYPE_CHECKING:
24
24
  from uuid import UUID
@@ -205,7 +205,7 @@ class InstanceSettings:
205
205
  )
206
206
  if response != "y":
207
207
  return None
208
- local_root = convert_pathlike(local_root)
208
+ local_root = UPath(local_root)
209
209
  assert isinstance(local_root, LocalPathClasses)
210
210
  self._storage_local = init_storage(local_root, self._id, register_hub=True) # type: ignore
211
211
  register_storage_in_instance(self._storage_local) # type: ignore
@@ -17,7 +17,6 @@ from ._settings_store import system_storage_settings_file
17
17
  from .upath import (
18
18
  LocalPathClasses,
19
19
  UPath,
20
- convert_pathlike,
21
20
  create_path,
22
21
  )
23
22
 
@@ -37,40 +36,43 @@ def base62(n_char: int) -> str:
37
36
  return id
38
37
 
39
38
 
40
- def get_storage_region(storage_root: UPathStr) -> str | None:
41
- storage_root_str = str(storage_root)
42
- if storage_root_str.startswith("s3://"):
43
- import botocore.session as session
39
+ def get_storage_region(path: UPathStr) -> str | None:
40
+ path_str = str(path)
41
+ if path_str.startswith("s3://"):
42
+ import botocore.session
44
43
  from botocore.config import Config
45
- from botocore.exceptions import NoCredentialsError
44
+ from botocore.exceptions import ClientError
46
45
 
47
46
  # strip the prefix and any suffixes of the bucket name
48
- bucket = storage_root_str.replace("s3://", "").split("/")[0]
49
- s3_session = session.get_session()
50
- s3_client = s3_session.create_client("s3")
47
+ bucket = path_str.replace("s3://", "").split("/")[0]
48
+ session = botocore.session.get_session()
49
+ credentials = session.get_credentials()
50
+ if credentials is None or credentials.access_key is None:
51
+ config = Config(signature_version=botocore.session.UNSIGNED)
52
+ else:
53
+ config = None
54
+ s3_client = session.create_client("s3", config=config)
51
55
  try:
52
56
  response = s3_client.head_bucket(Bucket=bucket)
53
- except NoCredentialsError: # deal with anonymous access
54
- s3_client = s3_session.create_client(
55
- "s3", config=Config(signature_version=session.UNSIGNED)
56
- )
57
- response = s3_client.head_bucket(Bucket=bucket)
58
- storage_region = response["ResponseMetadata"].get("HTTPHeaders", {})[
59
- "x-amz-bucket-region"
60
- ]
61
- # if we want to except botcore.exceptions.ClientError to reformat an
62
- # error message, this is how to do test for the "NoSuchBucket" error:
63
- # exc.response["Error"]["Code"] == "NoSuchBucket"
57
+ except ClientError as exc:
58
+ response = getattr(exc, "response", {})
59
+ if response.get("Error", {}).get("Code") == "404":
60
+ raise exc
61
+ region = (
62
+ response.get("ResponseMetadata", {})
63
+ .get("HTTPHeaders", {})
64
+ .get("x-amz-bucket-region")
65
+ )
64
66
  else:
65
- storage_region = None
66
- return storage_region
67
+ region = None
68
+ return region
67
69
 
68
70
 
69
71
  def mark_storage_root(root: UPathStr, uid: str):
70
72
  # we need to touch a 0-byte object in folder-like storage location on S3 to avoid
71
73
  # permission errors from leveraging s3fs on an empty hosted storage location
72
74
  # for consistency, we write this file everywhere
73
- root_upath = convert_pathlike(root)
75
+ root_upath = UPath(root)
74
76
  mark_upath = root_upath / IS_INITIALIZED_KEY
75
77
  mark_upath.write_text(uid)
76
78
 
@@ -156,7 +158,7 @@ class StorageSettings:
156
158
  ):
157
159
  self._uid = uid
158
160
  self._uuid_ = uuid
159
- self._root_init = convert_pathlike(root)
161
+ self._root_init = UPath(root)
160
162
  if isinstance(self._root_init, LocalPathClasses): # local paths
161
163
  try:
162
164
  (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
@@ -53,6 +53,7 @@ def write_bionty_sources(isettings: InstanceSettings) -> None:
53
53
  kwargs["species"] = kwargs.pop("organism")
54
54
  elif hasattr(PublicSource, "organism") and "species" in kwargs:
55
55
  kwargs["organism"] = kwargs.pop("species")
56
+ kwargs["run"] = None # can't yet access tracking information
56
57
  record = PublicSource(**kwargs)
57
58
  all_records.append(record)
58
59
 
@@ -59,6 +59,7 @@ VALID_SUFFIXES = {
59
59
  VALID_COMPOSITE_SUFFIXES = {
60
60
  ".anndata.zarr",
61
61
  ".spatialdata.zarr",
62
+ ".ome.zarr",
62
63
  }
63
64
 
64
65
  TRAILING_SEP = (os.sep, os.altsep) if os.altsep is not None else os.sep
@@ -155,7 +156,10 @@ def create_mapper(
155
156
 
156
157
 
157
158
  def print_hook(size: int, value: int, objectname: str, action: str):
158
- progress_in_percent = (value / size) * 100
159
+ if size == 0:
160
+ progress_in_percent = 100.0
161
+ else:
162
+ progress_in_percent = (value / size) * 100
159
163
  out = f"... {action} {objectname}:" f" {min(progress_in_percent, 100):4.1f}%"
160
164
  if "NBPRJ_TEST_NBPATH" not in os.environ:
161
165
  end = "\n" if progress_in_percent >= 100 else "\r"
@@ -238,50 +242,70 @@ class ChildProgressCallback(fsspec.callbacks.Callback):
238
242
  self.parent.update_relative_value(inc)
239
243
 
240
244
  def relative_update(self, inc=1):
241
- self.parent_update(inc / self.size)
245
+ if self.size != 0:
246
+ self.parent_update(inc / self.size)
247
+ else:
248
+ self.parent_update(1)
242
249
 
243
250
 
244
- def download_to(self, path: UPathStr, print_progress: bool = False, **kwargs):
245
- """Download to a path."""
251
+ def download_to(self, local_path: UPathStr, print_progress: bool = True, **kwargs):
252
+ """Download from self (a destination in the cloud) to the local path."""
253
+ if "recursive" not in kwargs:
254
+ kwargs["recursive"] = True
246
255
  if print_progress and "callback" not in kwargs:
247
256
  callback = ProgressCallback(
248
- PurePosixPath(path).name, "downloading", adjust_size=True
257
+ PurePosixPath(local_path).name, "downloading", adjust_size=True
249
258
  )
250
259
  kwargs["callback"] = callback
251
260
 
252
- self.fs.download(str(self), str(path), **kwargs)
261
+ self.fs.download(str(self), str(local_path), **kwargs)
253
262
 
254
263
 
255
264
  def upload_from(
256
265
  self,
257
- path: UPathStr,
258
- dir_inplace: bool = False,
259
- print_progress: bool = False,
266
+ local_path: UPathStr,
267
+ create_folder: bool | None = None,
268
+ print_progress: bool = True,
260
269
  **kwargs,
261
- ):
262
- """Upload from a local path."""
263
- path = Path(path)
264
- path_is_dir = path.is_dir()
265
- if not path_is_dir:
266
- dir_inplace = False
270
+ ) -> UPath:
271
+ """Upload from the local path to `self` (a destination in the cloud).
272
+
273
+ If the local path is a directory, recursively upload its contents.
274
+
275
+ Args:
276
+ local_path: A local path of a file or directory.
277
+ create_folder: Only applies if `local_path` is a directory and then
278
+ defaults to `True`. If `True`, make a new folder in the destination
279
+ using the directory name of `local_path`. If `False`, upload the
280
+ contents of the directory to the root-level of the destination.
281
+ print_progress: Print progress.
282
+
283
+ Returns:
284
+ The destination path.
285
+ """
286
+ local_path = Path(local_path)
287
+ local_path_is_dir = local_path.is_dir()
288
+ if create_folder is None:
289
+ create_folder = local_path_is_dir
290
+ if create_folder and not local_path_is_dir:
291
+ raise ValueError("create_folder can only be True if local_path is a directory")
267
292
 
268
293
  if print_progress and "callback" not in kwargs:
269
- callback = ProgressCallback(path.name, "uploading")
294
+ callback = ProgressCallback(local_path.name, "uploading")
270
295
  kwargs["callback"] = callback
271
296
 
272
- if dir_inplace:
273
- source = [f for f in path.rglob("*") if f.is_file()]
274
- destination = [str(self / f.relative_to(path)) for f in source]
297
+ if local_path_is_dir and not create_folder:
298
+ source = [f for f in local_path.rglob("*") if f.is_file()]
299
+ destination = [str(self / f.relative_to(local_path)) for f in source]
275
300
  source = [str(f) for f in source] # type: ignore
276
301
  else:
277
- source = str(path) # type: ignore
302
+ source = str(local_path) # type: ignore
278
303
  destination = str(self) # type: ignore
279
- # this weird thing is to avoid s3fs triggering create_bucket in upload
280
- # if dirs are present
281
- # it allows to avoid permission error
282
- if self.protocol != "s3" or not path_is_dir or dir_inplace:
283
- cleanup_cache = False
284
- else:
304
+
305
+ # the below lines are to avoid s3fs triggering create_bucket in upload if
306
+ # dirs are present; this avoids a permission error
307
+ # would be easier to just
308
+ if self.protocol == "s3" and local_path_is_dir and create_folder:
285
309
  bucket = self._url.netloc
286
310
  if bucket not in self.fs.dircache:
287
311
  self.fs.dircache[bucket] = [{}]
@@ -290,14 +314,21 @@ def upload_from(
290
314
  cleanup_cache = True
291
315
  else:
292
316
  cleanup_cache = False
317
+ else:
318
+ cleanup_cache = False
293
319
 
294
- self.fs.upload(source, destination, **kwargs)
320
+ self.fs.upload(source, destination, recursive=create_folder, **kwargs)
295
321
 
296
322
  if cleanup_cache:
297
323
  # normally this is invalidated after the upload but still better to check
298
324
  if bucket in self.fs.dircache:
299
325
  del self.fs.dircache[bucket]
300
326
 
327
+ if local_path_is_dir and create_folder:
328
+ return self / local_path.name
329
+ else:
330
+ return self
331
+
301
332
 
302
333
  def synchronize(
303
334
  self,
@@ -305,14 +336,14 @@ def synchronize(
305
336
  error_no_origin: bool = True,
306
337
  print_progress: bool = False,
307
338
  callback: fsspec.callbacks.Callback | None = None,
308
- **kwargs,
339
+ timestamp: float | None = None,
309
340
  ):
310
341
  """Sync to a local destination path."""
311
342
  # optimize the number of network requests
312
- if "timestamp" in kwargs:
343
+ if timestamp is not None:
313
344
  is_dir = False
314
345
  exists = True
315
- cloud_mts = kwargs.pop("timestamp")
346
+ cloud_mts = timestamp
316
347
  else:
317
348
  # perform only one network request to check existence, type and timestamp
318
349
  try:
@@ -378,8 +409,8 @@ def synchronize(
378
409
  origin = f"{self.protocol}://{file}"
379
410
  destination = objectpath / file_key
380
411
  child = callback.branched(origin, destination.as_posix())
381
- UPath(origin, **self._kwargs).synchronize(
382
- destination, timestamp=timestamp, callback=child, **kwargs
412
+ UPath(origin, **self.storage_options).synchronize(
413
+ destination, callback=child, timestamp=timestamp
383
414
  )
384
415
  child.close()
385
416
  if destination_exists:
@@ -400,15 +431,16 @@ def synchronize(
400
431
  callback = ProgressCallback.requires_progress(
401
432
  callback, print_progress, objectpath.name, "synchronizing"
402
433
  )
403
- kwargs["callback"] = callback
404
434
  if objectpath.exists():
405
- local_mts = objectpath.stat().st_mtime # type: ignore
406
- need_synchronize = cloud_mts > local_mts
435
+ local_mts_obj = objectpath.stat().st_mtime # type: ignore
436
+ need_synchronize = cloud_mts > local_mts_obj
407
437
  else:
408
438
  objectpath.parent.mkdir(parents=True, exist_ok=True)
409
439
  need_synchronize = True
410
440
  if need_synchronize:
411
- self.download_to(objectpath, **kwargs)
441
+ self.download_to(
442
+ objectpath, recursive=False, print_progress=False, callback=callback
443
+ )
412
444
  os.utime(objectpath, times=(cloud_mts, cloud_mts))
413
445
  else:
414
446
  # nothing happens if parent_update is not defined
@@ -477,7 +509,7 @@ def compute_file_tree(
477
509
  if child_path.is_dir():
478
510
  if include_dirs and child_path not in include_dirs:
479
511
  continue
480
- yield prefix + pointer + child_path.name
512
+ yield prefix + pointer + child_path.name + "/"
481
513
  n_directories += 1
482
514
  n_files_per_dir_and_type = defaultdict(lambda: 0)
483
515
  extension = branch if pointer == tee else space
@@ -657,22 +689,8 @@ Args:
657
689
  """
658
690
 
659
691
 
660
- def convert_pathlike(pathlike: UPathStr) -> UPath:
661
- """Convert pathlike to Path or UPath inheriting options from root."""
662
- if isinstance(pathlike, (str, UPath)):
663
- path = UPath(pathlike)
664
- elif isinstance(pathlike, Path):
665
- path = UPath(str(pathlike)) # UPath applied on Path gives Path back
666
- else:
667
- raise ValueError("pathlike should be of type UPathStr")
668
- # remove trailing slash
669
- if path._parts and path._parts[-1] == "":
670
- path._parts = path._parts[:-1]
671
- return path
672
-
673
-
674
692
  def create_path(path: UPath, access_token: str | None = None) -> UPath:
675
- path = convert_pathlike(path)
693
+ path = UPath(path)
676
694
  # test whether we have an AWS S3 path
677
695
  if not isinstance(path, S3Path):
678
696
  return path
@@ -681,15 +699,16 @@ def create_path(path: UPath, access_token: str | None = None) -> UPath:
681
699
 
682
700
  def get_stat_file_cloud(stat: dict) -> tuple[int, str, str]:
683
701
  size = stat["size"]
702
+ etag = stat["ETag"]
684
703
  # small files
685
- if "-" not in stat["ETag"]:
704
+ if "-" not in etag:
686
705
  # only store hash for non-multipart uploads
687
706
  # we can't rapidly validate multi-part uploaded files client-side
688
707
  # we can add more logic later down-the-road
689
- hash = b16_to_b64(stat["ETag"])
708
+ hash = b16_to_b64(etag)
690
709
  hash_type = "md5"
691
710
  else:
692
- stripped_etag, suffix = stat["ETag"].split("-")
711
+ stripped_etag, suffix = etag.split("-")
693
712
  suffix = suffix.strip('"')
694
713
  hash = f"{b16_to_b64(stripped_etag)}-{suffix}"
695
714
  hash_type = "md5-n" # this is the S3 chunk-hashing strategy
@@ -721,7 +740,7 @@ class InstanceNotEmpty(Exception):
721
740
  def check_storage_is_empty(
722
741
  root: UPathStr, *, raise_error: bool = True, account_for_sqlite_file: bool = False
723
742
  ) -> int:
724
- root_upath = convert_pathlike(root)
743
+ root_upath = UPath(root)
725
744
  root_string = root_upath.as_posix() # type: ignore
726
745
  # we currently touch a 0-byte file in the root of a hosted storage location
727
746
  # ({storage_root}/.lamindb/_is_initialized) during storage initialization
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb_setup
3
- Version: 0.72.2
3
+ Version: 0.73.1
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <laminlabs@gmail.com>
6
6
  Description-Content-Type: text/markdown
@@ -11,7 +11,7 @@ Requires-Dist: dj_database_url>=1.3.0,<3.0.0
11
11
  Requires-Dist: pydantic[dotenv]<2.0.0
12
12
  Requires-Dist: appdirs<2.0.0
13
13
  Requires-Dist: requests
14
- Requires-Dist: universal_pathlib==0.1.4
14
+ Requires-Dist: universal_pathlib==0.2.2
15
15
  Requires-Dist: botocore<2.0.0
16
16
  Requires-Dist: supabase==2.2.1
17
17
  Requires-Dist: urllib3<2 ; extra == "aws"
@@ -1,4 +1,4 @@
1
- lamindb_setup/__init__.py,sha256=p_LDSrN8hygUj_CeoWH3ZILNhJi_GQVghFfu7pSmt5w,1542
1
+ lamindb_setup/__init__.py,sha256=XYsA4Sk8k2514_kS3XNzBJs1ZXhDkUcrd1rQTzwsan4,1542
2
2
  lamindb_setup/_cache.py,sha256=wA7mbysANwe8hPNbjDo9bOmXJ0xIyaS5iyxIpxSWji4,846
3
3
  lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
4
4
  lamindb_setup/_check_setup.py,sha256=cNEL9Q4yPpmEkGKHH8JgullWl1VUZwALJ4RHn9wZypY,2613
@@ -8,15 +8,16 @@ lamindb_setup/_delete.py,sha256=Y8KSFYgY0UHAvjd7cCL6hZ_XiLeJwx50BguVATcj_Xo,5524
8
8
  lamindb_setup/_django.py,sha256=EoyWvFzH0i9wxjy4JZhcoXCTckztP_Mrl6FbYQnMmLE,1534
9
9
  lamindb_setup/_exportdb.py,sha256=uTIZjKKTB7arzEr1j0O6lONiT2pRBKeOFdLvOV8ZwzE,2120
10
10
  lamindb_setup/_importdb.py,sha256=yYYShzUajTsR-cTW4CZ-UNDWZY2uE5PAgNbp-wn8Ogc,1874
11
- lamindb_setup/_init_instance.py,sha256=lR-6txbf3Z2O7ki-DMZWFg36QNiUZ_B5lc6JXjScdus,11897
12
- lamindb_setup/_migrate.py,sha256=4nBTFg5-BK4A2gH-D3_tcFf8EtvMnIo5Mq0e_C6_9-U,8815
11
+ lamindb_setup/_init_instance.py,sha256=Cji3h2kCj8Meukkm8Btl8Lu4Jio4NiWLlzAAKukEzYY,11896
12
+ lamindb_setup/_migrate.py,sha256=49xPkwU-QQjpq0xSUepiVfvRsMhmjfbRlhd72YIq8o8,9059
13
13
  lamindb_setup/_register_instance.py,sha256=Jeu0wyvJVSVQ_n-A_7yn7xOZIP0ncJD92DRABqzPIjA,940
14
14
  lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
15
+ lamindb_setup/_schema_metadata.py,sha256=G3yXJ46OkVGqHxccCgjvQnnBNKg1uhwxXB1CddlkwYw,16882
15
16
  lamindb_setup/_set_managed_storage.py,sha256=mNZrANn-9rwZ0oGWxxg0wS0T0VOQCWyo4nSSyNAE15Q,1419
16
17
  lamindb_setup/_setup_user.py,sha256=6Oc7Rke-yRQSZbuntdUAz8QbJ6UuPzYHI9FnYlf_q-A,3670
17
18
  lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
18
19
  lamindb_setup/core/__init__.py,sha256=dV9S-rQpNK9JcBn4hiEmiLnmNqfpPFJD9pqagMCaIew,416
19
- lamindb_setup/core/_aws_credentials.py,sha256=nK04-lNYz6MYDgD6Z56peoxCULZ82uFDRMzjwsPh25U,5293
20
+ lamindb_setup/core/_aws_credentials.py,sha256=uKMQO9q42Hnepz8aj3RxwLKDWUJx8pNOYrFnnNh5X40,5325
20
21
  lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
21
22
  lamindb_setup/core/_deprecated.py,sha256=3qxUI1dnDlSeR0BYrv7ucjqRBEojbqotPgpShXs4KF8,2520
22
23
  lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
@@ -25,20 +26,20 @@ lamindb_setup/core/_hub_core.py,sha256=RGjTqf1owuWmkXAYy0EPaoHAaJ-0T0hAidkqa3cId
25
26
  lamindb_setup/core/_hub_crud.py,sha256=b1XF7AJpM9Q-ttm9nPG-r3OTRWHQaGzAGIyvmb83NTo,4859
26
27
  lamindb_setup/core/_hub_utils.py,sha256=b_M1LkdCjiMWm1EOlSb9GuPdLijwVgQDtATTpeZuXI0,1875
27
28
  lamindb_setup/core/_settings.py,sha256=jjZ_AxRXB3Y3UP6m04BAw_dhFbJbdg2-nZWmEv2LNZ8,3141
28
- lamindb_setup/core/_settings_instance.py,sha256=kda3kqUtwh-XZMDmbdFIp8RGGFw0Az8T8g2SKEU65mo,16949
29
+ lamindb_setup/core/_settings_instance.py,sha256=O9TtGijSJCXMREePegrxlQmHzKJFbgpC8yrlOe4BwJo,16920
29
30
  lamindb_setup/core/_settings_load.py,sha256=NGgCDpN85j1EqoKlrYFIlZBMlBJm33gx2-wc96CP_ZQ,3922
30
31
  lamindb_setup/core/_settings_save.py,sha256=d1A-Ex-7H08mb8l7I0Oe0j0GilrfaDuprh_NMxhQAsQ,2704
31
- lamindb_setup/core/_settings_storage.py,sha256=7f0jt1zcSltpOYDPQ5CVvbBon_d7aneKTte935-2REY,13236
32
+ lamindb_setup/core/_settings_storage.py,sha256=k4XyJR6_KpUpQuBYZp4mEdABiT91gTTfbK7tAVwqZCA,13093
32
33
  lamindb_setup/core/_settings_store.py,sha256=dagS5c7wAMRnuZTRfCU4sKaIOyF_HwAP5Fnnn8vphno,2084
33
34
  lamindb_setup/core/_settings_user.py,sha256=P2lC4WDRAFfT-Xq3MlXJ-wMKIHCoGNhMTQfRGIAyUNQ,1344
34
- lamindb_setup/core/_setup_bionty_sources.py,sha256=OgPpZxN2_Wffy-ogEBz_97c_k8d2bD-DDVt89-u9GLY,3002
35
+ lamindb_setup/core/_setup_bionty_sources.py,sha256=h_pBANsSGK6ujAFsG21mtADHVJoMLKDR4eGgRP4Fgls,3072
35
36
  lamindb_setup/core/cloud_sqlite_locker.py,sha256=NIBNAGq7TTRrip9OzMdiQKj8QOuwhL9esyM0aehUqBA,6893
36
37
  lamindb_setup/core/django.py,sha256=QUQm3zt5QIiD8uv6o9vbSm_bshqiSWzKSkgD3z2eJCg,3542
37
38
  lamindb_setup/core/exceptions.py,sha256=eoI7AXgATgDVzgArtN7CUvpaMUC067vsBg5LHCsWzDM,305
38
39
  lamindb_setup/core/hashing.py,sha256=7r96h5JBzuwfOR_gNNqTyWNPKMuiOUfBYwn6sCbZkf8,2269
39
40
  lamindb_setup/core/types.py,sha256=bcYnZ0uM_2NXKJCl94Mmc-uYrQlRUUVKG3sK2N-F-N4,532
40
- lamindb_setup/core/upath.py,sha256=QnAiaOZgT1TLUaX0PEs9dSJ0E4ZDD431hCfKrJIbmqQ,26339
41
- lamindb_setup-0.72.2.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
42
- lamindb_setup-0.72.2.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
43
- lamindb_setup-0.72.2.dist-info/METADATA,sha256=oeLIvcLxoZcHsOT7yDplKrq89sW5elwq2TV9CpZWY6M,1620
44
- lamindb_setup-0.72.2.dist-info/RECORD,,
41
+ lamindb_setup/core/upath.py,sha256=dwudkTVsXuyjS-2xR16WomcWtXJAEfRZ0ZzFq8_EDhE,27157
42
+ lamindb_setup-0.73.1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
43
+ lamindb_setup-0.73.1.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
44
+ lamindb_setup-0.73.1.dist-info/METADATA,sha256=7iNq1IHqO4W4cHuOXPr_4wG7SxruZvn8MTBHZ_fRNM0,1620
45
+ lamindb_setup-0.73.1.dist-info/RECORD,,