lamindb_setup 0.81.4__py3-none-any.whl → 1.0a5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -232,21 +232,21 @@ class _ModelHandler:
232
232
  return related_fields
233
233
 
234
234
  def _get_field_metadata(self, model, field: Field):
235
- from lnschema_core.models import LinkORM
235
+ from lamindb.models import LinkORM
236
236
 
237
237
  internal_type = field.get_internal_type()
238
238
  model_name = field.model._meta.model_name
239
239
  relation_type = self._get_relation_type(model, field)
240
240
  if field.related_model is None:
241
- schema_name = field.model.__get_schema_name__()
241
+ schema_name = field.model.__get_module_name__()
242
242
  related_model_name = None
243
243
  related_schema_name = None
244
244
  related_field_name = None
245
245
  field_name = field.name
246
246
  else:
247
247
  related_model_name = field.related_model._meta.model_name
248
- related_schema_name = field.related_model.__get_schema_name__()
249
- schema_name = field.model.__get_schema_name__()
248
+ related_schema_name = field.related_model.__get_module_name__()
249
+ schema_name = field.model.__get_module_name__()
250
250
  related_field_name = field.remote_field.name
251
251
  field_name = field.name
252
252
 
@@ -273,14 +273,16 @@ class _ModelHandler:
273
273
  through = self._get_through(field)
274
274
 
275
275
  return FieldMetadata(
276
- schema_name=schema_name,
276
+ schema_name=schema_name if schema_name != "lamindb" else "core",
277
277
  model_name=model_name,
278
278
  field_name=field_name,
279
279
  type=internal_type,
280
280
  is_link_table=issubclass(field.model, LinkORM),
281
281
  column_name=column,
282
282
  relation_type=relation_type,
283
- related_schema_name=related_schema_name,
283
+ related_schema_name=related_schema_name
284
+ if related_schema_name != "lamindb"
285
+ else "core",
284
286
  related_model_name=related_model_name,
285
287
  related_field_name=related_field_name,
286
288
  through=through,
@@ -288,7 +290,7 @@ class _ModelHandler:
288
290
 
289
291
  @staticmethod
290
292
  def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
291
- from lnschema_core.models import Registry
293
+ from lamindb.models import Registry
292
294
 
293
295
  if isinstance(field_or_rel, ManyToManyField):
294
296
  if field_or_rel.model != Registry:
@@ -360,12 +362,12 @@ class _ModelHandler:
360
362
 
361
363
  class _SchemaHandler:
362
364
  def __init__(self) -> None:
363
- self.included_modules = ["core"] + list(settings.instance.schema)
365
+ self.included_modules = ["core"] + list(settings.instance.modules)
364
366
  self.modules = self._get_modules_metadata()
365
367
 
366
368
  def to_dict(self, include_django_objects: bool = True):
367
369
  return {
368
- module_name: {
370
+ module_name if module_name != "lamindb" else "core": {
369
371
  model_name: model.to_dict(include_django_objects)
370
372
  for model_name, model in module.items()
371
373
  }
@@ -376,7 +378,7 @@ class _SchemaHandler:
376
378
  return self.to_dict(include_django_objects=False)
377
379
 
378
380
  def _get_modules_metadata(self):
379
- from lnschema_core.models import Record, Registry
381
+ from lamindb.models import Record, Registry
380
382
 
381
383
  all_models = {
382
384
  module_name: {
@@ -389,7 +391,7 @@ class _SchemaHandler:
389
391
  if model.__class__ is Registry
390
392
  and model is not Record
391
393
  and not model._meta.abstract
392
- and model.__get_schema_name__() == module_name
394
+ and model.__get_module_name__() == module_name
393
395
  }
394
396
  for module_name in self.included_modules
395
397
  }
@@ -401,6 +403,8 @@ class _SchemaHandler:
401
403
  module_set_info = []
402
404
  for module_name in self.included_modules:
403
405
  module = self._get_schema_module(module_name)
406
+ if module_name == "lamindb":
407
+ module_name = "core"
404
408
  module_set_info.append(
405
409
  {"id": 0, "name": module_name, "version": module.__version__}
406
410
  )
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import os
4
4
  import time
5
5
 
6
+ from lamin_utils import logger
6
7
  from upath.implementations.cloud import S3Path
7
8
 
8
9
  HOSTED_REGIONS = [
@@ -40,8 +41,15 @@ class AWSCredentialsManager:
40
41
 
41
42
  # this is cached so will be reused with the connection initialized
42
43
  fs = S3FileSystem(cache_regions=True)
43
- fs.connect()
44
- self.anon: bool = fs.session._credentials is None
44
+ try:
45
+ fs.connect()
46
+ self.anon: bool = fs.session._credentials is None
47
+ except Exception as e:
48
+ logger.warning(
49
+ f"There is a problem with your default AWS Credentials: {e}\n"
50
+ "`anon` mode will be used for all non-managed buckets."
51
+ )
52
+ self.anon = True
45
53
  self.anon_public: bool | None = None
46
54
  if not self.anon:
47
55
  try:
@@ -30,23 +30,30 @@ from functools import wraps
30
30
  def deprecated(new_name: str):
31
31
  """Deprecated.
32
32
 
33
- This is a decorator which can be used to mark functions
33
+ This is a decorator which can be used to mark functions, methods and properties
34
34
  as deprecated. It will result in a warning being emitted
35
35
  when the function is used.
36
+
37
+ It will also hide the function from the docs.
38
+
39
+ Example::
40
+
41
+ @property
42
+ @deprecated("n_files")
43
+ def n_objects(self) -> int:
44
+ return self.n_files
45
+
36
46
  """
37
47
 
38
48
  def decorator(func):
39
49
  @wraps(func)
40
50
  def new_func(*args, **kwargs):
41
- # turn off filter
42
- warnings.simplefilter("always", DeprecationWarning)
43
51
  warnings.warn(
44
52
  f"Use {new_name} instead of {func.__name__}, "
45
53
  f"{func.__name__} will be removed in the future.",
46
- category=DeprecationWarning,
54
+ category=FutureWarning,
47
55
  stacklevel=2,
48
56
  )
49
- warnings.simplefilter("default", DeprecationWarning) # reset filter
50
57
  return func(*args, **kwargs)
51
58
 
52
59
  setattr(new_func, "__deprecated", True)
@@ -233,11 +233,11 @@ def _delete_instance(
233
233
  )
234
234
  if require_empty:
235
235
  for storage_record in storage_records:
236
+ root_string: str = storage_record["root"] # type: ignore
236
237
  account_for_sqlite_file = (
237
238
  instance_with_storage["db_scheme"] is None
238
- and instance_with_storage["storage"]["root"] == storage_record["root"]
239
+ and instance_with_storage["storage"]["root"] == root_string
239
240
  )
240
- root_string = storage_record["root"]
241
241
  # gate storage and instance deletion on empty storage location for
242
242
  # normally auth.get_session() doesn't have access_token
243
243
  # so this block is useless i think (Sergei)
@@ -144,17 +144,14 @@ def select_collaborator(
144
144
  def select_default_storage_by_instance_id(
145
145
  instance_id: str, client: Client
146
146
  ) -> dict | None:
147
- try:
148
- data = (
149
- client.table("storage")
150
- .select("*")
151
- .eq("instance_id", instance_id)
152
- .eq("is_default", True)
153
- .execute()
154
- .data
155
- )
156
- except Exception:
157
- return None
147
+ data = (
148
+ client.table("storage")
149
+ .select("*")
150
+ .eq("instance_id", instance_id)
151
+ .eq("is_default", True)
152
+ .execute()
153
+ .data
154
+ )
158
155
  if len(data) == 0:
159
156
  return None
160
157
  return data[0]
@@ -7,15 +7,6 @@ from pydantic import BaseModel, Field, GetCoreSchemaHandler
7
7
  from pydantic_core import CoreSchema, core_schema
8
8
 
9
9
 
10
- def validate_schema_arg(schema: str | None = None) -> str:
11
- if schema is None or schema == "":
12
- return ""
13
- # currently no actual validation, can add back if we see a need
14
- # the following just strips white spaces
15
- to_be_validated = [s.strip() for s in schema.split(",")]
16
- return ",".join(to_be_validated)
17
-
18
-
19
10
  def validate_db_arg(db: str | None) -> None:
20
11
  if db is not None:
21
12
  LaminDsnModel(db=db)
@@ -164,7 +164,7 @@ class SetupSettings:
164
164
 
165
165
  @property
166
166
  def paths(self) -> type[SetupPaths]:
167
- """Convert cloud paths to lamidb local paths.
167
+ """Convert cloud paths to lamindb local paths.
168
168
 
169
169
  Use `settings.paths.cloud_to_local_no_update`
170
170
  or `settings.paths.cloud_to_local`.
@@ -179,7 +179,7 @@ class SetupSettings:
179
179
  repr = self.user.__repr__()
180
180
  repr += f"\nAuto-connect in Python: {self.auto_connect}\n"
181
181
  repr += f"Private Django API: {self.private_django_api}\n"
182
- repr += f"Cache directory: {self.cache_dir}\n"
182
+ repr += f"Cache directory: {self.cache_dir.as_posix()}\n"
183
183
  if self._instance_exists:
184
184
  repr += self.instance.__repr__()
185
185
  else:
@@ -8,6 +8,7 @@ from typing import TYPE_CHECKING, Literal
8
8
  from django.db.utils import ProgrammingError
9
9
  from lamin_utils import logger
10
10
 
11
+ from ._deprecated import deprecated
11
12
  from ._hub_client import call_with_fallback
12
13
  from ._hub_crud import select_account_handle_name_by_lnid
13
14
  from ._hub_utils import LaminDsn, LaminDsnModel
@@ -53,7 +54,7 @@ class InstanceSettings:
53
54
  keep_artifacts_local: bool = False, # default to local storage
54
55
  uid: str | None = None, # instance uid/lnid
55
56
  db: str | None = None, # DB URI
56
- schema: str | None = None, # comma-separated string of schema names
57
+ modules: str | None = None, # comma-separated string of module names
57
58
  git_repo: str | None = None, # a git repo URL
58
59
  is_on_hub: bool | None = None, # initialized from hub
59
60
  api_url: str | None = None,
@@ -69,7 +70,7 @@ class InstanceSettings:
69
70
  self._storage: StorageSettings = storage
70
71
  validate_db_arg(db)
71
72
  self._db: str | None = db
72
- self._schema_str: str | None = schema
73
+ self._schema_str: str | None = modules
73
74
  self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
74
75
  # local storage
75
76
  self._keep_artifacts_local = keep_artifacts_local
@@ -84,7 +85,7 @@ class InstanceSettings:
84
85
  def __repr__(self):
85
86
  """Rich string representation."""
86
87
  representation = f"Current instance: {self.slug}"
87
- attrs = ["owner", "name", "storage", "db", "schema", "git_repo"]
88
+ attrs = ["owner", "name", "storage", "db", "modules", "git_repo"]
88
89
  for attr in attrs:
89
90
  value = getattr(self, attr)
90
91
  if attr == "storage":
@@ -121,7 +122,7 @@ class InstanceSettings:
121
122
  def _search_local_root(
122
123
  self, local_root: str | None = None, mute_warning: bool = False
123
124
  ) -> StorageSettings | None:
124
- from lnschema_core.models import Storage
125
+ from lamindb.models import Storage
125
126
 
126
127
  if local_root is not None:
127
128
  local_records = Storage.objects.filter(root=local_root)
@@ -271,12 +272,20 @@ class InstanceSettings:
271
272
  return hash_and_encode_as_b62(self._id.hex)[:12]
272
273
 
273
274
  @property
274
- def schema(self) -> set[str]:
275
- """Schema modules in addition to core schema."""
275
+ def modules(self) -> set[str]:
276
+ """The set of modules that defines the database schema.
277
+
278
+ The core schema contained in lamindb is not included in this set.
279
+ """
276
280
  if self._schema_str is None:
277
281
  return {} # type: ignore
278
282
  else:
279
- return {schema for schema in self._schema_str.split(",") if schema != ""}
283
+ return {module for module in self._schema_str.split(",") if module != ""}
284
+
285
+ @property
286
+ @deprecated("modules")
287
+ def schema(self) -> set[str]:
288
+ return self.modules
280
289
 
281
290
  @property
282
291
  def _sqlite_file(self) -> UPath:
@@ -358,7 +367,7 @@ class InstanceSettings:
358
367
  sqlite_filepath = self.storage.cloud_to_local(
359
368
  self._sqlite_file, error_no_origin=False
360
369
  )
361
- return f"sqlite:///{sqlite_filepath}"
370
+ return f"sqlite:///{sqlite_filepath.as_posix()}"
362
371
  else:
363
372
  return self._db
364
373
 
@@ -457,11 +466,24 @@ class InstanceSettings:
457
466
  settings._instance_settings = self
458
467
 
459
468
  def _init_db(self):
469
+ from lamindb_setup import _check_setup
470
+
460
471
  from .django import setup_django
461
472
 
473
+ _check_setup.IS_LOADING = True
462
474
  setup_django(self, init=True)
475
+ _check_setup.IS_LOADING = False
476
+
477
+ from lamindb.models import Space
478
+
479
+ Space.objects.get_or_create(
480
+ name="All",
481
+ description="Every team & user with access to the instance has access.",
482
+ )
463
483
 
464
484
  def _load_db(self) -> tuple[bool, str]:
485
+ from lamindb_setup import _check_setup
486
+
465
487
  # Is the database available and initialized as LaminDB?
466
488
  # returns a tuple of status code and message
467
489
  if self.dialect == "sqlite" and not self._sqlite_file.exists():
@@ -472,7 +494,6 @@ class InstanceSettings:
472
494
  f" {legacy_file} to {self._sqlite_file}"
473
495
  )
474
496
  return False, f"SQLite file {self._sqlite_file} does not exist"
475
- from lamindb_setup import settings # to check user
476
497
 
477
498
  from .django import setup_django
478
499
 
@@ -481,5 +502,7 @@ class InstanceSettings:
481
502
  # setting up django also performs a check for migrations & prints them
482
503
  # as warnings
483
504
  # this should fail, e.g., if the db is not reachable
505
+ _check_setup.IS_LOADING = True
484
506
  setup_django(self)
507
+ _check_setup.IS_LOADING = False
485
508
  return True, ""
@@ -98,7 +98,7 @@ def setup_instance_from_store(store: InstanceSettingsStore) -> InstanceSettings:
98
98
  name=store.name,
99
99
  storage=ssettings,
100
100
  db=_null_to_value(store.db),
101
- schema=_null_to_value(store.schema_str),
101
+ modules=_null_to_value(store.schema_str),
102
102
  git_repo=_null_to_value(store.git_repo),
103
103
  keep_artifacts_local=store.keep_artifacts_local, # type: ignore
104
104
  )
@@ -230,7 +230,7 @@ class StorageSettings:
230
230
  """Storage record in the current instance."""
231
231
  if self._record is None:
232
232
  # dynamic import because of import order
233
- from lnschema_core.models import Storage
233
+ from lamindb.models import Storage
234
234
 
235
235
  from ._settings import settings
236
236
 
@@ -48,7 +48,7 @@ class UserSettings:
48
48
  @property
49
49
  def id(self):
50
50
  """Integer id valid in current instance."""
51
- from lnschema_core.users import current_user_id
51
+ from lamindb.base.users import current_user_id
52
52
 
53
53
  # there is no cache needed here because current_user_id()
54
54
  # has its own cache
@@ -17,7 +17,7 @@ RENAME = {"name": "source", "description": "source_name"}
17
17
 
18
18
  def write_bionty_sources(isettings: InstanceSettings) -> None:
19
19
  """Write bionty sources to Source table."""
20
- if "bionty" not in isettings.schema:
20
+ if "bionty" not in isettings.modules:
21
21
  return None
22
22
  import shutil
23
23
 
@@ -79,7 +79,7 @@ def load_bionty_sources(isettings: InstanceSettings | None = None):
79
79
  # not setting up bionty sources
80
80
  return None
81
81
  if isettings is not None:
82
- if "bionty" not in isettings.schema:
82
+ if "bionty" not in isettings.modules:
83
83
  # no need to setup anything
84
84
  return None
85
85
 
@@ -96,7 +96,7 @@ def load_bionty_sources(isettings: InstanceSettings | None = None):
96
96
  for kwargs in active_records:
97
97
  for db_field, base_col in RENAME.items():
98
98
  kwargs[base_col] = kwargs.pop(db_field)
99
- # TODO: non-bionty schema?
99
+ # TODO: non-bionty modules?
100
100
  kwargs["entity"] = kwargs["entity"].replace("bionty.", "")
101
101
  write_yaml(
102
102
  parse_currently_used_sources(active_records),
@@ -5,10 +5,8 @@ import builtins
5
5
  import os
6
6
  from pathlib import Path
7
7
  import time
8
- from lamin_utils import logger
9
- from ._settings_store import current_instance_settings_file
10
8
  from ._settings_instance import InstanceSettings
11
- import sys
9
+
12
10
 
13
11
  IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
14
12
  IS_SETUP = False
@@ -54,14 +52,14 @@ def setup_django(
54
52
  }
55
53
  from .._init_instance import get_schema_module_name
56
54
 
57
- schema_names = ["core"] + list(isettings.schema)
55
+ module_names = ["core"] + list(isettings.modules)
58
56
  raise_import_error = True if init else False
59
57
  installed_apps = [
60
58
  package_name
61
- for n in schema_names
59
+ for name in module_names
62
60
  if (
63
61
  package_name := get_schema_module_name(
64
- n, raise_import_error=raise_import_error
62
+ name, raise_import_error=raise_import_error
65
63
  )
66
64
  )
67
65
  is not None
@@ -115,7 +113,9 @@ def setup_django(
115
113
  elif init:
116
114
  global IS_MIGRATING
117
115
  IS_MIGRATING = True
118
- call_command("migrate", verbosity=0)
116
+ call_command(
117
+ "migrate", verbosity=2
118
+ ) # may change back to verbosity 0 in the future
119
119
  IS_MIGRATING = False
120
120
 
121
121
  global IS_SETUP
@@ -12,6 +12,7 @@ from __future__ import annotations
12
12
 
13
13
  import base64
14
14
  import hashlib
15
+ import json
15
16
  from concurrent.futures import ThreadPoolExecutor
16
17
  from typing import TYPE_CHECKING
17
18
 
@@ -40,11 +41,21 @@ def b16_to_b64(s: str):
40
41
  return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
41
42
 
42
43
 
44
+ def hash_string(string: str) -> str:
45
+ # as we're truncating (not here) at 22 b64, we choose md5 over sha512
46
+ return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())
47
+
48
+
43
49
  # a lot to read about this: lamin-notes/2022/hashing
44
50
  def hash_set(s: set[str]) -> str:
45
- bstr = ":".join(sorted(s)).encode("utf-8")
46
- # as we're truncating at 22 b64, we choose md5 over sha512
47
- return to_b64_str(hashlib.md5(bstr).digest())[:HASH_LENGTH]
51
+ join_s = ":".join(sorted(s))
52
+ return hash_string(join_s)[:HASH_LENGTH]
53
+
54
+
55
+ def hash_dict(d: dict) -> str:
56
+ return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
57
+ :HASH_LENGTH
58
+ ]
48
59
 
49
60
 
50
61
  def hash_from_hashes_list(hashes: Iterable[str]) -> str:
@@ -111,6 +122,6 @@ def hash_dir(path: Path):
111
122
  hashes, sizes = zip(*hashes_sizes)
112
123
 
113
124
  hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
114
- n_objects = len(hashes)
125
+ n_files = len(hashes)
115
126
  size = sum(sizes)
116
- return size, hash, hash_type, n_objects
127
+ return size, hash, hash_type, n_files