lamindb_setup 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +192 -166
  4. lamindb_setup/_connect_instance.py +415 -328
  5. lamindb_setup/_delete.py +144 -141
  6. lamindb_setup/_disconnect.py +35 -32
  7. lamindb_setup/_init_instance.py +430 -440
  8. lamindb_setup/_migrate.py +278 -266
  9. lamindb_setup/_register_instance.py +32 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +69 -70
  12. lamindb_setup/_setup_user.py +172 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -223
  15. lamindb_setup/core/_aws_storage.py +9 -1
  16. lamindb_setup/core/_hub_client.py +248 -248
  17. lamindb_setup/core/_hub_core.py +728 -665
  18. lamindb_setup/core/_hub_crud.py +227 -227
  19. lamindb_setup/core/_private_django_api.py +83 -83
  20. lamindb_setup/core/_settings.py +384 -377
  21. lamindb_setup/core/_settings_instance.py +577 -569
  22. lamindb_setup/core/_settings_load.py +141 -141
  23. lamindb_setup/core/_settings_save.py +95 -95
  24. lamindb_setup/core/_settings_storage.py +427 -429
  25. lamindb_setup/core/_settings_store.py +91 -91
  26. lamindb_setup/core/_settings_user.py +55 -55
  27. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  28. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  29. lamindb_setup/core/django.py +315 -305
  30. lamindb_setup/core/exceptions.py +1 -1
  31. lamindb_setup/core/hashing.py +134 -134
  32. lamindb_setup/core/types.py +1 -1
  33. lamindb_setup/core/upath.py +1013 -1013
  34. lamindb_setup/errors.py +80 -70
  35. lamindb_setup/types.py +20 -20
  36. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/METADATA +3 -3
  37. lamindb_setup-1.10.0.dist-info/RECORD +50 -0
  38. lamindb_setup-1.9.1.dist-info/RECORD +0 -50
  39. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/LICENSE +0 -0
  40. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/WHEEL +0 -0
@@ -1,569 +1,577 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import shutil
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING, Literal
7
-
8
- from django.db.utils import ProgrammingError
9
- from lamin_utils import logger
10
-
11
- from ._deprecated import deprecated
12
- from ._hub_client import call_with_fallback
13
- from ._hub_crud import select_account_handle_name_by_lnid
14
- from ._hub_utils import LaminDsn, LaminDsnModel
15
- from ._settings_save import save_instance_settings
16
- from ._settings_storage import (
17
- LEGACY_STORAGE_UID_FILE_KEY,
18
- STORAGE_UID_FILE_KEY,
19
- StorageSettings,
20
- init_storage,
21
- instance_uid_from_uuid,
22
- )
23
- from ._settings_store import current_instance_settings_file, instance_settings_file
24
- from .cloud_sqlite_locker import (
25
- EXPIRATION_TIME,
26
- InstanceLockedException,
27
- )
28
- from .upath import LocalPathClasses, UPath
29
-
30
- if TYPE_CHECKING:
31
- from uuid import UUID
32
-
33
- from ._settings_user import UserSettings
34
-
35
- LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
36
-
37
-
38
- def sanitize_git_repo_url(repo_url: str) -> str:
39
- assert repo_url.startswith("https://")
40
- return repo_url.replace(".git", "")
41
-
42
-
43
- def is_local_db_url(db_url: str) -> bool:
44
- if "@localhost:" in db_url:
45
- return True
46
- if "@0.0.0.0:" in db_url:
47
- return True
48
- if "@127.0.0.1" in db_url:
49
- return True
50
- return False
51
-
52
-
53
- class InstanceSettings:
54
- """Instance settings."""
55
-
56
- def __init__(
57
- self,
58
- id: UUID, # instance id/uuid
59
- owner: str, # owner handle
60
- name: str, # instance name
61
- storage: StorageSettings, # storage location
62
- keep_artifacts_local: bool = False, # default to local storage
63
- uid: str | None = None, # instance uid/lnid
64
- db: str | None = None, # DB URI
65
- modules: str | None = None, # comma-separated string of module names
66
- git_repo: str | None = None, # a git repo URL
67
- is_on_hub: bool | None = None, # initialized from hub
68
- api_url: str | None = None,
69
- schema_id: UUID | None = None,
70
- fine_grained_access: bool = False,
71
- db_permissions: str | None = None,
72
- _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
73
- ):
74
- from ._hub_utils import validate_db_arg
75
-
76
- self._id_: UUID = id
77
- self._owner: str = owner
78
- self._name: str = name
79
- self._uid: str | None = uid
80
- self._storage: StorageSettings = storage
81
- validate_db_arg(db)
82
- self._db: str | None = db
83
- self._schema_str: str | None = modules
84
- self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
85
- # local storage
86
- self._keep_artifacts_local = keep_artifacts_local
87
- self._local_storage: StorageSettings | None = None
88
- self._is_on_hub = is_on_hub
89
- # private, needed for api requests
90
- self._api_url = api_url
91
- self._schema_id = schema_id
92
- # private, whether fine grained access is used
93
- # needed to be set to request jwt etc
94
- self._fine_grained_access = fine_grained_access
95
- # permissions for db such as jwt, read, write etc.
96
- self._db_permissions = db_permissions
97
- # if None then settings.user is used
98
- self._locker_user = _locker_user
99
-
100
- def __repr__(self):
101
- """Rich string representation."""
102
- representation = "Current instance:"
103
- attrs = ["slug", "storage", "db", "modules", "git_repo"]
104
- for attr in attrs:
105
- value = getattr(self, attr)
106
- if attr == "storage":
107
- if self.keep_artifacts_local:
108
- import lamindb as ln
109
-
110
- self._local_storage = ln.setup.settings.instance._local_storage
111
- if self._local_storage is not None:
112
- value_local = self.local_storage
113
- representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
114
- representation += (
115
- f"\n - cloud storage: {value.root_as_str} ({value.region})"
116
- )
117
- else:
118
- representation += (
119
- f"\n - storage: {value.root_as_str} ({value.region})"
120
- )
121
- elif attr == "db":
122
- if self.dialect != "sqlite":
123
- model = LaminDsnModel(db=value)
124
- db_print = LaminDsn.build(
125
- scheme=model.db.scheme,
126
- user=model.db.user,
127
- password="***",
128
- host="***",
129
- port=model.db.port,
130
- database=model.db.database,
131
- )
132
- else:
133
- db_print = value
134
- representation += f"\n - {attr}: {db_print}"
135
- elif attr == "modules":
136
- representation += f"\n - {attr}: {value if value else '{}'}"
137
- else:
138
- representation += f"\n - {attr}: {value}"
139
- return representation
140
-
141
- @property
142
- def owner(self) -> str:
143
- """Instance owner. A user or organization account handle."""
144
- return self._owner
145
-
146
- @property
147
- def name(self) -> str:
148
- """Instance name."""
149
- return self._name
150
-
151
- def _search_local_root(
152
- self, local_root: str | None = None, mute_warning: bool = False
153
- ) -> StorageSettings | None:
154
- from lamindb.models import Storage
155
-
156
- if local_root is not None:
157
- local_records = Storage.objects.filter(root=local_root)
158
- else:
159
- # only search local managed storage locations (instance_uid=self.uid)
160
- local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
161
- all_local_records = local_records.all()
162
- try:
163
- # trigger an error in case of a migration issue
164
- all_local_records.first()
165
- except ProgrammingError:
166
- logger.error("not able to load Storage registry: please migrate")
167
- return None
168
- found = []
169
- for record in all_local_records:
170
- root_path = Path(record.root)
171
- try:
172
- root_path_exists = root_path.exists()
173
- except PermissionError:
174
- continue
175
- if root_path_exists:
176
- marker_path = root_path / STORAGE_UID_FILE_KEY
177
- try:
178
- marker_path_exists = marker_path.exists()
179
- except PermissionError:
180
- continue
181
- if not marker_path_exists:
182
- legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
183
- if legacy_filepath.exists():
184
- logger.warning(
185
- f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
186
- )
187
- legacy_filepath.rename(marker_path)
188
- else:
189
- logger.warning(
190
- f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
191
- )
192
- continue
193
- try:
194
- uid = marker_path.read_text().splitlines()[0]
195
- except PermissionError:
196
- logger.warning(
197
- f"ignoring the following location because no permission to read it: {marker_path}"
198
- )
199
- continue
200
- if uid == record.uid:
201
- found.append(record)
202
- if found:
203
- if len(found) > 1:
204
- found_display = "\n - ".join([f"{record.root}" for record in found])
205
- logger.important(f"found locations:\n - {found_display}")
206
- record = found[0]
207
- logger.important(f"defaulting to local storage: {record.root}")
208
- return StorageSettings(record.root, region=record.region)
209
- elif not mute_warning:
210
- start = LOCAL_STORAGE_MESSAGE[0].lower()
211
- logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
212
- return None
213
-
214
- @property
215
- def keep_artifacts_local(self) -> bool:
216
- """Default to keeping artifacts local.
217
-
218
- Guide: :doc:`faq/keep-artifacts-local`
219
- """
220
- return self._keep_artifacts_local
221
-
222
- @keep_artifacts_local.setter
223
- def keep_artifacts_local(self, value: bool):
224
- if not isinstance(value, bool):
225
- raise ValueError("keep_artifacts_local must be a boolean value.")
226
- self._keep_artifacts_local = value
227
-
228
- @property
229
- def storage(self) -> StorageSettings:
230
- """Default storage of instance.
231
-
232
- For a cloud instance, this is cloud storage. For a local instance, this
233
- is a local directory.
234
- """
235
- return self._storage
236
-
237
- @property
238
- def local_storage(self) -> StorageSettings:
239
- """An alternative default local storage location in the current environment.
240
-
241
- Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
242
-
243
- Guide: :doc:`faq/keep-artifacts-local`
244
- """
245
- if not self.keep_artifacts_local:
246
- raise ValueError(
247
- "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
248
- )
249
- if self._local_storage is None:
250
- self._local_storage = self._search_local_root()
251
- if self._local_storage is None:
252
- raise ValueError(LOCAL_STORAGE_MESSAGE)
253
- return self._local_storage
254
-
255
- @local_storage.setter
256
- def local_storage(self, local_root_host: tuple[Path | str, str]):
257
- from lamindb_setup._init_instance import register_storage_in_instance
258
-
259
- if not isinstance(local_root_host, tuple):
260
- local_root = local_root_host
261
- host = "unspecified-host"
262
- else:
263
- local_root, host = local_root_host
264
-
265
- local_root = Path(local_root)
266
- if not self.keep_artifacts_local:
267
- raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
268
- local_storage = self._search_local_root(
269
- local_root=StorageSettings(local_root).root_as_str, mute_warning=True
270
- )
271
- if local_storage is not None:
272
- # great, we're merely switching storage location
273
- self._local_storage = local_storage
274
- return None
275
- local_storage = self._search_local_root(mute_warning=True)
276
- if local_storage is not None:
277
- if os.getenv("LAMIN_TESTING") == "true":
278
- response = "y"
279
- else:
280
- response = input(
281
- "You already configured a local storage root for this instance in this"
282
- f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
283
- )
284
- if response != "y":
285
- return None
286
- if host == "unspecified-host":
287
- logger.warning(
288
- "setting local_storage with a single path is deprecated for creating storage locations"
289
- )
290
- logger.warning(
291
- "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
292
- )
293
- local_root = UPath(local_root)
294
- assert isinstance(local_root, LocalPathClasses)
295
- tentative_storage, hub_status = init_storage(
296
- local_root,
297
- instance_id=self._id,
298
- instance_slug=self.slug,
299
- register_hub=True,
300
- region=host,
301
- ) # type: ignore
302
- if hub_status in ["hub-record-created", "hub-record-retrieved"]:
303
- register_storage_in_instance(tentative_storage) # type: ignore
304
- self._local_storage = tentative_storage
305
- logger.important(
306
- f"defaulting to local storage: {self._local_storage.root} on host {host}"
307
- )
308
- else:
309
- logger.warning(f"could not set this local storage location: {local_root}")
310
-
311
- @property
312
- @deprecated("local_storage")
313
- def storage_local(self) -> StorageSettings:
314
- return self.local_storage
315
-
316
- @storage_local.setter
317
- @deprecated("local_storage")
318
- def storage_local(self, local_root_host: tuple[Path | str, str]):
319
- self.local_storage = local_root_host # type: ignore
320
-
321
- @property
322
- def slug(self) -> str:
323
- """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
324
- return f"{self.owner}/{self.name}"
325
-
326
- @property
327
- def git_repo(self) -> str | None:
328
- """Sync transforms with scripts in git repository.
329
-
330
- Provide the full git repo URL.
331
- """
332
- return self._git_repo
333
-
334
- @property
335
- def _id(self) -> UUID:
336
- """The internal instance id."""
337
- return self._id_
338
-
339
- @property
340
- def uid(self) -> str:
341
- """The user-facing instance id."""
342
- return instance_uid_from_uuid(self._id)
343
-
344
- @property
345
- def modules(self) -> set[str]:
346
- """The set of modules that defines the database schema.
347
-
348
- The core schema contained in lamindb is not included in this set.
349
- """
350
- if self._schema_str is None:
351
- return set()
352
- else:
353
- return {module for module in self._schema_str.split(",") if module != ""}
354
-
355
- @property
356
- @deprecated("modules")
357
- def schema(self) -> set[str]:
358
- return self.modules
359
-
360
- @property
361
- def _sqlite_file(self) -> UPath:
362
- """SQLite file."""
363
- filepath = self.storage.root / ".lamindb/lamin.db"
364
- return filepath
365
-
366
- @property
367
- def _sqlite_file_local(self) -> Path:
368
- """Local SQLite file."""
369
- return self.storage.cloud_to_local_no_update(self._sqlite_file)
370
-
371
- def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
372
- """Upload the local sqlite file to the cloud file."""
373
- if self._is_cloud_sqlite:
374
- sqlite_file = self._sqlite_file
375
- logger.warning(
376
- f"updating{' & unlocking' if unlock_cloud_sqlite else ''} cloud SQLite "
377
- f"'{sqlite_file}' of instance"
378
- f" '{self.slug}'"
379
- )
380
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
381
- sqlite_file.upload_from(cache_file, print_progress=True) # type: ignore
382
- cloud_mtime = sqlite_file.modified.timestamp() # type: ignore
383
- # this seems to work even if there is an open connection
384
- # to the cache file
385
- os.utime(cache_file, times=(cloud_mtime, cloud_mtime))
386
- if unlock_cloud_sqlite:
387
- self._cloud_sqlite_locker.unlock()
388
-
389
- def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
390
- """Download the cloud sqlite file if it is newer than local."""
391
- if self._is_cloud_sqlite:
392
- logger.warning(
393
- "updating local SQLite & locking cloud SQLite (sync back & unlock:"
394
- " lamin disconnect)"
395
- )
396
- if lock_cloud_sqlite:
397
- self._cloud_sqlite_locker.lock()
398
- self._check_sqlite_lock()
399
- sqlite_file = self._sqlite_file
400
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
401
- sqlite_file.synchronize_to(cache_file, print_progress=True) # type: ignore
402
-
403
- def _check_sqlite_lock(self):
404
- if not self._cloud_sqlite_locker.has_lock:
405
- locked_by = self._cloud_sqlite_locker._locked_by
406
- lock_msg = "Cannot load the instance, it is locked by "
407
- user_info = call_with_fallback(
408
- select_account_handle_name_by_lnid,
409
- lnid=locked_by,
410
- )
411
- if user_info is None:
412
- lock_msg += f"uid: '{locked_by}'."
413
- else:
414
- lock_msg += (
415
- f"'{user_info['handle']}' (uid: '{locked_by}', name:"
416
- f" '{user_info['name']}')."
417
- )
418
- lock_msg += (
419
- " The instance will be automatically unlocked after"
420
- f" {int(EXPIRATION_TIME/3600/24)}d of no activity."
421
- )
422
- raise InstanceLockedException(lock_msg)
423
-
424
- @property
425
- def db(self) -> str:
426
- """Database connection string (URI)."""
427
- if "LAMINDB_DJANGO_DATABASE_URL" in os.environ:
428
- logger.warning(
429
- "LAMINDB_DJANGO_DATABASE_URL env variable "
430
- f"is set to {os.environ['LAMINDB_DJANGO_DATABASE_URL']}. "
431
- "It overwrites all db connections and is used instead of `instance.db`."
432
- )
433
- if self._db is None:
434
- # here, we want the updated sqlite file
435
- # hence, we don't use self._sqlite_file_local()
436
- # error_no_origin=False because on instance init
437
- # the sqlite file is not yet in the cloud
438
- sqlite_filepath = self.storage.cloud_to_local(
439
- self._sqlite_file, error_no_origin=False
440
- )
441
- return f"sqlite:///{sqlite_filepath.as_posix()}"
442
- else:
443
- return self._db
444
-
445
- @property
446
- def dialect(self) -> Literal["sqlite", "postgresql"]:
447
- """SQL dialect."""
448
- if self._db is None or self._db.startswith("sqlite://"):
449
- return "sqlite"
450
- else:
451
- assert self._db.startswith("postgresql"), f"Unexpected DB value: {self._db}"
452
- return "postgresql"
453
-
454
- @property
455
- def _is_cloud_sqlite(self) -> bool:
456
- # can we make this a private property, Sergei?
457
- # as it's not relevant to the user
458
- """Is this a cloud instance with sqlite db."""
459
- return self.dialect == "sqlite" and self.storage.type_is_cloud
460
-
461
- @property
462
- def _cloud_sqlite_locker(self):
463
- # avoid circular import
464
- from .cloud_sqlite_locker import empty_locker, get_locker
465
-
466
- if self._is_cloud_sqlite:
467
- try:
468
- # if _locker_user is None then settings.user is used
469
- return get_locker(self, self._locker_user)
470
- except PermissionError:
471
- logger.warning("read-only access - did not access locker")
472
- return empty_locker
473
- else:
474
- return empty_locker
475
-
476
- @property
477
- def is_remote(self) -> bool:
478
- """Boolean indicating if an instance has no local component."""
479
- if not self.storage.type_is_cloud:
480
- return False
481
-
482
- if self.dialect == "postgresql":
483
- if is_local_db_url(self.db):
484
- return False
485
- # returns True for cloud SQLite
486
- # and remote postgres
487
- return True
488
-
489
- @property
490
- def is_on_hub(self) -> bool:
491
- """Is this instance on the hub?
492
-
493
- Can only reliably establish if user has access to the instance. Will
494
- return `False` in case the instance isn't found.
495
- """
496
- if self._is_on_hub is None:
497
- from ._hub_client import call_with_fallback_auth
498
- from ._hub_crud import select_instance_by_id
499
- from ._settings import settings
500
-
501
- if settings.user.handle != "anonymous":
502
- response = call_with_fallback_auth(
503
- select_instance_by_id, instance_id=self._id.hex
504
- )
505
- else:
506
- response = call_with_fallback(
507
- select_instance_by_id, instance_id=self._id.hex
508
- )
509
- logger.warning("calling anonymously, will miss private instances")
510
- if response is None:
511
- self._is_on_hub = False
512
- else:
513
- self._is_on_hub = True
514
- return self._is_on_hub
515
-
516
- def _get_settings_file(self) -> Path:
517
- return instance_settings_file(self.name, self.owner)
518
-
519
- def _persist(self, write_to_disk: bool = True) -> None:
520
- """Set these instance settings as the current instance.
521
-
522
- Args:
523
- write_to_disk: Save these instance settings to disk and
524
- overwrite the current instance settings file.
525
- """
526
- if write_to_disk:
527
- assert self.name is not None
528
- filepath = self._get_settings_file()
529
- # persist under filepath for later reference
530
- save_instance_settings(self, filepath)
531
- # persist under current file for auto load
532
- shutil.copy2(filepath, current_instance_settings_file())
533
- # persist under settings class for same session reference
534
- # need to import here to avoid circular import
535
- from ._settings import settings
536
-
537
- settings._instance_settings = self
538
-
539
- def _init_db(self):
540
- from lamindb_setup._check_setup import disable_auto_connect
541
-
542
- from .django import setup_django
543
-
544
- disable_auto_connect(setup_django)(self, init=True)
545
-
546
- def _load_db(self) -> tuple[bool, str]:
547
- # Is the database available and initialized as LaminDB?
548
- # returns a tuple of status code and message
549
- if self.dialect == "sqlite" and not self._sqlite_file.exists():
550
- legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
551
- if legacy_file.exists():
552
- logger.warning(
553
- f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
554
- )
555
- legacy_file.rename(self._sqlite_file)
556
- else:
557
- return False, f"SQLite file {self._sqlite_file} does not exist"
558
- # we need the local sqlite to setup django
559
- self._update_local_sqlite_file()
560
- # setting up django also performs a check for migrations & prints them
561
- # as warnings
562
- # this should fail, e.g., if the db is not reachable
563
- from lamindb_setup._check_setup import disable_auto_connect
564
-
565
- from .django import setup_django
566
-
567
- disable_auto_connect(setup_django)(self)
568
-
569
- return True, ""
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Literal
7
+
8
+ from django.db.utils import ProgrammingError
9
+ from lamin_utils import logger
10
+
11
+ from ._deprecated import deprecated
12
+ from ._hub_client import call_with_fallback
13
+ from ._hub_crud import select_account_handle_name_by_lnid
14
+ from ._hub_utils import LaminDsn, LaminDsnModel
15
+ from ._settings_save import save_instance_settings
16
+ from ._settings_storage import (
17
+ LEGACY_STORAGE_UID_FILE_KEY,
18
+ STORAGE_UID_FILE_KEY,
19
+ StorageSettings,
20
+ get_storage_type,
21
+ init_storage,
22
+ instance_uid_from_uuid,
23
+ )
24
+ from ._settings_store import current_instance_settings_file, instance_settings_file
25
+ from .cloud_sqlite_locker import (
26
+ EXPIRATION_TIME,
27
+ InstanceLockedException,
28
+ )
29
+ from .upath import LocalPathClasses, UPath
30
+
31
+ if TYPE_CHECKING:
32
+ from uuid import UUID
33
+
34
+ from ._settings_user import UserSettings
35
+ from .types import UPathStr
36
+
37
+ LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
38
+
39
+
40
+ def sanitize_git_repo_url(repo_url: str) -> str:
41
+ assert repo_url.startswith("https://")
42
+ return repo_url.replace(".git", "")
43
+
44
+
45
+ def is_local_db_url(db_url: str) -> bool:
46
+ if "@localhost:" in db_url:
47
+ return True
48
+ if "@0.0.0.0:" in db_url:
49
+ return True
50
+ if "@127.0.0.1" in db_url:
51
+ return True
52
+ return False
53
+
54
+
55
+ def check_is_instance_remote(root: UPathStr, db: str | None) -> bool:
56
+ # returns True for cloud SQLite
57
+ # and remote postgres
58
+ root_str = str(root)
59
+ if not root_str.startswith("create-s3") and get_storage_type(root_str) == "local":
60
+ return False
61
+
62
+ if db is not None and is_local_db_url(db):
63
+ return False
64
+ return True
65
+
66
+
67
+ class InstanceSettings:
68
+ """Instance settings."""
69
+
70
+ def __init__(
71
+ self,
72
+ id: UUID, # instance id/uuid
73
+ owner: str, # owner handle
74
+ name: str, # instance name
75
+ storage: StorageSettings | None = None, # storage location
76
+ keep_artifacts_local: bool = False, # default to local storage
77
+ db: str | None = None, # DB URI
78
+ modules: str | None = None, # comma-separated string of module names
79
+ git_repo: str | None = None, # a git repo URL
80
+ is_on_hub: bool | None = None, # initialized from hub
81
+ api_url: str | None = None,
82
+ schema_id: UUID | None = None,
83
+ fine_grained_access: bool = False,
84
+ db_permissions: str | None = None,
85
+ _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
86
+ ):
87
+ from ._hub_utils import validate_db_arg
88
+
89
+ self._id_: UUID = id
90
+ self._owner: str = owner
91
+ self._name: str = name
92
+ self._storage: StorageSettings | None = storage
93
+ validate_db_arg(db)
94
+ self._db: str | None = db
95
+ self._schema_str: str | None = modules
96
+ self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
97
+ # local storage
98
+ self._keep_artifacts_local = keep_artifacts_local
99
+ self._local_storage: StorageSettings | None = None
100
+ self._is_on_hub = is_on_hub
101
+ # private, needed for api requests
102
+ self._api_url = api_url
103
+ self._schema_id = schema_id
104
+ # private, whether fine grained access is used
105
+ # needed to be set to request jwt etc
106
+ self._fine_grained_access = fine_grained_access
107
+ # permissions for db such as jwt, read, write etc.
108
+ self._db_permissions = db_permissions
109
+ # if None then settings.user is used
110
+ self._locker_user = _locker_user
111
+
112
+ def __repr__(self):
113
+ """Rich string representation."""
114
+ representation = "Current instance:"
115
+ attrs = ["slug", "storage", "db", "modules", "git_repo"]
116
+ for attr in attrs:
117
+ value = getattr(self, attr)
118
+ if attr == "storage":
119
+ if self.keep_artifacts_local:
120
+ import lamindb as ln
121
+
122
+ self._local_storage = ln.setup.settings.instance._local_storage
123
+ if self._local_storage is not None:
124
+ value_local = self.local_storage
125
+ representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
126
+ representation += (
127
+ f"\n - cloud storage: {value.root_as_str} ({value.region})"
128
+ )
129
+ else:
130
+ representation += (
131
+ f"\n - storage: {value.root_as_str} ({value.region})"
132
+ )
133
+ elif attr == "db":
134
+ if self.dialect != "sqlite":
135
+ model = LaminDsnModel(db=value)
136
+ db_print = LaminDsn.build(
137
+ scheme=model.db.scheme,
138
+ user=model.db.user,
139
+ password="***",
140
+ host="***",
141
+ port=model.db.port,
142
+ database=model.db.database,
143
+ )
144
+ else:
145
+ db_print = value
146
+ representation += f"\n - {attr}: {db_print}"
147
+ elif attr == "modules":
148
+ representation += f"\n - {attr}: {value if value else '{}'}"
149
+ else:
150
+ representation += f"\n - {attr}: {value}"
151
+ return representation
152
+
153
+ @property
154
+ def owner(self) -> str:
155
+ """Instance owner. A user or organization account handle."""
156
+ return self._owner
157
+
158
+ @property
159
+ def name(self) -> str:
160
+ """Instance name."""
161
+ return self._name
162
+
163
+ def _search_local_root(
164
+ self, local_root: str | None = None, mute_warning: bool = False
165
+ ) -> StorageSettings | None:
166
+ from lamindb.models import Storage
167
+
168
+ if local_root is not None:
169
+ local_records = Storage.objects.filter(root=local_root)
170
+ else:
171
+ # only search local managed storage locations (instance_uid=self.uid)
172
+ local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
173
+ all_local_records = local_records.all()
174
+ try:
175
+ # trigger an error in case of a migration issue
176
+ all_local_records.first()
177
+ except ProgrammingError:
178
+ logger.error("not able to load Storage registry: please migrate")
179
+ return None
180
+ found = []
181
+ for record in all_local_records:
182
+ root_path = Path(record.root)
183
+ try:
184
+ root_path_exists = root_path.exists()
185
+ except PermissionError:
186
+ continue
187
+ if root_path_exists:
188
+ marker_path = root_path / STORAGE_UID_FILE_KEY
189
+ try:
190
+ marker_path_exists = marker_path.exists()
191
+ except PermissionError:
192
+ continue
193
+ if not marker_path_exists:
194
+ legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
195
+ if legacy_filepath.exists():
196
+ logger.warning(
197
+ f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
198
+ )
199
+ legacy_filepath.rename(marker_path)
200
+ else:
201
+ logger.warning(
202
+ f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
203
+ )
204
+ continue
205
+ try:
206
+ uid = marker_path.read_text().splitlines()[0]
207
+ except PermissionError:
208
+ logger.warning(
209
+ f"ignoring the following location because no permission to read it: {marker_path}"
210
+ )
211
+ continue
212
+ if uid == record.uid:
213
+ found.append(record)
214
+ if found:
215
+ if len(found) > 1:
216
+ found_display = "\n - ".join([f"{record.root}" for record in found])
217
+ logger.important(f"found locations:\n - {found_display}")
218
+ record = found[0]
219
+ logger.important(f"defaulting to local storage: {record.root}")
220
+ return StorageSettings(record.root, region=record.region)
221
+ elif not mute_warning:
222
+ start = LOCAL_STORAGE_MESSAGE[0].lower()
223
+ logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
224
+ return None
225
+
226
+ @property
227
+ def keep_artifacts_local(self) -> bool:
228
+ """Default to keeping artifacts local.
229
+
230
+ Guide: :doc:`faq/keep-artifacts-local`
231
+ """
232
+ return self._keep_artifacts_local
233
+
234
+ @keep_artifacts_local.setter
235
+ def keep_artifacts_local(self, value: bool):
236
+ if not isinstance(value, bool):
237
+ raise ValueError("keep_artifacts_local must be a boolean value.")
238
+ self._keep_artifacts_local = value
239
+
240
+ @property
241
+ def storage(self) -> StorageSettings:
242
+ """Default storage of instance.
243
+
244
+ For a cloud instance, this is cloud storage. For a local instance, this
245
+ is a local directory.
246
+ """
247
+ return self._storage # type: ignore
248
+
249
@property
def local_storage(self) -> StorageSettings:
    """Alternative default local storage location in the current environment.

    Acts as the default storage location when :attr:`keep_artifacts_local`
    is enabled.

    Guide: :doc:`faq/keep-artifacts-local`

    Raises:
        ValueError: If `keep_artifacts_local` is disabled, or if no local
            storage root is found by `_search_local_root()`.
    """
    if not self.keep_artifacts_local:
        raise ValueError(
            "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
        )
    # resolve lazily on first access and cache the outcome on the instance
    if self._local_storage is None:
        self._local_storage = self._search_local_root()
        if self._local_storage is None:
            raise ValueError(LOCAL_STORAGE_MESSAGE)
    return self._local_storage

@local_storage.setter
def local_storage(self, local_root_host: tuple[Path | str, str]):
    # Accepts either a `(root, host)` tuple or, as a deprecated form, a bare root path.
    from lamindb_setup._init_instance import register_storage_in_instance

    if isinstance(local_root_host, tuple):
        local_root, host = local_root_host
    else:
        local_root, host = local_root_host, "unspecified-host"

    local_root = Path(local_root)
    if not self.keep_artifacts_local:
        raise ValueError("`keep_artifacts_local` is not enabled for this instance.")

    # first, look for an already known storage location under the requested root
    matching_storage = self._search_local_root(
        local_root=StorageSettings(local_root).root_as_str, mute_warning=True
    )
    if matching_storage is not None:
        # great, we're merely switching storage location
        self._local_storage = matching_storage
        return None

    # a different local root is already configured: ask before adding another
    configured_storage = self._search_local_root(mute_warning=True)
    if configured_storage is not None:
        testing = os.getenv("LAMIN_TESTING") == "true"
        response = (
            "y"
            if testing
            else input(
                "You already configured a local storage root for this instance in this"
                f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
            )
        )
        if response != "y":
            return None

    if host == "unspecified-host":
        logger.warning(
            "setting local_storage with a single path is deprecated for creating storage locations"
        )
        logger.warning(
            "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
        )

    local_root = UPath(local_root)
    assert isinstance(local_root, LocalPathClasses)
    # the host is handed to `init_storage` through its `region` parameter
    tentative_storage, hub_status = init_storage(
        local_root,
        instance_id=self._id,
        instance_slug=self.slug,
        register_hub=True,
        region=host,
    )  # type: ignore
    if hub_status not in ("hub-record-created", "hub-record-retrieved"):
        logger.warning(f"could not set this local storage location: {local_root}")
        return None
    register_storage_in_instance(tentative_storage)  # type: ignore
    self._local_storage = tentative_storage
    logger.important(
        f"defaulting to local storage: {self._local_storage.root} on host {host}"
    )
322
+
323
@property
@deprecated("local_storage")
def storage_local(self) -> StorageSettings:
    # deprecated alias: reading delegates to `local_storage`
    current_local_storage = self.local_storage
    return current_local_storage

@storage_local.setter
@deprecated("local_storage")
def storage_local(self, local_root_host: tuple[Path | str, str]):
    # deprecated alias: writing delegates to the `local_storage` setter
    setattr(self, "local_storage", local_root_host)  # type: ignore
332
+
333
@property
def slug(self) -> str:
    """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
    return "{}/{}".format(self.owner, self.name)
337
+
338
@property
def git_repo(self) -> str | None:
    """Git repository with which transforms and scripts are synced.

    Provide the full git repo URL.
    """
    repo_url = self._git_repo
    return repo_url
345
+
346
@property
def _id(self) -> UUID:
    """The internal instance id."""
    internal_id = self._id_
    return internal_id
350
+
351
@property
def uid(self) -> str:
    """The user-facing instance id."""
    # computed from the internal id by `instance_uid_from_uuid`
    internal_id = self._id
    return instance_uid_from_uuid(internal_id)
355
+
356
@property
def modules(self) -> set[str]:
    """The set of modules that defines the database schema.

    The core schema contained in lamindb is not included in this set.
    """
    schema_str = self._schema_str
    if schema_str is None:
        return set()
    # `filter(None, ...)` discards the empty strings that `split` yields
    return set(filter(None, schema_str.split(",")))
366
+
367
@property
@deprecated("modules")
def schema(self) -> set[str]:
    # deprecated alias: delegates to `modules`
    module_names = self.modules
    return module_names
371
+
372
@property
def _sqlite_file(self) -> UPath:
    """Path of the SQLite file, `.lamindb/lamin.db` under the storage root."""
    return self.storage.root / ".lamindb/lamin.db"
377
+
378
@property
def _sqlite_file_local(self) -> Path:
    """Local SQLite file."""
    # maps the SQLite file path through `storage.cloud_to_local_no_update`
    cloud_path = self._sqlite_file
    return self.storage.cloud_to_local_no_update(cloud_path)
382
+
383
def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
    """Upload the local sqlite file to the cloud file.

    Does nothing unless `_is_cloud_sqlite` is true.

    Args:
        unlock_cloud_sqlite: Call `unlock()` on the cloud SQLite locker
            after the upload.
    """
    if not self._is_cloud_sqlite:
        return None
    cloud_file = self._sqlite_file
    unlock_note = " & unlocking" if unlock_cloud_sqlite else ""
    logger.warning(
        f"updating{unlock_note} cloud SQLite '{cloud_file}' of instance '{self.slug}'"
    )
    local_copy = self.storage.cloud_to_local_no_update(cloud_file)
    cloud_file.upload_from(local_copy, print_progress=True)  # type: ignore
    # stamp the cache file with the cloud file's modification time;
    # this seems to work even if there is an open connection to the cache file
    remote_mtime = cloud_file.modified.timestamp()  # type: ignore
    os.utime(local_copy, times=(remote_mtime, remote_mtime))
    if unlock_cloud_sqlite:
        self._cloud_sqlite_locker.unlock()
400
+
401
def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
    """Download the cloud sqlite file if it is newer than local.

    Does nothing unless `_is_cloud_sqlite` is true.

    Args:
        lock_cloud_sqlite: Call `lock()` on the cloud SQLite locker before
            synchronizing. The logged message mentions locking only when
            this is true.

    Raises:
        InstanceLockedException: Propagated from `_check_sqlite_lock()`.
    """
    if not self._is_cloud_sqlite:
        return None
    if lock_cloud_sqlite:
        logger.warning(
            "updating local SQLite & locking cloud SQLite (sync back & unlock:"
            " lamin disconnect)"
        )
        self._cloud_sqlite_locker.lock()
    else:
        # nothing is locked on this path, so the message must not claim it
        logger.warning("updating local SQLite")
    self._check_sqlite_lock()
    sqlite_file = self._sqlite_file
    cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
    sqlite_file.synchronize_to(cache_file, print_progress=True)  # type: ignore
414
+
415
def _check_sqlite_lock(self):
    """Raise `InstanceLockedException` if the locker does not report holding the lock.

    The error message names the account that holds the lock when it can be
    looked up, and otherwise only its uid.
    """
    if self._cloud_sqlite_locker.has_lock:
        return None
    locked_by = self._cloud_sqlite_locker._locked_by
    user_info = call_with_fallback(
        select_account_handle_name_by_lnid,
        lnid=locked_by,
    )
    if user_info is None:
        holder = f"uid: '{locked_by}'."
    else:
        holder = (
            f"'{user_info['handle']}' (uid: '{locked_by}', name:"
            f" '{user_info['name']}')."
        )
    # EXPIRATION_TIME is divided down to whole days (treated as seconds here)
    expiry_days = int(EXPIRATION_TIME / 3600 / 24)
    raise InstanceLockedException(
        "Cannot load the instance, it is locked by "
        + holder
        + " The instance will be automatically unlocked after"
        + f" {expiry_days}d of no activity."
    )
435
+
436
@property
def db(self) -> str:
    """Database connection string (URI).

    Returns the configured `_db` value when it is set. Otherwise returns an
    in-memory SQLite URI for the placeholder instance (no storage and slug
    `"none/none"`), or a SQLite URI pointing at the local copy of the
    instance's SQLite file.

    A warning is logged whenever the `LAMINDB_DJANGO_DATABASE_URL`
    environment variable is set.
    """
    if "LAMINDB_DJANGO_DATABASE_URL" in os.environ:
        # NOTE(review): this logs the full URL, which may embed credentials — confirm this is intended
        logger.warning(
            "LAMINDB_DJANGO_DATABASE_URL env variable "
            f"is set to {os.environ['LAMINDB_DJANGO_DATABASE_URL']}. "
            "It overwrites all db connections and is used instead of `instance.db`."
        )
    if self._db is not None:
        return self._db
    if self._storage is None and self.slug == "none/none":
        return "sqlite:///:memory:"
    # here, we want the updated sqlite file
    # hence, we don't use self._sqlite_file_local()
    # error_no_origin=False because on instance init
    # the sqlite file is not yet in the cloud
    sqlite_filepath = self.storage.cloud_to_local(
        self._sqlite_file, error_no_origin=False
    )
    return f"sqlite:///{sqlite_filepath.as_posix()}"
460
+
461
@property
def dialect(self) -> Literal["sqlite", "postgresql"]:
    """SQL dialect."""
    db_url = self._db
    # no explicit URL means the instance uses the SQLite file
    if db_url is not None and not db_url.startswith("sqlite://"):
        assert db_url.startswith("postgresql"), f"Unexpected DB value: {db_url}"
        return "postgresql"
    return "sqlite"
469
+
470
@property
def _is_cloud_sqlite(self) -> bool:
    """Is this a cloud instance with sqlite db."""
    if self.dialect != "sqlite":
        return False
    return self.storage.type_is_cloud
476
+
477
@property
def _cloud_sqlite_locker(self):
    """Locker for the cloud SQLite file, or `empty_locker` when none applies.

    `empty_locker` is returned when the instance is not a cloud SQLite
    instance, or when obtaining the locker raises `PermissionError`.
    """
    # imported here to avoid a circular import
    from .cloud_sqlite_locker import empty_locker, get_locker

    if not self._is_cloud_sqlite:
        return empty_locker
    try:
        # if _locker_user is None then settings.user is used
        return get_locker(self, self._locker_user)
    except PermissionError:
        logger.warning("read-only access - did not access locker")
        return empty_locker
491
+
492
@property
def is_remote(self) -> bool:
    """Boolean indicating if an instance has no local component."""
    # decided by `check_is_instance_remote` from the storage root and the db URI
    storage_root = self.storage.root_as_str
    return check_is_instance_remote(storage_root, self.db)
496
+
497
@property
def is_on_hub(self) -> bool:
    """Is this instance on the hub?

    Can only reliably establish if user has access to the instance. Will
    return `False` in case the instance isn't found.

    The answer is looked up once and then cached on the instance.
    """
    if self._is_on_hub is not None:
        return self._is_on_hub

    from ._hub_client import call_with_fallback_auth
    from ._hub_crud import select_instance_by_id
    from ._settings import settings

    if settings.user.handle == "anonymous":
        response = call_with_fallback(
            select_instance_by_id, instance_id=self._id.hex
        )
        logger.warning("calling anonymously, will miss private instances")
    else:
        response = call_with_fallback_auth(
            select_instance_by_id, instance_id=self._id.hex
        )
    # a missing hub record means the instance is treated as not on the hub
    self._is_on_hub = response is not None
    return self._is_on_hub
523
+
524
def _get_settings_file(self) -> Path:
    """Return the settings file path computed from this instance's name and owner."""
    instance_name, owner_handle = self.name, self.owner
    return instance_settings_file(instance_name, owner_handle)
526
+
527
def _persist(self, write_to_disk: bool = True) -> None:
    """Make these instance settings the current instance.

    Args:
        write_to_disk: Save these instance settings to disk and
            overwrite the current instance settings file. Skipped for the
            placeholder slug `"none/none"`.
    """
    if write_to_disk and self.slug != "none/none":
        assert self.name is not None
        settings_path = self._get_settings_file()
        # keep a copy under the instance-specific path for later reference
        save_instance_settings(self, settings_path)
        # and one under the "current instance" path for auto load
        shutil.copy2(settings_path, current_instance_settings_file())
    # register on the settings object for same-session reference;
    # imported here to avoid a circular import
    from ._settings import settings

    settings._instance_settings = self
546
+
547
def _init_db(self):
    """Run `setup_django` for this instance with `init=True`.

    The call is wrapped in `disable_auto_connect`.
    """
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    wrapped_setup = disable_auto_connect(setup_django)
    wrapped_setup(self, init=True)
553
+
554
def _load_db(self) -> tuple[bool, str]:
    """Check that the database is available and set up Django for it.

    Returns:
        A `(success, message)` tuple: `(False, reason)` when the SQLite file
        is missing, `(True, "")` after Django was set up.
    """
    sqlite_missing = self.dialect == "sqlite" and not self._sqlite_file.exists()
    if sqlite_missing:
        # fall back to the legacy file name and move it to the current location
        legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
        if not legacy_file.exists():
            return False, f"SQLite file {self._sqlite_file} does not exist"
        logger.warning(
            f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
        )
        legacy_file.rename(self._sqlite_file)
    # we need the local sqlite to setup django
    self._update_local_sqlite_file()
    # setting up django also performs a check for migrations & prints them
    # as warnings
    # this should fail, e.g., if the db is not reachable
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    wrapped_setup = disable_auto_connect(setup_django)
    wrapped_setup(self)

    return True, ""