lamindb_setup 1.9.1__py3-none-any.whl → 1.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +192 -166
  4. lamindb_setup/_connect_instance.py +430 -328
  5. lamindb_setup/_delete.py +144 -141
  6. lamindb_setup/_disconnect.py +35 -32
  7. lamindb_setup/_init_instance.py +430 -440
  8. lamindb_setup/_migrate.py +278 -266
  9. lamindb_setup/_register_instance.py +32 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +69 -70
  12. lamindb_setup/_setup_user.py +172 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -223
  15. lamindb_setup/core/_aws_storage.py +9 -1
  16. lamindb_setup/core/_deprecated.py +1 -1
  17. lamindb_setup/core/_hub_client.py +248 -248
  18. lamindb_setup/core/_hub_core.py +751 -665
  19. lamindb_setup/core/_hub_crud.py +247 -227
  20. lamindb_setup/core/_private_django_api.py +83 -83
  21. lamindb_setup/core/_settings.py +374 -377
  22. lamindb_setup/core/_settings_instance.py +609 -569
  23. lamindb_setup/core/_settings_load.py +141 -141
  24. lamindb_setup/core/_settings_save.py +95 -95
  25. lamindb_setup/core/_settings_storage.py +427 -429
  26. lamindb_setup/core/_settings_store.py +91 -91
  27. lamindb_setup/core/_settings_user.py +55 -55
  28. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  29. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  30. lamindb_setup/core/django.py +311 -305
  31. lamindb_setup/core/exceptions.py +1 -1
  32. lamindb_setup/core/hashing.py +134 -134
  33. lamindb_setup/core/types.py +1 -1
  34. lamindb_setup/core/upath.py +1013 -1013
  35. lamindb_setup/errors.py +80 -70
  36. lamindb_setup/types.py +20 -20
  37. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.1.dist-info}/METADATA +4 -4
  38. lamindb_setup-1.10.1.dist-info/RECORD +50 -0
  39. lamindb_setup-1.9.1.dist-info/RECORD +0 -50
  40. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.1.dist-info}/LICENSE +0 -0
  41. {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.1.dist-info}/WHEEL +0 -0
@@ -1,569 +1,609 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import shutil
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING, Literal
7
-
8
- from django.db.utils import ProgrammingError
9
- from lamin_utils import logger
10
-
11
- from ._deprecated import deprecated
12
- from ._hub_client import call_with_fallback
13
- from ._hub_crud import select_account_handle_name_by_lnid
14
- from ._hub_utils import LaminDsn, LaminDsnModel
15
- from ._settings_save import save_instance_settings
16
- from ._settings_storage import (
17
- LEGACY_STORAGE_UID_FILE_KEY,
18
- STORAGE_UID_FILE_KEY,
19
- StorageSettings,
20
- init_storage,
21
- instance_uid_from_uuid,
22
- )
23
- from ._settings_store import current_instance_settings_file, instance_settings_file
24
- from .cloud_sqlite_locker import (
25
- EXPIRATION_TIME,
26
- InstanceLockedException,
27
- )
28
- from .upath import LocalPathClasses, UPath
29
-
30
- if TYPE_CHECKING:
31
- from uuid import UUID
32
-
33
- from ._settings_user import UserSettings
34
-
35
- LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
36
-
37
-
38
- def sanitize_git_repo_url(repo_url: str) -> str:
39
- assert repo_url.startswith("https://")
40
- return repo_url.replace(".git", "")
41
-
42
-
43
- def is_local_db_url(db_url: str) -> bool:
44
- if "@localhost:" in db_url:
45
- return True
46
- if "@0.0.0.0:" in db_url:
47
- return True
48
- if "@127.0.0.1" in db_url:
49
- return True
50
- return False
51
-
52
-
53
- class InstanceSettings:
54
- """Instance settings."""
55
-
56
- def __init__(
57
- self,
58
- id: UUID, # instance id/uuid
59
- owner: str, # owner handle
60
- name: str, # instance name
61
- storage: StorageSettings, # storage location
62
- keep_artifacts_local: bool = False, # default to local storage
63
- uid: str | None = None, # instance uid/lnid
64
- db: str | None = None, # DB URI
65
- modules: str | None = None, # comma-separated string of module names
66
- git_repo: str | None = None, # a git repo URL
67
- is_on_hub: bool | None = None, # initialized from hub
68
- api_url: str | None = None,
69
- schema_id: UUID | None = None,
70
- fine_grained_access: bool = False,
71
- db_permissions: str | None = None,
72
- _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
73
- ):
74
- from ._hub_utils import validate_db_arg
75
-
76
- self._id_: UUID = id
77
- self._owner: str = owner
78
- self._name: str = name
79
- self._uid: str | None = uid
80
- self._storage: StorageSettings = storage
81
- validate_db_arg(db)
82
- self._db: str | None = db
83
- self._schema_str: str | None = modules
84
- self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
85
- # local storage
86
- self._keep_artifacts_local = keep_artifacts_local
87
- self._local_storage: StorageSettings | None = None
88
- self._is_on_hub = is_on_hub
89
- # private, needed for api requests
90
- self._api_url = api_url
91
- self._schema_id = schema_id
92
- # private, whether fine grained access is used
93
- # needed to be set to request jwt etc
94
- self._fine_grained_access = fine_grained_access
95
- # permissions for db such as jwt, read, write etc.
96
- self._db_permissions = db_permissions
97
- # if None then settings.user is used
98
- self._locker_user = _locker_user
99
-
100
- def __repr__(self):
101
- """Rich string representation."""
102
- representation = "Current instance:"
103
- attrs = ["slug", "storage", "db", "modules", "git_repo"]
104
- for attr in attrs:
105
- value = getattr(self, attr)
106
- if attr == "storage":
107
- if self.keep_artifacts_local:
108
- import lamindb as ln
109
-
110
- self._local_storage = ln.setup.settings.instance._local_storage
111
- if self._local_storage is not None:
112
- value_local = self.local_storage
113
- representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
114
- representation += (
115
- f"\n - cloud storage: {value.root_as_str} ({value.region})"
116
- )
117
- else:
118
- representation += (
119
- f"\n - storage: {value.root_as_str} ({value.region})"
120
- )
121
- elif attr == "db":
122
- if self.dialect != "sqlite":
123
- model = LaminDsnModel(db=value)
124
- db_print = LaminDsn.build(
125
- scheme=model.db.scheme,
126
- user=model.db.user,
127
- password="***",
128
- host="***",
129
- port=model.db.port,
130
- database=model.db.database,
131
- )
132
- else:
133
- db_print = value
134
- representation += f"\n - {attr}: {db_print}"
135
- elif attr == "modules":
136
- representation += f"\n - {attr}: {value if value else '{}'}"
137
- else:
138
- representation += f"\n - {attr}: {value}"
139
- return representation
140
-
141
- @property
142
- def owner(self) -> str:
143
- """Instance owner. A user or organization account handle."""
144
- return self._owner
145
-
146
- @property
147
- def name(self) -> str:
148
- """Instance name."""
149
- return self._name
150
-
151
- def _search_local_root(
152
- self, local_root: str | None = None, mute_warning: bool = False
153
- ) -> StorageSettings | None:
154
- from lamindb.models import Storage
155
-
156
- if local_root is not None:
157
- local_records = Storage.objects.filter(root=local_root)
158
- else:
159
- # only search local managed storage locations (instance_uid=self.uid)
160
- local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
161
- all_local_records = local_records.all()
162
- try:
163
- # trigger an error in case of a migration issue
164
- all_local_records.first()
165
- except ProgrammingError:
166
- logger.error("not able to load Storage registry: please migrate")
167
- return None
168
- found = []
169
- for record in all_local_records:
170
- root_path = Path(record.root)
171
- try:
172
- root_path_exists = root_path.exists()
173
- except PermissionError:
174
- continue
175
- if root_path_exists:
176
- marker_path = root_path / STORAGE_UID_FILE_KEY
177
- try:
178
- marker_path_exists = marker_path.exists()
179
- except PermissionError:
180
- continue
181
- if not marker_path_exists:
182
- legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
183
- if legacy_filepath.exists():
184
- logger.warning(
185
- f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
186
- )
187
- legacy_filepath.rename(marker_path)
188
- else:
189
- logger.warning(
190
- f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
191
- )
192
- continue
193
- try:
194
- uid = marker_path.read_text().splitlines()[0]
195
- except PermissionError:
196
- logger.warning(
197
- f"ignoring the following location because no permission to read it: {marker_path}"
198
- )
199
- continue
200
- if uid == record.uid:
201
- found.append(record)
202
- if found:
203
- if len(found) > 1:
204
- found_display = "\n - ".join([f"{record.root}" for record in found])
205
- logger.important(f"found locations:\n - {found_display}")
206
- record = found[0]
207
- logger.important(f"defaulting to local storage: {record.root}")
208
- return StorageSettings(record.root, region=record.region)
209
- elif not mute_warning:
210
- start = LOCAL_STORAGE_MESSAGE[0].lower()
211
- logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
212
- return None
213
-
214
- @property
215
- def keep_artifacts_local(self) -> bool:
216
- """Default to keeping artifacts local.
217
-
218
- Guide: :doc:`faq/keep-artifacts-local`
219
- """
220
- return self._keep_artifacts_local
221
-
222
- @keep_artifacts_local.setter
223
- def keep_artifacts_local(self, value: bool):
224
- if not isinstance(value, bool):
225
- raise ValueError("keep_artifacts_local must be a boolean value.")
226
- self._keep_artifacts_local = value
227
-
228
- @property
229
- def storage(self) -> StorageSettings:
230
- """Default storage of instance.
231
-
232
- For a cloud instance, this is cloud storage. For a local instance, this
233
- is a local directory.
234
- """
235
- return self._storage
236
-
237
- @property
238
- def local_storage(self) -> StorageSettings:
239
- """An alternative default local storage location in the current environment.
240
-
241
- Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
242
-
243
- Guide: :doc:`faq/keep-artifacts-local`
244
- """
245
- if not self.keep_artifacts_local:
246
- raise ValueError(
247
- "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
248
- )
249
- if self._local_storage is None:
250
- self._local_storage = self._search_local_root()
251
- if self._local_storage is None:
252
- raise ValueError(LOCAL_STORAGE_MESSAGE)
253
- return self._local_storage
254
-
255
- @local_storage.setter
256
- def local_storage(self, local_root_host: tuple[Path | str, str]):
257
- from lamindb_setup._init_instance import register_storage_in_instance
258
-
259
- if not isinstance(local_root_host, tuple):
260
- local_root = local_root_host
261
- host = "unspecified-host"
262
- else:
263
- local_root, host = local_root_host
264
-
265
- local_root = Path(local_root)
266
- if not self.keep_artifacts_local:
267
- raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
268
- local_storage = self._search_local_root(
269
- local_root=StorageSettings(local_root).root_as_str, mute_warning=True
270
- )
271
- if local_storage is not None:
272
- # great, we're merely switching storage location
273
- self._local_storage = local_storage
274
- return None
275
- local_storage = self._search_local_root(mute_warning=True)
276
- if local_storage is not None:
277
- if os.getenv("LAMIN_TESTING") == "true":
278
- response = "y"
279
- else:
280
- response = input(
281
- "You already configured a local storage root for this instance in this"
282
- f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
283
- )
284
- if response != "y":
285
- return None
286
- if host == "unspecified-host":
287
- logger.warning(
288
- "setting local_storage with a single path is deprecated for creating storage locations"
289
- )
290
- logger.warning(
291
- "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
292
- )
293
- local_root = UPath(local_root)
294
- assert isinstance(local_root, LocalPathClasses)
295
- tentative_storage, hub_status = init_storage(
296
- local_root,
297
- instance_id=self._id,
298
- instance_slug=self.slug,
299
- register_hub=True,
300
- region=host,
301
- ) # type: ignore
302
- if hub_status in ["hub-record-created", "hub-record-retrieved"]:
303
- register_storage_in_instance(tentative_storage) # type: ignore
304
- self._local_storage = tentative_storage
305
- logger.important(
306
- f"defaulting to local storage: {self._local_storage.root} on host {host}"
307
- )
308
- else:
309
- logger.warning(f"could not set this local storage location: {local_root}")
310
-
311
- @property
312
- @deprecated("local_storage")
313
- def storage_local(self) -> StorageSettings:
314
- return self.local_storage
315
-
316
- @storage_local.setter
317
- @deprecated("local_storage")
318
- def storage_local(self, local_root_host: tuple[Path | str, str]):
319
- self.local_storage = local_root_host # type: ignore
320
-
321
- @property
322
- def slug(self) -> str:
323
- """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
324
- return f"{self.owner}/{self.name}"
325
-
326
- @property
327
- def git_repo(self) -> str | None:
328
- """Sync transforms with scripts in git repository.
329
-
330
- Provide the full git repo URL.
331
- """
332
- return self._git_repo
333
-
334
- @property
335
- def _id(self) -> UUID:
336
- """The internal instance id."""
337
- return self._id_
338
-
339
- @property
340
- def uid(self) -> str:
341
- """The user-facing instance id."""
342
- return instance_uid_from_uuid(self._id)
343
-
344
- @property
345
- def modules(self) -> set[str]:
346
- """The set of modules that defines the database schema.
347
-
348
- The core schema contained in lamindb is not included in this set.
349
- """
350
- if self._schema_str is None:
351
- return set()
352
- else:
353
- return {module for module in self._schema_str.split(",") if module != ""}
354
-
355
- @property
356
- @deprecated("modules")
357
- def schema(self) -> set[str]:
358
- return self.modules
359
-
360
- @property
361
- def _sqlite_file(self) -> UPath:
362
- """SQLite file."""
363
- filepath = self.storage.root / ".lamindb/lamin.db"
364
- return filepath
365
-
366
- @property
367
- def _sqlite_file_local(self) -> Path:
368
- """Local SQLite file."""
369
- return self.storage.cloud_to_local_no_update(self._sqlite_file)
370
-
371
- def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
372
- """Upload the local sqlite file to the cloud file."""
373
- if self._is_cloud_sqlite:
374
- sqlite_file = self._sqlite_file
375
- logger.warning(
376
- f"updating{' & unlocking' if unlock_cloud_sqlite else ''} cloud SQLite "
377
- f"'{sqlite_file}' of instance"
378
- f" '{self.slug}'"
379
- )
380
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
381
- sqlite_file.upload_from(cache_file, print_progress=True) # type: ignore
382
- cloud_mtime = sqlite_file.modified.timestamp() # type: ignore
383
- # this seems to work even if there is an open connection
384
- # to the cache file
385
- os.utime(cache_file, times=(cloud_mtime, cloud_mtime))
386
- if unlock_cloud_sqlite:
387
- self._cloud_sqlite_locker.unlock()
388
-
389
- def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
390
- """Download the cloud sqlite file if it is newer than local."""
391
- if self._is_cloud_sqlite:
392
- logger.warning(
393
- "updating local SQLite & locking cloud SQLite (sync back & unlock:"
394
- " lamin disconnect)"
395
- )
396
- if lock_cloud_sqlite:
397
- self._cloud_sqlite_locker.lock()
398
- self._check_sqlite_lock()
399
- sqlite_file = self._sqlite_file
400
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
401
- sqlite_file.synchronize_to(cache_file, print_progress=True) # type: ignore
402
-
403
- def _check_sqlite_lock(self):
404
- if not self._cloud_sqlite_locker.has_lock:
405
- locked_by = self._cloud_sqlite_locker._locked_by
406
- lock_msg = "Cannot load the instance, it is locked by "
407
- user_info = call_with_fallback(
408
- select_account_handle_name_by_lnid,
409
- lnid=locked_by,
410
- )
411
- if user_info is None:
412
- lock_msg += f"uid: '{locked_by}'."
413
- else:
414
- lock_msg += (
415
- f"'{user_info['handle']}' (uid: '{locked_by}', name:"
416
- f" '{user_info['name']}')."
417
- )
418
- lock_msg += (
419
- " The instance will be automatically unlocked after"
420
- f" {int(EXPIRATION_TIME/3600/24)}d of no activity."
421
- )
422
- raise InstanceLockedException(lock_msg)
423
-
424
- @property
425
- def db(self) -> str:
426
- """Database connection string (URI)."""
427
- if "LAMINDB_DJANGO_DATABASE_URL" in os.environ:
428
- logger.warning(
429
- "LAMINDB_DJANGO_DATABASE_URL env variable "
430
- f"is set to {os.environ['LAMINDB_DJANGO_DATABASE_URL']}. "
431
- "It overwrites all db connections and is used instead of `instance.db`."
432
- )
433
- if self._db is None:
434
- # here, we want the updated sqlite file
435
- # hence, we don't use self._sqlite_file_local()
436
- # error_no_origin=False because on instance init
437
- # the sqlite file is not yet in the cloud
438
- sqlite_filepath = self.storage.cloud_to_local(
439
- self._sqlite_file, error_no_origin=False
440
- )
441
- return f"sqlite:///{sqlite_filepath.as_posix()}"
442
- else:
443
- return self._db
444
-
445
- @property
446
- def dialect(self) -> Literal["sqlite", "postgresql"]:
447
- """SQL dialect."""
448
- if self._db is None or self._db.startswith("sqlite://"):
449
- return "sqlite"
450
- else:
451
- assert self._db.startswith("postgresql"), f"Unexpected DB value: {self._db}"
452
- return "postgresql"
453
-
454
- @property
455
- def _is_cloud_sqlite(self) -> bool:
456
- # can we make this a private property, Sergei?
457
- # as it's not relevant to the user
458
- """Is this a cloud instance with sqlite db."""
459
- return self.dialect == "sqlite" and self.storage.type_is_cloud
460
-
461
- @property
462
- def _cloud_sqlite_locker(self):
463
- # avoid circular import
464
- from .cloud_sqlite_locker import empty_locker, get_locker
465
-
466
- if self._is_cloud_sqlite:
467
- try:
468
- # if _locker_user is None then settings.user is used
469
- return get_locker(self, self._locker_user)
470
- except PermissionError:
471
- logger.warning("read-only access - did not access locker")
472
- return empty_locker
473
- else:
474
- return empty_locker
475
-
476
- @property
477
- def is_remote(self) -> bool:
478
- """Boolean indicating if an instance has no local component."""
479
- if not self.storage.type_is_cloud:
480
- return False
481
-
482
- if self.dialect == "postgresql":
483
- if is_local_db_url(self.db):
484
- return False
485
- # returns True for cloud SQLite
486
- # and remote postgres
487
- return True
488
-
489
- @property
490
- def is_on_hub(self) -> bool:
491
- """Is this instance on the hub?
492
-
493
- Can only reliably establish if user has access to the instance. Will
494
- return `False` in case the instance isn't found.
495
- """
496
- if self._is_on_hub is None:
497
- from ._hub_client import call_with_fallback_auth
498
- from ._hub_crud import select_instance_by_id
499
- from ._settings import settings
500
-
501
- if settings.user.handle != "anonymous":
502
- response = call_with_fallback_auth(
503
- select_instance_by_id, instance_id=self._id.hex
504
- )
505
- else:
506
- response = call_with_fallback(
507
- select_instance_by_id, instance_id=self._id.hex
508
- )
509
- logger.warning("calling anonymously, will miss private instances")
510
- if response is None:
511
- self._is_on_hub = False
512
- else:
513
- self._is_on_hub = True
514
- return self._is_on_hub
515
-
516
- def _get_settings_file(self) -> Path:
517
- return instance_settings_file(self.name, self.owner)
518
-
519
- def _persist(self, write_to_disk: bool = True) -> None:
520
- """Set these instance settings as the current instance.
521
-
522
- Args:
523
- write_to_disk: Save these instance settings to disk and
524
- overwrite the current instance settings file.
525
- """
526
- if write_to_disk:
527
- assert self.name is not None
528
- filepath = self._get_settings_file()
529
- # persist under filepath for later reference
530
- save_instance_settings(self, filepath)
531
- # persist under current file for auto load
532
- shutil.copy2(filepath, current_instance_settings_file())
533
- # persist under settings class for same session reference
534
- # need to import here to avoid circular import
535
- from ._settings import settings
536
-
537
- settings._instance_settings = self
538
-
539
- def _init_db(self):
540
- from lamindb_setup._check_setup import disable_auto_connect
541
-
542
- from .django import setup_django
543
-
544
- disable_auto_connect(setup_django)(self, init=True)
545
-
546
- def _load_db(self) -> tuple[bool, str]:
547
- # Is the database available and initialized as LaminDB?
548
- # returns a tuple of status code and message
549
- if self.dialect == "sqlite" and not self._sqlite_file.exists():
550
- legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
551
- if legacy_file.exists():
552
- logger.warning(
553
- f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
554
- )
555
- legacy_file.rename(self._sqlite_file)
556
- else:
557
- return False, f"SQLite file {self._sqlite_file} does not exist"
558
- # we need the local sqlite to setup django
559
- self._update_local_sqlite_file()
560
- # setting up django also performs a check for migrations & prints them
561
- # as warnings
562
- # this should fail, e.g., if the db is not reachable
563
- from lamindb_setup._check_setup import disable_auto_connect
564
-
565
- from .django import setup_django
566
-
567
- disable_auto_connect(setup_django)(self)
568
-
569
- return True, ""
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Literal
7
+
8
+ from django.db import connection
9
+ from django.db.utils import ProgrammingError
10
+ from lamin_utils import logger
11
+
12
+ from ._deprecated import deprecated
13
+ from ._hub_client import call_with_fallback
14
+ from ._hub_crud import select_account_handle_name_by_lnid
15
+ from ._hub_utils import LaminDsn, LaminDsnModel
16
+ from ._settings_save import save_instance_settings
17
+ from ._settings_storage import (
18
+ LEGACY_STORAGE_UID_FILE_KEY,
19
+ STORAGE_UID_FILE_KEY,
20
+ StorageSettings,
21
+ get_storage_type,
22
+ init_storage,
23
+ instance_uid_from_uuid,
24
+ )
25
+ from ._settings_store import current_instance_settings_file, instance_settings_file
26
+ from .cloud_sqlite_locker import (
27
+ EXPIRATION_TIME,
28
+ InstanceLockedException,
29
+ )
30
+ from .upath import LocalPathClasses, UPath
31
+
32
+ if TYPE_CHECKING:
33
+ from uuid import UUID
34
+
35
+ from ._settings_user import UserSettings
36
+ from .types import UPathStr
37
+
38
+ LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
39
+
40
+
41
+ def sanitize_git_repo_url(repo_url: str) -> str:
42
+ assert repo_url.startswith("https://")
43
+ return repo_url.replace(".git", "")
44
+
45
+
46
+ def is_local_db_url(db_url: str) -> bool:
47
+ if "@localhost:" in db_url:
48
+ return True
49
+ if "@0.0.0.0:" in db_url:
50
+ return True
51
+ if "@127.0.0.1" in db_url:
52
+ return True
53
+ return False
54
+
55
+
56
+ def check_is_instance_remote(root: UPathStr, db: str | None) -> bool:
57
+ # returns True for cloud SQLite
58
+ # and remote postgres
59
+ root_str = str(root)
60
+ if not root_str.startswith("create-s3") and get_storage_type(root_str) == "local":
61
+ return False
62
+
63
+ if db is not None and is_local_db_url(db):
64
+ return False
65
+ return True
66
+
67
+
68
+ class InstanceSettings:
69
+ """Instance settings."""
70
+
71
+ def __init__(
72
+ self,
73
+ id: UUID, # instance id/uuid
74
+ owner: str, # owner handle
75
+ name: str, # instance name
76
+ storage: StorageSettings | None = None, # storage location
77
+ keep_artifacts_local: bool = False, # default to local storage
78
+ db: str | None = None, # DB URI
79
+ modules: str | None = None, # comma-separated string of module names
80
+ git_repo: str | None = None, # a git repo URL
81
+ is_on_hub: bool | None = None, # initialized from hub
82
+ api_url: str | None = None,
83
+ schema_id: UUID | None = None,
84
+ fine_grained_access: bool = False,
85
+ db_permissions: str | None = None,
86
+ _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
87
+ ):
88
+ from ._hub_utils import validate_db_arg
89
+
90
+ self._id_: UUID = id
91
+ self._owner: str = owner
92
+ self._name: str = name
93
+ self._storage: StorageSettings | None = storage
94
+ validate_db_arg(db)
95
+ self._db: str | None = db
96
+ self._schema_str: str | None = modules
97
+ self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
98
+ # local storage
99
+ self._keep_artifacts_local = keep_artifacts_local
100
+ self._local_storage: StorageSettings | None = None
101
+ self._is_on_hub = is_on_hub
102
+ # private, needed for api requests
103
+ self._api_url = api_url
104
+ self._schema_id = schema_id
105
+ # private, whether fine grained access is used
106
+ # needed to be set to request jwt etc
107
+ self._fine_grained_access = fine_grained_access
108
+ # permissions for db such as jwt, read, write etc.
109
+ self._db_permissions = db_permissions
110
+ # if None then settings.user is used
111
+ self._locker_user = _locker_user
112
+
113
+ def __repr__(self):
114
+ """Rich string representation."""
115
+ representation = "Current instance:"
116
+ attrs = ["slug", "storage", "db", "modules", "git_repo"]
117
+ for attr in attrs:
118
+ value = getattr(self, attr)
119
+ if attr == "storage":
120
+ if self.keep_artifacts_local:
121
+ import lamindb as ln
122
+
123
+ self._local_storage = ln.setup.settings.instance._local_storage
124
+ if self._local_storage is not None:
125
+ value_local = self.local_storage
126
+ representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
127
+ representation += (
128
+ f"\n - cloud storage: {value.root_as_str} ({value.region})"
129
+ )
130
+ else:
131
+ representation += (
132
+ f"\n - storage: {value.root_as_str} ({value.region})"
133
+ )
134
+ elif attr == "db":
135
+ if self.dialect != "sqlite":
136
+ model = LaminDsnModel(db=value)
137
+ db_print = LaminDsn.build(
138
+ scheme=model.db.scheme,
139
+ user=model.db.user,
140
+ password="***",
141
+ host="***",
142
+ port=model.db.port,
143
+ database=model.db.database,
144
+ )
145
+ else:
146
+ db_print = value
147
+ representation += f"\n - {attr}: {db_print}"
148
+ elif attr == "modules":
149
+ representation += f"\n - {attr}: {value if value else '{}'}"
150
+ else:
151
+ representation += f"\n - {attr}: {value}"
152
+ return representation
153
+
154
+ @property
155
+ def owner(self) -> str:
156
+ """Instance owner. A user or organization account handle."""
157
+ return self._owner
158
+
159
+ @property
160
+ def name(self) -> str:
161
+ """Instance name."""
162
+ return self._name
163
+
164
+ def _search_local_root(
165
+ self, local_root: str | None = None, mute_warning: bool = False
166
+ ) -> StorageSettings | None:
167
+ from lamindb.models import Storage
168
+
169
+ if local_root is not None:
170
+ local_records = Storage.objects.filter(root=local_root)
171
+ else:
172
+ # only search local managed storage locations (instance_uid=self.uid)
173
+ local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
174
+ all_local_records = local_records.all()
175
+ try:
176
+ # trigger an error in case of a migration issue
177
+ all_local_records.first()
178
+ except ProgrammingError:
179
+ logger.error("not able to load Storage registry: please migrate")
180
+ return None
181
+ found = []
182
+ for record in all_local_records:
183
+ root_path = Path(record.root)
184
+ try:
185
+ root_path_exists = root_path.exists()
186
+ except PermissionError:
187
+ continue
188
+ if root_path_exists:
189
+ marker_path = root_path / STORAGE_UID_FILE_KEY
190
+ try:
191
+ marker_path_exists = marker_path.exists()
192
+ except PermissionError:
193
+ continue
194
+ if not marker_path_exists:
195
+ legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
196
+ if legacy_filepath.exists():
197
+ logger.warning(
198
+ f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
199
+ )
200
+ legacy_filepath.rename(marker_path)
201
+ else:
202
+ logger.warning(
203
+ f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
204
+ )
205
+ continue
206
+ try:
207
+ uid = marker_path.read_text().splitlines()[0]
208
+ except PermissionError:
209
+ logger.warning(
210
+ f"ignoring the following location because no permission to read it: {marker_path}"
211
+ )
212
+ continue
213
+ if uid == record.uid:
214
+ found.append(record)
215
+ if found:
216
+ if len(found) > 1:
217
+ found_display = "\n - ".join([f"{record.root}" for record in found])
218
+ logger.important(f"found locations:\n - {found_display}")
219
+ record = found[0]
220
+ logger.important(f"defaulting to local storage: {record.root}")
221
+ return StorageSettings(record.root, region=record.region)
222
+ elif not mute_warning:
223
+ start = LOCAL_STORAGE_MESSAGE[0].lower()
224
+ logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
225
+ return None
226
+
227
+ @property
228
+ def keep_artifacts_local(self) -> bool:
229
+ """Default to keeping artifacts local.
230
+
231
+ Guide: :doc:`faq/keep-artifacts-local`
232
+ """
233
+ return self._keep_artifacts_local
234
+
235
+ @keep_artifacts_local.setter
236
+ def keep_artifacts_local(self, value: bool):
237
+ if not isinstance(value, bool):
238
+ raise ValueError("keep_artifacts_local must be a boolean value.")
239
+ self._keep_artifacts_local = value
240
+
241
+ @property
242
+ def storage(self) -> StorageSettings:
243
+ """Default storage of instance.
244
+
245
+ For a cloud instance, this is cloud storage. For a local instance, this
246
+ is a local directory.
247
+ """
248
+ return self._storage # type: ignore
249
+
250
+ @property
251
+ def local_storage(self) -> StorageSettings:
252
+ """An alternative default local storage location in the current environment.
253
+
254
+ Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
255
+
256
+ Guide: :doc:`faq/keep-artifacts-local`
257
+ """
258
+ if not self.keep_artifacts_local:
259
+ raise ValueError(
260
+ "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
261
+ )
262
+ if self._local_storage is None:
263
+ self._local_storage = self._search_local_root()
264
+ if self._local_storage is None:
265
+ raise ValueError(LOCAL_STORAGE_MESSAGE)
266
+ return self._local_storage
267
+
268
@local_storage.setter
def local_storage(self, local_root_host: tuple[Path | str, str]):
    """Switch to, or register, a local storage location.

    Args:
        local_root_host: Either a ``(local_root, host)`` tuple or a bare
            path. A bare path is still accepted; ``host`` then falls back
            to ``"unspecified-host"`` and deprecation warnings are logged
            if a new location ends up being created.

    Raises:
        ValueError: If :attr:`keep_artifacts_local` is not enabled.

    If the given root is already found by ``_search_local_root``, it merely
    becomes the active local storage. Otherwise a new location is initialized
    via ``init_storage(..., register_hub=True)`` and, on success, registered
    in the instance.
    """
    # function-local import — presumably to avoid a circular import; confirm
    from lamindb_setup._init_instance import register_storage_in_instance

    if not isinstance(local_root_host, tuple):
        # legacy call style: only a path was passed
        local_root = local_root_host
        host = "unspecified-host"
    else:
        local_root, host = local_root_host

    local_root = Path(local_root)
    if not self.keep_artifacts_local:
        raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
    # first look for a location that matches the requested root
    local_storage = self._search_local_root(
        local_root=StorageSettings(local_root).root_as_str, mute_warning=True
    )
    if local_storage is not None:
        # great, we're merely switching storage location
        self._local_storage = local_storage
        return None
    # the requested root was not found; check whether any other local root
    # is found before registering a new one
    local_storage = self._search_local_root(mute_warning=True)
    if local_storage is not None:
        if os.getenv("LAMIN_TESTING") == "true":
            # skip the interactive prompt when LAMIN_TESTING is "true"
            response = "y"
        else:
            response = input(
                "You already configured a local storage root for this instance in this"
                f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
            )
        # only the exact answer "y" continues; anything else aborts silently
        if response != "y":
            return None
    if host == "unspecified-host":
        logger.warning(
            "setting local_storage with a single path is deprecated for creating storage locations"
        )
        logger.warning(
            "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
        )
    local_root = UPath(local_root)
    # NOTE(review): assert is stripped under `python -O`; the local-path
    # check would then be skipped
    assert isinstance(local_root, LocalPathClasses)
    # the host is passed to init_storage through its `region` parameter
    tentative_storage, hub_status = init_storage(
        local_root,
        instance_id=self._id,
        instance_slug=self.slug,
        register_hub=True,
        region=host,
    )  # type: ignore
    if hub_status in ["hub-record-created", "hub-record-retrieved"]:
        register_storage_in_instance(tentative_storage)  # type: ignore
        self._local_storage = tentative_storage
        logger.important(
            f"defaulting to local storage: {self._local_storage.root} on host {host}"
        )
    else:
        # any other hub status leaves the current local storage untouched
        logger.warning(f"could not set this local storage location: {local_root}")
323
+
324
@property
@deprecated("local_storage")
def storage_local(self) -> StorageSettings:
    """Deprecated alias that forwards to :attr:`local_storage`."""
    forwarded = self.local_storage
    return forwarded
328
+
329
@storage_local.setter
@deprecated("local_storage")
def storage_local(self, local_root_host: tuple[Path | str, str]):
    """Deprecated alias that forwards the assignment to :attr:`local_storage`."""
    setattr(self, "local_storage", local_root_host)  # type: ignore
333
+
334
@property
def slug(self) -> str:
    """Unique semantic identifier of form `"{account_handle}/{instance_name}"`.

    Built from :attr:`owner` and :attr:`name`.
    """
    return "{}/{}".format(self.owner, self.name)
338
+
339
@property
def git_repo(self) -> str | None:
    """Git repository used to sync transforms with scripts.

    This is the full git repo URL, or `None` if ``_git_repo`` is unset.
    """
    repo_url = self._git_repo
    return repo_url
346
+
347
@property
def api_url(self) -> str | None:
    """URL of the REST API for this instance.

    Use this URL for API calls related to this instance; `None` if
    ``_api_url`` is unset.
    """
    url = self._api_url
    return url
354
+
355
@property
def available_spaces(self) -> dict | None:
    """Available spaces, grouped by role, for instances with fine-grained permissions.

    Returns `None` unless ``_db_permissions`` is ``"jwt"``. Otherwise returns
    a dictionary with the roles ``"admin"``, ``"write"`` and ``"read"`` as keys
    and lists of the matching ``Space`` records as values, based on the rows
    returned by the database function ``check_access()`` for type ``'space'``.
    """
    if self._db_permissions != "jwt":
        return None

    from lamindb.models import Space

    with connection.cursor() as cursor:
        cursor.execute("SELECT * FROM check_access() WHERE type = 'space'")
        access_rows = cursor.fetchall()

    # each row is read as (space id, role, ...); collect ids per role
    ids_by_role: dict = {"admin": [], "write": [], "read": []}
    for access_row in access_rows:
        ids_by_role[access_row[1]].append(access_row[0])

    spaces_by_role = {}
    for role, space_ids in ids_by_role.items():
        # avoid a query when there is nothing to look up for this role
        if space_ids:
            spaces_by_role[role] = Space.filter(id__in=space_ids).to_list()
        else:
            spaces_by_role[role] = []
    return spaces_by_role
377
+
378
@property
def _id(self) -> UUID:
    """The internal instance id (the value stored in ``_id_``)."""
    internal_id = self._id_
    return internal_id
382
+
383
@property
def uid(self) -> str:
    """The user-facing instance id, derived from the internal ``_id``."""
    internal_id = self._id
    return instance_uid_from_uuid(internal_id)
387
+
388
@property
def modules(self) -> set[str]:
    """The set of modules that defines the database schema.

    The core schema contained in lamindb is not included in this set.
    Parsed from the comma-separated ``_schema_str``; empty entries are
    dropped and an unset ``_schema_str`` yields an empty set.
    """
    schema_str = self._schema_str
    if schema_str is None:
        return set()
    names = set(schema_str.split(","))
    names.discard("")
    return names
398
+
399
@property
@deprecated("modules")
def schema(self) -> set[str]:
    """Deprecated alias that forwards to :attr:`modules`."""
    forwarded = self.modules
    return forwarded
403
+
404
@property
def _sqlite_file(self) -> UPath:
    """Path of the SQLite file below the storage root (``.lamindb/lamin.db``)."""
    return self.storage.root / ".lamindb/lamin.db"
409
+
410
@property
def _sqlite_file_local(self) -> Path:
    """Local SQLite file.

    Obtained by passing ``_sqlite_file`` to the storage's
    ``cloud_to_local_no_update``.
    """
    remote_file = self._sqlite_file
    return self.storage.cloud_to_local_no_update(remote_file)
414
+
415
def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
    """Upload the local sqlite file to the cloud file.

    Does nothing unless ``_is_cloud_sqlite`` is true.

    Args:
        unlock_cloud_sqlite: Also release the cloud SQLite lock after the
            upload.
    """
    if not self._is_cloud_sqlite:
        return

    sqlite_file = self._sqlite_file
    unlock_note = " & unlocking" if unlock_cloud_sqlite else ""
    logger.warning(
        f"updating{unlock_note} cloud SQLite '{sqlite_file}' of instance '{self.slug}'"
    )
    cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
    sqlite_file.upload_from(cache_file, print_progress=True)  # type: ignore
    # align the cache file's timestamps with the uploaded cloud file;
    # this seems to work even if there is an open connection to the cache file
    cloud_mtime = sqlite_file.modified.timestamp()  # type: ignore
    os.utime(cache_file, times=(cloud_mtime, cloud_mtime))
    if unlock_cloud_sqlite:
        self._cloud_sqlite_locker.unlock()
432
+
433
def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
    """Download the cloud sqlite file if it is newer than local.

    Does nothing unless ``_is_cloud_sqlite`` is true.

    Args:
        lock_cloud_sqlite: Acquire the cloud SQLite lock (and verify that it
            is held) before synchronizing.

    Raises:
        InstanceLockedException: Via ``_check_sqlite_lock`` if the lock could
            not be acquired.
    """
    if not self._is_cloud_sqlite:
        return

    # only announce locking when we actually lock, mirroring the conditional
    # message in `_update_cloud_sqlite_file`
    if lock_cloud_sqlite:
        logger.warning(
            "updating local SQLite & locking cloud SQLite (sync back & unlock:"
            " lamin disconnect)"
        )
        self._cloud_sqlite_locker.lock()
        self._check_sqlite_lock()
    else:
        logger.warning("updating local SQLite")
    sqlite_file = self._sqlite_file
    cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
    sqlite_file.synchronize_to(cache_file, print_progress=True)  # type: ignore
446
+
447
def _check_sqlite_lock(self):
    """Raise if the cloud SQLite lock is not held by this session.

    Raises:
        InstanceLockedException: If the locker reports no lock; the message
            names the lock holder (looked up through the hub when possible)
            and the automatic unlock period.
    """
    if self._cloud_sqlite_locker.has_lock:
        return

    locked_by = self._cloud_sqlite_locker._locked_by
    user_info = call_with_fallback(
        select_account_handle_name_by_lnid,
        lnid=locked_by,
    )
    if user_info is None:
        # no account info available: fall back to the bare uid
        holder = f"uid: '{locked_by}'."
    else:
        holder = (
            f"'{user_info['handle']}' (uid: '{locked_by}', name:"
            f" '{user_info['name']}')."
        )
    expiry_note = (
        " The instance will be automatically unlocked after"
        f" {int(EXPIRATION_TIME/3600/24)}d of no activity."
    )
    raise InstanceLockedException(
        "Cannot load the instance, it is locked by " + holder + expiry_note
    )
467
+
468
@property
def db(self) -> str:
    """Database connection string (URI).

    Returns ``_db`` when it is set. Otherwise returns
    ``"sqlite:///:memory:"`` for the storage-less ``"none/none"`` instance,
    and else a ``sqlite:///`` URI for the local copy of the SQLite file.

    If the ``LAMINDB_DJANGO_DATABASE_URL`` environment variable is set, a
    warning is logged. A password embedded in that URL is masked in the log
    message so that credentials do not end up in logs.
    """
    env_url = os.environ.get("LAMINDB_DJANGO_DATABASE_URL")
    if env_url is not None:
        from urllib.parse import urlsplit

        try:
            password = urlsplit(env_url).password
        except ValueError:
            # malformed URL: do not risk echoing credentials verbatim
            shown_url = "<unparseable URL>"
        else:
            shown_url = (
                env_url.replace(f":{password}@", ":***@", 1) if password else env_url
            )
        logger.warning(
            "LAMINDB_DJANGO_DATABASE_URL env variable "
            f"is set to {shown_url}. "
            "It overwrites all db connections and is used instead of `instance.db`."
        )
    if self._db is not None:
        return self._db
    if self._storage is None and self.slug == "none/none":
        return "sqlite:///:memory:"
    # here, we want the updated sqlite file
    # hence, we don't use self._sqlite_file_local()
    # error_no_origin=False because on instance init
    # the sqlite file is not yet in the cloud
    sqlite_filepath = self.storage.cloud_to_local(
        self._sqlite_file, error_no_origin=False
    )
    return f"sqlite:///{sqlite_filepath.as_posix()}"
492
+
493
@property
def dialect(self) -> Literal["sqlite", "postgresql"]:
    """SQL dialect, derived from the prefix of ``_db``.

    An unset ``_db`` or one starting with ``sqlite://`` means ``"sqlite"``;
    otherwise ``_db`` is expected to start with ``postgresql``.
    """
    uri = self._db
    is_sqlite = uri is None or uri.startswith("sqlite://")
    if is_sqlite:
        return "sqlite"
    assert uri.startswith("postgresql"), f"Unexpected DB value: {uri}"
    return "postgresql"
501
+
502
@property
def _is_cloud_sqlite(self) -> bool:
    """Is this a cloud instance with sqlite db."""
    if self.dialect != "sqlite":
        return False
    return self.storage.type_is_cloud
508
+
509
@property
def _cloud_sqlite_locker(self):
    """Locker for the cloud SQLite file.

    Returns ``empty_locker`` when this is not a cloud SQLite instance or
    when obtaining the real locker fails with ``PermissionError``.
    """
    # avoid circular import
    from .cloud_sqlite_locker import empty_locker, get_locker

    if not self._is_cloud_sqlite:
        return empty_locker
    try:
        # if _locker_user is None then settings.user is used
        locker = get_locker(self, self._locker_user)
    except PermissionError:
        logger.warning("read-only access - did not access locker")
        locker = empty_locker
    return locker
523
+
524
@property
def is_remote(self) -> bool:
    """Boolean indicating if an instance has no local component.

    Determined by ``check_is_instance_remote`` from the storage root and
    the database URI.
    """
    storage_root = self.storage.root_as_str
    return check_is_instance_remote(storage_root, self.db)
528
+
529
@property
def is_on_hub(self) -> bool:
    """Is this instance on the hub?

    Can only reliably establish if user has access to the instance. Will
    return `False` in case the instance isn't found.

    The hub is queried on first access only; the answer is cached in
    ``_is_on_hub``.
    """
    if self._is_on_hub is not None:
        return self._is_on_hub

    from ._hub_client import call_with_fallback_auth
    from ._hub_crud import select_instance_by_id
    from ._settings import settings

    if settings.user.handle == "anonymous":
        response = call_with_fallback(
            select_instance_by_id, instance_id=self._id.hex
        )
        logger.warning("calling anonymously, will miss private instances")
    else:
        response = call_with_fallback_auth(
            select_instance_by_id, instance_id=self._id.hex
        )
    self._is_on_hub = response is not None
    return self._is_on_hub
555
+
556
def _get_settings_file(self) -> Path:
    """Return the settings file path for this instance's name and owner."""
    instance_name, owner_handle = self.name, self.owner
    return instance_settings_file(instance_name, owner_handle)
558
+
559
def _persist(self, write_to_disk: bool = True) -> None:
    """Set these instance settings as the current instance.

    Args:
        write_to_disk: Save these instance settings to disk and
            overwrite the current instance settings file. Nothing is
            written for the ``"none/none"`` instance.
    """
    should_write = write_to_disk and self.slug != "none/none"
    if should_write:
        assert self.name is not None
        settings_path = self._get_settings_file()
        # persist under filepath for later reference
        save_instance_settings(self, settings_path)
        # persist under current file for auto load
        shutil.copy2(settings_path, current_instance_settings_file())

    # persist under settings class for same session reference
    # need to import here to avoid circular import
    from ._settings import settings

    settings._instance_settings = self
578
+
579
def _init_db(self):
    """Run ``setup_django`` with ``init=True``, wrapped in ``disable_auto_connect``."""
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    guarded_setup = disable_auto_connect(setup_django)
    guarded_setup(self, init=True)
585
+
586
def _load_db(self) -> tuple[bool, str]:
    """Check that the database is available and set up django for it.

    Returns:
        A ``(success, message)`` tuple: ``(False, <reason>)`` if the SQLite
        file is missing and no legacy ``<id>.lndb`` file exists, otherwise
        ``(True, "")`` after django has been set up.
    """
    sqlite_missing = self.dialect == "sqlite" and not self._sqlite_file.exists()
    if sqlite_missing:
        legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
        if not legacy_file.exists():
            return False, f"SQLite file {self._sqlite_file} does not exist"
        # migrate the legacy file name to the current location
        logger.warning(
            f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
        )
        legacy_file.rename(self._sqlite_file)
    # we need the local sqlite to setup django
    self._update_local_sqlite_file()
    # setting up django also performs a check for migrations & prints them
    # as warnings
    # this should fail, e.g., if the db is not reachable
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    guarded_setup = disable_auto_connect(setup_django)
    guarded_setup(self)

    return True, ""