lamindb_setup 1.9.0__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +166 -166
  4. lamindb_setup/_connect_instance.py +328 -342
  5. lamindb_setup/_delete.py +141 -141
  6. lamindb_setup/_disconnect.py +32 -32
  7. lamindb_setup/_init_instance.py +440 -440
  8. lamindb_setup/_migrate.py +266 -266
  9. lamindb_setup/_register_instance.py +35 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +70 -70
  12. lamindb_setup/_setup_user.py +133 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -223
  15. lamindb_setup/core/_hub_client.py +248 -248
  16. lamindb_setup/core/_hub_core.py +665 -665
  17. lamindb_setup/core/_hub_crud.py +227 -227
  18. lamindb_setup/core/_private_django_api.py +83 -83
  19. lamindb_setup/core/_settings.py +377 -377
  20. lamindb_setup/core/_settings_instance.py +569 -569
  21. lamindb_setup/core/_settings_load.py +141 -141
  22. lamindb_setup/core/_settings_save.py +95 -95
  23. lamindb_setup/core/_settings_storage.py +429 -429
  24. lamindb_setup/core/_settings_store.py +91 -91
  25. lamindb_setup/core/_settings_user.py +55 -55
  26. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  27. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  28. lamindb_setup/core/django.py +305 -296
  29. lamindb_setup/core/exceptions.py +1 -1
  30. lamindb_setup/core/hashing.py +134 -134
  31. lamindb_setup/core/types.py +1 -1
  32. lamindb_setup/core/upath.py +1013 -1013
  33. lamindb_setup/errors.py +70 -70
  34. lamindb_setup/types.py +20 -20
  35. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/METADATA +1 -1
  36. lamindb_setup-1.9.1.dist-info/RECORD +50 -0
  37. lamindb_setup-1.9.0.dist-info/RECORD +0 -50
  38. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/LICENSE +0 -0
  39. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/WHEEL +0 -0
@@ -1,569 +1,569 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- import shutil
5
- from pathlib import Path
6
- from typing import TYPE_CHECKING, Literal
7
-
8
- from django.db.utils import ProgrammingError
9
- from lamin_utils import logger
10
-
11
- from ._deprecated import deprecated
12
- from ._hub_client import call_with_fallback
13
- from ._hub_crud import select_account_handle_name_by_lnid
14
- from ._hub_utils import LaminDsn, LaminDsnModel
15
- from ._settings_save import save_instance_settings
16
- from ._settings_storage import (
17
- LEGACY_STORAGE_UID_FILE_KEY,
18
- STORAGE_UID_FILE_KEY,
19
- StorageSettings,
20
- init_storage,
21
- instance_uid_from_uuid,
22
- )
23
- from ._settings_store import current_instance_settings_file, instance_settings_file
24
- from .cloud_sqlite_locker import (
25
- EXPIRATION_TIME,
26
- InstanceLockedException,
27
- )
28
- from .upath import LocalPathClasses, UPath
29
-
30
- if TYPE_CHECKING:
31
- from uuid import UUID
32
-
33
- from ._settings_user import UserSettings
34
-
35
- LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
36
-
37
-
38
- def sanitize_git_repo_url(repo_url: str) -> str:
39
- assert repo_url.startswith("https://")
40
- return repo_url.replace(".git", "")
41
-
42
-
43
- def is_local_db_url(db_url: str) -> bool:
44
- if "@localhost:" in db_url:
45
- return True
46
- if "@0.0.0.0:" in db_url:
47
- return True
48
- if "@127.0.0.1" in db_url:
49
- return True
50
- return False
51
-
52
-
53
- class InstanceSettings:
54
- """Instance settings."""
55
-
56
- def __init__(
57
- self,
58
- id: UUID, # instance id/uuid
59
- owner: str, # owner handle
60
- name: str, # instance name
61
- storage: StorageSettings, # storage location
62
- keep_artifacts_local: bool = False, # default to local storage
63
- uid: str | None = None, # instance uid/lnid
64
- db: str | None = None, # DB URI
65
- modules: str | None = None, # comma-separated string of module names
66
- git_repo: str | None = None, # a git repo URL
67
- is_on_hub: bool | None = None, # initialized from hub
68
- api_url: str | None = None,
69
- schema_id: UUID | None = None,
70
- fine_grained_access: bool = False,
71
- db_permissions: str | None = None,
72
- _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
73
- ):
74
- from ._hub_utils import validate_db_arg
75
-
76
- self._id_: UUID = id
77
- self._owner: str = owner
78
- self._name: str = name
79
- self._uid: str | None = uid
80
- self._storage: StorageSettings = storage
81
- validate_db_arg(db)
82
- self._db: str | None = db
83
- self._schema_str: str | None = modules
84
- self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
85
- # local storage
86
- self._keep_artifacts_local = keep_artifacts_local
87
- self._local_storage: StorageSettings | None = None
88
- self._is_on_hub = is_on_hub
89
- # private, needed for api requests
90
- self._api_url = api_url
91
- self._schema_id = schema_id
92
- # private, whether fine grained access is used
93
- # needed to be set to request jwt etc
94
- self._fine_grained_access = fine_grained_access
95
- # permissions for db such as jwt, read, write etc.
96
- self._db_permissions = db_permissions
97
- # if None then settings.user is used
98
- self._locker_user = _locker_user
99
-
100
- def __repr__(self):
101
- """Rich string representation."""
102
- representation = "Current instance:"
103
- attrs = ["slug", "storage", "db", "modules", "git_repo"]
104
- for attr in attrs:
105
- value = getattr(self, attr)
106
- if attr == "storage":
107
- if self.keep_artifacts_local:
108
- import lamindb as ln
109
-
110
- self._local_storage = ln.setup.settings.instance._local_storage
111
- if self._local_storage is not None:
112
- value_local = self.local_storage
113
- representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
114
- representation += (
115
- f"\n - cloud storage: {value.root_as_str} ({value.region})"
116
- )
117
- else:
118
- representation += (
119
- f"\n - storage: {value.root_as_str} ({value.region})"
120
- )
121
- elif attr == "db":
122
- if self.dialect != "sqlite":
123
- model = LaminDsnModel(db=value)
124
- db_print = LaminDsn.build(
125
- scheme=model.db.scheme,
126
- user=model.db.user,
127
- password="***",
128
- host="***",
129
- port=model.db.port,
130
- database=model.db.database,
131
- )
132
- else:
133
- db_print = value
134
- representation += f"\n - {attr}: {db_print}"
135
- elif attr == "modules":
136
- representation += f"\n - {attr}: {value if value else '{}'}"
137
- else:
138
- representation += f"\n - {attr}: {value}"
139
- return representation
140
-
141
- @property
142
- def owner(self) -> str:
143
- """Instance owner. A user or organization account handle."""
144
- return self._owner
145
-
146
- @property
147
- def name(self) -> str:
148
- """Instance name."""
149
- return self._name
150
-
151
- def _search_local_root(
152
- self, local_root: str | None = None, mute_warning: bool = False
153
- ) -> StorageSettings | None:
154
- from lamindb.models import Storage
155
-
156
- if local_root is not None:
157
- local_records = Storage.objects.filter(root=local_root)
158
- else:
159
- # only search local managed storage locations (instance_uid=self.uid)
160
- local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
161
- all_local_records = local_records.all()
162
- try:
163
- # trigger an error in case of a migration issue
164
- all_local_records.first()
165
- except ProgrammingError:
166
- logger.error("not able to load Storage registry: please migrate")
167
- return None
168
- found = []
169
- for record in all_local_records:
170
- root_path = Path(record.root)
171
- try:
172
- root_path_exists = root_path.exists()
173
- except PermissionError:
174
- continue
175
- if root_path_exists:
176
- marker_path = root_path / STORAGE_UID_FILE_KEY
177
- try:
178
- marker_path_exists = marker_path.exists()
179
- except PermissionError:
180
- continue
181
- if not marker_path_exists:
182
- legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
183
- if legacy_filepath.exists():
184
- logger.warning(
185
- f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
186
- )
187
- legacy_filepath.rename(marker_path)
188
- else:
189
- logger.warning(
190
- f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
191
- )
192
- continue
193
- try:
194
- uid = marker_path.read_text().splitlines()[0]
195
- except PermissionError:
196
- logger.warning(
197
- f"ignoring the following location because no permission to read it: {marker_path}"
198
- )
199
- continue
200
- if uid == record.uid:
201
- found.append(record)
202
- if found:
203
- if len(found) > 1:
204
- found_display = "\n - ".join([f"{record.root}" for record in found])
205
- logger.important(f"found locations:\n - {found_display}")
206
- record = found[0]
207
- logger.important(f"defaulting to local storage: {record.root}")
208
- return StorageSettings(record.root, region=record.region)
209
- elif not mute_warning:
210
- start = LOCAL_STORAGE_MESSAGE[0].lower()
211
- logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
212
- return None
213
-
214
- @property
215
- def keep_artifacts_local(self) -> bool:
216
- """Default to keeping artifacts local.
217
-
218
- Guide: :doc:`faq/keep-artifacts-local`
219
- """
220
- return self._keep_artifacts_local
221
-
222
- @keep_artifacts_local.setter
223
- def keep_artifacts_local(self, value: bool):
224
- if not isinstance(value, bool):
225
- raise ValueError("keep_artifacts_local must be a boolean value.")
226
- self._keep_artifacts_local = value
227
-
228
- @property
229
- def storage(self) -> StorageSettings:
230
- """Default storage of instance.
231
-
232
- For a cloud instance, this is cloud storage. For a local instance, this
233
- is a local directory.
234
- """
235
- return self._storage
236
-
237
- @property
238
- def local_storage(self) -> StorageSettings:
239
- """An alternative default local storage location in the current environment.
240
-
241
- Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
242
-
243
- Guide: :doc:`faq/keep-artifacts-local`
244
- """
245
- if not self.keep_artifacts_local:
246
- raise ValueError(
247
- "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
248
- )
249
- if self._local_storage is None:
250
- self._local_storage = self._search_local_root()
251
- if self._local_storage is None:
252
- raise ValueError(LOCAL_STORAGE_MESSAGE)
253
- return self._local_storage
254
-
255
- @local_storage.setter
256
- def local_storage(self, local_root_host: tuple[Path | str, str]):
257
- from lamindb_setup._init_instance import register_storage_in_instance
258
-
259
- if not isinstance(local_root_host, tuple):
260
- local_root = local_root_host
261
- host = "unspecified-host"
262
- else:
263
- local_root, host = local_root_host
264
-
265
- local_root = Path(local_root)
266
- if not self.keep_artifacts_local:
267
- raise ValueError("`keep_artifacts_local` is not enabled for this instance.")
268
- local_storage = self._search_local_root(
269
- local_root=StorageSettings(local_root).root_as_str, mute_warning=True
270
- )
271
- if local_storage is not None:
272
- # great, we're merely switching storage location
273
- self._local_storage = local_storage
274
- return None
275
- local_storage = self._search_local_root(mute_warning=True)
276
- if local_storage is not None:
277
- if os.getenv("LAMIN_TESTING") == "true":
278
- response = "y"
279
- else:
280
- response = input(
281
- "You already configured a local storage root for this instance in this"
282
- f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
283
- )
284
- if response != "y":
285
- return None
286
- if host == "unspecified-host":
287
- logger.warning(
288
- "setting local_storage with a single path is deprecated for creating storage locations"
289
- )
290
- logger.warning(
291
- "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
292
- )
293
- local_root = UPath(local_root)
294
- assert isinstance(local_root, LocalPathClasses)
295
- tentative_storage, hub_status = init_storage(
296
- local_root,
297
- instance_id=self._id,
298
- instance_slug=self.slug,
299
- register_hub=True,
300
- region=host,
301
- ) # type: ignore
302
- if hub_status in ["hub-record-created", "hub-record-retrieved"]:
303
- register_storage_in_instance(tentative_storage) # type: ignore
304
- self._local_storage = tentative_storage
305
- logger.important(
306
- f"defaulting to local storage: {self._local_storage.root} on host {host}"
307
- )
308
- else:
309
- logger.warning(f"could not set this local storage location: {local_root}")
310
-
311
- @property
312
- @deprecated("local_storage")
313
- def storage_local(self) -> StorageSettings:
314
- return self.local_storage
315
-
316
- @storage_local.setter
317
- @deprecated("local_storage")
318
- def storage_local(self, local_root_host: tuple[Path | str, str]):
319
- self.local_storage = local_root_host # type: ignore
320
-
321
- @property
322
- def slug(self) -> str:
323
- """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
324
- return f"{self.owner}/{self.name}"
325
-
326
- @property
327
- def git_repo(self) -> str | None:
328
- """Sync transforms with scripts in git repository.
329
-
330
- Provide the full git repo URL.
331
- """
332
- return self._git_repo
333
-
334
- @property
335
- def _id(self) -> UUID:
336
- """The internal instance id."""
337
- return self._id_
338
-
339
- @property
340
- def uid(self) -> str:
341
- """The user-facing instance id."""
342
- return instance_uid_from_uuid(self._id)
343
-
344
- @property
345
- def modules(self) -> set[str]:
346
- """The set of modules that defines the database schema.
347
-
348
- The core schema contained in lamindb is not included in this set.
349
- """
350
- if self._schema_str is None:
351
- return set()
352
- else:
353
- return {module for module in self._schema_str.split(",") if module != ""}
354
-
355
- @property
356
- @deprecated("modules")
357
- def schema(self) -> set[str]:
358
- return self.modules
359
-
360
- @property
361
- def _sqlite_file(self) -> UPath:
362
- """SQLite file."""
363
- filepath = self.storage.root / ".lamindb/lamin.db"
364
- return filepath
365
-
366
- @property
367
- def _sqlite_file_local(self) -> Path:
368
- """Local SQLite file."""
369
- return self.storage.cloud_to_local_no_update(self._sqlite_file)
370
-
371
- def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
372
- """Upload the local sqlite file to the cloud file."""
373
- if self._is_cloud_sqlite:
374
- sqlite_file = self._sqlite_file
375
- logger.warning(
376
- f"updating{' & unlocking' if unlock_cloud_sqlite else ''} cloud SQLite "
377
- f"'{sqlite_file}' of instance"
378
- f" '{self.slug}'"
379
- )
380
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
381
- sqlite_file.upload_from(cache_file, print_progress=True) # type: ignore
382
- cloud_mtime = sqlite_file.modified.timestamp() # type: ignore
383
- # this seems to work even if there is an open connection
384
- # to the cache file
385
- os.utime(cache_file, times=(cloud_mtime, cloud_mtime))
386
- if unlock_cloud_sqlite:
387
- self._cloud_sqlite_locker.unlock()
388
-
389
- def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
390
- """Download the cloud sqlite file if it is newer than local."""
391
- if self._is_cloud_sqlite:
392
- logger.warning(
393
- "updating local SQLite & locking cloud SQLite (sync back & unlock:"
394
- " lamin disconnect)"
395
- )
396
- if lock_cloud_sqlite:
397
- self._cloud_sqlite_locker.lock()
398
- self._check_sqlite_lock()
399
- sqlite_file = self._sqlite_file
400
- cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
401
- sqlite_file.synchronize_to(cache_file, print_progress=True) # type: ignore
402
-
403
- def _check_sqlite_lock(self):
404
- if not self._cloud_sqlite_locker.has_lock:
405
- locked_by = self._cloud_sqlite_locker._locked_by
406
- lock_msg = "Cannot load the instance, it is locked by "
407
- user_info = call_with_fallback(
408
- select_account_handle_name_by_lnid,
409
- lnid=locked_by,
410
- )
411
- if user_info is None:
412
- lock_msg += f"uid: '{locked_by}'."
413
- else:
414
- lock_msg += (
415
- f"'{user_info['handle']}' (uid: '{locked_by}', name:"
416
- f" '{user_info['name']}')."
417
- )
418
- lock_msg += (
419
- " The instance will be automatically unlocked after"
420
- f" {int(EXPIRATION_TIME/3600/24)}d of no activity."
421
- )
422
- raise InstanceLockedException(lock_msg)
423
-
424
- @property
425
- def db(self) -> str:
426
- """Database connection string (URI)."""
427
- if "LAMINDB_DJANGO_DATABASE_URL" in os.environ:
428
- logger.warning(
429
- "LAMINDB_DJANGO_DATABASE_URL env variable "
430
- f"is set to {os.environ['LAMINDB_DJANGO_DATABASE_URL']}. "
431
- "It overwrites all db connections and is used instead of `instance.db`."
432
- )
433
- if self._db is None:
434
- # here, we want the updated sqlite file
435
- # hence, we don't use self._sqlite_file_local()
436
- # error_no_origin=False because on instance init
437
- # the sqlite file is not yet in the cloud
438
- sqlite_filepath = self.storage.cloud_to_local(
439
- self._sqlite_file, error_no_origin=False
440
- )
441
- return f"sqlite:///{sqlite_filepath.as_posix()}"
442
- else:
443
- return self._db
444
-
445
- @property
446
- def dialect(self) -> Literal["sqlite", "postgresql"]:
447
- """SQL dialect."""
448
- if self._db is None or self._db.startswith("sqlite://"):
449
- return "sqlite"
450
- else:
451
- assert self._db.startswith("postgresql"), f"Unexpected DB value: {self._db}"
452
- return "postgresql"
453
-
454
- @property
455
- def _is_cloud_sqlite(self) -> bool:
456
- # can we make this a private property, Sergei?
457
- # as it's not relevant to the user
458
- """Is this a cloud instance with sqlite db."""
459
- return self.dialect == "sqlite" and self.storage.type_is_cloud
460
-
461
- @property
462
- def _cloud_sqlite_locker(self):
463
- # avoid circular import
464
- from .cloud_sqlite_locker import empty_locker, get_locker
465
-
466
- if self._is_cloud_sqlite:
467
- try:
468
- # if _locker_user is None then settings.user is used
469
- return get_locker(self, self._locker_user)
470
- except PermissionError:
471
- logger.warning("read-only access - did not access locker")
472
- return empty_locker
473
- else:
474
- return empty_locker
475
-
476
- @property
477
- def is_remote(self) -> bool:
478
- """Boolean indicating if an instance has no local component."""
479
- if not self.storage.type_is_cloud:
480
- return False
481
-
482
- if self.dialect == "postgresql":
483
- if is_local_db_url(self.db):
484
- return False
485
- # returns True for cloud SQLite
486
- # and remote postgres
487
- return True
488
-
489
- @property
490
- def is_on_hub(self) -> bool:
491
- """Is this instance on the hub?
492
-
493
- Can only reliably establish if user has access to the instance. Will
494
- return `False` in case the instance isn't found.
495
- """
496
- if self._is_on_hub is None:
497
- from ._hub_client import call_with_fallback_auth
498
- from ._hub_crud import select_instance_by_id
499
- from ._settings import settings
500
-
501
- if settings.user.handle != "anonymous":
502
- response = call_with_fallback_auth(
503
- select_instance_by_id, instance_id=self._id.hex
504
- )
505
- else:
506
- response = call_with_fallback(
507
- select_instance_by_id, instance_id=self._id.hex
508
- )
509
- logger.warning("calling anonymously, will miss private instances")
510
- if response is None:
511
- self._is_on_hub = False
512
- else:
513
- self._is_on_hub = True
514
- return self._is_on_hub
515
-
516
- def _get_settings_file(self) -> Path:
517
- return instance_settings_file(self.name, self.owner)
518
-
519
- def _persist(self, write_to_disk: bool = True) -> None:
520
- """Set these instance settings as the current instance.
521
-
522
- Args:
523
- write_to_disk: Save these instance settings to disk and
524
- overwrite the current instance settings file.
525
- """
526
- if write_to_disk:
527
- assert self.name is not None
528
- filepath = self._get_settings_file()
529
- # persist under filepath for later reference
530
- save_instance_settings(self, filepath)
531
- # persist under current file for auto load
532
- shutil.copy2(filepath, current_instance_settings_file())
533
- # persist under settings class for same session reference
534
- # need to import here to avoid circular import
535
- from ._settings import settings
536
-
537
- settings._instance_settings = self
538
-
539
- def _init_db(self):
540
- from lamindb_setup._check_setup import disable_auto_connect
541
-
542
- from .django import setup_django
543
-
544
- disable_auto_connect(setup_django)(self, init=True)
545
-
546
- def _load_db(self) -> tuple[bool, str]:
547
- # Is the database available and initialized as LaminDB?
548
- # returns a tuple of status code and message
549
- if self.dialect == "sqlite" and not self._sqlite_file.exists():
550
- legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
551
- if legacy_file.exists():
552
- logger.warning(
553
- f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
554
- )
555
- legacy_file.rename(self._sqlite_file)
556
- else:
557
- return False, f"SQLite file {self._sqlite_file} does not exist"
558
- # we need the local sqlite to setup django
559
- self._update_local_sqlite_file()
560
- # setting up django also performs a check for migrations & prints them
561
- # as warnings
562
- # this should fail, e.g., if the db is not reachable
563
- from lamindb_setup._check_setup import disable_auto_connect
564
-
565
- from .django import setup_django
566
-
567
- disable_auto_connect(setup_django)(self)
568
-
569
- return True, ""
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import shutil
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING, Literal
7
+
8
+ from django.db.utils import ProgrammingError
9
+ from lamin_utils import logger
10
+
11
+ from ._deprecated import deprecated
12
+ from ._hub_client import call_with_fallback
13
+ from ._hub_crud import select_account_handle_name_by_lnid
14
+ from ._hub_utils import LaminDsn, LaminDsnModel
15
+ from ._settings_save import save_instance_settings
16
+ from ._settings_storage import (
17
+ LEGACY_STORAGE_UID_FILE_KEY,
18
+ STORAGE_UID_FILE_KEY,
19
+ StorageSettings,
20
+ init_storage,
21
+ instance_uid_from_uuid,
22
+ )
23
+ from ._settings_store import current_instance_settings_file, instance_settings_file
24
+ from .cloud_sqlite_locker import (
25
+ EXPIRATION_TIME,
26
+ InstanceLockedException,
27
+ )
28
+ from .upath import LocalPathClasses, UPath
29
+
30
+ if TYPE_CHECKING:
31
+ from uuid import UUID
32
+
33
+ from ._settings_user import UserSettings
34
+
35
+ LOCAL_STORAGE_MESSAGE = "No local storage location found in current environment: defaulting to cloud storage"
36
+
37
+
38
+ def sanitize_git_repo_url(repo_url: str) -> str:
39
+ assert repo_url.startswith("https://")
40
+ return repo_url.replace(".git", "")
41
+
42
+
43
+ def is_local_db_url(db_url: str) -> bool:
44
+ if "@localhost:" in db_url:
45
+ return True
46
+ if "@0.0.0.0:" in db_url:
47
+ return True
48
+ if "@127.0.0.1" in db_url:
49
+ return True
50
+ return False
51
+
52
+
53
+ class InstanceSettings:
54
+ """Instance settings."""
55
+
56
+ def __init__(
57
+ self,
58
+ id: UUID, # instance id/uuid
59
+ owner: str, # owner handle
60
+ name: str, # instance name
61
+ storage: StorageSettings, # storage location
62
+ keep_artifacts_local: bool = False, # default to local storage
63
+ uid: str | None = None, # instance uid/lnid
64
+ db: str | None = None, # DB URI
65
+ modules: str | None = None, # comma-separated string of module names
66
+ git_repo: str | None = None, # a git repo URL
67
+ is_on_hub: bool | None = None, # initialized from hub
68
+ api_url: str | None = None,
69
+ schema_id: UUID | None = None,
70
+ fine_grained_access: bool = False,
71
+ db_permissions: str | None = None,
72
+ _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
73
+ ):
74
+ from ._hub_utils import validate_db_arg
75
+
76
+ self._id_: UUID = id
77
+ self._owner: str = owner
78
+ self._name: str = name
79
+ self._uid: str | None = uid
80
+ self._storage: StorageSettings = storage
81
+ validate_db_arg(db)
82
+ self._db: str | None = db
83
+ self._schema_str: str | None = modules
84
+ self._git_repo = None if git_repo is None else sanitize_git_repo_url(git_repo)
85
+ # local storage
86
+ self._keep_artifacts_local = keep_artifacts_local
87
+ self._local_storage: StorageSettings | None = None
88
+ self._is_on_hub = is_on_hub
89
+ # private, needed for api requests
90
+ self._api_url = api_url
91
+ self._schema_id = schema_id
92
+ # private, whether fine grained access is used
93
+ # needed to be set to request jwt etc
94
+ self._fine_grained_access = fine_grained_access
95
+ # permissions for db such as jwt, read, write etc.
96
+ self._db_permissions = db_permissions
97
+ # if None then settings.user is used
98
+ self._locker_user = _locker_user
99
+
100
+ def __repr__(self):
101
+ """Rich string representation."""
102
+ representation = "Current instance:"
103
+ attrs = ["slug", "storage", "db", "modules", "git_repo"]
104
+ for attr in attrs:
105
+ value = getattr(self, attr)
106
+ if attr == "storage":
107
+ if self.keep_artifacts_local:
108
+ import lamindb as ln
109
+
110
+ self._local_storage = ln.setup.settings.instance._local_storage
111
+ if self._local_storage is not None:
112
+ value_local = self.local_storage
113
+ representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
114
+ representation += (
115
+ f"\n - cloud storage: {value.root_as_str} ({value.region})"
116
+ )
117
+ else:
118
+ representation += (
119
+ f"\n - storage: {value.root_as_str} ({value.region})"
120
+ )
121
+ elif attr == "db":
122
+ if self.dialect != "sqlite":
123
+ model = LaminDsnModel(db=value)
124
+ db_print = LaminDsn.build(
125
+ scheme=model.db.scheme,
126
+ user=model.db.user,
127
+ password="***",
128
+ host="***",
129
+ port=model.db.port,
130
+ database=model.db.database,
131
+ )
132
+ else:
133
+ db_print = value
134
+ representation += f"\n - {attr}: {db_print}"
135
+ elif attr == "modules":
136
+ representation += f"\n - {attr}: {value if value else '{}'}"
137
+ else:
138
+ representation += f"\n - {attr}: {value}"
139
+ return representation
140
+
141
+ @property
142
+ def owner(self) -> str:
143
+ """Instance owner. A user or organization account handle."""
144
+ return self._owner
145
+
146
+ @property
147
+ def name(self) -> str:
148
+ """Instance name."""
149
+ return self._name
150
+
151
+ def _search_local_root(
152
+ self, local_root: str | None = None, mute_warning: bool = False
153
+ ) -> StorageSettings | None:
154
+ from lamindb.models import Storage
155
+
156
+ if local_root is not None:
157
+ local_records = Storage.objects.filter(root=local_root)
158
+ else:
159
+ # only search local managed storage locations (instance_uid=self.uid)
160
+ local_records = Storage.objects.filter(type="local", instance_uid=self.uid)
161
+ all_local_records = local_records.all()
162
+ try:
163
+ # trigger an error in case of a migration issue
164
+ all_local_records.first()
165
+ except ProgrammingError:
166
+ logger.error("not able to load Storage registry: please migrate")
167
+ return None
168
+ found = []
169
+ for record in all_local_records:
170
+ root_path = Path(record.root)
171
+ try:
172
+ root_path_exists = root_path.exists()
173
+ except PermissionError:
174
+ continue
175
+ if root_path_exists:
176
+ marker_path = root_path / STORAGE_UID_FILE_KEY
177
+ try:
178
+ marker_path_exists = marker_path.exists()
179
+ except PermissionError:
180
+ continue
181
+ if not marker_path_exists:
182
+ legacy_filepath = root_path / LEGACY_STORAGE_UID_FILE_KEY
183
+ if legacy_filepath.exists():
184
+ logger.warning(
185
+ f"found legacy marker file, renaming it from {legacy_filepath} to {marker_path}"
186
+ )
187
+ legacy_filepath.rename(marker_path)
188
+ else:
189
+ logger.warning(
190
+ f"local storage location '{root_path}' is corrupted, cannot find marker file with storage uid"
191
+ )
192
+ continue
193
+ try:
194
+ uid = marker_path.read_text().splitlines()[0]
195
+ except PermissionError:
196
+ logger.warning(
197
+ f"ignoring the following location because no permission to read it: {marker_path}"
198
+ )
199
+ continue
200
+ if uid == record.uid:
201
+ found.append(record)
202
+ if found:
203
+ if len(found) > 1:
204
+ found_display = "\n - ".join([f"{record.root}" for record in found])
205
+ logger.important(f"found locations:\n - {found_display}")
206
+ record = found[0]
207
+ logger.important(f"defaulting to local storage: {record.root}")
208
+ return StorageSettings(record.root, region=record.region)
209
+ elif not mute_warning:
210
+ start = LOCAL_STORAGE_MESSAGE[0].lower()
211
+ logger.warning(f"{start}{LOCAL_STORAGE_MESSAGE[1:]}")
212
+ return None
213
+
214
+ @property
215
+ def keep_artifacts_local(self) -> bool:
216
+ """Default to keeping artifacts local.
217
+
218
+ Guide: :doc:`faq/keep-artifacts-local`
219
+ """
220
+ return self._keep_artifacts_local
221
+
222
+ @keep_artifacts_local.setter
223
+ def keep_artifacts_local(self, value: bool):
224
+ if not isinstance(value, bool):
225
+ raise ValueError("keep_artifacts_local must be a boolean value.")
226
+ self._keep_artifacts_local = value
227
+
228
+ @property
229
+ def storage(self) -> StorageSettings:
230
+ """Default storage of instance.
231
+
232
+ For a cloud instance, this is cloud storage. For a local instance, this
233
+ is a local directory.
234
+ """
235
+ return self._storage
236
+
237
+ @property
238
+ def local_storage(self) -> StorageSettings:
239
+ """An alternative default local storage location in the current environment.
240
+
241
+ Serves as the default storage location if :attr:`keep_artifacts_local` is enabled.
242
+
243
+ Guide: :doc:`faq/keep-artifacts-local`
244
+ """
245
+ if not self.keep_artifacts_local:
246
+ raise ValueError(
247
+ "`keep_artifacts_local` is False, switch via: ln.setup.settings.instance.keep_artifacts_local = True"
248
+ )
249
+ if self._local_storage is None:
250
+ self._local_storage = self._search_local_root()
251
+ if self._local_storage is None:
252
+ raise ValueError(LOCAL_STORAGE_MESSAGE)
253
+ return self._local_storage
254
+
255
@local_storage.setter
def local_storage(self, local_root_host: tuple[Path | str, str]):
    """Switch to, or register, a local storage location.

    Args:
        local_root_host: Either a `(root, host)` tuple or a bare root path.
            A bare path is assigned the host `"unspecified-host"`, which is
            deprecated for creating new storage locations.

    Raises:
        ValueError: If `keep_artifacts_local` is not enabled.
    """
    from lamindb_setup._init_instance import register_storage_in_instance

    if isinstance(local_root_host, tuple):
        local_root, host = local_root_host
    else:
        local_root, host = local_root_host, "unspecified-host"

    local_root = Path(local_root)
    if not self.keep_artifacts_local:
        raise ValueError("`keep_artifacts_local` is not enabled for this instance.")

    # if the requested root is already known, this is a mere switch
    known_storage = self._search_local_root(
        local_root=StorageSettings(local_root).root_as_str, mute_warning=True
    )
    if known_storage is not None:
        self._local_storage = known_storage
        return None

    # another root is already configured here: ask before registering a new one
    other_storage = self._search_local_root(mute_warning=True)
    if other_storage is not None:
        answer = (
            "y"
            if os.getenv("LAMIN_TESTING") == "true"
            else input(
                "You already configured a local storage root for this instance in this"
                f" environment: {self.local_storage.root}\nDo you want to register another one? (y/n)"
            )
        )
        if answer != "y":
            return None

    if host == "unspecified-host":
        logger.warning(
            "setting local_storage with a single path is deprecated for creating storage locations"
        )
        logger.warning(
            "use this instead: ln.Storage(root='/dir/our_shared_dir', host='our-server-123').save()"
        )

    local_root = UPath(local_root)
    assert isinstance(local_root, LocalPathClasses)
    # the host is handed to init_storage through its `region` argument
    tentative_storage, hub_status = init_storage(
        local_root,
        instance_id=self._id,
        instance_slug=self.slug,
        register_hub=True,
        region=host,
    )  # type: ignore
    if hub_status not in ("hub-record-created", "hub-record-retrieved"):
        logger.warning(f"could not set this local storage location: {local_root}")
    else:
        register_storage_in_instance(tentative_storage)  # type: ignore
        self._local_storage = tentative_storage
        logger.important(
            f"defaulting to local storage: {self._local_storage.root} on host {host}"
        )
310
+
311
@property
@deprecated("local_storage")
def storage_local(self) -> StorageSettings:
    # deprecated alias: forwards to `local_storage`
    current_local_storage = self.local_storage
    return current_local_storage
315
+
316
@storage_local.setter
@deprecated("local_storage")
def storage_local(self, local_root_host: tuple[Path | str, str]):
    # deprecated alias: forwards the assignment to the `local_storage` setter
    setattr(self, "local_storage", local_root_host)
320
+
321
@property
def slug(self) -> str:
    """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
    return "{}/{}".format(self.owner, self.name)
325
+
326
@property
def git_repo(self) -> str | None:
    """Git repository used to sync transforms with scripts.

    Holds the full git repo URL, or `None` if no repository is set.
    """
    repo_url = self._git_repo
    return repo_url
333
+
334
@property
def _id(self) -> UUID:
    """Internal instance id, as stored in `_id_`."""
    internal_id = self._id_
    return internal_id
338
+
339
@property
def uid(self) -> str:
    """User-facing instance id, derived from the internal id."""
    internal_id = self._id
    return instance_uid_from_uuid(internal_id)
343
+
344
@property
def modules(self) -> set[str]:
    """Set of modules that defines the database schema.

    The core schema contained in lamindb is not part of this set. The set
    is parsed from the comma-separated schema string; empty entries are
    dropped.
    """
    if self._schema_str is None:
        return set()
    names = self._schema_str.split(",")
    return {name for name in names if name}
354
+
355
@property
@deprecated("modules")
def schema(self) -> set[str]:
    # deprecated alias: forwards to `modules`
    module_names = self.modules
    return module_names
359
+
360
@property
def _sqlite_file(self) -> UPath:
    """Path of the SQLite file below the default storage root."""
    db_relpath = ".lamindb/lamin.db"
    return self.storage.root / db_relpath
365
+
366
@property
def _sqlite_file_local(self) -> Path:
    """Local path of the SQLite file, as mapped by `cloud_to_local_no_update`."""
    remote_db = self._sqlite_file
    return self.storage.cloud_to_local_no_update(remote_db)
370
+
371
def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
    """Upload the local SQLite file to the cloud file.

    Does nothing unless this is a cloud SQLite instance.

    Args:
        unlock_cloud_sqlite: Release the cloud SQLite lock after uploading.
    """
    if not self._is_cloud_sqlite:
        return None
    cloud_db = self._sqlite_file
    unlock_note = " & unlocking" if unlock_cloud_sqlite else ""
    logger.warning(
        f"updating{unlock_note} cloud SQLite '{cloud_db}' of instance '{self.slug}'"
    )
    local_db = self.storage.cloud_to_local_no_update(cloud_db)
    cloud_db.upload_from(local_db, print_progress=True)  # type: ignore
    uploaded_mtime = cloud_db.modified.timestamp()  # type: ignore
    # align the cache file's timestamps with the cloud file; this seems to
    # work even if there is an open connection to the cache file
    os.utime(local_db, times=(uploaded_mtime, uploaded_mtime))
    if unlock_cloud_sqlite:
        self._cloud_sqlite_locker.unlock()
388
+
389
def _update_local_sqlite_file(self, lock_cloud_sqlite: bool = True) -> None:
    """Download the cloud SQLite file if it is newer than local.

    Does nothing unless this is a cloud SQLite instance.

    Args:
        lock_cloud_sqlite: Acquire the cloud SQLite lock and check that it
            is held (via `_check_sqlite_lock`) before synchronizing.
    """
    if not self._is_cloud_sqlite:
        return None
    if lock_cloud_sqlite:
        logger.warning(
            "updating local SQLite & locking cloud SQLite (sync back & unlock:"
            " lamin disconnect)"
        )
        self._cloud_sqlite_locker.lock()
        self._check_sqlite_lock()
    else:
        # no lock is taken on this path, so the log must not claim one
        logger.warning("updating local SQLite")
    sqlite_file = self._sqlite_file
    cache_file = self.storage.cloud_to_local_no_update(sqlite_file)
    sqlite_file.synchronize_to(cache_file, print_progress=True)  # type: ignore
402
+
403
def _check_sqlite_lock(self):
    """Raise if the cloud SQLite lock is not held by the current locker.

    Raises:
        InstanceLockedException: If the locker reports that it does not
            hold the lock; the message names the locking user.
    """
    if self._cloud_sqlite_locker.has_lock:
        return None
    locked_by = self._cloud_sqlite_locker._locked_by
    # look up handle and name of the locking user; may yield None
    user_info = call_with_fallback(
        select_account_handle_name_by_lnid,
        lnid=locked_by,
    )
    if user_info is not None:
        holder = (
            f"'{user_info['handle']}' (uid: '{locked_by}', name:"
            f" '{user_info['name']}')."
        )
    else:
        holder = f"uid: '{locked_by}'."
    expiry_note = (
        " The instance will be automatically unlocked after"
        f" {int(EXPIRATION_TIME / 3600 / 24)}d of no activity."
    )
    raise InstanceLockedException(
        "".join(("Cannot load the instance, it is locked by ", holder, expiry_note))
    )
423
+
424
@property
def db(self) -> str:
    """Database connection string (URI).

    If no database URL is set, a `sqlite:///` URI pointing at the local
    copy of the instance's SQLite file is returned.
    """
    if "LAMINDB_DJANGO_DATABASE_URL" in os.environ:
        # NOTE(review): the full URL is written to the log and may contain
        # credentials; consider masking it
        logger.warning(
            f"LAMINDB_DJANGO_DATABASE_URL env variable is set to {os.environ['LAMINDB_DJANGO_DATABASE_URL']}. "
            "It overwrites all db connections and is used instead of `instance.db`."
        )
    if self._db is not None:
        return self._db
    # here we want the updated sqlite file, hence `cloud_to_local` is used
    # rather than the `_sqlite_file_local` property;
    # error_no_origin=False because on instance init the sqlite file is
    # not yet in the cloud
    local_db = self.storage.cloud_to_local(self._sqlite_file, error_no_origin=False)
    return f"sqlite:///{local_db.as_posix()}"
444
+
445
@property
def dialect(self) -> Literal["sqlite", "postgresql"]:
    """SQL dialect, derived from the database URL.

    An unset URL or one starting with `sqlite://` means SQLite; anything
    else is expected to be a PostgreSQL URL.
    """
    url = self._db
    is_sqlite = url is None or url.startswith("sqlite://")
    if is_sqlite:
        return "sqlite"
    assert url.startswith("postgresql"), f"Unexpected DB value: {url}"
    return "postgresql"
453
+
454
@property
def _is_cloud_sqlite(self) -> bool:
    """Is this a cloud instance with sqlite db."""
    # private on purpose: not relevant to the user
    if self.dialect != "sqlite":
        return False
    return self.storage.type_is_cloud
460
+
461
@property
def _cloud_sqlite_locker(self):
    """Locker for the cloud SQLite file.

    Yields `empty_locker` for instances that are not cloud SQLite, and
    also when obtaining the real locker fails with `PermissionError`.
    """
    # imported here to avoid a circular import
    from .cloud_sqlite_locker import empty_locker, get_locker

    if not self._is_cloud_sqlite:
        return empty_locker
    try:
        # if _locker_user is None then settings.user is used
        locker = get_locker(self, self._locker_user)
    except PermissionError:
        logger.warning("read-only access - did not access locker")
        return empty_locker
    return locker
475
+
476
@property
def is_remote(self) -> bool:
    """Boolean indicating if an instance has no local component."""
    if not self.storage.type_is_cloud:
        return False
    # with cloud storage, only a postgres db on a local URL counts as a
    # local component; cloud SQLite and remote postgres are remote
    has_local_postgres = self.dialect == "postgresql" and is_local_db_url(self.db)
    return not has_local_postgres
488
+
489
@property
def is_on_hub(self) -> bool:
    """Is this instance on the hub?

    This can only be established reliably if the user has access to the
    instance; `False` is returned when the instance isn't found. The
    result is cached after the first lookup.
    """
    if self._is_on_hub is not None:
        return self._is_on_hub

    from ._hub_client import call_with_fallback_auth
    from ._hub_crud import select_instance_by_id
    from ._settings import settings

    if settings.user.handle == "anonymous":
        response = call_with_fallback(
            select_instance_by_id, instance_id=self._id.hex
        )
        logger.warning("calling anonymously, will miss private instances")
    else:
        response = call_with_fallback_auth(
            select_instance_by_id, instance_id=self._id.hex
        )
    self._is_on_hub = response is not None
    return self._is_on_hub
515
+
516
def _get_settings_file(self) -> Path:
    """Return the settings file path for this instance's name and owner."""
    name, owner = self.name, self.owner
    return instance_settings_file(name, owner)
518
+
519
def _persist(self, write_to_disk: bool = True) -> None:
    """Set these instance settings as the current instance.

    Args:
        write_to_disk: Save these instance settings to disk and
            overwrite the current instance settings file.
    """
    if write_to_disk:
        assert self.name is not None
        named_file = self._get_settings_file()
        # keep a copy under the instance-specific path for later reference
        save_instance_settings(self, named_file)
        # and mirror it to the "current instance" file for auto load
        current_file = current_instance_settings_file()
        shutil.copy2(named_file, current_file)
    # register on the settings object for same-session reference;
    # imported here to avoid a circular import
    from ._settings import settings as global_settings

    global_settings._instance_settings = self
538
+
539
def _init_db(self):
    """Run `setup_django` for this instance with `init=True`.

    The call is wrapped in `disable_auto_connect`.
    """
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    guarded_setup = disable_auto_connect(setup_django)
    guarded_setup(self, init=True)
545
+
546
def _load_db(self) -> tuple[bool, str]:
    """Check that the database is available and set up django for it.

    Returns:
        A `(status, message)` tuple: `(False, reason)` if the SQLite file
        is missing, `(True, "")` otherwise.
    """
    if self.dialect == "sqlite" and not self._sqlite_file.exists():
        # fall back to the legacy file name and migrate it if present
        legacy_file = self.storage.key_to_filepath(f"{self._id.hex}.lndb")
        if not legacy_file.exists():
            return False, f"SQLite file {self._sqlite_file} does not exist"
        logger.warning(
            f"The SQLite file is being renamed from {legacy_file} to {self._sqlite_file}"
        )
        legacy_file.rename(self._sqlite_file)
    # django needs the local sqlite file
    self._update_local_sqlite_file()
    # setting up django also performs a check for migrations & prints them
    # as warnings; this should fail, e.g., if the db is not reachable
    from lamindb_setup._check_setup import disable_auto_connect

    from .django import setup_django

    guarded_setup = disable_auto_connect(setup_django)
    guarded_setup(self)

    return True, ""