lamindb_setup 1.19.0__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -441
  6. lamindb_setup/_delete.py +155 -155
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -423
  11. lamindb_setup/_migrate.py +331 -331
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -81
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -276
  20. lamindb_setup/core/_aws_storage.py +57 -57
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -288
  25. lamindb_setup/core/_hub_crud.py +247 -247
  26. lamindb_setup/core/_hub_utils.py +100 -100
  27. lamindb_setup/core/_private_django_api.py +80 -80
  28. lamindb_setup/core/_settings.py +440 -434
  29. lamindb_setup/core/_settings_instance.py +22 -1
  30. lamindb_setup/core/_settings_load.py +162 -162
  31. lamindb_setup/core/_settings_save.py +108 -108
  32. lamindb_setup/core/_settings_storage.py +433 -433
  33. lamindb_setup/core/_settings_store.py +162 -162
  34. lamindb_setup/core/_settings_user.py +55 -55
  35. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  36. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  37. lamindb_setup/core/django.py +414 -413
  38. lamindb_setup/core/exceptions.py +1 -1
  39. lamindb_setup/core/hashing.py +134 -134
  40. lamindb_setup/core/types.py +1 -1
  41. lamindb_setup/core/upath.py +1031 -1028
  42. lamindb_setup/errors.py +72 -72
  43. lamindb_setup/io.py +423 -423
  44. lamindb_setup/types.py +17 -17
  45. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +3 -2
  46. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  47. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  48. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  49. lamindb_setup-1.19.0.dist-info/RECORD +0 -51
@@ -1,423 +1,423 @@
1
- from __future__ import annotations
2
-
3
- import importlib
4
- import os
5
- import uuid
6
- from typing import TYPE_CHECKING, Literal
7
- from uuid import UUID
8
-
9
- from lamin_utils import logger
10
-
11
- from ._disconnect import disconnect
12
- from ._silence_loggers import silence_loggers
13
- from .core import InstanceSettings
14
- from .core._docs import doc_args
15
- from .core._settings import settings
16
- from .core._settings_instance import check_is_instance_remote
17
- from .core._settings_storage import StorageSettings, init_storage
18
- from .core.upath import UPath
19
- from .errors import CannotSwitchDefaultInstance, InstanceNotCreated
20
-
21
- if TYPE_CHECKING:
22
- from lamindb.models import Storage
23
- from pydantic import PostgresDsn
24
-
25
- from .core._settings_user import UserSettings
26
- from .types import UPathStr
27
-
28
-
29
- def get_schema_module_name(module_name, raise_import_error: bool = True) -> str | None:
30
- import importlib.util
31
-
32
- if module_name == "core":
33
- return "lamindb"
34
- name_attempts = [f"lnschema_{module_name.replace('-', '_')}", module_name]
35
- for name in name_attempts:
36
- module_spec = importlib.util.find_spec(name)
37
- if module_spec is not None:
38
- return name
39
- message = f"schema module '{module_name}' is not installed → resolve via `pip install {module_name}`"
40
- if raise_import_error:
41
- raise ImportError(message)
42
- return None
43
-
44
-
45
- def register_storage_in_instance(ssettings: StorageSettings) -> Storage:
46
- from lamindb.models import Storage
47
-
48
- # how do we ensure that this function is only called passing
49
- # the managing instance?
50
- kwargs = {
51
- "root": ssettings.root_as_str,
52
- "type": ssettings.type,
53
- "region": ssettings.region,
54
- "instance_uid": ssettings.instance_uid,
55
- "run": None,
56
- "_skip_preparation": True,
57
- }
58
- if ssettings._uid is not None:
59
- kwargs["uid"] = ssettings._uid
60
- # this checks if the storage already exists under the hood
61
- storage = Storage(**kwargs).save()
62
- return storage
63
-
64
-
65
- def register_user(usettings: UserSettings, update_user: bool = True) -> None:
66
- # we have to import this here dynamically because otherwise
67
- # the except below will fail on re-connect due to reset
68
- from django.core.exceptions import FieldError
69
- from django.db.utils import IntegrityError, OperationalError, ProgrammingError
70
- from lamindb.models import User
71
-
72
- if not update_user and User.objects.filter(uid=usettings.uid).exists():
73
- return
74
-
75
- try:
76
- # need to have try except because of integer primary key migration
77
- user, created = User.objects.update_or_create(
78
- uid=usettings.uid,
79
- defaults={
80
- "handle": usettings.handle,
81
- "name": usettings.name,
82
- },
83
- )
84
- # for users with only read access, except via ProgrammingError
85
- # ProgrammingError: permission denied for table lamindb_user
86
- # IntegrityError: when trying to update a user on a fine-grained access instance
87
- except (OperationalError, FieldError, ProgrammingError, IntegrityError):
88
- pass
89
-
90
-
91
- def register_initial_records(
92
- isettings: InstanceSettings, usettings: UserSettings
93
- ) -> None:
94
- """Register space, user & storage in DB."""
95
- from django.db.utils import OperationalError
96
- from lamindb.models import Branch, Space
97
-
98
- try:
99
- Space.objects.get_or_create(
100
- uid=12 * "a",
101
- name="all",
102
- description="Every team & user with access to the instance has access.",
103
- )
104
- Branch.objects.get_or_create(
105
- id=-1,
106
- uid=12 * "t",
107
- name="trash",
108
- description="The trash.",
109
- )
110
- Branch.objects.get_or_create(
111
- id=0,
112
- uid=12 * "a",
113
- name="archive",
114
- description="The archive.",
115
- )
116
- Branch.objects.get_or_create(
117
- uid=12 * "m",
118
- name="main",
119
- description="The main & default branch of the instance.",
120
- )
121
- register_user(usettings)
122
- register_storage_in_instance(isettings.storage)
123
- except OperationalError as error:
124
- logger.warning(f"instance seems not set up ({error})")
125
-
126
-
127
- ERROR_SQLITE_CACHE = """
128
- Your cached local SQLite file exists, while your cloud SQLite file ({}) doesn't.
129
- Either delete your cache ({}) or add it back to the cloud (if delete was accidental).
130
- """
131
-
132
-
133
- def process_connect_response(
134
- response: tuple | str, instance_identifier: str
135
- ) -> tuple[
136
- UUID,
137
- Literal[
138
- "instance-corrupted-or-deleted", "account-not-exists", "instance-not-found"
139
- ],
140
- ]:
141
- # for internal use when creating instances through CICD
142
- if isinstance(response, tuple) and response[0] == "instance-corrupted-or-deleted":
143
- hub_result = response[1]
144
- instance_state = response[0]
145
- instance_id = UUID(hub_result["id"])
146
- else:
147
- instance_id_str = os.getenv("LAMINDB_INSTANCE_ID_INIT")
148
- if instance_id_str is None:
149
- instance_id = uuid.uuid5(uuid.NAMESPACE_URL, instance_identifier)
150
- else:
151
- instance_id = UUID(instance_id_str)
152
- instance_state = response
153
- return instance_id, instance_state
154
-
155
-
156
- def process_modules_arg(modules: str | None = None) -> str:
157
- if modules is None or modules == "":
158
- return ""
159
- # currently no actual validation, can add back if we see a need
160
- # the following just strips white spaces
161
- to_be_validated = [s.strip() for s in modules.split(",")]
162
- return ",".join(to_be_validated)
163
-
164
-
165
- def validate_init_args(
166
- *,
167
- storage: UPathStr,
168
- name: str | None = None,
169
- db: PostgresDsn | None = None,
170
- modules: str | None = None,
171
- _test: bool = False,
172
- _write_settings: bool = True,
173
- _user: UserSettings | None = None,
174
- ) -> tuple[
175
- str,
176
- UUID,
177
- Literal[
178
- "connected",
179
- "instance-corrupted-or-deleted",
180
- "account-not-exists",
181
- "instance-not-found",
182
- ],
183
- str,
184
- ]:
185
- from ._connect_instance import connect
186
-
187
- if storage is None:
188
- raise SystemExit("✗ `storage` argument can't be `None`")
189
- # should be called as the first thing
190
- name_str = infer_instance_name(storage=storage, name=name, db=db)
191
- owner_str = settings.user.handle if _user is None else _user.handle
192
- # test whether instance exists by trying to load it
193
- instance_slug = f"{owner_str}/{name_str}"
194
- response = connect(
195
- instance_slug,
196
- _db=db,
197
- _raise_not_found_error=False,
198
- _test=_test,
199
- _write_settings=_write_settings,
200
- _user=_user,
201
- )
202
- instance_id: UUID
203
- instance_state: Literal[
204
- "connected",
205
- "instance-corrupted-or-deleted",
206
- "account-not-exists",
207
- "instance-not-found",
208
- ]
209
- if response is None:
210
- instance_state, instance_id = "connected", settings.instance._id
211
- else:
212
- instance_id, instance_state = process_connect_response(response, instance_slug)
213
- modules = process_modules_arg(modules)
214
- return name_str, instance_id, instance_state, instance_slug
215
-
216
-
217
- DOC_STORAGE_ARG = "A local or remote folder (`'s3://...'` or `'gs://...'`). Defaults to current working directory."
218
- DOC_INSTANCE_NAME = (
219
- "Instance name. If not passed, it will equal the folder name passed to `storage`."
220
- )
221
- DOC_DB = "Database connection URL. Defaults to `None`, which implies an SQLite file in the storage location."
222
- DOC_MODULES = "Comma-separated string of schema modules."
223
- DOC_LOW_LEVEL_KWARGS = "Keyword arguments for low-level control."
224
-
225
-
226
- @doc_args(DOC_STORAGE_ARG, DOC_INSTANCE_NAME, DOC_DB, DOC_MODULES, DOC_LOW_LEVEL_KWARGS)
227
- def init(
228
- *,
229
- storage: UPathStr = ".",
230
- name: str | None = None,
231
- db: PostgresDsn | None = None,
232
- modules: str | None = None,
233
- **kwargs,
234
- ) -> None:
235
- """Init a LaminDB instance.
236
-
237
- Args:
238
- storage: {}
239
- name: {}
240
- db: {}
241
- modules: {}
242
- **kwargs: {}
243
-
244
- See Also:
245
- Init an instance for via the CLI, see `here <https://docs.lamin.ai/cli#init>`__.
246
- """
247
- from ._check_setup import _check_instance_setup
248
- from ._connect_instance import (
249
- reset_django_module_variables,
250
- validate_connection_state,
251
- )
252
- from .core._hub_core import init_instance_hub
253
-
254
- silence_loggers()
255
-
256
- isettings = None
257
- ssettings = None
258
-
259
- _write_settings: bool = kwargs.get("_write_settings", True)
260
- if modules is None:
261
- modules = kwargs.get("schema", None)
262
- _test: bool = kwargs.get("_test", False)
263
-
264
- # use this user instead of settings.user
265
- # contains access_token
266
- _user: UserSettings | None = kwargs.get("_user", None)
267
- user_handle: str = settings.user.handle if _user is None else _user.handle
268
- user__uuid: UUID = settings.user._uuid if _user is None else _user._uuid # type: ignore
269
- access_token: str | None = None if _user is None else _user.access_token
270
-
271
- try:
272
- name_str, instance_id, instance_state, _ = validate_init_args(
273
- storage=storage,
274
- name=name,
275
- db=db,
276
- modules=modules,
277
- _test=_test,
278
- _write_settings=_write_settings,
279
- _user=_user, # will get from settings.user if _user is None
280
- )
281
- if instance_state == "connected":
282
- return None
283
- if _check_instance_setup() and not _test:
284
- validate_connection_state(user_handle, name_str)
285
- elif _write_settings:
286
- disconnect(mute=True)
287
- isettings = InstanceSettings(
288
- id=instance_id, # type: ignore
289
- owner=user_handle,
290
- name=name_str,
291
- db=db,
292
- modules=modules,
293
- # to lock passed user in isettings._cloud_sqlite_locker.lock()
294
- _locker_user=_user, # only has effect if cloud sqlite
295
- )
296
- register_on_hub = (
297
- check_is_instance_remote(root=storage, db=db)
298
- and instance_state != "instance-corrupted-or-deleted"
299
- )
300
- if register_on_hub:
301
- init_instance_hub(
302
- isettings, account_id=user__uuid, access_token=access_token
303
- )
304
- ssettings, _ = init_storage(
305
- storage,
306
- instance_id=instance_id,
307
- instance_slug=f"{user_handle}/{name_str}",
308
- init_instance=True,
309
- register_hub=register_on_hub,
310
- created_by=user__uuid,
311
- access_token=access_token,
312
- )
313
- isettings._storage = ssettings
314
- if register_on_hub and not ssettings.is_on_hub:
315
- raise InstanceNotCreated(
316
- "Unable to create the instance because failed to register the storage."
317
- )
318
- validate_sqlite_state(isettings)
319
- # why call it here if it is also called in load_from_isettings?
320
- isettings._persist(write_to_disk=_write_settings)
321
- if _test:
322
- return None
323
- isettings._init_db()
324
- load_from_isettings(
325
- isettings, init=True, user=_user, write_settings=_write_settings
326
- )
327
- if _write_settings and isettings._is_cloud_sqlite:
328
- isettings._cloud_sqlite_locker.lock()
329
- logger.warning(
330
- "locked instance (to unlock and push changes to the cloud SQLite file,"
331
- " call: lamin disconnect)"
332
- )
333
- if register_on_hub and isettings.dialect != "sqlite":
334
- from ._schema_metadata import update_schema_in_hub
335
-
336
- update_schema_in_hub(access_token=access_token)
337
- reset_django_module_variables()
338
- logger.important(f"initialized lamindb: {isettings.slug}")
339
- except Exception as e:
340
- from ._delete import delete_by_isettings
341
- from .core._hub_core import delete_instance_record, delete_storage_record
342
-
343
- if isettings is not None:
344
- if _write_settings:
345
- delete_by_isettings(isettings)
346
- else:
347
- settings._instance_settings = None
348
- if user_handle != "anonymous" or access_token is not None:
349
- if ssettings is not None and ssettings.is_on_hub:
350
- delete_storage_record(ssettings, access_token=access_token)
351
- if isettings is not None and isettings.is_on_hub:
352
- delete_instance_record(isettings._id, access_token=access_token)
353
- raise e
354
- return None
355
-
356
-
357
- def load_from_isettings(
358
- isettings: InstanceSettings,
359
- *,
360
- init: bool = False,
361
- user: UserSettings | None = None,
362
- write_settings: bool = True,
363
- ) -> None:
364
- from .core._setup_bionty_sources import write_bionty_sources
365
-
366
- user = settings.user if user is None else user
367
-
368
- if init:
369
- # during init space, user and storage need to be registered
370
- register_initial_records(isettings, user)
371
- write_bionty_sources(isettings)
372
- isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
373
- else:
374
- # when loading, django is already set up
375
- #
376
- # only register user if the instance is connected for the first time in an environment
377
- # this is our best proxy for that the user might not yet be registered
378
- if not isettings._get_settings_file().exists():
379
- # do not try to update the user on fine grained access instances
380
- # this is blocked anyways, only select and insert are allowed
381
- register_user(user, update_user=not isettings._fine_grained_access)
382
- isettings._persist(write_to_disk=write_settings)
383
- # clear branch & space cache after reconnecting
384
- settings._branch = None
385
- settings._space = None
386
-
387
-
388
- def validate_sqlite_state(isettings: InstanceSettings) -> None:
389
- if isettings._is_cloud_sqlite:
390
- if (
391
- # it's important to first evaluate the existence check
392
- # for the local sqlite file because it doesn't need a network
393
- # request
394
- isettings._sqlite_file_local.exists()
395
- and not isettings._sqlite_file.exists()
396
- ):
397
- raise RuntimeError(
398
- ERROR_SQLITE_CACHE.format(
399
- isettings._sqlite_file, isettings._sqlite_file_local
400
- )
401
- )
402
-
403
-
404
- def infer_instance_name(
405
- *,
406
- storage: UPathStr,
407
- name: str | None = None,
408
- db: PostgresDsn | None = None,
409
- ) -> str:
410
- if name is not None:
411
- if "/" in name:
412
- raise ValueError("Invalid instance name: '/' delimiter not allowed.")
413
- return name
414
- if db is not None:
415
- logger.warning("using the sql database name for the instance name")
416
- # this isn't a great way to access the db name
417
- # could use LaminDsn instead
418
- return str(db).split("/")[-1]
419
- if storage == "create-s3":
420
- raise ValueError("pass name to init if storage = 'create-s3'")
421
- storage_path = UPath(storage).resolve()
422
- name = storage_path.path.rstrip("/").split("/")[-1]
423
- return name.lower()
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import os
5
+ import uuid
6
+ from typing import TYPE_CHECKING, Literal
7
+ from uuid import UUID
8
+
9
+ from lamin_utils import logger
10
+
11
+ from ._disconnect import disconnect
12
+ from ._silence_loggers import silence_loggers
13
+ from .core import InstanceSettings
14
+ from .core._docs import doc_args
15
+ from .core._settings import settings
16
+ from .core._settings_instance import check_is_instance_remote
17
+ from .core._settings_storage import StorageSettings, init_storage
18
+ from .core.upath import UPath
19
+ from .errors import CannotSwitchDefaultInstance, InstanceNotCreated
20
+
21
+ if TYPE_CHECKING:
22
+ from lamindb.models import Storage
23
+ from pydantic import PostgresDsn
24
+
25
+ from .core._settings_user import UserSettings
26
+ from .types import UPathStr
27
+
28
+
29
+ def get_schema_module_name(module_name, raise_import_error: bool = True) -> str | None:
30
+ import importlib.util
31
+
32
+ if module_name == "core":
33
+ return "lamindb"
34
+ name_attempts = [f"lnschema_{module_name.replace('-', '_')}", module_name]
35
+ for name in name_attempts:
36
+ module_spec = importlib.util.find_spec(name)
37
+ if module_spec is not None:
38
+ return name
39
+ message = f"schema module '{module_name}' is not installed → resolve via `pip install {module_name}`"
40
+ if raise_import_error:
41
+ raise ImportError(message)
42
+ return None
43
+
44
+
45
+ def register_storage_in_instance(ssettings: StorageSettings) -> Storage:
46
+ from lamindb.models import Storage
47
+
48
+ # how do we ensure that this function is only called passing
49
+ # the managing instance?
50
+ kwargs = {
51
+ "root": ssettings.root_as_str,
52
+ "type": ssettings.type,
53
+ "region": ssettings.region,
54
+ "instance_uid": ssettings.instance_uid,
55
+ "run": None,
56
+ "_skip_preparation": True,
57
+ }
58
+ if ssettings._uid is not None:
59
+ kwargs["uid"] = ssettings._uid
60
+ # this checks if the storage already exists under the hood
61
+ storage = Storage(**kwargs).save()
62
+ return storage
63
+
64
+
65
+ def register_user(usettings: UserSettings, update_user: bool = True) -> None:
66
+ # we have to import this here dynamically because otherwise
67
+ # the except below will fail on re-connect due to reset
68
+ from django.core.exceptions import FieldError
69
+ from django.db.utils import IntegrityError, OperationalError, ProgrammingError
70
+ from lamindb.models import User
71
+
72
+ if not update_user and User.objects.filter(uid=usettings.uid).exists():
73
+ return
74
+
75
+ try:
76
+ # need to have try except because of integer primary key migration
77
+ user, created = User.objects.update_or_create(
78
+ uid=usettings.uid,
79
+ defaults={
80
+ "handle": usettings.handle,
81
+ "name": usettings.name,
82
+ },
83
+ )
84
+ # for users with only read access, except via ProgrammingError
85
+ # ProgrammingError: permission denied for table lamindb_user
86
+ # IntegrityError: when trying to update a user on a fine-grained access instance
87
+ except (OperationalError, FieldError, ProgrammingError, IntegrityError):
88
+ pass
89
+
90
+
91
+ def register_initial_records(
92
+ isettings: InstanceSettings, usettings: UserSettings
93
+ ) -> None:
94
+ """Register space, user & storage in DB."""
95
+ from django.db.utils import OperationalError
96
+ from lamindb.models import Branch, Space
97
+
98
+ try:
99
+ Space.objects.get_or_create(
100
+ uid=12 * "a",
101
+ name="all",
102
+ description="Every team & user with access to the instance has access.",
103
+ )
104
+ Branch.objects.get_or_create(
105
+ id=-1,
106
+ uid=12 * "t",
107
+ name="trash",
108
+ description="The trash.",
109
+ )
110
+ Branch.objects.get_or_create(
111
+ id=0,
112
+ uid=12 * "a",
113
+ name="archive",
114
+ description="The archive.",
115
+ )
116
+ Branch.objects.get_or_create(
117
+ uid=12 * "m",
118
+ name="main",
119
+ description="The main & default branch of the instance.",
120
+ )
121
+ register_user(usettings)
122
+ register_storage_in_instance(isettings.storage)
123
+ except OperationalError as error:
124
+ logger.warning(f"instance seems not set up ({error})")
125
+
126
+
127
+ ERROR_SQLITE_CACHE = """
128
+ Your cached local SQLite file exists, while your cloud SQLite file ({}) doesn't.
129
+ Either delete your cache ({}) or add it back to the cloud (if delete was accidental).
130
+ """
131
+
132
+
133
+ def process_connect_response(
134
+ response: tuple | str, instance_identifier: str
135
+ ) -> tuple[
136
+ UUID,
137
+ Literal[
138
+ "instance-corrupted-or-deleted", "account-not-exists", "instance-not-found"
139
+ ],
140
+ ]:
141
+ # for internal use when creating instances through CICD
142
+ if isinstance(response, tuple) and response[0] == "instance-corrupted-or-deleted":
143
+ hub_result = response[1]
144
+ instance_state = response[0]
145
+ instance_id = UUID(hub_result["id"])
146
+ else:
147
+ instance_id_str = os.getenv("LAMINDB_INSTANCE_ID_INIT")
148
+ if instance_id_str is None:
149
+ instance_id = uuid.uuid5(uuid.NAMESPACE_URL, instance_identifier)
150
+ else:
151
+ instance_id = UUID(instance_id_str)
152
+ instance_state = response
153
+ return instance_id, instance_state
154
+
155
+
156
+ def process_modules_arg(modules: str | None = None) -> str:
157
+ if modules is None or modules == "":
158
+ return ""
159
+ # currently no actual validation, can add back if we see a need
160
+ # the following just strips white spaces
161
+ to_be_validated = [s.strip() for s in modules.split(",")]
162
+ return ",".join(to_be_validated)
163
+
164
+
165
+ def validate_init_args(
166
+ *,
167
+ storage: UPathStr,
168
+ name: str | None = None,
169
+ db: PostgresDsn | None = None,
170
+ modules: str | None = None,
171
+ _test: bool = False,
172
+ _write_settings: bool = True,
173
+ _user: UserSettings | None = None,
174
+ ) -> tuple[
175
+ str,
176
+ UUID,
177
+ Literal[
178
+ "connected",
179
+ "instance-corrupted-or-deleted",
180
+ "account-not-exists",
181
+ "instance-not-found",
182
+ ],
183
+ str,
184
+ ]:
185
+ from ._connect_instance import connect
186
+
187
+ if storage is None:
188
+ raise SystemExit("✗ `storage` argument can't be `None`")
189
+ # should be called as the first thing
190
+ name_str = infer_instance_name(storage=storage, name=name, db=db)
191
+ owner_str = settings.user.handle if _user is None else _user.handle
192
+ # test whether instance exists by trying to load it
193
+ instance_slug = f"{owner_str}/{name_str}"
194
+ response = connect(
195
+ instance_slug,
196
+ _db=db,
197
+ _raise_not_found_error=False,
198
+ _test=_test,
199
+ _write_settings=_write_settings,
200
+ _user=_user,
201
+ )
202
+ instance_id: UUID
203
+ instance_state: Literal[
204
+ "connected",
205
+ "instance-corrupted-or-deleted",
206
+ "account-not-exists",
207
+ "instance-not-found",
208
+ ]
209
+ if response is None:
210
+ instance_state, instance_id = "connected", settings.instance._id
211
+ else:
212
+ instance_id, instance_state = process_connect_response(response, instance_slug)
213
+ modules = process_modules_arg(modules)
214
+ return name_str, instance_id, instance_state, instance_slug
215
+
216
+
217
+ DOC_STORAGE_ARG = "A local or remote folder (`'s3://...'` or `'gs://...'`). Defaults to current working directory."
218
+ DOC_INSTANCE_NAME = (
219
+ "Instance name. If not passed, it will equal the folder name passed to `storage`."
220
+ )
221
+ DOC_DB = "Database connection URL. Defaults to `None`, which implies an SQLite file in the storage location."
222
+ DOC_MODULES = "Comma-separated string of schema modules."
223
+ DOC_LOW_LEVEL_KWARGS = "Keyword arguments for low-level control."
224
+
225
+
226
+ @doc_args(DOC_STORAGE_ARG, DOC_INSTANCE_NAME, DOC_DB, DOC_MODULES, DOC_LOW_LEVEL_KWARGS)
227
+ def init(
228
+ *,
229
+ storage: UPathStr = ".",
230
+ name: str | None = None,
231
+ db: PostgresDsn | None = None,
232
+ modules: str | None = None,
233
+ **kwargs,
234
+ ) -> None:
235
+ """Init a LaminDB instance.
236
+
237
+ Args:
238
+ storage: {}
239
+ name: {}
240
+ db: {}
241
+ modules: {}
242
+ **kwargs: {}
243
+
244
+ See Also:
245
+ Init an instance for via the CLI, see `here <https://docs.lamin.ai/cli#init>`__.
246
+ """
247
+ from ._check_setup import _check_instance_setup
248
+ from ._connect_instance import (
249
+ reset_django_module_variables,
250
+ validate_connection_state,
251
+ )
252
+ from .core._hub_core import init_instance_hub
253
+
254
+ silence_loggers()
255
+
256
+ isettings = None
257
+ ssettings = None
258
+
259
+ _write_settings: bool = kwargs.get("_write_settings", True)
260
+ if modules is None:
261
+ modules = kwargs.get("schema", None)
262
+ _test: bool = kwargs.get("_test", False)
263
+
264
+ # use this user instead of settings.user
265
+ # contains access_token
266
+ _user: UserSettings | None = kwargs.get("_user", None)
267
+ user_handle: str = settings.user.handle if _user is None else _user.handle
268
+ user__uuid: UUID = settings.user._uuid if _user is None else _user._uuid # type: ignore
269
+ access_token: str | None = None if _user is None else _user.access_token
270
+
271
+ try:
272
+ name_str, instance_id, instance_state, _ = validate_init_args(
273
+ storage=storage,
274
+ name=name,
275
+ db=db,
276
+ modules=modules,
277
+ _test=_test,
278
+ _write_settings=_write_settings,
279
+ _user=_user, # will get from settings.user if _user is None
280
+ )
281
+ if instance_state == "connected":
282
+ return None
283
+ if _check_instance_setup() and not _test:
284
+ validate_connection_state(user_handle, name_str)
285
+ elif _write_settings:
286
+ disconnect(mute=True)
287
+ isettings = InstanceSettings(
288
+ id=instance_id, # type: ignore
289
+ owner=user_handle,
290
+ name=name_str,
291
+ db=db,
292
+ modules=modules,
293
+ # to lock passed user in isettings._cloud_sqlite_locker.lock()
294
+ _locker_user=_user, # only has effect if cloud sqlite
295
+ )
296
+ register_on_hub = (
297
+ check_is_instance_remote(root=storage, db=db)
298
+ and instance_state != "instance-corrupted-or-deleted"
299
+ )
300
+ if register_on_hub:
301
+ init_instance_hub(
302
+ isettings, account_id=user__uuid, access_token=access_token
303
+ )
304
+ ssettings, _ = init_storage(
305
+ storage,
306
+ instance_id=instance_id,
307
+ instance_slug=f"{user_handle}/{name_str}",
308
+ init_instance=True,
309
+ register_hub=register_on_hub,
310
+ created_by=user__uuid,
311
+ access_token=access_token,
312
+ )
313
+ isettings._storage = ssettings
314
+ if register_on_hub and not ssettings.is_on_hub:
315
+ raise InstanceNotCreated(
316
+ "Unable to create the instance because failed to register the storage."
317
+ )
318
+ validate_sqlite_state(isettings)
319
+ # why call it here if it is also called in load_from_isettings?
320
+ isettings._persist(write_to_disk=_write_settings)
321
+ if _test:
322
+ return None
323
+ isettings._init_db()
324
+ load_from_isettings(
325
+ isettings, init=True, user=_user, write_settings=_write_settings
326
+ )
327
+ if _write_settings and isettings._is_cloud_sqlite:
328
+ isettings._cloud_sqlite_locker.lock()
329
+ logger.warning(
330
+ "locked instance (to unlock and push changes to the cloud SQLite file,"
331
+ " call: lamin disconnect)"
332
+ )
333
+ if register_on_hub and isettings.dialect != "sqlite":
334
+ from ._schema_metadata import update_schema_in_hub
335
+
336
+ update_schema_in_hub(access_token=access_token)
337
+ reset_django_module_variables()
338
+ logger.important(f"initialized lamindb: {isettings.slug}")
339
+ except Exception as e:
340
+ from ._delete import delete_by_isettings
341
+ from .core._hub_core import delete_instance_record, delete_storage_record
342
+
343
+ if isettings is not None:
344
+ if _write_settings:
345
+ delete_by_isettings(isettings)
346
+ else:
347
+ settings._instance_settings = None
348
+ if user_handle != "anonymous" or access_token is not None:
349
+ if ssettings is not None and ssettings.is_on_hub:
350
+ delete_storage_record(ssettings, access_token=access_token)
351
+ if isettings is not None and isettings.is_on_hub:
352
+ delete_instance_record(isettings._id, access_token=access_token)
353
+ raise e
354
+ return None
355
+
356
+
357
+ def load_from_isettings(
358
+ isettings: InstanceSettings,
359
+ *,
360
+ init: bool = False,
361
+ user: UserSettings | None = None,
362
+ write_settings: bool = True,
363
+ ) -> None:
364
+ from .core._setup_bionty_sources import write_bionty_sources
365
+
366
+ user = settings.user if user is None else user
367
+
368
+ if init:
369
+ # during init space, user and storage need to be registered
370
+ register_initial_records(isettings, user)
371
+ write_bionty_sources(isettings)
372
+ isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
373
+ else:
374
+ # when loading, django is already set up
375
+ #
376
+ # only register user if the instance is connected for the first time in an environment
377
+ # this is our best proxy for that the user might not yet be registered
378
+ if not isettings._get_settings_file().exists():
379
+ # do not try to update the user on fine grained access instances
380
+ # this is blocked anyways, only select and insert are allowed
381
+ register_user(user, update_user=not isettings._fine_grained_access)
382
+ isettings._persist(write_to_disk=write_settings)
383
+ # clear branch & space cache after reconnecting
384
+ settings._branch = None
385
+ settings._space = None
386
+
387
+
388
+ def validate_sqlite_state(isettings: InstanceSettings) -> None:
389
+ if isettings._is_cloud_sqlite:
390
+ if (
391
+ # it's important to first evaluate the existence check
392
+ # for the local sqlite file because it doesn't need a network
393
+ # request
394
+ isettings._sqlite_file_local.exists()
395
+ and not isettings._sqlite_file.exists()
396
+ ):
397
+ raise RuntimeError(
398
+ ERROR_SQLITE_CACHE.format(
399
+ isettings._sqlite_file, isettings._sqlite_file_local
400
+ )
401
+ )
402
+
403
+
404
+ def infer_instance_name(
405
+ *,
406
+ storage: UPathStr,
407
+ name: str | None = None,
408
+ db: PostgresDsn | None = None,
409
+ ) -> str:
410
+ if name is not None:
411
+ if "/" in name:
412
+ raise ValueError("Invalid instance name: '/' delimiter not allowed.")
413
+ return name
414
+ if db is not None:
415
+ logger.warning("using the sql database name for the instance name")
416
+ # this isn't a great way to access the db name
417
+ # could use LaminDsn instead
418
+ return str(db).split("/")[-1]
419
+ if storage == "create-s3":
420
+ raise ValueError("pass name to init if storage = 'create-s3'")
421
+ storage_path = UPath(storage).resolve()
422
+ name = storage_path.path.rstrip("/").split("/")[-1]
423
+ return name.lower()