lamindb_setup 1.19.0__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. lamindb_setup/__init__.py +1 -1
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -441
  6. lamindb_setup/_delete.py +155 -155
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -423
  11. lamindb_setup/_migrate.py +331 -331
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -81
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -276
  20. lamindb_setup/core/_aws_storage.py +57 -57
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -288
  25. lamindb_setup/core/_hub_crud.py +247 -247
  26. lamindb_setup/core/_hub_utils.py +100 -100
  27. lamindb_setup/core/_private_django_api.py +80 -80
  28. lamindb_setup/core/_settings.py +440 -434
  29. lamindb_setup/core/_settings_instance.py +22 -1
  30. lamindb_setup/core/_settings_load.py +162 -162
  31. lamindb_setup/core/_settings_save.py +108 -108
  32. lamindb_setup/core/_settings_storage.py +433 -433
  33. lamindb_setup/core/_settings_store.py +162 -162
  34. lamindb_setup/core/_settings_user.py +55 -55
  35. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  36. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  37. lamindb_setup/core/django.py +414 -413
  38. lamindb_setup/core/exceptions.py +1 -1
  39. lamindb_setup/core/hashing.py +134 -134
  40. lamindb_setup/core/types.py +1 -1
  41. lamindb_setup/core/upath.py +1031 -1028
  42. lamindb_setup/errors.py +72 -72
  43. lamindb_setup/io.py +423 -423
  44. lamindb_setup/types.py +17 -17
  45. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +3 -2
  46. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  47. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  48. {lamindb_setup-1.19.0.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  49. lamindb_setup-1.19.0.dist-info/RECORD +0 -51
@@ -1,441 +1,443 @@
1
- from __future__ import annotations
2
-
3
- import importlib
4
- import os
5
- import sys
6
- import types
7
- from typing import TYPE_CHECKING, Any
8
- from uuid import UUID
9
-
10
- from lamin_utils import logger
11
-
12
- from ._check_setup import _check_instance_setup
13
- from ._disconnect import disconnect
14
- from ._init_instance import load_from_isettings
15
- from ._silence_loggers import silence_loggers
16
- from .core._settings import settings
17
- from .core._settings_instance import InstanceSettings
18
- from .core._settings_load import load_instance_settings
19
- from .core._settings_storage import StorageSettings
20
- from .core._settings_store import instance_settings_file
21
- from .core.cloud_sqlite_locker import unlock_cloud_sqlite_upon_exception
22
- from .core.django import reset_django
23
- from .errors import CannotSwitchDefaultInstance, InstanceNotFoundError
24
-
25
- if TYPE_CHECKING:
26
- from pathlib import Path
27
-
28
- from .core._settings_user import UserSettings
29
- from .types import UPathStr
30
-
31
- # this is for testing purposes only
32
- # set to True only to test failed load
33
- _TEST_FAILED_LOAD = False
34
- INSTANCE_NOT_FOUND_MESSAGE = (
35
- "'{owner}/{name}' not found:"
36
- " '{hub_result}'\nCheck your permissions:"
37
- " https://lamin.ai/{owner}/{name}"
38
- )
39
-
40
-
41
- def check_db_dsn_equal_up_to_credentials(db_dsn_hub, db_dsn_local):
42
- return (
43
- db_dsn_hub.scheme == db_dsn_local.scheme
44
- and db_dsn_hub.host == db_dsn_local.host
45
- and db_dsn_hub.database == db_dsn_local.database
46
- and db_dsn_hub.port == db_dsn_local.port
47
- )
48
-
49
-
50
- def update_db_using_local(
51
- hub_instance_result: dict[str, str],
52
- settings_file: Path,
53
- db: str | None = None,
54
- raise_permission_error=True,
55
- ) -> str | None:
56
- db_updated = None
57
- # check if postgres
58
- if hub_instance_result["db_scheme"] == "postgresql":
59
- if db is not None:
60
- # use only the provided db if it is set
61
- db_updated = db
62
- elif (db_env := os.getenv("LAMINDB_INSTANCE_DB")) is not None:
63
- logger.important("loading db URL from env variable LAMINDB_INSTANCE_DB")
64
- # read directly from the environment
65
- db_updated = db_env
66
- else:
67
- # dynamic import to avoid importing the heavy LaminDsnModel at root
68
- from .core._hub_utils import LaminDsnModel
69
-
70
- db_hub = hub_instance_result["db"]
71
- db_dsn_hub = LaminDsnModel(db=db_hub)
72
- # read from a cached settings file in case the hub result is inexistent
73
- if db_dsn_hub.db.user in {None, "none"} and settings_file.exists():
74
- isettings = load_instance_settings(settings_file)
75
- db_updated = isettings.db
76
- else:
77
- # just take the default hub result and ensure there is actually a user
78
- if (
79
- db_dsn_hub.db.user in {None, "none"}
80
- and db_dsn_hub.db.password in {None, "none"}
81
- and raise_permission_error
82
- ):
83
- raise PermissionError(
84
- "No database access, please ask your admin to provide you with"
85
- " a DB URL and pass it via --db <db_url>"
86
- )
87
- db_updated = db_hub
88
- return db_updated
89
-
90
-
91
- def _connect_instance(
92
- owner: str,
93
- name: str,
94
- *,
95
- db: str | None = None,
96
- raise_permission_error: bool = True,
97
- use_root_db_user: bool = False,
98
- use_proxy_db: bool = False,
99
- access_token: str | None = None,
100
- raise_systemexit: bool = False,
101
- ) -> InstanceSettings:
102
- settings_file = instance_settings_file(name, owner)
103
- make_hub_request = True
104
- if settings_file.exists():
105
- isettings = load_instance_settings(settings_file)
106
- # skip hub request for a purely local instance
107
- if isettings.is_remote:
108
- make_hub_request = True
109
- else:
110
- make_hub_request = False
111
- if db is not None and isettings.dialect == "postgresql":
112
- isettings._db = db
113
- if make_hub_request:
114
- # the following will return a string if the instance does not exist on the hub
115
- # do not call hub if the user is anonymous
116
- if owner != "anonymous":
117
- from .core._hub_core import connect_instance_hub
118
-
119
- hub_result = connect_instance_hub(
120
- owner=owner,
121
- name=name,
122
- access_token=access_token,
123
- use_root_db_user=use_root_db_user,
124
- use_proxy_db=use_proxy_db,
125
- )
126
- else:
127
- hub_result = "anonymous-user"
128
- # if hub_result is not a string, it means it made a request
129
- # that successfully returned metadata
130
- if not isinstance(hub_result, str):
131
- instance_result, storage_result = hub_result
132
- db_updated = update_db_using_local(
133
- instance_result,
134
- settings_file,
135
- db=db,
136
- raise_permission_error=raise_permission_error,
137
- )
138
- ssettings = StorageSettings(
139
- root=storage_result["root"],
140
- region=storage_result["region"],
141
- uid=storage_result["lnid"],
142
- uuid=UUID(storage_result["id"]),
143
- instance_id=UUID(instance_result["id"]),
144
- )
145
- isettings = InstanceSettings(
146
- id=UUID(instance_result["id"]),
147
- owner=owner,
148
- name=instance_result["name"],
149
- storage=ssettings,
150
- db=db_updated,
151
- modules=instance_result["schema_str"],
152
- git_repo=instance_result["git_repo"],
153
- keep_artifacts_local=bool(instance_result["keep_artifacts_local"]),
154
- is_on_hub=True,
155
- api_url=instance_result["api_url"],
156
- schema_id=None
157
- if (schema_id := instance_result["schema_id"]) is None
158
- else UUID(schema_id),
159
- fine_grained_access=bool(instance_result["fine_grained_access"]),
160
- db_permissions=instance_result.get("db_permissions", None)
161
- if not use_root_db_user
162
- else "write",
163
- )
164
- else:
165
- if hub_result != "anonymous-user":
166
- message = INSTANCE_NOT_FOUND_MESSAGE.format(
167
- owner=owner, name=name, hub_result=hub_result
168
- )
169
- else:
170
- message = "It is not possible to load an anonymous-owned instance from the hub"
171
- exception = (
172
- SystemExit(message)
173
- if raise_systemexit
174
- else InstanceNotFoundError(message)
175
- )
176
- if settings_file.exists():
177
- isettings = load_instance_settings(settings_file)
178
- if isettings.is_remote:
179
- raise exception
180
- else:
181
- raise exception
182
- return isettings
183
-
184
-
185
- def reset_django_module_variables():
186
- # This function updates all module-level references to Django classes
187
- # But it will fail to update function level references
188
- # This is not a problem unless for the function that calls ln.connect() itself
189
- # So, if a user has
190
- #
191
- # def my_function():
192
- # import lamindb as ln
193
- # ln.connect(...)
194
- #
195
- # Then it will **not** work and the `ln` variable becomes stale and hold a reference to the old classes
196
- # Other functions that dynamically import are no problem because the variables
197
- # are automatically refreshed when the function runs the next time after ln.connect() was called
198
- logger.debug("resetting django module variables")
199
-
200
- # django.apps needs to be a local import to refresh variables
201
- from django.apps import apps
202
-
203
- app_names = {app.name for app in apps.get_app_configs()}
204
- # always copy before iterations over sys.modules
205
- # see https://docs.python.org/3/library/sys.html#sys.modules
206
- # this whole thing runs about 50ms in a big env
207
- for name, module in sys.modules.copy().items():
208
- if (
209
- module is not None
210
- and (not name.startswith("__") or name == "__main__")
211
- and name not in sys.builtin_module_names
212
- ):
213
- try:
214
- for k, v in vars(module).items():
215
- if (
216
- isinstance(v, types.ModuleType)
217
- and not k.startswith("_")
218
- and getattr(v, "__name__", None) in app_names
219
- ):
220
- if v.__name__ in sys.modules:
221
- vars(module)[k] = sys.modules[v.__name__]
222
- # Also reset classes from Django apps - but check if the class module starts with any app name
223
- elif hasattr(v, "__module__") and getattr(v, "__module__", None):
224
- class_module = v.__module__
225
- # Check if the class module starts with any of our app names
226
- if any(
227
- class_module.startswith(app_name) for app_name in app_names
228
- ):
229
- if class_module in sys.modules:
230
- fresh_module = sys.modules[class_module]
231
- attr_name = getattr(v, "__name__", k)
232
- if hasattr(fresh_module, attr_name):
233
- vars(module)[k] = getattr(fresh_module, attr_name)
234
- except (AttributeError, TypeError):
235
- continue
236
-
237
-
238
- def _connect_cli(
239
- instance: str,
240
- use_root_db_user: bool = False,
241
- ) -> None:
242
- from lamindb_setup import settings as settings_
243
-
244
- owner, name = get_owner_name_from_identifier(instance)
245
- isettings = _connect_instance(
246
- owner,
247
- name,
248
- use_root_db_user=use_root_db_user,
249
- raise_systemexit=True,
250
- )
251
- isettings._persist(write_to_disk=True)
252
- if not isettings.is_on_hub or isettings._is_cloud_sqlite:
253
- # there are two reasons to call the full-blown connect
254
- # (1) if the instance is not on the hub, we need to register
255
- # potential users through register_user()
256
- # (2) if the instance is cloud sqlite, we need to lock it
257
- connect(_write_settings=False, _reload_lamindb=False)
258
- else:
259
- logger.important(f"connected lamindb: {isettings.slug}")
260
- if settings_.dev_dir is None:
261
- logger.important_hint(
262
- "to map a local dev directory, call: lamin settings set dev-dir ."
263
- )
264
- return None
265
-
266
-
267
- def validate_connection_state(
268
- owner: str, name: str, use_root_db_user: bool = False
269
- ) -> None:
270
- from django.db import connection
271
-
272
- if (
273
- settings._instance_exists # exists only for real instances, not for none/none
274
- and f"{owner}/{name}" == settings.instance.slug
275
- and not use_root_db_user # always re-connect for root db user
276
- ):
277
- logger.important(
278
- f"doing nothing, already connected lamindb: {settings.instance.slug}"
279
- )
280
- return None
281
- else:
282
- if settings._instance_exists:
283
- import lamindb as ln
284
-
285
- if ln.context.transform is not None:
286
- raise CannotSwitchDefaultInstance(
287
- "Cannot switch default instance while `ln.track()` is live: call `ln.finish()`"
288
- )
289
- reset_django()
290
-
291
-
292
- @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
293
- def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
294
- """Connect the global default instance.
295
-
296
- If you want to create a read-only database client, use :class:`~lamindb.DB` instead.
297
-
298
- Args:
299
- instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
300
- If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
301
- If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
302
-
303
- See Also:
304
- Configure an instance for auto-connect via the CLI, see `here <https://docs.lamin.ai/cli#connect>`__.
305
- """
306
- # validate kwargs
307
- valid_kwargs = {
308
- "use_root_db_user",
309
- "use_proxy_db",
310
- "_db",
311
- "_write_settings",
312
- "_raise_not_found_error",
313
- "_reload_lamindb",
314
- "_test",
315
- "_user",
316
- }
317
- for kwarg in kwargs:
318
- if kwarg not in valid_kwargs:
319
- raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")
320
-
321
- use_root_db_user: bool = kwargs.get("use_root_db_user", False)
322
- use_proxy_db = kwargs.get("use_proxy_db", False)
323
- # _db is still needed because it is called in init
324
- _db: str | None = kwargs.get("_db", None)
325
- _write_settings: bool = kwargs.get("_write_settings", False)
326
- _raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
327
- _reload_lamindb: bool = kwargs.get("_reload_lamindb", True)
328
- _test: bool = kwargs.get("_test", False)
329
-
330
- isettings: InstanceSettings = None # type: ignore
331
-
332
- access_token: str | None = None
333
- _user: UserSettings | None = kwargs.get("_user", None)
334
- if _user is not None:
335
- access_token = _user.access_token
336
- if instance is None:
337
- instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
338
-
339
- try:
340
- if instance is None:
341
- if settings._instance_exists:
342
- isettings = settings.instance
343
- else:
344
- raise ValueError(
345
- "No instance was connected through the CLI, pass a value to `instance` or connect via the CLI."
346
- )
347
- if use_root_db_user:
348
- reset_django()
349
- owner, name = isettings.owner, isettings.name
350
- if _db is not None and isettings.dialect == "postgresql":
351
- isettings._db = _db
352
- else:
353
- owner, name = get_owner_name_from_identifier(instance)
354
- if _check_instance_setup() and not _test:
355
- validate_connection_state(
356
- owner, name, use_root_db_user=use_root_db_user
357
- )
358
- elif (
359
- _write_settings
360
- and settings._instance_exists
361
- and f"{owner}/{name}" != settings.instance.slug
362
- ):
363
- disconnect(mute=True)
364
-
365
- if instance is not None or use_root_db_user:
366
- try:
367
- isettings = _connect_instance(
368
- owner,
369
- name,
370
- db=_db,
371
- access_token=access_token,
372
- use_root_db_user=use_root_db_user,
373
- use_proxy_db=use_proxy_db,
374
- )
375
- except InstanceNotFoundError as e:
376
- if _raise_not_found_error:
377
- raise e
378
- else:
379
- return "instance-not-found"
380
- if isinstance(isettings, str):
381
- return isettings
382
- # at this point we have checked already that isettings is not a string
383
- # _user is passed to lock cloud sqlite for this user in isettings._load_db()
384
- # has no effect if _user is None or if not cloud sqlite instance
385
- isettings._locker_user = _user
386
- isettings._persist(write_to_disk=_write_settings)
387
- if _test:
388
- return None
389
- silence_loggers()
390
- check, msg = isettings._load_db()
391
- if not check:
392
- local_db = (
393
- isettings._is_cloud_sqlite and isettings._sqlite_file_local.exists()
394
- )
395
- if local_db:
396
- logger.warning(
397
- "SQLite file does not exist in the cloud, but exists locally:"
398
- f" {isettings._sqlite_file_local}\nTo push the file to the cloud,"
399
- " call: lamin disconnect"
400
- )
401
- elif _raise_not_found_error:
402
- raise SystemExit(msg)
403
- else:
404
- logger.warning(
405
- f"instance exists with id {isettings._id.hex}, but database is not"
406
- " loadable: re-initializing"
407
- )
408
- return "instance-corrupted-or-deleted"
409
- # this is for testing purposes only
410
- if _TEST_FAILED_LOAD:
411
- raise RuntimeError("Technical testing error.")
412
-
413
- load_from_isettings(isettings, user=_user, write_settings=_write_settings)
414
- if _reload_lamindb:
415
- reset_django_module_variables()
416
- if isettings.slug != "none/none":
417
- logger.important(f"connected lamindb: {isettings.slug}")
418
- except Exception as e:
419
- if isettings is not None:
420
- if _write_settings:
421
- isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
422
- settings._instance_settings = None
423
- raise e
424
- return None
425
-
426
-
427
- def get_owner_name_from_identifier(identifier: str) -> tuple[str, str]:
428
- if "/" in identifier:
429
- if identifier.startswith("https://lamin.ai/"):
430
- identifier = identifier.replace("https://lamin.ai/", "")
431
- split = identifier.split("/")
432
- if len(split) > 2:
433
- raise ValueError(
434
- "The instance identifier needs to be 'owner/name', the instance name"
435
- " (owner is current user) or the URL: https://lamin.ai/owner/name."
436
- )
437
- owner, name = split
438
- else:
439
- owner = settings.user.handle
440
- name = identifier
441
- return owner, name
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import os
5
+ import sys
6
+ import types
7
+ from typing import TYPE_CHECKING, Any
8
+ from uuid import UUID
9
+
10
+ from lamin_utils import logger
11
+
12
+ from ._check_setup import _check_instance_setup
13
+ from ._disconnect import disconnect
14
+ from ._init_instance import load_from_isettings
15
+ from ._silence_loggers import silence_loggers
16
+ from .core._settings import settings
17
+ from .core._settings_instance import InstanceSettings
18
+ from .core._settings_load import load_instance_settings
19
+ from .core._settings_storage import StorageSettings
20
+ from .core._settings_store import instance_settings_file
21
+ from .core.cloud_sqlite_locker import unlock_cloud_sqlite_upon_exception
22
+ from .core.django import reset_django
23
+ from .errors import CannotSwitchDefaultInstance, InstanceNotFoundError
24
+
25
+ if TYPE_CHECKING:
26
+ from pathlib import Path
27
+
28
+ from .core._settings_user import UserSettings
29
+ from .types import UPathStr
30
+
31
+ # this is for testing purposes only
32
+ # set to True only to test failed load
33
+ _TEST_FAILED_LOAD = False
34
+ INSTANCE_NOT_FOUND_MESSAGE = (
35
+ "'{owner}/{name}' not found:"
36
+ " '{hub_result}'\nCheck your permissions:"
37
+ " https://lamin.ai/{owner}/{name}"
38
+ )
39
+
40
+
41
+ def check_db_dsn_equal_up_to_credentials(db_dsn_hub, db_dsn_local):
42
+ return (
43
+ db_dsn_hub.scheme == db_dsn_local.scheme
44
+ and db_dsn_hub.host == db_dsn_local.host
45
+ and db_dsn_hub.database == db_dsn_local.database
46
+ and db_dsn_hub.port == db_dsn_local.port
47
+ )
48
+
49
+
50
+ def update_db_using_local(
51
+ hub_instance_result: dict[str, str],
52
+ settings_file: Path,
53
+ db: str | None = None,
54
+ raise_permission_error=True,
55
+ ) -> str | None:
56
+ db_updated = None
57
+ # check if postgres
58
+ if hub_instance_result["db_scheme"] == "postgresql":
59
+ if db is not None:
60
+ # use only the provided db if it is set
61
+ db_updated = db
62
+ elif (db_env := os.getenv("LAMINDB_INSTANCE_DB")) is not None:
63
+ logger.important("loading db URL from env variable LAMINDB_INSTANCE_DB")
64
+ # read directly from the environment
65
+ db_updated = db_env
66
+ else:
67
+ # dynamic import to avoid importing the heavy LaminDsnModel at root
68
+ from .core._hub_utils import LaminDsnModel
69
+
70
+ db_hub = hub_instance_result["db"]
71
+ db_dsn_hub = LaminDsnModel(db=db_hub)
72
+ # read from a cached settings file in case the hub result is inexistent
73
+ if db_dsn_hub.db.user in {None, "none"} and settings_file.exists():
74
+ isettings = load_instance_settings(settings_file)
75
+ db_updated = isettings.db
76
+ else:
77
+ # just take the default hub result and ensure there is actually a user
78
+ if (
79
+ db_dsn_hub.db.user in {None, "none"}
80
+ and db_dsn_hub.db.password in {None, "none"}
81
+ and raise_permission_error
82
+ ):
83
+ raise PermissionError(
84
+ "No database access, please ask your admin to provide you with"
85
+ " a DB URL and pass it via --db <db_url>"
86
+ )
87
+ db_updated = db_hub
88
+ return db_updated
89
+
90
+
91
+ def _connect_instance(
92
+ owner: str,
93
+ name: str,
94
+ *,
95
+ db: str | None = None,
96
+ raise_permission_error: bool = True,
97
+ use_root_db_user: bool = False,
98
+ use_proxy_db: bool = False,
99
+ access_token: str | None = None,
100
+ raise_systemexit: bool = False,
101
+ ) -> InstanceSettings:
102
+ settings_file = instance_settings_file(name, owner)
103
+ make_hub_request = True
104
+ if settings_file.exists():
105
+ isettings = load_instance_settings(settings_file)
106
+ # skip hub request for a purely local instance
107
+ if isettings.is_remote:
108
+ make_hub_request = True
109
+ else:
110
+ make_hub_request = False
111
+ if db is not None and isettings.dialect == "postgresql":
112
+ isettings._db = db
113
+ if make_hub_request:
114
+ # the following will return a string if the instance does not exist on the hub
115
+ # do not call hub if the user is anonymous
116
+ if owner != "anonymous":
117
+ from .core._hub_core import connect_instance_hub
118
+
119
+ hub_result = connect_instance_hub(
120
+ owner=owner,
121
+ name=name,
122
+ access_token=access_token,
123
+ use_root_db_user=use_root_db_user,
124
+ use_proxy_db=use_proxy_db,
125
+ )
126
+ else:
127
+ hub_result = "anonymous-user"
128
+ # if hub_result is not a string, it means it made a request
129
+ # that successfully returned metadata
130
+ if not isinstance(hub_result, str):
131
+ instance_result, storage_result = hub_result
132
+ db_updated = update_db_using_local(
133
+ instance_result,
134
+ settings_file,
135
+ db=db,
136
+ raise_permission_error=raise_permission_error,
137
+ )
138
+ ssettings = StorageSettings(
139
+ root=storage_result["root"],
140
+ region=storage_result["region"],
141
+ uid=storage_result["lnid"],
142
+ uuid=UUID(storage_result["id"]),
143
+ instance_id=UUID(instance_result["id"]),
144
+ )
145
+ isettings = InstanceSettings(
146
+ id=UUID(instance_result["id"]),
147
+ owner=owner,
148
+ name=instance_result["name"],
149
+ storage=ssettings,
150
+ db=db_updated,
151
+ modules=instance_result["schema_str"],
152
+ git_repo=instance_result["git_repo"],
153
+ keep_artifacts_local=bool(instance_result["keep_artifacts_local"]),
154
+ is_on_hub=True,
155
+ api_url=instance_result["api_url"],
156
+ schema_id=None
157
+ if (schema_id := instance_result["schema_id"]) is None
158
+ else UUID(schema_id),
159
+ fine_grained_access=bool(instance_result["fine_grained_access"]),
160
+ db_permissions=instance_result.get("db_permissions", None)
161
+ if not use_root_db_user
162
+ else "write",
163
+ )
164
+ else:
165
+ if hub_result != "anonymous-user":
166
+ message = INSTANCE_NOT_FOUND_MESSAGE.format(
167
+ owner=owner, name=name, hub_result=hub_result
168
+ )
169
+ else:
170
+ message = "It is not possible to load an anonymous-owned instance from the hub"
171
+ exception = (
172
+ SystemExit(message)
173
+ if raise_systemexit
174
+ else InstanceNotFoundError(message)
175
+ )
176
+ if settings_file.exists():
177
+ isettings = load_instance_settings(settings_file)
178
+ if isettings.is_remote:
179
+ raise exception
180
+ else:
181
+ raise exception
182
+ return isettings
183
+
184
+
185
+ def reset_django_module_variables():
186
+ # This function updates all module-level references to Django classes
187
+ # But it will fail to update function level references
188
+ # This is not a problem unless for the function that calls ln.connect() itself
189
+ # So, if a user has
190
+ #
191
+ # def my_function():
192
+ # import lamindb as ln
193
+ # ln.connect(...)
194
+ #
195
+ # Then it will **not** work and the `ln` variable becomes stale and hold a reference to the old classes
196
+ # Other functions that dynamically import are no problem because the variables
197
+ # are automatically refreshed when the function runs the next time after ln.connect() was called
198
+ logger.debug("resetting django module variables")
199
+
200
+ # django.apps needs to be a local import to refresh variables
201
+ from django.apps import apps
202
+
203
+ app_names = {app.name for app in apps.get_app_configs()}
204
+ # always copy before iterations over sys.modules
205
+ # see https://docs.python.org/3/library/sys.html#sys.modules
206
+ # this whole thing runs about 50ms in a big env
207
+ for name, module in sys.modules.copy().items():
208
+ if (
209
+ module is not None
210
+ and (not name.startswith("__") or name == "__main__")
211
+ and name not in sys.builtin_module_names
212
+ ):
213
+ try:
214
+ module_vars = vars(module) # references the original
215
+ # copy to avoid changing size during the loop
216
+ for k, v in module_vars.copy().items():
217
+ if (
218
+ isinstance(v, types.ModuleType)
219
+ and not k.startswith("_")
220
+ and getattr(v, "__name__", None) in app_names
221
+ ):
222
+ if v.__name__ in sys.modules:
223
+ module_vars[k] = sys.modules[v.__name__]
224
+ # Also reset classes from Django apps - but check if the class module starts with any app name
225
+ elif hasattr(v, "__module__") and getattr(v, "__module__", None):
226
+ class_module = v.__module__
227
+ # Check if the class module starts with any of our app names
228
+ if any(
229
+ class_module.startswith(app_name) for app_name in app_names
230
+ ):
231
+ if class_module in sys.modules:
232
+ fresh_module = sys.modules[class_module]
233
+ attr_name = getattr(v, "__name__", k)
234
+ if hasattr(fresh_module, attr_name):
235
+ module_vars[k] = getattr(fresh_module, attr_name)
236
+ except (AttributeError, TypeError):
237
+ continue
238
+
239
+
240
+ def _connect_cli(
241
+ instance: str,
242
+ use_root_db_user: bool = False,
243
+ ) -> None:
244
+ from lamindb_setup import settings as settings_
245
+
246
+ owner, name = get_owner_name_from_identifier(instance)
247
+ isettings = _connect_instance(
248
+ owner,
249
+ name,
250
+ use_root_db_user=use_root_db_user,
251
+ raise_systemexit=True,
252
+ )
253
+ isettings._persist(write_to_disk=True)
254
+ if not isettings.is_on_hub or isettings._is_cloud_sqlite:
255
+ # there are two reasons to call the full-blown connect
256
+ # (1) if the instance is not on the hub, we need to register
257
+ # potential users through register_user()
258
+ # (2) if the instance is cloud sqlite, we need to lock it
259
+ connect(_write_settings=False, _reload_lamindb=False)
260
+ else:
261
+ logger.important(f"connected lamindb: {isettings.slug}")
262
+ if settings_.dev_dir is None:
263
+ logger.important_hint(
264
+ "to map a local dev directory, call: lamin settings set dev-dir ."
265
+ )
266
+ return None
267
+
268
+
269
+ def validate_connection_state(
270
+ owner: str, name: str, use_root_db_user: bool = False
271
+ ) -> None:
272
+ from django.db import connection
273
+
274
+ if (
275
+ settings._instance_exists # exists only for real instances, not for none/none
276
+ and f"{owner}/{name}" == settings.instance.slug
277
+ and not use_root_db_user # always re-connect for root db user
278
+ ):
279
+ logger.important(
280
+ f"doing nothing, already connected lamindb: {settings.instance.slug}"
281
+ )
282
+ return None
283
+ else:
284
+ if settings._instance_exists:
285
+ import lamindb as ln
286
+
287
+ if ln.context.transform is not None:
288
+ raise CannotSwitchDefaultInstance(
289
+ "Cannot switch default instance while `ln.track()` is live: call `ln.finish()`"
290
+ )
291
+ reset_django()
292
+
293
+
294
+ @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
295
+ def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
296
+ """Connect the global default instance.
297
+
298
+ If you want to create a read-only database client, use :class:`~lamindb.DB` instead.
299
+
300
+ Args:
301
+ instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
302
+ If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
303
+ If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
304
+
305
+ See Also:
306
+ Configure an instance for auto-connect via the CLI, see `here <https://docs.lamin.ai/cli#connect>`__.
307
+ """
308
+ # validate kwargs
309
+ valid_kwargs = {
310
+ "use_root_db_user",
311
+ "use_proxy_db",
312
+ "_db",
313
+ "_write_settings",
314
+ "_raise_not_found_error",
315
+ "_reload_lamindb",
316
+ "_test",
317
+ "_user",
318
+ }
319
+ for kwarg in kwargs:
320
+ if kwarg not in valid_kwargs:
321
+ raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")
322
+
323
+ use_root_db_user: bool = kwargs.get("use_root_db_user", False)
324
+ use_proxy_db = kwargs.get("use_proxy_db", False)
325
+ # _db is still needed because it is called in init
326
+ _db: str | None = kwargs.get("_db", None)
327
+ _write_settings: bool = kwargs.get("_write_settings", False)
328
+ _raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
329
+ _reload_lamindb: bool = kwargs.get("_reload_lamindb", True)
330
+ _test: bool = kwargs.get("_test", False)
331
+
332
+ isettings: InstanceSettings = None # type: ignore
333
+
334
+ access_token: str | None = None
335
+ _user: UserSettings | None = kwargs.get("_user", None)
336
+ if _user is not None:
337
+ access_token = _user.access_token
338
+ if instance is None:
339
+ instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
340
+
341
+ try:
342
+ if instance is None:
343
+ if settings._instance_exists:
344
+ isettings = settings.instance
345
+ else:
346
+ raise ValueError(
347
+ "No instance was connected through the CLI, pass a value to `instance` or connect via the CLI."
348
+ )
349
+ if use_root_db_user:
350
+ reset_django()
351
+ owner, name = isettings.owner, isettings.name
352
+ if _db is not None and isettings.dialect == "postgresql":
353
+ isettings._db = _db
354
+ else:
355
+ owner, name = get_owner_name_from_identifier(instance)
356
+ if _check_instance_setup() and not _test:
357
+ validate_connection_state(
358
+ owner, name, use_root_db_user=use_root_db_user
359
+ )
360
+ elif (
361
+ _write_settings
362
+ and settings._instance_exists
363
+ and f"{owner}/{name}" != settings.instance.slug
364
+ ):
365
+ disconnect(mute=True)
366
+
367
+ if instance is not None or use_root_db_user:
368
+ try:
369
+ isettings = _connect_instance(
370
+ owner,
371
+ name,
372
+ db=_db,
373
+ access_token=access_token,
374
+ use_root_db_user=use_root_db_user,
375
+ use_proxy_db=use_proxy_db,
376
+ )
377
+ except InstanceNotFoundError as e:
378
+ if _raise_not_found_error:
379
+ raise e
380
+ else:
381
+ return "instance-not-found"
382
+ if isinstance(isettings, str):
383
+ return isettings
384
+ # at this point we have checked already that isettings is not a string
385
+ # _user is passed to lock cloud sqlite for this user in isettings._load_db()
386
+ # has no effect if _user is None or if not cloud sqlite instance
387
+ isettings._locker_user = _user
388
+ isettings._persist(write_to_disk=_write_settings)
389
+ if _test:
390
+ return None
391
+ silence_loggers()
392
+ check, msg = isettings._load_db()
393
+ if not check:
394
+ local_db = (
395
+ isettings._is_cloud_sqlite and isettings._sqlite_file_local.exists()
396
+ )
397
+ if local_db:
398
+ logger.warning(
399
+ "SQLite file does not exist in the cloud, but exists locally:"
400
+ f" {isettings._sqlite_file_local}\nTo push the file to the cloud,"
401
+ " call: lamin disconnect"
402
+ )
403
+ elif _raise_not_found_error:
404
+ raise SystemExit(msg)
405
+ else:
406
+ logger.warning(
407
+ f"instance exists with id {isettings._id.hex}, but database is not"
408
+ " loadable: re-initializing"
409
+ )
410
+ return "instance-corrupted-or-deleted"
411
+ # this is for testing purposes only
412
+ if _TEST_FAILED_LOAD:
413
+ raise RuntimeError("Technical testing error.")
414
+
415
+ load_from_isettings(isettings, user=_user, write_settings=_write_settings)
416
+ if _reload_lamindb:
417
+ reset_django_module_variables()
418
+ if isettings.slug != "none/none":
419
+ logger.important(f"connected lamindb: {isettings.slug}")
420
+ except Exception as e:
421
+ if isettings is not None:
422
+ if _write_settings:
423
+ isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
424
+ settings._instance_settings = None
425
+ raise e
426
+ return None
427
+
428
+
429
+ def get_owner_name_from_identifier(identifier: str) -> tuple[str, str]:
430
+ if "/" in identifier:
431
+ if identifier.startswith("https://lamin.ai/"):
432
+ identifier = identifier.replace("https://lamin.ai/", "")
433
+ split = identifier.split("/")
434
+ if len(split) > 2:
435
+ raise ValueError(
436
+ "The instance identifier needs to be 'owner/name', the instance name"
437
+ " (owner is current user) or the URL: https://lamin.ai/owner/name."
438
+ )
439
+ owner, name = split
440
+ else:
441
+ owner = settings.user.handle
442
+ name = identifier
443
+ return owner, name