lamindb_setup 1.18.2__py3-none-any.whl → 1.19.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. lamindb_setup/__init__.py +4 -19
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check.py +7 -7
  4. lamindb_setup/_check_setup.py +131 -131
  5. lamindb_setup/_connect_instance.py +443 -438
  6. lamindb_setup/_delete.py +155 -151
  7. lamindb_setup/_disconnect.py +38 -38
  8. lamindb_setup/_django.py +39 -39
  9. lamindb_setup/_entry_points.py +19 -19
  10. lamindb_setup/_init_instance.py +423 -429
  11. lamindb_setup/_migrate.py +331 -327
  12. lamindb_setup/_register_instance.py +32 -32
  13. lamindb_setup/_schema.py +27 -27
  14. lamindb_setup/_schema_metadata.py +451 -451
  15. lamindb_setup/_set_managed_storage.py +81 -80
  16. lamindb_setup/_setup_user.py +198 -198
  17. lamindb_setup/_silence_loggers.py +46 -46
  18. lamindb_setup/core/__init__.py +25 -34
  19. lamindb_setup/core/_aws_options.py +276 -266
  20. lamindb_setup/core/_aws_storage.py +57 -55
  21. lamindb_setup/core/_clone.py +50 -50
  22. lamindb_setup/core/_deprecated.py +62 -62
  23. lamindb_setup/core/_docs.py +14 -14
  24. lamindb_setup/core/_hub_client.py +288 -294
  25. lamindb_setup/core/_hub_core.py +0 -2
  26. lamindb_setup/core/_hub_crud.py +247 -247
  27. lamindb_setup/core/_hub_utils.py +100 -100
  28. lamindb_setup/core/_private_django_api.py +80 -80
  29. lamindb_setup/core/_settings.py +440 -434
  30. lamindb_setup/core/_settings_instance.py +32 -7
  31. lamindb_setup/core/_settings_load.py +162 -159
  32. lamindb_setup/core/_settings_save.py +108 -96
  33. lamindb_setup/core/_settings_storage.py +433 -433
  34. lamindb_setup/core/_settings_store.py +162 -92
  35. lamindb_setup/core/_settings_user.py +55 -55
  36. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  37. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  38. lamindb_setup/core/django.py +414 -413
  39. lamindb_setup/core/exceptions.py +1 -1
  40. lamindb_setup/core/hashing.py +134 -134
  41. lamindb_setup/core/types.py +1 -1
  42. lamindb_setup/core/upath.py +1031 -1028
  43. lamindb_setup/errors.py +72 -70
  44. lamindb_setup/io.py +423 -416
  45. lamindb_setup/types.py +17 -17
  46. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/METADATA +4 -2
  47. lamindb_setup-1.19.1.dist-info/RECORD +51 -0
  48. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info}/WHEEL +1 -1
  49. {lamindb_setup-1.18.2.dist-info → lamindb_setup-1.19.1.dist-info/licenses}/LICENSE +201 -201
  50. lamindb_setup-1.18.2.dist-info/RECORD +0 -51
@@ -1,438 +1,443 @@
1
- from __future__ import annotations
2
-
3
- import importlib
4
- import os
5
- import sys
6
- import types
7
- from typing import TYPE_CHECKING, Any
8
- from uuid import UUID
9
-
10
- from lamin_utils import logger
11
-
12
- from ._check_setup import _check_instance_setup
13
- from ._disconnect import disconnect
14
- from ._init_instance import load_from_isettings
15
- from ._silence_loggers import silence_loggers
16
- from .core._hub_core import connect_instance_hub
17
- from .core._hub_utils import LaminDsnModel
18
- from .core._settings import settings
19
- from .core._settings_instance import InstanceSettings
20
- from .core._settings_load import load_instance_settings
21
- from .core._settings_storage import StorageSettings
22
- from .core._settings_store import instance_settings_file
23
- from .core.cloud_sqlite_locker import unlock_cloud_sqlite_upon_exception
24
- from .core.django import reset_django
25
- from .errors import CannotSwitchDefaultInstance, InstanceNotFoundError
26
-
27
- if TYPE_CHECKING:
28
- from pathlib import Path
29
-
30
- from .core._settings_user import UserSettings
31
- from .types import UPathStr
32
-
33
- # this is for testing purposes only
34
- # set to True only to test failed load
35
- _TEST_FAILED_LOAD = False
36
- INSTANCE_NOT_FOUND_MESSAGE = (
37
- "'{owner}/{name}' not found:"
38
- " '{hub_result}'\nCheck your permissions:"
39
- " https://lamin.ai/{owner}/{name}"
40
- )
41
-
42
-
43
- def check_db_dsn_equal_up_to_credentials(db_dsn_hub, db_dsn_local):
44
- return (
45
- db_dsn_hub.scheme == db_dsn_local.scheme
46
- and db_dsn_hub.host == db_dsn_local.host
47
- and db_dsn_hub.database == db_dsn_local.database
48
- and db_dsn_hub.port == db_dsn_local.port
49
- )
50
-
51
-
52
- def update_db_using_local(
53
- hub_instance_result: dict[str, str],
54
- settings_file: Path,
55
- db: str | None = None,
56
- raise_permission_error=True,
57
- ) -> str | None:
58
- db_updated = None
59
- # check if postgres
60
- if hub_instance_result["db_scheme"] == "postgresql":
61
- if db is not None:
62
- # use only the provided db if it is set
63
- db_updated = db
64
- elif (db_env := os.getenv("LAMINDB_INSTANCE_DB")) is not None:
65
- logger.important("loading db URL from env variable LAMINDB_INSTANCE_DB")
66
- # read directly from the environment
67
- db_updated = db_env
68
- else:
69
- db_hub = hub_instance_result["db"]
70
- db_dsn_hub = LaminDsnModel(db=db_hub)
71
- # read from a cached settings file in case the hub result is inexistent
72
- if db_dsn_hub.db.user in {None, "none"} and settings_file.exists():
73
- isettings = load_instance_settings(settings_file)
74
- db_updated = isettings.db
75
- else:
76
- # just take the default hub result and ensure there is actually a user
77
- if (
78
- db_dsn_hub.db.user in {None, "none"}
79
- and db_dsn_hub.db.password in {None, "none"}
80
- and raise_permission_error
81
- ):
82
- raise PermissionError(
83
- "No database access, please ask your admin to provide you with"
84
- " a DB URL and pass it via --db <db_url>"
85
- )
86
- db_updated = db_hub
87
- return db_updated
88
-
89
-
90
- def _connect_instance(
91
- owner: str,
92
- name: str,
93
- *,
94
- db: str | None = None,
95
- raise_permission_error: bool = True,
96
- use_root_db_user: bool = False,
97
- use_proxy_db: bool = False,
98
- access_token: str | None = None,
99
- raise_systemexit: bool = False,
100
- ) -> InstanceSettings:
101
- settings_file = instance_settings_file(name, owner)
102
- make_hub_request = True
103
- if settings_file.exists():
104
- isettings = load_instance_settings(settings_file)
105
- # skip hub request for a purely local instance
106
- if isettings.is_remote:
107
- make_hub_request = True
108
- else:
109
- make_hub_request = False
110
- if db is not None and isettings.dialect == "postgresql":
111
- isettings._db = db
112
- if make_hub_request:
113
- # the following will return a string if the instance does not exist on the hub
114
- # do not call hub if the user is anonymous
115
- if owner != "anonymous":
116
- hub_result = connect_instance_hub(
117
- owner=owner,
118
- name=name,
119
- access_token=access_token,
120
- use_root_db_user=use_root_db_user,
121
- use_proxy_db=use_proxy_db,
122
- )
123
- else:
124
- hub_result = "anonymous-user"
125
- # if hub_result is not a string, it means it made a request
126
- # that successfully returned metadata
127
- if not isinstance(hub_result, str):
128
- instance_result, storage_result = hub_result
129
- db_updated = update_db_using_local(
130
- instance_result,
131
- settings_file,
132
- db=db,
133
- raise_permission_error=raise_permission_error,
134
- )
135
- ssettings = StorageSettings(
136
- root=storage_result["root"],
137
- region=storage_result["region"],
138
- uid=storage_result["lnid"],
139
- uuid=UUID(storage_result["id"]),
140
- instance_id=UUID(instance_result["id"]),
141
- )
142
- isettings = InstanceSettings(
143
- id=UUID(instance_result["id"]),
144
- owner=owner,
145
- name=instance_result["name"],
146
- storage=ssettings,
147
- db=db_updated,
148
- modules=instance_result["schema_str"],
149
- git_repo=instance_result["git_repo"],
150
- keep_artifacts_local=bool(instance_result["keep_artifacts_local"]),
151
- is_on_hub=True,
152
- api_url=instance_result["api_url"],
153
- schema_id=None
154
- if (schema_id := instance_result["schema_id"]) is None
155
- else UUID(schema_id),
156
- fine_grained_access=bool(instance_result["fine_grained_access"]),
157
- db_permissions=instance_result.get("db_permissions", None)
158
- if not use_root_db_user
159
- else "write",
160
- )
161
- else:
162
- if hub_result != "anonymous-user":
163
- message = INSTANCE_NOT_FOUND_MESSAGE.format(
164
- owner=owner, name=name, hub_result=hub_result
165
- )
166
- else:
167
- message = "It is not possible to load an anonymous-owned instance from the hub"
168
- exception = (
169
- SystemExit(message)
170
- if raise_systemexit
171
- else InstanceNotFoundError(message)
172
- )
173
- if settings_file.exists():
174
- isettings = load_instance_settings(settings_file)
175
- if isettings.is_remote:
176
- raise exception
177
- else:
178
- raise exception
179
- return isettings
180
-
181
-
182
- def reset_django_module_variables():
183
- # This function updates all module-level references to Django classes
184
- # But it will fail to update function level references
185
- # This is not a problem unless for the function that calls ln.connect() itself
186
- # So, if a user has
187
- #
188
- # def my_function():
189
- # import lamindb as ln
190
- # ln.connect(...)
191
- #
192
- # Then it will **not** work and the `ln` variable becomes stale and hold a reference to the old classes
193
- # Other functions that dynamically import are no problem because the variables
194
- # are automatically refreshed when the function runs the next time after ln.connect() was called
195
- logger.debug("resetting django module variables")
196
-
197
- # django.apps needs to be a local import to refresh variables
198
- from django.apps import apps
199
-
200
- app_names = {app.name for app in apps.get_app_configs()}
201
- # always copy before iterations over sys.modules
202
- # see https://docs.python.org/3/library/sys.html#sys.modules
203
- # this whole thing runs about 50ms in a big env
204
- for name, module in sys.modules.copy().items():
205
- if (
206
- module is not None
207
- and (not name.startswith("__") or name == "__main__")
208
- and name not in sys.builtin_module_names
209
- ):
210
- try:
211
- for k, v in vars(module).items():
212
- if (
213
- isinstance(v, types.ModuleType)
214
- and not k.startswith("_")
215
- and getattr(v, "__name__", None) in app_names
216
- ):
217
- if v.__name__ in sys.modules:
218
- vars(module)[k] = sys.modules[v.__name__]
219
- # Also reset classes from Django apps - but check if the class module starts with any app name
220
- elif hasattr(v, "__module__") and getattr(v, "__module__", None):
221
- class_module = v.__module__
222
- # Check if the class module starts with any of our app names
223
- if any(
224
- class_module.startswith(app_name) for app_name in app_names
225
- ):
226
- if class_module in sys.modules:
227
- fresh_module = sys.modules[class_module]
228
- attr_name = getattr(v, "__name__", k)
229
- if hasattr(fresh_module, attr_name):
230
- vars(module)[k] = getattr(fresh_module, attr_name)
231
- except (AttributeError, TypeError):
232
- continue
233
-
234
-
235
- def _connect_cli(
236
- instance: str, use_root_db_user: bool = False, use_proxy_db: bool = False
237
- ) -> None:
238
- from lamindb_setup import settings as settings_
239
-
240
- owner, name = get_owner_name_from_identifier(instance)
241
- isettings = _connect_instance(
242
- owner,
243
- name,
244
- use_root_db_user=use_root_db_user,
245
- use_proxy_db=use_proxy_db,
246
- raise_systemexit=True,
247
- )
248
- isettings._persist(write_to_disk=True)
249
- if not isettings.is_on_hub or isettings._is_cloud_sqlite:
250
- # there are two reasons to call the full-blown connect
251
- # (1) if the instance is not on the hub, we need to register
252
- # potential users through register_user()
253
- # (2) if the instance is cloud sqlite, we need to lock it
254
- connect(_write_settings=False, _reload_lamindb=False)
255
- else:
256
- logger.important(f"connected lamindb: {isettings.slug}")
257
- if settings_.dev_dir is None:
258
- logger.important_hint(
259
- "to map a local dev directory, call: lamin settings set dev-dir ."
260
- )
261
- return None
262
-
263
-
264
- def validate_connection_state(
265
- owner: str, name: str, use_root_db_user: bool = False
266
- ) -> None:
267
- from django.db import connection
268
-
269
- if (
270
- settings._instance_exists # exists only for real instances, not for none/none
271
- and f"{owner}/{name}" == settings.instance.slug
272
- and not use_root_db_user # always re-connect for root db user
273
- ):
274
- logger.important(
275
- f"doing nothing, already connected lamindb: {settings.instance.slug}"
276
- )
277
- return None
278
- else:
279
- if settings._instance_exists:
280
- import lamindb as ln
281
-
282
- if ln.context.transform is not None:
283
- raise CannotSwitchDefaultInstance(
284
- "Cannot switch default instance while `ln.track()` is live: call `ln.finish()`"
285
- )
286
- reset_django()
287
-
288
-
289
- @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
290
- def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
291
- """Connect the global default instance.
292
-
293
- If you want to create a read-only database client, use :class:`~lamindb.DB` instead.
294
-
295
- Args:
296
- instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
297
- If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
298
- If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
299
-
300
- See Also:
301
- Configure an instance for auto-connect via the CLI, see `here <https://docs.lamin.ai/cli#connect>`__.
302
- """
303
- # validate kwargs
304
- valid_kwargs = {
305
- "use_root_db_user",
306
- "use_proxy_db",
307
- "_db",
308
- "_write_settings",
309
- "_raise_not_found_error",
310
- "_reload_lamindb",
311
- "_test",
312
- "_user",
313
- }
314
- for kwarg in kwargs:
315
- if kwarg not in valid_kwargs:
316
- raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")
317
-
318
- use_root_db_user: bool = kwargs.get("use_root_db_user", False)
319
- use_proxy_db = kwargs.get("use_proxy_db", False)
320
- # _db is still needed because it is called in init
321
- _db: str | None = kwargs.get("_db", None)
322
- _write_settings: bool = kwargs.get("_write_settings", False)
323
- _raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
324
- _reload_lamindb: bool = kwargs.get("_reload_lamindb", True)
325
- _test: bool = kwargs.get("_test", False)
326
-
327
- isettings: InstanceSettings = None # type: ignore
328
-
329
- access_token: str | None = None
330
- _user: UserSettings | None = kwargs.get("_user", None)
331
- if _user is not None:
332
- access_token = _user.access_token
333
- if instance is None:
334
- instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
335
-
336
- try:
337
- if instance is None:
338
- if settings._instance_exists:
339
- isettings = settings.instance
340
- else:
341
- raise ValueError(
342
- "No instance was connected through the CLI, pass a value to `instance` or connect via the CLI."
343
- )
344
- if use_root_db_user:
345
- reset_django()
346
- owner, name = isettings.owner, isettings.name
347
- if _db is not None and isettings.dialect == "postgresql":
348
- isettings._db = _db
349
- else:
350
- owner, name = get_owner_name_from_identifier(instance)
351
- if _check_instance_setup() and not _test:
352
- validate_connection_state(
353
- owner, name, use_root_db_user=use_root_db_user
354
- )
355
- elif (
356
- _write_settings
357
- and settings._instance_exists
358
- and f"{owner}/{name}" != settings.instance.slug
359
- ):
360
- disconnect(mute=True)
361
-
362
- if instance is not None or use_root_db_user:
363
- try:
364
- isettings = _connect_instance(
365
- owner,
366
- name,
367
- db=_db,
368
- access_token=access_token,
369
- use_root_db_user=use_root_db_user,
370
- use_proxy_db=use_proxy_db,
371
- )
372
- except InstanceNotFoundError as e:
373
- if _raise_not_found_error:
374
- raise e
375
- else:
376
- return "instance-not-found"
377
- if isinstance(isettings, str):
378
- return isettings
379
- # at this point we have checked already that isettings is not a string
380
- # _user is passed to lock cloud sqlite for this user in isettings._load_db()
381
- # has no effect if _user is None or if not cloud sqlite instance
382
- isettings._locker_user = _user
383
- isettings._persist(write_to_disk=_write_settings)
384
- if _test:
385
- return None
386
- silence_loggers()
387
- check, msg = isettings._load_db()
388
- if not check:
389
- local_db = (
390
- isettings._is_cloud_sqlite and isettings._sqlite_file_local.exists()
391
- )
392
- if local_db:
393
- logger.warning(
394
- "SQLite file does not exist in the cloud, but exists locally:"
395
- f" {isettings._sqlite_file_local}\nTo push the file to the cloud,"
396
- " call: lamin disconnect"
397
- )
398
- elif _raise_not_found_error:
399
- raise SystemExit(msg)
400
- else:
401
- logger.warning(
402
- f"instance exists with id {isettings._id.hex}, but database is not"
403
- " loadable: re-initializing"
404
- )
405
- return "instance-corrupted-or-deleted"
406
- # this is for testing purposes only
407
- if _TEST_FAILED_LOAD:
408
- raise RuntimeError("Technical testing error.")
409
-
410
- load_from_isettings(isettings, user=_user, write_settings=_write_settings)
411
- if _reload_lamindb:
412
- reset_django_module_variables()
413
- if isettings.slug != "none/none":
414
- logger.important(f"connected lamindb: {isettings.slug}")
415
- except Exception as e:
416
- if isettings is not None:
417
- if _write_settings:
418
- isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
419
- settings._instance_settings = None
420
- raise e
421
- return None
422
-
423
-
424
- def get_owner_name_from_identifier(identifier: str) -> tuple[str, str]:
425
- if "/" in identifier:
426
- if identifier.startswith("https://lamin.ai/"):
427
- identifier = identifier.replace("https://lamin.ai/", "")
428
- split = identifier.split("/")
429
- if len(split) > 2:
430
- raise ValueError(
431
- "The instance identifier needs to be 'owner/name', the instance name"
432
- " (owner is current user) or the URL: https://lamin.ai/owner/name."
433
- )
434
- owner, name = split
435
- else:
436
- owner = settings.user.handle
437
- name = identifier
438
- return owner, name
1
+ from __future__ import annotations
2
+
3
+ import importlib
4
+ import os
5
+ import sys
6
+ import types
7
+ from typing import TYPE_CHECKING, Any
8
+ from uuid import UUID
9
+
10
+ from lamin_utils import logger
11
+
12
+ from ._check_setup import _check_instance_setup
13
+ from ._disconnect import disconnect
14
+ from ._init_instance import load_from_isettings
15
+ from ._silence_loggers import silence_loggers
16
+ from .core._settings import settings
17
+ from .core._settings_instance import InstanceSettings
18
+ from .core._settings_load import load_instance_settings
19
+ from .core._settings_storage import StorageSettings
20
+ from .core._settings_store import instance_settings_file
21
+ from .core.cloud_sqlite_locker import unlock_cloud_sqlite_upon_exception
22
+ from .core.django import reset_django
23
+ from .errors import CannotSwitchDefaultInstance, InstanceNotFoundError
24
+
25
+ if TYPE_CHECKING:
26
+ from pathlib import Path
27
+
28
+ from .core._settings_user import UserSettings
29
+ from .types import UPathStr
30
+
31
+ # this is for testing purposes only
32
+ # set to True only to test failed load
33
+ _TEST_FAILED_LOAD = False
34
+ INSTANCE_NOT_FOUND_MESSAGE = (
35
+ "'{owner}/{name}' not found:"
36
+ " '{hub_result}'\nCheck your permissions:"
37
+ " https://lamin.ai/{owner}/{name}"
38
+ )
39
+
40
+
41
+ def check_db_dsn_equal_up_to_credentials(db_dsn_hub, db_dsn_local):
42
+ return (
43
+ db_dsn_hub.scheme == db_dsn_local.scheme
44
+ and db_dsn_hub.host == db_dsn_local.host
45
+ and db_dsn_hub.database == db_dsn_local.database
46
+ and db_dsn_hub.port == db_dsn_local.port
47
+ )
48
+
49
+
50
+ def update_db_using_local(
51
+ hub_instance_result: dict[str, str],
52
+ settings_file: Path,
53
+ db: str | None = None,
54
+ raise_permission_error=True,
55
+ ) -> str | None:
56
+ db_updated = None
57
+ # check if postgres
58
+ if hub_instance_result["db_scheme"] == "postgresql":
59
+ if db is not None:
60
+ # use only the provided db if it is set
61
+ db_updated = db
62
+ elif (db_env := os.getenv("LAMINDB_INSTANCE_DB")) is not None:
63
+ logger.important("loading db URL from env variable LAMINDB_INSTANCE_DB")
64
+ # read directly from the environment
65
+ db_updated = db_env
66
+ else:
67
+ # dynamic import to avoid importing the heavy LaminDsnModel at root
68
+ from .core._hub_utils import LaminDsnModel
69
+
70
+ db_hub = hub_instance_result["db"]
71
+ db_dsn_hub = LaminDsnModel(db=db_hub)
72
+ # read from a cached settings file in case the hub result is inexistent
73
+ if db_dsn_hub.db.user in {None, "none"} and settings_file.exists():
74
+ isettings = load_instance_settings(settings_file)
75
+ db_updated = isettings.db
76
+ else:
77
+ # just take the default hub result and ensure there is actually a user
78
+ if (
79
+ db_dsn_hub.db.user in {None, "none"}
80
+ and db_dsn_hub.db.password in {None, "none"}
81
+ and raise_permission_error
82
+ ):
83
+ raise PermissionError(
84
+ "No database access, please ask your admin to provide you with"
85
+ " a DB URL and pass it via --db <db_url>"
86
+ )
87
+ db_updated = db_hub
88
+ return db_updated
89
+
90
+
91
+ def _connect_instance(
92
+ owner: str,
93
+ name: str,
94
+ *,
95
+ db: str | None = None,
96
+ raise_permission_error: bool = True,
97
+ use_root_db_user: bool = False,
98
+ use_proxy_db: bool = False,
99
+ access_token: str | None = None,
100
+ raise_systemexit: bool = False,
101
+ ) -> InstanceSettings:
102
+ settings_file = instance_settings_file(name, owner)
103
+ make_hub_request = True
104
+ if settings_file.exists():
105
+ isettings = load_instance_settings(settings_file)
106
+ # skip hub request for a purely local instance
107
+ if isettings.is_remote:
108
+ make_hub_request = True
109
+ else:
110
+ make_hub_request = False
111
+ if db is not None and isettings.dialect == "postgresql":
112
+ isettings._db = db
113
+ if make_hub_request:
114
+ # the following will return a string if the instance does not exist on the hub
115
+ # do not call hub if the user is anonymous
116
+ if owner != "anonymous":
117
+ from .core._hub_core import connect_instance_hub
118
+
119
+ hub_result = connect_instance_hub(
120
+ owner=owner,
121
+ name=name,
122
+ access_token=access_token,
123
+ use_root_db_user=use_root_db_user,
124
+ use_proxy_db=use_proxy_db,
125
+ )
126
+ else:
127
+ hub_result = "anonymous-user"
128
+ # if hub_result is not a string, it means it made a request
129
+ # that successfully returned metadata
130
+ if not isinstance(hub_result, str):
131
+ instance_result, storage_result = hub_result
132
+ db_updated = update_db_using_local(
133
+ instance_result,
134
+ settings_file,
135
+ db=db,
136
+ raise_permission_error=raise_permission_error,
137
+ )
138
+ ssettings = StorageSettings(
139
+ root=storage_result["root"],
140
+ region=storage_result["region"],
141
+ uid=storage_result["lnid"],
142
+ uuid=UUID(storage_result["id"]),
143
+ instance_id=UUID(instance_result["id"]),
144
+ )
145
+ isettings = InstanceSettings(
146
+ id=UUID(instance_result["id"]),
147
+ owner=owner,
148
+ name=instance_result["name"],
149
+ storage=ssettings,
150
+ db=db_updated,
151
+ modules=instance_result["schema_str"],
152
+ git_repo=instance_result["git_repo"],
153
+ keep_artifacts_local=bool(instance_result["keep_artifacts_local"]),
154
+ is_on_hub=True,
155
+ api_url=instance_result["api_url"],
156
+ schema_id=None
157
+ if (schema_id := instance_result["schema_id"]) is None
158
+ else UUID(schema_id),
159
+ fine_grained_access=bool(instance_result["fine_grained_access"]),
160
+ db_permissions=instance_result.get("db_permissions", None)
161
+ if not use_root_db_user
162
+ else "write",
163
+ )
164
+ else:
165
+ if hub_result != "anonymous-user":
166
+ message = INSTANCE_NOT_FOUND_MESSAGE.format(
167
+ owner=owner, name=name, hub_result=hub_result
168
+ )
169
+ else:
170
+ message = "It is not possible to load an anonymous-owned instance from the hub"
171
+ exception = (
172
+ SystemExit(message)
173
+ if raise_systemexit
174
+ else InstanceNotFoundError(message)
175
+ )
176
+ if settings_file.exists():
177
+ isettings = load_instance_settings(settings_file)
178
+ if isettings.is_remote:
179
+ raise exception
180
+ else:
181
+ raise exception
182
+ return isettings
183
+
184
+
185
+ def reset_django_module_variables():
186
+ # This function updates all module-level references to Django classes
187
+ # But it will fail to update function level references
188
+ # This is not a problem unless for the function that calls ln.connect() itself
189
+ # So, if a user has
190
+ #
191
+ # def my_function():
192
+ # import lamindb as ln
193
+ # ln.connect(...)
194
+ #
195
+ # Then it will **not** work and the `ln` variable becomes stale and hold a reference to the old classes
196
+ # Other functions that dynamically import are no problem because the variables
197
+ # are automatically refreshed when the function runs the next time after ln.connect() was called
198
+ logger.debug("resetting django module variables")
199
+
200
+ # django.apps needs to be a local import to refresh variables
201
+ from django.apps import apps
202
+
203
+ app_names = {app.name for app in apps.get_app_configs()}
204
+ # always copy before iterations over sys.modules
205
+ # see https://docs.python.org/3/library/sys.html#sys.modules
206
+ # this whole thing runs about 50ms in a big env
207
+ for name, module in sys.modules.copy().items():
208
+ if (
209
+ module is not None
210
+ and (not name.startswith("__") or name == "__main__")
211
+ and name not in sys.builtin_module_names
212
+ ):
213
+ try:
214
+ module_vars = vars(module) # references the original
215
+ # copy to avoid changing size during the loop
216
+ for k, v in module_vars.copy().items():
217
+ if (
218
+ isinstance(v, types.ModuleType)
219
+ and not k.startswith("_")
220
+ and getattr(v, "__name__", None) in app_names
221
+ ):
222
+ if v.__name__ in sys.modules:
223
+ module_vars[k] = sys.modules[v.__name__]
224
+ # Also reset classes from Django apps - but check if the class module starts with any app name
225
+ elif hasattr(v, "__module__") and getattr(v, "__module__", None):
226
+ class_module = v.__module__
227
+ # Check if the class module starts with any of our app names
228
+ if any(
229
+ class_module.startswith(app_name) for app_name in app_names
230
+ ):
231
+ if class_module in sys.modules:
232
+ fresh_module = sys.modules[class_module]
233
+ attr_name = getattr(v, "__name__", k)
234
+ if hasattr(fresh_module, attr_name):
235
+ module_vars[k] = getattr(fresh_module, attr_name)
236
+ except (AttributeError, TypeError):
237
+ continue
238
+
239
+
240
+ def _connect_cli(
241
+ instance: str,
242
+ use_root_db_user: bool = False,
243
+ ) -> None:
244
+ from lamindb_setup import settings as settings_
245
+
246
+ owner, name = get_owner_name_from_identifier(instance)
247
+ isettings = _connect_instance(
248
+ owner,
249
+ name,
250
+ use_root_db_user=use_root_db_user,
251
+ raise_systemexit=True,
252
+ )
253
+ isettings._persist(write_to_disk=True)
254
+ if not isettings.is_on_hub or isettings._is_cloud_sqlite:
255
+ # there are two reasons to call the full-blown connect
256
+ # (1) if the instance is not on the hub, we need to register
257
+ # potential users through register_user()
258
+ # (2) if the instance is cloud sqlite, we need to lock it
259
+ connect(_write_settings=False, _reload_lamindb=False)
260
+ else:
261
+ logger.important(f"connected lamindb: {isettings.slug}")
262
+ if settings_.dev_dir is None:
263
+ logger.important_hint(
264
+ "to map a local dev directory, call: lamin settings set dev-dir ."
265
+ )
266
+ return None
267
+
268
+
269
+ def validate_connection_state(
270
+ owner: str, name: str, use_root_db_user: bool = False
271
+ ) -> None:
272
+ from django.db import connection
273
+
274
+ if (
275
+ settings._instance_exists # exists only for real instances, not for none/none
276
+ and f"{owner}/{name}" == settings.instance.slug
277
+ and not use_root_db_user # always re-connect for root db user
278
+ ):
279
+ logger.important(
280
+ f"doing nothing, already connected lamindb: {settings.instance.slug}"
281
+ )
282
+ return None
283
+ else:
284
+ if settings._instance_exists:
285
+ import lamindb as ln
286
+
287
+ if ln.context.transform is not None:
288
+ raise CannotSwitchDefaultInstance(
289
+ "Cannot switch default instance while `ln.track()` is live: call `ln.finish()`"
290
+ )
291
+ reset_django()
292
+
293
+
294
+ @unlock_cloud_sqlite_upon_exception(ignore_prev_locker=True)
295
+ def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
296
+ """Connect the global default instance.
297
+
298
+ If you want to create a read-only database client, use :class:`~lamindb.DB` instead.
299
+
300
+ Args:
301
+ instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
302
+ If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
303
+ If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
304
+
305
+ See Also:
306
+ Configure an instance for auto-connect via the CLI, see `here <https://docs.lamin.ai/cli#connect>`__.
307
+ """
308
+ # validate kwargs
309
+ valid_kwargs = {
310
+ "use_root_db_user",
311
+ "use_proxy_db",
312
+ "_db",
313
+ "_write_settings",
314
+ "_raise_not_found_error",
315
+ "_reload_lamindb",
316
+ "_test",
317
+ "_user",
318
+ }
319
+ for kwarg in kwargs:
320
+ if kwarg not in valid_kwargs:
321
+ raise TypeError(f"connect() got unexpected keyword argument '{kwarg}'")
322
+
323
+ use_root_db_user: bool = kwargs.get("use_root_db_user", False)
324
+ use_proxy_db = kwargs.get("use_proxy_db", False)
325
+ # _db is still needed because it is called in init
326
+ _db: str | None = kwargs.get("_db", None)
327
+ _write_settings: bool = kwargs.get("_write_settings", False)
328
+ _raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
329
+ _reload_lamindb: bool = kwargs.get("_reload_lamindb", True)
330
+ _test: bool = kwargs.get("_test", False)
331
+
332
+ isettings: InstanceSettings = None # type: ignore
333
+
334
+ access_token: str | None = None
335
+ _user: UserSettings | None = kwargs.get("_user", None)
336
+ if _user is not None:
337
+ access_token = _user.access_token
338
+ if instance is None:
339
+ instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
340
+
341
+ try:
342
+ if instance is None:
343
+ if settings._instance_exists:
344
+ isettings = settings.instance
345
+ else:
346
+ raise ValueError(
347
+ "No instance was connected through the CLI, pass a value to `instance` or connect via the CLI."
348
+ )
349
+ if use_root_db_user:
350
+ reset_django()
351
+ owner, name = isettings.owner, isettings.name
352
+ if _db is not None and isettings.dialect == "postgresql":
353
+ isettings._db = _db
354
+ else:
355
+ owner, name = get_owner_name_from_identifier(instance)
356
+ if _check_instance_setup() and not _test:
357
+ validate_connection_state(
358
+ owner, name, use_root_db_user=use_root_db_user
359
+ )
360
+ elif (
361
+ _write_settings
362
+ and settings._instance_exists
363
+ and f"{owner}/{name}" != settings.instance.slug
364
+ ):
365
+ disconnect(mute=True)
366
+
367
+ if instance is not None or use_root_db_user:
368
+ try:
369
+ isettings = _connect_instance(
370
+ owner,
371
+ name,
372
+ db=_db,
373
+ access_token=access_token,
374
+ use_root_db_user=use_root_db_user,
375
+ use_proxy_db=use_proxy_db,
376
+ )
377
+ except InstanceNotFoundError as e:
378
+ if _raise_not_found_error:
379
+ raise e
380
+ else:
381
+ return "instance-not-found"
382
+ if isinstance(isettings, str):
383
+ return isettings
384
+ # at this point we have checked already that isettings is not a string
385
+ # _user is passed to lock cloud sqlite for this user in isettings._load_db()
386
+ # has no effect if _user is None or if not cloud sqlite instance
387
+ isettings._locker_user = _user
388
+ isettings._persist(write_to_disk=_write_settings)
389
+ if _test:
390
+ return None
391
+ silence_loggers()
392
+ check, msg = isettings._load_db()
393
+ if not check:
394
+ local_db = (
395
+ isettings._is_cloud_sqlite and isettings._sqlite_file_local.exists()
396
+ )
397
+ if local_db:
398
+ logger.warning(
399
+ "SQLite file does not exist in the cloud, but exists locally:"
400
+ f" {isettings._sqlite_file_local}\nTo push the file to the cloud,"
401
+ " call: lamin disconnect"
402
+ )
403
+ elif _raise_not_found_error:
404
+ raise SystemExit(msg)
405
+ else:
406
+ logger.warning(
407
+ f"instance exists with id {isettings._id.hex}, but database is not"
408
+ " loadable: re-initializing"
409
+ )
410
+ return "instance-corrupted-or-deleted"
411
+ # this is for testing purposes only
412
+ if _TEST_FAILED_LOAD:
413
+ raise RuntimeError("Technical testing error.")
414
+
415
+ load_from_isettings(isettings, user=_user, write_settings=_write_settings)
416
+ if _reload_lamindb:
417
+ reset_django_module_variables()
418
+ if isettings.slug != "none/none":
419
+ logger.important(f"connected lamindb: {isettings.slug}")
420
+ except Exception as e:
421
+ if isettings is not None:
422
+ if _write_settings:
423
+ isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
424
+ settings._instance_settings = None
425
+ raise e
426
+ return None
427
+
428
+
429
+ def get_owner_name_from_identifier(identifier: str) -> tuple[str, str]:
430
+ if "/" in identifier:
431
+ if identifier.startswith("https://lamin.ai/"):
432
+ identifier = identifier.replace("https://lamin.ai/", "")
433
+ split = identifier.split("/")
434
+ if len(split) > 2:
435
+ raise ValueError(
436
+ "The instance identifier needs to be 'owner/name', the instance name"
437
+ " (owner is current user) or the URL: https://lamin.ai/owner/name."
438
+ )
439
+ owner, name = split
440
+ else:
441
+ owner = settings.user.handle
442
+ name = identifier
443
+ return owner, name