lamindb_setup 1.8.3__py3-none-any.whl → 1.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +107 -107
- lamindb_setup/_cache.py +87 -87
- lamindb_setup/_check_setup.py +166 -166
- lamindb_setup/_connect_instance.py +328 -342
- lamindb_setup/_delete.py +141 -141
- lamindb_setup/_disconnect.py +32 -32
- lamindb_setup/_init_instance.py +440 -440
- lamindb_setup/_migrate.py +266 -259
- lamindb_setup/_register_instance.py +35 -35
- lamindb_setup/_schema_metadata.py +441 -441
- lamindb_setup/_set_managed_storage.py +70 -70
- lamindb_setup/_setup_user.py +133 -133
- lamindb_setup/core/__init__.py +21 -21
- lamindb_setup/core/_aws_options.py +223 -211
- lamindb_setup/core/_hub_client.py +248 -243
- lamindb_setup/core/_hub_core.py +665 -663
- lamindb_setup/core/_hub_crud.py +227 -227
- lamindb_setup/core/_private_django_api.py +83 -83
- lamindb_setup/core/_settings.py +377 -364
- lamindb_setup/core/_settings_instance.py +569 -568
- lamindb_setup/core/_settings_load.py +141 -141
- lamindb_setup/core/_settings_save.py +95 -95
- lamindb_setup/core/_settings_storage.py +429 -429
- lamindb_setup/core/_settings_store.py +91 -91
- lamindb_setup/core/_settings_user.py +55 -55
- lamindb_setup/core/_setup_bionty_sources.py +44 -44
- lamindb_setup/core/cloud_sqlite_locker.py +240 -240
- lamindb_setup/core/django.py +305 -291
- lamindb_setup/core/exceptions.py +1 -1
- lamindb_setup/core/hashing.py +134 -134
- lamindb_setup/core/types.py +1 -1
- lamindb_setup/core/upath.py +1013 -1009
- lamindb_setup/errors.py +70 -70
- lamindb_setup/types.py +20 -20
- {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/METADATA +1 -1
- lamindb_setup-1.9.1.dist-info/RECORD +50 -0
- lamindb_setup-1.8.3.dist-info/RECORD +0 -50
- {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/WHEEL +0 -0
lamindb_setup/_init_instance.py
CHANGED
|
@@ -1,440 +1,440 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import importlib
|
|
4
|
-
import os
|
|
5
|
-
import uuid
|
|
6
|
-
from typing import TYPE_CHECKING, Literal
|
|
7
|
-
from uuid import UUID
|
|
8
|
-
|
|
9
|
-
import click
|
|
10
|
-
from django.core.exceptions import FieldError
|
|
11
|
-
from django.db.utils import IntegrityError, OperationalError, ProgrammingError
|
|
12
|
-
from lamin_utils import logger
|
|
13
|
-
|
|
14
|
-
from ._disconnect import disconnect
|
|
15
|
-
from ._silence_loggers import silence_loggers
|
|
16
|
-
from .core import InstanceSettings
|
|
17
|
-
from .core._docs import doc_args
|
|
18
|
-
from .core._settings import settings
|
|
19
|
-
from .core._settings_instance import is_local_db_url
|
|
20
|
-
from .core._settings_storage import StorageSettings, init_storage
|
|
21
|
-
from .core.upath import UPath
|
|
22
|
-
from .errors import CannotSwitchDefaultInstance
|
|
23
|
-
|
|
24
|
-
if TYPE_CHECKING:
|
|
25
|
-
from pydantic import PostgresDsn
|
|
26
|
-
|
|
27
|
-
from .core._settings_user import UserSettings
|
|
28
|
-
from .types import UPathStr
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class InstanceNotCreated(click.ClickException):
|
|
32
|
-
def show(self, file=None):
|
|
33
|
-
pass
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def get_schema_module_name(module_name, raise_import_error: bool = True) -> str | None:
|
|
37
|
-
import importlib.util
|
|
38
|
-
|
|
39
|
-
if module_name == "core":
|
|
40
|
-
return "lamindb"
|
|
41
|
-
name_attempts = [f"lnschema_{module_name.replace('-', '_')}", module_name]
|
|
42
|
-
for name in name_attempts:
|
|
43
|
-
module_spec = importlib.util.find_spec(name)
|
|
44
|
-
if module_spec is not None:
|
|
45
|
-
return name
|
|
46
|
-
message = f"schema module '{module_name}' is not installed → resolve via `pip install {module_name}`"
|
|
47
|
-
if raise_import_error:
|
|
48
|
-
raise ImportError(message)
|
|
49
|
-
return None
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def register_storage_in_instance(ssettings: StorageSettings):
|
|
53
|
-
from lamindb.models import Storage
|
|
54
|
-
|
|
55
|
-
# how do we ensure that this function is only called passing
|
|
56
|
-
# the managing instance?
|
|
57
|
-
kwargs = {
|
|
58
|
-
"root": ssettings.root_as_str,
|
|
59
|
-
"type": ssettings.type,
|
|
60
|
-
"region": ssettings.region,
|
|
61
|
-
"instance_uid": ssettings.instance_uid,
|
|
62
|
-
"run": None,
|
|
63
|
-
"_skip_preparation": True,
|
|
64
|
-
}
|
|
65
|
-
if ssettings._uid is not None:
|
|
66
|
-
kwargs["uid"] = ssettings._uid
|
|
67
|
-
# this checks if the storage already exists under the hood
|
|
68
|
-
storage = Storage(**kwargs).save()
|
|
69
|
-
return storage
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def register_user(usettings: UserSettings, update_user: bool = True):
|
|
73
|
-
from lamindb.models import User
|
|
74
|
-
|
|
75
|
-
if not update_user and User.objects.filter(uid=usettings.uid).exists():
|
|
76
|
-
return
|
|
77
|
-
|
|
78
|
-
try:
|
|
79
|
-
# need to have try except because of integer primary key migration
|
|
80
|
-
user, created = User.objects.update_or_create(
|
|
81
|
-
uid=usettings.uid,
|
|
82
|
-
defaults={
|
|
83
|
-
"handle": usettings.handle,
|
|
84
|
-
"name": usettings.name,
|
|
85
|
-
},
|
|
86
|
-
)
|
|
87
|
-
# for users with only read access, except via ProgrammingError
|
|
88
|
-
# ProgrammingError: permission denied for table lamindb_user
|
|
89
|
-
# IntegrityError: when trying to update a user on a fine-grained access instance
|
|
90
|
-
except (OperationalError, FieldError, ProgrammingError, IntegrityError):
|
|
91
|
-
pass
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
def register_initial_records(isettings: InstanceSettings, usettings: UserSettings):
|
|
95
|
-
"""Register space, user & storage in DB."""
|
|
96
|
-
from django.db.utils import OperationalError
|
|
97
|
-
from lamindb.models import Branch, Space
|
|
98
|
-
|
|
99
|
-
try:
|
|
100
|
-
Space.objects.get_or_create(
|
|
101
|
-
uid=12 * "a",
|
|
102
|
-
name="all",
|
|
103
|
-
description="Every team & user with access to the instance has access.",
|
|
104
|
-
)
|
|
105
|
-
Branch.objects.get_or_create(
|
|
106
|
-
id=-1,
|
|
107
|
-
uid=12 * "t",
|
|
108
|
-
name="trash",
|
|
109
|
-
description="The trash.",
|
|
110
|
-
)
|
|
111
|
-
Branch.objects.get_or_create(
|
|
112
|
-
id=0,
|
|
113
|
-
uid=12 * "a",
|
|
114
|
-
name="archive",
|
|
115
|
-
description="The archive.",
|
|
116
|
-
)
|
|
117
|
-
Branch.objects.get_or_create(
|
|
118
|
-
uid=12 * "m",
|
|
119
|
-
name="main",
|
|
120
|
-
description="The main & default branch of the instance.",
|
|
121
|
-
)
|
|
122
|
-
register_user(usettings)
|
|
123
|
-
register_storage_in_instance(isettings.storage)
|
|
124
|
-
except OperationalError as error:
|
|
125
|
-
logger.warning(f"instance seems not set up ({error})")
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
ERROR_SQLITE_CACHE = """
|
|
129
|
-
Your cached local SQLite file exists, while your cloud SQLite file ({}) doesn't.
|
|
130
|
-
Either delete your cache ({}) or add it back to the cloud (if delete was accidental).
|
|
131
|
-
"""
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
def process_connect_response(
|
|
135
|
-
response: tuple | str, instance_identifier: str
|
|
136
|
-
) -> tuple[
|
|
137
|
-
UUID,
|
|
138
|
-
Literal[
|
|
139
|
-
"instance-corrupted-or-deleted", "account-not-exists", "instance-not-found"
|
|
140
|
-
],
|
|
141
|
-
]:
|
|
142
|
-
# for internal use when creating instances through CICD
|
|
143
|
-
if isinstance(response, tuple) and response[0] == "instance-corrupted-or-deleted":
|
|
144
|
-
hub_result = response[1]
|
|
145
|
-
instance_state = response[0]
|
|
146
|
-
instance_id = UUID(hub_result["id"])
|
|
147
|
-
else:
|
|
148
|
-
instance_id_str = os.getenv("LAMINDB_INSTANCE_ID_INIT")
|
|
149
|
-
if instance_id_str is None:
|
|
150
|
-
instance_id = uuid.uuid5(uuid.NAMESPACE_URL, instance_identifier)
|
|
151
|
-
else:
|
|
152
|
-
instance_id = UUID(instance_id_str)
|
|
153
|
-
instance_state = response
|
|
154
|
-
return instance_id, instance_state
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
def process_modules_arg(modules: str | None = None) -> str:
|
|
158
|
-
if modules is None or modules == "":
|
|
159
|
-
return ""
|
|
160
|
-
# currently no actual validation, can add back if we see a need
|
|
161
|
-
# the following just strips white spaces
|
|
162
|
-
to_be_validated = [s.strip() for s in modules.split(",")]
|
|
163
|
-
return ",".join(to_be_validated)
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
def validate_init_args(
|
|
167
|
-
*,
|
|
168
|
-
storage: UPathStr,
|
|
169
|
-
name: str | None = None,
|
|
170
|
-
db: PostgresDsn | None = None,
|
|
171
|
-
modules: str | None = None,
|
|
172
|
-
_test: bool = False,
|
|
173
|
-
_write_settings: bool = True,
|
|
174
|
-
_user: UserSettings | None = None,
|
|
175
|
-
) -> tuple[
|
|
176
|
-
str,
|
|
177
|
-
UUID,
|
|
178
|
-
Literal[
|
|
179
|
-
"connected",
|
|
180
|
-
"instance-corrupted-or-deleted",
|
|
181
|
-
"account-not-exists",
|
|
182
|
-
"instance-not-found",
|
|
183
|
-
],
|
|
184
|
-
str,
|
|
185
|
-
]:
|
|
186
|
-
from ._connect_instance import connect
|
|
187
|
-
|
|
188
|
-
if storage is None:
|
|
189
|
-
raise SystemExit("✗ `storage` argument can't be `None`")
|
|
190
|
-
# should be called as the first thing
|
|
191
|
-
name_str = infer_instance_name(storage=storage, name=name, db=db)
|
|
192
|
-
owner_str = settings.user.handle if _user is None else _user.handle
|
|
193
|
-
# test whether instance exists by trying to load it
|
|
194
|
-
instance_slug = f"{owner_str}/{name_str}"
|
|
195
|
-
response = connect(
|
|
196
|
-
instance_slug,
|
|
197
|
-
_db=db,
|
|
198
|
-
_raise_not_found_error=False,
|
|
199
|
-
_test=_test,
|
|
200
|
-
_write_settings=_write_settings,
|
|
201
|
-
_user=_user,
|
|
202
|
-
)
|
|
203
|
-
instance_id: UUID
|
|
204
|
-
instance_state: Literal[
|
|
205
|
-
"connected",
|
|
206
|
-
"instance-corrupted-or-deleted",
|
|
207
|
-
"account-not-exists",
|
|
208
|
-
"instance-not-found",
|
|
209
|
-
]
|
|
210
|
-
if response is None:
|
|
211
|
-
instance_state, instance_id = "connected", settings.instance._id
|
|
212
|
-
else:
|
|
213
|
-
instance_id, instance_state = process_connect_response(response, instance_slug)
|
|
214
|
-
modules = process_modules_arg(modules)
|
|
215
|
-
return name_str, instance_id, instance_state, instance_slug
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
MESSAGE_CANNOT_SWITCH_DEFAULT_INSTANCE = """
|
|
219
|
-
You cannot write to different instances in the same Python session.
|
|
220
|
-
|
|
221
|
-
Do you want to read from another instance via `SQLRecord.using()`? For example:
|
|
222
|
-
|
|
223
|
-
ln.Artifact.using("laminlabs/cellxgene").filter()
|
|
224
|
-
|
|
225
|
-
Or do you want to switch off auto-connect via `lamin settings set auto-connect false`?
|
|
226
|
-
"""
|
|
227
|
-
|
|
228
|
-
DOC_STORAGE_ARG = "A local or remote folder (`'s3://...'` or `'gs://...'`). Defaults to current working directory."
|
|
229
|
-
DOC_INSTANCE_NAME = (
|
|
230
|
-
"Instance name. If not passed, it will equal the folder name passed to `storage`."
|
|
231
|
-
)
|
|
232
|
-
DOC_DB = "Database connection URL. Defaults to `None`, which implies an SQLite file in the storage location."
|
|
233
|
-
DOC_MODULES = "Comma-separated string of schema modules."
|
|
234
|
-
DOC_LOW_LEVEL_KWARGS = "Keyword arguments for low-level control."
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
@doc_args(DOC_STORAGE_ARG, DOC_INSTANCE_NAME, DOC_DB, DOC_MODULES, DOC_LOW_LEVEL_KWARGS)
|
|
238
|
-
def init(
|
|
239
|
-
*,
|
|
240
|
-
storage: UPathStr = ".",
|
|
241
|
-
name: str | None = None,
|
|
242
|
-
db: PostgresDsn | None = None,
|
|
243
|
-
modules: str | None = None,
|
|
244
|
-
**kwargs,
|
|
245
|
-
) -> None:
|
|
246
|
-
"""Init a LaminDB instance.
|
|
247
|
-
|
|
248
|
-
Args:
|
|
249
|
-
storage: {}
|
|
250
|
-
name: {}
|
|
251
|
-
db: {}
|
|
252
|
-
modules: {}
|
|
253
|
-
**kwargs: {}
|
|
254
|
-
"""
|
|
255
|
-
isettings = None
|
|
256
|
-
ssettings = None
|
|
257
|
-
|
|
258
|
-
_write_settings: bool = kwargs.get("_write_settings", True)
|
|
259
|
-
if modules is None:
|
|
260
|
-
modules = kwargs.get("schema", None)
|
|
261
|
-
_test: bool = kwargs.get("_test", False)
|
|
262
|
-
|
|
263
|
-
# use this user instead of settings.user
|
|
264
|
-
# contains access_token
|
|
265
|
-
_user: UserSettings | None = kwargs.get("_user", None)
|
|
266
|
-
user_handle: str = settings.user.handle if _user is None else _user.handle
|
|
267
|
-
user__uuid: UUID = settings.user._uuid if _user is None else _user._uuid # type: ignore
|
|
268
|
-
access_token: str | None = None if _user is None else _user.access_token
|
|
269
|
-
|
|
270
|
-
try:
|
|
271
|
-
silence_loggers()
|
|
272
|
-
from ._check_setup import _check_instance_setup
|
|
273
|
-
|
|
274
|
-
if _check_instance_setup() and not _test:
|
|
275
|
-
raise CannotSwitchDefaultInstance(MESSAGE_CANNOT_SWITCH_DEFAULT_INSTANCE)
|
|
276
|
-
elif _write_settings:
|
|
277
|
-
disconnect(mute=True)
|
|
278
|
-
from .core._hub_core import init_instance_hub
|
|
279
|
-
|
|
280
|
-
name_str, instance_id, instance_state, _ = validate_init_args(
|
|
281
|
-
storage=storage,
|
|
282
|
-
name=name,
|
|
283
|
-
db=db,
|
|
284
|
-
modules=modules,
|
|
285
|
-
_test=_test,
|
|
286
|
-
_write_settings=_write_settings,
|
|
287
|
-
_user=_user, # will get from settings.user if _user is None
|
|
288
|
-
)
|
|
289
|
-
if instance_state == "connected":
|
|
290
|
-
if _write_settings:
|
|
291
|
-
settings.auto_connect = True # we can also debate this switch here
|
|
292
|
-
return None
|
|
293
|
-
prevent_register_hub = is_local_db_url(db) if db is not None else False
|
|
294
|
-
ssettings, _ = init_storage(
|
|
295
|
-
storage,
|
|
296
|
-
instance_id=instance_id,
|
|
297
|
-
instance_slug=f"{user_handle}/{name_str}",
|
|
298
|
-
init_instance=True,
|
|
299
|
-
prevent_register_hub=prevent_register_hub,
|
|
300
|
-
created_by=user__uuid,
|
|
301
|
-
access_token=access_token,
|
|
302
|
-
)
|
|
303
|
-
isettings = InstanceSettings(
|
|
304
|
-
id=instance_id, # type: ignore
|
|
305
|
-
owner=user_handle,
|
|
306
|
-
name=name_str,
|
|
307
|
-
storage=ssettings,
|
|
308
|
-
db=db,
|
|
309
|
-
modules=modules,
|
|
310
|
-
uid=ssettings.uid,
|
|
311
|
-
# to lock passed user in isettings._cloud_sqlite_locker.lock()
|
|
312
|
-
_locker_user=_user, # only has effect if cloud sqlite
|
|
313
|
-
)
|
|
314
|
-
register_on_hub = (
|
|
315
|
-
isettings.is_remote and instance_state != "instance-corrupted-or-deleted"
|
|
316
|
-
)
|
|
317
|
-
if register_on_hub:
|
|
318
|
-
# can't register the instance in the hub
|
|
319
|
-
# if storage is not in the hub
|
|
320
|
-
# raise the exception and initiate cleanups
|
|
321
|
-
if not isettings.storage.is_on_hub:
|
|
322
|
-
raise InstanceNotCreated(
|
|
323
|
-
"Unable to create the instance because failed to register the storage."
|
|
324
|
-
)
|
|
325
|
-
init_instance_hub(
|
|
326
|
-
isettings, account_id=user__uuid, access_token=access_token
|
|
327
|
-
)
|
|
328
|
-
validate_sqlite_state(isettings)
|
|
329
|
-
# why call it here if it is also called in load_from_isettings?
|
|
330
|
-
isettings._persist(write_to_disk=_write_settings)
|
|
331
|
-
if _test:
|
|
332
|
-
return None
|
|
333
|
-
isettings._init_db()
|
|
334
|
-
load_from_isettings(
|
|
335
|
-
isettings, init=True, user=_user, write_settings=_write_settings
|
|
336
|
-
)
|
|
337
|
-
if _write_settings and isettings._is_cloud_sqlite:
|
|
338
|
-
isettings._cloud_sqlite_locker.lock()
|
|
339
|
-
logger.warning(
|
|
340
|
-
"locked instance (to unlock and push changes to the cloud SQLite file,"
|
|
341
|
-
" call: lamin disconnect)"
|
|
342
|
-
)
|
|
343
|
-
if register_on_hub and isettings.dialect != "sqlite":
|
|
344
|
-
from ._schema_metadata import update_schema_in_hub
|
|
345
|
-
|
|
346
|
-
update_schema_in_hub(access_token=access_token)
|
|
347
|
-
if _write_settings:
|
|
348
|
-
settings.auto_connect = True
|
|
349
|
-
importlib.reload(importlib.import_module("lamindb"))
|
|
350
|
-
logger.important(f"initialized lamindb: {isettings.slug}")
|
|
351
|
-
except Exception as e:
|
|
352
|
-
from ._delete import delete_by_isettings
|
|
353
|
-
from .core._hub_core import delete_instance_record, delete_storage_record
|
|
354
|
-
|
|
355
|
-
if isettings is not None:
|
|
356
|
-
if _write_settings:
|
|
357
|
-
delete_by_isettings(isettings)
|
|
358
|
-
else:
|
|
359
|
-
settings._instance_settings = None
|
|
360
|
-
if (
|
|
361
|
-
ssettings is not None
|
|
362
|
-
and (user_handle != "anonymous" or access_token is not None)
|
|
363
|
-
and ssettings.is_on_hub
|
|
364
|
-
):
|
|
365
|
-
delete_storage_record(ssettings, access_token=access_token) # type: ignore
|
|
366
|
-
if isettings is not None:
|
|
367
|
-
if (
|
|
368
|
-
user_handle != "anonymous" or access_token is not None
|
|
369
|
-
) and isettings.is_on_hub:
|
|
370
|
-
delete_instance_record(isettings._id, access_token=access_token)
|
|
371
|
-
raise e
|
|
372
|
-
return None
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
def load_from_isettings(
|
|
376
|
-
isettings: InstanceSettings,
|
|
377
|
-
*,
|
|
378
|
-
init: bool = False,
|
|
379
|
-
user: UserSettings | None = None,
|
|
380
|
-
write_settings: bool = True,
|
|
381
|
-
) -> None:
|
|
382
|
-
from .core._setup_bionty_sources import write_bionty_sources
|
|
383
|
-
|
|
384
|
-
user = settings.user if user is None else user
|
|
385
|
-
|
|
386
|
-
if init:
|
|
387
|
-
# during init space, user and storage need to be registered
|
|
388
|
-
register_initial_records(isettings, user)
|
|
389
|
-
write_bionty_sources(isettings)
|
|
390
|
-
isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
|
|
391
|
-
else:
|
|
392
|
-
# when loading, django is already set up
|
|
393
|
-
#
|
|
394
|
-
# only register user if the instance is connected
|
|
395
|
-
# for the first time in an environment
|
|
396
|
-
# this is our best proxy for that the user might not
|
|
397
|
-
# yet be registered
|
|
398
|
-
if not isettings._get_settings_file().exists():
|
|
399
|
-
# do not try to update the user on fine grained access instances
|
|
400
|
-
# this is blocked anyways, only select and insert are allowed
|
|
401
|
-
register_user(user, update_user=not isettings._fine_grained_access)
|
|
402
|
-
isettings._persist(write_to_disk=write_settings)
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
def validate_sqlite_state(isettings: InstanceSettings) -> None:
|
|
406
|
-
if isettings._is_cloud_sqlite:
|
|
407
|
-
if (
|
|
408
|
-
# it's important to first evaluate the existence check
|
|
409
|
-
# for the local sqlite file because it doesn't need a network
|
|
410
|
-
# request
|
|
411
|
-
isettings._sqlite_file_local.exists()
|
|
412
|
-
and not isettings._sqlite_file.exists()
|
|
413
|
-
):
|
|
414
|
-
raise RuntimeError(
|
|
415
|
-
ERROR_SQLITE_CACHE.format(
|
|
416
|
-
isettings._sqlite_file, isettings._sqlite_file_local
|
|
417
|
-
)
|
|
418
|
-
)
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
def infer_instance_name(
|
|
422
|
-
*,
|
|
423
|
-
storage: UPathStr,
|
|
424
|
-
name: str | None = None,
|
|
425
|
-
db: PostgresDsn | None = None,
|
|
426
|
-
) -> str:
|
|
427
|
-
if name is not None:
|
|
428
|
-
if "/" in name:
|
|
429
|
-
raise ValueError("Invalid instance name: '/' delimiter not allowed.")
|
|
430
|
-
return name
|
|
431
|
-
if db is not None:
|
|
432
|
-
logger.warning("using the sql database name for the instance name")
|
|
433
|
-
# this isn't a great way to access the db name
|
|
434
|
-
# could use LaminDsn instead
|
|
435
|
-
return str(db).split("/")[-1]
|
|
436
|
-
if storage == "create-s3":
|
|
437
|
-
raise ValueError("pass name to init if storage = 'create-s3'")
|
|
438
|
-
storage_path = UPath(storage).resolve()
|
|
439
|
-
name = storage_path.path.rstrip("/").split("/")[-1]
|
|
440
|
-
return name.lower()
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import importlib
|
|
4
|
+
import os
|
|
5
|
+
import uuid
|
|
6
|
+
from typing import TYPE_CHECKING, Literal
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
import click
|
|
10
|
+
from django.core.exceptions import FieldError
|
|
11
|
+
from django.db.utils import IntegrityError, OperationalError, ProgrammingError
|
|
12
|
+
from lamin_utils import logger
|
|
13
|
+
|
|
14
|
+
from ._disconnect import disconnect
|
|
15
|
+
from ._silence_loggers import silence_loggers
|
|
16
|
+
from .core import InstanceSettings
|
|
17
|
+
from .core._docs import doc_args
|
|
18
|
+
from .core._settings import settings
|
|
19
|
+
from .core._settings_instance import is_local_db_url
|
|
20
|
+
from .core._settings_storage import StorageSettings, init_storage
|
|
21
|
+
from .core.upath import UPath
|
|
22
|
+
from .errors import CannotSwitchDefaultInstance
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from pydantic import PostgresDsn
|
|
26
|
+
|
|
27
|
+
from .core._settings_user import UserSettings
|
|
28
|
+
from .types import UPathStr
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class InstanceNotCreated(click.ClickException):
|
|
32
|
+
def show(self, file=None):
|
|
33
|
+
pass
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def get_schema_module_name(module_name, raise_import_error: bool = True) -> str | None:
|
|
37
|
+
import importlib.util
|
|
38
|
+
|
|
39
|
+
if module_name == "core":
|
|
40
|
+
return "lamindb"
|
|
41
|
+
name_attempts = [f"lnschema_{module_name.replace('-', '_')}", module_name]
|
|
42
|
+
for name in name_attempts:
|
|
43
|
+
module_spec = importlib.util.find_spec(name)
|
|
44
|
+
if module_spec is not None:
|
|
45
|
+
return name
|
|
46
|
+
message = f"schema module '{module_name}' is not installed → resolve via `pip install {module_name}`"
|
|
47
|
+
if raise_import_error:
|
|
48
|
+
raise ImportError(message)
|
|
49
|
+
return None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def register_storage_in_instance(ssettings: StorageSettings):
|
|
53
|
+
from lamindb.models import Storage
|
|
54
|
+
|
|
55
|
+
# how do we ensure that this function is only called passing
|
|
56
|
+
# the managing instance?
|
|
57
|
+
kwargs = {
|
|
58
|
+
"root": ssettings.root_as_str,
|
|
59
|
+
"type": ssettings.type,
|
|
60
|
+
"region": ssettings.region,
|
|
61
|
+
"instance_uid": ssettings.instance_uid,
|
|
62
|
+
"run": None,
|
|
63
|
+
"_skip_preparation": True,
|
|
64
|
+
}
|
|
65
|
+
if ssettings._uid is not None:
|
|
66
|
+
kwargs["uid"] = ssettings._uid
|
|
67
|
+
# this checks if the storage already exists under the hood
|
|
68
|
+
storage = Storage(**kwargs).save()
|
|
69
|
+
return storage
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def register_user(usettings: UserSettings, update_user: bool = True):
|
|
73
|
+
from lamindb.models import User
|
|
74
|
+
|
|
75
|
+
if not update_user and User.objects.filter(uid=usettings.uid).exists():
|
|
76
|
+
return
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
# need to have try except because of integer primary key migration
|
|
80
|
+
user, created = User.objects.update_or_create(
|
|
81
|
+
uid=usettings.uid,
|
|
82
|
+
defaults={
|
|
83
|
+
"handle": usettings.handle,
|
|
84
|
+
"name": usettings.name,
|
|
85
|
+
},
|
|
86
|
+
)
|
|
87
|
+
# for users with only read access, except via ProgrammingError
|
|
88
|
+
# ProgrammingError: permission denied for table lamindb_user
|
|
89
|
+
# IntegrityError: when trying to update a user on a fine-grained access instance
|
|
90
|
+
except (OperationalError, FieldError, ProgrammingError, IntegrityError):
|
|
91
|
+
pass
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def register_initial_records(isettings: InstanceSettings, usettings: UserSettings):
|
|
95
|
+
"""Register space, user & storage in DB."""
|
|
96
|
+
from django.db.utils import OperationalError
|
|
97
|
+
from lamindb.models import Branch, Space
|
|
98
|
+
|
|
99
|
+
try:
|
|
100
|
+
Space.objects.get_or_create(
|
|
101
|
+
uid=12 * "a",
|
|
102
|
+
name="all",
|
|
103
|
+
description="Every team & user with access to the instance has access.",
|
|
104
|
+
)
|
|
105
|
+
Branch.objects.get_or_create(
|
|
106
|
+
id=-1,
|
|
107
|
+
uid=12 * "t",
|
|
108
|
+
name="trash",
|
|
109
|
+
description="The trash.",
|
|
110
|
+
)
|
|
111
|
+
Branch.objects.get_or_create(
|
|
112
|
+
id=0,
|
|
113
|
+
uid=12 * "a",
|
|
114
|
+
name="archive",
|
|
115
|
+
description="The archive.",
|
|
116
|
+
)
|
|
117
|
+
Branch.objects.get_or_create(
|
|
118
|
+
uid=12 * "m",
|
|
119
|
+
name="main",
|
|
120
|
+
description="The main & default branch of the instance.",
|
|
121
|
+
)
|
|
122
|
+
register_user(usettings)
|
|
123
|
+
register_storage_in_instance(isettings.storage)
|
|
124
|
+
except OperationalError as error:
|
|
125
|
+
logger.warning(f"instance seems not set up ({error})")
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
ERROR_SQLITE_CACHE = """
|
|
129
|
+
Your cached local SQLite file exists, while your cloud SQLite file ({}) doesn't.
|
|
130
|
+
Either delete your cache ({}) or add it back to the cloud (if delete was accidental).
|
|
131
|
+
"""
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def process_connect_response(
|
|
135
|
+
response: tuple | str, instance_identifier: str
|
|
136
|
+
) -> tuple[
|
|
137
|
+
UUID,
|
|
138
|
+
Literal[
|
|
139
|
+
"instance-corrupted-or-deleted", "account-not-exists", "instance-not-found"
|
|
140
|
+
],
|
|
141
|
+
]:
|
|
142
|
+
# for internal use when creating instances through CICD
|
|
143
|
+
if isinstance(response, tuple) and response[0] == "instance-corrupted-or-deleted":
|
|
144
|
+
hub_result = response[1]
|
|
145
|
+
instance_state = response[0]
|
|
146
|
+
instance_id = UUID(hub_result["id"])
|
|
147
|
+
else:
|
|
148
|
+
instance_id_str = os.getenv("LAMINDB_INSTANCE_ID_INIT")
|
|
149
|
+
if instance_id_str is None:
|
|
150
|
+
instance_id = uuid.uuid5(uuid.NAMESPACE_URL, instance_identifier)
|
|
151
|
+
else:
|
|
152
|
+
instance_id = UUID(instance_id_str)
|
|
153
|
+
instance_state = response
|
|
154
|
+
return instance_id, instance_state
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def process_modules_arg(modules: str | None = None) -> str:
|
|
158
|
+
if modules is None or modules == "":
|
|
159
|
+
return ""
|
|
160
|
+
# currently no actual validation, can add back if we see a need
|
|
161
|
+
# the following just strips white spaces
|
|
162
|
+
to_be_validated = [s.strip() for s in modules.split(",")]
|
|
163
|
+
return ",".join(to_be_validated)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def validate_init_args(
|
|
167
|
+
*,
|
|
168
|
+
storage: UPathStr,
|
|
169
|
+
name: str | None = None,
|
|
170
|
+
db: PostgresDsn | None = None,
|
|
171
|
+
modules: str | None = None,
|
|
172
|
+
_test: bool = False,
|
|
173
|
+
_write_settings: bool = True,
|
|
174
|
+
_user: UserSettings | None = None,
|
|
175
|
+
) -> tuple[
|
|
176
|
+
str,
|
|
177
|
+
UUID,
|
|
178
|
+
Literal[
|
|
179
|
+
"connected",
|
|
180
|
+
"instance-corrupted-or-deleted",
|
|
181
|
+
"account-not-exists",
|
|
182
|
+
"instance-not-found",
|
|
183
|
+
],
|
|
184
|
+
str,
|
|
185
|
+
]:
|
|
186
|
+
from ._connect_instance import connect
|
|
187
|
+
|
|
188
|
+
if storage is None:
|
|
189
|
+
raise SystemExit("✗ `storage` argument can't be `None`")
|
|
190
|
+
# should be called as the first thing
|
|
191
|
+
name_str = infer_instance_name(storage=storage, name=name, db=db)
|
|
192
|
+
owner_str = settings.user.handle if _user is None else _user.handle
|
|
193
|
+
# test whether instance exists by trying to load it
|
|
194
|
+
instance_slug = f"{owner_str}/{name_str}"
|
|
195
|
+
response = connect(
|
|
196
|
+
instance_slug,
|
|
197
|
+
_db=db,
|
|
198
|
+
_raise_not_found_error=False,
|
|
199
|
+
_test=_test,
|
|
200
|
+
_write_settings=_write_settings,
|
|
201
|
+
_user=_user,
|
|
202
|
+
)
|
|
203
|
+
instance_id: UUID
|
|
204
|
+
instance_state: Literal[
|
|
205
|
+
"connected",
|
|
206
|
+
"instance-corrupted-or-deleted",
|
|
207
|
+
"account-not-exists",
|
|
208
|
+
"instance-not-found",
|
|
209
|
+
]
|
|
210
|
+
if response is None:
|
|
211
|
+
instance_state, instance_id = "connected", settings.instance._id
|
|
212
|
+
else:
|
|
213
|
+
instance_id, instance_state = process_connect_response(response, instance_slug)
|
|
214
|
+
modules = process_modules_arg(modules)
|
|
215
|
+
return name_str, instance_id, instance_state, instance_slug
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
MESSAGE_CANNOT_SWITCH_DEFAULT_INSTANCE = """
|
|
219
|
+
You cannot write to different instances in the same Python session.
|
|
220
|
+
|
|
221
|
+
Do you want to read from another instance via `SQLRecord.using()`? For example:
|
|
222
|
+
|
|
223
|
+
ln.Artifact.using("laminlabs/cellxgene").filter()
|
|
224
|
+
|
|
225
|
+
Or do you want to switch off auto-connect via `lamin settings set auto-connect false`?
|
|
226
|
+
"""
|
|
227
|
+
|
|
228
|
+
DOC_STORAGE_ARG = "A local or remote folder (`'s3://...'` or `'gs://...'`). Defaults to current working directory."
|
|
229
|
+
DOC_INSTANCE_NAME = (
|
|
230
|
+
"Instance name. If not passed, it will equal the folder name passed to `storage`."
|
|
231
|
+
)
|
|
232
|
+
DOC_DB = "Database connection URL. Defaults to `None`, which implies an SQLite file in the storage location."
|
|
233
|
+
DOC_MODULES = "Comma-separated string of schema modules."
|
|
234
|
+
DOC_LOW_LEVEL_KWARGS = "Keyword arguments for low-level control."
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@doc_args(DOC_STORAGE_ARG, DOC_INSTANCE_NAME, DOC_DB, DOC_MODULES, DOC_LOW_LEVEL_KWARGS)
def init(
    *,
    storage: UPathStr = ".",
    name: str | None = None,
    db: PostgresDsn | None = None,
    modules: str | None = None,
    **kwargs,
) -> None:
    """Init a LaminDB instance.

    Args:
        storage: {}
        name: {}
        db: {}
        modules: {}
        **kwargs: {}

    The low-level keys read from `**kwargs` are `_write_settings`
    (default `True`), `schema` (used as `modules` when `modules` is `None`),
    `_test` (default `False`) and `_user` (default `None`).

    Raises:
        CannotSwitchDefaultInstance: If `_check_instance_setup()` reports an
            already set up instance and `_test` is falsy.
        InstanceNotCreated: If the instance should be registered on the hub
            but its storage is not on the hub.
        Exception: Any other error is re-raised after the cleanup in the
            `except` branch has run.
    """
    # both stay None until created; the except branch uses this to decide
    # which cleanups are needed
    isettings = None
    ssettings = None

    _write_settings: bool = kwargs.get("_write_settings", True)
    # `schema` in kwargs acts as a fallback for `modules`
    if modules is None:
        modules = kwargs.get("schema", None)
    _test: bool = kwargs.get("_test", False)

    # use this user instead of settings.user
    # contains access_token
    _user: UserSettings | None = kwargs.get("_user", None)
    user_handle: str = settings.user.handle if _user is None else _user.handle
    user__uuid: UUID = settings.user._uuid if _user is None else _user._uuid  # type: ignore
    # no explicit token is passed along unless a user was passed explicitly
    access_token: str | None = None if _user is None else _user.access_token

    try:
        silence_loggers()
        from ._check_setup import _check_instance_setup

        if _check_instance_setup() and not _test:
            raise CannotSwitchDefaultInstance(MESSAGE_CANNOT_SWITCH_DEFAULT_INSTANCE)
        elif _write_settings:
            disconnect(mute=True)
        from .core._hub_core import init_instance_hub

        # the fourth returned value is not used here
        name_str, instance_id, instance_state, _ = validate_init_args(
            storage=storage,
            name=name,
            db=db,
            modules=modules,
            _test=_test,
            _write_settings=_write_settings,
            _user=_user,  # will get from settings.user if _user is None
        )
        # nothing left to initialize in this state: return early
        if instance_state == "connected":
            if _write_settings:
                settings.auto_connect = True  # we can also debate this switch here
            return None
        # only a passed db URL that `is_local_db_url` accepts sets this flag
        prevent_register_hub = is_local_db_url(db) if db is not None else False
        ssettings, _ = init_storage(
            storage,
            instance_id=instance_id,
            instance_slug=f"{user_handle}/{name_str}",
            init_instance=True,
            prevent_register_hub=prevent_register_hub,
            created_by=user__uuid,
            access_token=access_token,
        )
        isettings = InstanceSettings(
            id=instance_id,  # type: ignore
            owner=user_handle,
            name=name_str,
            storage=ssettings,
            db=db,
            modules=modules,
            uid=ssettings.uid,
            # to lock passed user in isettings._cloud_sqlite_locker.lock()
            _locker_user=_user,  # only has effect if cloud sqlite
        )
        register_on_hub = (
            isettings.is_remote and instance_state != "instance-corrupted-or-deleted"
        )
        if register_on_hub:
            # can't register the instance in the hub
            # if storage is not in the hub
            # raise the exception and initiate cleanups
            if not isettings.storage.is_on_hub:
                raise InstanceNotCreated(
                    "Unable to create the instance because failed to register the storage."
                )
            init_instance_hub(
                isettings, account_id=user__uuid, access_token=access_token
            )
        validate_sqlite_state(isettings)
        # why call it here if it is also called in load_from_isettings?
        isettings._persist(write_to_disk=_write_settings)
        # in test mode, stop after persisting: the steps below are skipped
        if _test:
            return None
        isettings._init_db()
        load_from_isettings(
            isettings, init=True, user=_user, write_settings=_write_settings
        )
        if _write_settings and isettings._is_cloud_sqlite:
            isettings._cloud_sqlite_locker.lock()
            logger.warning(
                "locked instance (to unlock and push changes to the cloud SQLite file,"
                " call: lamin disconnect)"
            )
        # the hub schema update is skipped for the sqlite dialect
        if register_on_hub and isettings.dialect != "sqlite":
            from ._schema_metadata import update_schema_in_hub

            update_schema_in_hub(access_token=access_token)
        if _write_settings:
            settings.auto_connect = True
        importlib.reload(importlib.import_module("lamindb"))
        logger.important(f"initialized lamindb: {isettings.slug}")
    except Exception as e:
        # clean up whatever was created before the failure, then re-raise
        from ._delete import delete_by_isettings
        from .core._hub_core import delete_instance_record, delete_storage_record

        if isettings is not None:
            if _write_settings:
                delete_by_isettings(isettings)
            else:
                settings._instance_settings = None
        # hub records are only deleted for a non-anonymous handle or when an
        # access token is available
        if (
            ssettings is not None
            and (user_handle != "anonymous" or access_token is not None)
            and ssettings.is_on_hub
        ):
            delete_storage_record(ssettings, access_token=access_token)  # type: ignore
        if isettings is not None:
            if (
                user_handle != "anonymous" or access_token is not None
            ) and isettings.is_on_hub:
                delete_instance_record(isettings._id, access_token=access_token)
        raise e
    return None
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def load_from_isettings(
    isettings: InstanceSettings,
    *,
    init: bool = False,
    user: UserSettings | None = None,
    write_settings: bool = True,
) -> None:
    """Register the required records for an instance and persist its settings.

    Args:
        isettings: Settings of the instance being initialized or loaded.
        init: If `True`, register the initial records and the bionty sources
            and update the cloud SQLite file without unlocking it. If `False`,
            only register the user, and only when the instance settings file
            does not exist yet.
        user: User to register. Falls back to `settings.user` when `None`.
        write_settings: Forwarded to `isettings._persist` as `write_to_disk`.
    """
    from .core._setup_bionty_sources import write_bionty_sources

    if user is None:
        user = settings.user

    if not init:
        # when loading, django is already set up
        #
        # a missing settings file is the best available proxy for "this
        # instance is connected for the first time in this environment",
        # in which case the user might not be registered yet
        first_connection = not isettings._get_settings_file().exists()
        if first_connection:
            # do not try to update the user on fine grained access instances
            # this is blocked anyways, only select and insert are allowed
            may_update_user = not isettings._fine_grained_access
            register_user(user, update_user=may_update_user)
    else:
        # during init space, user and storage need to be registered
        register_initial_records(isettings, user)
        write_bionty_sources(isettings)
        isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)

    isettings._persist(write_to_disk=write_settings)
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def validate_sqlite_state(isettings: InstanceSettings) -> None:
    """Raise if a local SQLite file exists without its cloud counterpart.

    Does nothing unless `isettings._is_cloud_sqlite` is truthy.

    Raises:
        RuntimeError: If `isettings._sqlite_file_local` exists while
            `isettings._sqlite_file` does not; the message is built from
            `ERROR_SQLITE_CACHE`.
    """
    if not isettings._is_cloud_sqlite:
        return None
    # it's important to evaluate the existence check for the local sqlite
    # file first because it doesn't need a network request
    if not isettings._sqlite_file_local.exists():
        return None
    if isettings._sqlite_file.exists():
        return None
    message = ERROR_SQLITE_CACHE.format(
        isettings._sqlite_file, isettings._sqlite_file_local
    )
    raise RuntimeError(message)
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def infer_instance_name(
    *,
    storage: UPathStr,
    name: str | None = None,
    db: PostgresDsn | None = None,
) -> str:
    """Determine the instance name from `name`, `db` or `storage`, in that order.

    Args:
        storage: Storage location; its last path component, lower-cased, is
            used when neither `name` nor `db` is passed.
        name: Explicit instance name, returned unchanged if it has no `/`.
        db: Database URL; the text after its last `/` is used when `name` is
            `None`.

    Raises:
        ValueError: If `name` contains `/`, or if no name can be inferred
            because `storage` equals `"create-s3"`.
    """
    if name is not None:
        if "/" not in name:
            return name
        raise ValueError("Invalid instance name: '/' delimiter not allowed.")

    if db is None:
        if storage == "create-s3":
            raise ValueError("pass name to init if storage = 'create-s3'")
        resolved_storage = UPath(storage).resolve()
        _, _, folder_name = resolved_storage.path.rstrip("/").rpartition("/")
        return folder_name.lower()

    logger.warning("using the sql database name for the instance name")
    # this isn't a great way to access the db name
    # could use LaminDsn instead
    _, _, db_name = str(db).rpartition("/")
    return db_name
|