lamindb_setup-1.16.0-py3-none-any.whl → lamindb_setup-1.18.0-py3-none-any.whl
This diff shows the changes between two package versions that were publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- lamindb_setup/__init__.py +6 -3
- lamindb_setup/_check_setup.py +24 -85
- lamindb_setup/_connect_instance.py +23 -33
- lamindb_setup/_delete.py +10 -5
- lamindb_setup/_disconnect.py +12 -9
- lamindb_setup/_init_instance.py +0 -1
- lamindb_setup/_migrate.py +56 -30
- lamindb_setup/_set_managed_storage.py +3 -0
- lamindb_setup/_setup_user.py +8 -5
- lamindb_setup/_silence_loggers.py +2 -0
- lamindb_setup/core/__init__.py +0 -3
- lamindb_setup/core/_aws_options.py +18 -8
- lamindb_setup/core/_clone.py +1 -125
- lamindb_setup/core/_hub_client.py +1 -2
- lamindb_setup/core/_hub_core.py +9 -6
- lamindb_setup/core/_private_django_api.py +1 -3
- lamindb_setup/core/_settings.py +14 -10
- lamindb_setup/core/_settings_instance.py +40 -24
- lamindb_setup/core/_settings_load.py +25 -7
- lamindb_setup/core/_settings_storage.py +0 -5
- lamindb_setup/core/django.py +42 -14
- lamindb_setup/core/hashing.py +5 -5
- lamindb_setup/core/lamin.db.gz +0 -0
- lamindb_setup/core/upath.py +15 -6
- lamindb_setup/errors.py +5 -12
- lamindb_setup/io.py +16 -5
- {lamindb_setup-1.16.0.dist-info → lamindb_setup-1.18.0.dist-info}/METADATA +5 -6
- lamindb_setup-1.18.0.dist-info/RECORD +51 -0
- lamindb_setup-1.16.0.dist-info/RECORD +0 -50
- {lamindb_setup-1.16.0.dist-info → lamindb_setup-1.18.0.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.16.0.dist-info → lamindb_setup-1.18.0.dist-info}/WHEEL +0 -0
lamindb_setup/core/_aws_options.py
CHANGED

@@ -20,7 +20,7 @@ lamin_env = os.getenv("LAMIN_ENV")
 if lamin_env is None or lamin_env == "prod":
     HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
 else:
-    logger.warning("loaded LAMIN_ENV:
+    logger.warning(f"loaded LAMIN_ENV: {lamin_env}")
     HOSTED_BUCKETS = ("s3://lamin-hosted-test",)  # type: ignore
 
 
@@ -60,6 +60,7 @@ class AWSOptionsManager:
         from aiobotocore.session import AioSession
         from s3fs import S3FileSystem
 
+        anon_env = os.getenv("LAMIN_S3_ANON") == "true"
         # this is cached so will be resued with the connection initialized
         # these options are set for paths in _path_inject_options
         # here we set the same options to cache the filesystem
@@ -68,19 +69,28 @@ class AWSOptionsManager:
             use_listings_cache=True,
             version_aware=False,
             config_kwargs={"max_pool_connections": 64},
+            anon=anon_env,
         )
 
         self._suppress_aiobotocore_traceback_logging()
 
-
-
-            self.anon: bool = fs.session._credentials is None
-        except Exception as e:
+        if anon_env:
+            self.anon: bool = True
             logger.warning(
-
-                "
+                "`anon` mode will be used for all non-managed buckets "
+                "because the environment variable LAMIN_S3_ANON was set to 'true'"
             )
-
+        else:
+            try:
+                fs.connect()
+                self.anon = fs.session._credentials is None
+            except Exception as e:
+                logger.warning(
+                    f"There is a problem with your default AWS Credentials: {e}\n"
+                    "`anon` mode will be used for all non-managed buckets"
+                )
+                self.anon = True
+
         self.anon_public: bool | None = None
         if not self.anon:
             try:
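The new `LAMIN_S3_ANON` switch changes how `AWSOptionsManager` decides between authenticated and anonymous S3 access. Below is a minimal sketch of that decision lifted out of the class for illustration; the environment variable, `fs.connect()`, and `fs.session._credentials` come from the diff above, while the standalone function and its name are hypothetical:

    import os
    from s3fs import S3FileSystem

    def resolve_anon_mode() -> bool:
        # LAMIN_S3_ANON=true skips credential discovery entirely (illustrative helper).
        if os.getenv("LAMIN_S3_ANON") == "true":
            return True
        # Otherwise probe the default AWS credentials; fall back to anon on failure.
        fs = S3FileSystem(cache_regions=True)
        try:
            fs.connect()
            return fs.session._credentials is None
        except Exception:
            return True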
lamindb_setup/core/_clone.py
CHANGED

@@ -1,142 +1,18 @@
-"""Utilities to
+"""Utilities to work with Postgres Snapshots.
 
 .. autosummary::
    :toctree:
 
-   init_local_sqlite
-   connect_local_sqlite
-   connect_remote_sqlite
    upload_sqlite_clone
 """
 
 import gzip
-import os
 import shutil
 from pathlib import Path
 
-from lamindb_setup.core._settings_load import load_instance_settings
-from lamindb_setup.core._settings_store import instance_settings_file
-from lamindb_setup.core.django import reset_django
 from lamindb_setup.core.upath import create_path
 
 
-def init_local_sqlite(
-    instance: str | None = None, copy_suffix: str | None = None
-) -> None:
-    """Initialize SQLite copy of an existing Postgres instance.
-
-    Creates a SQLite database with the same schema as the source Postgres instance.
-    The copy shares the same storage location as the original instance.
-
-    The copy is intended for read-only access to instance data without requiring a Postgres connection.
-    Data synchronization to complete the clone happens via a separate Lambda function.
-
-    Note that essential user, branch and storage tables are missing.
-    Therefore, it is not possible to store Artifacts without having replayed these records first.
-
-    Args:
-        instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
-            If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
-            If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
-        copy_suffix: Optional suffix to append to the local clone name.
-    """
-    import lamindb_setup as ln_setup
-
-    if instance is None:  # pragma: no cover
-        instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
-
-        if instance is None:
-            raise ValueError(
-                "No instance identifier provided and LAMIN_CURRENT_INSTANCE is not set"
-            )
-
-    if ln_setup.settings.instance is None:  # pragma: no cover
-        ln_setup.connect(instance)
-
-    name = (
-        f"{ln_setup.settings.instance.name}{copy_suffix}"
-        if copy_suffix is not None
-        else ln_setup.settings.instance.name
-    )
-    isettings = ln_setup._connect_instance._connect_instance(
-        owner=ln_setup.settings.instance.owner, name=name
-    )
-    isettings._db = None
-    isettings._is_on_hub = False
-    isettings._fine_grained_access = False
-    name = (
-        f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
-    )
-    isettings._name = name
-    isettings._is_clone = True
-    isettings._persist(write_to_disk=True)
-
-    if not isettings._sqlite_file_local.exists():
-        # Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
-        # Without this reset, the `if not settings.configured`` check in `setup_django()` would skip reconfiguration,
-        # causing migrations to run against the old Postgres database instead of the new SQLite clone database.
-        reset_django()
-        isettings._init_db()
-
-
-def connect_local_sqlite(
-    instance: str,
-) -> None:
-    """Load a locally stored SQLite instance of which a remote hub Postgres instance exists.
-
-    This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
-    The clone must first be created via `init_local_sqlite()`.
-
-    Args:
-        instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
-    """
-    owner, name = instance.split("/")
-    settings_file = instance_settings_file(name=name, owner=owner)
-
-    if not settings_file.exists():
-        raise ValueError(
-            "SQLite clone not found."
-            " Run `init_local_sqlite()` to create a local copy or connect to a remote copy using `connect_remote_sqlite`."
-        )
-
-    isettings = load_instance_settings(settings_file)
-    isettings._persist(write_to_disk=False)
-
-    # Using `setup_django` instead of `_load_db` to not ping AWS RDS
-    from lamindb_setup._check_setup import disable_auto_connect
-
-    from .django import setup_django
-
-    disable_auto_connect(setup_django)(isettings)
-
-
-def connect_remote_sqlite(instance: str, *, copy_suffix: str | None = None) -> None:
-    """Load an existing SQLite copy of a hub instance.
-
-    Args:
-        instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
-        copy_suffix: Optional suffix of the local clone.
-    """
-    import lamindb_setup as ln_setup
-
-    owner, name = instance.split("/")
-
-    # Step 1: Create the settings file
-    isettings = ln_setup._connect_instance._connect_instance(owner=owner, name=name)
-    isettings._db = None
-    isettings._is_on_hub = False
-    isettings._fine_grained_access = False
-    isettings._db_permissions = "read"
-    name = (
-        f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
-    )
-    isettings._name = name
-    isettings._is_clone = True
-    isettings._persist(write_to_disk=True)
-
-    connect_local_sqlite(instance=instance + (copy_suffix or ""))
-
-
 def upload_sqlite_clone(
     local_sqlite_path: Path | str | None = None, compress: bool = True
 ) -> None:
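The retained `upload_sqlite_clone` keeps the `gzip` and `shutil` imports for its `compress=True` path. A hedged sketch of that compression step follows; the helper name and the `.gz` suffix handling are illustrative rather than the package's actual code:

    import gzip
    import shutil
    from pathlib import Path

    def gzip_sqlite(local_sqlite_path: Path) -> Path:
        """Illustrative helper: gzip a local SQLite file before uploading it."""
        compressed = local_sqlite_path.with_suffix(local_sqlite_path.suffix + ".gz")
        with open(local_sqlite_path, "rb") as f_in, gzip.open(compressed, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        return compressed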
lamindb_setup/core/_hub_client.py
CHANGED

@@ -11,8 +11,7 @@ import httpx
 from httpx_retries import Retry, RetryTransport
 from lamin_utils import logger
 from pydantic_settings import BaseSettings
-from supabase import Client, create_client
-from supabase.lib.client_options import ClientOptions
+from supabase import Client, ClientOptions, create_client
 
 from ._settings_save import save_user_settings
 
lamindb_setup/core/_hub_core.py
CHANGED

@@ -383,7 +383,8 @@ def _init_instance_hub(
 ) -> None:
     from ._settings import settings
 
-
+    created_by_id = settings.user._uuid.hex if account_id is None else account_id.hex  # type: ignore
+    owner_account_id = os.getenv("LAMINDB_ACCOUNT_ID_INIT", created_by_id)
 
     try:
         lamindb_version = metadata.version("lamindb")
@@ -391,13 +392,13 @@ def _init_instance_hub(
         lamindb_version = None
     fields = {
         "id": isettings._id.hex,
-        "account_id":
+        "account_id": owner_account_id,
         "name": isettings.name,
         "lnid": isettings.uid,
         "schema_str": isettings._schema_str,
         "lamindb_version": lamindb_version,
         "public": False,
-        "created_by_id":
+        "created_by_id": created_by_id,
     }
     if isettings.dialect != "sqlite":
         db_dsn = LaminDsnModel(db=isettings.db)
@@ -407,7 +408,7 @@
            "db_port": db_dsn.db.port,
            "db_database": db_dsn.db.database,
         }
-        fields.update(db_fields)
+        fields.update(db_fields)  # type: ignore
     slug = isettings.slug
     # I'd like the following to be an upsert, but this seems to violate RLS
     # Similarly, if we don't specify `returning="minimal"`, we'll violate RLS
@@ -415,7 +416,9 @@
     # as then init_instance is no longer idempotent
     try:
         client.table("instance").insert(fields, returning="minimal").execute()
-    except APIError:
+    except APIError as e:
+        if "new row violates row-level security policy" in str(e):
+            raise e
         logger.warning(f"instance already existed at: https://lamin.ai/{slug}")
         return None
     if isettings.dialect != "sqlite" and isettings.is_remote:
@@ -713,7 +716,7 @@ def get_lamin_site_base_url():
 
 
 def sign_up_local_hub(email) -> str | tuple[str, str, str]:
-    # raises
+    # raises AuthApiError: User already registered
     password = base62(40)  # generate new password
     sign_up_kwargs = {"email": email, "password": password}
     client = connect_hub()
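The first hunk of `_init_instance_hub` splits instance ownership from authorship: `created_by_id` stays the authenticated user, while `LAMINDB_ACCOUNT_ID_INIT` can override the owning account. A hedged sketch of that precedence; the UUID below is made up for illustration:

    import os
    from uuid import UUID

    account_id = None  # an explicit account passed to the init call, or None
    user_uuid = UUID("11111111-1111-1111-1111-111111111111")  # stand-in for settings.user._uuid

    created_by_id = user_uuid.hex if account_id is None else account_id.hex
    # The env var lets a deployment register the instance under a different owning
    # account while created_by_id still records who ran the initialization.
    owner_account_id = os.getenv("LAMINDB_ACCOUNT_ID_INIT", created_by_id)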
lamindb_setup/core/_private_django_api.py
CHANGED

@@ -34,8 +34,8 @@ def private_django_api(reverse=False):
     # the order here matters
     # changing it might break the tests
     attributes = [
-        "MultipleObjectsReturned",
         "add_to_class",
+        "arefresh_from_db",
         "adelete",
         "asave",
         "clean",
@@ -51,10 +51,8 @@ def private_django_api(reverse=False):
         "validate_unique",
     ]
     if reverse:
-        attributes.append("arefresh_from_db")
         attributes.append("full_clean")
     else:
-        attributes.append("a_refresh_from_db")
         attributes.append("full__clean")
 
     django_path = Path(db.__file__).parent.parent
lamindb_setup/core/_settings.py
CHANGED

@@ -46,6 +46,12 @@ def _process_cache_path(cache_path: UPathStr | None) -> UPath | None:
     return cache_dir
 
 
+# returned by settings.branch for none/none instance
+class MainBranchMock:
+    id = 1
+    name = "main"
+
+
 class SetupSettings:
     """Setup settings."""
 
@@ -140,6 +146,10 @@ class SetupSettings:
     # and we never need a DB request
     def branch(self) -> Branch:
         """Default branch."""
+        # this is needed for .filter() with non-default connections
+        if not self._instance_exists:
+            return MainBranchMock()
+
         if self._branch is None:
             from lamindb import Branch
 
@@ -222,10 +232,9 @@
         If `True`, the current instance is connected, meaning that the db and other settings
         are properly configured for use.
         """
-
-
-
-        return False
+        from . import django
+
+        return self._instance_exists and django.IS_SETUP
 
     @property
     def private_django_api(self) -> bool:
@@ -284,12 +293,7 @@
 
     @property
     def _instance_exists(self):
-
-            self.instance  # noqa
-            return True
-        # this is implicit logic that catches if no instance is loaded
-        except CurrentInstanceNotConfigured:
-            return False
+        return self.instance.slug != "none/none"
 
     @property
     def cache_dir(self) -> UPath:
lamindb_setup/core/_settings_instance.py
CHANGED

@@ -125,10 +125,11 @@ class InstanceSettings:
         if self._local_storage is not None:
             value_local = self.local_storage
             representation += f"\n - local storage: {value_local.root_as_str} ({value_local.region})"
-
-
-
-
+            if value is not None:
+                representation += (
+                    f"\n - cloud storage: {value.root_as_str} ({value.region})"
+                )
+        elif value is not None:
             representation += (
                 f"\n - storage: {value.root_as_str} ({value.region})"
             )
@@ -322,16 +323,6 @@
         else:
             logger.warning(f"could not set this local storage location: {local_root}")
 
-    @property
-    @deprecated("local_storage")
-    def storage_local(self) -> StorageSettings:
-        return self.local_storage
-
-    @storage_local.setter
-    @deprecated("local_storage")
-    def storage_local(self, local_root_host: tuple[Path | str, str]):
-        self.local_storage = local_root_host  # type: ignore
-
     @property
     def slug(self) -> str:
         """Unique semantic identifier of form `"{account_handle}/{instance_name}"`."""
@@ -417,11 +408,6 @@
         else:
             return {module for module in self._schema_str.split(",") if module != ""}
 
-    @property
-    @deprecated("modules")
-    def schema(self) -> set[str]:
-        return self.modules
-
     @property
     def _sqlite_file(self) -> UPath:
         """SQLite file."""
@@ -513,17 +499,36 @@
 
     @property
     def dialect(self) -> Literal["sqlite", "postgresql"]:
-        """SQL dialect.
+        """SQL dialect.
+
+        Equivalent to :attr:`vendor`.
+
+        "vendor" is the Django terminology for the type of database. "dialect" is the SQLAlchemy terminology.
+        """
         if self._db is None or self._db.startswith("sqlite://"):
             return "sqlite"
         else:
             assert self._db.startswith("postgresql"), f"Unexpected DB value: {self._db}"
             return "postgresql"
 
+    @property
+    def vendor(self) -> Literal["sqlite", "postgresql"]:
+        """Database vendor.
+
+        Equivalent to :attr:`dialect`.
+
+        "vendor" is the Django terminology for the type of database. "dialect" is the SQLAlchemy terminology.
+        """
+        return self.dialect
+
     @property
     def _is_cloud_sqlite(self) -> bool:
         """Is this a cloud instance with sqlite db."""
-        return
+        return (
+            self.dialect == "sqlite"
+            and self.storage is not None
+            and self.storage.type_is_cloud
+        )
 
     @property
     def _cloud_sqlite_locker(self):
@@ -543,14 +548,16 @@
     @property
     def is_remote(self) -> bool:
         """Boolean indicating if an instance has no local component."""
+        if self.storage is None and self.db == "sqlite:///:memory:":
+            return False
         return check_is_instance_remote(self.storage.root_as_str, self.db)
 
     @property
     def is_on_hub(self) -> bool:
-        """Is this instance on the hub?
+        """Is this instance registered on the hub?
 
-        Can only
-        Will return `False` in case the
+        Can only establish if user has access to the instance.
+        Will return `False` in case the user token can't find the instance.
         """
         if self._is_on_hub is None:
             from ._hub_client import call_with_fallback_auth
@@ -572,6 +579,15 @@
             self._is_on_hub = True
         return self._is_on_hub
 
+    @property
+    def is_managed_by_hub(self) -> bool:
+        """Is this instance managed by the hub?
+
+        Returns `True` if the instance is _managed_ by LaminHub, i.e.,
+        it was connected to LaminHub to manage access, migrations, a REST API, a UI, etc.
+        """
+        return self.api_url is not None
+
     def _get_settings_file(self) -> Path:
         return instance_settings_file(self.name, self.owner)
 
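The `vendor` property added above is a pure alias for `dialect` (Django's term versus SQLAlchemy's term). A small self-contained sketch of the same alias pattern, with a hypothetical `DBInfo` class standing in for `InstanceSettings`:

    from __future__ import annotations

    class DBInfo:
        def __init__(self, db_url: str | None):
            self._db = db_url

        @property
        def dialect(self) -> str:
            # mirrors the sqlite-vs-postgres decision shown in the hunk
            if self._db is None or self._db.startswith("sqlite://"):
                return "sqlite"
            return "postgresql"

        @property
        def vendor(self) -> str:
            # Django terminology, forwarding to the SQLAlchemy-style name
            return self.dialect

    assert DBInfo(None).vendor == "sqlite"
    assert DBInfo("postgresql://user:pw@host/db").dialect == "postgresql"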
lamindb_setup/core/_settings_load.py
CHANGED

@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import os
+from importlib.util import find_spec
 from pathlib import Path
 from typing import TYPE_CHECKING
 from uuid import UUID, uuid4
@@ -46,19 +47,36 @@ def load_cache_path_from_settings(storage_settings: Path | None = None) -> Path
     return None
 
 
+def find_module_candidates():
+    """Find all local packages that depend on lamindb."""
+    candidates = ["bionty", "wetlab"]
+    return [c for c in candidates if find_spec(c) is not None]
+
+
 def load_instance_settings(instance_settings_file: Path | None = None):
     if instance_settings_file is None:
-
-
+        isettings_file = current_instance_settings_file()
+        if not isettings_file.exists():
+            isettings = InstanceSettings(
+                id=UUID("00000000-0000-0000-0000-000000000000"),
+                owner="none",
+                name="none",
+                storage=None,
+                modules=",".join(find_module_candidates()),
+            )
+            return isettings
+    else:
+        isettings_file = instance_settings_file
+
+    if not isettings_file.exists():
+        # this errors only if the file was explicitly provided
         raise CurrentInstanceNotConfigured
     try:
-        settings_store = InstanceSettingsStore(_env_file=
+        settings_store = InstanceSettingsStore(_env_file=isettings_file)
     except (ValidationError, TypeError) as error:
-        with open(instance_settings_file) as f:
-            content = f.read()
         raise SettingsEnvFileOutdated(
-            f"\n\n{error}\n\nYour instance settings file with\n\n{
-            f" (likely outdated), see validation error. Please delete {
+            f"\n\n{error}\n\nYour instance settings file with\n\n{isettings_file.read_text()}\nis invalid"
+            f" (likely outdated), see validation error. Please delete {isettings_file} &"
            " reload (remote) or re-initialize (local) the instance with the same name & storage location."
         ) from error
     isettings = setup_instance_from_store(settings_store)
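`find_module_candidates` relies on `importlib.util.find_spec`, which returns `None` for packages that are not importable. A quick, self-contained illustration of that detection pattern (the candidate names are the ones hard-coded in the diff):

    from importlib.util import find_spec

    # keep only the candidate modules that are actually installed locally
    installed = [pkg for pkg in ("bionty", "wetlab") if find_spec(pkg) is not None]
    print(installed)  # e.g. ['bionty'] if only bionty is importable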
lamindb_setup/core/django.py
CHANGED

@@ -5,13 +5,15 @@ import builtins
 import os
 import sys
 import importlib as il
+import gzip
 import jwt
 import time
 import threading
 from pathlib import Path
+import shutil
 from packaging import version
 from ._settings_instance import InstanceSettings, is_local_db_url
-
+from ..errors import CurrentInstanceNotConfigured
 from lamin_utils import logger
 
 
@@ -21,6 +23,24 @@ IS_MIGRATING = False
 CONN_MAX_AGE = 299
 
 
+def get_connection(connection_name: str):
+    from django.db import connections
+
+    return connections[connection_name]
+
+
+def error_no_instance_wrapper(execute, sql, params, many, context):
+    connection = context["connection"]
+
+    if (
+        connection.vendor == "sqlite"
+        and connection.settings_dict.get("NAME") == ":memory:"
+    ):
+        raise CurrentInstanceNotConfigured
+
+    return execute(sql, params, many, context)
+
+
 # db token that refreshes on access if needed
 class DBToken:
     def __init__(
@@ -64,11 +84,6 @@ class DBTokenManager:
 
         self.tokens: dict[str, DBToken] = {}
 
-    def get_connection(self, connection_name: str):
-        from django.db import connections
-
-        return connections[connection_name]
-
     def set(self, token: DBToken, connection_name: str = "default"):
         if connection_name in self.tokens:
             return
@@ -77,11 +92,7 @@
         from django.db.backends.signals import connection_created
 
         def set_token_wrapper(execute, sql, params, many, context):
-            not_in_atomic_block =
-                context is None
-                or "connection" not in context
-                or not context["connection"].in_atomic_block
-            )
+            not_in_atomic_block = not context["connection"].in_atomic_block
            # ignore atomic blocks
            if not_in_atomic_block:
                sql = token.token_query + sql
@@ -98,7 +109,7 @@
                 result.nextset()
             return result
 
-
+        get_connection(connection_name).execute_wrappers.append(set_token_wrapper)
 
         def connection_callback(sender, connection, **kwargs):
             if (
@@ -124,7 +135,7 @@
         if connection_name in self.tokens:
             # here we don't use the connection from the closure
             # because Atomic is a single class to manage transactions for all connections
-            connection =
+            connection = get_connection(connection_name)
             if len(connection.atomic_blocks) == 1:
                 token = self.tokens[connection_name]
                 # use raw psycopg2 connection here
@@ -142,7 +153,7 @@
 
         from django.db.backends.signals import connection_created
 
-        connection =
+        connection = get_connection(connection_name)
 
         connection.execute_wrappers = [
             w
@@ -291,6 +302,9 @@ def setup_django(
         django.db.connections._connections = threading.local()
         logger.debug("django.db.connections._connections has been patched")
 
+        # error if trying to query with the default connection without setting up an instance
+        get_connection("default").execute_wrappers.insert(0, error_no_instance_wrapper)
+
         if isettings._fine_grained_access and isettings._db_permissions == "jwt":
             db_token = DBToken(isettings)
             db_token_manager.set(db_token)  # sets for the default connection
@@ -311,6 +325,20 @@
             call_command("migrate", app_name, app_number, verbosity=2)
             isettings._update_cloud_sqlite_file(unlock_cloud_sqlite=False)
         elif init:
+            modules_beyond_bionty = isettings.modules.copy()
+            compressed_sqlite_path = Path(__file__).parent / "lamin.db.gz"
+            if "bionty" in modules_beyond_bionty:
+                modules_beyond_bionty.remove("bionty")
+            # seed from compressed sqlite file
+            if (
+                isettings.dialect == "sqlite"
+                and os.getenv("LAMINDB_INIT_FROM_SCRATCH", "false") != "true"
+                and len(modules_beyond_bionty) == 0
+                and compressed_sqlite_path.exists()
+            ):
+                with gzip.open(compressed_sqlite_path, "rb") as f_in:
+                    with open(isettings._sqlite_file_local, "wb") as f_out:
+                        shutil.copyfileobj(f_in, f_out)
             global IS_MIGRATING
             IS_MIGRATING = True
             call_command("migrate", verbosity=0)
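Both `error_no_instance_wrapper` and the token injection build on Django's documented execute-wrapper hook (database instrumentation). A minimal, self-contained sketch of that hook follows; the settings and the logging wrapper are illustrative, not lamindb_setup code:

    import django
    from django.conf import settings

    settings.configure(
        DATABASES={"default": {"ENGINE": "django.db.backends.sqlite3", "NAME": ":memory:"}}
    )
    django.setup()

    from django.db import connections

    def log_sql_wrapper(execute, sql, params, many, context):
        # runs around every query on the connection it is attached to
        print(f"[{context['connection'].alias}] {sql[:60]}")
        return execute(sql, params, many, context)

    connections["default"].execute_wrappers.append(log_sql_wrapper)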
lamindb_setup/core/hashing.py
CHANGED

@@ -88,7 +88,7 @@ def hash_file(
     file_path: Path,
     file_size: int | None = None,
     chunk_size: int | None = 50 * 1024 * 1024,
-) -> tuple[str, str]:
+) -> tuple[int, str, str]:
     with open(file_path, "rb") as fp:
         if file_size is None:
             fp.seek(0, 2)
@@ -107,15 +107,15 @@
             hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
         ).digest()
         hash_type = "sha1-fl"
-    return to_b64_str(digest)[:HASH_LENGTH], hash_type
+    return file_size, to_b64_str(digest)[:HASH_LENGTH], hash_type
 
 
-def hash_dir(path: Path):
+def hash_dir(path: Path) -> tuple[int, str, str, int]:
     files = (subpath for subpath in path.rglob("*") if subpath.is_file())
 
     def hash_size(file):
-
-        return
+        size, hash, _ = hash_file(file)
+        return hash, size
 
     try:
         n_workers = len(psutil.Process().cpu_affinity())
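The `sha1-fl` branch visible in the context lines hashes only the first and last chunk of a large file, then hashes the two digests together. A hedged sketch of that scheme; `HASH_LENGTH`, the base64 helper, and the unconditional use of first/last chunks are assumptions, since the surrounding code (including any size threshold) is not shown in the diff:

    import base64
    import hashlib
    from pathlib import Path

    HASH_LENGTH = 22  # assumed truncation length

    def to_b64_str(digest: bytes) -> str:
        return base64.urlsafe_b64encode(digest).decode().rstrip("=")

    def sha1_first_last(file_path: Path, chunk_size: int = 50 * 1024 * 1024) -> tuple[int, str, str]:
        size = file_path.stat().st_size
        with open(file_path, "rb") as fp:
            first_chunk = fp.read(chunk_size)
            fp.seek(max(size - chunk_size, 0))
            last_chunk = fp.read(chunk_size)
        digest = hashlib.sha1(
            hashlib.sha1(first_chunk).digest() + hashlib.sha1(last_chunk).digest()
        ).digest()
        return size, to_b64_str(digest)[:HASH_LENGTH], "sha1-fl"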
lamindb_setup/core/lamin.db.gz
CHANGED
Binary file