lamindb_setup 1.15.1__py3-none-any.whl → 1.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +1 -1
- lamindb_setup/_connect_instance.py +0 -1
- lamindb_setup/_schema_metadata.py +9 -11
- lamindb_setup/_set_managed_storage.py +11 -3
- lamindb_setup/_setup_user.py +20 -2
- lamindb_setup/core/__init__.py +6 -1
- lamindb_setup/core/_clone.py +96 -15
- lamindb_setup/core/_hub_client.py +11 -4
- lamindb_setup/core/_private_django_api.py +0 -1
- lamindb_setup/core/_settings.py +1 -2
- lamindb_setup/core/_settings_instance.py +15 -6
- lamindb_setup/core/_settings_load.py +2 -2
- lamindb_setup/core/_settings_save.py +1 -0
- lamindb_setup/core/_settings_storage.py +35 -22
- lamindb_setup/core/_settings_store.py +3 -2
- lamindb_setup/core/django.py +2 -0
- lamindb_setup/core/upath.py +20 -11
- lamindb_setup/io.py +74 -23
- {lamindb_setup-1.15.1.dist-info → lamindb_setup-1.16.0.dist-info}/METADATA +3 -3
- {lamindb_setup-1.15.1.dist-info → lamindb_setup-1.16.0.dist-info}/RECORD +22 -22
- {lamindb_setup-1.15.1.dist-info → lamindb_setup-1.16.0.dist-info}/WHEEL +1 -1
- {lamindb_setup-1.15.1.dist-info/licenses → lamindb_setup-1.16.0.dist-info}/LICENSE +0 -0
lamindb_setup/__init__.py
CHANGED
|
@@ -172,7 +172,8 @@ class _ModelHandler:
|
|
|
172
172
|
self.table_name = model._meta.db_table
|
|
173
173
|
self.included_modules = included_modules
|
|
174
174
|
self.fields = self._get_fields_metadata(self.model)
|
|
175
|
-
self.
|
|
175
|
+
self.is_auto_created = bool(model._meta.auto_created)
|
|
176
|
+
self.is_link_table = issubclass(model, IsLink) or self.is_auto_created
|
|
176
177
|
self.name_field = model._name_field if hasattr(model, "_name_field") else None
|
|
177
178
|
self.ontology_id_field = (
|
|
178
179
|
model._ontology_id_field if hasattr(model, "_ontology_id_field") else None
|
|
@@ -183,6 +184,7 @@ class _ModelHandler:
|
|
|
183
184
|
"fields": self.fields.copy(),
|
|
184
185
|
"class_name": self.class_name,
|
|
185
186
|
"table_name": self.table_name,
|
|
187
|
+
"is_auto_created": self.is_auto_created,
|
|
186
188
|
"is_link_table": self.is_link_table,
|
|
187
189
|
"name_field": self.name_field,
|
|
188
190
|
"ontology_id_field": self.ontology_id_field,
|
|
@@ -249,13 +251,13 @@ class _ModelHandler:
|
|
|
249
251
|
return related_fields
|
|
250
252
|
|
|
251
253
|
def _get_field_metadata(self, model, field: Field):
|
|
252
|
-
from lamindb.models import IsLink
|
|
254
|
+
from lamindb.models import IsLink, Registry
|
|
253
255
|
|
|
254
256
|
internal_type = field.get_internal_type()
|
|
255
257
|
model_name = field.model._meta.model_name
|
|
256
258
|
relation_type = self._get_relation_type(model, field)
|
|
257
259
|
|
|
258
|
-
schema_name = field.model
|
|
260
|
+
schema_name = Registry.__get_module_name__(field.model)
|
|
259
261
|
|
|
260
262
|
if field.related_model is None:
|
|
261
263
|
related_model_name = None
|
|
@@ -265,7 +267,7 @@ class _ModelHandler:
|
|
|
265
267
|
max_length = field.max_length
|
|
266
268
|
else:
|
|
267
269
|
related_model_name = field.related_model._meta.model_name
|
|
268
|
-
related_schema_name = field.related_model
|
|
270
|
+
related_schema_name = Registry.__get_module_name__(field.related_model)
|
|
269
271
|
related_field_name = field.remote_field.name
|
|
270
272
|
is_editable = False
|
|
271
273
|
max_length = None
|
|
@@ -418,14 +420,10 @@ class _SchemaHandler:
|
|
|
418
420
|
all_models = {module_name: {} for module_name in self.included_modules}
|
|
419
421
|
|
|
420
422
|
# Iterate through all registered Django models
|
|
421
|
-
for model in apps.get_models():
|
|
423
|
+
for model in apps.get_models(include_auto_created=True):
|
|
422
424
|
# Check if model meets the criteria
|
|
423
|
-
if
|
|
424
|
-
|
|
425
|
-
and model is not SQLRecord
|
|
426
|
-
and not model._meta.abstract
|
|
427
|
-
):
|
|
428
|
-
module_name = model.__get_module_name__()
|
|
425
|
+
if model is not SQLRecord and not model._meta.abstract:
|
|
426
|
+
module_name = Registry.__get_module_name__(model)
|
|
429
427
|
# Only include if module is in our included list
|
|
430
428
|
if module_name in self.included_modules:
|
|
431
429
|
model_name = model._meta.model_name
|
|
@@ -41,7 +41,7 @@ def set_managed_storage(root: UPathStr, host: str | None = None, **fs_kwargs):
|
|
|
41
41
|
"use a tuple of (local_root, host) instead"
|
|
42
42
|
)
|
|
43
43
|
|
|
44
|
-
# here the storage is registered in the hub
|
|
44
|
+
# here the storage location is registered in the hub
|
|
45
45
|
# hub_record_status="hub-record-created" if a new record is created
|
|
46
46
|
# "hub-record-retrieved" if the storage is in the hub already
|
|
47
47
|
ssettings, hub_record_status = init_storage(
|
|
@@ -65,5 +65,13 @@ def set_managed_storage(root: UPathStr, host: str | None = None, **fs_kwargs):
|
|
|
65
65
|
delete_storage_record(ssettings)
|
|
66
66
|
raise e
|
|
67
67
|
|
|
68
|
-
settings.instance.
|
|
69
|
-
|
|
68
|
+
if ssettings._instance_id != settings.instance._id:
|
|
69
|
+
logger.warning(
|
|
70
|
+
f"registered storage location {root} as read-only for this instance (it's written by instance with uid: {ssettings.instance_uid})"
|
|
71
|
+
)
|
|
72
|
+
logger.warning(
|
|
73
|
+
f"did *not* switch default storage location, it's still: {settings.storage.root_as_str}"
|
|
74
|
+
)
|
|
75
|
+
else:
|
|
76
|
+
settings.instance._storage = ssettings
|
|
77
|
+
settings.storage._set_fs_kwargs(**fs_kwargs)
|
lamindb_setup/_setup_user.py
CHANGED
|
@@ -43,6 +43,14 @@ def load_user(email: str | None = None, handle: str | None = None) -> UserSettin
|
|
|
43
43
|
return user_settings
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
def current_user_uid() -> str:
|
|
47
|
+
current_user_settings = current_user_settings_file()
|
|
48
|
+
if current_user_settings.exists():
|
|
49
|
+
return load_user_settings(current_user_settings).uid
|
|
50
|
+
|
|
51
|
+
return "00000000" # anonymous
|
|
52
|
+
|
|
53
|
+
|
|
46
54
|
def login(
|
|
47
55
|
user: str | None = None, *, api_key: str | None = None, **kwargs
|
|
48
56
|
) -> UserSettings:
|
|
@@ -90,6 +98,9 @@ def login(
|
|
|
90
98
|
"the legacy API key is deprecated and will likely be removed in a future version"
|
|
91
99
|
)
|
|
92
100
|
|
|
101
|
+
# do this here because load_user overwrites current_user_settings_file
|
|
102
|
+
previous_user_uid = current_user_uid()
|
|
103
|
+
|
|
93
104
|
if api_key is None:
|
|
94
105
|
if "@" in user: # type: ignore
|
|
95
106
|
email, handle = user, None
|
|
@@ -144,8 +155,15 @@ def login(
|
|
|
144
155
|
user_settings.api_key = api_key
|
|
145
156
|
save_user_settings(user_settings)
|
|
146
157
|
|
|
147
|
-
if settings._instance_exists
|
|
148
|
-
|
|
158
|
+
if settings._instance_exists:
|
|
159
|
+
if (
|
|
160
|
+
isettings := settings.instance
|
|
161
|
+
).is_on_hub and previous_user_uid != user_settings.uid:
|
|
162
|
+
logger.important_hint(
|
|
163
|
+
f"consider re-connecting to update permissions: lamin connect {isettings.slug}"
|
|
164
|
+
)
|
|
165
|
+
if _check_instance_setup():
|
|
166
|
+
register_user(user_settings)
|
|
149
167
|
|
|
150
168
|
settings._user_settings = None
|
|
151
169
|
# aws s3 credentials are scoped to the user
|
lamindb_setup/core/__init__.py
CHANGED
|
@@ -23,7 +23,12 @@ Storage
|
|
|
23
23
|
"""
|
|
24
24
|
|
|
25
25
|
from . import django, upath
|
|
26
|
-
from ._clone import
|
|
26
|
+
from ._clone import (
|
|
27
|
+
connect_local_sqlite,
|
|
28
|
+
connect_remote_sqlite,
|
|
29
|
+
init_local_sqlite,
|
|
30
|
+
upload_sqlite_clone,
|
|
31
|
+
)
|
|
27
32
|
from ._deprecated import deprecated # documented in lamindb.base
|
|
28
33
|
from ._docs import doc_args # documented in lamindb.base
|
|
29
34
|
from ._settings import SetupSettings
|
lamindb_setup/core/_clone.py
CHANGED
|
@@ -5,14 +5,19 @@
|
|
|
5
5
|
|
|
6
6
|
init_local_sqlite
|
|
7
7
|
connect_local_sqlite
|
|
8
|
+
connect_remote_sqlite
|
|
9
|
+
upload_sqlite_clone
|
|
8
10
|
"""
|
|
9
11
|
|
|
12
|
+
import gzip
|
|
10
13
|
import os
|
|
14
|
+
import shutil
|
|
15
|
+
from pathlib import Path
|
|
11
16
|
|
|
12
|
-
from lamindb_setup.core._settings_instance import InstanceSettings
|
|
13
17
|
from lamindb_setup.core._settings_load import load_instance_settings
|
|
14
18
|
from lamindb_setup.core._settings_store import instance_settings_file
|
|
15
19
|
from lamindb_setup.core.django import reset_django
|
|
20
|
+
from lamindb_setup.core.upath import create_path
|
|
16
21
|
|
|
17
22
|
|
|
18
23
|
def init_local_sqlite(
|
|
@@ -53,28 +58,31 @@ def init_local_sqlite(
|
|
|
53
58
|
if copy_suffix is not None
|
|
54
59
|
else ln_setup.settings.instance.name
|
|
55
60
|
)
|
|
56
|
-
isettings =
|
|
57
|
-
|
|
58
|
-
owner=ln_setup.settings.instance.owner, # type: ignore
|
|
59
|
-
name=name,
|
|
60
|
-
storage=ln_setup.settings.storage,
|
|
61
|
-
db=None,
|
|
62
|
-
modules=",".join(ln_setup.settings.instance.modules),
|
|
63
|
-
is_on_hub=False,
|
|
61
|
+
isettings = ln_setup._connect_instance._connect_instance(
|
|
62
|
+
owner=ln_setup.settings.instance.owner, name=name
|
|
64
63
|
)
|
|
65
|
-
|
|
64
|
+
isettings._db = None
|
|
65
|
+
isettings._is_on_hub = False
|
|
66
|
+
isettings._fine_grained_access = False
|
|
67
|
+
name = (
|
|
68
|
+
f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
|
|
69
|
+
)
|
|
70
|
+
isettings._name = name
|
|
71
|
+
isettings._is_clone = True
|
|
66
72
|
isettings._persist(write_to_disk=True)
|
|
67
73
|
|
|
68
74
|
if not isettings._sqlite_file_local.exists():
|
|
69
75
|
# Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
|
|
70
|
-
# Without this reset, the if not settings.configured check in setup_django() would skip reconfiguration,
|
|
76
|
+
# Without this reset, the `if not settings.configured` check in `setup_django()` would skip reconfiguration,
|
|
71
77
|
# causing migrations to run against the old Postgres database instead of the new SQLite clone database.
|
|
72
78
|
reset_django()
|
|
73
79
|
isettings._init_db()
|
|
74
80
|
|
|
75
81
|
|
|
76
|
-
def connect_local_sqlite(
|
|
77
|
-
|
|
82
|
+
def connect_local_sqlite(
|
|
83
|
+
instance: str,
|
|
84
|
+
) -> None:
|
|
85
|
+
"""Load a locally stored SQLite instance of which a remote hub Postgres instance exists.
|
|
78
86
|
|
|
79
87
|
This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
|
|
80
88
|
The clone must first be created via `init_local_sqlite()`.
|
|
@@ -86,8 +94,81 @@ def connect_local_sqlite(instance: str) -> None:
|
|
|
86
94
|
settings_file = instance_settings_file(name=name, owner=owner)
|
|
87
95
|
|
|
88
96
|
if not settings_file.exists():
|
|
89
|
-
raise ValueError(
|
|
97
|
+
raise ValueError(
|
|
98
|
+
"SQLite clone not found."
|
|
99
|
+
" Run `init_local_sqlite()` to create a local copy or connect to a remote copy using `connect_remote_sqlite`."
|
|
100
|
+
)
|
|
90
101
|
|
|
91
102
|
isettings = load_instance_settings(settings_file)
|
|
92
103
|
isettings._persist(write_to_disk=False)
|
|
93
|
-
|
|
104
|
+
|
|
105
|
+
# Using `setup_django` instead of `_load_db` to not ping AWS RDS
|
|
106
|
+
from lamindb_setup._check_setup import disable_auto_connect
|
|
107
|
+
|
|
108
|
+
from .django import setup_django
|
|
109
|
+
|
|
110
|
+
disable_auto_connect(setup_django)(isettings)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def connect_remote_sqlite(instance: str, *, copy_suffix: str | None = None) -> None:
|
|
114
|
+
"""Load an existing SQLite copy of a hub instance.
|
|
115
|
+
|
|
116
|
+
Args:
|
|
117
|
+
instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
|
|
118
|
+
copy_suffix: Optional suffix of the local clone.
|
|
119
|
+
"""
|
|
120
|
+
import lamindb_setup as ln_setup
|
|
121
|
+
|
|
122
|
+
owner, name = instance.split("/")
|
|
123
|
+
|
|
124
|
+
# Step 1: Create the settings file
|
|
125
|
+
isettings = ln_setup._connect_instance._connect_instance(owner=owner, name=name)
|
|
126
|
+
isettings._db = None
|
|
127
|
+
isettings._is_on_hub = False
|
|
128
|
+
isettings._fine_grained_access = False
|
|
129
|
+
isettings._db_permissions = "read"
|
|
130
|
+
name = (
|
|
131
|
+
f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
|
|
132
|
+
)
|
|
133
|
+
isettings._name = name
|
|
134
|
+
isettings._is_clone = True
|
|
135
|
+
isettings._persist(write_to_disk=True)
|
|
136
|
+
|
|
137
|
+
connect_local_sqlite(instance=instance + (copy_suffix or ""))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def upload_sqlite_clone(
|
|
141
|
+
local_sqlite_path: Path | str | None = None, compress: bool = True
|
|
142
|
+
) -> None:
|
|
143
|
+
"""Uploads the SQLite clone to the default storage.
|
|
144
|
+
|
|
145
|
+
Args:
|
|
146
|
+
local_sqlite_path: Path to the SQLite file.
|
|
147
|
+
Defaults to the local storage path if not specified.
|
|
148
|
+
compress: Whether to compress the database with gzip before uploading.
|
|
149
|
+
"""
|
|
150
|
+
import lamindb_setup as ln_setup
|
|
151
|
+
|
|
152
|
+
if local_sqlite_path is None:
|
|
153
|
+
local_sqlite_path = ln_setup.settings.instance._sqlite_file_local
|
|
154
|
+
else:
|
|
155
|
+
local_sqlite_path = Path(local_sqlite_path)
|
|
156
|
+
|
|
157
|
+
if not local_sqlite_path.exists():
|
|
158
|
+
raise FileNotFoundError(f"Database not found at {local_sqlite_path}")
|
|
159
|
+
|
|
160
|
+
cloud_db_path = ln_setup.settings.instance._sqlite_file
|
|
161
|
+
|
|
162
|
+
if compress:
|
|
163
|
+
temp_gz_path = local_sqlite_path.with_suffix(".db.gz")
|
|
164
|
+
with (
|
|
165
|
+
open(local_sqlite_path, "rb") as f_in,
|
|
166
|
+
gzip.open(temp_gz_path, "wb") as f_out,
|
|
167
|
+
):
|
|
168
|
+
shutil.copyfileobj(f_in, f_out)
|
|
169
|
+
cloud_destination = create_path(f"{cloud_db_path}.gz")
|
|
170
|
+
cloud_destination.upload_from(temp_gz_path, print_progress=True)
|
|
171
|
+
temp_gz_path.unlink()
|
|
172
|
+
else:
|
|
173
|
+
cloud_destination = create_path(cloud_db_path)
|
|
174
|
+
cloud_destination.upload_from(local_sqlite_path, print_progress=True)
|
|
@@ -96,9 +96,12 @@ def connect_hub(
|
|
|
96
96
|
transports.append(
|
|
97
97
|
RetryTransport(
|
|
98
98
|
retry=LogRetry(total=2, backoff_factor=0.2),
|
|
99
|
-
transport=httpx.HTTPTransport(verify=True, http2=True),
|
|
99
|
+
transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
|
|
100
100
|
)
|
|
101
101
|
)
|
|
102
|
+
# this overwrites transports of existing httpx clients
|
|
103
|
+
# if proxies are set, the default transports that were created on clients init
|
|
104
|
+
# will be used, irrespective of these re-settings
|
|
102
105
|
client.auth._http_client._transport = transports[0]
|
|
103
106
|
client.postgrest.session._transport = transports[1]
|
|
104
107
|
# POST is not retryable by default, but for our functions it should be safe to retry
|
|
@@ -116,7 +119,7 @@ def connect_hub(
|
|
|
116
119
|
"POST",
|
|
117
120
|
],
|
|
118
121
|
),
|
|
119
|
-
transport=httpx.HTTPTransport(verify=True, http2=True),
|
|
122
|
+
transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
|
|
120
123
|
)
|
|
121
124
|
return client
|
|
122
125
|
|
|
@@ -246,9 +249,13 @@ def httpx_client():
|
|
|
246
249
|
else:
|
|
247
250
|
transport = RetryTransport(
|
|
248
251
|
retry=LogRetry(total=2, backoff_factor=0.2),
|
|
249
|
-
transport=httpx.HTTPTransport(verify=True, http2=True),
|
|
252
|
+
transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
|
|
250
253
|
)
|
|
251
|
-
client
|
|
254
|
+
# first we create a client to build the proxy map from the env variables
|
|
255
|
+
# if proxies are set, the default transports will be used
|
|
256
|
+
# otherwise the RetryTransport object that we assign below
|
|
257
|
+
client = httpx.Client(trust_env=True)
|
|
258
|
+
client._transport = transport
|
|
252
259
|
yield client
|
|
253
260
|
finally:
|
|
254
261
|
if client is not None:
|
lamindb_setup/core/_settings.py
CHANGED
|
@@ -320,8 +320,7 @@ class SetupSettings:
|
|
|
320
320
|
def paths(self) -> type[SetupPaths]:
|
|
321
321
|
"""Convert cloud paths to lamindb local paths.
|
|
322
322
|
|
|
323
|
-
Use `settings.paths.cloud_to_local_no_update`
|
|
324
|
-
or `settings.paths.cloud_to_local`.
|
|
323
|
+
Use `settings.paths.cloud_to_local_no_update` or `settings.paths.cloud_to_local`.
|
|
325
324
|
"""
|
|
326
325
|
return SetupPaths
|
|
327
326
|
|
|
@@ -54,8 +54,7 @@ def is_local_db_url(db_url: str) -> bool:
|
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
def check_is_instance_remote(root: UPathStr, db: str | None) -> bool:
|
|
57
|
-
# returns True for cloud SQLite
|
|
58
|
-
# and remote postgres
|
|
57
|
+
# returns True for cloud SQLite and remote postgres
|
|
59
58
|
root_str = str(root)
|
|
60
59
|
if not root_str.startswith("create-s3") and get_storage_type(root_str) == "local":
|
|
61
60
|
return False
|
|
@@ -83,7 +82,8 @@ class InstanceSettings:
|
|
|
83
82
|
schema_id: UUID | None = None,
|
|
84
83
|
fine_grained_access: bool = False,
|
|
85
84
|
db_permissions: str | None = None,
|
|
86
|
-
_locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
|
|
85
|
+
_locker_user: UserSettings | None = None, # user to lock for if cloud sqlite,
|
|
86
|
+
_is_clone: bool = False,
|
|
87
87
|
):
|
|
88
88
|
from ._hub_utils import validate_db_arg
|
|
89
89
|
|
|
@@ -109,6 +109,7 @@ class InstanceSettings:
|
|
|
109
109
|
self._db_permissions = db_permissions
|
|
110
110
|
# if None then settings.user is used
|
|
111
111
|
self._locker_user = _locker_user
|
|
112
|
+
self._is_clone = _is_clone
|
|
112
113
|
|
|
113
114
|
def __repr__(self):
|
|
114
115
|
"""Rich string representation."""
|
|
@@ -434,7 +435,7 @@ class InstanceSettings:
|
|
|
434
435
|
|
|
435
436
|
def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
|
|
436
437
|
"""Upload the local sqlite file to the cloud file."""
|
|
437
|
-
if self._is_cloud_sqlite:
|
|
438
|
+
if self._is_cloud_sqlite and not self._is_clone:
|
|
438
439
|
sqlite_file = self._sqlite_file
|
|
439
440
|
logger.warning(
|
|
440
441
|
f"updating{' & unlocking' if unlock_cloud_sqlite else ''} cloud SQLite "
|
|
@@ -602,6 +603,14 @@ class InstanceSettings:
|
|
|
602
603
|
disable_auto_connect(setup_django)(self, init=True)
|
|
603
604
|
|
|
604
605
|
def _load_db(self) -> tuple[bool, str]:
|
|
606
|
+
"""Load the database connection.
|
|
607
|
+
|
|
608
|
+
For cloud SQLite instances, downloads the database file from cloud storage.
|
|
609
|
+
For all instances, initializes Django ORM with the database connection.
|
|
610
|
+
|
|
611
|
+
Returns:
|
|
612
|
+
Tuple of (success: bool, error_message: str). Returns (True, "") on success.
|
|
613
|
+
"""
|
|
605
614
|
# Is the database available and initialized as LaminDB?
|
|
606
615
|
# returns a tuple of status code and message
|
|
607
616
|
if self.dialect == "sqlite" and not self._sqlite_file.exists():
|
|
@@ -615,8 +624,8 @@ class InstanceSettings:
|
|
|
615
624
|
return False, f"SQLite file {self._sqlite_file} does not exist"
|
|
616
625
|
# we need the local sqlite to setup django
|
|
617
626
|
self._update_local_sqlite_file()
|
|
618
|
-
|
|
619
|
-
# as warnings
|
|
627
|
+
|
|
628
|
+
# setting up django also performs a check for migrations & prints them as warnings
|
|
620
629
|
# this should fail, e.g., if the db is not reachable
|
|
621
630
|
from lamindb_setup._check_setup import disable_auto_connect
|
|
622
631
|
|
|
@@ -69,8 +69,7 @@ def load_or_create_user_settings(api_key: str | None = None) -> UserSettings:
|
|
|
69
69
|
"""Return current user settings.
|
|
70
70
|
|
|
71
71
|
Args:
|
|
72
|
-
api_key: if provided and there is no current user,
|
|
73
|
-
perform login and return the user settings.
|
|
72
|
+
api_key: if provided and there is no current user, perform login and return the user settings.
|
|
74
73
|
"""
|
|
75
74
|
current_user_settings = current_user_settings_file()
|
|
76
75
|
if not current_user_settings.exists():
|
|
@@ -125,6 +124,7 @@ def setup_instance_from_store(store: InstanceSettingsStore) -> InstanceSettings:
|
|
|
125
124
|
schema_id=None if store.schema_id in {None, "null"} else UUID(store.schema_id),
|
|
126
125
|
fine_grained_access=store.fine_grained_access,
|
|
127
126
|
db_permissions=_null_to_value(store.db_permissions),
|
|
127
|
+
_is_clone=store.is_clone,
|
|
128
128
|
)
|
|
129
129
|
|
|
130
130
|
|
|
@@ -12,17 +12,14 @@ from lamin_utils import logger
|
|
|
12
12
|
from lamindb_setup.errors import StorageAlreadyManaged
|
|
13
13
|
|
|
14
14
|
from ._aws_options import (
|
|
15
|
-
HOSTED_REGIONS,
|
|
16
15
|
LAMIN_ENDPOINTS,
|
|
17
16
|
get_aws_options_manager,
|
|
18
17
|
)
|
|
19
|
-
from ._aws_storage import find_closest_aws_region
|
|
20
18
|
from ._deprecated import deprecated
|
|
21
19
|
from .hashing import hash_and_encode_as_b62
|
|
22
20
|
from .upath import (
|
|
23
21
|
LocalPathClasses,
|
|
24
22
|
UPath,
|
|
25
|
-
_split_path_query,
|
|
26
23
|
create_path,
|
|
27
24
|
get_storage_region,
|
|
28
25
|
)
|
|
@@ -58,12 +55,40 @@ def get_storage_type(root_as_str: str) -> StorageType:
|
|
|
58
55
|
return convert.get(protocol, protocol) # type: ignore
|
|
59
56
|
|
|
60
57
|
|
|
58
|
+
def sanitize_root_user_input(root: UPathStr) -> UPath:
|
|
59
|
+
"""Convert a user-provided root into an expanded `UPath`; for local paths, create the `.lamindb/` folder and resolve the path."""
|
|
60
|
+
root_upath = root if isinstance(root, UPath) else UPath(root)
|
|
61
|
+
root_upath = root_upath.expanduser()
|
|
62
|
+
if isinstance(root_upath, LocalPathClasses): # local paths
|
|
63
|
+
try:
|
|
64
|
+
(root_upath / ".lamindb").mkdir(parents=True, exist_ok=True)
|
|
65
|
+
root_upath = root_upath.resolve()
|
|
66
|
+
except Exception:
|
|
67
|
+
logger.warning(f"unable to create .lamindb/ folder in {root_upath}")
|
|
68
|
+
return root_upath
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def convert_sanitized_root_path_to_str(root_upath: UPath) -> str:
|
|
72
|
+
# embed endpoint_url into path string for storing and displaying
|
|
73
|
+
if root_upath.protocol == "s3":
|
|
74
|
+
endpoint_url = root_upath.storage_options.get("endpoint_url", None)
|
|
75
|
+
# LAMIN_ENDPOINTS include None
|
|
76
|
+
if endpoint_url not in LAMIN_ENDPOINTS:
|
|
77
|
+
return f"s3://{root_upath.path.rstrip('/')}?endpoint_url={endpoint_url}"
|
|
78
|
+
return root_upath.as_posix().rstrip("/")
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def convert_root_path_to_str(root: UPathStr) -> str:
|
|
82
|
+
"""Format a root path string."""
|
|
83
|
+
sanitized_root_upath = sanitize_root_user_input(root)
|
|
84
|
+
return convert_sanitized_root_path_to_str(sanitized_root_upath)
|
|
85
|
+
|
|
86
|
+
|
|
61
87
|
def mark_storage_root(
|
|
62
88
|
root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
|
|
63
89
|
) -> Literal["__marked__"] | str:
|
|
64
90
|
# we need a file in folder-like storage locations on S3 to avoid
|
|
65
|
-
# permission errors from leveraging s3fs on an empty hosted storage location
|
|
66
|
-
# (path.fs.find raises a PermissionError)
|
|
91
|
+
# permission errors from leveraging s3fs on an empty hosted storage location (path.fs.find raises a PermissionError)
|
|
67
92
|
# we also need it in case a storage location is ambiguous because a server / local environment
|
|
68
93
|
# doesn't have a globally unique identifier, then we screen for this file to map the
|
|
69
94
|
# path on a storage location in the registry
|
|
@@ -97,6 +122,7 @@ def init_storage(
|
|
|
97
122
|
access_token: str | None = None,
|
|
98
123
|
region: str | None = None,
|
|
99
124
|
space_uuid: UUID | None = None,
|
|
125
|
+
skip_mark_storage_root: bool = False,
|
|
100
126
|
) -> tuple[
|
|
101
127
|
StorageSettings,
|
|
102
128
|
Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
|
|
@@ -156,7 +182,8 @@ def init_storage(
|
|
|
156
182
|
space_id=space_uuid,
|
|
157
183
|
)
|
|
158
184
|
# we check the write access here if the storage record has not been retrieved from the hub
|
|
159
|
-
if hub_record_status
|
|
185
|
+
# Sergei: should it in fact still go through if hub_record_status == "hub-record-not-created"?
|
|
186
|
+
if hub_record_status != "hub-record-retrieved" and not skip_mark_storage_root:
|
|
160
187
|
try:
|
|
161
188
|
# (federated) credentials for AWS access are provisioned under-the-hood
|
|
162
189
|
# discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489
|
|
@@ -214,15 +241,7 @@ class StorageSettings:
|
|
|
214
241
|
):
|
|
215
242
|
self._uid = uid
|
|
216
243
|
self._uuid_ = uuid
|
|
217
|
-
self._root_init =
|
|
218
|
-
if isinstance(self._root_init, LocalPathClasses): # local paths
|
|
219
|
-
try:
|
|
220
|
-
(self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
|
|
221
|
-
self._root_init = self._root_init.resolve()
|
|
222
|
-
except Exception:
|
|
223
|
-
logger.warning(
|
|
224
|
-
f"unable to create .lamindb/ folder in {self._root_init}"
|
|
225
|
-
)
|
|
244
|
+
self._root_init: UPath = sanitize_root_user_input(root)
|
|
226
245
|
self._root = None
|
|
227
246
|
self._instance_id = instance_id
|
|
228
247
|
# we don't yet infer region here to make init fast
|
|
@@ -337,13 +356,7 @@ class StorageSettings:
|
|
|
337
356
|
@property
|
|
338
357
|
def root_as_str(self) -> str:
|
|
339
358
|
"""Formatted root string."""
|
|
340
|
-
|
|
341
|
-
if self._root_init.protocol == "s3":
|
|
342
|
-
endpoint_url = self._root_init.storage_options.get("endpoint_url", None)
|
|
343
|
-
# LAMIN_ENDPOINTS include None
|
|
344
|
-
if endpoint_url not in LAMIN_ENDPOINTS:
|
|
345
|
-
return f"s3://{self._root_init.path.rstrip('/')}?endpoint_url={endpoint_url}"
|
|
346
|
-
return self._root_init.as_posix().rstrip("/")
|
|
359
|
+
return convert_sanitized_root_path_to_str(self._root_init)
|
|
347
360
|
|
|
348
361
|
@property
|
|
349
362
|
def cache_dir(
|
|
@@ -67,8 +67,8 @@ class InstanceSettingsStore(BaseSettings):
|
|
|
67
67
|
owner: str
|
|
68
68
|
name: str
|
|
69
69
|
storage_root: str
|
|
70
|
-
storage_region: str | None
|
|
71
|
-
db: str | None
|
|
70
|
+
storage_region: str | None
|
|
71
|
+
db: str | None
|
|
72
72
|
schema_str: str | None
|
|
73
73
|
schema_id: str | None = None
|
|
74
74
|
fine_grained_access: bool = False
|
|
@@ -76,6 +76,7 @@ class InstanceSettingsStore(BaseSettings):
|
|
|
76
76
|
id: str
|
|
77
77
|
git_repo: str | None
|
|
78
78
|
keep_artifacts_local: bool | None
|
|
79
|
+
is_clone: bool = False
|
|
79
80
|
model_config = SettingsConfigDict(env_prefix="lamindb_instance_", env_file=".env")
|
|
80
81
|
|
|
81
82
|
|
lamindb_setup/core/django.py
CHANGED
|
@@ -238,6 +238,8 @@ def setup_django(
|
|
|
238
238
|
if view_schema:
|
|
239
239
|
installed_apps = installed_apps[::-1] # to fix how apps appear
|
|
240
240
|
installed_apps += ["schema_graph", "django.contrib.staticfiles"]
|
|
241
|
+
if isettings.dialect == "postgresql":
|
|
242
|
+
installed_apps.insert(0, "pgtrigger")
|
|
241
243
|
|
|
242
244
|
kwargs = dict(
|
|
243
245
|
INSTALLED_APPS=installed_apps,
|
lamindb_setup/core/upath.py
CHANGED
|
@@ -93,10 +93,12 @@ def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
|
|
|
93
93
|
else:
|
|
94
94
|
return suffix
|
|
95
95
|
|
|
96
|
-
|
|
96
|
+
suffixes = path.suffixes
|
|
97
|
+
|
|
98
|
+
if len(suffixes) <= 1:
|
|
97
99
|
return process_digits(path.suffix)
|
|
98
100
|
|
|
99
|
-
total_suffix = "".join(
|
|
101
|
+
total_suffix = "".join(suffixes)
|
|
100
102
|
if total_suffix in VALID_SIMPLE_SUFFIXES:
|
|
101
103
|
return total_suffix
|
|
102
104
|
elif total_suffix.endswith(tuple(VALID_COMPOSITE_SUFFIXES)):
|
|
@@ -115,14 +117,24 @@ def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
|
|
|
115
117
|
# in COMPRESSION_SUFFIXES to detect something like .random.gz and then
|
|
116
118
|
# add ".random.gz" but concluded it's too dangerous; it's safer to just
|
|
117
119
|
# use ".gz" in such a case
|
|
118
|
-
if
|
|
119
|
-
suffix = "".join(
|
|
120
|
-
|
|
120
|
+
if suffixes[-2] in VALID_SIMPLE_SUFFIXES:
|
|
121
|
+
suffix = "".join(suffixes[-2:])
|
|
122
|
+
# if the suffix preceding the compression suffixes is a valid suffix,
|
|
123
|
+
# we account for it; otherwise we don't.
|
|
124
|
+
# i.e. we should have .h5ad.tar.gz or .csv.tar.gz, not just .tar.gz
|
|
125
|
+
if (
|
|
126
|
+
suffix == ".tar.gz"
|
|
127
|
+
and len(suffixes) > 2
|
|
128
|
+
and (suffix_3 := suffixes[-3]) in VALID_SIMPLE_SUFFIXES
|
|
129
|
+
):
|
|
130
|
+
suffix = suffix_3 + suffix
|
|
121
131
|
# do not print a warning for things like .tar.gz, .fastq.gz
|
|
122
|
-
if
|
|
132
|
+
if suffixes[-1] == ".gz":
|
|
123
133
|
print_hint = False
|
|
134
|
+
else:
|
|
135
|
+
msg += f"inferring: '{suffix}'"
|
|
124
136
|
else:
|
|
125
|
-
suffix =
|
|
137
|
+
suffix = suffixes[-1] # this is equivalent to path.suffix
|
|
126
138
|
msg += (
|
|
127
139
|
f"using only last suffix: '{suffix}' - if you want your composite"
|
|
128
140
|
" suffix to be recognized add it to"
|
|
@@ -908,12 +920,9 @@ def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
|
|
|
908
920
|
elif "blob_id" in stat:
|
|
909
921
|
hash = b16_to_b64(stat["blob_id"])
|
|
910
922
|
hash_type = "sha1"
|
|
911
|
-
# s3
|
|
912
|
-
# StorageClass is checked to be sure that it is indeed s3
|
|
913
|
-
# because http also has ETag
|
|
914
923
|
elif "ETag" in stat:
|
|
915
924
|
etag = stat["ETag"]
|
|
916
|
-
if "mimetype" in stat:
|
|
925
|
+
if "mimetype" in stat or ("url" in stat and stat["url"].startswith("http")):
|
|
917
926
|
# http
|
|
918
927
|
hash = hash_string(etag.strip('"'))
|
|
919
928
|
hash_type = "md5-etag"
|
lamindb_setup/io.py
CHANGED
|
@@ -3,7 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import io
|
|
4
4
|
import json
|
|
5
5
|
import warnings
|
|
6
|
-
from concurrent.futures import
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
7
7
|
from importlib import import_module
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import TYPE_CHECKING
|
|
@@ -14,24 +14,13 @@ from django.db import models, transaction
|
|
|
14
14
|
from rich.progress import Progress
|
|
15
15
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
|
-
from collections.abc import Sequence
|
|
17
|
+
from collections.abc import Iterable, Sequence
|
|
18
18
|
from typing import Literal
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def _get_registries(module_name: str) -> list[str]:
|
|
22
22
|
"""Get registry class names from a module."""
|
|
23
23
|
schema_module = import_module(module_name)
|
|
24
|
-
exclude = {"SQLRecord", "BaseSQLRecord"}
|
|
25
|
-
|
|
26
|
-
if module_name == "lamindb":
|
|
27
|
-
module_filter = lambda cls, name: cls.__module__.startswith(
|
|
28
|
-
f"{module_name}.models."
|
|
29
|
-
) and name in dir(schema_module)
|
|
30
|
-
else:
|
|
31
|
-
module_filter = (
|
|
32
|
-
lambda cls, name: cls.__module__ == f"{module_name}.models"
|
|
33
|
-
and name in dir(schema_module)
|
|
34
|
-
)
|
|
35
24
|
|
|
36
25
|
return [
|
|
37
26
|
name
|
|
@@ -40,8 +29,8 @@ def _get_registries(module_name: str) -> list[str]:
|
|
|
40
29
|
name[0].isupper()
|
|
41
30
|
and isinstance(cls := getattr(schema_module.models, name, None), type)
|
|
42
31
|
and issubclass(cls, models.Model)
|
|
43
|
-
|
|
44
|
-
and
|
|
32
|
+
# Table names starting with `None_` are abstract base classes or Django mixins
|
|
33
|
+
and not cls._meta.db_table.startswith("None_") # type: ignore
|
|
45
34
|
)
|
|
46
35
|
]
|
|
47
36
|
|
|
@@ -59,7 +48,7 @@ def _export_full_table(
|
|
|
59
48
|
For SQLite with large tables, reads in chunks to avoid memory issues when tables exceed available RAM.
|
|
60
49
|
|
|
61
50
|
Args:
|
|
62
|
-
registry_info: Tuple of (module_name, model_name, field_name) where field_name
|
|
51
|
+
registry_info: Tuple of (module_name, model_name, field_name) where `field_name`
|
|
63
52
|
is None for regular tables or the field name for M2M link tables.
|
|
64
53
|
directory: Output directory for parquet files.
|
|
65
54
|
chunk_size: Maximum rows per chunk for SQLite large tables.
|
|
@@ -73,7 +62,7 @@ def _export_full_table(
|
|
|
73
62
|
|
|
74
63
|
module_name, model_name, field_name = registry_info
|
|
75
64
|
schema_module = import_module(module_name)
|
|
76
|
-
registry = getattr(schema_module, model_name)
|
|
65
|
+
registry = getattr(schema_module.models, model_name)
|
|
77
66
|
|
|
78
67
|
if field_name:
|
|
79
68
|
registry = getattr(registry, field_name).through
|
|
@@ -84,12 +73,19 @@ def _export_full_table(
|
|
|
84
73
|
if ln_setup.settings.instance.dialect == "postgresql":
|
|
85
74
|
buffer = io.StringIO()
|
|
86
75
|
with connection.cursor() as cursor:
|
|
76
|
+
cursor.execute("SET statement_timeout = 0")
|
|
87
77
|
cursor.copy_expert(
|
|
88
78
|
f'COPY "{table_name}" TO STDOUT WITH (FORMAT CSV, HEADER TRUE)',
|
|
89
79
|
buffer,
|
|
90
80
|
)
|
|
91
81
|
buffer.seek(0)
|
|
92
|
-
|
|
82
|
+
# Prevent pandas from converting empty strings to float NaN (which PyArrow rejects)
|
|
83
|
+
df = pd.read_csv(buffer, keep_default_na=False)
|
|
84
|
+
# Convert object columns to string to handle mixed types from data corruption,
|
|
85
|
+
# schema migrations, or manual SQL inserts. PyArrow rejects mixed-type objects.
|
|
86
|
+
df = df.astype(
|
|
87
|
+
{col: str for col in df.columns if df[col].dtype == "object"}
|
|
88
|
+
)
|
|
93
89
|
df.to_parquet(directory / f"{table_name}.parquet", compression=None)
|
|
94
90
|
return (
|
|
95
91
|
f"{module_name}.{model_name}.{field_name}"
|
|
@@ -118,11 +114,21 @@ def _export_full_table(
|
|
|
118
114
|
chunk_file = (
|
|
119
115
|
directory / f"{table_name}_chunk_{chunk_id}.parquet"
|
|
120
116
|
)
|
|
117
|
+
df = df.astype(
|
|
118
|
+
{
|
|
119
|
+
col: str
|
|
120
|
+
for col in df.columns
|
|
121
|
+
if df[col].dtype == "object"
|
|
122
|
+
}
|
|
123
|
+
)
|
|
121
124
|
df.to_parquet(chunk_file, compression=None)
|
|
122
125
|
chunk_files.append((table_name, chunk_file))
|
|
123
126
|
return chunk_files
|
|
124
127
|
else:
|
|
125
128
|
df = pd.read_sql_table(table_name, ln_setup.settings.instance.db)
|
|
129
|
+
df = df.astype(
|
|
130
|
+
{col: str for col in df.columns if df[col].dtype == "object"}
|
|
131
|
+
)
|
|
126
132
|
df.to_parquet(directory / f"{table_name}.parquet", compression=None)
|
|
127
133
|
return (
|
|
128
134
|
f"{module_name}.{model_name}.{field_name}"
|
|
@@ -163,7 +169,7 @@ def export_db(
|
|
|
163
169
|
for module_name, model_names in modules.items():
|
|
164
170
|
schema_module = import_module(module_name)
|
|
165
171
|
for model_name in model_names:
|
|
166
|
-
registry = getattr(schema_module, model_name)
|
|
172
|
+
registry = getattr(schema_module.models, model_name)
|
|
167
173
|
tasks.append((module_name, model_name, None))
|
|
168
174
|
for field in registry._meta.many_to_many:
|
|
169
175
|
tasks.append((module_name, model_name, field.name))
|
|
@@ -172,7 +178,9 @@ def export_db(
|
|
|
172
178
|
|
|
173
179
|
with Progress() as progress:
|
|
174
180
|
task_id = progress.add_task("Exporting", total=len(tasks))
|
|
175
|
-
|
|
181
|
+
|
|
182
|
+
# This must be a ThreadPoolExecutor and not a ProcessPoolExecutor to inherit JWTs
|
|
183
|
+
with ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
176
184
|
futures = {
|
|
177
185
|
executor.submit(_export_full_table, task, directory, chunk_size): task
|
|
178
186
|
for task in tasks
|
|
@@ -222,7 +230,6 @@ def _import_registry(
|
|
|
222
230
|
parquet_file = directory / f"{table_name}.parquet"
|
|
223
231
|
|
|
224
232
|
if not parquet_file.exists():
|
|
225
|
-
print(f"Skipped {table_name} (file not found)")
|
|
226
233
|
return
|
|
227
234
|
|
|
228
235
|
df = pd.read_parquet(parquet_file)
|
|
@@ -237,9 +244,47 @@ def _import_registry(
|
|
|
237
244
|
if mask.any():
|
|
238
245
|
df.loc[mask, col] = df.loc[mask, col].map(_serialize_value)
|
|
239
246
|
|
|
247
|
+
for field in registry._meta.fields:
|
|
248
|
+
# Convert PostgreSQL boolean string literals ('t'/'f') to Python booleans for SQLite compatibility
|
|
249
|
+
if field.get_internal_type() == "BooleanField" and field.column in df.columns:
|
|
250
|
+
df[field.column] = df[field.column].map(
|
|
251
|
+
{"t": True, "f": False, True: True, False: False, None: None}
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
# PostgreSQL CSV export writes NULL as empty string; convert back to None for nullable fields
|
|
255
|
+
if field.null and field.column in df.columns:
|
|
256
|
+
df[field.column] = df[field.column].replace("", None)
|
|
257
|
+
|
|
258
|
+
# Convert numeric fields from strings to proper types for SQLite
|
|
259
|
+
if (
|
|
260
|
+
field.get_internal_type()
|
|
261
|
+
in (
|
|
262
|
+
"IntegerField",
|
|
263
|
+
"BigIntegerField",
|
|
264
|
+
"PositiveIntegerField",
|
|
265
|
+
"FloatField",
|
|
266
|
+
"DecimalField",
|
|
267
|
+
)
|
|
268
|
+
and field.column in df.columns
|
|
269
|
+
):
|
|
270
|
+
df[field.column] = pd.to_numeric(df[field.column], errors="coerce")
|
|
271
|
+
|
|
272
|
+
if if_exists == "append":
|
|
273
|
+
# Fill NULL values in NOT NULL columns to handle schema mismatches between postgres source and SQLite target
|
|
274
|
+
# This allows importing data where fields were nullable
|
|
275
|
+
for field in registry._meta.fields:
|
|
276
|
+
if field.column in df.columns and not field.null:
|
|
277
|
+
df[field.column] = df[field.column].fillna("").infer_objects(copy=False)
|
|
278
|
+
|
|
240
279
|
if df.empty:
|
|
241
280
|
return
|
|
242
281
|
|
|
282
|
+
if if_exists == "append":
|
|
283
|
+
# Clear existing data before import
|
|
284
|
+
# When appending we would run into duplicate errors because of existing values like branches etc
|
|
285
|
+
with connection.cursor() as cursor:
|
|
286
|
+
cursor.execute(f'DELETE FROM "{table_name}"')
|
|
287
|
+
|
|
243
288
|
if connection.vendor == "postgresql":
|
|
244
289
|
columns = df.columns.tolist()
|
|
245
290
|
column_names = ", ".join(f'"{col}"' for col in columns)
|
|
@@ -265,6 +310,7 @@ def _import_registry(
|
|
|
265
310
|
max_vars = 900 # SQLite has a limit of 999 variables per statement
|
|
266
311
|
chunksize = max(1, max_vars // num_cols)
|
|
267
312
|
|
|
313
|
+
# Always use append mode since we set up the tables from a fresh instance
|
|
268
314
|
df.to_sql(
|
|
269
315
|
table_name,
|
|
270
316
|
connection.connection,
|
|
@@ -276,7 +322,7 @@ def _import_registry(
|
|
|
276
322
|
|
|
277
323
|
|
|
278
324
|
def import_db(
|
|
279
|
-
module_names:
|
|
325
|
+
module_names: Iterable[str] | None = None,
|
|
280
326
|
*,
|
|
281
327
|
input_dir: str | Path = "./lamindb_export/",
|
|
282
328
|
if_exists: Literal["fail", "replace", "append"] = "replace",
|
|
@@ -290,6 +336,9 @@ def import_db(
|
|
|
290
336
|
input_dir: Directory containing parquet files to import.
|
|
291
337
|
module_names: Module names to import (e.g., ["lamindb", "bionty", "wetlab"]).
|
|
292
338
|
if_exists: How to behave if table exists: 'fail', 'replace', or 'append'.
|
|
339
|
+
If set to 'replace', existing data is deleted and new data is imported. PKs and indices are not guaranteed to be preserved which can lead to write errors.
|
|
340
|
+
If set to 'append', new data is added to existing data without clearing the table. PKs and indices are preserved but database size will greatly increase.
|
|
341
|
+
If set to 'fail', raises an error if the table contains any data.
|
|
293
342
|
"""
|
|
294
343
|
from django.db import connection
|
|
295
344
|
|
|
@@ -338,7 +387,7 @@ def import_db(
|
|
|
338
387
|
progress.update(
|
|
339
388
|
task, description=f"[cyan]{module_name}.{model_name}"
|
|
340
389
|
)
|
|
341
|
-
registry = getattr(schema_module, model_name)
|
|
390
|
+
registry = getattr(schema_module.models, model_name)
|
|
342
391
|
_import_registry(registry, directory, if_exists=if_exists)
|
|
343
392
|
for field in registry._meta.many_to_many:
|
|
344
393
|
link_orm = getattr(registry, field.name).through
|
|
@@ -352,3 +401,5 @@ def import_db(
|
|
|
352
401
|
cursor.execute("PRAGMA synchronous = FULL")
|
|
353
402
|
cursor.execute("PRAGMA journal_mode = DELETE")
|
|
354
403
|
cursor.execute("PRAGMA foreign_keys = ON")
|
|
404
|
+
# Reclaim space from DELETEs
|
|
405
|
+
cursor.execute("VACUUM")
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: lamindb_setup
|
|
3
|
-
Version: 1.
|
|
3
|
+
Version: 1.16.0
|
|
4
4
|
Summary: Setup & configure LaminDB.
|
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
|
6
6
|
Requires-Python: >=3.10
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
|
-
License-File: LICENSE
|
|
9
8
|
Requires-Dist: lamin_utils>=0.3.3
|
|
10
9
|
Requires-Dist: django>=5.2,<5.3
|
|
11
10
|
Requires-Dist: dj_database_url>=1.3.0,<3.0.0
|
|
11
|
+
Requires-Dist: django-pgtrigger
|
|
12
12
|
Requires-Dist: pydantic-settings
|
|
13
13
|
Requires-Dist: platformdirs<5.0.0
|
|
14
14
|
Requires-Dist: httpx_retries<1.0.0
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
lamindb_setup/__init__.py,sha256=
|
|
1
|
+
lamindb_setup/__init__.py,sha256=zAgDC8o3cyH4eXcM60mbulFPECs5XZbqFRxLfGpgpc8,3215
|
|
2
2
|
lamindb_setup/_cache.py,sha256=pGvDNVHGx4HWr_6w5ajqEJOdysmaGc6F221qFnXkT-k,2747
|
|
3
3
|
lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
|
|
4
4
|
lamindb_setup/_check_setup.py,sha256=ToKMxsUq8dQBQh8baOrNVlSb1iC8h4zTg5dV8wMu0W4,6760
|
|
5
|
-
lamindb_setup/_connect_instance.py,sha256=
|
|
5
|
+
lamindb_setup/_connect_instance.py,sha256=3dsaZ7LGzJYtOWDbi2RkiVJIJqdxy43suNjQ-6C96_U,17788
|
|
6
6
|
lamindb_setup/_delete.py,sha256=KS3r-xGFuDmAbzPUy-9JR-YnPShYdaHjDRQrAmXQ0qM,5863
|
|
7
7
|
lamindb_setup/_disconnect.py,sha256=FT8EpCm5XXDdhDH7QtAnkO3KPatq2HqT9VXGNjgJDbk,1232
|
|
8
8
|
lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
|
|
@@ -11,40 +11,40 @@ lamindb_setup/_init_instance.py,sha256=zNXmZPUHYda1CfLGtsvo4gNhHprK9QVPfffUIfBlT
|
|
|
11
11
|
lamindb_setup/_migrate.py,sha256=SN8uphuQX-8XShH5odLyzV8-eyXATDxB5hWoxwxmgBU,11264
|
|
12
12
|
lamindb_setup/_register_instance.py,sha256=RdUZxZWHLdbvdNZWpF8e0UWROb_T0cStWbzc5yUw34I,1047
|
|
13
13
|
lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
|
|
14
|
-
lamindb_setup/_schema_metadata.py,sha256=
|
|
15
|
-
lamindb_setup/_set_managed_storage.py,sha256=
|
|
16
|
-
lamindb_setup/_setup_user.py,sha256=
|
|
14
|
+
lamindb_setup/_schema_metadata.py,sha256=Whs-e4ZMnA1niZ2l5Eu8il-33IxI4Hr5ylGEgPxx8wk,15628
|
|
15
|
+
lamindb_setup/_set_managed_storage.py,sha256=xQe5DXCRiQ5VseAjVC2Bki0wB0n0tSTchvVKSx9I6eo,3094
|
|
16
|
+
lamindb_setup/_setup_user.py,sha256=cjQ-Md-FkP04PnBxocbHW6wCsZsNtD2T2NB52vAOnHI,6730
|
|
17
17
|
lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
|
|
18
18
|
lamindb_setup/errors.py,sha256=lccF3X3M2mcbHVG_0HxfuJRFFpUE-42paccIxFOfefQ,1958
|
|
19
|
-
lamindb_setup/io.py,sha256=
|
|
19
|
+
lamindb_setup/io.py,sha256=9s4Itt4rrHzsUATY79r4nhGp9zVAm-9uBhiQgg60l6U,16708
|
|
20
20
|
lamindb_setup/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
21
|
lamindb_setup/types.py,sha256=fuQxZJnrGYe7a_Ju9n1RqO-HhkOAr1l1xjpAg9dmBu8,605
|
|
22
|
-
lamindb_setup/core/__init__.py,sha256=
|
|
22
|
+
lamindb_setup/core/__init__.py,sha256=adZtacDwG2C0tgx-ypp9yOAqw9qaR-IRWkgLurKpXVE,668
|
|
23
23
|
lamindb_setup/core/_aws_options.py,sha256=9kQ5BB-cuJQrlJRGNqMRe1m48dP67xMbefOJP2c9OQw,9674
|
|
24
24
|
lamindb_setup/core/_aws_storage.py,sha256=QEtV-riQrwfivcwqHnXBbkJ-9YyNEXL4fLoCmOHZ1BI,2003
|
|
25
|
-
lamindb_setup/core/_clone.py,sha256=
|
|
25
|
+
lamindb_setup/core/_clone.py,sha256=oLTMItGxRQB1THSDP-RG2eH1qPTVcuZ7_-eElttJ518,6451
|
|
26
26
|
lamindb_setup/core/_deprecated.py,sha256=M3vpM4fZPOncxY2qsXQAPeaEph28xWdv7tYaueaUyAA,2554
|
|
27
27
|
lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
|
|
28
|
-
lamindb_setup/core/_hub_client.py,sha256=
|
|
28
|
+
lamindb_setup/core/_hub_client.py,sha256=vem145S5ppRPcWob7iclGhos8k-BfwJi9AI-l5PteDs,10481
|
|
29
29
|
lamindb_setup/core/_hub_core.py,sha256=GAQK5XkHROIuqA-H8sOQZVlxvN4QIH_cmHY0TENnq2U,29090
|
|
30
30
|
lamindb_setup/core/_hub_crud.py,sha256=j6516H82kLjFUNPqFGUINbDw9YbofMgjxadGzYb0OS4,6362
|
|
31
31
|
lamindb_setup/core/_hub_utils.py,sha256=6dyDGyzYFgVfR_lE3VN3CP1jGp98gxPtr-T91PAP05U,2687
|
|
32
|
-
lamindb_setup/core/_private_django_api.py,sha256=
|
|
33
|
-
lamindb_setup/core/_settings.py,sha256=
|
|
34
|
-
lamindb_setup/core/_settings_instance.py,sha256=
|
|
35
|
-
lamindb_setup/core/_settings_load.py,sha256=
|
|
36
|
-
lamindb_setup/core/_settings_save.py,sha256=
|
|
37
|
-
lamindb_setup/core/_settings_storage.py,sha256=
|
|
38
|
-
lamindb_setup/core/_settings_store.py,sha256=
|
|
32
|
+
lamindb_setup/core/_private_django_api.py,sha256=Z9uGL4CK0OX58rc8R_qarg9rIBp1DgjsjfP9Vj2vJHI,2629
|
|
33
|
+
lamindb_setup/core/_settings.py,sha256=QbTrSkkdx0u685NJ4neNtWzhdHoaGMKcIvrfFnctTQ4,15450
|
|
34
|
+
lamindb_setup/core/_settings_instance.py,sha256=eDkueLK5JZOGFhZRbGa-OffS9iBFlxMp47vF_MfmCYI,24301
|
|
35
|
+
lamindb_setup/core/_settings_load.py,sha256=NQDOln8e3qyGphk8ucU7mm3HVkCv4QV4rDZro3TIwfo,5183
|
|
36
|
+
lamindb_setup/core/_settings_save.py,sha256=96mWdYLyfvbnG_ok_vK4x7jm-rtqcWCD1OHEt2QSAms,3328
|
|
37
|
+
lamindb_setup/core/_settings_storage.py,sha256=22EBagIr5qOZr9pqVkJsTcQtgE14en-Wh0y9rgF4FEQ,15677
|
|
38
|
+
lamindb_setup/core/_settings_store.py,sha256=auZssUBb6qE5oSqdGiHhqI2B46qSpegX89VwObPQksk,2601
|
|
39
39
|
lamindb_setup/core/_settings_user.py,sha256=gFfyMf-738onbh1Mf4wsmLlenQJPtjQfpUgKnOlqc2o,1453
|
|
40
40
|
lamindb_setup/core/_setup_bionty_sources.py,sha256=ox3X-SHiHa2lNPSWjwZhINypbLacX6kGwH6hVVrSFZc,1505
|
|
41
41
|
lamindb_setup/core/cloud_sqlite_locker.py,sha256=H_CTUCjURFXwD1cCtV_Jn0_60iztZTkaesLLXIBgIxc,7204
|
|
42
|
-
lamindb_setup/core/django.py,sha256=
|
|
42
|
+
lamindb_setup/core/django.py,sha256=aBdIN07ZCD8PGT04sjF1rruwlppx0cEobBnjk2Due70,12525
|
|
43
43
|
lamindb_setup/core/exceptions.py,sha256=qjMzqy_uzPA7mCOdnoWnS_fdA6OWbdZGftz-YYplrY0,84
|
|
44
44
|
lamindb_setup/core/hashing.py,sha256=Y8Uc5uSGTfU6L2R_gb5w8DdHhGRog7RnkK-e9FEMjPY,3680
|
|
45
45
|
lamindb_setup/core/types.py,sha256=T7NwspfRHgIIpYsXDcApks8jkOlGeGRW-YbVLB7jNIo,67
|
|
46
|
-
lamindb_setup/core/upath.py,sha256=
|
|
47
|
-
lamindb_setup-1.
|
|
48
|
-
lamindb_setup-1.
|
|
49
|
-
lamindb_setup-1.
|
|
50
|
-
lamindb_setup-1.
|
|
46
|
+
lamindb_setup/core/upath.py,sha256=_xs6CgqQezOe6h8oQURjpOl1WT_1ctROzH3yzesVceE,36188
|
|
47
|
+
lamindb_setup-1.16.0.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
|
48
|
+
lamindb_setup-1.16.0.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
|
49
|
+
lamindb_setup-1.16.0.dist-info/METADATA,sha256=mkCWug_dpZRqU6Y0ajukECuIC8kezhh4bTYAIuDLAao,1830
|
|
50
|
+
lamindb_setup-1.16.0.dist-info/RECORD,,
|
|
File without changes
|