lamindb_setup 0.81.4__py3-none-any.whl → 1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +31 -2
- lamindb_setup/_check_setup.py +15 -16
- lamindb_setup/_connect_instance.py +35 -33
- lamindb_setup/_delete.py +2 -2
- lamindb_setup/_django.py +6 -6
- lamindb_setup/_exportdb.py +1 -1
- lamindb_setup/_init_instance.py +13 -39
- lamindb_setup/_migrate.py +5 -3
- lamindb_setup/_schema_metadata.py +10 -6
- lamindb_setup/core/_aws_credentials.py +10 -2
- lamindb_setup/core/_hub_core.py +2 -2
- lamindb_setup/core/_hub_crud.py +8 -11
- lamindb_setup/core/_settings.py +2 -2
- lamindb_setup/core/_settings_instance.py +17 -3
- lamindb_setup/core/_settings_storage.py +1 -1
- lamindb_setup/core/_settings_user.py +1 -1
- lamindb_setup/core/django.py +1 -3
- lamindb_setup/core/hashing.py +16 -5
- lamindb_setup/core/upath.py +79 -45
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0a1.dist-info}/METADATA +4 -5
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0a1.dist-info}/RECORD +23 -23
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0a1.dist-info}/WHEEL +1 -1
- {lamindb_setup-0.81.4.dist-info → lamindb_setup-1.0a1.dist-info}/LICENSE +0 -0
lamindb_setup/__init__.py
CHANGED
|
@@ -33,7 +33,7 @@ Modules & settings:
|
|
|
33
33
|
|
|
34
34
|
"""
|
|
35
35
|
|
|
36
|
-
__version__ = "
|
|
36
|
+
__version__ = "1.0a1" # denote a release candidate for 0.1.0 with 0.1rc1
|
|
37
37
|
|
|
38
38
|
import os as _os
|
|
39
39
|
import sys as _sys
|
|
@@ -51,7 +51,36 @@ from ._register_instance import register
|
|
|
51
51
|
from ._setup_user import login, logout
|
|
52
52
|
from .core._settings import settings
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
|
|
55
|
+
def _is_CI_environment() -> bool:
|
|
56
|
+
ci_env_vars = [
|
|
57
|
+
"LAMIN_TESTING", # Set by our nox configurations
|
|
58
|
+
"CI", # Commonly set by many CI systems
|
|
59
|
+
"TRAVIS", # Travis CI
|
|
60
|
+
"GITHUB_ACTIONS", # GitHub Actions
|
|
61
|
+
"GITLAB_CI", # GitLab CI/CD
|
|
62
|
+
"CIRCLECI", # CircleCI
|
|
63
|
+
"JENKINS_URL", # Jenkins
|
|
64
|
+
"TEAMCITY_VERSION", # TeamCity
|
|
65
|
+
"BUILDKITE", # Buildkite
|
|
66
|
+
"BITBUCKET_BUILD_NUMBER", # Bitbucket Pipelines
|
|
67
|
+
"APPVEYOR", # AppVeyor
|
|
68
|
+
"AZURE_HTTP_USER_AGENT", # Azure Pipelines
|
|
69
|
+
"BUDDY", # Buddy
|
|
70
|
+
"DRONE", # Drone CI
|
|
71
|
+
"HUDSON_URL", # Hudson
|
|
72
|
+
"CF_BUILD_ID", # Codefresh
|
|
73
|
+
"WERCKER", # Wercker
|
|
74
|
+
"NOW_BUILDER", # ZEIT Now
|
|
75
|
+
"TASKCLUSTER_ROOT_URL", # TaskCluster
|
|
76
|
+
"SEMAPHORE", # Semaphore CI
|
|
77
|
+
"BUILD_ID", # Generic build environments
|
|
78
|
+
]
|
|
79
|
+
return any(env_var in _os.environ for env_var in ci_env_vars)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
_TESTING = _is_CI_environment()
|
|
83
|
+
|
|
55
84
|
|
|
56
85
|
# hide the supabase error in a thread on windows
|
|
57
86
|
if _os.name == "nt":
|
lamindb_setup/_check_setup.py
CHANGED
|
@@ -27,6 +27,7 @@ If you used the CLI to set up lamindb in a notebook, restart the Python session.
|
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
CURRENT_ISETTINGS: InstanceSettings | None = None
|
|
30
|
+
IS_LOADING: bool = False
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
def _get_current_instance_settings() -> InstanceSettings | None:
|
|
@@ -54,16 +55,11 @@ def _get_current_instance_settings() -> InstanceSettings | None:
|
|
|
54
55
|
|
|
55
56
|
# we make this a private function because in all the places it's used,
|
|
56
57
|
# users should not see it
|
|
57
|
-
def _check_instance_setup(
|
|
58
|
-
from_lamindb: bool = False, from_module: str | None = None
|
|
59
|
-
) -> bool:
|
|
60
|
-
reload_module = from_lamindb or from_module is not None
|
|
61
|
-
from ._init_instance import get_schema_module_name, reload_schema_modules
|
|
62
|
-
|
|
58
|
+
def _check_instance_setup(from_module: str | None = None) -> bool:
|
|
63
59
|
if django.IS_SETUP:
|
|
64
60
|
# reload logic here because module might not yet have been imported
|
|
65
61
|
# upon first setup
|
|
66
|
-
if from_module is not None:
|
|
62
|
+
if from_module is not None and from_module != "lamindb":
|
|
67
63
|
il.reload(il.import_module(from_module))
|
|
68
64
|
return True
|
|
69
65
|
silence_loggers()
|
|
@@ -75,18 +71,21 @@ def _check_instance_setup(
|
|
|
75
71
|
return True
|
|
76
72
|
isettings = _get_current_instance_settings()
|
|
77
73
|
if isettings is not None:
|
|
78
|
-
if
|
|
79
|
-
|
|
74
|
+
if (
|
|
75
|
+
from_module is not None
|
|
76
|
+
and settings.auto_connect
|
|
77
|
+
and not django.IS_SETUP
|
|
78
|
+
and not IS_LOADING
|
|
79
|
+
):
|
|
80
|
+
if not from_module == "lamindb":
|
|
81
|
+
import lamindb
|
|
82
|
+
|
|
83
|
+
il.reload(il.import_module(from_module))
|
|
84
|
+
else:
|
|
80
85
|
django.setup_django(isettings)
|
|
81
|
-
if from_module is not None:
|
|
82
|
-
# this only reloads `from_module`
|
|
83
|
-
il.reload(il.import_module(from_module))
|
|
84
|
-
else:
|
|
85
|
-
# this bulk reloads all schema modules
|
|
86
|
-
reload_schema_modules(isettings)
|
|
87
86
|
logger.important(f"connected lamindb: {isettings.slug}")
|
|
88
87
|
return django.IS_SETUP
|
|
89
88
|
else:
|
|
90
|
-
if
|
|
89
|
+
if from_module is not None and settings.auto_connect:
|
|
91
90
|
logger.warning(InstanceNotSetupError.default_message)
|
|
92
91
|
return False
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import importlib
|
|
3
4
|
import os
|
|
4
|
-
import sys
|
|
5
5
|
from typing import TYPE_CHECKING
|
|
6
6
|
from uuid import UUID
|
|
7
7
|
|
|
@@ -198,6 +198,7 @@ def connect(slug: str, **kwargs) -> str | tuple | None:
|
|
|
198
198
|
"_db",
|
|
199
199
|
"_write_settings",
|
|
200
200
|
"_raise_not_found_error",
|
|
201
|
+
"_reload_lamindb",
|
|
201
202
|
"_test",
|
|
202
203
|
"_user",
|
|
203
204
|
}
|
|
@@ -210,6 +211,7 @@ def connect(slug: str, **kwargs) -> str | tuple | None:
|
|
|
210
211
|
_db: str | None = kwargs.get("_db", None)
|
|
211
212
|
_write_settings: bool = kwargs.get("_write_settings", True)
|
|
212
213
|
_raise_not_found_error: bool = kwargs.get("_raise_not_found_error", True)
|
|
214
|
+
_reload_lamindb: bool = kwargs.get("_reload_lamindb", True)
|
|
213
215
|
_test: bool = kwargs.get("_test", False)
|
|
214
216
|
|
|
215
217
|
access_token: str | None = None
|
|
@@ -257,6 +259,14 @@ def connect(slug: str, **kwargs) -> str | tuple | None:
|
|
|
257
259
|
if _test:
|
|
258
260
|
return None
|
|
259
261
|
silence_loggers()
|
|
262
|
+
# migrate away from lnschema-core
|
|
263
|
+
no_lnschema_core_file = (
|
|
264
|
+
settings_dir / f"no_lnschema_core-{isettings.slug.replace('/', '--')}"
|
|
265
|
+
)
|
|
266
|
+
if not no_lnschema_core_file.exists():
|
|
267
|
+
migrate_lnschema_core(
|
|
268
|
+
isettings, no_lnschema_core_file, write_file=_write_settings
|
|
269
|
+
)
|
|
260
270
|
check, msg = isettings._load_db()
|
|
261
271
|
if not check:
|
|
262
272
|
local_db = (
|
|
@@ -292,21 +302,14 @@ def connect(slug: str, **kwargs) -> str | tuple | None:
|
|
|
292
302
|
# except ProgrammingError:
|
|
293
303
|
# pass
|
|
294
304
|
load_from_isettings(isettings, user=_user, write_settings=_write_settings)
|
|
305
|
+
if _reload_lamindb:
|
|
306
|
+
importlib.reload(importlib.import_module("lamindb"))
|
|
295
307
|
except Exception as e:
|
|
296
308
|
if isettings is not None:
|
|
297
309
|
if _write_settings:
|
|
298
310
|
isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
|
|
299
311
|
settings._instance_settings = None
|
|
300
312
|
raise e
|
|
301
|
-
# rename lnschema_bionty to bionty for sql tables
|
|
302
|
-
if "bionty" in isettings.schema:
|
|
303
|
-
no_lnschema_bionty_file = (
|
|
304
|
-
settings_dir / f"no_lnschema_bionty-{isettings.slug.replace('/', '')}"
|
|
305
|
-
)
|
|
306
|
-
if not no_lnschema_bionty_file.exists():
|
|
307
|
-
migrate_lnschema_bionty(
|
|
308
|
-
isettings, no_lnschema_bionty_file, write_file=_write_settings
|
|
309
|
-
)
|
|
310
313
|
return None
|
|
311
314
|
|
|
312
315
|
|
|
@@ -322,13 +325,10 @@ def load(slug: str) -> str | tuple | None:
|
|
|
322
325
|
return result
|
|
323
326
|
|
|
324
327
|
|
|
325
|
-
def
|
|
326
|
-
isettings: InstanceSettings,
|
|
328
|
+
def migrate_lnschema_core(
|
|
329
|
+
isettings: InstanceSettings, no_lnschema_core_file: Path, write_file: bool = True
|
|
327
330
|
):
|
|
328
|
-
"""Migrate
|
|
329
|
-
|
|
330
|
-
:param db_uri: str, database URI (e.g., 'sqlite:///path/to/db.sqlite' or 'postgresql://user:password@host:port/dbname')
|
|
331
|
-
"""
|
|
331
|
+
"""Migrate lnschema_core tables to lamindb tables."""
|
|
332
332
|
from urllib.parse import urlparse
|
|
333
333
|
|
|
334
334
|
parsed_uri = urlparse(isettings.db)
|
|
@@ -348,60 +348,62 @@ def migrate_lnschema_bionty(
|
|
|
348
348
|
cur = conn.cursor()
|
|
349
349
|
|
|
350
350
|
try:
|
|
351
|
-
# check if bionty_source table exists
|
|
352
351
|
if db_type == "sqlite":
|
|
353
352
|
cur.execute(
|
|
354
|
-
"SELECT name FROM sqlite_master WHERE type='table' AND name='
|
|
353
|
+
"SELECT name FROM sqlite_master WHERE type='table' AND name='lamindb_user'"
|
|
355
354
|
)
|
|
356
355
|
migrated = cur.fetchone() is not None
|
|
357
356
|
|
|
358
357
|
# tables that need to be renamed
|
|
359
358
|
cur.execute(
|
|
360
|
-
"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE '
|
|
359
|
+
"SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'lnschema_core_%'"
|
|
361
360
|
)
|
|
362
361
|
tables_to_rename = [
|
|
363
|
-
row[0][len("
|
|
362
|
+
row[0][len("lnschema_core_") :] for row in cur.fetchall()
|
|
364
363
|
]
|
|
365
364
|
else: # postgres
|
|
366
365
|
cur.execute(
|
|
367
|
-
"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = '
|
|
366
|
+
"SELECT EXISTS (SELECT FROM information_schema.tables WHERE table_name = 'lamindb_user')"
|
|
368
367
|
)
|
|
369
368
|
migrated = cur.fetchone()[0]
|
|
370
369
|
|
|
371
370
|
# tables that need to be renamed
|
|
372
371
|
cur.execute(
|
|
373
|
-
"SELECT table_name FROM information_schema.tables WHERE table_name LIKE '
|
|
372
|
+
"SELECT table_name FROM information_schema.tables WHERE table_name LIKE 'lnschema_core_%'"
|
|
374
373
|
)
|
|
375
374
|
tables_to_rename = [
|
|
376
|
-
row[0][len("
|
|
375
|
+
row[0][len("lnschema_core_") :] for row in cur.fetchall()
|
|
377
376
|
]
|
|
378
377
|
|
|
379
378
|
if migrated:
|
|
380
379
|
if write_file:
|
|
381
|
-
|
|
380
|
+
no_lnschema_core_file.touch(exist_ok=True)
|
|
382
381
|
else:
|
|
383
382
|
try:
|
|
384
|
-
|
|
383
|
+
response = input(
|
|
384
|
+
f"Do you want to migrate to lamindb 0.78 (integrate lnschema_core into lamindb)? (y/n) -- Will rename {tables_to_rename}"
|
|
385
|
+
)
|
|
386
|
+
if response != "y":
|
|
387
|
+
print("Aborted.")
|
|
388
|
+
quit()
|
|
385
389
|
for table in tables_to_rename:
|
|
386
390
|
if db_type == "sqlite":
|
|
387
391
|
cur.execute(
|
|
388
|
-
f"ALTER TABLE
|
|
392
|
+
f"ALTER TABLE lnschema_core_{table} RENAME TO lamindb_{table}"
|
|
389
393
|
)
|
|
390
394
|
else: # postgres
|
|
391
395
|
cur.execute(
|
|
392
|
-
f"ALTER TABLE
|
|
396
|
+
f"ALTER TABLE lnschema_core_{table} RENAME TO lamindb_{table};"
|
|
393
397
|
)
|
|
394
398
|
|
|
395
|
-
# update django_migrations table
|
|
396
399
|
cur.execute(
|
|
397
|
-
"UPDATE django_migrations SET app = '
|
|
400
|
+
"UPDATE django_migrations SET app = 'lamindb' WHERE app = 'lnschema_core'"
|
|
398
401
|
)
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
"Please uninstall lnschema-bionty via `pip uninstall lnschema-bionty`!"
|
|
402
|
+
print(
|
|
403
|
+
"Renaming tables finished.\nNow, *please* call: lamin migrate deploy"
|
|
402
404
|
)
|
|
403
405
|
if write_file:
|
|
404
|
-
|
|
406
|
+
no_lnschema_core_file.touch(exist_ok=True)
|
|
405
407
|
except Exception:
|
|
406
408
|
# read-only users can't rename tables
|
|
407
409
|
pass
|
lamindb_setup/_delete.py
CHANGED
|
@@ -99,7 +99,7 @@ def delete(slug: str, force: bool = False, require_empty: bool = True) -> int |
|
|
|
99
99
|
)
|
|
100
100
|
require_empty = True
|
|
101
101
|
# first the default storage
|
|
102
|
-
|
|
102
|
+
n_files = check_storage_is_empty(
|
|
103
103
|
isettings.storage.root,
|
|
104
104
|
raise_error=require_empty,
|
|
105
105
|
account_for_sqlite_file=isettings.dialect == "sqlite",
|
|
@@ -132,7 +132,7 @@ def delete(slug: str, force: bool = False, require_empty: bool = True) -> int |
|
|
|
132
132
|
delete_instance_on_hub(isettings._id, require_empty=False)
|
|
133
133
|
delete_by_isettings(isettings)
|
|
134
134
|
# if .lndb file was deleted, then we might count -1
|
|
135
|
-
if
|
|
135
|
+
if n_files <= 0 and isettings.storage.type == "local":
|
|
136
136
|
# dir is only empty after sqlite file was deleted via delete_by_isettings
|
|
137
137
|
if (isettings.storage.root / ".lamindb").exists():
|
|
138
138
|
(isettings.storage.root / ".lamindb").rmdir()
|
lamindb_setup/_django.py
CHANGED
|
@@ -5,24 +5,24 @@ from .core.django import setup_django
|
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
def django(command: str, package_name: str | None = None, **kwargs):
|
|
8
|
-
r"""
|
|
8
|
+
r"""Call Django commands.
|
|
9
9
|
|
|
10
10
|
Examples:
|
|
11
11
|
|
|
12
12
|
Reset auto-incrementing primary integer ids after a database import:
|
|
13
13
|
|
|
14
14
|
>>> import lamindb as ln
|
|
15
|
-
>>> ln.setup.django("sqlsequencereset", "
|
|
15
|
+
>>> ln.setup.django("sqlsequencereset", "lamindb")
|
|
16
16
|
BEGIN;
|
|
17
|
-
SELECT setval(pg_get_serial_sequence('"
|
|
18
|
-
SELECT setval(pg_get_serial_sequence('"
|
|
17
|
+
SELECT setval(pg_get_serial_sequence('"lamindb_user"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "lamindb_user"; # noqa
|
|
18
|
+
SELECT setval(pg_get_serial_sequence('"lamindb_storage"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "lamindb_storage"; # noqa
|
|
19
19
|
COMMIT;
|
|
20
20
|
|
|
21
21
|
You can then run the SQL output that you'll see like so:
|
|
22
22
|
|
|
23
23
|
>>> sql = \"\"\"BEGIN;
|
|
24
|
-
SELECT setval(pg_get_serial_sequence('"
|
|
25
|
-
SELECT setval(pg_get_serial_sequence('"
|
|
24
|
+
SELECT setval(pg_get_serial_sequence('"lamindb_user"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "lamindb_user"; # noqa
|
|
25
|
+
SELECT setval(pg_get_serial_sequence('"lamindb_storage"','id'), coalesce(max("id"), 1), max("id") IS NOT null) FROM "lamindb_storage"; # noqa
|
|
26
26
|
COMMIT;\"\"\"
|
|
27
27
|
>>> from django.db import connection
|
|
28
28
|
>>> with connection.cursor() as cursor:
|
lamindb_setup/_exportdb.py
CHANGED
lamindb_setup/_init_instance.py
CHANGED
|
@@ -2,7 +2,6 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import importlib
|
|
4
4
|
import os
|
|
5
|
-
import sys
|
|
6
5
|
import uuid
|
|
7
6
|
from typing import TYPE_CHECKING, Literal
|
|
8
7
|
from uuid import UUID
|
|
@@ -29,6 +28,8 @@ if TYPE_CHECKING:
|
|
|
29
28
|
def get_schema_module_name(schema_name, raise_import_error: bool = True) -> str | None:
|
|
30
29
|
import importlib.util
|
|
31
30
|
|
|
31
|
+
if schema_name == "core":
|
|
32
|
+
return "lamindb"
|
|
32
33
|
name_attempts = [f"lnschema_{schema_name.replace('-', '_')}", schema_name]
|
|
33
34
|
for name in name_attempts:
|
|
34
35
|
module_spec = importlib.util.find_spec(name)
|
|
@@ -42,8 +43,8 @@ def get_schema_module_name(schema_name, raise_import_error: bool = True) -> str
|
|
|
42
43
|
|
|
43
44
|
|
|
44
45
|
def register_storage_in_instance(ssettings: StorageSettings):
|
|
45
|
-
from
|
|
46
|
-
from
|
|
46
|
+
from lamindb.base.users import current_user_id
|
|
47
|
+
from lamindb.models import Storage
|
|
47
48
|
|
|
48
49
|
from .core.hashing import hash_and_encode_as_b62
|
|
49
50
|
|
|
@@ -71,7 +72,7 @@ def register_storage_in_instance(ssettings: StorageSettings):
|
|
|
71
72
|
|
|
72
73
|
|
|
73
74
|
def register_user(usettings):
|
|
74
|
-
from
|
|
75
|
+
from lamindb.models import User
|
|
75
76
|
|
|
76
77
|
try:
|
|
77
78
|
# need to have try except because of integer primary key migration
|
|
@@ -99,35 +100,6 @@ def register_user_and_storage_in_instance(isettings: InstanceSettings, usettings
|
|
|
99
100
|
logger.warning(f"instance seems not set up ({error})")
|
|
100
101
|
|
|
101
102
|
|
|
102
|
-
def reload_schema_modules(isettings: InstanceSettings, include_core: bool = True):
|
|
103
|
-
schema_names = ["core"] if include_core else []
|
|
104
|
-
# schema_names += list(isettings.schema)
|
|
105
|
-
schema_module_names = [get_schema_module_name(n) for n in schema_names]
|
|
106
|
-
|
|
107
|
-
for schema_module_name in schema_module_names:
|
|
108
|
-
if schema_module_name in sys.modules:
|
|
109
|
-
schema_module = importlib.import_module(schema_module_name)
|
|
110
|
-
importlib.reload(schema_module)
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def reload_lamindb_itself(isettings) -> bool:
|
|
114
|
-
reloaded = False
|
|
115
|
-
if "lamindb" in sys.modules:
|
|
116
|
-
import lamindb
|
|
117
|
-
|
|
118
|
-
importlib.reload(lamindb)
|
|
119
|
-
reloaded = True
|
|
120
|
-
return reloaded
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def reload_lamindb(isettings: InstanceSettings):
|
|
124
|
-
log_message = settings.auto_connect
|
|
125
|
-
if not reload_lamindb_itself(isettings):
|
|
126
|
-
log_message = True
|
|
127
|
-
if log_message:
|
|
128
|
-
logger.important(f"connected lamindb: {isettings.slug}")
|
|
129
|
-
|
|
130
|
-
|
|
131
103
|
ERROR_SQLITE_CACHE = """
|
|
132
104
|
Your cached local SQLite file exists, while your cloud SQLite file ({}) doesn't.
|
|
133
105
|
Either delete your cache ({}) or add it back to the cloud (if delete was accidental).
|
|
@@ -329,6 +301,8 @@ def init(
|
|
|
329
301
|
update_schema_in_hub(access_token=access_token)
|
|
330
302
|
if _write_settings:
|
|
331
303
|
settings.auto_connect = True
|
|
304
|
+
importlib.reload(importlib.import_module("lamindb"))
|
|
305
|
+
logger.important(f"initialized lamindb: {isettings.slug}")
|
|
332
306
|
except Exception as e:
|
|
333
307
|
from ._delete import delete_by_isettings
|
|
334
308
|
from .core._hub_core import delete_instance_record, delete_storage_record
|
|
@@ -338,16 +312,17 @@ def init(
|
|
|
338
312
|
delete_by_isettings(isettings)
|
|
339
313
|
else:
|
|
340
314
|
settings._instance_settings = None
|
|
341
|
-
if (
|
|
342
|
-
user_handle != "anonymous" or access_token is not None
|
|
343
|
-
) and isettings.is_on_hub:
|
|
344
|
-
delete_instance_record(isettings._id, access_token=access_token)
|
|
345
315
|
if (
|
|
346
316
|
ssettings is not None
|
|
347
317
|
and (user_handle != "anonymous" or access_token is not None)
|
|
348
318
|
and ssettings.is_on_hub
|
|
349
319
|
):
|
|
350
320
|
delete_storage_record(ssettings._uuid, access_token=access_token) # type: ignore
|
|
321
|
+
if isettings is not None:
|
|
322
|
+
if (
|
|
323
|
+
user_handle != "anonymous" or access_token is not None
|
|
324
|
+
) and isettings.is_on_hub:
|
|
325
|
+
delete_instance_record(isettings._id, access_token=access_token)
|
|
351
326
|
raise e
|
|
352
327
|
return None
|
|
353
328
|
|
|
@@ -378,7 +353,6 @@ def load_from_isettings(
|
|
|
378
353
|
if not isettings._get_settings_file().exists():
|
|
379
354
|
register_user(user)
|
|
380
355
|
isettings._persist(write_to_disk=write_settings)
|
|
381
|
-
reload_lamindb(isettings)
|
|
382
356
|
|
|
383
357
|
|
|
384
358
|
def validate_sqlite_state(isettings: InstanceSettings) -> None:
|
|
@@ -414,7 +388,7 @@ def infer_instance_name(
|
|
|
414
388
|
return str(db).split("/")[-1]
|
|
415
389
|
if storage == "create-s3":
|
|
416
390
|
raise ValueError("pass name to init if storage = 'create-s3'")
|
|
417
|
-
storage_path = UPath(storage)
|
|
391
|
+
storage_path = UPath(storage).resolve()
|
|
418
392
|
# not sure if name is ever ""
|
|
419
393
|
if storage_path.name != "":
|
|
420
394
|
name = storage_path.name
|
lamindb_setup/_migrate.py
CHANGED
|
@@ -5,7 +5,7 @@ from django.db.migrations.loader import MigrationLoader
|
|
|
5
5
|
from lamin_utils import logger
|
|
6
6
|
from packaging import version
|
|
7
7
|
|
|
8
|
-
from .
|
|
8
|
+
from . import _check_setup
|
|
9
9
|
from .core._settings import settings
|
|
10
10
|
from .core.django import setup_django
|
|
11
11
|
|
|
@@ -64,16 +64,18 @@ class migrate:
|
|
|
64
64
|
@classmethod
|
|
65
65
|
def create(cls) -> None:
|
|
66
66
|
"""Create a migration."""
|
|
67
|
-
if _check_instance_setup():
|
|
67
|
+
if _check_setup._check_instance_setup():
|
|
68
68
|
raise RuntimeError("Restart Python session to create migration or use CLI!")
|
|
69
|
+
_check_setup.IS_LOADING = True
|
|
69
70
|
setup_django(settings.instance, create_migrations=True)
|
|
71
|
+
_check_setup.IS_LOADING = False
|
|
70
72
|
|
|
71
73
|
@classmethod
|
|
72
74
|
def deploy(cls) -> None:
|
|
73
75
|
"""Deploy a migration."""
|
|
74
76
|
from ._schema_metadata import update_schema_in_hub
|
|
75
77
|
|
|
76
|
-
if _check_instance_setup():
|
|
78
|
+
if _check_setup._check_instance_setup():
|
|
77
79
|
raise RuntimeError("Restart Python session to migrate or use CLI!")
|
|
78
80
|
from lamindb_setup.core._hub_client import call_with_fallback_auth
|
|
79
81
|
from lamindb_setup.core._hub_crud import (
|
|
@@ -232,7 +232,7 @@ class _ModelHandler:
|
|
|
232
232
|
return related_fields
|
|
233
233
|
|
|
234
234
|
def _get_field_metadata(self, model, field: Field):
|
|
235
|
-
from
|
|
235
|
+
from lamindb.models import LinkORM
|
|
236
236
|
|
|
237
237
|
internal_type = field.get_internal_type()
|
|
238
238
|
model_name = field.model._meta.model_name
|
|
@@ -273,14 +273,16 @@ class _ModelHandler:
|
|
|
273
273
|
through = self._get_through(field)
|
|
274
274
|
|
|
275
275
|
return FieldMetadata(
|
|
276
|
-
schema_name=schema_name,
|
|
276
|
+
schema_name=schema_name if schema_name != "lamindb" else "core",
|
|
277
277
|
model_name=model_name,
|
|
278
278
|
field_name=field_name,
|
|
279
279
|
type=internal_type,
|
|
280
280
|
is_link_table=issubclass(field.model, LinkORM),
|
|
281
281
|
column_name=column,
|
|
282
282
|
relation_type=relation_type,
|
|
283
|
-
related_schema_name=related_schema_name
|
|
283
|
+
related_schema_name=related_schema_name
|
|
284
|
+
if related_schema_name != "lamindb"
|
|
285
|
+
else "core",
|
|
284
286
|
related_model_name=related_model_name,
|
|
285
287
|
related_field_name=related_field_name,
|
|
286
288
|
through=through,
|
|
@@ -288,7 +290,7 @@ class _ModelHandler:
|
|
|
288
290
|
|
|
289
291
|
@staticmethod
|
|
290
292
|
def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
|
|
291
|
-
from
|
|
293
|
+
from lamindb.models import Registry
|
|
292
294
|
|
|
293
295
|
if isinstance(field_or_rel, ManyToManyField):
|
|
294
296
|
if field_or_rel.model != Registry:
|
|
@@ -365,7 +367,7 @@ class _SchemaHandler:
|
|
|
365
367
|
|
|
366
368
|
def to_dict(self, include_django_objects: bool = True):
|
|
367
369
|
return {
|
|
368
|
-
module_name: {
|
|
370
|
+
module_name if module_name != "lamindb" else "core": {
|
|
369
371
|
model_name: model.to_dict(include_django_objects)
|
|
370
372
|
for model_name, model in module.items()
|
|
371
373
|
}
|
|
@@ -376,7 +378,7 @@ class _SchemaHandler:
|
|
|
376
378
|
return self.to_dict(include_django_objects=False)
|
|
377
379
|
|
|
378
380
|
def _get_modules_metadata(self):
|
|
379
|
-
from
|
|
381
|
+
from lamindb.models import Record, Registry
|
|
380
382
|
|
|
381
383
|
all_models = {
|
|
382
384
|
module_name: {
|
|
@@ -401,6 +403,8 @@ class _SchemaHandler:
|
|
|
401
403
|
module_set_info = []
|
|
402
404
|
for module_name in self.included_modules:
|
|
403
405
|
module = self._get_schema_module(module_name)
|
|
406
|
+
if module_name == "lamindb":
|
|
407
|
+
module_name = "core"
|
|
404
408
|
module_set_info.append(
|
|
405
409
|
{"id": 0, "name": module_name, "version": module.__version__}
|
|
406
410
|
)
|
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import os
|
|
4
4
|
import time
|
|
5
5
|
|
|
6
|
+
from lamin_utils import logger
|
|
6
7
|
from upath.implementations.cloud import S3Path
|
|
7
8
|
|
|
8
9
|
HOSTED_REGIONS = [
|
|
@@ -40,8 +41,15 @@ class AWSCredentialsManager:
|
|
|
40
41
|
|
|
41
42
|
# this is cached so will be resued with the connection initialized
|
|
42
43
|
fs = S3FileSystem(cache_regions=True)
|
|
43
|
-
|
|
44
|
-
|
|
44
|
+
try:
|
|
45
|
+
fs.connect()
|
|
46
|
+
self.anon: bool = fs.session._credentials is None
|
|
47
|
+
except Exception as e:
|
|
48
|
+
logger.warning(
|
|
49
|
+
f"There is a problem with your default AWS Credentials: {e}\n"
|
|
50
|
+
"`anon` mode will be used for all non-managed buckets."
|
|
51
|
+
)
|
|
52
|
+
self.anon = True
|
|
45
53
|
self.anon_public: bool | None = None
|
|
46
54
|
if not self.anon:
|
|
47
55
|
try:
|
lamindb_setup/core/_hub_core.py
CHANGED
|
@@ -233,11 +233,11 @@ def _delete_instance(
|
|
|
233
233
|
)
|
|
234
234
|
if require_empty:
|
|
235
235
|
for storage_record in storage_records:
|
|
236
|
+
root_string: str = storage_record["root"] # type: ignore
|
|
236
237
|
account_for_sqlite_file = (
|
|
237
238
|
instance_with_storage["db_scheme"] is None
|
|
238
|
-
and instance_with_storage["storage"]["root"] ==
|
|
239
|
+
and instance_with_storage["storage"]["root"] == root_string
|
|
239
240
|
)
|
|
240
|
-
root_string = storage_record["root"]
|
|
241
241
|
# gate storage and instance deletion on empty storage location for
|
|
242
242
|
# normally auth.get_session() doesn't have access_token
|
|
243
243
|
# so this block is useless i think (Sergei)
|
lamindb_setup/core/_hub_crud.py
CHANGED
|
@@ -144,17 +144,14 @@ def select_collaborator(
|
|
|
144
144
|
def select_default_storage_by_instance_id(
|
|
145
145
|
instance_id: str, client: Client
|
|
146
146
|
) -> dict | None:
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
)
|
|
156
|
-
except Exception:
|
|
157
|
-
return None
|
|
147
|
+
data = (
|
|
148
|
+
client.table("storage")
|
|
149
|
+
.select("*")
|
|
150
|
+
.eq("instance_id", instance_id)
|
|
151
|
+
.eq("is_default", True)
|
|
152
|
+
.execute()
|
|
153
|
+
.data
|
|
154
|
+
)
|
|
158
155
|
if len(data) == 0:
|
|
159
156
|
return None
|
|
160
157
|
return data[0]
|
lamindb_setup/core/_settings.py
CHANGED
|
@@ -164,7 +164,7 @@ class SetupSettings:
|
|
|
164
164
|
|
|
165
165
|
@property
|
|
166
166
|
def paths(self) -> type[SetupPaths]:
|
|
167
|
-
"""Convert cloud paths to
|
|
167
|
+
"""Convert cloud paths to lamindb local paths.
|
|
168
168
|
|
|
169
169
|
Use `settings.paths.cloud_to_local_no_update`
|
|
170
170
|
or `settings.paths.cloud_to_local`.
|
|
@@ -179,7 +179,7 @@ class SetupSettings:
|
|
|
179
179
|
repr = self.user.__repr__()
|
|
180
180
|
repr += f"\nAuto-connect in Python: {self.auto_connect}\n"
|
|
181
181
|
repr += f"Private Django API: {self.private_django_api}\n"
|
|
182
|
-
repr += f"Cache directory: {self.cache_dir}\n"
|
|
182
|
+
repr += f"Cache directory: {self.cache_dir.as_posix()}\n"
|
|
183
183
|
if self._instance_exists:
|
|
184
184
|
repr += self.instance.__repr__()
|
|
185
185
|
else:
|
|
@@ -121,7 +121,7 @@ class InstanceSettings:
|
|
|
121
121
|
def _search_local_root(
|
|
122
122
|
self, local_root: str | None = None, mute_warning: bool = False
|
|
123
123
|
) -> StorageSettings | None:
|
|
124
|
-
from
|
|
124
|
+
from lamindb.models import Storage
|
|
125
125
|
|
|
126
126
|
if local_root is not None:
|
|
127
127
|
local_records = Storage.objects.filter(root=local_root)
|
|
@@ -358,7 +358,7 @@ class InstanceSettings:
|
|
|
358
358
|
sqlite_filepath = self.storage.cloud_to_local(
|
|
359
359
|
self._sqlite_file, error_no_origin=False
|
|
360
360
|
)
|
|
361
|
-
return f"sqlite:///{sqlite_filepath}"
|
|
361
|
+
return f"sqlite:///{sqlite_filepath.as_posix()}"
|
|
362
362
|
else:
|
|
363
363
|
return self._db
|
|
364
364
|
|
|
@@ -457,11 +457,24 @@ class InstanceSettings:
|
|
|
457
457
|
settings._instance_settings = self
|
|
458
458
|
|
|
459
459
|
def _init_db(self):
|
|
460
|
+
from lamindb_setup import _check_setup
|
|
461
|
+
|
|
460
462
|
from .django import setup_django
|
|
461
463
|
|
|
464
|
+
_check_setup.IS_LOADING = True
|
|
462
465
|
setup_django(self, init=True)
|
|
466
|
+
_check_setup.IS_LOADING = False
|
|
467
|
+
|
|
468
|
+
from lamindb.models import Space
|
|
469
|
+
|
|
470
|
+
Space.objects.get_or_create(
|
|
471
|
+
name="All",
|
|
472
|
+
description="Every team & user with access to the instance has access.",
|
|
473
|
+
)
|
|
463
474
|
|
|
464
475
|
def _load_db(self) -> tuple[bool, str]:
|
|
476
|
+
from lamindb_setup import _check_setup
|
|
477
|
+
|
|
465
478
|
# Is the database available and initialized as LaminDB?
|
|
466
479
|
# returns a tuple of status code and message
|
|
467
480
|
if self.dialect == "sqlite" and not self._sqlite_file.exists():
|
|
@@ -472,7 +485,6 @@ class InstanceSettings:
|
|
|
472
485
|
f" {legacy_file} to {self._sqlite_file}"
|
|
473
486
|
)
|
|
474
487
|
return False, f"SQLite file {self._sqlite_file} does not exist"
|
|
475
|
-
from lamindb_setup import settings # to check user
|
|
476
488
|
|
|
477
489
|
from .django import setup_django
|
|
478
490
|
|
|
@@ -481,5 +493,7 @@ class InstanceSettings:
|
|
|
481
493
|
# setting up django also performs a check for migrations & prints them
|
|
482
494
|
# as warnings
|
|
483
495
|
# this should fail, e.g., if the db is not reachable
|
|
496
|
+
_check_setup.IS_LOADING = True
|
|
484
497
|
setup_django(self)
|
|
498
|
+
_check_setup.IS_LOADING = False
|
|
485
499
|
return True, ""
|
|
@@ -230,7 +230,7 @@ class StorageSettings:
|
|
|
230
230
|
"""Storage record in the current instance."""
|
|
231
231
|
if self._record is None:
|
|
232
232
|
# dynamic import because of import order
|
|
233
|
-
from
|
|
233
|
+
from lamindb.models import Storage
|
|
234
234
|
|
|
235
235
|
from ._settings import settings
|
|
236
236
|
|
|
@@ -48,7 +48,7 @@ class UserSettings:
|
|
|
48
48
|
@property
|
|
49
49
|
def id(self):
|
|
50
50
|
"""Integer id valid in current instance."""
|
|
51
|
-
from
|
|
51
|
+
from lamindb.base.users import current_user_id
|
|
52
52
|
|
|
53
53
|
# there is no cache needed here because current_user_id()
|
|
54
54
|
# has its own cache
|
lamindb_setup/core/django.py
CHANGED
|
@@ -5,10 +5,8 @@ import builtins
|
|
|
5
5
|
import os
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
import time
|
|
8
|
-
from lamin_utils import logger
|
|
9
|
-
from ._settings_store import current_instance_settings_file
|
|
10
8
|
from ._settings_instance import InstanceSettings
|
|
11
|
-
|
|
9
|
+
|
|
12
10
|
|
|
13
11
|
IS_RUN_FROM_IPYTHON = getattr(builtins, "__IPYTHON__", False)
|
|
14
12
|
IS_SETUP = False
|
lamindb_setup/core/hashing.py
CHANGED
|
@@ -12,6 +12,7 @@ from __future__ import annotations
|
|
|
12
12
|
|
|
13
13
|
import base64
|
|
14
14
|
import hashlib
|
|
15
|
+
import json
|
|
15
16
|
from concurrent.futures import ThreadPoolExecutor
|
|
16
17
|
from typing import TYPE_CHECKING
|
|
17
18
|
|
|
@@ -40,11 +41,21 @@ def b16_to_b64(s: str):
|
|
|
40
41
|
return to_b64_str(base64.b16decode(s.strip('"'), casefold=True))
|
|
41
42
|
|
|
42
43
|
|
|
44
|
+
def hash_string(string: str) -> str:
|
|
45
|
+
# as we're truncating (not here) at 22 b64, we choose md5 over sha512
|
|
46
|
+
return to_b64_str(hashlib.md5(string.encode("utf-8")).digest())
|
|
47
|
+
|
|
48
|
+
|
|
43
49
|
# a lot to read about this: lamin-notes/2022/hashing
|
|
44
50
|
def hash_set(s: set[str]) -> str:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
51
|
+
join_s = ":".join(sorted(s))
|
|
52
|
+
return hash_string(join_s)[:HASH_LENGTH]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def hash_dict(d: dict) -> str:
|
|
56
|
+
return to_b64_str(hashlib.md5(json.dumps(d, sort_keys=True).encode()).digest())[
|
|
57
|
+
:HASH_LENGTH
|
|
58
|
+
]
|
|
48
59
|
|
|
49
60
|
|
|
50
61
|
def hash_from_hashes_list(hashes: Iterable[str]) -> str:
|
|
@@ -111,6 +122,6 @@ def hash_dir(path: Path):
|
|
|
111
122
|
hashes, sizes = zip(*hashes_sizes)
|
|
112
123
|
|
|
113
124
|
hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
|
|
114
|
-
|
|
125
|
+
n_files = len(hashes)
|
|
115
126
|
size = sum(sizes)
|
|
116
|
-
return size, hash, hash_type,
|
|
127
|
+
return size, hash, hash_type, n_files
|
lamindb_setup/core/upath.py
CHANGED
|
@@ -12,14 +12,15 @@ from itertools import islice
|
|
|
12
12
|
from pathlib import Path, PosixPath, PurePosixPath, WindowsPath
|
|
13
13
|
from typing import TYPE_CHECKING, Any, Literal
|
|
14
14
|
|
|
15
|
+
import click
|
|
15
16
|
import fsspec
|
|
16
17
|
from lamin_utils import logger
|
|
17
18
|
from upath import UPath
|
|
18
|
-
from upath.implementations.cloud import CloudPath
|
|
19
|
+
from upath.implementations.cloud import CloudPath # keep CloudPath!
|
|
19
20
|
from upath.implementations.local import LocalPath
|
|
20
21
|
|
|
21
22
|
from ._aws_credentials import HOSTED_BUCKETS, get_aws_credentials_manager
|
|
22
|
-
from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list
|
|
23
|
+
from .hashing import HASH_LENGTH, b16_to_b64, hash_from_hashes_list, hash_string
|
|
23
24
|
|
|
24
25
|
if TYPE_CHECKING:
|
|
25
26
|
from .types import UPathStr
|
|
@@ -192,12 +193,15 @@ class ProgressCallback(fsspec.callbacks.Callback):
|
|
|
192
193
|
def update_relative_value(self, inc=1):
|
|
193
194
|
if inc != 0:
|
|
194
195
|
self.value += inc
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
self.call()
|
|
197
|
+
else:
|
|
198
|
+
# this is specific to http filesystem
|
|
199
|
+
# for some reason the last update is always 0
|
|
200
|
+
# sometimes the reported result is less than 100%
|
|
201
|
+
# here 100% is forced manually in this case
|
|
202
|
+
if self.value < 1.0 and self.value >= 0.999:
|
|
203
|
+
self.value = self.size
|
|
204
|
+
self.call()
|
|
201
205
|
|
|
202
206
|
def branch(self, path_1, path_2, kwargs):
|
|
203
207
|
if self.adjust_size:
|
|
@@ -264,7 +268,17 @@ def download_to(self, local_path: UPathStr, print_progress: bool = True, **kwarg
|
|
|
264
268
|
)
|
|
265
269
|
kwargs["callback"] = callback
|
|
266
270
|
|
|
267
|
-
|
|
271
|
+
cloud_path_str = str(self)
|
|
272
|
+
local_path_str = str(local_path)
|
|
273
|
+
# needed due to https://github.com/fsspec/filesystem_spec/issues/1766
|
|
274
|
+
# otherwise fsspec calls fs._ls_real where it reads the body and parses links
|
|
275
|
+
# so the file is downloaded 2 times
|
|
276
|
+
# upath doesn't call fs.ls to infer type, so it is safe to call
|
|
277
|
+
if self.protocol in {"http", "https"} and self.stat().as_info()["type"] == "file":
|
|
278
|
+
self.fs.use_listings_cache = True
|
|
279
|
+
self.fs.dircache[cloud_path_str] = []
|
|
280
|
+
|
|
281
|
+
self.fs.download(cloud_path_str, local_path_str, **kwargs)
|
|
268
282
|
|
|
269
283
|
|
|
270
284
|
def upload_from(
|
|
@@ -312,8 +326,7 @@ def upload_from(
|
|
|
312
326
|
destination = self.as_posix()
|
|
313
327
|
|
|
314
328
|
# the below lines are to avoid s3fs triggering create_bucket in upload if
|
|
315
|
-
# dirs are present it allows to avoid permission error
|
|
316
|
-
# would be easier to just
|
|
329
|
+
# dirs are present, it allows to avoid the permission error
|
|
317
330
|
if self.protocol == "s3" and local_path_is_dir and create_folder:
|
|
318
331
|
bucket = self.drive
|
|
319
332
|
if bucket not in self.fs.dircache:
|
|
@@ -505,7 +518,7 @@ def compute_file_tree(
|
|
|
505
518
|
skip_suffixes_tuple = ()
|
|
506
519
|
else:
|
|
507
520
|
skip_suffixes_tuple = tuple(skip_suffixes) # type: ignore
|
|
508
|
-
|
|
521
|
+
n_files = 0
|
|
509
522
|
n_directories = 0
|
|
510
523
|
|
|
511
524
|
# by default only including registered files
|
|
@@ -518,7 +531,7 @@ def compute_file_tree(
|
|
|
518
531
|
include_paths = set()
|
|
519
532
|
|
|
520
533
|
def inner(dir_path: Path, prefix: str = "", level: int = -1):
|
|
521
|
-
nonlocal
|
|
534
|
+
nonlocal n_files, n_directories, suffixes
|
|
522
535
|
if level == 0:
|
|
523
536
|
return
|
|
524
537
|
stripped_dir_path = dir_path.as_posix().rstrip("/")
|
|
@@ -551,7 +564,7 @@ def compute_file_tree(
|
|
|
551
564
|
suffix = extract_suffix_from_path(child_path)
|
|
552
565
|
suffixes.add(suffix)
|
|
553
566
|
n_files_per_dir_and_type[suffix] += 1
|
|
554
|
-
|
|
567
|
+
n_files += 1
|
|
555
568
|
if n_files_per_dir_and_type[suffix] == n_max_files_per_dir_and_type:
|
|
556
569
|
yield prefix + "..."
|
|
557
570
|
elif n_files_per_dir_and_type[suffix] > n_max_files_per_dir_and_type:
|
|
@@ -564,15 +577,15 @@ def compute_file_tree(
|
|
|
564
577
|
for line in islice(iterator, n_max_files):
|
|
565
578
|
folder_tree += f"\n{line}"
|
|
566
579
|
if next(iterator, None):
|
|
567
|
-
folder_tree += f"\n... only showing {n_max_files} out of {
|
|
580
|
+
folder_tree += f"\n... only showing {n_max_files} out of {n_files} files"
|
|
568
581
|
directory_info = "directory" if n_directories == 1 else "directories"
|
|
569
582
|
display_suffixes = ", ".join([f"{suffix!r}" for suffix in suffixes])
|
|
570
|
-
suffix_message = f" with suffixes {display_suffixes}" if
|
|
583
|
+
suffix_message = f" with suffixes {display_suffixes}" if n_files > 0 else ""
|
|
571
584
|
message = (
|
|
572
585
|
f"{n_directories} sub-{directory_info} &"
|
|
573
|
-
f" {
|
|
586
|
+
f" {n_files} files{suffix_message}\n{path.resolve()}{folder_tree}"
|
|
574
587
|
)
|
|
575
|
-
return message,
|
|
588
|
+
return message, n_files
|
|
576
589
|
|
|
577
590
|
|
|
578
591
|
# adapted from: https://stackoverflow.com/questions/9727673
|
|
@@ -726,12 +739,26 @@ warnings.filterwarnings(
|
|
|
726
739
|
)
|
|
727
740
|
|
|
728
741
|
|
|
729
|
-
def create_path(path:
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
if
|
|
733
|
-
|
|
734
|
-
|
|
742
|
+
def create_path(path: UPathStr, access_token: str | None = None) -> UPath:
|
|
743
|
+
upath = UPath(path)
|
|
744
|
+
|
|
745
|
+
if upath.protocol == "s3":
|
|
746
|
+
# add managed credentials and other options for AWS s3 paths
|
|
747
|
+
return get_aws_credentials_manager().enrich_path(upath, access_token)
|
|
748
|
+
|
|
749
|
+
if upath.protocol in {"http", "https"}:
|
|
750
|
+
# this is needed because by default aiohttp drops a connection after 5 min
|
|
751
|
+
# so it is impossible to download large files
|
|
752
|
+
client_kwargs = upath.storage_options.get("client_kwargs", {})
|
|
753
|
+
if "timeout" not in client_kwargs:
|
|
754
|
+
from aiohttp import ClientTimeout
|
|
755
|
+
|
|
756
|
+
client_kwargs = {
|
|
757
|
+
**client_kwargs,
|
|
758
|
+
"timeout": ClientTimeout(sock_connect=30, sock_read=30),
|
|
759
|
+
}
|
|
760
|
+
return UPath(upath, client_kwargs=client_kwargs)
|
|
761
|
+
return upath
|
|
735
762
|
|
|
736
763
|
|
|
737
764
|
def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
|
|
@@ -749,20 +776,26 @@ def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
|
|
|
749
776
|
# s3
|
|
750
777
|
# StorageClass is checked to be sure that it is indeed s3
|
|
751
778
|
# because http also has ETag
|
|
752
|
-
elif "ETag" in stat
|
|
779
|
+
elif "ETag" in stat:
|
|
753
780
|
etag = stat["ETag"]
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
# we can add more logic later down-the-road
|
|
759
|
-
hash = b16_to_b64(etag)
|
|
760
|
-
hash_type = "md5"
|
|
781
|
+
if "mimetype" in stat:
|
|
782
|
+
# http
|
|
783
|
+
hash = hash_string(etag.strip('"'))
|
|
784
|
+
hash_type = "md5-etag"
|
|
761
785
|
else:
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
786
|
+
# s3
|
|
787
|
+
# small files
|
|
788
|
+
if "-" not in etag:
|
|
789
|
+
# only store hash for non-multipart uploads
|
|
790
|
+
# we can't rapidly validate multi-part uploaded files client-side
|
|
791
|
+
# we can add more logic later down-the-road
|
|
792
|
+
hash = b16_to_b64(etag)
|
|
793
|
+
hash_type = "md5"
|
|
794
|
+
else:
|
|
795
|
+
stripped_etag, suffix = etag.split("-")
|
|
796
|
+
suffix = suffix.strip('"')
|
|
797
|
+
hash = b16_to_b64(stripped_etag)
|
|
798
|
+
hash_type = f"md5-{suffix}" # this is the S3 chunk-hashing strategy
|
|
766
799
|
if hash is not None:
|
|
767
800
|
hash = hash[:HASH_LENGTH]
|
|
768
801
|
return size, hash, hash_type
|
|
@@ -787,17 +820,18 @@ def get_stat_dir_cloud(path: UPath) -> tuple[int, str | None, str | None, int]:
|
|
|
787
820
|
if compute_list_hash:
|
|
788
821
|
hashes.append(object[accessor].strip('"='))
|
|
789
822
|
size = sum(sizes)
|
|
790
|
-
|
|
823
|
+
n_files = len(sizes)
|
|
791
824
|
if compute_list_hash:
|
|
792
825
|
hash, hash_type = hash_from_hashes_list(hashes), "md5-d"
|
|
793
|
-
return size, hash, hash_type,
|
|
826
|
+
return size, hash, hash_type, n_files
|
|
794
827
|
|
|
795
828
|
|
|
796
|
-
class InstanceNotEmpty(
|
|
797
|
-
|
|
829
|
+
class InstanceNotEmpty(click.ClickException):
|
|
830
|
+
def show(self, file=None):
|
|
831
|
+
pass
|
|
798
832
|
|
|
799
833
|
|
|
800
|
-
# is as fast as boto3: https://lamin.ai/laminlabs/
|
|
834
|
+
# is as fast as boto3: https://lamin.ai/laminlabs/lamin-site-assets/transform/krGp3hT1f78N5zKv
|
|
801
835
|
def check_storage_is_empty(
|
|
802
836
|
root: UPathStr, *, raise_error: bool = True, account_for_sqlite_file: bool = False
|
|
803
837
|
) -> int:
|
|
@@ -820,20 +854,20 @@ def check_storage_is_empty(
|
|
|
820
854
|
root_string += "/"
|
|
821
855
|
directory_string = root_string + ".lamindb"
|
|
822
856
|
objects = root_upath.fs.find(directory_string)
|
|
823
|
-
|
|
824
|
-
n_diff =
|
|
857
|
+
n_files = len(objects)
|
|
858
|
+
n_diff = n_files - n_offset_objects
|
|
825
859
|
ask_for_deletion = (
|
|
826
860
|
"delete them prior to deleting the instance"
|
|
827
861
|
if raise_error
|
|
828
862
|
else "consider deleting them"
|
|
829
863
|
)
|
|
830
864
|
message = (
|
|
831
|
-
f"Storage '{directory_string}' contains {
|
|
865
|
+
f"Storage '{directory_string}' contains {n_files - n_offset_objects} objects"
|
|
832
866
|
f" - {ask_for_deletion}"
|
|
833
867
|
)
|
|
834
868
|
if n_diff > 0:
|
|
835
869
|
if raise_error:
|
|
836
|
-
raise InstanceNotEmpty(message)
|
|
870
|
+
raise InstanceNotEmpty(message) from None
|
|
837
871
|
else:
|
|
838
872
|
logger.warning(message)
|
|
839
873
|
return n_diff
|
|
@@ -1,20 +1,19 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
2
|
Name: lamindb_setup
|
|
3
|
-
Version:
|
|
3
|
+
Version: 1.0a1
|
|
4
4
|
Summary: Setup & configure LaminDB.
|
|
5
5
|
Author-email: Lamin Labs <open-source@lamin.ai>
|
|
6
6
|
Requires-Python: >=3.9
|
|
7
7
|
Description-Content-Type: text/markdown
|
|
8
|
-
Requires-Dist: lnschema_core>=0.51.0
|
|
9
8
|
Requires-Dist: lamin_utils>=0.3.3
|
|
10
|
-
Requires-Dist: django
|
|
9
|
+
Requires-Dist: django>=5,<5.2
|
|
11
10
|
Requires-Dist: dj_database_url>=1.3.0,<3.0.0
|
|
12
11
|
Requires-Dist: pydantic-settings
|
|
13
12
|
Requires-Dist: appdirs<2.0.0
|
|
14
13
|
Requires-Dist: requests
|
|
15
14
|
Requires-Dist: universal_pathlib==0.2.5
|
|
16
15
|
Requires-Dist: botocore<2.0.0
|
|
17
|
-
Requires-Dist: supabase>=2.8.1,<=2.
|
|
16
|
+
Requires-Dist: supabase>=2.8.1,<=2.11.0
|
|
18
17
|
Requires-Dist: psutil
|
|
19
18
|
Requires-Dist: urllib3<2 ; extra == "aws"
|
|
20
19
|
Requires-Dist: aiobotocore[boto3]>=2.5.4,<3.0.0 ; extra == "aws"
|
|
@@ -1,47 +1,47 @@
|
|
|
1
|
-
lamindb_setup/__init__.py,sha256=
|
|
1
|
+
lamindb_setup/__init__.py,sha256=k7P3AZb2980R0v07EyB8b-cVODo3vHm5kzBOn5fYj_4,2646
|
|
2
2
|
lamindb_setup/_cache.py,sha256=1XnM-V_KprbjpgPY7Bg3FYn53Iz_2_fEgcMOaSdKKbg,1332
|
|
3
3
|
lamindb_setup/_check.py,sha256=28PcG8Kp6OpjSLSi1r2boL2Ryeh6xkaCL87HFbjs6GA,129
|
|
4
|
-
lamindb_setup/_check_setup.py,sha256=
|
|
4
|
+
lamindb_setup/_check_setup.py,sha256=FQA-wrLJgsBMmK5zJv3-2528CWhS-kwe4vOOXJ06yCM,3023
|
|
5
5
|
lamindb_setup/_close.py,sha256=cXNwK7QTTyNFt2XTpLnO3KHljJ7ShOcISk95np_dltE,1239
|
|
6
|
-
lamindb_setup/_connect_instance.py,sha256=
|
|
7
|
-
lamindb_setup/_delete.py,sha256=
|
|
8
|
-
lamindb_setup/_django.py,sha256=
|
|
6
|
+
lamindb_setup/_connect_instance.py,sha256=YU54loNCweRy-lqfqd2i2Us6xxwftkUPlmCjiGWJ_wI,16109
|
|
7
|
+
lamindb_setup/_delete.py,sha256=Mip5M9tCxyfsjzdcPCl6x9CQ0TkYTqKNNWDIcJ-KVMo,5677
|
|
8
|
+
lamindb_setup/_django.py,sha256=uIQflpkp8l3axyPaKURlk3kacgpElVP5KOKmFxYSMGk,1454
|
|
9
9
|
lamindb_setup/_entry_points.py,sha256=Hs2oJQOCTaGUdWn-1mufM6qUZr9W_EJ_Oc3f0_Vc0Yw,616
|
|
10
|
-
lamindb_setup/_exportdb.py,sha256=
|
|
10
|
+
lamindb_setup/_exportdb.py,sha256=VjaeTsYgnhicbQUN05Ae0Au8lFKPAIeii7flOBcaMk4,2117
|
|
11
11
|
lamindb_setup/_importdb.py,sha256=yYYShzUajTsR-cTW4CZ-UNDWZY2uE5PAgNbp-wn8Ogc,1874
|
|
12
|
-
lamindb_setup/_init_instance.py,sha256=
|
|
13
|
-
lamindb_setup/_migrate.py,sha256=
|
|
12
|
+
lamindb_setup/_init_instance.py,sha256=OFoaF_wadtxD_ICLq9uPLHP-IWTqGeu8Ea57F-CqgJ4,13617
|
|
13
|
+
lamindb_setup/_migrate.py,sha256=bIW6TkgU93rTdnNAKWcyMgjTF0ypc_dzqsJG2lo_4Iw,9001
|
|
14
14
|
lamindb_setup/_register_instance.py,sha256=alQuYp2f8Ct8xvRC1gt8p_HZ0tqCd3gZD3kiPBLPpsI,1269
|
|
15
15
|
lamindb_setup/_schema.py,sha256=b3uzhhWpV5mQtDwhMINc2MabGCnGLESy51ito3yl6Wc,679
|
|
16
|
-
lamindb_setup/_schema_metadata.py,sha256=
|
|
16
|
+
lamindb_setup/_schema_metadata.py,sha256=2uO6uwWScjwggK44z_UgYAuxL66BdKfy-ad8V4ebwxI,13985
|
|
17
17
|
lamindb_setup/_set_managed_storage.py,sha256=4tDxXQMt8Gw028uY3vIQxZQ7qBNXhQMc8saarNK_Z-s,2043
|
|
18
18
|
lamindb_setup/_setup_user.py,sha256=-g7Xj6510BDyM8kuqAsVBZFwehlhBa_uWBSV1rPeuM8,4586
|
|
19
19
|
lamindb_setup/_silence_loggers.py,sha256=AKF_YcHvX32eGXdsYK8MJlxEaZ-Uo2f6QDRzjKFCtws,1568
|
|
20
20
|
lamindb_setup/core/__init__.py,sha256=BxIVMX5HQq8oZ1OuY_saUEJz5Tdd7gaCPngxVu5iou4,417
|
|
21
|
-
lamindb_setup/core/_aws_credentials.py,sha256=
|
|
21
|
+
lamindb_setup/core/_aws_credentials.py,sha256=_wBWC10MGx3PW9UXGhsVNlq7YvCER3RhfRgAdlxEjNM,6120
|
|
22
22
|
lamindb_setup/core/_aws_storage.py,sha256=nEjeUv4xUVpoV0Lx-zjjmyb9w804bDyaeiM-OqbfwM0,1799
|
|
23
23
|
lamindb_setup/core/_deprecated.py,sha256=3qxUI1dnDlSeR0BYrv7ucjqRBEojbqotPgpShXs4KF8,2520
|
|
24
24
|
lamindb_setup/core/_docs.py,sha256=3k-YY-oVaJd_9UIY-LfBg_u8raKOCNfkZQPA73KsUhs,276
|
|
25
25
|
lamindb_setup/core/_hub_client.py,sha256=cN19XbZmvLCxL_GKdOcKbedNRL7kR47vmLmA--NMv-U,6306
|
|
26
|
-
lamindb_setup/core/_hub_core.py,sha256=
|
|
27
|
-
lamindb_setup/core/_hub_crud.py,sha256=
|
|
26
|
+
lamindb_setup/core/_hub_core.py,sha256=qVGGsWVfP6GK9UzmEz1kuR_B8wFkgTstMJJoMHeUF0c,20007
|
|
27
|
+
lamindb_setup/core/_hub_crud.py,sha256=IAuPZes1am8OFwtcf5jSRQPGG1eKwVTEsp9Li-uq0cQ,5377
|
|
28
28
|
lamindb_setup/core/_hub_utils.py,sha256=08NwQsb53-tXa_pr-f0tPTN0FeeVf_i1p3dEbEWD0F4,3016
|
|
29
29
|
lamindb_setup/core/_private_django_api.py,sha256=KIn43HOhiRjkbTbddyJqv-WNTTa1bAizbM1tWXoXPBg,2869
|
|
30
|
-
lamindb_setup/core/_settings.py,sha256=
|
|
31
|
-
lamindb_setup/core/_settings_instance.py,sha256=
|
|
30
|
+
lamindb_setup/core/_settings.py,sha256=eslFO84vb5uRRfJ3r_uu4O8677l8lU5BbpZJMSAYw6A,8244
|
|
31
|
+
lamindb_setup/core/_settings_instance.py,sha256=gUbffwhsp_SygjU0P1bohXnRejAQ2sDTjctSg4-O94A,19199
|
|
32
32
|
lamindb_setup/core/_settings_load.py,sha256=5OpghcbkrK9KBM_0Iu-61FTI76UbOpPkkJpUittXS-w,4098
|
|
33
33
|
lamindb_setup/core/_settings_save.py,sha256=rxGxgaK5i9exKqSJERQQyY1WZio20meoQJoYXlVW-1w,3138
|
|
34
|
-
lamindb_setup/core/_settings_storage.py,sha256=
|
|
34
|
+
lamindb_setup/core/_settings_storage.py,sha256=dPIvbA6PkdjM8gsX6zxtH7VNMc4vhkuEO4luVMZY7RQ,12243
|
|
35
35
|
lamindb_setup/core/_settings_store.py,sha256=WcsgOmgnu9gztcrhp-N4OONNZyxICHV8M0HdJllTaEo,2219
|
|
36
|
-
lamindb_setup/core/_settings_user.py,sha256=
|
|
36
|
+
lamindb_setup/core/_settings_user.py,sha256=lWqV3HmZCsEq2UsU_iVNW0p9ddsNg7-B6xOaMNH1aw0,1475
|
|
37
37
|
lamindb_setup/core/_setup_bionty_sources.py,sha256=jZOPXpipW_5IjMO-bLMk-_wVwk7-5MLd72K2rnqqy7U,4001
|
|
38
38
|
lamindb_setup/core/cloud_sqlite_locker.py,sha256=i6TrT7HG0lqliPvZTlsZ_uplPaqhPBbabyfeR32SkA8,7107
|
|
39
|
-
lamindb_setup/core/django.py,sha256=
|
|
39
|
+
lamindb_setup/core/django.py,sha256=bJme5D9KHmo98S9dLzFktdS2rZ0Aoj4EdkirniirB1I,3763
|
|
40
40
|
lamindb_setup/core/exceptions.py,sha256=4NpLUNUIfXYVTFX2FvLZF8RW34exk2Vn2X3G4YhnTRg,276
|
|
41
|
-
lamindb_setup/core/hashing.py,sha256=
|
|
41
|
+
lamindb_setup/core/hashing.py,sha256=2kZy_7NQB1WoUK8SBrhFX90lBpTkBr1V9tf7FS9Fe2Q,3380
|
|
42
42
|
lamindb_setup/core/types.py,sha256=zJii2le38BJUmsNVvzDrbzGYr0yaeb-9Rw9IKmsBr3k,523
|
|
43
|
-
lamindb_setup/core/upath.py,sha256=
|
|
44
|
-
lamindb_setup-
|
|
45
|
-
lamindb_setup-
|
|
46
|
-
lamindb_setup-
|
|
47
|
-
lamindb_setup-
|
|
43
|
+
lamindb_setup/core/upath.py,sha256=UqfZcSIEflje2393osJ0tleizGKip5N1VJtXXVFUH7U,31061
|
|
44
|
+
lamindb_setup-1.0a1.dist-info/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
|
45
|
+
lamindb_setup-1.0a1.dist-info/WHEEL,sha256=CpUCUxeHQbRN5UGRQHYRJorO5Af-Qy_fHMctcQ8DSGI,82
|
|
46
|
+
lamindb_setup-1.0a1.dist-info/METADATA,sha256=YRknY51lYN64_4Nm-ytu8YmaeVvcD4B2XLCutFPNkNo,1689
|
|
47
|
+
lamindb_setup-1.0a1.dist-info/RECORD,,
|
|
File without changes
|