lamindb_setup 1.14.0__tar.gz → 1.15.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/.gitignore +2 -0
  2. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/.pre-commit-config.yaml +3 -1
  3. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/PKG-INFO +1 -1
  4. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/__init__.py +2 -2
  5. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_connect_instance.py +9 -7
  6. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_init_instance.py +4 -2
  7. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_setup_user.py +5 -0
  8. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/__init__.py +15 -0
  9. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_aws_options.py +8 -0
  10. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings.py +27 -22
  11. lamindb_setup-1.15.0/lamindb_setup/io.py +194 -0
  12. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/noxfile.py +1 -1
  13. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/pyproject.toml +2 -0
  14. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-local/test_update_schema_in_hub.py +1 -1
  15. lamindb_setup-1.15.0/tests/storage/conftest.py +10 -0
  16. lamindb_setup-1.15.0/tests/storage/test_db_import_export.py +215 -0
  17. lamindb_setup-1.14.0/lamindb_setup/_exportdb.py +0 -68
  18. lamindb_setup-1.14.0/lamindb_setup/_importdb.py +0 -50
  19. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/.github/workflows/build.yml +0 -0
  20. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/.github/workflows/doc-changes.yml +0 -0
  21. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/LICENSE +0 -0
  22. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/README.md +0 -0
  23. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/changelog.md +0 -0
  24. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/01-init-local-instance.ipynb +0 -0
  25. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/02-connect-local-instance.ipynb +0 -0
  26. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/03-add-managed-storage.ipynb +0 -0
  27. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/04-test-bionty.ipynb +0 -0
  28. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/05-init-hosted-instance.ipynb +0 -0
  29. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/06-connect-hosted-instance.ipynb +0 -0
  30. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/07-keep-artifacts-local.ipynb +0 -0
  31. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/08-test-multi-session.ipynb +0 -0
  32. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/09-test-migrate.ipynb +0 -0
  33. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-cloud/test_notebooks.py +0 -0
  34. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-cache-management.ipynb +0 -0
  35. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-cloud-sync.ipynb +0 -0
  36. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-connect-anonymously.ipynb +0 -0
  37. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-empty-init.ipynb +0 -0
  38. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-import-schema.ipynb +0 -0
  39. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-init-load-local-anonymously.ipynb +0 -0
  40. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-insufficient-user-info.ipynb +0 -0
  41. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-invalid-schema.ipynb +0 -0
  42. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test-sqlite-lock.ipynb +0 -0
  43. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/hub-prod/test_notebooks2.py +0 -0
  44. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/index.md +0 -0
  45. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/notebooks.md +0 -0
  46. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/docs/reference.md +0 -0
  47. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_cache.py +0 -0
  48. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_check.py +0 -0
  49. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_check_setup.py +0 -0
  50. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_delete.py +0 -0
  51. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_disconnect.py +0 -0
  52. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_django.py +0 -0
  53. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_entry_points.py +0 -0
  54. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_migrate.py +0 -0
  55. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_register_instance.py +0 -0
  56. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_schema.py +0 -0
  57. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_schema_metadata.py +0 -0
  58. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_set_managed_storage.py +0 -0
  59. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/_silence_loggers.py +0 -0
  60. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_aws_storage.py +0 -0
  61. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_clone.py +0 -0
  62. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_deprecated.py +0 -0
  63. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_docs.py +0 -0
  64. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_hub_client.py +0 -0
  65. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_hub_core.py +0 -0
  66. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_hub_crud.py +0 -0
  67. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_hub_utils.py +0 -0
  68. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_private_django_api.py +0 -0
  69. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_instance.py +0 -0
  70. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_load.py +0 -0
  71. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_save.py +0 -0
  72. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_storage.py +0 -0
  73. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_store.py +0 -0
  74. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_settings_user.py +0 -0
  75. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/_setup_bionty_sources.py +0 -0
  76. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/cloud_sqlite_locker.py +0 -0
  77. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/django.py +0 -0
  78. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/exceptions.py +0 -0
  79. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/hashing.py +0 -0
  80. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/types.py +0 -0
  81. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/core/upath.py +0 -0
  82. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/errors.py +0 -0
  83. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/py.typed +0 -0
  84. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/lamindb_setup/types.py +0 -0
  85. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/scripts/script-init-pass-user-no-writes.py +0 -0
  86. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/scripts/script-to-fail-managed-storage.py +0 -0
  87. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_clone_instance.py +0 -0
  88. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_connect_instance.py +0 -0
  89. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_delete_instance.py +0 -0
  90. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_edge_request.py +0 -0
  91. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_fail_managed_storage.py +0 -0
  92. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_init_instance.py +0 -0
  93. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_init_pass_user_no_writes.py +0 -0
  94. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_login.py +0 -0
  95. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-cloud/test_set_storage.py +0 -0
  96. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-local/README.md +0 -0
  97. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-local/conftest.py +0 -0
  98. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-local/scripts/script-connect-fine-grained-access.py +0 -0
  99. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-local/test_all.py +0 -0
  100. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/conftest.py +0 -0
  101. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_aws_options_manager.py +0 -0
  102. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_django.py +0 -0
  103. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_global_settings.py +0 -0
  104. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_migrate.py +0 -0
  105. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_switch_and_fallback_env.py +0 -0
  106. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/hub-prod/test_upath.py +0 -0
  107. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_entry_point.py +0 -0
  108. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_hashing.py +0 -0
  109. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_storage_access.py +0 -0
  110. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_storage_basis.py +0 -0
  111. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_storage_settings.py +0 -0
  112. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_storage_stats.py +0 -0
  113. {lamindb_setup-1.14.0 → lamindb_setup-1.15.0}/tests/storage/test_to_url.py +0 -0
@@ -113,3 +113,5 @@ _docs_tmp*
113
113
  *.db
114
114
  storage_uid.txt
115
115
  test.ipynb
116
+ test2.ipynb
117
+ lamindb_export
@@ -42,5 +42,7 @@ repos:
42
42
  - id: mypy
43
43
  exclude: |
44
44
  (?x)(
45
- tests/hub-local/conftest.py
45
+ tests/hub-local/conftest.py|
46
+ tests/hub-prod/conftest.py|
47
+ tests/storage/conftest.py
46
48
  )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: lamindb_setup
3
- Version: 1.14.0
3
+ Version: 1.15.0
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.10
@@ -35,7 +35,7 @@ Migration management
35
35
 
36
36
  """
37
37
 
38
- __version__ = "1.14.0" # denote a release candidate for 0.1.0 with 0.1rc1
38
+ __version__ = "1.15.0" # denote a release candidate for 0.1.0 with 0.1rc1
39
39
 
40
40
  import os
41
41
  import warnings
@@ -47,7 +47,7 @@ warnings.filterwarnings("ignore", category=DeprecationWarning, module="postgrest
47
47
 
48
48
  from packaging import version as packaging_version
49
49
 
50
- from . import core, errors, types
50
+ from . import core, errors, io, types
51
51
  from ._check_setup import _check_instance_setup
52
52
  from ._connect_instance import connect
53
53
  from ._delete import delete
@@ -204,18 +204,12 @@ def reset_django_module_variables():
204
204
  app_names = {app.name for app in apps.get_app_configs()}
205
205
  # always copy before iterations over sys.modules
206
206
  # see https://docs.python.org/3/library/sys.html#sys.modules
207
+ # this whole thing runs about 50ms in a big env
207
208
  for name, module in sys.modules.copy().items():
208
209
  if (
209
210
  module is not None
210
211
  and (not name.startswith("__") or name == "__main__")
211
212
  and name not in sys.builtin_module_names
212
- and not (
213
- hasattr(module, "__file__")
214
- and module.__file__
215
- and any(
216
- path in module.__file__ for path in ["/lib/python", "\\lib\\python"]
217
- )
218
- )
219
213
  ):
220
214
  try:
221
215
  for k, v in vars(module).items():
@@ -260,6 +254,10 @@ def _connect_cli(
260
254
  connect(_write_settings=False, _reload_lamindb=False)
261
255
  else:
262
256
  logger.important(f"connected lamindb: {isettings.slug}")
257
+ if settings_.dev_dir is None:
258
+ logger.important_hint(
259
+ "to map a local dev directory, call: lamin settings set dev-dir ."
260
+ )
263
261
  return None
264
262
 
265
263
 
@@ -427,6 +425,10 @@ def connect(instance: str | None = None, **kwargs: Any) -> str | tuple | None:
427
425
  isettings._get_settings_file().unlink(missing_ok=True) # type: ignore
428
426
  settings._instance_settings = None
429
427
  raise e
428
+ if settings.dev_dir is None:
429
+ logger.important_hint(
430
+ "to map a local dev directory, set: ln.setup.settings.dev_dir = '.'"
431
+ )
430
432
  return None
431
433
 
432
434
 
@@ -7,8 +7,6 @@ from typing import TYPE_CHECKING, Literal
7
7
  from uuid import UUID
8
8
 
9
9
  import click
10
- from django.core.exceptions import FieldError
11
- from django.db.utils import IntegrityError, OperationalError, ProgrammingError
12
10
  from lamin_utils import logger
13
11
 
14
12
  from ._disconnect import disconnect
@@ -71,6 +69,10 @@ def register_storage_in_instance(ssettings: StorageSettings) -> Storage:
71
69
 
72
70
 
73
71
  def register_user(usettings: UserSettings, update_user: bool = True) -> None:
72
+ # we have to import this here dynamically because otherwise
73
+ # the except below will fail on re-connect due to reset
74
+ from django.core.exceptions import FieldError
75
+ from django.db.utils import IntegrityError, OperationalError, ProgrammingError
74
76
  from lamindb.models import User
75
77
 
76
78
  if not update_user and User.objects.filter(uid=usettings.uid).exists():
@@ -7,6 +7,7 @@ from lamin_utils import logger
7
7
 
8
8
  from ._check_setup import _check_instance_setup
9
9
  from ._init_instance import register_user
10
+ from .core._aws_options import reset_aws_options_cache
10
11
  from .core._settings import settings
11
12
  from .core._settings_load import load_user_settings
12
13
  from .core._settings_save import save_user_settings
@@ -147,6 +148,8 @@ def login(
147
148
  register_user(user_settings)
148
149
 
149
150
  settings._user_settings = None
151
+ # aws s3 credentials are scoped to the user
152
+ reset_aws_options_cache()
150
153
  return user_settings
151
154
 
152
155
 
@@ -163,6 +166,8 @@ def logout():
163
166
  if current_user_settings_file().exists():
164
167
  current_user_settings_file().unlink()
165
168
  settings._user_settings = None
169
+ # aws s3 credentials are scoped to the user
170
+ reset_aws_options_cache()
166
171
  logger.success("logged out")
167
172
  else:
168
173
  logger.important("already logged out")
@@ -1,8 +1,23 @@
1
1
  """Core setup library.
2
2
 
3
+ General
4
+ -------
5
+
3
6
  .. autoclass:: SetupSettings
7
+
8
+ User
9
+ ----
10
+
4
11
  .. autoclass:: UserSettings
12
+
13
+ Instance
14
+ --------
15
+
5
16
  .. autoclass:: InstanceSettings
17
+
18
+ Storage
19
+ -------
20
+
6
21
  .. autoclass:: StorageSettings
7
22
 
8
23
  """
@@ -245,3 +245,11 @@ def get_aws_options_manager() -> AWSOptionsManager:
245
245
  _aws_options_manager = AWSOptionsManager()
246
246
 
247
247
  return _aws_options_manager
248
+
249
+
250
def reset_aws_options_cache():
    """Clear the credential and parameter caches of the module-level manager.

    Does nothing while `_aws_options_manager` is still `None`.
    """
    global _aws_options_manager

    manager = _aws_options_manager
    if manager is None:
        return
    # replace both caches with fresh empty dicts
    manager._credentials_cache = {}
    manager._parameters_cache = {}
@@ -59,7 +59,6 @@ class SetupSettings:
59
59
 
60
60
  _auto_connect_path: Path = settings_dir / "auto_connect"
61
61
  _private_django_api_path: Path = settings_dir / "private_django_api"
62
- _work_dir: Path = settings_dir / "work_dir.txt"
63
62
 
64
63
  _cache_dir: Path | None = None
65
64
 
@@ -70,25 +69,6 @@ class SetupSettings:
70
69
  def _instance_settings_path(self) -> Path:
71
70
  return current_instance_settings_file()
72
71
 
73
- @property
74
- def work_dir(self) -> Path | None:
75
- """Get or set the current working directory.
76
-
77
- If setting it to `None`, the working directory is unset
78
- """
79
- if not self._work_dir.exists():
80
- return None
81
- return Path(self._work_dir.read_text())
82
-
83
- @work_dir.setter
84
- def work_dir(self, value: str | Path | None) -> None:
85
- if value is None:
86
- if self._work_dir.exists():
87
- self._work_dir.unlink()
88
- else:
89
- value_str = Path(value).expanduser().resolve().as_posix()
90
- self._work_dir.write_text(value_str)
91
-
92
72
  @property
93
73
  def settings_dir(self) -> Path:
94
74
  """The directory that holds locally persisted settings."""
@@ -112,6 +92,31 @@ class SetupSettings:
112
92
  else:
113
93
  self._auto_connect_path.unlink(missing_ok=True)
114
94
 
95
@property
def _dev_dir_path(self) -> Path:
    """Path of the settings file that stores the dev directory of the current instance.

    The file name is built from the owner and the name of `self.instance`.
    """
    instance = self.instance
    file_name = f"dev-dir--{instance.owner}--{instance.name}.txt"
    return settings_dir / file_name
100
+
101
@property
def dev_dir(self) -> Path | None:
    """Get or set the local development directory for the current instance.

    Setting it to `None` unsets the development directory.

    Returns:
        The stored directory, or `None` if no directory is stored for the
        current instance.
    """
    # resolve the per-instance settings file once instead of once per use
    path_file = self._dev_dir_path
    if not path_file.exists():
        return None
    return Path(path_file.read_text())

@dev_dir.setter
def dev_dir(self, value: str | Path | None) -> None:
    path_file = self._dev_dir_path
    if value is None:
        # missing_ok avoids the race between an exists() check and unlink()
        path_file.unlink(missing_ok=True)
    else:
        # persist an absolute, user-expanded path in posix notation
        value_str = Path(value).expanduser().resolve().as_posix()
        path_file.write_text(value_str)
119
+
115
120
  @property
116
121
  def _branch_path(self) -> Path:
117
122
  return (
@@ -361,9 +366,9 @@ class SetupSettings:
361
366
  if self._instance_exists:
362
367
  instance_rep = self.instance.__repr__().split("\n")
363
368
  repr += f"{colors.cyan('Instance:')} {instance_rep[0].replace('Instance: ', '')}\n"
364
- repr += f" - work-dir: {self.work_dir}\n"
365
369
  repr += f" - branch: {self._read_branch_idlike_name()[1]}\n"
366
- repr += f" - space: {self._read_space_idlike_name()[1]}"
370
+ repr += f" - space: {self._read_space_idlike_name()[1]}\n"
371
+ repr += f" - dev-dir: {self.dev_dir}"
367
372
  repr += f"\n{colors.yellow('Details:')}\n"
368
373
  repr += "\n".join(instance_rep[1:])
369
374
  else:
@@ -0,0 +1,194 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import warnings
5
+ from importlib import import_module
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ import pandas as pd
10
+ from django.db import models, transaction
11
+ from rich.progress import Progress
12
+
13
+ if TYPE_CHECKING:
14
+ from collections.abc import Sequence
15
+ from typing import Literal
16
+
17
+
18
def _get_registries(module_name: str) -> list[str]:
    """Get registry class names from a module.

    A name qualifies if it starts with an uppercase letter, resolves in
    `<module_name>.models` to a Django model class, is also listed in `dir()`
    of the top-level module, and is neither `SQLRecord` nor `BaseSQLRecord`.

    Args:
        module_name: Importable name of the schema module, e.g. "lamindb".

    Returns:
        The qualifying class names, in the order of `dir(<module_name>.models)`.
    """
    schema_module = import_module(module_name)
    models_module = schema_module.models
    exclude = {"SQLRecord", "BaseSQLRecord"}
    # computed once; the lookup used to call dir() again for every candidate
    exported_names = set(dir(schema_module))
    models_path = f"{module_name}.models"

    def is_defined_in_module(cls: type) -> bool:
        # for lamindb, classes from any submodule of `lamindb.models` are
        # accepted; for other modules the class must live in `<module>.models`
        if module_name == "lamindb":
            return cls.__module__.startswith(f"{models_path}.")
        return cls.__module__ == models_path

    registries = []
    for name in dir(models_module):
        if not name[0].isupper() or name in exclude or name not in exported_names:
            continue
        cls = getattr(models_module, name, None)
        if (
            isinstance(cls, type)
            and issubclass(cls, models.Model)
            and is_defined_in_module(cls)
        ):
            registries.append(name)
    return registries
44
+
45
+
46
def _export_registry_to_parquet(registry: type[models.Model], directory: Path) -> None:
    """Dump the database table behind `registry` into a parquet file.

    The table is read in full from the database of the current instance and
    written, uncompressed, to `<directory>/<table_name>.parquet`.
    """
    import lamindb_setup as ln_setup

    db_table = registry._meta.db_table
    target = directory / f"{db_table}.parquet"
    with warnings.catch_warnings():
        # silence warnings about skipped unsupported reflection while reading
        warnings.filterwarnings("ignore", message="Skipped unsupported reflection")
        frame = pd.read_sql_table(db_table, ln_setup.settings.instance.db)
    frame.to_parquet(target, compression=None)
55
+
56
+
57
def export_db(
    module_names: Sequence[str] | None = None,
    *,
    output_dir: str | Path = "./lamindb_export/",
) -> None:
    """Write every registry table and its many-to-many link tables to parquet.

    Ensure that you connect to postgres instances using `use_root_db_user=True`.

    Args:
        module_names: Module names to export (e.g., ["lamindb", "bionty", "wetlab"]).
            Defaults to "lamindb" if not provided.
        output_dir: Directory path for exported parquet files; created if missing.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    registries_by_module = {
        name: _get_registries(name) for name in (module_names or ["lamindb"])
    }
    # the progress bar advances once per registry, link tables are not counted
    n_registries = sum(map(len, registries_by_module.values()))

    with Progress() as progress:
        task = progress.add_task("Exporting", total=n_registries)
        for module_name, registry_names in registries_by_module.items():
            schema_module = import_module(module_name)
            for registry_name in registry_names:
                progress.update(
                    task, description=f"[cyan]{module_name}.{registry_name}"
                )
                registry = getattr(schema_module, registry_name)
                link_models = [
                    getattr(registry, field.name).through
                    for field in registry._meta.many_to_many
                ]
                # the registry itself first, then each of its link tables
                for table_model in (registry, *link_models):
                    _export_registry_to_parquet(table_model, target_dir)
                progress.advance(task)
90
+
91
+
92
def _import_registry(
    registry: type[models.Model],
    directory: Path,
    if_exists: Literal["fail", "replace", "append"] = "replace",
) -> None:
    """Load `<directory>/<table_name>.parquet` into the table behind `registry`.

    The frame is written with `DataFrame.to_sql` through the raw database
    connection rather than through the Django ORM. A missing parquet file is
    reported on stdout and skipped.
    """
    from django.db import connection

    db_table = registry._meta.db_table
    source = directory / f"{db_table}.parquet"
    if not source.exists():
        print(f"Skipped {db_table} (file not found)")
        return

    frame = pd.read_parquet(source)

    # columns carrying an `_old` suffix are not imported
    stale_columns = [column for column in frame.columns if column.endswith("_old")]
    if stale_columns:
        frame = frame.drop(columns=stale_columns)

    def to_json_if_container(value):
        # dicts and lists are serialized to JSON text, other values pass through
        if isinstance(value, (dict, list)):
            return json.dumps(value)
        return value

    for column in frame.columns:
        if frame[column].dtype == "object":
            frame[column] = frame[column].apply(to_json_if_container)

    frame.to_sql(db_table, connection.connection, if_exists=if_exists, index=False)
123
+
124
+
125
def import_db(
    module_names: Sequence[str] | None = None,
    *,
    input_dir: str | Path = "./lamindb_export/",
    if_exists: Literal["fail", "replace", "append"] = "replace",
) -> None:
    """Import registry and link tables from parquet files.

    Foreign key enforcement is relaxed for the duration of the import so that
    tables can be inserted in arbitrary order: on sqlite via
    `PRAGMA foreign_keys`, on postgres via `SET CONSTRAINTS ALL DEFERRED`
    inside the import transaction. On sqlite, enforcement is switched back on
    even if the import fails.

    NOTE(review): an earlier docstring stated that postgres requires
    superuser/RDS admin privileges; confirm whether that still applies.

    Args:
        input_dir: Directory containing parquet files to import.
        module_names: Module names to import (e.g., ["lamindb", "bionty", "wetlab"]).
            If `None`, they are inferred from the part before the first
            underscore of the parquet file names found in `input_dir`.
        if_exists: How to behave if table exists: 'fail', 'replace', or 'append'.

    Raises:
        ValueError: If `input_dir` does not exist.
    """
    from django.db import connection

    import lamindb_setup as ln_setup

    directory = Path(input_dir)

    if not directory.exists():
        raise ValueError(f"Directory does not exist: {directory}")

    if module_names is None:
        # file names look like `<module>_<table>.parquet`
        module_names = sorted(
            {f.name.split("_")[0] for f in directory.glob("*.parquet") if "_" in f.name}
        )

    modules = {name: _get_registries(name) for name in module_names}
    total_models = sum(len(names) for names in modules.values())

    dialect = ln_setup.settings.instance.dialect
    is_sqlite = dialect == "sqlite"

    # NOTE(review): the previous code also held a postgres branch setting
    # `session_replication_role`, but it was nested in a sqlite-only guard and
    # never ran; it is omitted here so the effective behavior is unchanged.
    if is_sqlite:
        with connection.cursor() as cursor:
            cursor.execute("PRAGMA foreign_keys = OFF")

    try:
        with transaction.atomic():
            if dialect == "postgresql":
                with connection.cursor() as cursor:
                    cursor.execute("SET CONSTRAINTS ALL DEFERRED")

            with Progress() as progress:
                task = progress.add_task("Importing", total=total_models)
                for module_name, model_names in modules.items():
                    schema_module = import_module(module_name)
                    for model_name in model_names:
                        progress.update(
                            task, description=f"[cyan]{module_name}.{model_name}"
                        )
                        registry = getattr(schema_module, model_name)
                        _import_registry(registry, directory, if_exists=if_exists)
                        for field in registry._meta.many_to_many:
                            link_orm = getattr(registry, field.name).through
                            _import_registry(link_orm, directory, if_exists=if_exists)
                        progress.advance(task)
    finally:
        # always restore FK enforcement, also when the import raised
        if is_sqlite:
            with connection.cursor() as cursor:
                cursor.execute("PRAGMA foreign_keys = ON")
@@ -40,7 +40,7 @@ uv pip install --system git+https://github.com/laminlabs/bionty
40
40
  elif group == "docs":
41
41
  cmds = modules_deps.strip()
42
42
  elif group == "storage":
43
- cmds = modules_deps + "uv pip install --system gcsfs huggingface_hub"
43
+ cmds = modules_deps + "uv pip install --system gcsfs huggingface_hub sqlalchemy"
44
44
  elif group == "hub-prod":
45
45
  # cmds = "git clone --depth 1 https://github.com/django/django\n"
46
46
  # cmds += "uv pip install --system -e ./django\n"
@@ -158,4 +158,6 @@ filterwarnings = [
158
158
  "ignore:Jupyter is migrating its paths to use standard platformdirs:DeprecationWarning",
159
159
  "ignore:The 'timeout' parameter is deprecated. Please configure it in the http client instead.:DeprecationWarning",
160
160
  "ignore:The 'verify' parameter is deprecated. Please configure it in the http client instead.:DeprecationWarning",
161
+ "ignore:There is no current event loop:DeprecationWarning",
162
+ "ignore:DateTimeField.*received a naive datetime.*while time zone support is active:RuntimeWarning",
161
163
  ]
@@ -44,7 +44,7 @@ def test_update_schema_in_hub(setup_instance):
44
44
  assert not schema["schema_json"]["core"]["artifact"]["is_link_table"]
45
45
  assert schema["schema_json"]["core"]["artifactulabel"]["is_link_table"]
46
46
 
47
- assert schema["schema_json"]["core"]["artifact"]["name_field"] is None
47
+ assert schema["schema_json"]["core"]["artifact"]["name_field"] == "key"
48
48
  assert schema["schema_json"]["core"]["artifact"]["ontology_id_field"] is None
49
49
  assert schema["schema_json"]["bionty"]["gene"]["name_field"] == "symbol"
50
50
  assert (
@@ -0,0 +1,10 @@
1
+ import pytest
2
+
3
+
4
@pytest.fixture(scope="session")
def simple_instance():
    """Initialize the `testdb` instance once per session and delete it afterwards."""
    from lamindb_setup import delete, init

    init(storage="./testdb", modules="bionty,wetlab")
    yield
    # teardown: remove the instance created above
    delete("testdb", force=True)
@@ -0,0 +1,215 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ import pandas as pd
6
+ import pytest
7
+ from lamindb_setup.io import _get_registries, export_db, import_db
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Callable, Generator
11
+ from pathlib import Path
12
+
13
+
14
@pytest.fixture
def cleanup_export_dir(tmp_path) -> Generator[Path, None, None]:
    """Yield the path `test_export` inside pytest's `tmp_path` without creating it."""
    yield tmp_path / "test_export"
18
+
19
+
20
def test_get_registries_lamindb(simple_instance: Callable):
    """Core lamindb registries are discovered while `SQLRecord` is left out."""
    found = set(_get_registries("lamindb"))

    expected = {"Artifact", "Collection", "Run", "Transform", "Record"}
    assert expected <= found

    assert "SQLRecord" not in found
+
31
+
32
def test_get_registries_bionty(simple_instance: Callable):
    """A few well-known bionty registries are discovered."""
    names = _get_registries("bionty")

    for expected in ("Gene", "Protein", "CellType"):
        assert expected in names
    # ... and a few more
    assert len(names) > 0
40
+
41
+
42
def test_exportdb_creates_directory(
    simple_instance: Callable, cleanup_export_dir: Path
):
    """`export_db` creates the output directory when it does not exist yet."""
    target = cleanup_export_dir
    export_db(module_names=["lamindb"], output_dir=target)

    assert target.exists()
    assert target.is_dir()
52
+
53
+
54
def test_exportdb_exports_parquet_files(
    simple_instance: Callable, cleanup_export_dir: Path
):
    """Every file written by `export_db` can be read back as a DataFrame."""
    export_db(module_names=["lamindb"], output_dir=cleanup_export_dir)

    exported = list(cleanup_export_dir.glob("*.parquet"))
    assert len(exported) > 0

    for path in exported:
        assert isinstance(pd.read_parquet(path), pd.DataFrame)
68
+
69
+
70
def test_exportdb_multiple_modules(simple_instance: Callable, cleanup_export_dir: Path):
    """Export lamindb and bionty together and check records and their link row.

    An artifact and a gene are created and linked, then both modules are
    exported. The created records are deleted again even if an assertion
    fails, so they do not leak into other tests using the same instance.
    """
    import bionty as bt
    import lamindb as ln

    artifact = ln.Artifact.from_dataframe(
        pd.DataFrame({"col": [1, 2, 3]}), key="test_artifact.parquet"
    ).save()
    gene = None
    try:
        gene = bt.Gene.from_source(symbol="TCF7").save()
        artifact.genes.add(gene)

        export_db(
            module_names=["lamindb", "bionty"],
            output_dir=cleanup_export_dir,
        )

        lamindb_files = list(cleanup_export_dir.glob("lamindb_*.parquet"))
        bionty_files = list(cleanup_export_dir.glob("bionty_*.parquet"))

        assert len(lamindb_files) > 0
        assert len(bionty_files) > 0

        gene_df = pd.read_parquet(cleanup_export_dir / "bionty_gene.parquet")
        assert "TCF7" in gene_df["symbol"].values

        artifact_df = pd.read_parquet(cleanup_export_dir / "lamindb_artifact.parquet")
        assert artifact.uid in artifact_df["uid"].values

        # exactly one link row must join the artifact and the gene
        link_df = pd.read_parquet(cleanup_export_dir / "bionty_artifactgene.parquet")
        matching = link_df[
            (link_df["artifact_id"] == artifact.id) & (link_df["gene_id"] == gene.id)
        ]
        assert len(matching) == 1
    finally:
        # clean up regardless of the test outcome
        artifact.delete(permanent=True)
        if gene is not None:
            gene.delete(permanent=True)
110
+
111
+
112
def test_exportdb_default_module(simple_instance: Callable, cleanup_export_dir: Path):
    """Without `module_names`, `export_db` writes lamindb tables."""
    export_db(output_dir=cleanup_export_dir)

    exported = [path for path in cleanup_export_dir.glob("lamindb_*.parquet")]
    assert len(exported) > 0
117
+
118
+
119
def test_exportdb_exports_link_tables(
    simple_instance: Callable, cleanup_export_dir: Path
):
    """At least one exported file name contains an underscore and "artifact"."""
    export_db(module_names=["lamindb"], output_dir=cleanup_export_dir)

    file_names = [path.name for path in cleanup_export_dir.glob("*.parquet")]
    n_candidates = sum(
        1 for name in file_names if "_" in name and "artifact" in name.lower()
    )

    assert n_candidates > 0
128
+
129
+
130
def test_import_db_from_parquet(simple_instance: Callable, tmp_path):
    """Import hand-written parquet files through `import_db`.

    An artifact, a gene and the link row joining them are written as parquet
    files and imported in append mode. The link row references both records,
    so this implicitly tests whether `import_db` can deal with FK constraints.
    """
    import bionty as bt
    import lamindb as ln
    import lamindb_setup as ln_setup

    export_dir = tmp_path / "export"
    export_dir.mkdir()
    user_id = ln_setup.settings.user.id

    def write_table(table_name: str, columns: dict) -> None:
        # one single-row parquet file per table, named after the table
        frame = pd.DataFrame(columns)
        frame.to_parquet(export_dir / f"{table_name}.parquet", index=False)

    write_table(
        "lamindb_artifact",
        {
            "id": [888],
            "uid": ["test_artifact_uid"],
            "key": ["test_key"],
            "_key_is_virtual": [False],
            "_overwrite_versions": [False],
            "description": ["Test artifact"],
            "suffix": [".txt"],
            "kind": ["dataset"],
            "size": [1024],
            "hash": ["testhash123"],
            "is_latest": [True],
            "is_locked": [False],
            "storage_id": [1],
            "created_by_id": [user_id],
            "created_at": [pd.Timestamp.now()],
            "updated_at": [pd.Timestamp.now()],
        },
    )

    write_table(
        "bionty_gene",
        {
            "id": [999],
            "uid": ["test_uid_999"],
            "symbol": ["TESTGENE"],
            "ensembl_gene_id": ["ENSG00000999"],
            "ncbi_gene_ids": [None],
            "biotype": ["protein_coding"],
            "description": ["Test gene for import"],
            "synonyms": [None],
            "organism_id": [1],
            "source_id": [1],
            "created_by_id": [user_id],
            "created_at": [pd.Timestamp.now()],
            "updated_at": [pd.Timestamp.now()],
        },
    )

    write_table(
        "bionty_artifactgene",
        {
            "id": [1],
            "artifact_id": [888],
            "gene_id": [999],
            "feature_id": [None],
            "feature_ref_is_name": [None],
            "label_ref_is_name": [None],
            "created_at": [pd.Timestamp.now()],
            "created_by_id": [user_id],
            "run_id": [None],
        },
    )

    import_db(
        input_dir=export_dir,
        module_names=["lamindb", "bionty"],
        if_exists="append",
    )

    # gene and artifact should exist after the import
    gene = bt.Gene.get(id=999)
    assert gene.symbol == "TESTGENE"
    assert gene.ensembl_gene_id == "ENSG00000999"
    artifact = ln.Artifact.get(id=888)
    assert artifact.key == "test_key"
    assert artifact.genes.count() == 1

    # they should also be linked
    linked = artifact.genes.first()
    assert linked.id == 999
    assert linked.symbol == "TESTGENE"
@@ -1,68 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from importlib import import_module
4
- from pathlib import Path
5
-
6
# Registries covered by the parquet export/import helpers.
#
# Outer keys are schema-module suffixes: the export loop imports
# ``lnschema_<key>`` for each of them. Inner keys are the registry class
# names that are looked up on that module with ``getattr``.
#
# NOTE(review): the loops visible in this package only iterate the inner
# keys; nothing here reads the boolean values, so their meaning (only
# ``Run`` is ``True``) cannot be determined from this file — confirm before
# relying on them.
MODELS = {
    "lamindb": {
        "Collection": False,
        "Artifact": False,
        "Transform": False,
        "Run": True,
        "User": False,
        "Storage": False,
        "Feature": False,
        "Schema": False,
        "ULabel": False,
    },
    # The "bionty" and "wetlab" tables below are disabled and therefore
    # skipped by both the export and the import loop.
    # "bionty": {
    #     "Organism": False,
    #     "Gene": False,
    #     "Protein": False,
    #     "CellMarker": False,
    #     "Tissue": False,
    #     "CellType": False,
    #     "Disease": False,
    #     "CellLine": False,
    #     "Phenotype": False,
    #     "Pathway": False,
    #     "ExperimentalFactor": False,
    #     "DevelopmentalStage": False,
    #     "Ethnicity": False,
    #     "Source": False,
    # },
    # "wetlab": {
    #     "ExperimentType": False,
    #     "Experiment": False,
    #     "Well": False,
    #     "TreatmentTarget": False,
    #     "Treatment": False,
    #     "Biosample": False,
    #     "Techsample": False,
    # },
}
44
-
45
-
46
def exportdb() -> None:
    """Dump every registry listed in ``MODELS`` to parquet files.

    Creates ``./lamindb_export/`` (including parents) if needed, then writes
    one uncompressed ``<db_table>.parquet`` file per registry and one per
    ``through`` model of each of the registry's many-to-many fields. Tables
    are read with ``pandas.read_sql_table`` using the current instance's
    ``db`` setting.
    """
    directory = Path("./lamindb_export/")
    directory.mkdir(parents=True, exist_ok=True)
    import pandas as pd

    import lamindb_setup as ln_setup

    def dump_table(orm) -> None:
        # One parquet file per database table, named after the table.
        table = orm._meta.db_table
        frame = pd.read_sql_table(table, ln_setup.settings.instance.db)
        frame.to_parquet(directory / f"{table}.parquet", compression=None)

    # export data to parquet files
    print(f"\nexporting data to parquet files in: {directory}\n")
    for module_name, models in MODELS.items():
        for model_name in models:
            registry = getattr(import_module(f"lnschema_{module_name}"), model_name)
            dump_table(registry)
            # Link tables live on the ``through`` model of each m2m field.
            m2m_field_names = [field.name for field in registry._meta.many_to_many]
            for field_name in m2m_field_names:
                dump_table(getattr(registry, field_name).through)
@@ -1,50 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from importlib import import_module
4
- from pathlib import Path
5
-
6
- from ._exportdb import MODELS
7
-
8
-
9
def import_registry(registry, directory, connection):
    """Append one registry's parquet dump to its database table.

    Reads ``<directory>/<db_table>.parquet``, discards every column whose
    name ends in ``_old`` and appends the remaining rows to the table named
    ``registry._meta.db_table`` via ``DataFrame.to_sql``.

    Args:
        registry: Object exposing ``_meta.db_table`` (the target table name).
        directory: Path-like folder (must support ``/``) holding the parquet
            files.
        connection: Database connection handed to ``DataFrame.to_sql``.
    """
    import pandas as pd

    table = registry._meta.db_table
    frame = pd.read_parquet(directory / f"{table}.parquet")
    # Columns suffixed with "_old" are not written to the target table.
    stale_columns = [name for name in frame.columns if name.endswith("_old")]
    frame = frame.drop(columns=stale_columns)
    frame.to_sql(table, connection, if_exists="append", index=False)
20
-
21
-
22
def importdb() -> None:
    """Load the parquet dumps in ``./lamindb_export/`` into the instance database.

    Asks for confirmation on stdin and returns without touching the database
    unless the answer is exactly ``y``. Otherwise every registry listed in
    ``MODELS`` — followed by the ``through`` models of its many-to-many
    fields — is appended to its table inside a single transaction. On
    PostgreSQL, constraints are deferred for that transaction.

    Raises:
        FileNotFoundError: If ``./lamindb_export/`` does not exist.
    """
    # import data from parquet files
    directory = Path("./lamindb_export/")
    if not directory.exists():
        # Fail before any database work: without this guard a missing
        # directory would bypass the confirmation prompt and only error out
        # on the first parquet read, inside an open transaction.
        raise FileNotFoundError(f"export directory does not exist: {directory}")

    response = input(
        f"\n\nDo you want to import registries from here: {directory}? (y/n)\n"
    )
    if response != "y":
        return None

    from sqlalchemy import create_engine, text

    import lamindb_setup as ln_setup

    engine = create_engine(ln_setup.settings.instance.db, echo=False)
    try:
        with engine.begin() as connection:
            if ln_setup.settings.instance.dialect == "postgresql":
                connection.execute(text("SET CONSTRAINTS ALL DEFERRED;"))
            for module_name, models in MODELS.items():
                # The schema module is the same for every model of a module.
                schema_module = import_module(f"lnschema_{module_name}")
                for model_name in models:
                    print(model_name)
                    registry = getattr(schema_module, model_name)
                    import_registry(registry, directory, connection)
                    for field in registry._meta.many_to_many:
                        link_orm = getattr(registry, field.name).through
                        import_registry(link_orm, directory, connection)
    finally:
        # The engine is private to this call; release its pooled connections.
        engine.dispose()
File without changes
File without changes