lamindb_setup 1.15.2.tar.gz → 1.16.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/.github/workflows/build.yml +4 -4
  2. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/PKG-INFO +2 -1
  3. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/__init__.py +1 -1
  4. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_schema_metadata.py +9 -11
  5. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_setup_user.py +20 -2
  6. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_clone.py +1 -1
  7. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_private_django_api.py +0 -1
  8. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_storage.py +3 -1
  9. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/django.py +2 -0
  10. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/upath.py +19 -7
  11. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/io.py +55 -30
  12. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/pyproject.toml +2 -0
  13. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_clone_instance.py +2 -2
  14. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-local/test_update_schema_in_hub.py +10 -6
  15. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_db_import_export.py +159 -1
  16. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_storage_basis.py +1 -0
  17. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/.github/workflows/doc-changes.yml +0 -0
  18. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/.gitignore +0 -0
  19. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/.pre-commit-config.yaml +0 -0
  20. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/LICENSE +0 -0
  21. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/README.md +0 -0
  22. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/changelog.md +0 -0
  23. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/01-init-local-instance.ipynb +0 -0
  24. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/02-connect-local-instance.ipynb +0 -0
  25. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/03-add-managed-storage.ipynb +0 -0
  26. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/04-test-bionty.ipynb +0 -0
  27. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/05-init-hosted-instance.ipynb +0 -0
  28. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/06-connect-hosted-instance.ipynb +0 -0
  29. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/07-keep-artifacts-local.ipynb +0 -0
  30. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/08-test-multi-session.ipynb +0 -0
  31. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/09-test-migrate.ipynb +0 -0
  32. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-cloud/test_notebooks.py +0 -0
  33. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-cache-management.ipynb +0 -0
  34. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-cloud-sync.ipynb +0 -0
  35. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-connect-anonymously.ipynb +0 -0
  36. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-empty-init.ipynb +0 -0
  37. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-import-schema.ipynb +0 -0
  38. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-init-load-local-anonymously.ipynb +0 -0
  39. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-insufficient-user-info.ipynb +0 -0
  40. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-invalid-schema.ipynb +0 -0
  41. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test-sqlite-lock.ipynb +0 -0
  42. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/hub-prod/test_notebooks2.py +0 -0
  43. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/index.md +0 -0
  44. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/notebooks.md +0 -0
  45. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/docs/reference.md +0 -0
  46. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_cache.py +0 -0
  47. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_check.py +0 -0
  48. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_check_setup.py +0 -0
  49. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_connect_instance.py +0 -0
  50. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_delete.py +0 -0
  51. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_disconnect.py +0 -0
  52. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_django.py +0 -0
  53. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_entry_points.py +0 -0
  54. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_init_instance.py +0 -0
  55. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_migrate.py +0 -0
  56. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_register_instance.py +0 -0
  57. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_schema.py +0 -0
  58. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_set_managed_storage.py +0 -0
  59. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/_silence_loggers.py +0 -0
  60. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/__init__.py +0 -0
  61. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_aws_options.py +0 -0
  62. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_aws_storage.py +0 -0
  63. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_deprecated.py +0 -0
  64. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_docs.py +0 -0
  65. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_hub_client.py +0 -0
  66. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_hub_core.py +0 -0
  67. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_hub_crud.py +0 -0
  68. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_hub_utils.py +0 -0
  69. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings.py +0 -0
  70. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_instance.py +0 -0
  71. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_load.py +0 -0
  72. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_save.py +0 -0
  73. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_store.py +0 -0
  74. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_settings_user.py +0 -0
  75. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/_setup_bionty_sources.py +0 -0
  76. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/cloud_sqlite_locker.py +0 -0
  77. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/exceptions.py +0 -0
  78. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/hashing.py +0 -0
  79. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/core/types.py +0 -0
  80. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/errors.py +0 -0
  81. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/py.typed +0 -0
  82. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/lamindb_setup/types.py +0 -0
  83. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/noxfile.py +0 -0
  84. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/scripts/script-init-pass-user-no-writes.py +0 -0
  85. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/scripts/script-to-fail-managed-storage.py +0 -0
  86. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_connect_instance.py +0 -0
  87. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_delete_instance.py +0 -0
  88. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_edge_request.py +0 -0
  89. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_fail_managed_storage.py +0 -0
  90. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_init_instance.py +0 -0
  91. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_init_pass_user_no_writes.py +0 -0
  92. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_login.py +0 -0
  93. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-cloud/test_set_storage.py +0 -0
  94. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-local/README.md +0 -0
  95. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-local/conftest.py +0 -0
  96. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-local/scripts/script-connect-fine-grained-access.py +0 -0
  97. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-local/test_all.py +0 -0
  98. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/conftest.py +0 -0
  99. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_aws_options_manager.py +0 -0
  100. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_django.py +0 -0
  101. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_global_settings.py +0 -0
  102. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_migrate.py +0 -0
  103. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_switch_and_fallback_env.py +0 -0
  104. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/hub-prod/test_upath.py +0 -0
  105. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/conftest.py +0 -0
  106. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_entry_point.py +0 -0
  107. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_hashing.py +0 -0
  108. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_httpx_client.py +0 -0
  109. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_storage_access.py +0 -0
  110. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_storage_settings.py +0 -0
  111. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_storage_stats.py +0 -0
  112. {lamindb_setup-1.15.2 → lamindb_setup-1.16.0}/tests/storage/test_to_url.py +0 -0

.github/workflows/build.yml
@@ -12,7 +12,7 @@ jobs:
  # tests only on production hub
  hub-prod:
  runs-on: ubuntu-latest
- timeout-minutes: 12
+ timeout-minutes: 13
  steps:
  - uses: actions/checkout@v4
  - uses: actions/setup-python@v6
@@ -44,7 +44,7 @@ jobs:
  python-version: "3.11"
  - lamin_env: "staging"
  python-version: "3.10"
- timeout-minutes: 12
+ timeout-minutes: 13
  steps:
  - uses: aws-actions/configure-aws-credentials@v4
  with:
@@ -94,7 +94,7 @@ jobs:
  # test user access to storage
  storage:
  runs-on: ubuntu-latest
- timeout-minutes: 12
+ timeout-minutes: 13
  steps:
  - uses: actions/checkout@v4
  - uses: actions/setup-python@v6
@@ -119,7 +119,7 @@ jobs:
  # test low-level hub functionality
  hub-local:
  runs-on: ubuntu-latest
- timeout-minutes: 12
+ timeout-minutes: 13
  steps:
  - uses: aws-actions/configure-aws-credentials@v4
  with:

PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: lamindb_setup
- Version: 1.15.2
+ Version: 1.16.0
  Summary: Setup & configure LaminDB.
  Author-email: Lamin Labs <open-source@lamin.ai>
  Requires-Python: >=3.10
@@ -8,6 +8,7 @@ Description-Content-Type: text/markdown
  Requires-Dist: lamin_utils>=0.3.3
  Requires-Dist: django>=5.2,<5.3
  Requires-Dist: dj_database_url>=1.3.0,<3.0.0
+ Requires-Dist: django-pgtrigger
  Requires-Dist: pydantic-settings
  Requires-Dist: platformdirs<5.0.0
  Requires-Dist: httpx_retries<1.0.0

lamindb_setup/__init__.py
@@ -35,7 +35,7 @@ Migration management

  """

- __version__ = "1.15.2" # denote a release candidate for 0.1.0 with 0.1rc1
+ __version__ = "1.16.0" # denote a release candidate for 0.1.0 with 0.1rc1

  import os
  import warnings

lamindb_setup/_schema_metadata.py
@@ -172,7 +172,8 @@ class _ModelHandler:
  self.table_name = model._meta.db_table
  self.included_modules = included_modules
  self.fields = self._get_fields_metadata(self.model)
- self.is_link_table = issubclass(model, IsLink)
+ self.is_auto_created = bool(model._meta.auto_created)
+ self.is_link_table = issubclass(model, IsLink) or self.is_auto_created
  self.name_field = model._name_field if hasattr(model, "_name_field") else None
  self.ontology_id_field = (
  model._ontology_id_field if hasattr(model, "_ontology_id_field") else None
@@ -183,6 +184,7 @@ class _ModelHandler:
  "fields": self.fields.copy(),
  "class_name": self.class_name,
  "table_name": self.table_name,
+ "is_auto_created": self.is_auto_created,
  "is_link_table": self.is_link_table,
  "name_field": self.name_field,
  "ontology_id_field": self.ontology_id_field,
@@ -249,13 +251,13 @@ class _ModelHandler:
  return related_fields

  def _get_field_metadata(self, model, field: Field):
- from lamindb.models import IsLink
+ from lamindb.models import IsLink, Registry

  internal_type = field.get_internal_type()
  model_name = field.model._meta.model_name
  relation_type = self._get_relation_type(model, field)

- schema_name = field.model.__get_module_name__()
+ schema_name = Registry.__get_module_name__(field.model)

  if field.related_model is None:
  related_model_name = None
@@ -265,7 +267,7 @@
  max_length = field.max_length
  else:
  related_model_name = field.related_model._meta.model_name
- related_schema_name = field.related_model.__get_module_name__()
+ related_schema_name = Registry.__get_module_name__(field.related_model)
  related_field_name = field.remote_field.name
  is_editable = False
  max_length = None
@@ -418,14 +420,10 @@ class _SchemaHandler:
  all_models = {module_name: {} for module_name in self.included_modules}

  # Iterate through all registered Django models
- for model in apps.get_models():
+ for model in apps.get_models(include_auto_created=True):
  # Check if model meets the criteria
- if (
- model.__class__ is Registry
- and model is not SQLRecord
- and not model._meta.abstract
- ):
- module_name = model.__get_module_name__()
+ if model is not SQLRecord and not model._meta.abstract:
+ module_name = Registry.__get_module_name__(model)
  # Only include if module is in our included list
  if module_name in self.included_modules:
  model_name = model._meta.model_name
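
For context on the `is_auto_created` flag added above: Django marks implicit many-to-many `through` models with `_meta.auto_created`, and they are only returned when `include_auto_created=True` is passed to `apps.get_models()`. A minimal sketch of listing them (assumes a configured Django/lamindb setup; the printed labels are illustrative):

    from django.apps import apps

    # Include implicit M2M "through" tables alongside regular registries.
    for model in apps.get_models(include_auto_created=True):
        if model._meta.auto_created:
            # e.g. implicit link tables behind many-to-many fields
            print("auto-created link table:", model._meta.db_table)
        else:
            print("registry table:", model._meta.db_table)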

lamindb_setup/_setup_user.py
@@ -43,6 +43,14 @@ def load_user(email: str | None = None, handle: str | None = None) -> UserSettin
  return user_settings


+ def current_user_uid() -> str:
+ current_user_settings = current_user_settings_file()
+ if current_user_settings.exists():
+ return load_user_settings(current_user_settings).uid
+
+ return "00000000" # anonymous
+
+
  def login(
  user: str | None = None, *, api_key: str | None = None, **kwargs
  ) -> UserSettings:
@@ -90,6 +98,9 @@ def login(
  "the legacy API key is deprecated and will likely be removed in a future version"
  )

+ # do this here because load_user overwrites current_user_settings_file
+ previous_user_uid = current_user_uid()
+
  if api_key is None:
  if "@" in user: # type: ignore
  email, handle = user, None
@@ -144,8 +155,15 @@ def login(
  user_settings.api_key = api_key
  save_user_settings(user_settings)

- if settings._instance_exists and _check_instance_setup():
- register_user(user_settings)
+ if settings._instance_exists:
+ if (
+ isettings := settings.instance
+ ).is_on_hub and previous_user_uid != user_settings.uid:
+ logger.important_hint(
+ f"consider re-connecting to update permissions: lamin connect {isettings.slug}"
+ )
+ if _check_instance_setup():
+ register_user(user_settings)

  settings._user_settings = None
  # aws s3 credentials are scoped to the user
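
The new hint above covers the case where a different user logs in while an instance that lives on the hub is already configured locally; re-connecting refreshes that user's permissions. A hedged usage sketch (the email address and instance slug are placeholders):

    import lamindb_setup as ln_setup

    # Logging in as a different user than before prints a hint along the lines of
    #   consider re-connecting to update permissions: lamin connect <owner>/<instance>
    ln_setup.login("other-user@example.org")

    # Re-connecting picks up the permissions of the newly logged-in user.
    ln_setup.connect("owner-handle/instance-name")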

lamindb_setup/core/_clone.py
@@ -5,6 +5,7 @@

  init_local_sqlite
  connect_local_sqlite
+ connect_remote_sqlite
  upload_sqlite_clone
  """

@@ -13,7 +14,6 @@ import os
  import shutil
  from pathlib import Path

- from lamindb_setup.core._settings_instance import InstanceSettings
  from lamindb_setup.core._settings_load import load_instance_settings
  from lamindb_setup.core._settings_store import instance_settings_file
  from lamindb_setup.core.django import reset_django

lamindb_setup/core/_private_django_api.py
@@ -37,7 +37,6 @@ def private_django_api(reverse=False):
  "MultipleObjectsReturned",
  "add_to_class",
  "adelete",
- "refresh_from_db",
  "asave",
  "clean",
  "clean_fields",

lamindb_setup/core/_settings_storage.py
@@ -122,6 +122,7 @@ def init_storage(
  access_token: str | None = None,
  region: str | None = None,
  space_uuid: UUID | None = None,
+ skip_mark_storage_root: bool = False,
  ) -> tuple[
  StorageSettings,
  Literal["hub-record-not-created", "hub-record-retrieved", "hub-record-created"],
@@ -181,7 +182,8 @@
  space_id=space_uuid,
  )
  # we check the write access here if the storage record has not been retrieved from the hub
- if hub_record_status != "hub-record-retrieved":
+ # Sergei: should it in fact still go through if hub_record_status == "hub-record-not-created"?
+ if hub_record_status != "hub-record-retrieved" and not skip_mark_storage_root:
  try:
  # (federated) credentials for AWS access are provisioned under-the-hood
  # discussion: https://laminlabs.slack.com/archives/C04FPE8V01W/p1719260587167489

lamindb_setup/core/django.py
@@ -238,6 +238,8 @@ def setup_django(
  if view_schema:
  installed_apps = installed_apps[::-1] # to fix how apps appear
  installed_apps += ["schema_graph", "django.contrib.staticfiles"]
+ if isettings.dialect == "postgresql":
+ installed_apps.insert(0, "pgtrigger")

  kwargs = dict(
  INSTALLED_APPS=installed_apps,

lamindb_setup/core/upath.py
@@ -93,10 +93,12 @@ def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
  else:
  return suffix

- if len(path.suffixes) <= 1:
+ suffixes = path.suffixes
+
+ if len(suffixes) <= 1:
  return process_digits(path.suffix)

- total_suffix = "".join(path.suffixes)
+ total_suffix = "".join(suffixes)
  if total_suffix in VALID_SIMPLE_SUFFIXES:
  return total_suffix
  elif total_suffix.endswith(tuple(VALID_COMPOSITE_SUFFIXES)):
@@ -115,14 +117,24 @@ def extract_suffix_from_path(path: Path, arg_name: str | None = None) -> str:
  # in COMPRESSION_SUFFIXES to detect something like .random.gz and then
  # add ".random.gz" but concluded it's too dangerous it's safer to just
  # use ".gz" in such a case
- if path.suffixes[-2] in VALID_SIMPLE_SUFFIXES:
- suffix = "".join(path.suffixes[-2:])
- msg += f"inferring: '{suffix}'"
+ if suffixes[-2] in VALID_SIMPLE_SUFFIXES:
+ suffix = "".join(suffixes[-2:])
+ # if the suffix preceding the compression suffixes is a valid suffix,
+ # we account for it; otherwise we don't.
+ # i.e. we should have .h5ad.tar.gz or .csv.tar.gz, not just .tar.gz
+ if (
+ suffix == ".tar.gz"
+ and len(suffixes) > 2
+ and (suffix_3 := suffixes[-3]) in VALID_SIMPLE_SUFFIXES
+ ):
+ suffix = suffix_3 + suffix
  # do not print a warning for things like .tar.gz, .fastq.gz
- if path.suffixes[-1] == ".gz":
+ if suffixes[-1] == ".gz":
  print_hint = False
+ else:
+ msg += f"inferring: '{suffix}'"
  else:
- suffix = path.suffixes[-1] # this is equivalent to path.suffix
+ suffix = suffixes[-1] # this is equivalent to path.suffix
  msg += (
  f"using only last suffix: '{suffix}' - if you want your composite"
  " suffix to be recognized add it to"

lamindb_setup/io.py
@@ -3,7 +3,7 @@ from __future__ import annotations
  import io
  import json
  import warnings
- from concurrent.futures import ProcessPoolExecutor, as_completed
+ from concurrent.futures import ThreadPoolExecutor, as_completed
  from importlib import import_module
  from pathlib import Path
  from typing import TYPE_CHECKING
@@ -14,24 +14,13 @@ from django.db import models, transaction
  from rich.progress import Progress

  if TYPE_CHECKING:
- from collections.abc import Sequence
+ from collections.abc import Iterable, Sequence
  from typing import Literal


  def _get_registries(module_name: str) -> list[str]:
  """Get registry class names from a module."""
  schema_module = import_module(module_name)
- exclude = {"SQLRecord", "BaseSQLRecord"}
-
- if module_name == "lamindb":
- module_filter = lambda cls, name: cls.__module__.startswith(
- f"{module_name}.models."
- ) and name in dir(schema_module)
- else:
- module_filter = (
- lambda cls, name: cls.__module__ == f"{module_name}.models"
- and name in dir(schema_module)
- )

  return [
  name
@@ -40,8 +29,8 @@ def _get_registries(module_name: str) -> list[str]:
  name[0].isupper()
  and isinstance(cls := getattr(schema_module.models, name, None), type)
  and issubclass(cls, models.Model)
- and module_filter(cls, name)
- and name not in exclude
+ # Table names starting with `None_` are abstract base classes or Django mixins
+ and not cls._meta.db_table.startswith("None_") # type: ignore
  )
  ]


@@ -59,7 +48,7 @@ def _export_full_table(
  For SQLite with large tables, reads in chunks to avoid memory issues when tables exceed available RAM.

  Args:
- registry_info: Tuple of (module_name, model_name, field_name) where field_name
+ registry_info: Tuple of (module_name, model_name, field_name) where `field_name`
  is None for regular tables or the field name for M2M link tables.
  directory: Output directory for parquet files.
  chunk_size: Maximum rows per chunk for SQLite large tables.
@@ -73,7 +62,7 @@

  module_name, model_name, field_name = registry_info
  schema_module = import_module(module_name)
- registry = getattr(schema_module, model_name)
+ registry = getattr(schema_module.models, model_name)

  if field_name:
  registry = getattr(registry, field_name).through
@@ -84,12 +73,19 @@
  if ln_setup.settings.instance.dialect == "postgresql":
  buffer = io.StringIO()
  with connection.cursor() as cursor:
+ cursor.execute("SET statement_timeout = 0")
  cursor.copy_expert(
  f'COPY "{table_name}" TO STDOUT WITH (FORMAT CSV, HEADER TRUE)',
  buffer,
  )
  buffer.seek(0)
- df = pd.read_csv(buffer)
+ # Prevent pandas from converting empty strings to float NaN (which PyArrow rejects)
+ df = pd.read_csv(buffer, keep_default_na=False)
+ # Convert object columns to string to handle mixed types from data corruption,
+ # schema migrations, or manual SQL inserts. PyArrow rejects mixed-type objects.
+ df = df.astype(
+ {col: str for col in df.columns if df[col].dtype == "object"}
+ )
  df.to_parquet(directory / f"{table_name}.parquet", compression=None)
  return (
  f"{module_name}.{model_name}.{field_name}"

@@ -118,11 +114,21 @@ def _export_full_table(
  chunk_file = (
  directory / f"{table_name}_chunk_{chunk_id}.parquet"
  )
+ df = df.astype(
+ {
+ col: str
+ for col in df.columns
+ if df[col].dtype == "object"
+ }
+ )
  df.to_parquet(chunk_file, compression=None)
  chunk_files.append((table_name, chunk_file))
  return chunk_files
  else:
  df = pd.read_sql_table(table_name, ln_setup.settings.instance.db)
+ df = df.astype(
+ {col: str for col in df.columns if df[col].dtype == "object"}
+ )
  df.to_parquet(directory / f"{table_name}.parquet", compression=None)
  return (
  f"{module_name}.{model_name}.{field_name}"
@@ -163,7 +169,7 @@ def export_db(
  for module_name, model_names in modules.items():
  schema_module = import_module(module_name)
  for model_name in model_names:
- registry = getattr(schema_module, model_name)
+ registry = getattr(schema_module.models, model_name)
  tasks.append((module_name, model_name, None))
  for field in registry._meta.many_to_many:
  tasks.append((module_name, model_name, field.name))
@@ -173,13 +179,8 @@
  with Progress() as progress:
  task_id = progress.add_task("Exporting", total=len(tasks))

- import multiprocessing
-
- mp_context = multiprocessing.get_context("spawn")
-
- with ProcessPoolExecutor(
- max_workers=max_workers, mp_context=mp_context
- ) as executor:
+ # This must be a ThreadPoolExecutor and not a ProcessPoolExecutor to inherit JWTs
+ with ThreadPoolExecutor(max_workers=max_workers) as executor:
  futures = {
  executor.submit(_export_full_table, task, directory, chunk_size): task
  for task in tasks
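
Why threads rather than processes here: workers started by a spawn-based ProcessPoolExecutor re-import the module and therefore do not see credentials that were set at runtime, while threads share the parent process's memory. A toy sketch (the `_TOKEN` global is hypothetical, not lamindb's actual credential store):

    from concurrent.futures import ThreadPoolExecutor

    _TOKEN = None  # imagine a JWT cached in a module global after login


    def _export_one(table: str) -> str:
        # threads run in the same process, so they see the token set below
        return f"{table}: token={_TOKEN}"


    def main() -> None:
        global _TOKEN
        _TOKEN = "jwt-set-after-login"
        with ThreadPoolExecutor(max_workers=2) as executor:
            for result in executor.map(_export_one, ["lamindb_artifact", "bionty_gene"]):
                print(result)
        # A spawn-based ProcessPoolExecutor would re-import this module in each
        # worker, so _TOKEN would still be None there.


    if __name__ == "__main__":
        main()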

@@ -229,7 +230,6 @@
  parquet_file = directory / f"{table_name}.parquet"

  if not parquet_file.exists():
- print(f"Skipped {table_name} (file not found)")
  return

  df = pd.read_parquet(parquet_file)
@@ -244,12 +244,37 @@
  if mask.any():
  df.loc[mask, col] = df.loc[mask, col].map(_serialize_value)

+ for field in registry._meta.fields:
+ # Convert PostgreSQL boolean string literals ('t'/'f') to Python booleans for SQLite compatibility
+ if field.get_internal_type() == "BooleanField" and field.column in df.columns:
+ df[field.column] = df[field.column].map(
+ {"t": True, "f": False, True: True, False: False, None: None}
+ )
+
+ # PostgreSQL CSV export writes NULL as empty string; convert back to None for nullable fields
+ if field.null and field.column in df.columns:
+ df[field.column] = df[field.column].replace("", None)
+
+ # Convert numeric fields from strings to proper types for SQLite
+ if (
+ field.get_internal_type()
+ in (
+ "IntegerField",
+ "BigIntegerField",
+ "PositiveIntegerField",
+ "FloatField",
+ "DecimalField",
+ )
+ and field.column in df.columns
+ ):
+ df[field.column] = pd.to_numeric(df[field.column], errors="coerce")
+
  if if_exists == "append":
  # Fill NULL values in NOT NULL columns to handle schema mismatches between postgres source and SQLite target
  # This allows importing data where fields were nullable
  for field in registry._meta.fields:
  if field.column in df.columns and not field.null:
- df[field.column] = df[field.column].fillna("")
+ df[field.column] = df[field.column].fillna("").infer_objects(copy=False)

  if df.empty:
  return
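
The Postgres-to-SQLite value conversions added above, shown on a toy frame (column names are illustrative; the pandas calls mirror the ones in the diff):

    import pandas as pd

    # Values as they arrive from the Postgres CSV/Parquet round trip:
    # booleans as 't'/'f', NULLs as empty strings, numbers as strings.
    df = pd.DataFrame(
        {"is_latest": ["t", "f"], "description": ["", "ok"], "size": ["2048", "1024"]}
    )

    df["is_latest"] = df["is_latest"].map(
        {"t": True, "f": False, True: True, False: False, None: None}
    )
    df["description"] = df["description"].replace("", None)
    df["size"] = pd.to_numeric(df["size"], errors="coerce")

    print(df.dtypes)  # is_latest: bool, description: object, size: int64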

@@ -297,7 +322,7 @@


  def import_db(
- module_names: Sequence[str] | None = None,
+ module_names: Iterable[str] | None = None,
  *,
  input_dir: str | Path = "./lamindb_export/",
  if_exists: Literal["fail", "replace", "append"] = "replace",
@@ -362,7 +387,7 @@
  progress.update(
  task, description=f"[cyan]{module_name}.{model_name}"
  )
- registry = getattr(schema_module, model_name)
+ registry = getattr(schema_module.models, model_name)
  _import_registry(registry, directory, if_exists=if_exists)
  for field in registry._meta.many_to_many:
  link_orm = getattr(registry, field.name).through

pyproject.toml
@@ -13,6 +13,7 @@ dependencies = [
  # External dependencies
  "django>=5.2,<5.3",
  "dj_database_url>=1.3.0,<3.0.0",
+ "django-pgtrigger", # 30kB pure python, no dependencies
  "pydantic-settings",
  "platformdirs<5.0.0",
  "httpx_retries<1.0.0",
@@ -160,4 +161,5 @@ filterwarnings = [
  "ignore:The 'verify' parameter is deprecated. Please configure it in the http client instead.:DeprecationWarning",
  "ignore:There is no current event loop:DeprecationWarning",
  "ignore:DateTimeField.*received a naive datetime.*while time zone support is active:RuntimeWarning",
+ "ignore::sqlalchemy.exc.SAWarning"
  ]

tests/hub-cloud/test_clone_instance.py
@@ -99,8 +99,8 @@ def test_connect_remote_sqlite(tmp_path):
  "lamindb_setup._connect_instance._connect_instance"
  ) as mock_connect_instance,
  patch("lamindb_setup.settings") as mock_settings,
- patch("lamindb_setup.core._clone.InstanceSettings"),
- patch("lamindb_setup.core._clone.create_path") as mock_create_path,
+ patch("lamindb_setup.core._settings_instance.InstanceSettings"),
+ patch("lamindb_setup.core.upath.create_path") as mock_create_path,
  patch("lamindb_setup.core._clone.connect_local_sqlite"),
  ):
  mock_instance = Mock()

tests/hub-local/test_update_schema_in_hub.py
@@ -42,7 +42,11 @@ def test_update_schema_in_hub(setup_instance):
  assert "wetlab" in schema["schema_json"]

  assert not schema["schema_json"]["core"]["artifact"]["is_link_table"]
+ assert not schema["schema_json"]["core"]["artifact"]["is_auto_created"]
  assert schema["schema_json"]["core"]["artifactulabel"]["is_link_table"]
+ assert not schema["schema_json"]["core"]["artifactulabel"]["is_auto_created"]
+ assert schema["schema_json"]["core"]["artifact_input_of_runs"]["is_link_table"]
+ assert schema["schema_json"]["core"]["artifact_input_of_runs"]["is_auto_created"]

  assert schema["schema_json"]["core"]["artifact"]["name_field"] == "key"
  assert schema["schema_json"]["core"]["artifact"]["ontology_id_field"] is None
@@ -170,9 +174,9 @@ def test_update_schema_in_hub(setup_instance):
  "type": "ManyToManyField",
  "column_name": None,
  "through": {
- "left_key": "from_transform_id",
- "right_key": "to_transform_id",
- "link_table_name": "lamindb_transform_predecessors",
+ "left_key": "successor_id",
+ "right_key": "predecessor_id",
+ "link_table_name": "lamindb_transformtransform",
  },
  "field_name": "predecessors",
  "model_name": "transform",
@@ -191,9 +195,9 @@
  "type": "ManyToManyField",
  "column_name": None,
  "through": {
- "left_key": "to_transform_id",
- "right_key": "from_transform_id",
- "link_table_name": "lamindb_transform_predecessors",
+ "left_key": "predecessor_id",
+ "right_key": "successor_id",
+ "link_table_name": "lamindb_transformtransform",
  },
  "field_name": "successors",
  "model_name": "transform",

tests/storage/test_db_import_export.py
@@ -76,6 +76,8 @@ def test_exportdb_multiple_modules(simple_instance: Callable, cleanup_export_dir
  ).save()
  gene = bt.Gene.from_source(symbol="TCF7").save()
  artifact.genes.add(gene)
+ feature = ln.Feature(name="temperature", dtype=int).save()
+ artifact.features.add_values({"temperature": 10})

  export_db(
  module_names=["lamindb", "bionty"],
@@ -91,6 +93,13 @@ def test_exportdb_multiple_modules(simple_instance: Callable, cleanup_export_dir
  gene_df = pd.read_parquet(cleanup_export_dir / "bionty_gene.parquet")
  assert "TCF7" in gene_df["symbol"].values

+ featurevalue_df = pd.read_parquet(
+ cleanup_export_dir / "lamindb_featurevalue.parquet"
+ )
+ assert len(featurevalue_df) == 1
+ assert featurevalue_df.iloc[0]["feature_id"] == feature.id
+ assert featurevalue_df.iloc[0]["value"] == "10"
+
  artifact_df = pd.read_parquet(cleanup_export_dir / "lamindb_artifact.parquet")
  assert artifact.uid in artifact_df["uid"].values

@@ -107,6 +116,7 @@ def test_exportdb_multiple_modules(simple_instance: Callable, cleanup_export_dir

  artifact.delete(permanent=True)
  gene.delete(permanent=True)
+ feature.delete(permanent=True)


  def test_exportdb_default_module(simple_instance: Callable, cleanup_export_dir: Path):
@@ -127,7 +137,42 @@ def test_exportdb_exports_link_tables(
  assert len(link_tables) > 0


- def test_import_db_from_parquet(simple_instance: Callable, tmp_path):
+ def test_exportdb_handles_mixed_null_and_string_values(
+ simple_instance: Callable, cleanup_export_dir: Path
+ ):
+ import bionty as bt
+ import lamindb_setup as ln_setup
+
+ organism = bt.Organism.filter(name="human").one()
+
+ # Create one gene with ncbi_gene_ids populated
+ gene1 = bt.Gene(
+ symbol="GENE1",
+ ensembl_gene_id="ENSG00000001",
+ ncbi_gene_ids="12345,67890", # String value
+ organism=organism,
+ source_id=1,
+ created_by_id=ln_setup.settings.user.id,
+ ).save()
+
+ # Create one gene with ncbi_gene_ids NULL
+ gene2 = bt.Gene(
+ symbol="GENE2",
+ ensembl_gene_id="ENSG00000002",
+ ncbi_gene_ids=None, # NULL value
+ organism=organism,
+ source_id=1,
+ created_by_id=ln_setup.settings.user.id,
+ ).save()
+
+ # This would crash with ArrowTypeError without keep_default_na=False
+ export_db(module_names=["bionty"], output_dir=cleanup_export_dir)
+
+ gene1.delete(permanent=True)
+ gene2.delete(permanent=True)
+
+
+ def test_import_db_from_parquet(simple_instance: Callable, tmp_path: Path):
  """Tests imports of a parquet file.

  Implicitly also tests whether `import_db` can deal with FK constraints.
@@ -223,3 +268,116 @@ def test_import_db_from_parquet(simple_instance: Callable, tmp_path):
  )
  create_stmt = cursor.fetchone()[0]
  assert "PRIMARY KEY" in create_stmt
+
+
+ def test_import_db_converts_boolean_strings(simple_instance: Callable, tmp_path: Path):
+ import lamindb as ln
+ import lamindb_setup as ln_setup
+
+ export_dir = tmp_path / "export"
+ export_dir.mkdir()
+
+ artifact_data = pd.DataFrame(
+ {
+ "id": [777],
+ "uid": ["test_bool_uid"],
+ "key": ["test_bool_key"],
+ "_key_is_virtual": ["t"],
+ "_overwrite_versions": ["f"],
+ "description": ["Test boolean conversion"],
+ "suffix": [".txt"],
+ "kind": ["dataset"],
+ "size": [1024],
+ "hash": ["testhash456"],
+ "is_latest": ["t"],
+ "is_locked": ["f"],
+ "storage_id": [1],
+ "created_by_id": [ln_setup.settings.user.id],
+ "created_at": [pd.Timestamp.now()],
+ "updated_at": [pd.Timestamp.now()],
+ }
+ )
+ artifact_data.to_parquet(export_dir / "lamindb_artifact.parquet", index=False)
+
+ import_db(input_dir=export_dir, module_names=["lamindb"], if_exists="append")
+
+ imported = ln.Artifact.get(id=777)
+ assert imported._key_is_virtual is True
+ assert imported._overwrite_versions is False
+ assert imported.is_latest is True
+ assert imported.is_locked is False
+
+
+ def test_import_db_converts_empty_strings_to_none(
+ simple_instance: Callable, tmp_path: Path
+ ):
+ import lamindb as ln
+ import lamindb_setup as ln_setup
+
+ export_dir = tmp_path / "export"
+ export_dir.mkdir()
+
+ artifact_data = pd.DataFrame(
+ {
+ "id": [666],
+ "uid": ["test_empty_str_uid"],
+ "key": ["test_empty_str_key"],
+ "_key_is_virtual": ["f"],
+ "_overwrite_versions": ["f"],
+ "_real_key": [""],
+ "description": [""],
+ "suffix": [".txt"],
+ "kind": ["dataset"],
+ "size": [1024],
+ "hash": ["testhash789"],
+ "is_latest": ["t"],
+ "is_locked": ["f"],
+ "storage_id": [1],
+ "created_by_id": [ln_setup.settings.user.id],
+ "created_at": [pd.Timestamp.now()],
+ "updated_at": [pd.Timestamp.now()],
+ }
+ )
+ artifact_data.to_parquet(export_dir / "lamindb_artifact.parquet", index=False)
+
+ import_db(input_dir=export_dir, module_names=["lamindb"], if_exists="append")
+
+ imported = ln.Artifact.get(id=666)
+ assert imported._real_key is None
+ assert imported.description is None
+
+
+ def test_import_db_converts_numeric_strings(simple_instance: Callable, tmp_path: Path):
+ import lamindb as ln
+ import lamindb_setup as ln_setup
+
+ export_dir = tmp_path / "export"
+ export_dir.mkdir()
+
+ artifact_data = pd.DataFrame(
+ {
+ "id": ["555"],
+ "uid": ["test_numeric_uid"],
+ "key": ["test_numeric_key"],
+ "_key_is_virtual": ["f"],
+ "_overwrite_versions": ["f"],
+ "description": ["Test numeric conversion"],
+ "suffix": [".txt"],
+ "kind": ["dataset"],
+ "size": ["2048"],
+ "hash": ["testhash999"],
+ "is_latest": ["t"],
+ "is_locked": ["f"],
+ "storage_id": ["1"],
+ "created_by_id": [str(ln_setup.settings.user.id)],
+ "created_at": [pd.Timestamp.now()],
+ "updated_at": [pd.Timestamp.now()],
+ }
+ )
+ artifact_data.to_parquet(export_dir / "lamindb_artifact.parquet", index=False)
+
+ import_db(input_dir=export_dir, module_names=["lamindb"], if_exists="append")
+
+ imported = ln.Artifact.get(id=555)
+ assert isinstance(imported.size, int)
+ assert imported.size == 2048

tests/storage/test_storage_basis.py
@@ -20,6 +20,7 @@ def test_extract_suffix_from_path():
  ("salmon.merged.gene_counts.tsv", ".tsv"),
  ("salmon.merged.gene_counts.tsv.gz", ".tsv.gz"),
  ("filename.v1.1.0.anndata.zarr", ".anndata.zarr"),
+ ("filename.h5ad.tar.gz", ".h5ad.tar.gz"),
  ]
  for path, suffix in collection:
  filepath = Path(path)