lamindb_setup 1.15.1__tar.gz → 1.15.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/PKG-INFO +2 -3
  2. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/__init__.py +1 -1
  3. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_connect_instance.py +0 -1
  4. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_set_managed_storage.py +11 -3
  5. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/__init__.py +6 -1
  6. lamindb_setup-1.15.2/lamindb_setup/core/_clone.py +174 -0
  7. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_hub_client.py +11 -4
  8. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings.py +1 -2
  9. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_instance.py +15 -6
  10. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_load.py +2 -2
  11. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_save.py +1 -0
  12. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_storage.py +32 -21
  13. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_store.py +3 -2
  14. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/upath.py +1 -4
  15. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/io.py +27 -1
  16. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_clone_instance.py +50 -0
  17. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_db_import_export.py +10 -0
  18. lamindb_setup-1.15.2/tests/storage/test_httpx_client.py +21 -0
  19. lamindb_setup-1.15.1/lamindb_setup/core/_clone.py +0 -93
  20. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/.github/workflows/build.yml +0 -0
  21. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/.github/workflows/doc-changes.yml +0 -0
  22. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/.gitignore +0 -0
  23. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/.pre-commit-config.yaml +0 -0
  24. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/LICENSE +0 -0
  25. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/README.md +0 -0
  26. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/changelog.md +0 -0
  27. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/01-init-local-instance.ipynb +0 -0
  28. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/02-connect-local-instance.ipynb +0 -0
  29. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/03-add-managed-storage.ipynb +0 -0
  30. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/04-test-bionty.ipynb +0 -0
  31. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/05-init-hosted-instance.ipynb +0 -0
  32. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/06-connect-hosted-instance.ipynb +0 -0
  33. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/07-keep-artifacts-local.ipynb +0 -0
  34. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/08-test-multi-session.ipynb +0 -0
  35. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/09-test-migrate.ipynb +0 -0
  36. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-cloud/test_notebooks.py +0 -0
  37. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-cache-management.ipynb +0 -0
  38. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-cloud-sync.ipynb +0 -0
  39. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-connect-anonymously.ipynb +0 -0
  40. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-empty-init.ipynb +0 -0
  41. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-import-schema.ipynb +0 -0
  42. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-init-load-local-anonymously.ipynb +0 -0
  43. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-insufficient-user-info.ipynb +0 -0
  44. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-invalid-schema.ipynb +0 -0
  45. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test-sqlite-lock.ipynb +0 -0
  46. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/hub-prod/test_notebooks2.py +0 -0
  47. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/index.md +0 -0
  48. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/notebooks.md +0 -0
  49. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/docs/reference.md +0 -0
  50. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_cache.py +0 -0
  51. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_check.py +0 -0
  52. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_check_setup.py +0 -0
  53. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_delete.py +0 -0
  54. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_disconnect.py +0 -0
  55. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_django.py +0 -0
  56. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_entry_points.py +0 -0
  57. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_init_instance.py +0 -0
  58. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_migrate.py +0 -0
  59. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_register_instance.py +0 -0
  60. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_schema.py +0 -0
  61. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_schema_metadata.py +0 -0
  62. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_setup_user.py +0 -0
  63. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/_silence_loggers.py +0 -0
  64. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_aws_options.py +0 -0
  65. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_aws_storage.py +0 -0
  66. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_deprecated.py +0 -0
  67. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_docs.py +0 -0
  68. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_hub_core.py +0 -0
  69. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_hub_crud.py +0 -0
  70. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_hub_utils.py +0 -0
  71. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_private_django_api.py +0 -0
  72. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_settings_user.py +0 -0
  73. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/_setup_bionty_sources.py +0 -0
  74. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/cloud_sqlite_locker.py +0 -0
  75. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/django.py +0 -0
  76. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/exceptions.py +0 -0
  77. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/hashing.py +0 -0
  78. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/core/types.py +0 -0
  79. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/errors.py +0 -0
  80. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/py.typed +0 -0
  81. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/lamindb_setup/types.py +0 -0
  82. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/noxfile.py +0 -0
  83. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/pyproject.toml +0 -0
  84. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/scripts/script-init-pass-user-no-writes.py +0 -0
  85. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/scripts/script-to-fail-managed-storage.py +0 -0
  86. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_connect_instance.py +0 -0
  87. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_delete_instance.py +0 -0
  88. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_edge_request.py +0 -0
  89. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_fail_managed_storage.py +0 -0
  90. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_init_instance.py +0 -0
  91. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_init_pass_user_no_writes.py +0 -0
  92. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_login.py +0 -0
  93. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-cloud/test_set_storage.py +0 -0
  94. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-local/README.md +0 -0
  95. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-local/conftest.py +0 -0
  96. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-local/scripts/script-connect-fine-grained-access.py +0 -0
  97. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-local/test_all.py +0 -0
  98. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-local/test_update_schema_in_hub.py +0 -0
  99. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/conftest.py +0 -0
  100. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_aws_options_manager.py +0 -0
  101. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_django.py +0 -0
  102. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_global_settings.py +0 -0
  103. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_migrate.py +0 -0
  104. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_switch_and_fallback_env.py +0 -0
  105. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/hub-prod/test_upath.py +0 -0
  106. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/conftest.py +0 -0
  107. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_entry_point.py +0 -0
  108. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_hashing.py +0 -0
  109. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_storage_access.py +0 -0
  110. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_storage_basis.py +0 -0
  111. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_storage_settings.py +0 -0
  112. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_storage_stats.py +0 -0
  113. {lamindb_setup-1.15.1 → lamindb_setup-1.15.2}/tests/storage/test_to_url.py +0 -0
@@ -1,11 +1,10 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.3
2
2
  Name: lamindb_setup
3
- Version: 1.15.1
3
+ Version: 1.15.2
4
4
  Summary: Setup & configure LaminDB.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.10
7
7
  Description-Content-Type: text/markdown
8
- License-File: LICENSE
9
8
  Requires-Dist: lamin_utils>=0.3.3
10
9
  Requires-Dist: django>=5.2,<5.3
11
10
  Requires-Dist: dj_database_url>=1.3.0,<3.0.0
@@ -35,7 +35,7 @@ Migration management
35
35
 
36
36
  """
37
37
 
38
- __version__ = "1.15.1" # denote a release candidate for 0.1.0 with 0.1rc1
38
+ __version__ = "1.15.2" # denote a release candidate for 0.1.0 with 0.1rc1
39
39
 
40
40
  import os
41
41
  import warnings
@@ -12,7 +12,6 @@ from lamin_utils import logger
12
12
  from ._check_setup import (
13
13
  _check_instance_setup,
14
14
  _get_current_instance_settings,
15
- find_module_candidates,
16
15
  )
17
16
  from ._disconnect import disconnect
18
17
  from ._init_instance import load_from_isettings
@@ -41,7 +41,7 @@ def set_managed_storage(root: UPathStr, host: str | None = None, **fs_kwargs):
41
41
  "use a tuple of (local_root, host) instead"
42
42
  )
43
43
 
44
- # here the storage is registered in the hub
44
+ # here the storage location is registered in the hub
45
45
  # hub_record_status="hub-record-created" if a new record is created
46
46
  # "hub-record-retrieved" if the storage is in the hub already
47
47
  ssettings, hub_record_status = init_storage(
@@ -65,5 +65,13 @@ def set_managed_storage(root: UPathStr, host: str | None = None, **fs_kwargs):
65
65
  delete_storage_record(ssettings)
66
66
  raise e
67
67
 
68
- settings.instance._storage = ssettings
69
- settings.storage._set_fs_kwargs(**fs_kwargs)
68
+ if ssettings._instance_id != settings.instance._id:
69
+ logger.warning(
70
+ f"registered storage location {root} as read-only for this instance (it's written by instance with uid: {ssettings.instance_uid})"
71
+ )
72
+ logger.warning(
73
+ f"did *not* switch default storage location, it's still: {settings.storage.root_as_str}"
74
+ )
75
+ else:
76
+ settings.instance._storage = ssettings
77
+ settings.storage._set_fs_kwargs(**fs_kwargs)
@@ -23,7 +23,12 @@ Storage
23
23
  """
24
24
 
25
25
  from . import django, upath
26
- from ._clone import connect_local_sqlite, init_local_sqlite
26
+ from ._clone import (
27
+ connect_local_sqlite,
28
+ connect_remote_sqlite,
29
+ init_local_sqlite,
30
+ upload_sqlite_clone,
31
+ )
27
32
  from ._deprecated import deprecated # documented in lamindb.base
28
33
  from ._docs import doc_args # documented in lamindb.base
29
34
  from ._settings import SetupSettings
@@ -0,0 +1,174 @@
1
+ """Utilities to copy, clone and load Postgres instances as local SQLite databases.
2
+
3
+ .. autosummary::
4
+ :toctree:
5
+
6
+ init_local_sqlite
7
+ connect_local_sqlite
8
+ upload_sqlite_clone
9
+ """
10
+
11
+ import gzip
12
+ import os
13
+ import shutil
14
+ from pathlib import Path
15
+
16
+ from lamindb_setup.core._settings_instance import InstanceSettings
17
+ from lamindb_setup.core._settings_load import load_instance_settings
18
+ from lamindb_setup.core._settings_store import instance_settings_file
19
+ from lamindb_setup.core.django import reset_django
20
+ from lamindb_setup.core.upath import create_path
21
+
22
+
23
+ def init_local_sqlite(
24
+ instance: str | None = None, copy_suffix: str | None = None
25
+ ) -> None:
26
+ """Initialize SQLite copy of an existing Postgres instance.
27
+
28
+ Creates a SQLite database with the same schema as the source Postgres instance.
29
+ The copy shares the same storage location as the original instance.
30
+
31
+ The copy is intended for read-only access to instance data without requiring a Postgres connection.
32
+ Data synchronization to complete the clone happens via a separate Lambda function.
33
+
34
+ Note that essential user, branch and storage tables are missing.
35
+ Therefore, it is not possible to store Artifacts without having replayed these records first.
36
+
37
+ Args:
38
+ instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
39
+ If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
40
+ If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
41
+ copy_suffix: Optional suffix to append to the local clone name.
42
+ """
43
+ import lamindb_setup as ln_setup
44
+
45
+ if instance is None: # pragma: no cover
46
+ instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
47
+
48
+ if instance is None:
49
+ raise ValueError(
50
+ "No instance identifier provided and LAMIN_CURRENT_INSTANCE is not set"
51
+ )
52
+
53
+ if ln_setup.settings.instance is None: # pragma: no cover
54
+ ln_setup.connect(instance)
55
+
56
+ name = (
57
+ f"{ln_setup.settings.instance.name}{copy_suffix}"
58
+ if copy_suffix is not None
59
+ else ln_setup.settings.instance.name
60
+ )
61
+ isettings = ln_setup._connect_instance._connect_instance(
62
+ owner=ln_setup.settings.instance.owner, name=name
63
+ )
64
+ isettings._db = None
65
+ isettings._is_on_hub = False
66
+ isettings._fine_grained_access = False
67
+ name = (
68
+ f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
69
+ )
70
+ isettings._name = name
71
+ isettings._is_clone = True
72
+ isettings._persist(write_to_disk=True)
73
+
74
+ if not isettings._sqlite_file_local.exists():
75
+ # Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
76
+ # Without this reset, the `if not settings.configured` check in `setup_django()` would skip reconfiguration,
77
+ # causing migrations to run against the old Postgres database instead of the new SQLite clone database.
78
+ reset_django()
79
+ isettings._init_db()
80
+
81
+
82
+ def connect_local_sqlite(
83
+ instance: str,
84
+ ) -> None:
85
+ """Load a locally stored SQLite clone of an instance that exists as a Postgres instance on the hub.
86
+
87
+ This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
88
+ The clone must first be created via `init_local_sqlite()`.
89
+
90
+ Args:
91
+ instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
92
+ """
93
+ owner, name = instance.split("/")
94
+ settings_file = instance_settings_file(name=name, owner=owner)
95
+
96
+ if not settings_file.exists():
97
+ raise ValueError(
98
+ "SQLite clone not found."
99
+ " Run `init_local_sqlite()` to create a local copy or connect to a remote copy using `connect_remote_sqlite`."
100
+ )
101
+
102
+ isettings = load_instance_settings(settings_file)
103
+ isettings._persist(write_to_disk=False)
104
+
105
+ # Using `setup_django` instead of `_load_db` to not ping AWS RDS
106
+ from lamindb_setup._check_setup import disable_auto_connect
107
+
108
+ from .django import setup_django
109
+
110
+ disable_auto_connect(setup_django)(isettings)
111
+
112
+
113
+ def connect_remote_sqlite(instance: str, *, copy_suffix: str | None = None) -> None:
114
+ """Load an existing SQLite copy of a hub instance.
115
+
116
+ Args:
117
+ instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
118
+ copy_suffix: Optional suffix of the local clone.
119
+ """
120
+ import lamindb_setup as ln_setup
121
+
122
+ owner, name = instance.split("/")
123
+
124
+ # Step 1: Create the settings file
125
+ isettings = ln_setup._connect_instance._connect_instance(owner=owner, name=name)
126
+ isettings._db = None
127
+ isettings._is_on_hub = False
128
+ isettings._fine_grained_access = False
129
+ isettings._db_permissions = "read"
130
+ name = (
131
+ f"{isettings.name}{copy_suffix}" if copy_suffix is not None else isettings.name
132
+ )
133
+ isettings._name = name
134
+ isettings._is_clone = True
135
+ isettings._persist(write_to_disk=True)
136
+
137
+ connect_local_sqlite(instance=instance + (copy_suffix or ""))
138
+
139
+
140
+ def upload_sqlite_clone(
141
+ local_sqlite_path: Path | str | None = None, compress: bool = True
142
+ ) -> None:
143
+ """Upload the SQLite clone to the default storage.
144
+
145
+ Args:
146
+ local_sqlite_path: Path to the SQLite file.
147
+ Defaults to the local storage path if not specified.
148
+ compress: Whether to compress the database with gzip before uploading.
149
+ """
150
+ import lamindb_setup as ln_setup
151
+
152
+ if local_sqlite_path is None:
153
+ local_sqlite_path = ln_setup.settings.instance._sqlite_file_local
154
+ else:
155
+ local_sqlite_path = Path(local_sqlite_path)
156
+
157
+ if not local_sqlite_path.exists():
158
+ raise FileNotFoundError(f"Database not found at {local_sqlite_path}")
159
+
160
+ cloud_db_path = ln_setup.settings.instance._sqlite_file
161
+
162
+ if compress:
163
+ temp_gz_path = local_sqlite_path.with_suffix(".db.gz")
164
+ with (
165
+ open(local_sqlite_path, "rb") as f_in,
166
+ gzip.open(temp_gz_path, "wb") as f_out,
167
+ ):
168
+ shutil.copyfileobj(f_in, f_out)
169
+ cloud_destination = create_path(f"{cloud_db_path}.gz")
170
+ cloud_destination.upload_from(temp_gz_path, print_progress=True)
171
+ temp_gz_path.unlink()
172
+ else:
173
+ cloud_destination = create_path(cloud_db_path)
174
+ cloud_destination.upload_from(local_sqlite_path, print_progress=True)
@@ -96,9 +96,12 @@ def connect_hub(
96
96
  transports.append(
97
97
  RetryTransport(
98
98
  retry=LogRetry(total=2, backoff_factor=0.2),
99
- transport=httpx.HTTPTransport(verify=True, http2=True),
99
+ transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
100
100
  )
101
101
  )
102
+ # this overwrites transports of existing httpx clients
103
+ # if proxies are set, the default transports that were created when the clients were initialized
104
+ # will be used, irrespective of this reassignment
102
105
  client.auth._http_client._transport = transports[0]
103
106
  client.postgrest.session._transport = transports[1]
104
107
  # POST is not retryable by default, but for our functions it should be safe to retry
@@ -116,7 +119,7 @@ def connect_hub(
116
119
  "POST",
117
120
  ],
118
121
  ),
119
- transport=httpx.HTTPTransport(verify=True, http2=True),
122
+ transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
120
123
  )
121
124
  return client
122
125
 
@@ -246,9 +249,13 @@ def httpx_client():
246
249
  else:
247
250
  transport = RetryTransport(
248
251
  retry=LogRetry(total=2, backoff_factor=0.2),
249
- transport=httpx.HTTPTransport(verify=True, http2=True),
252
+ transport=httpx.HTTPTransport(verify=True, http2=True, trust_env=True),
250
253
  )
251
- client = httpx.Client(transport=transport)
254
+ # first we create a client to build the proxy map from the env variables
255
+ # if proxies are set, the default transports will be used
256
+ # otherwise the RetryTransport object that we assign below
257
+ client = httpx.Client(trust_env=True)
258
+ client._transport = transport
252
259
  yield client
253
260
  finally:
254
261
  if client is not None:
@@ -320,8 +320,7 @@ class SetupSettings:
320
320
  def paths(self) -> type[SetupPaths]:
321
321
  """Convert cloud paths to lamindb local paths.
322
322
 
323
- Use `settings.paths.cloud_to_local_no_update`
324
- or `settings.paths.cloud_to_local`.
323
+ Use `settings.paths.cloud_to_local_no_update` or `settings.paths.cloud_to_local`.
325
324
  """
326
325
  return SetupPaths
327
326
 
@@ -54,8 +54,7 @@ def is_local_db_url(db_url: str) -> bool:
54
54
 
55
55
 
56
56
  def check_is_instance_remote(root: UPathStr, db: str | None) -> bool:
57
- # returns True for cloud SQLite
58
- # and remote postgres
57
+ # returns True for cloud SQLite and remote postgres
59
58
  root_str = str(root)
60
59
  if not root_str.startswith("create-s3") and get_storage_type(root_str) == "local":
61
60
  return False
@@ -83,7 +82,8 @@ class InstanceSettings:
83
82
  schema_id: UUID | None = None,
84
83
  fine_grained_access: bool = False,
85
84
  db_permissions: str | None = None,
86
- _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite
85
+ _locker_user: UserSettings | None = None, # user to lock for if cloud sqlite,
86
+ _is_clone: bool = False,
87
87
  ):
88
88
  from ._hub_utils import validate_db_arg
89
89
 
@@ -109,6 +109,7 @@ class InstanceSettings:
109
109
  self._db_permissions = db_permissions
110
110
  # if None then settings.user is used
111
111
  self._locker_user = _locker_user
112
+ self._is_clone = _is_clone
112
113
 
113
114
  def __repr__(self):
114
115
  """Rich string representation."""
@@ -434,7 +435,7 @@ class InstanceSettings:
434
435
 
435
436
  def _update_cloud_sqlite_file(self, unlock_cloud_sqlite: bool = True) -> None:
436
437
  """Upload the local sqlite file to the cloud file."""
437
- if self._is_cloud_sqlite:
438
+ if self._is_cloud_sqlite and not self._is_clone:
438
439
  sqlite_file = self._sqlite_file
439
440
  logger.warning(
440
441
  f"updating{' & unlocking' if unlock_cloud_sqlite else ''} cloud SQLite "
@@ -602,6 +603,14 @@ class InstanceSettings:
602
603
  disable_auto_connect(setup_django)(self, init=True)
603
604
 
604
605
  def _load_db(self) -> tuple[bool, str]:
606
+ """Load the database connection.
607
+
608
+ For cloud SQLite instances, downloads the database file from cloud storage.
609
+ For all instances, initializes Django ORM with the database connection.
610
+
611
+ Returns:
612
+ Tuple of (success: bool, error_message: str). Returns (True, "") on success.
613
+ """
605
614
  # Is the database available and initialized as LaminDB?
606
615
  # returns a tuple of status code and message
607
616
  if self.dialect == "sqlite" and not self._sqlite_file.exists():
@@ -615,8 +624,8 @@ class InstanceSettings:
615
624
  return False, f"SQLite file {self._sqlite_file} does not exist"
616
625
  # we need the local sqlite to setup django
617
626
  self._update_local_sqlite_file()
618
- # setting up django also performs a check for migrations & prints them
619
- # as warnings
627
+
628
+ # setting up django also performs a check for migrations & prints them as warnings
620
629
  # this should fail, e.g., if the db is not reachable
621
630
  from lamindb_setup._check_setup import disable_auto_connect
622
631
 
@@ -69,8 +69,7 @@ def load_or_create_user_settings(api_key: str | None = None) -> UserSettings:
69
69
  """Return current user settings.
70
70
 
71
71
  Args:
72
- api_key: if provided and there is no current user,
73
- perform login and return the user settings.
72
+ api_key: if provided and there is no current user, perform login and return the user settings.
74
73
  """
75
74
  current_user_settings = current_user_settings_file()
76
75
  if not current_user_settings.exists():
@@ -125,6 +124,7 @@ def setup_instance_from_store(store: InstanceSettingsStore) -> InstanceSettings:
125
124
  schema_id=None if store.schema_id in {None, "null"} else UUID(store.schema_id),
126
125
  fine_grained_access=store.fine_grained_access,
127
126
  db_permissions=_null_to_value(store.db_permissions),
127
+ _is_clone=store.is_clone,
128
128
  )
129
129
 
130
130
 
@@ -63,6 +63,7 @@ def save_settings(
63
63
  "schema_id",
64
64
  "fine_grained_access",
65
65
  "db_permissions",
66
+ "is_clone",
66
67
  }:
67
68
  settings_key = f"_{store_key.rstrip('_')}"
68
69
  else:
@@ -12,17 +12,14 @@ from lamin_utils import logger
12
12
  from lamindb_setup.errors import StorageAlreadyManaged
13
13
 
14
14
  from ._aws_options import (
15
- HOSTED_REGIONS,
16
15
  LAMIN_ENDPOINTS,
17
16
  get_aws_options_manager,
18
17
  )
19
- from ._aws_storage import find_closest_aws_region
20
18
  from ._deprecated import deprecated
21
19
  from .hashing import hash_and_encode_as_b62
22
20
  from .upath import (
23
21
  LocalPathClasses,
24
22
  UPath,
25
- _split_path_query,
26
23
  create_path,
27
24
  get_storage_region,
28
25
  )
@@ -58,12 +55,40 @@ def get_storage_type(root_as_str: str) -> StorageType:
58
55
  return convert.get(protocol, protocol) # type: ignore
59
56
 
60
57
 
58
+ def sanitize_root_user_input(root: UPathStr) -> UPath:
59
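+ """Convert a user-provided root into a `UPath`, expanding `~` and, for local paths, trying to create the `.lamindb/` folder and resolve the path."""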
60
+ root_upath = root if isinstance(root, UPath) else UPath(root)
61
+ root_upath = root_upath.expanduser()
62
+ if isinstance(root_upath, LocalPathClasses): # local paths
63
+ try:
64
+ (root_upath / ".lamindb").mkdir(parents=True, exist_ok=True)
65
+ root_upath = root_upath.resolve()
66
+ except Exception:
67
+ logger.warning(f"unable to create .lamindb/ folder in {root_upath}")
68
+ return root_upath
69
+
70
+
71
+ def convert_sanitized_root_path_to_str(root_upath: UPath) -> str:
72
+ # embed endpoint_url into path string for storing and displaying
73
+ if root_upath.protocol == "s3":
74
+ endpoint_url = root_upath.storage_options.get("endpoint_url", None)
75
+ # LAMIN_ENDPOINTS include None
76
+ if endpoint_url not in LAMIN_ENDPOINTS:
77
+ return f"s3://{root_upath.path.rstrip('/')}?endpoint_url={endpoint_url}"
78
+ return root_upath.as_posix().rstrip("/")
79
+
80
+
81
+ def convert_root_path_to_str(root: UPathStr) -> str:
82
+ """Format a root path string."""
83
+ sanitized_root_upath = sanitize_root_user_input(root)
84
+ return convert_sanitized_root_path_to_str(sanitized_root_upath)
85
+
86
+
61
87
  def mark_storage_root(
62
88
  root: UPathStr, uid: str, instance_id: UUID, instance_slug: str
63
89
  ) -> Literal["__marked__"] | str:
64
90
  # we need a file in folder-like storage locations on S3 to avoid
65
- # permission errors from leveraging s3fs on an empty hosted storage location
66
- # (path.fs.find raises a PermissionError)
91
+ # permission errors from leveraging s3fs on an empty hosted storage location (path.fs.find raises a PermissionError)
67
92
  # we also need it in case a storage location is ambiguous because a server / local environment
68
93
  # doesn't have a globally unique identifier, then we screen for this file to map the
69
94
  # path on a storage location in the registry
@@ -214,15 +239,7 @@ class StorageSettings:
214
239
  ):
215
240
  self._uid = uid
216
241
  self._uuid_ = uuid
217
- self._root_init = UPath(root).expanduser()
218
- if isinstance(self._root_init, LocalPathClasses): # local paths
219
- try:
220
- (self._root_init / ".lamindb").mkdir(parents=True, exist_ok=True)
221
- self._root_init = self._root_init.resolve()
222
- except Exception:
223
- logger.warning(
224
- f"unable to create .lamindb/ folder in {self._root_init}"
225
- )
242
+ self._root_init: UPath = sanitize_root_user_input(root)
226
243
  self._root = None
227
244
  self._instance_id = instance_id
228
245
  # we don't yet infer region here to make init fast
@@ -337,13 +354,7 @@ class StorageSettings:
337
354
  @property
338
355
  def root_as_str(self) -> str:
339
356
  """Formatted root string."""
340
- # embed endpoint_url into path string for storing and displaying
341
- if self._root_init.protocol == "s3":
342
- endpoint_url = self._root_init.storage_options.get("endpoint_url", None)
343
- # LAMIN_ENDPOINTS include None
344
- if endpoint_url not in LAMIN_ENDPOINTS:
345
- return f"s3://{self._root_init.path.rstrip('/')}?endpoint_url={endpoint_url}"
346
- return self._root_init.as_posix().rstrip("/")
357
+ return convert_sanitized_root_path_to_str(self._root_init)
347
358
 
348
359
  @property
349
360
  def cache_dir(
@@ -67,8 +67,8 @@ class InstanceSettingsStore(BaseSettings):
67
67
  owner: str
68
68
  name: str
69
69
  storage_root: str
70
- storage_region: str | None # take old type annotations here because pydantic
71
- db: str | None # doesn't like new types on 3.9 even with future annotations
70
+ storage_region: str | None
71
+ db: str | None
72
72
  schema_str: str | None
73
73
  schema_id: str | None = None
74
74
  fine_grained_access: bool = False
@@ -76,6 +76,7 @@ class InstanceSettingsStore(BaseSettings):
76
76
  id: str
77
77
  git_repo: str | None
78
78
  keep_artifacts_local: bool | None
79
+ is_clone: bool = False
79
80
  model_config = SettingsConfigDict(env_prefix="lamindb_instance_", env_file=".env")
80
81
 
81
82
 
@@ -908,12 +908,9 @@ def get_stat_file_cloud(stat: dict) -> tuple[int, str | None, str | None]:
908
908
  elif "blob_id" in stat:
909
909
  hash = b16_to_b64(stat["blob_id"])
910
910
  hash_type = "sha1"
911
- # s3
912
- # StorageClass is checked to be sure that it is indeed s3
913
- # because http also has ETag
914
911
  elif "ETag" in stat:
915
912
  etag = stat["ETag"]
916
- if "mimetype" in stat:
913
+ if "mimetype" in stat or ("url" in stat and stat["url"].startswith("http")):
917
914
  # http
918
915
  hash = hash_string(etag.strip('"'))
919
916
  hash_type = "md5-etag"
@@ -172,7 +172,14 @@ def export_db(
172
172
 
173
173
  with Progress() as progress:
174
174
  task_id = progress.add_task("Exporting", total=len(tasks))
175
- with ProcessPoolExecutor(max_workers=max_workers) as executor:
175
+
176
+ import multiprocessing
177
+
178
+ mp_context = multiprocessing.get_context("spawn")
179
+
180
+ with ProcessPoolExecutor(
181
+ max_workers=max_workers, mp_context=mp_context
182
+ ) as executor:
176
183
  futures = {
177
184
  executor.submit(_export_full_table, task, directory, chunk_size): task
178
185
  for task in tasks
@@ -237,9 +244,22 @@ def _import_registry(
237
244
  if mask.any():
238
245
  df.loc[mask, col] = df.loc[mask, col].map(_serialize_value)
239
246
 
247
+ if if_exists == "append":
248
+ # Fill NULL values in NOT NULL columns to handle schema mismatches between postgres source and SQLite target
249
+ # This allows importing data where fields were nullable
250
+ for field in registry._meta.fields:
251
+ if field.column in df.columns and not field.null:
252
+ df[field.column] = df[field.column].fillna("")
253
+
240
254
  if df.empty:
241
255
  return
242
256
 
257
+ if if_exists == "append":
258
+ # Clear existing data before import
259
+ # When appending we would run into duplicate errors because of existing values like branches etc
260
+ with connection.cursor() as cursor:
261
+ cursor.execute(f'DELETE FROM "{table_name}"')
262
+
243
263
  if connection.vendor == "postgresql":
244
264
  columns = df.columns.tolist()
245
265
  column_names = ", ".join(f'"{col}"' for col in columns)
@@ -265,6 +285,7 @@ def _import_registry(
265
285
  max_vars = 900 # SQLite has a limit of 999 variables per statement
266
286
  chunksize = max(1, max_vars // num_cols)
267
287
 
288
+ # Always use append mode since we set up the tables from a fresh instance
268
289
  df.to_sql(
269
290
  table_name,
270
291
  connection.connection,
@@ -290,6 +311,9 @@ def import_db(
290
311
  input_dir: Directory containing parquet files to import.
291
312
  module_names: Module names to import (e.g., ["lamindb", "bionty", "wetlab"]).
292
313
  if_exists: How to behave if table exists: 'fail', 'replace', or 'append'.
314
+ If set to 'replace', existing data is deleted and new data is imported. PKs and indices are not guaranteed to be preserved which can lead to write errors.
315
+ If set to 'append', new data is added to existing data without clearing the table. PKs and indices are preserved but database size will greatly increase.
316
+ If set to 'fail', raises an error if the table contains any data.
293
317
  """
294
318
  from django.db import connection
295
319
 
@@ -352,3 +376,5 @@ def import_db(
352
376
  cursor.execute("PRAGMA synchronous = FULL")
353
377
  cursor.execute("PRAGMA journal_mode = DELETE")
354
378
  cursor.execute("PRAGMA foreign_keys = ON")
379
+ # Reclaim space from DELETEs
380
+ cursor.execute("VACUUM")
@@ -1,5 +1,8 @@
1
1
  import shutil
2
+ from pathlib import Path
2
3
  from subprocess import DEVNULL, run
4
+ from unittest.mock import MagicMock, Mock, patch
5
+ from uuid import uuid4
3
6
 
4
7
  import lamindb_setup as ln_setup
5
8
  import pandas as pd
@@ -85,3 +88,50 @@ def test_connect_local_sqlite(local_postgres_instance):
85
88
 
86
89
  with pytest.raises(ValueError, match="SQLite clone not found"):
87
90
  ln_setup.core.connect_local_sqlite(f"{original_owner}/nonexistent")
91
+
92
+
93
+ def test_connect_remote_sqlite(tmp_path):
94
+ mock_instance_id = uuid4()
95
+ mock_storage_root = "s3://my-bucket/data"
96
+
97
+ with (
98
+ patch(
99
+ "lamindb_setup._connect_instance._connect_instance"
100
+ ) as mock_connect_instance,
101
+ patch("lamindb_setup.settings") as mock_settings,
102
+ patch("lamindb_setup.core._clone.InstanceSettings"),
103
+ patch("lamindb_setup.core._clone.create_path") as mock_create_path,
104
+ patch("lamindb_setup.core._clone.connect_local_sqlite"),
105
+ ):
106
+ mock_instance = Mock()
107
+ mock_instance._id = mock_instance_id
108
+ mock_instance.owner = "testowner"
109
+ mock_instance.name = "testname"
110
+ mock_instance.modules = ["module1", "module2"]
111
+ mock_instance.storage.root = mock_storage_root
112
+ mock_instance.storage.root_as_str = mock_storage_root
113
+
114
+ mock_settings.instance = mock_instance
115
+ mock_settings.storage = Mock()
116
+ mock_settings.cache_dir = Path(tmp_path)
117
+
118
+ mock_connect_instance.return_value = mock_instance
119
+
120
+ def fake_download(target_path):
121
+ target_path.parent.mkdir(parents=True, exist_ok=True)
122
+ target_path.write_bytes(b"fake db content")
123
+
124
+ def mock_create_path_fn(path):
125
+ mock_file = MagicMock()
126
+ if path.endswith(".gz"):
127
+ mock_file.exists.return_value = False
128
+ else:
129
+ mock_file.exists.return_value = True
130
+ mock_file.download_to.side_effect = fake_download
131
+ return mock_file
132
+
133
+ mock_create_path.side_effect = mock_create_path_fn
134
+
135
+ from lamindb_setup.core._clone import connect_remote_sqlite
136
+
137
+ connect_remote_sqlite("testowner/testname", copy_suffix="-copy")
@@ -213,3 +213,13 @@ def test_import_db_from_parquet(simple_instance: Callable, tmp_path):
213
213
  linked_gene = imported_artifact.genes.first()
214
214
  assert linked_gene.id == 999
215
215
  assert linked_gene.symbol == "TESTGENE"
216
+
217
+ # Verify PRIMARY KEY constraint is preserved for "append" mode that we used here
218
+ from django.db import connection
219
+
220
+ with connection.cursor() as cursor:
221
+ cursor.execute(
222
+ "SELECT sql FROM sqlite_master WHERE type='table' AND name='lamindb_artifact'"
223
+ )
224
+ create_stmt = cursor.fetchone()[0]
225
+ assert "PRIMARY KEY" in create_stmt
@@ -0,0 +1,21 @@
1
+ import os
2
+
3
+ from lamindb_setup.core._hub_client import httpx_client
4
+
5
+
6
+ def test_proxy_from_env():
7
+ with httpx_client() as client:
8
+ assert client._mounts == {}
9
+
10
+ os.environ["HTTP_PROXY"] = "http://localhost:8080"
11
+ os.environ["HTTPS_PROXY"] = "http://localhost:8080"
12
+
13
+ with httpx_client() as client:
14
+ patterns = {p.scheme for p in client._mounts}
15
+ assert patterns == {"http", "https"}
16
+
17
+ del os.environ["HTTP_PROXY"]
18
+ del os.environ["HTTPS_PROXY"]
19
+
20
+ with httpx_client() as client:
21
+ assert client._mounts == {}
@@ -1,93 +0,0 @@
1
- """Utilities to copy, clone and load Postgres instances as local SQLite databases.
2
-
3
- .. autosummary::
4
- :toctree:
5
-
6
- init_local_sqlite
7
- connect_local_sqlite
8
- """
9
-
10
- import os
11
-
12
- from lamindb_setup.core._settings_instance import InstanceSettings
13
- from lamindb_setup.core._settings_load import load_instance_settings
14
- from lamindb_setup.core._settings_store import instance_settings_file
15
- from lamindb_setup.core.django import reset_django
16
-
17
-
18
- def init_local_sqlite(
19
- instance: str | None = None, copy_suffix: str | None = None
20
- ) -> None:
21
- """Initialize SQLite copy of an existing Postgres instance.
22
-
23
- Creates a SQLite database with the same schema as the source Postgres instance.
24
- The copy shares the same storage location as the original instance.
25
-
26
- The copy is intended for read-only access to instance data without requiring a Postgres connection.
27
- Data synchronization to complete the clone happens via a separate Lambda function.
28
-
29
- Note that essential user, branch and storage tables are missing.
30
- Therefore, it is not possible to store Artifacts without having replayed these records first.
31
-
32
- Args:
33
- instance: Pass a slug (`account/name`) or URL (`https://lamin.ai/account/name`).
34
- If `None`, looks for an environment variable `LAMIN_CURRENT_INSTANCE` to get the instance identifier.
35
- If it doesn't find this variable, it connects to the instance that was connected with `lamin connect` through the CLI.
36
- copy_suffix: Optional suffix to append to the local clone name.
37
- """
38
- import lamindb_setup as ln_setup
39
-
40
- if instance is None: # pragma: no cover
41
- instance = os.environ.get("LAMIN_CURRENT_INSTANCE")
42
-
43
- if instance is None:
44
- raise ValueError(
45
- "No instance identifier provided and LAMIN_CURRENT_INSTANCE is not set"
46
- )
47
-
48
- if ln_setup.settings.instance is None: # pragma: no cover
49
- ln_setup.connect(instance)
50
-
51
- name = (
52
- f"{ln_setup.settings.instance.name}{copy_suffix}"
53
- if copy_suffix is not None
54
- else ln_setup.settings.instance.name
55
- )
56
- isettings = InstanceSettings(
57
- id=ln_setup.settings.instance._id,
58
- owner=ln_setup.settings.instance.owner, # type: ignore
59
- name=name,
60
- storage=ln_setup.settings.storage,
61
- db=None,
62
- modules=",".join(ln_setup.settings.instance.modules),
63
- is_on_hub=False,
64
- )
65
-
66
- isettings._persist(write_to_disk=True)
67
-
68
- if not isettings._sqlite_file_local.exists():
69
- # Reset Django configuration before _init_db() because Django was already configured for the original Postgres instance.
70
- # Without this reset, the if not settings.configured check in setup_django() would skip reconfiguration,
71
- # causing migrations to run against the old Postgres database instead of the new SQLite clone database.
72
- reset_django()
73
- isettings._init_db()
74
-
75
-
76
- def connect_local_sqlite(instance: str) -> None:
77
- """Load a SQLite instance of which a remote hub Postgres instance exists.
78
-
79
- This function bypasses the hub lookup that `lamin connect` performs, loading the SQLite clone directly from local settings files.
80
- The clone must first be created via `init_local_sqlite()`.
81
-
82
- Args:
83
- instance: Instance slug in the form `account/name` (e.g., `laminlabs/privatedata-local`).
84
- """
85
- owner, name = instance.split("/")
86
- settings_file = instance_settings_file(name=name, owner=owner)
87
-
88
- if not settings_file.exists():
89
- raise ValueError("SQLite clone not found. Run init_local_sqlite() first.")
90
-
91
- isettings = load_instance_settings(settings_file)
92
- isettings._persist(write_to_disk=False)
93
- isettings._load_db()
File without changes
File without changes