lamindb_setup-1.8.3-py3-none-any.whl → lamindb_setup-1.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +166 -166
  4. lamindb_setup/_connect_instance.py +328 -342
  5. lamindb_setup/_delete.py +141 -141
  6. lamindb_setup/_disconnect.py +32 -32
  7. lamindb_setup/_init_instance.py +440 -440
  8. lamindb_setup/_migrate.py +266 -259
  9. lamindb_setup/_register_instance.py +35 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +70 -70
  12. lamindb_setup/_setup_user.py +133 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -211
  15. lamindb_setup/core/_hub_client.py +248 -243
  16. lamindb_setup/core/_hub_core.py +665 -663
  17. lamindb_setup/core/_hub_crud.py +227 -227
  18. lamindb_setup/core/_private_django_api.py +83 -83
  19. lamindb_setup/core/_settings.py +377 -364
  20. lamindb_setup/core/_settings_instance.py +569 -568
  21. lamindb_setup/core/_settings_load.py +141 -141
  22. lamindb_setup/core/_settings_save.py +95 -95
  23. lamindb_setup/core/_settings_storage.py +429 -429
  24. lamindb_setup/core/_settings_store.py +91 -91
  25. lamindb_setup/core/_settings_user.py +55 -55
  26. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  27. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  28. lamindb_setup/core/django.py +305 -291
  29. lamindb_setup/core/exceptions.py +1 -1
  30. lamindb_setup/core/hashing.py +134 -134
  31. lamindb_setup/core/types.py +1 -1
  32. lamindb_setup/core/upath.py +1013 -1009
  33. lamindb_setup/errors.py +70 -70
  34. lamindb_setup/types.py +20 -20
  35. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/METADATA +1 -1
  36. lamindb_setup-1.9.1.dist-info/RECORD +50 -0
  37. lamindb_setup-1.8.3.dist-info/RECORD +0 -50
  38. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/LICENSE +0 -0
  39. {lamindb_setup-1.8.3.dist-info → lamindb_setup-1.9.1.dist-info}/WHEEL +0 -0
@@ -1,211 +1,223 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- import os
5
- import time
6
-
7
- from lamin_utils import logger
8
- from upath import UPath
9
-
10
- HOSTED_REGIONS = [
11
- "eu-central-1",
12
- "eu-west-2",
13
- "us-east-1",
14
- "us-east-2",
15
- "us-west-1",
16
- "us-west-2",
17
- ]
18
- lamin_env = os.getenv("LAMIN_ENV")
19
- if lamin_env is None or lamin_env == "prod":
20
- HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
21
- else:
22
- HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
23
-
24
-
25
- def _keep_trailing_slash(path_str: str):
26
- return path_str if path_str[-1] == "/" else path_str + "/"
27
-
28
-
29
- AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
30
-
31
-
32
- # set anon=True for these buckets if credentials fail for a public bucket
33
- # to be expanded
34
- PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
35
-
36
-
37
- # s3-comaptible endpoints managed by lamin
38
- # None means the standard aws s3 endpoint
39
- LAMIN_ENDPOINTS: tuple[str | None] = (None,)
40
-
41
-
42
- class NoTracebackFilter(logging.Filter):
43
- def filter(self, record):
44
- record.exc_info = None # Remove traceback info from the log record.
45
- return True
46
-
47
-
48
- class AWSOptionsManager:
49
- # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
50
- @staticmethod
51
- def _suppress_aiobotocore_traceback_logging():
52
- logger = logging.getLogger("aiobotocore.credentials")
53
- logger.addFilter(NoTracebackFilter())
54
-
55
- def __init__(self):
56
- self._credentials_cache = {}
57
-
58
- from s3fs import S3FileSystem
59
-
60
- # this is cached so will be resued with the connection initialized
61
- fs = S3FileSystem(
62
- cache_regions=True, use_listings_cache=True, version_aware=False
63
- )
64
-
65
- self._suppress_aiobotocore_traceback_logging()
66
-
67
- try:
68
- fs.connect()
69
- self.anon: bool = fs.session._credentials is None
70
- except Exception as e:
71
- logger.warning(
72
- f"There is a problem with your default AWS Credentials: {e}\n"
73
- "`anon` mode will be used for all non-managed buckets."
74
- )
75
- self.anon = True
76
- self.anon_public: bool | None = None
77
- if not self.anon:
78
- try:
79
- # use lamindata public bucket for this test
80
- fs.call_s3("head_bucket", Bucket="lamindata")
81
- self.anon_public = False
82
- except Exception:
83
- self.anon_public = True
84
-
85
- def _find_root(self, path_str: str) -> str | None:
86
- roots = self._credentials_cache.keys()
87
- if path_str in roots:
88
- return path_str
89
- roots = sorted(roots, key=len, reverse=True)
90
- for root in roots:
91
- if path_str.startswith(root):
92
- return root
93
- return None
94
-
95
- def _is_active(self, root: str) -> bool:
96
- return (
97
- time.time() - self._credentials_cache[root]["time"]
98
- ) < AWS_CREDENTIALS_EXPIRATION
99
-
100
- def _set_cached_credentials(self, root: str, credentials: dict):
101
- if root not in self._credentials_cache:
102
- self._credentials_cache[root] = {}
103
- self._credentials_cache[root]["credentials"] = credentials
104
- self._credentials_cache[root]["time"] = time.time()
105
-
106
- def _get_cached_credentials(self, root: str) -> dict:
107
- return self._credentials_cache[root]["credentials"]
108
-
109
- def _path_inject_options(self, path: UPath, credentials: dict) -> UPath:
110
- if credentials == {}:
111
- # credentials were specified manually for the path
112
- if "anon" in path.storage_options:
113
- anon = path.storage_options["anon"]
114
- elif path.fs.key is not None and path.fs.secret is not None:
115
- anon = False
116
- else:
117
- anon = self.anon
118
- if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
119
- anon = True
120
- connection_options = {"anon": anon}
121
- else:
122
- connection_options = credentials
123
-
124
- if "cache_regions" in path.storage_options:
125
- connection_options["cache_regions"] = path.storage_options["cache_regions"]
126
- else:
127
- connection_options["cache_regions"] = (
128
- path.storage_options.get("endpoint_url", None) is None
129
- )
130
- # we use cache to avoid some uneeded downloads or credential problems
131
- # see in upload_from
132
- connection_options["use_listings_cache"] = path.storage_options.get(
133
- "use_listings_cache", True
134
- )
135
- # normally we want to ignore objects vsrsions in a versioned bucket
136
- connection_options["version_aware"] = path.storage_options.get(
137
- "version_aware", False
138
- )
139
-
140
- return UPath(path, **connection_options)
141
-
142
- def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
143
- # ignore paths with non-lamin-managed endpoints
144
- if (
145
- endpoint_url := path.storage_options.get("endpoint_url", None)
146
- ) not in LAMIN_ENDPOINTS:
147
- if "r2.cloudflarestorage.com" in endpoint_url:
148
- # fixed_upload_size should always be True for R2
149
- # this option is needed for correct uploads to R2
150
- path = UPath(path, fixed_upload_size=True)
151
- return path
152
- # trailing slash is needed to avoid returning incorrect results
153
- # with .startswith
154
- # for example s3://lamindata-eu should not receive cache for s3://lamindata
155
- path_str = _keep_trailing_slash(path.as_posix())
156
- root = self._find_root(path_str)
157
-
158
- if root is not None:
159
- set_cache = False
160
- credentials = self._get_cached_credentials(root)
161
-
162
- if access_token is not None:
163
- set_cache = True
164
- elif credentials != {}:
165
- # update credentials
166
- if not self._is_active(root):
167
- set_cache = True
168
- else:
169
- set_cache = True
170
-
171
- if set_cache:
172
- from ._hub_core import access_aws
173
- from ._settings import settings
174
-
175
- storage_root_info = access_aws(path_str, access_token=access_token)
176
- accessibility = storage_root_info["accessibility"]
177
- is_managed = accessibility.get("is_managed", False)
178
- if is_managed:
179
- credentials = storage_root_info["credentials"]
180
- else:
181
- credentials = {}
182
-
183
- if access_token is None:
184
- if "storage_root" in accessibility:
185
- root = accessibility["storage_root"]
186
- # just to be safe
187
- root = None if root == "" else root
188
- if root is None:
189
- # heuristic
190
- # do not write the first level for the known hosted buckets
191
- if path_str.startswith(HOSTED_BUCKETS):
192
- root = "/".join(path.path.rstrip("/").split("/")[:2])
193
- else:
194
- # write the bucket for everything else
195
- root = path.drive
196
- root = "s3://" + root
197
- self._set_cached_credentials(_keep_trailing_slash(root), credentials)
198
-
199
- return self._path_inject_options(path, credentials)
200
-
201
-
202
- _aws_options_manager: AWSOptionsManager | None = None
203
-
204
-
205
- def get_aws_options_manager() -> AWSOptionsManager:
206
- global _aws_options_manager
207
-
208
- if _aws_options_manager is None:
209
- _aws_options_manager = AWSOptionsManager()
210
-
211
- return _aws_options_manager
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import time
6
+
7
+ from lamin_utils import logger
8
+ from upath import UPath
9
+
10
+ HOSTED_REGIONS = [
11
+ "eu-central-1",
12
+ "eu-west-2",
13
+ "us-east-1",
14
+ "us-east-2",
15
+ "us-west-1",
16
+ "us-west-2",
17
+ ]
18
+ lamin_env = os.getenv("LAMIN_ENV")
19
+ if lamin_env is None or lamin_env == "prod":
20
+ HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
21
+ else:
22
+ HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
23
+
24
+
25
+ def _keep_trailing_slash(path_str: str) -> str:
26
+ return path_str if path_str[-1] == "/" else path_str + "/"
27
+
28
+
29
+ AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
30
+
31
+
32
+ # set anon=True for these buckets if credentials fail for a public bucket
33
+ # to be expanded
34
+ PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
35
+
36
+
37
+ # s3-comaptible endpoints managed by lamin
38
+ # None means the standard aws s3 endpoint
39
+ LAMIN_ENDPOINTS: tuple[str | None] = (None,)
40
+
41
+
42
+ class NoTracebackFilter(logging.Filter):
43
+ def filter(self, record):
44
+ record.exc_info = None # Remove traceback info from the log record.
45
+ return True
46
+
47
+
48
+ class AWSOptionsManager:
49
+ # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
50
+ @staticmethod
51
+ def _suppress_aiobotocore_traceback_logging():
52
+ logger = logging.getLogger("aiobotocore.credentials")
53
+ logger.addFilter(NoTracebackFilter())
54
+
55
+ def __init__(self):
56
+ self._credentials_cache = {}
57
+ self._parameters_cache = {} # this is not refreshed
58
+
59
+ from s3fs import S3FileSystem
60
+
61
+ # this is cached so will be resued with the connection initialized
62
+ fs = S3FileSystem(
63
+ cache_regions=True, use_listings_cache=True, version_aware=False
64
+ )
65
+
66
+ self._suppress_aiobotocore_traceback_logging()
67
+
68
+ try:
69
+ fs.connect()
70
+ self.anon: bool = fs.session._credentials is None
71
+ except Exception as e:
72
+ logger.warning(
73
+ f"There is a problem with your default AWS Credentials: {e}\n"
74
+ "`anon` mode will be used for all non-managed buckets."
75
+ )
76
+ self.anon = True
77
+ self.anon_public: bool | None = None
78
+ if not self.anon:
79
+ try:
80
+ # use lamindata public bucket for this test
81
+ fs.call_s3("head_bucket", Bucket="lamindata")
82
+ self.anon_public = False
83
+ except Exception:
84
+ self.anon_public = True
85
+
86
+ def _find_root(self, path_str: str) -> str | None:
87
+ roots = self._credentials_cache.keys()
88
+ if path_str in roots:
89
+ return path_str
90
+ roots = sorted(roots, key=len, reverse=True)
91
+ for root in roots:
92
+ if path_str.startswith(root):
93
+ return root
94
+ return None
95
+
96
+ def _is_active(self, root: str) -> bool:
97
+ return (
98
+ time.time() - self._credentials_cache[root]["time"]
99
+ ) < AWS_CREDENTIALS_EXPIRATION
100
+
101
+ def _set_cached_credentials(self, root: str, credentials: dict):
102
+ if root not in self._credentials_cache:
103
+ self._credentials_cache[root] = {}
104
+ self._credentials_cache[root]["credentials"] = credentials
105
+ self._credentials_cache[root]["time"] = time.time()
106
+
107
+ def _get_cached_credentials(self, root: str) -> dict:
108
+ return self._credentials_cache[root]["credentials"]
109
+
110
+ def _path_inject_options(
111
+ self, path: UPath, credentials: dict, extra_parameters: dict | None = None
112
+ ) -> UPath:
113
+ if credentials == {}:
114
+ # credentials were specified manually for the path
115
+ if "anon" in path.storage_options:
116
+ anon = path.storage_options["anon"]
117
+ elif path.fs.key is not None and path.fs.secret is not None:
118
+ anon = False
119
+ else:
120
+ anon = self.anon
121
+ if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
122
+ anon = True
123
+ connection_options = {"anon": anon}
124
+ else:
125
+ connection_options = credentials
126
+
127
+ if "cache_regions" in path.storage_options:
128
+ connection_options["cache_regions"] = path.storage_options["cache_regions"]
129
+ else:
130
+ connection_options["cache_regions"] = (
131
+ path.storage_options.get("endpoint_url", None) is None
132
+ )
133
+ # we use cache to avoid some uneeded downloads or credential problems
134
+ # see in upload_from
135
+ connection_options["use_listings_cache"] = path.storage_options.get(
136
+ "use_listings_cache", True
137
+ )
138
+ # normally we want to ignore objects vsrsions in a versioned bucket
139
+ connection_options["version_aware"] = path.storage_options.get(
140
+ "version_aware", False
141
+ )
142
+
143
+ if extra_parameters:
144
+ connection_options.update(extra_parameters)
145
+
146
+ return UPath(path, **connection_options)
147
+
148
+ def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
149
+ # ignore paths with non-lamin-managed endpoints
150
+ if (
151
+ endpoint_url := path.storage_options.get("endpoint_url", None)
152
+ ) not in LAMIN_ENDPOINTS:
153
+ if "r2.cloudflarestorage.com" in endpoint_url:
154
+ # fixed_upload_size should always be True for R2
155
+ # this option is needed for correct uploads to R2
156
+ path = UPath(path, fixed_upload_size=True)
157
+ return path
158
+ # trailing slash is needed to avoid returning incorrect results
159
+ # with .startswith
160
+ # for example s3://lamindata-eu should not receive cache for s3://lamindata
161
+ path_str = _keep_trailing_slash(path.as_posix())
162
+ root = self._find_root(path_str)
163
+
164
+ if root is not None:
165
+ set_cache = False
166
+ credentials = self._get_cached_credentials(root)
167
+ extra_parameters = self._parameters_cache.get(root)
168
+ if access_token is not None:
169
+ set_cache = True
170
+ elif credentials != {}:
171
+ # update credentials
172
+ if not self._is_active(root):
173
+ set_cache = True
174
+ else:
175
+ set_cache = True
176
+
177
+ if set_cache:
178
+ from ._hub_core import access_aws
179
+ from ._settings import settings
180
+
181
+ storage_root_info = access_aws(path_str, access_token=access_token)
182
+ accessibility = storage_root_info["accessibility"]
183
+ is_managed = accessibility.get("is_managed", False)
184
+ if is_managed:
185
+ credentials = storage_root_info["credentials"]
186
+ extra_parameters = accessibility["extra_parameters"]
187
+ else:
188
+ credentials = {}
189
+ extra_parameters = None
190
+
191
+ if access_token is None:
192
+ if "storage_root" in accessibility:
193
+ root = accessibility["storage_root"]
194
+ # just to be safe
195
+ root = None if root == "" else root
196
+ if root is None:
197
+ # heuristic
198
+ # do not write the first level for the known hosted buckets
199
+ if path_str.startswith(HOSTED_BUCKETS):
200
+ root = "/".join(path.path.rstrip("/").split("/")[:2])
201
+ else:
202
+ # write the bucket for everything else
203
+ root = path.drive
204
+ root = "s3://" + root
205
+
206
+ root = _keep_trailing_slash(root)
207
+ assert isinstance(root, str)
208
+ self._set_cached_credentials(root, credentials)
209
+ self._parameters_cache[root] = extra_parameters
210
+
211
+ return self._path_inject_options(path, credentials, extra_parameters)
212
+
213
+
214
+ _aws_options_manager: AWSOptionsManager | None = None
215
+
216
+
217
+ def get_aws_options_manager() -> AWSOptionsManager:
218
+ global _aws_options_manager
219
+
220
+ if _aws_options_manager is None:
221
+ _aws_options_manager = AWSOptionsManager()
222
+
223
+ return _aws_options_manager