lamindb_setup 1.9.0__py3-none-any.whl → 1.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. lamindb_setup/__init__.py +107 -107
  2. lamindb_setup/_cache.py +87 -87
  3. lamindb_setup/_check_setup.py +166 -166
  4. lamindb_setup/_connect_instance.py +328 -342
  5. lamindb_setup/_delete.py +141 -141
  6. lamindb_setup/_disconnect.py +32 -32
  7. lamindb_setup/_init_instance.py +440 -440
  8. lamindb_setup/_migrate.py +266 -266
  9. lamindb_setup/_register_instance.py +35 -35
  10. lamindb_setup/_schema_metadata.py +441 -441
  11. lamindb_setup/_set_managed_storage.py +70 -70
  12. lamindb_setup/_setup_user.py +133 -133
  13. lamindb_setup/core/__init__.py +21 -21
  14. lamindb_setup/core/_aws_options.py +223 -223
  15. lamindb_setup/core/_hub_client.py +248 -248
  16. lamindb_setup/core/_hub_core.py +665 -665
  17. lamindb_setup/core/_hub_crud.py +227 -227
  18. lamindb_setup/core/_private_django_api.py +83 -83
  19. lamindb_setup/core/_settings.py +377 -377
  20. lamindb_setup/core/_settings_instance.py +569 -569
  21. lamindb_setup/core/_settings_load.py +141 -141
  22. lamindb_setup/core/_settings_save.py +95 -95
  23. lamindb_setup/core/_settings_storage.py +429 -429
  24. lamindb_setup/core/_settings_store.py +91 -91
  25. lamindb_setup/core/_settings_user.py +55 -55
  26. lamindb_setup/core/_setup_bionty_sources.py +44 -44
  27. lamindb_setup/core/cloud_sqlite_locker.py +240 -240
  28. lamindb_setup/core/django.py +305 -296
  29. lamindb_setup/core/exceptions.py +1 -1
  30. lamindb_setup/core/hashing.py +134 -134
  31. lamindb_setup/core/types.py +1 -1
  32. lamindb_setup/core/upath.py +1013 -1013
  33. lamindb_setup/errors.py +70 -70
  34. lamindb_setup/types.py +20 -20
  35. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/METADATA +1 -1
  36. lamindb_setup-1.9.1.dist-info/RECORD +50 -0
  37. lamindb_setup-1.9.0.dist-info/RECORD +0 -50
  38. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/LICENSE +0 -0
  39. {lamindb_setup-1.9.0.dist-info → lamindb_setup-1.9.1.dist-info}/WHEEL +0 -0
lamindb_setup/core/_aws_options.py
@@ -1,223 +1,223 @@
1
- from __future__ import annotations
2
-
3
- import logging
4
- import os
5
- import time
6
-
7
- from lamin_utils import logger
8
- from upath import UPath
9
-
10
- HOSTED_REGIONS = [
11
- "eu-central-1",
12
- "eu-west-2",
13
- "us-east-1",
14
- "us-east-2",
15
- "us-west-1",
16
- "us-west-2",
17
- ]
18
- lamin_env = os.getenv("LAMIN_ENV")
19
- if lamin_env is None or lamin_env == "prod":
20
- HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
21
- else:
22
- HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
23
-
24
-
25
- def _keep_trailing_slash(path_str: str) -> str:
26
- return path_str if path_str[-1] == "/" else path_str + "/"
27
-
28
-
29
- AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
30
-
31
-
32
- # set anon=True for these buckets if credentials fail for a public bucket
33
- # to be expanded
34
- PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
35
-
36
-
37
- # s3-comaptible endpoints managed by lamin
38
- # None means the standard aws s3 endpoint
39
- LAMIN_ENDPOINTS: tuple[str | None] = (None,)
40
-
41
-
42
- class NoTracebackFilter(logging.Filter):
43
- def filter(self, record):
44
- record.exc_info = None # Remove traceback info from the log record.
45
- return True
46
-
47
-
48
- class AWSOptionsManager:
49
- # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
50
- @staticmethod
51
- def _suppress_aiobotocore_traceback_logging():
52
- logger = logging.getLogger("aiobotocore.credentials")
53
- logger.addFilter(NoTracebackFilter())
54
-
55
- def __init__(self):
56
- self._credentials_cache = {}
57
- self._parameters_cache = {} # this is not refreshed
58
-
59
- from s3fs import S3FileSystem
60
-
61
- # this is cached so will be resued with the connection initialized
62
- fs = S3FileSystem(
63
- cache_regions=True, use_listings_cache=True, version_aware=False
64
- )
65
-
66
- self._suppress_aiobotocore_traceback_logging()
67
-
68
- try:
69
- fs.connect()
70
- self.anon: bool = fs.session._credentials is None
71
- except Exception as e:
72
- logger.warning(
73
- f"There is a problem with your default AWS Credentials: {e}\n"
74
- "`anon` mode will be used for all non-managed buckets."
75
- )
76
- self.anon = True
77
- self.anon_public: bool | None = None
78
- if not self.anon:
79
- try:
80
- # use lamindata public bucket for this test
81
- fs.call_s3("head_bucket", Bucket="lamindata")
82
- self.anon_public = False
83
- except Exception:
84
- self.anon_public = True
85
-
86
- def _find_root(self, path_str: str) -> str | None:
87
- roots = self._credentials_cache.keys()
88
- if path_str in roots:
89
- return path_str
90
- roots = sorted(roots, key=len, reverse=True)
91
- for root in roots:
92
- if path_str.startswith(root):
93
- return root
94
- return None
95
-
96
- def _is_active(self, root: str) -> bool:
97
- return (
98
- time.time() - self._credentials_cache[root]["time"]
99
- ) < AWS_CREDENTIALS_EXPIRATION
100
-
101
- def _set_cached_credentials(self, root: str, credentials: dict):
102
- if root not in self._credentials_cache:
103
- self._credentials_cache[root] = {}
104
- self._credentials_cache[root]["credentials"] = credentials
105
- self._credentials_cache[root]["time"] = time.time()
106
-
107
- def _get_cached_credentials(self, root: str) -> dict:
108
- return self._credentials_cache[root]["credentials"]
109
-
110
- def _path_inject_options(
111
- self, path: UPath, credentials: dict, extra_parameters: dict | None = None
112
- ) -> UPath:
113
- if credentials == {}:
114
- # credentials were specified manually for the path
115
- if "anon" in path.storage_options:
116
- anon = path.storage_options["anon"]
117
- elif path.fs.key is not None and path.fs.secret is not None:
118
- anon = False
119
- else:
120
- anon = self.anon
121
- if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
122
- anon = True
123
- connection_options = {"anon": anon}
124
- else:
125
- connection_options = credentials
126
-
127
- if "cache_regions" in path.storage_options:
128
- connection_options["cache_regions"] = path.storage_options["cache_regions"]
129
- else:
130
- connection_options["cache_regions"] = (
131
- path.storage_options.get("endpoint_url", None) is None
132
- )
133
- # we use cache to avoid some uneeded downloads or credential problems
134
- # see in upload_from
135
- connection_options["use_listings_cache"] = path.storage_options.get(
136
- "use_listings_cache", True
137
- )
138
- # normally we want to ignore objects vsrsions in a versioned bucket
139
- connection_options["version_aware"] = path.storage_options.get(
140
- "version_aware", False
141
- )
142
-
143
- if extra_parameters:
144
- connection_options.update(extra_parameters)
145
-
146
- return UPath(path, **connection_options)
147
-
148
- def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
149
- # ignore paths with non-lamin-managed endpoints
150
- if (
151
- endpoint_url := path.storage_options.get("endpoint_url", None)
152
- ) not in LAMIN_ENDPOINTS:
153
- if "r2.cloudflarestorage.com" in endpoint_url:
154
- # fixed_upload_size should always be True for R2
155
- # this option is needed for correct uploads to R2
156
- path = UPath(path, fixed_upload_size=True)
157
- return path
158
- # trailing slash is needed to avoid returning incorrect results
159
- # with .startswith
160
- # for example s3://lamindata-eu should not receive cache for s3://lamindata
161
- path_str = _keep_trailing_slash(path.as_posix())
162
- root = self._find_root(path_str)
163
-
164
- if root is not None:
165
- set_cache = False
166
- credentials = self._get_cached_credentials(root)
167
- extra_parameters = self._parameters_cache.get(root)
168
- if access_token is not None:
169
- set_cache = True
170
- elif credentials != {}:
171
- # update credentials
172
- if not self._is_active(root):
173
- set_cache = True
174
- else:
175
- set_cache = True
176
-
177
- if set_cache:
178
- from ._hub_core import access_aws
179
- from ._settings import settings
180
-
181
- storage_root_info = access_aws(path_str, access_token=access_token)
182
- accessibility = storage_root_info["accessibility"]
183
- is_managed = accessibility.get("is_managed", False)
184
- if is_managed:
185
- credentials = storage_root_info["credentials"]
186
- extra_parameters = accessibility["extra_parameters"]
187
- else:
188
- credentials = {}
189
- extra_parameters = None
190
-
191
- if access_token is None:
192
- if "storage_root" in accessibility:
193
- root = accessibility["storage_root"]
194
- # just to be safe
195
- root = None if root == "" else root
196
- if root is None:
197
- # heuristic
198
- # do not write the first level for the known hosted buckets
199
- if path_str.startswith(HOSTED_BUCKETS):
200
- root = "/".join(path.path.rstrip("/").split("/")[:2])
201
- else:
202
- # write the bucket for everything else
203
- root = path.drive
204
- root = "s3://" + root
205
-
206
- root = _keep_trailing_slash(root)
207
- assert isinstance(root, str)
208
- self._set_cached_credentials(root, credentials)
209
- self._parameters_cache[root] = extra_parameters
210
-
211
- return self._path_inject_options(path, credentials, extra_parameters)
212
-
213
-
214
- _aws_options_manager: AWSOptionsManager | None = None
215
-
216
-
217
- def get_aws_options_manager() -> AWSOptionsManager:
218
- global _aws_options_manager
219
-
220
- if _aws_options_manager is None:
221
- _aws_options_manager = AWSOptionsManager()
222
-
223
- return _aws_options_manager
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ import os
5
+ import time
6
+
7
+ from lamin_utils import logger
8
+ from upath import UPath
9
+
10
+ HOSTED_REGIONS = [
11
+ "eu-central-1",
12
+ "eu-west-2",
13
+ "us-east-1",
14
+ "us-east-2",
15
+ "us-west-1",
16
+ "us-west-2",
17
+ ]
18
+ lamin_env = os.getenv("LAMIN_ENV")
19
+ if lamin_env is None or lamin_env == "prod":
20
+ HOSTED_BUCKETS = tuple([f"s3://lamin-{region}" for region in HOSTED_REGIONS])
21
+ else:
22
+ HOSTED_BUCKETS = ("s3://lamin-hosted-test",) # type: ignore
23
+
24
+
25
+ def _keep_trailing_slash(path_str: str) -> str:
26
+ return path_str if path_str[-1] == "/" else path_str + "/"
27
+
28
+
29
+ AWS_CREDENTIALS_EXPIRATION: int = 11 * 60 * 60 # refresh credentials after 11 hours
30
+
31
+
32
+ # set anon=True for these buckets if credentials fail for a public bucket
33
+ # to be expanded
34
+ PUBLIC_BUCKETS: tuple[str, ...] = ("cellxgene-data-public", "bionty-assets")
35
+
36
+
37
+ # s3-comaptible endpoints managed by lamin
38
+ # None means the standard aws s3 endpoint
39
+ LAMIN_ENDPOINTS: tuple[str | None] = (None,)
40
+
41
+
42
+ class NoTracebackFilter(logging.Filter):
43
+ def filter(self, record):
44
+ record.exc_info = None # Remove traceback info from the log record.
45
+ return True
46
+
47
+
48
+ class AWSOptionsManager:
49
+ # suppress giant traceback logs from aiobotocore when failing to refresh sso etc
50
+ @staticmethod
51
+ def _suppress_aiobotocore_traceback_logging():
52
+ logger = logging.getLogger("aiobotocore.credentials")
53
+ logger.addFilter(NoTracebackFilter())
54
+
55
+ def __init__(self):
56
+ self._credentials_cache = {}
57
+ self._parameters_cache = {} # this is not refreshed
58
+
59
+ from s3fs import S3FileSystem
60
+
61
+ # this is cached so will be resued with the connection initialized
62
+ fs = S3FileSystem(
63
+ cache_regions=True, use_listings_cache=True, version_aware=False
64
+ )
65
+
66
+ self._suppress_aiobotocore_traceback_logging()
67
+
68
+ try:
69
+ fs.connect()
70
+ self.anon: bool = fs.session._credentials is None
71
+ except Exception as e:
72
+ logger.warning(
73
+ f"There is a problem with your default AWS Credentials: {e}\n"
74
+ "`anon` mode will be used for all non-managed buckets."
75
+ )
76
+ self.anon = True
77
+ self.anon_public: bool | None = None
78
+ if not self.anon:
79
+ try:
80
+ # use lamindata public bucket for this test
81
+ fs.call_s3("head_bucket", Bucket="lamindata")
82
+ self.anon_public = False
83
+ except Exception:
84
+ self.anon_public = True
85
+
86
+ def _find_root(self, path_str: str) -> str | None:
87
+ roots = self._credentials_cache.keys()
88
+ if path_str in roots:
89
+ return path_str
90
+ roots = sorted(roots, key=len, reverse=True)
91
+ for root in roots:
92
+ if path_str.startswith(root):
93
+ return root
94
+ return None
95
+
96
+ def _is_active(self, root: str) -> bool:
97
+ return (
98
+ time.time() - self._credentials_cache[root]["time"]
99
+ ) < AWS_CREDENTIALS_EXPIRATION
100
+
101
+ def _set_cached_credentials(self, root: str, credentials: dict):
102
+ if root not in self._credentials_cache:
103
+ self._credentials_cache[root] = {}
104
+ self._credentials_cache[root]["credentials"] = credentials
105
+ self._credentials_cache[root]["time"] = time.time()
106
+
107
+ def _get_cached_credentials(self, root: str) -> dict:
108
+ return self._credentials_cache[root]["credentials"]
109
+
110
+ def _path_inject_options(
111
+ self, path: UPath, credentials: dict, extra_parameters: dict | None = None
112
+ ) -> UPath:
113
+ if credentials == {}:
114
+ # credentials were specified manually for the path
115
+ if "anon" in path.storage_options:
116
+ anon = path.storage_options["anon"]
117
+ elif path.fs.key is not None and path.fs.secret is not None:
118
+ anon = False
119
+ else:
120
+ anon = self.anon
121
+ if not anon and self.anon_public and path.drive in PUBLIC_BUCKETS:
122
+ anon = True
123
+ connection_options = {"anon": anon}
124
+ else:
125
+ connection_options = credentials
126
+
127
+ if "cache_regions" in path.storage_options:
128
+ connection_options["cache_regions"] = path.storage_options["cache_regions"]
129
+ else:
130
+ connection_options["cache_regions"] = (
131
+ path.storage_options.get("endpoint_url", None) is None
132
+ )
133
+ # we use cache to avoid some uneeded downloads or credential problems
134
+ # see in upload_from
135
+ connection_options["use_listings_cache"] = path.storage_options.get(
136
+ "use_listings_cache", True
137
+ )
138
+ # normally we want to ignore objects vsrsions in a versioned bucket
139
+ connection_options["version_aware"] = path.storage_options.get(
140
+ "version_aware", False
141
+ )
142
+
143
+ if extra_parameters:
144
+ connection_options.update(extra_parameters)
145
+
146
+ return UPath(path, **connection_options)
147
+
148
+ def enrich_path(self, path: UPath, access_token: str | None = None) -> UPath:
149
+ # ignore paths with non-lamin-managed endpoints
150
+ if (
151
+ endpoint_url := path.storage_options.get("endpoint_url", None)
152
+ ) not in LAMIN_ENDPOINTS:
153
+ if "r2.cloudflarestorage.com" in endpoint_url:
154
+ # fixed_upload_size should always be True for R2
155
+ # this option is needed for correct uploads to R2
156
+ path = UPath(path, fixed_upload_size=True)
157
+ return path
158
+ # trailing slash is needed to avoid returning incorrect results
159
+ # with .startswith
160
+ # for example s3://lamindata-eu should not receive cache for s3://lamindata
161
+ path_str = _keep_trailing_slash(path.as_posix())
162
+ root = self._find_root(path_str)
163
+
164
+ if root is not None:
165
+ set_cache = False
166
+ credentials = self._get_cached_credentials(root)
167
+ extra_parameters = self._parameters_cache.get(root)
168
+ if access_token is not None:
169
+ set_cache = True
170
+ elif credentials != {}:
171
+ # update credentials
172
+ if not self._is_active(root):
173
+ set_cache = True
174
+ else:
175
+ set_cache = True
176
+
177
+ if set_cache:
178
+ from ._hub_core import access_aws
179
+ from ._settings import settings
180
+
181
+ storage_root_info = access_aws(path_str, access_token=access_token)
182
+ accessibility = storage_root_info["accessibility"]
183
+ is_managed = accessibility.get("is_managed", False)
184
+ if is_managed:
185
+ credentials = storage_root_info["credentials"]
186
+ extra_parameters = accessibility["extra_parameters"]
187
+ else:
188
+ credentials = {}
189
+ extra_parameters = None
190
+
191
+ if access_token is None:
192
+ if "storage_root" in accessibility:
193
+ root = accessibility["storage_root"]
194
+ # just to be safe
195
+ root = None if root == "" else root
196
+ if root is None:
197
+ # heuristic
198
+ # do not write the first level for the known hosted buckets
199
+ if path_str.startswith(HOSTED_BUCKETS):
200
+ root = "/".join(path.path.rstrip("/").split("/")[:2])
201
+ else:
202
+ # write the bucket for everything else
203
+ root = path.drive
204
+ root = "s3://" + root
205
+
206
+ root = _keep_trailing_slash(root)
207
+ assert isinstance(root, str)
208
+ self._set_cached_credentials(root, credentials)
209
+ self._parameters_cache[root] = extra_parameters
210
+
211
+ return self._path_inject_options(path, credentials, extra_parameters)
212
+
213
+
214
+ _aws_options_manager: AWSOptionsManager | None = None
215
+
216
+
217
+ def get_aws_options_manager() -> AWSOptionsManager:
218
+ global _aws_options_manager
219
+
220
+ if _aws_options_manager is None:
221
+ _aws_options_manager = AWSOptionsManager()
222
+
223
+ return _aws_options_manager