lamindb_setup 1.9.1__py3-none-any.whl → 1.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb_setup/__init__.py +107 -107
- lamindb_setup/_cache.py +87 -87
- lamindb_setup/_check_setup.py +192 -166
- lamindb_setup/_connect_instance.py +415 -328
- lamindb_setup/_delete.py +144 -141
- lamindb_setup/_disconnect.py +35 -32
- lamindb_setup/_init_instance.py +430 -440
- lamindb_setup/_migrate.py +278 -266
- lamindb_setup/_register_instance.py +32 -35
- lamindb_setup/_schema_metadata.py +441 -441
- lamindb_setup/_set_managed_storage.py +69 -70
- lamindb_setup/_setup_user.py +172 -133
- lamindb_setup/core/__init__.py +21 -21
- lamindb_setup/core/_aws_options.py +223 -223
- lamindb_setup/core/_aws_storage.py +9 -1
- lamindb_setup/core/_hub_client.py +248 -248
- lamindb_setup/core/_hub_core.py +728 -665
- lamindb_setup/core/_hub_crud.py +227 -227
- lamindb_setup/core/_private_django_api.py +83 -83
- lamindb_setup/core/_settings.py +384 -377
- lamindb_setup/core/_settings_instance.py +577 -569
- lamindb_setup/core/_settings_load.py +141 -141
- lamindb_setup/core/_settings_save.py +95 -95
- lamindb_setup/core/_settings_storage.py +427 -429
- lamindb_setup/core/_settings_store.py +91 -91
- lamindb_setup/core/_settings_user.py +55 -55
- lamindb_setup/core/_setup_bionty_sources.py +44 -44
- lamindb_setup/core/cloud_sqlite_locker.py +240 -240
- lamindb_setup/core/django.py +315 -305
- lamindb_setup/core/exceptions.py +1 -1
- lamindb_setup/core/hashing.py +134 -134
- lamindb_setup/core/types.py +1 -1
- lamindb_setup/core/upath.py +1013 -1013
- lamindb_setup/errors.py +80 -70
- lamindb_setup/types.py +20 -20
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/METADATA +3 -3
- lamindb_setup-1.10.0.dist-info/RECORD +50 -0
- lamindb_setup-1.9.1.dist-info/RECORD +0 -50
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/LICENSE +0 -0
- {lamindb_setup-1.9.1.dist-info → lamindb_setup-1.10.0.dist-info}/WHEEL +0 -0
|
@@ -1,441 +1,441 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import hashlib
|
|
4
|
-
import importlib
|
|
5
|
-
import json
|
|
6
|
-
from typing import TYPE_CHECKING, Literal
|
|
7
|
-
from uuid import UUID
|
|
8
|
-
|
|
9
|
-
from django.db.models import (
|
|
10
|
-
Field,
|
|
11
|
-
ForeignKey,
|
|
12
|
-
ForeignObjectRel,
|
|
13
|
-
ManyToManyField,
|
|
14
|
-
ManyToManyRel,
|
|
15
|
-
ManyToOneRel,
|
|
16
|
-
OneToOneField,
|
|
17
|
-
OneToOneRel,
|
|
18
|
-
)
|
|
19
|
-
from lamin_utils import logger
|
|
20
|
-
from pydantic import BaseModel
|
|
21
|
-
|
|
22
|
-
from lamindb_setup import settings
|
|
23
|
-
from lamindb_setup._init_instance import get_schema_module_name
|
|
24
|
-
from lamindb_setup.core._hub_client import call_with_fallback_auth
|
|
25
|
-
|
|
26
|
-
# Suppress pydantic's warning about fields in the protected `model_` namespace
# (`FieldMetadata.model_name` further down is such a field).
try:
    BaseModel.model_config["protected_namespaces"] = ()
    logger.debug(
        "pydantic.BaseModel.model_config['protected_namespaces'] has been set to ()"
    )
except Exception as error:
    # Best effort only: a failure here must never break importing this module,
    # but leave a debug trace instead of swallowing it silently.
    logger.debug(
        "pydantic.BaseModel.model_config['protected_namespaces'] could not be"
        f" set to (): {error}"
    )
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
if TYPE_CHECKING:
|
|
37
|
-
from supabase import Client
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def update_schema_in_hub(access_token: str | None = None) -> tuple[bool, UUID, dict]:
    """Synchronize the schema of the current instance with the hub.

    Runs `_synchronize_schema` through `call_with_fallback_auth`, forwarding
    `access_token` to it.

    Args:
        access_token: Optional token handed to `call_with_fallback_auth`.

    Returns:
        The `(is_new, schema_uuid, schema)` tuple produced by
        `_synchronize_schema` (presumably passed through unchanged by
        `call_with_fallback_auth` -- confirm against that helper).
    """
    sync_result = call_with_fallback_auth(
        _synchronize_schema,
        access_token=access_token,
    )
    return sync_result
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _synchronize_schema(client: Client) -> tuple[bool, UUID, dict]:
    """Make sure the current schema exists in the hub and link the instance to it.

    The schema metadata of the current instance is serialized, an id is derived
    from that serialization, and a row is inserted into the hub's `schema`
    table if no row with that id exists yet. Afterwards the `schema_id` of the
    current instance's row in the `instance` table is set to that id.

    Args:
        client: Hub client used for all table operations.

    Returns:
        A tuple `(is_new, schema_uuid, schema)`: whether the schema row had to
        be inserted, the derived schema id, and the schema row.

    Raises:
        AssertionError: If the instance update did not affect exactly one row.
    """
    schema_metadata = _SchemaHandler()
    schema_metadata_dict = schema_metadata.to_json()
    # the id is derived from the schema content itself
    schema_uuid = _dict_to_uuid(schema_metadata_dict)
    schema = _get_schema_by_id(schema_uuid, client)

    is_new = schema is None
    if is_new:
        module_set_info = schema_metadata._get_module_set_info()
        module_ids = "-".join(str(module_info["id"]) for module_info in module_set_info)
        insert_response = (
            client.table("schema")
            .insert(
                {
                    "id": schema_uuid.hex,
                    "module_ids": module_ids,
                    "module_set_info": module_set_info,
                    "schema_json": schema_metadata_dict,
                }
            )
            .execute()
        )
        schema = insert_response.data[0]

    instance_id_hex = settings.instance._id.hex
    instance_response = (
        client.table("instance")
        .update({"schema_id": schema_uuid.hex})
        .eq("id", instance_id_hex)
        .execute()
    )
    # Raised explicitly rather than via an `assert` statement so that the check
    # is not stripped when Python runs with `-O`; the exception type is unchanged.
    if len(instance_response.data) != 1:
        raise AssertionError(
            f"schema of instance {instance_id_hex} could not be updated with schema {schema_uuid.hex}"
        )

    return is_new, schema_uuid, schema
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def get_schema_by_id(id: UUID):
    """Look up a schema row in the hub by its id.

    Runs `_get_schema_by_id` through `call_with_fallback_auth`, passing `id`
    as a keyword argument.

    Args:
        id: Id of the schema row to look up.

    Returns:
        The result of `call_with_fallback_auth` (presumably the row returned
        by `_get_schema_by_id`, or `None` -- confirm against that helper).
    """
    lookup_result = call_with_fallback_auth(_get_schema_by_id, id=id)
    return lookup_result
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def _get_schema_by_id(id: UUID, client: Client):
    """Select the row of the hub's `schema` table whose `id` equals `id.hex`.

    Args:
        id: Id of the schema row to look up.
        client: Hub client used to run the query.

    Returns:
        The first matching row, or `None` when the query returns no rows.
    """
    query = client.table("schema").select("*").eq("id", id.hex)
    rows = query.execute().data
    return None if len(rows) == 0 else rows[0]
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
def _dict_to_uuid(dict: dict):
    """Derive a deterministic UUID from the content of a dictionary.

    The dictionary is dumped to JSON with sorted keys, the UTF-8 encoded text
    is hashed with MD5, and the digest bytes become the UUID bytes. Equal
    dictionaries therefore map to the same UUID regardless of key order.

    Args:
        dict: A JSON-serializable dictionary.

    Returns:
        The `UUID` built from the MD5 digest.
    """
    canonical_json = json.dumps(dict, sort_keys=True)
    digest = hashlib.md5(canonical_json.encode("utf-8")).digest()
    # `UUID(bytes=...)` needs exactly 16 bytes
    return UUID(bytes=digest[:16])
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
# Relation kinds that `FieldMetadata.relation_type` may take.
RelationType = Literal[
    "many-to-one",
    "one-to-many",
    "many-to-many",
    "one-to-one",
]
# Django field type names that `FieldMetadata.type` may take.
Type = Literal[
    "ForeignKey",
    # the names below are generated with
    # `from django.db import models; [attr for attr in dir(models) if attr.endswith('Field')]`
    "AutoField",
    "BigAutoField",
    "BigIntegerField",
    "BinaryField",
    "BooleanField",
    "CharField",
    "CommaSeparatedIntegerField",
    "DateField",
    "DateTimeField",
    "DecimalField",
    "DurationField",
    "EmailField",
    "Field",
    "FileField",
    "FilePathField",
    "FloatField",
    "GeneratedField",
    "GenericIPAddressField",
    "IPAddressField",
    "ImageField",
    "IntegerField",
    "JSONField",
    "ManyToManyField",
    "NullBooleanField",
    "OneToOneField",
    "PositiveBigIntegerField",
    "PositiveIntegerField",
    "PositiveSmallIntegerField",
    "SlugField",
    "SmallAutoField",
    "SmallIntegerField",
    "TextField",
    "TimeField",
    "URLField",
    "UUIDField",
]
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
class Through(BaseModel):
    # Describes how the two sides of a relation are joined.
    # `_ModelHandler` fills the keys with column names; which side ends up
    # "left" depends on the direction the relation is viewed from.
    left_key: str
    right_key: str
    # Database table of the many-to-many link model; `_ModelHandler` leaves it
    # unset (None) for the relations handled by `_get_through`.
    link_table_name: str | None = None
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
class FieldMetadata(BaseModel):
    # Serializable description of one model field, built by
    # `_ModelHandler._get_field_metadata`.

    # result of the Django field's `get_internal_type()`
    type: Type
    # `field.column`; stays None for reverse relations and for
    # many-to-many / one-to-many relations
    column_name: str | None = None
    # join description; None when the field is not a relation
    through: Through | None = None
    # field / model / schema the entry belongs to (swapped with the related
    # side for one-to-many relations)
    field_name: str
    model_name: str
    schema_name: str
    # whether the field's model is a subclass of `IsLink`
    is_link_table: bool
    # `field.primary_key`, False when the field has no such attribute
    is_primary_key: bool
    # `field.editable` for plain fields, always False for relations
    is_editable: bool
    # `field.max_length` for plain fields, always None for relations
    max_length: int | None = None
    # None when the field is not a relation
    relation_type: RelationType | None = None
    # other side of the relation; all None for plain fields
    related_field_name: str | None = None
    related_model_name: str | None = None
    related_schema_name: str | None = None
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
class _ModelHandler:
    """Collect serializable metadata about a single Django model.

    On construction the handler reads the model's `_meta` options and builds a
    `FieldMetadata` entry for every non-relational field and for every relation
    whose two schema names are both listed in `included_modules`.
    """

    def __init__(self, model, module_name: str, included_modules: list[str]) -> None:
        """Inspect `model` and store its metadata on the handler.

        Args:
            model: The Django model class to describe.
            module_name: Name of the schema module the model is collected for.
            included_modules: Schema names whose relations are reported.
        """
        from lamindb.models import IsLink

        self.model = model
        self.class_name = model.__name__
        self.module_name = module_name
        self.model_name = model._meta.model_name
        self.table_name = model._meta.db_table
        self.included_modules = included_modules
        # needs `self.model` and `self.included_modules` to be set already
        self.fields = self._get_fields_metadata(self.model)
        self.is_link_table = issubclass(model, IsLink)
        self.name_field = getattr(model, "_name_field", None)
        self.ontology_id_field = getattr(model, "_ontology_id_field", None)

    def to_dict(self, include_django_objects: bool = True):
        """Return the collected metadata as a plain dictionary.

        Every field entry is a fresh copy, so the result can be modified by the
        caller and the method can be called repeatedly without changing the
        `FieldMetadata` objects stored in `self.fields`.

        Args:
            include_django_objects: If true, add the model class itself under
                the `"model"` key.

        Returns:
            A dictionary with the keys `fields`, `class_name`, `table_name`,
            `is_link_table`, `name_field`, `ontology_id_field` and, optionally,
            `model`.
        """
        fields = {}
        for field_name, field_metadata in self.fields.items():
            # Copy instead of handing out `__dict__` itself: writing the
            # converted `through` into the instance dict would replace the
            # `Through` object on the stored metadata and break the next call.
            field_dict = dict(field_metadata.__dict__)
            through = field_dict["through"]
            if through is not None:
                field_dict["through"] = dict(through.__dict__)
            fields[field_name] = field_dict

        _dict = {
            "fields": fields,
            "class_name": self.class_name,
            "table_name": self.table_name,
            "is_link_table": self.is_link_table,
            "name_field": self.name_field,
            "ontology_id_field": self.ontology_id_field,
        }

        if include_django_objects:
            _dict["model"] = self.model

        return _dict

    def _get_fields_metadata(self, model):
        """Build the `field name -> FieldMetadata` mapping for `model`.

        Non-relational fields are always included. Relations are included only
        when both their schema name and their related schema name are listed
        in `self.included_modules`.
        """
        related_fields = []
        fields_metadata: dict[str, FieldMetadata] = {}

        for field in model._meta.get_fields():
            field_metadata = self._get_field_metadata(model, field)
            if field_metadata.related_schema_name is None:
                fields_metadata[field.name] = field_metadata

            if (
                field_metadata.related_schema_name in self.included_modules
                and field_metadata.schema_name in self.included_modules
            ):
                related_fields.append(field)

        related_fields_metadata = self._get_related_fields_metadata(
            model, related_fields
        )

        return {**fields_metadata, **related_fields_metadata}

    def _get_related_fields_metadata(self, model, fields: list[ForeignObjectRel]):
        """Build the `field name -> FieldMetadata` mapping for relation fields.

        For a one-to-many relation the metadata is computed from the forward
        field (`field.field`); relations of that kind that point back to
        `model` itself are skipped.
        """
        related_fields: dict[str, FieldMetadata] = {}

        for field in fields:
            if field.many_to_one:
                described_field = field
            elif field.one_to_many:
                # exclude self reference as it is already included in the many to one
                if field.related_model == model:
                    continue
                described_field = field.field
            elif field.many_to_many or field.one_to_one:
                described_field = field
            else:
                continue
            related_fields[field.name] = self._get_field_metadata(
                model, described_field
            )

        return related_fields

    def _get_field_metadata(self, model, field: Field):
        """Describe a single field or relation as a `FieldMetadata`.

        Args:
            model: The model from whose point of view the field is described.
            field: A Django field or reverse relation object.

        Returns:
            The `FieldMetadata` for `field`; the schema name `"lamindb"` is
            reported as `"core"`.
        """
        from lamindb.models import IsLink

        internal_type = field.get_internal_type()
        model_name = field.model._meta.model_name
        relation_type = self._get_relation_type(model, field)

        schema_name = field.model.__get_module_name__()

        if field.related_model is None:
            related_model_name = None
            related_schema_name = None
            related_field_name = None
            is_editable = field.editable
            max_length = field.max_length
        else:
            related_model_name = field.related_model._meta.model_name
            related_schema_name = field.related_model.__get_module_name__()
            related_field_name = field.remote_field.name
            is_editable = False
            max_length = None

        field_name = field.name
        is_primary_key = getattr(field, "primary_key", False)

        if relation_type == "one-to-many":
            # For a one-to-many relation, the field belongs
            # to the other model as a foreign key.
            # To make usage similar to other relation types
            # we need to invert model and related model.
            schema_name, related_schema_name = related_schema_name, schema_name
            model_name, related_model_name = related_model_name, model_name
            field_name, related_field_name = related_field_name, field_name

        # only forward fields that are not many-to-many / one-to-many report a column
        column = None
        if relation_type not in ["many-to-many", "one-to-many"]:
            if not isinstance(field, ForeignObjectRel):
                column = field.column

        if relation_type is None:
            through = None
        elif relation_type == "many-to-many":
            through = self._get_through_many_to_many(field)
        else:
            through = self._get_through(field)

        return FieldMetadata(
            schema_name=schema_name if schema_name != "lamindb" else "core",
            model_name=model_name,
            field_name=field_name,
            type=internal_type,
            is_link_table=issubclass(field.model, IsLink),
            is_primary_key=is_primary_key,
            is_editable=is_editable,
            max_length=max_length,
            column_name=column,
            relation_type=relation_type,
            related_schema_name=related_schema_name
            if related_schema_name != "lamindb"
            else "core",
            related_model_name=related_model_name,
            related_field_name=related_field_name,
            through=through,
        )

    @staticmethod
    def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
        """Describe the link table of a many-to-many field or reverse relation.

        Returns:
            A `Through` with both key columns and the link table name, or
            `None` if `field_or_rel` is neither a `ManyToManyField` nor a
            `ManyToManyRel`.
        """
        from lamindb.models import Registry

        # NOTE(review): `_SchemaHandler._get_modules_metadata` treats `Registry`
        # as the class of the models (`model.__class__ is Registry`), so the
        # `!= Registry` comparisons below look like they are always true --
        # confirm the intended comparison target.
        if isinstance(field_or_rel, ManyToManyField):
            column_name = field_or_rel.m2m_column_name()
            reverse_name = field_or_rel.m2m_reverse_name()
            link_table_name = field_or_rel.remote_field.through._meta.db_table
            if field_or_rel.model != Registry:
                return Through(
                    left_key=column_name,
                    right_key=reverse_name,
                    link_table_name=link_table_name,
                )
            return Through(
                left_key=reverse_name,
                right_key=column_name,
                link_table_name=link_table_name,
            )

        if isinstance(field_or_rel, ManyToManyRel):
            column_name = field_or_rel.field.m2m_column_name()
            reverse_name = field_or_rel.field.m2m_reverse_name()
            link_table_name = field_or_rel.through._meta.db_table
            # seen from the reverse side, the keys are mirrored
            if field_or_rel.model != Registry:
                return Through(
                    left_key=reverse_name,
                    right_key=column_name,
                    link_table_name=link_table_name,
                )
            return Through(
                left_key=column_name,
                right_key=reverse_name,
                link_table_name=link_table_name,
            )

        return None

    def _get_through(
        self, field_or_rel: ForeignKey | OneToOneField | ManyToOneRel | OneToOneRel
    ):
        """Describe the key columns of a foreign-key or one-to-one relation.

        The first pair of `related_fields` of the forward field is used. The
        column whose model has the same model name as `self.model` becomes the
        left key.
        """
        if isinstance(field_or_rel, ForeignObjectRel):
            forward_field = field_or_rel.field
        else:
            forward_field = field_or_rel
        rel_1 = forward_field.related_fields[0][0]
        rel_2 = forward_field.related_fields[0][1]

        if rel_1.model._meta.model_name == self.model._meta.model_name:
            return Through(
                left_key=rel_1.column,
                right_key=rel_2.column,
            )
        return Through(
            left_key=rel_2.column,
            right_key=rel_1.column,
        )

    @staticmethod
    def _get_relation_type(model, field: Field):
        """Classify the relation `field` represents from `model`'s point of view.

        Returns:
            One of the `RelationType` strings, or `None` when none of the
            relation flags of `field` is set.
        """
        if field.many_to_one:
            # defined in the model -> many-to-one,
            # defined in the related model -> one-to-many
            return "many-to-one" if model == field.model else "one-to-many"
        if field.one_to_many:
            return "one-to-many"
        if field.many_to_many:
            return "many-to-many"
        if field.one_to_one:
            return "one-to-one"
        return None
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
class _SchemaHandler:
    """Collect the metadata of all models of the current instance's schema modules."""

    def __init__(self) -> None:
        """Gather model metadata for `"core"` plus the modules of the current instance."""
        self.included_modules = ["core"] + list(settings.instance.modules)
        self.modules = self._get_modules_metadata()

    def to_dict(self, include_django_objects: bool = True):
        """Return `{module name: {model name: model metadata dict}}`.

        The module name `"lamindb"` is reported as `"core"`.

        Args:
            include_django_objects: Forwarded to each model handler's `to_dict`.
        """
        schema_dict = {}
        for module_name, module in self.modules.items():
            reported_name = "core" if module_name == "lamindb" else module_name
            schema_dict[reported_name] = {
                model_name: model.to_dict(include_django_objects)
                for model_name, model in module.items()
            }
        return schema_dict

    def to_json(self):
        """Return the metadata dictionary without the Django model classes."""
        return self.to_dict(include_django_objects=False)

    def _get_modules_metadata(self):
        """Create a `_ModelHandler` for every concrete model of every included module.

        A value found in a module's `models` namespace is kept only if its
        class is `Registry`, it is not `SQLRecord` itself, it is not abstract,
        and it reports the module being processed as its own module name.
        """
        from lamindb.models import Registry, SQLRecord

        all_models = {}
        for module_name in self.included_modules:
            namespace = self._get_schema_module(module_name).models.__dict__
            handlers = {}
            for model in namespace.values():
                if model.__class__ is not Registry:
                    continue
                if model is SQLRecord:
                    continue
                if model._meta.abstract:
                    continue
                if model.__get_module_name__() != module_name:
                    continue
                handlers[model._meta.model_name] = _ModelHandler(
                    model, module_name, self.included_modules
                )
            all_models[module_name] = handlers
        assert all_models
        return all_models

    def _get_module_set_info(self):
        """Return one `{"id", "name", "version"}` entry per included module.

        The id is always 0 and the name `"lamindb"` is reported as `"core"`.
        """
        # TODO: rely on schemamodule table for this
        module_set_info = []
        for module_name in self.included_modules:
            module = self._get_schema_module(module_name)
            reported_name = "core" if module_name == "lamindb" else module_name
            module_set_info.append(
                {"id": 0, "name": reported_name, "version": module.__version__}
            )
        return module_set_info

    @staticmethod
    def _get_schema_module(module_name):
        """Import the Python module that `get_schema_module_name` maps `module_name` to."""
        python_module_name = get_schema_module_name(module_name)
        return importlib.import_module(python_module_name)
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import importlib
|
|
5
|
+
import json
|
|
6
|
+
from typing import TYPE_CHECKING, Literal
|
|
7
|
+
from uuid import UUID
|
|
8
|
+
|
|
9
|
+
from django.db.models import (
|
|
10
|
+
Field,
|
|
11
|
+
ForeignKey,
|
|
12
|
+
ForeignObjectRel,
|
|
13
|
+
ManyToManyField,
|
|
14
|
+
ManyToManyRel,
|
|
15
|
+
ManyToOneRel,
|
|
16
|
+
OneToOneField,
|
|
17
|
+
OneToOneRel,
|
|
18
|
+
)
|
|
19
|
+
from lamin_utils import logger
|
|
20
|
+
from pydantic import BaseModel
|
|
21
|
+
|
|
22
|
+
from lamindb_setup import settings
|
|
23
|
+
from lamindb_setup._init_instance import get_schema_module_name
|
|
24
|
+
from lamindb_setup.core._hub_client import call_with_fallback_auth
|
|
25
|
+
|
|
26
|
+
# Suppress pydantic's warning about fields in the protected `model_` namespace
# (`FieldMetadata.model_name` further down is such a field).
try:
    BaseModel.model_config["protected_namespaces"] = ()
    logger.debug(
        "pydantic.BaseModel.model_config['protected_namespaces'] has been set to ()"
    )
except Exception as error:
    # Best effort only: a failure here must never break importing this module,
    # but leave a debug trace instead of swallowing it silently.
    logger.debug(
        "pydantic.BaseModel.model_config['protected_namespaces'] could not be"
        f" set to (): {error}"
    )
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
from supabase import Client
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def update_schema_in_hub(access_token: str | None = None) -> tuple[bool, UUID, dict]:
    """Synchronize the schema of the current instance with the hub.

    Runs `_synchronize_schema` through `call_with_fallback_auth`, forwarding
    `access_token` to it.

    Args:
        access_token: Optional token handed to `call_with_fallback_auth`.

    Returns:
        The `(is_new, schema_uuid, schema)` tuple produced by
        `_synchronize_schema` (presumably passed through unchanged by
        `call_with_fallback_auth` -- confirm against that helper).
    """
    sync_result = call_with_fallback_auth(
        _synchronize_schema,
        access_token=access_token,
    )
    return sync_result
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _synchronize_schema(client: Client) -> tuple[bool, UUID, dict]:
    """Make sure the current schema exists in the hub and link the instance to it.

    The schema metadata of the current instance is serialized, an id is derived
    from that serialization, and a row is inserted into the hub's `schema`
    table if no row with that id exists yet. Afterwards the `schema_id` of the
    current instance's row in the `instance` table is set to that id.

    Args:
        client: Hub client used for all table operations.

    Returns:
        A tuple `(is_new, schema_uuid, schema)`: whether the schema row had to
        be inserted, the derived schema id, and the schema row.

    Raises:
        AssertionError: If the instance update did not affect exactly one row.
    """
    schema_metadata = _SchemaHandler()
    schema_metadata_dict = schema_metadata.to_json()
    # the id is derived from the schema content itself
    schema_uuid = _dict_to_uuid(schema_metadata_dict)
    schema = _get_schema_by_id(schema_uuid, client)

    is_new = schema is None
    if is_new:
        module_set_info = schema_metadata._get_module_set_info()
        module_ids = "-".join(str(module_info["id"]) for module_info in module_set_info)
        insert_response = (
            client.table("schema")
            .insert(
                {
                    "id": schema_uuid.hex,
                    "module_ids": module_ids,
                    "module_set_info": module_set_info,
                    "schema_json": schema_metadata_dict,
                }
            )
            .execute()
        )
        schema = insert_response.data[0]

    instance_id_hex = settings.instance._id.hex
    instance_response = (
        client.table("instance")
        .update({"schema_id": schema_uuid.hex})
        .eq("id", instance_id_hex)
        .execute()
    )
    # Raised explicitly rather than via an `assert` statement so that the check
    # is not stripped when Python runs with `-O`; the exception type is unchanged.
    if len(instance_response.data) != 1:
        raise AssertionError(
            f"schema of instance {instance_id_hex} could not be updated with schema {schema_uuid.hex}"
        )

    return is_new, schema_uuid, schema
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def get_schema_by_id(id: UUID):
    """Look up a schema row in the hub by its id.

    Runs `_get_schema_by_id` through `call_with_fallback_auth`, passing `id`
    as a keyword argument.

    Args:
        id: Id of the schema row to look up.

    Returns:
        The result of `call_with_fallback_auth` (presumably the row returned
        by `_get_schema_by_id`, or `None` -- confirm against that helper).
    """
    lookup_result = call_with_fallback_auth(_get_schema_by_id, id=id)
    return lookup_result
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _get_schema_by_id(id: UUID, client: Client):
    """Select the row of the hub's `schema` table whose `id` equals `id.hex`.

    Args:
        id: Id of the schema row to look up.
        client: Hub client used to run the query.

    Returns:
        The first matching row, or `None` when the query returns no rows.
    """
    query = client.table("schema").select("*").eq("id", id.hex)
    rows = query.execute().data
    return None if len(rows) == 0 else rows[0]
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _dict_to_uuid(dict: dict):
    """Derive a deterministic UUID from the content of a dictionary.

    The dictionary is dumped to JSON with sorted keys, the UTF-8 encoded text
    is hashed with MD5, and the digest bytes become the UUID bytes. Equal
    dictionaries therefore map to the same UUID regardless of key order.

    Args:
        dict: A JSON-serializable dictionary.

    Returns:
        The `UUID` built from the MD5 digest.
    """
    canonical_json = json.dumps(dict, sort_keys=True)
    digest = hashlib.md5(canonical_json.encode("utf-8")).digest()
    # `UUID(bytes=...)` needs exactly 16 bytes
    return UUID(bytes=digest[:16])
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Relation kinds that `FieldMetadata.relation_type` may take.
RelationType = Literal[
    "many-to-one",
    "one-to-many",
    "many-to-many",
    "one-to-one",
]
# Django field type names that `FieldMetadata.type` may take.
Type = Literal[
    "ForeignKey",
    # the names below are generated with
    # `from django.db import models; [attr for attr in dir(models) if attr.endswith('Field')]`
    "AutoField",
    "BigAutoField",
    "BigIntegerField",
    "BinaryField",
    "BooleanField",
    "CharField",
    "CommaSeparatedIntegerField",
    "DateField",
    "DateTimeField",
    "DecimalField",
    "DurationField",
    "EmailField",
    "Field",
    "FileField",
    "FilePathField",
    "FloatField",
    "GeneratedField",
    "GenericIPAddressField",
    "IPAddressField",
    "ImageField",
    "IntegerField",
    "JSONField",
    "ManyToManyField",
    "NullBooleanField",
    "OneToOneField",
    "PositiveBigIntegerField",
    "PositiveIntegerField",
    "PositiveSmallIntegerField",
    "SlugField",
    "SmallAutoField",
    "SmallIntegerField",
    "TextField",
    "TimeField",
    "URLField",
    "UUIDField",
]
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class Through(BaseModel):
    # Describes how the two sides of a relation are joined.
    # `_ModelHandler` fills the keys with column names; which side ends up
    # "left" depends on the direction the relation is viewed from.
    left_key: str
    right_key: str
    # Database table of the many-to-many link model; `_ModelHandler` leaves it
    # unset (None) for the relations handled by `_get_through`.
    link_table_name: str | None = None
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
class FieldMetadata(BaseModel):
    # Serializable description of one model field, built by
    # `_ModelHandler._get_field_metadata`.

    # result of the Django field's `get_internal_type()`
    type: Type
    # `field.column`; stays None for reverse relations and for
    # many-to-many / one-to-many relations
    column_name: str | None = None
    # join description; None when the field is not a relation
    through: Through | None = None
    # field / model / schema the entry belongs to (swapped with the related
    # side for one-to-many relations)
    field_name: str
    model_name: str
    schema_name: str
    # whether the field's model is a subclass of `IsLink`
    is_link_table: bool
    # `field.primary_key`, False when the field has no such attribute
    is_primary_key: bool
    # `field.editable` for plain fields, always False for relations
    is_editable: bool
    # `field.max_length` for plain fields, always None for relations
    max_length: int | None = None
    # None when the field is not a relation
    relation_type: RelationType | None = None
    # other side of the relation; all None for plain fields
    related_field_name: str | None = None
    related_model_name: str | None = None
    related_schema_name: str | None = None
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class _ModelHandler:
    """Collect serializable metadata about a single Django model.

    On construction the handler reads the model's `_meta` options and builds a
    `FieldMetadata` entry for every non-relational field and for every relation
    whose two schema names are both listed in `included_modules`.
    """

    def __init__(self, model, module_name: str, included_modules: list[str]) -> None:
        """Inspect `model` and store its metadata on the handler.

        Args:
            model: The Django model class to describe.
            module_name: Name of the schema module the model is collected for.
            included_modules: Schema names whose relations are reported.
        """
        from lamindb.models import IsLink

        self.model = model
        self.class_name = model.__name__
        self.module_name = module_name
        self.model_name = model._meta.model_name
        self.table_name = model._meta.db_table
        self.included_modules = included_modules
        # needs `self.model` and `self.included_modules` to be set already
        self.fields = self._get_fields_metadata(self.model)
        self.is_link_table = issubclass(model, IsLink)
        self.name_field = getattr(model, "_name_field", None)
        self.ontology_id_field = getattr(model, "_ontology_id_field", None)

    def to_dict(self, include_django_objects: bool = True):
        """Return the collected metadata as a plain dictionary.

        Every field entry is a fresh copy, so the result can be modified by the
        caller and the method can be called repeatedly without changing the
        `FieldMetadata` objects stored in `self.fields`.

        Args:
            include_django_objects: If true, add the model class itself under
                the `"model"` key.

        Returns:
            A dictionary with the keys `fields`, `class_name`, `table_name`,
            `is_link_table`, `name_field`, `ontology_id_field` and, optionally,
            `model`.
        """
        fields = {}
        for field_name, field_metadata in self.fields.items():
            # Copy instead of handing out `__dict__` itself: writing the
            # converted `through` into the instance dict would replace the
            # `Through` object on the stored metadata and break the next call.
            field_dict = dict(field_metadata.__dict__)
            through = field_dict["through"]
            if through is not None:
                field_dict["through"] = dict(through.__dict__)
            fields[field_name] = field_dict

        _dict = {
            "fields": fields,
            "class_name": self.class_name,
            "table_name": self.table_name,
            "is_link_table": self.is_link_table,
            "name_field": self.name_field,
            "ontology_id_field": self.ontology_id_field,
        }

        if include_django_objects:
            _dict["model"] = self.model

        return _dict

    def _get_fields_metadata(self, model):
        """Build the `field name -> FieldMetadata` mapping for `model`.

        Non-relational fields are always included. Relations are included only
        when both their schema name and their related schema name are listed
        in `self.included_modules`.
        """
        related_fields = []
        fields_metadata: dict[str, FieldMetadata] = {}

        for field in model._meta.get_fields():
            field_metadata = self._get_field_metadata(model, field)
            if field_metadata.related_schema_name is None:
                fields_metadata[field.name] = field_metadata

            if (
                field_metadata.related_schema_name in self.included_modules
                and field_metadata.schema_name in self.included_modules
            ):
                related_fields.append(field)

        related_fields_metadata = self._get_related_fields_metadata(
            model, related_fields
        )

        return {**fields_metadata, **related_fields_metadata}

    def _get_related_fields_metadata(self, model, fields: list[ForeignObjectRel]):
        """Build the `field name -> FieldMetadata` mapping for relation fields.

        For a one-to-many relation the metadata is computed from the forward
        field (`field.field`); relations of that kind that point back to
        `model` itself are skipped.
        """
        related_fields: dict[str, FieldMetadata] = {}

        for field in fields:
            if field.many_to_one:
                described_field = field
            elif field.one_to_many:
                # exclude self reference as it is already included in the many to one
                if field.related_model == model:
                    continue
                described_field = field.field
            elif field.many_to_many or field.one_to_one:
                described_field = field
            else:
                continue
            related_fields[field.name] = self._get_field_metadata(
                model, described_field
            )

        return related_fields

    def _get_field_metadata(self, model, field: Field):
        """Describe a single field or relation as a `FieldMetadata`.

        Args:
            model: The model from whose point of view the field is described.
            field: A Django field or reverse relation object.

        Returns:
            The `FieldMetadata` for `field`; the schema name `"lamindb"` is
            reported as `"core"`.
        """
        from lamindb.models import IsLink

        internal_type = field.get_internal_type()
        model_name = field.model._meta.model_name
        relation_type = self._get_relation_type(model, field)

        schema_name = field.model.__get_module_name__()

        if field.related_model is None:
            related_model_name = None
            related_schema_name = None
            related_field_name = None
            is_editable = field.editable
            max_length = field.max_length
        else:
            related_model_name = field.related_model._meta.model_name
            related_schema_name = field.related_model.__get_module_name__()
            related_field_name = field.remote_field.name
            is_editable = False
            max_length = None

        field_name = field.name
        is_primary_key = getattr(field, "primary_key", False)

        if relation_type == "one-to-many":
            # For a one-to-many relation, the field belongs
            # to the other model as a foreign key.
            # To make usage similar to other relation types
            # we need to invert model and related model.
            schema_name, related_schema_name = related_schema_name, schema_name
            model_name, related_model_name = related_model_name, model_name
            field_name, related_field_name = related_field_name, field_name

        # only forward fields that are not many-to-many / one-to-many report a column
        column = None
        if relation_type not in ["many-to-many", "one-to-many"]:
            if not isinstance(field, ForeignObjectRel):
                column = field.column

        if relation_type is None:
            through = None
        elif relation_type == "many-to-many":
            through = self._get_through_many_to_many(field)
        else:
            through = self._get_through(field)

        return FieldMetadata(
            schema_name=schema_name if schema_name != "lamindb" else "core",
            model_name=model_name,
            field_name=field_name,
            type=internal_type,
            is_link_table=issubclass(field.model, IsLink),
            is_primary_key=is_primary_key,
            is_editable=is_editable,
            max_length=max_length,
            column_name=column,
            relation_type=relation_type,
            related_schema_name=related_schema_name
            if related_schema_name != "lamindb"
            else "core",
            related_model_name=related_model_name,
            related_field_name=related_field_name,
            through=through,
        )

    @staticmethod
    def _get_through_many_to_many(field_or_rel: ManyToManyField | ManyToManyRel):
        """Describe the link table of a many-to-many field or reverse relation.

        Returns:
            A `Through` with both key columns and the link table name, or
            `None` if `field_or_rel` is neither a `ManyToManyField` nor a
            `ManyToManyRel`.
        """
        from lamindb.models import Registry

        # NOTE(review): `_SchemaHandler._get_modules_metadata` treats `Registry`
        # as the class of the models (`model.__class__ is Registry`), so the
        # `!= Registry` comparisons below look like they are always true --
        # confirm the intended comparison target.
        if isinstance(field_or_rel, ManyToManyField):
            column_name = field_or_rel.m2m_column_name()
            reverse_name = field_or_rel.m2m_reverse_name()
            link_table_name = field_or_rel.remote_field.through._meta.db_table
            if field_or_rel.model != Registry:
                return Through(
                    left_key=column_name,
                    right_key=reverse_name,
                    link_table_name=link_table_name,
                )
            return Through(
                left_key=reverse_name,
                right_key=column_name,
                link_table_name=link_table_name,
            )

        if isinstance(field_or_rel, ManyToManyRel):
            column_name = field_or_rel.field.m2m_column_name()
            reverse_name = field_or_rel.field.m2m_reverse_name()
            link_table_name = field_or_rel.through._meta.db_table
            # seen from the reverse side, the keys are mirrored
            if field_or_rel.model != Registry:
                return Through(
                    left_key=reverse_name,
                    right_key=column_name,
                    link_table_name=link_table_name,
                )
            return Through(
                left_key=column_name,
                right_key=reverse_name,
                link_table_name=link_table_name,
            )

        return None

    def _get_through(
        self, field_or_rel: ForeignKey | OneToOneField | ManyToOneRel | OneToOneRel
    ):
        """Describe the key columns of a foreign-key or one-to-one relation.

        The first pair of `related_fields` of the forward field is used. The
        column whose model has the same model name as `self.model` becomes the
        left key.
        """
        if isinstance(field_or_rel, ForeignObjectRel):
            forward_field = field_or_rel.field
        else:
            forward_field = field_or_rel
        rel_1 = forward_field.related_fields[0][0]
        rel_2 = forward_field.related_fields[0][1]

        if rel_1.model._meta.model_name == self.model._meta.model_name:
            return Through(
                left_key=rel_1.column,
                right_key=rel_2.column,
            )
        return Through(
            left_key=rel_2.column,
            right_key=rel_1.column,
        )

    @staticmethod
    def _get_relation_type(model, field: Field):
        """Classify the relation `field` represents from `model`'s point of view.

        Returns:
            One of the `RelationType` strings, or `None` when none of the
            relation flags of `field` is set.
        """
        if field.many_to_one:
            # defined in the model -> many-to-one,
            # defined in the related model -> one-to-many
            return "many-to-one" if model == field.model else "one-to-many"
        if field.one_to_many:
            return "one-to-many"
        if field.many_to_many:
            return "many-to-many"
        if field.one_to_one:
            return "one-to-one"
        return None
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
class _SchemaHandler:
    """Collect the metadata of all models of the current instance's schema modules."""

    def __init__(self) -> None:
        """Gather model metadata for `"core"` plus the modules of the current instance."""
        self.included_modules = ["core"] + list(settings.instance.modules)
        self.modules = self._get_modules_metadata()

    def to_dict(self, include_django_objects: bool = True):
        """Return `{module name: {model name: model metadata dict}}`.

        The module name `"lamindb"` is reported as `"core"`.

        Args:
            include_django_objects: Forwarded to each model handler's `to_dict`.
        """
        schema_dict = {}
        for module_name, module in self.modules.items():
            reported_name = "core" if module_name == "lamindb" else module_name
            schema_dict[reported_name] = {
                model_name: model.to_dict(include_django_objects)
                for model_name, model in module.items()
            }
        return schema_dict

    def to_json(self):
        """Return the metadata dictionary without the Django model classes."""
        return self.to_dict(include_django_objects=False)

    def _get_modules_metadata(self):
        """Create a `_ModelHandler` for every concrete model of every included module.

        A value found in a module's `models` namespace is kept only if its
        class is `Registry`, it is not `SQLRecord` itself, it is not abstract,
        and it reports the module being processed as its own module name.
        """
        from lamindb.models import Registry, SQLRecord

        all_models = {}
        for module_name in self.included_modules:
            namespace = self._get_schema_module(module_name).models.__dict__
            handlers = {}
            for model in namespace.values():
                if model.__class__ is not Registry:
                    continue
                if model is SQLRecord:
                    continue
                if model._meta.abstract:
                    continue
                if model.__get_module_name__() != module_name:
                    continue
                handlers[model._meta.model_name] = _ModelHandler(
                    model, module_name, self.included_modules
                )
            all_models[module_name] = handlers
        assert all_models
        return all_models

    def _get_module_set_info(self):
        """Return one `{"id", "name", "version"}` entry per included module.

        The id is always 0 and the name `"lamindb"` is reported as `"core"`.
        """
        # TODO: rely on schemamodule table for this
        module_set_info = []
        for module_name in self.included_modules:
            module = self._get_schema_module(module_name)
            reported_name = "core" if module_name == "lamindb" else module_name
            module_set_info.append(
                {"id": 0, "name": reported_name, "version": module.__version__}
            )
        return module_set_info

    @staticmethod
    def _get_schema_module(module_name):
        """Import the Python module that `get_schema_module_name` maps `module_name` to."""
        python_module_name = get_schema_module_name(module_name)
        return importlib.import_module(python_module_name)
|