lamindb 1.0.5__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +17 -6
- lamindb/_artifact.py +202 -87
- lamindb/_can_curate.py +27 -8
- lamindb/_collection.py +86 -52
- lamindb/_feature.py +177 -41
- lamindb/_finish.py +21 -7
- lamindb/_from_values.py +83 -98
- lamindb/_parents.py +4 -4
- lamindb/_query_set.py +78 -18
- lamindb/_record.py +170 -53
- lamindb/_run.py +4 -4
- lamindb/_save.py +42 -11
- lamindb/_schema.py +135 -38
- lamindb/_storage.py +1 -1
- lamindb/_tracked.py +129 -0
- lamindb/_transform.py +21 -8
- lamindb/_ulabel.py +5 -14
- lamindb/base/users.py +1 -4
- lamindb/base/validation.py +2 -6
- lamindb/core/__init__.py +13 -14
- lamindb/core/_context.py +14 -9
- lamindb/core/_data.py +29 -25
- lamindb/core/_describe.py +1 -1
- lamindb/core/_django.py +1 -1
- lamindb/core/_feature_manager.py +53 -43
- lamindb/core/_label_manager.py +4 -4
- lamindb/core/_mapped_collection.py +24 -9
- lamindb/core/_track_environment.py +2 -1
- lamindb/core/datasets/__init__.py +6 -1
- lamindb/core/datasets/_core.py +12 -11
- lamindb/core/datasets/_small.py +67 -21
- lamindb/core/exceptions.py +1 -90
- lamindb/core/loaders.py +21 -15
- lamindb/core/relations.py +6 -4
- lamindb/core/storage/_anndata_accessor.py +49 -3
- lamindb/core/storage/_backed_access.py +12 -7
- lamindb/core/storage/_pyarrow_dataset.py +40 -15
- lamindb/core/storage/_tiledbsoma.py +56 -12
- lamindb/core/storage/paths.py +30 -24
- lamindb/core/subsettings/_creation_settings.py +4 -16
- lamindb/curators/__init__.py +2193 -846
- lamindb/curators/_cellxgene_schemas/__init__.py +26 -0
- lamindb/curators/_cellxgene_schemas/schema_versions.yml +104 -0
- lamindb/errors.py +96 -0
- lamindb/integrations/_vitessce.py +3 -3
- lamindb/migrations/0069_squashed.py +76 -75
- lamindb/migrations/0075_lamindbv1_part5.py +4 -5
- lamindb/migrations/0082_alter_feature_dtype.py +21 -0
- lamindb/migrations/0083_alter_feature_is_type_alter_flextable_is_type_and_more.py +94 -0
- lamindb/migrations/0084_alter_schemafeature_feature_and_more.py +35 -0
- lamindb/migrations/0085_alter_feature_is_type_alter_flextable_is_type_and_more.py +63 -0
- lamindb/migrations/0086_various.py +95 -0
- lamindb/migrations/0087_rename__schemas_m2m_artifact_feature_sets_and_more.py +41 -0
- lamindb/migrations/0088_schema_components.py +273 -0
- lamindb/migrations/0088_squashed.py +4372 -0
- lamindb/models.py +475 -168
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/METADATA +9 -7
- lamindb-1.1.1.dist-info/RECORD +95 -0
- lamindb/curators/_spatial.py +0 -528
- lamindb/migrations/0052_squashed.py +0 -1261
- lamindb/migrations/0053_alter_featureset_hash_alter_paramvalue_created_by_and_more.py +0 -57
- lamindb/migrations/0054_alter_feature_previous_runs_and_more.py +0 -35
- lamindb/migrations/0055_artifact_type_artifactparamvalue_and_more.py +0 -61
- lamindb/migrations/0056_rename_ulabel_ref_is_name_artifactulabel_label_ref_is_name_and_more.py +0 -22
- lamindb/migrations/0057_link_models_latest_report_and_others.py +0 -356
- lamindb/migrations/0058_artifact__actions_collection__actions.py +0 -22
- lamindb/migrations/0059_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -31
- lamindb/migrations/0060_alter_artifact__actions.py +0 -22
- lamindb/migrations/0061_alter_collection_meta_artifact_alter_run_environment_and_more.py +0 -45
- lamindb/migrations/0062_add_is_latest_field.py +0 -32
- lamindb/migrations/0063_populate_latest_field.py +0 -45
- lamindb/migrations/0064_alter_artifact_version_alter_collection_version_and_more.py +0 -33
- lamindb/migrations/0065_remove_collection_feature_sets_and_more.py +0 -22
- lamindb/migrations/0066_alter_artifact__feature_values_and_more.py +0 -352
- lamindb/migrations/0067_alter_featurevalue_unique_together_and_more.py +0 -20
- lamindb/migrations/0068_alter_artifactulabel_unique_together_and_more.py +0 -20
- lamindb/migrations/0069_alter_artifact__accessor_alter_artifact__hash_type_and_more.py +0 -1294
- lamindb-1.0.5.dist-info/RECORD +0 -102
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/LICENSE +0 -0
- {lamindb-1.0.5.dist-info → lamindb-1.1.1.dist-info}/WHEEL +0 -0
lamindb/_finish.py
CHANGED
@@ -96,7 +96,7 @@ def save_run_logs(run: Run, save_run: bool = False) -> None:
|
|
96
96
|
if logs_path.exists():
|
97
97
|
if run.report is not None:
|
98
98
|
logger.important("overwriting run.report")
|
99
|
-
artifact = Artifact(
|
99
|
+
artifact = Artifact( # type: ignore
|
100
100
|
logs_path,
|
101
101
|
description=f"log streams of run {run.uid}",
|
102
102
|
_branch_code=0,
|
@@ -159,7 +159,7 @@ def notebook_to_report(notebook_path: Path, output_path: Path) -> None:
|
|
159
159
|
output_path.write_text(html, encoding="utf-8")
|
160
160
|
|
161
161
|
|
162
|
-
def notebook_to_script(
|
162
|
+
def notebook_to_script( # type: ignore
|
163
163
|
transform: Transform, notebook_path: Path, script_path: Path | None = None
|
164
164
|
) -> None | str:
|
165
165
|
import jupytext
|
@@ -207,8 +207,13 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
|
|
207
207
|
|
208
208
|
|
209
209
|
def check_filepath_recently_saved(filepath: Path, is_finish_retry: bool) -> bool:
|
210
|
-
recently_saved_time
|
210
|
+
# the recently_saved_time needs to be very low for the first check
|
211
|
+
# because an accidental save (e.g. via auto-save) might otherwise lead
|
212
|
+
# to upload of an outdated notebook
|
213
|
+
# also see implementation for R notebooks below
|
214
|
+
offset_saved_time = 0.3 if not is_finish_retry else 20
|
211
215
|
for retry in range(30):
|
216
|
+
recently_saved_time = offset_saved_time + retry # sleep time is 1 sec
|
212
217
|
if get_seconds_since_modified(filepath) > recently_saved_time:
|
213
218
|
if retry == 0:
|
214
219
|
prefix = f"{LEVEL_TO_COLORS[20]}{LEVEL_TO_ICONS[20]}{RESET_COLOR}"
|
@@ -316,7 +321,8 @@ def save_context_core(
|
|
316
321
|
f"no html report found; to attach one, create an .html export for your {filepath.suffix} file and then run: lamin save {filepath}"
|
317
322
|
)
|
318
323
|
if report_path is not None and is_r_notebook and not from_cli: # R notebooks
|
319
|
-
|
324
|
+
# see comment above in check_filepath_recently_saved
|
325
|
+
recently_saved_time = 0.3 if not is_retry else 20
|
320
326
|
if get_seconds_since_modified(report_path) > recently_saved_time:
|
321
327
|
# the automated retry solution of Jupyter notebooks does not work in RStudio because the execution of the notebook cell
|
322
328
|
# seems to block the event loop of the frontend
|
@@ -365,7 +371,7 @@ def save_context_core(
|
|
365
371
|
artifact = ln.Artifact.filter(hash=hash, _branch_code=0).one_or_none()
|
366
372
|
new_env_artifact = artifact is None
|
367
373
|
if new_env_artifact:
|
368
|
-
artifact = ln.Artifact(
|
374
|
+
artifact = ln.Artifact( # type: ignore
|
369
375
|
env_path,
|
370
376
|
description="requirements.txt",
|
371
377
|
_branch_code=0,
|
@@ -411,7 +417,7 @@ def save_context_core(
|
|
411
417
|
else:
|
412
418
|
logger.important("report is already saved")
|
413
419
|
else:
|
414
|
-
report_file = ln.Artifact(
|
420
|
+
report_file = ln.Artifact( # type: ignore
|
415
421
|
report_path,
|
416
422
|
description=f"Report of run {run.uid}",
|
417
423
|
_branch_code=0, # hidden file
|
@@ -430,7 +436,15 @@ def save_context_core(
|
|
430
436
|
# save both run & transform records if we arrive here
|
431
437
|
if run is not None:
|
432
438
|
run.save()
|
433
|
-
transform.
|
439
|
+
transform_id_prior_to_save = transform.id
|
440
|
+
transform.save() # this in-place updates the state of transform upon hash collision
|
441
|
+
if transform.id != transform_id_prior_to_save:
|
442
|
+
# the hash existed and we're actually back to the previous version
|
443
|
+
# hence, this was in fact a run of the previous transform rather than of
|
444
|
+
# the new transform
|
445
|
+
# this can happen in interactive notebooks if the user makes no change to the notebook
|
446
|
+
run.transform = transform
|
447
|
+
run.save()
|
434
448
|
|
435
449
|
# finalize
|
436
450
|
if not from_cli and run is not None:
|
lamindb/_from_values.py
CHANGED
@@ -9,8 +9,6 @@ from lamin_utils import colors, logger
|
|
9
9
|
from lamindb._query_set import RecordList
|
10
10
|
from lamindb.models import Record
|
11
11
|
|
12
|
-
from .core._settings import settings
|
13
|
-
|
14
12
|
if TYPE_CHECKING:
|
15
13
|
from collections.abc import Iterable
|
16
14
|
|
@@ -29,88 +27,72 @@ def get_or_create_records(
|
|
29
27
|
mute: bool = False,
|
30
28
|
) -> RecordList:
|
31
29
|
"""Get or create records from iterables."""
|
32
|
-
registry = field.field.model
|
30
|
+
registry = field.field.model # type: ignore
|
33
31
|
if create:
|
34
|
-
return RecordList([registry(**{field.field.name: value}) for value in iterable])
|
35
|
-
creation_search_names = settings.creation.search_names
|
32
|
+
return RecordList([registry(**{field.field.name: value}) for value in iterable]) # type: ignore
|
36
33
|
organism = _get_organism_record(field, organism)
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
mute=mute,
|
47
|
-
)
|
34
|
+
iterable_idx = index_iterable(iterable)
|
35
|
+
|
36
|
+
# returns existing records & non-existing values
|
37
|
+
records, nonexist_values, msg = get_existing_records(
|
38
|
+
iterable_idx=iterable_idx,
|
39
|
+
field=field,
|
40
|
+
organism=organism,
|
41
|
+
mute=mute,
|
42
|
+
)
|
48
43
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
44
|
+
# new records to be created based on new values
|
45
|
+
if len(nonexist_values) > 0:
|
46
|
+
source_record = None
|
47
|
+
if from_source:
|
48
|
+
if isinstance(source, Record):
|
49
|
+
source_record = source
|
50
|
+
if not source_record and hasattr(registry, "public"):
|
51
|
+
if organism is None:
|
52
|
+
organism = _ensembl_prefix(nonexist_values[0], field, organism)
|
53
|
+
organism = _get_organism_record(field, organism, force=True)
|
54
|
+
|
55
|
+
if source_record:
|
56
|
+
from bionty.core._add_ontology import check_source_in_db
|
57
|
+
|
58
|
+
check_source_in_db(registry=registry, source=source_record)
|
59
|
+
|
60
|
+
from_source = not source_record.in_db
|
61
|
+
elif hasattr(registry, "source_id"):
|
62
|
+
from_source = True
|
63
|
+
else:
|
64
|
+
from_source = False
|
65
|
+
|
66
|
+
if from_source:
|
67
|
+
records_bionty, unmapped_values = create_records_from_source(
|
68
|
+
iterable_idx=nonexist_values,
|
69
|
+
field=field,
|
70
|
+
organism=organism,
|
71
|
+
source=source_record,
|
72
|
+
msg=msg,
|
73
|
+
mute=mute,
|
74
|
+
)
|
75
|
+
if len(records_bionty) > 0:
|
76
|
+
msg = ""
|
77
|
+
for record in records_bionty:
|
78
|
+
record._from_source = True
|
79
|
+
records += records_bionty
|
80
|
+
else:
|
81
|
+
unmapped_values = nonexist_values
|
82
|
+
# unmapped new_ids will NOT create records
|
83
|
+
if len(unmapped_values) > 0:
|
84
|
+
if len(msg) > 0 and not mute:
|
85
|
+
logger.success(msg)
|
86
|
+
s = "" if len(unmapped_values) == 1 else "s"
|
87
|
+
print_values = colors.yellow(_format_values(unmapped_values))
|
88
|
+
name = registry.__name__
|
89
|
+
n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
|
90
|
+
if not mute:
|
91
|
+
logger.warning(
|
92
|
+
f"{colors.red('did not create')} {name} record{s} for "
|
93
|
+
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}" # type: ignore
|
79
94
|
)
|
80
|
-
|
81
|
-
msg = ""
|
82
|
-
for record in records_bionty:
|
83
|
-
record._from_source = True
|
84
|
-
records += records_bionty
|
85
|
-
else:
|
86
|
-
unmapped_values = nonexist_values
|
87
|
-
# unmapped new_ids will NOT create records
|
88
|
-
if len(unmapped_values) > 0:
|
89
|
-
if len(msg) > 0 and not mute:
|
90
|
-
logger.success(msg)
|
91
|
-
s = "" if len(unmapped_values) == 1 else "s"
|
92
|
-
print_values = colors.yellow(_format_values(unmapped_values))
|
93
|
-
name = registry.__name__
|
94
|
-
n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
|
95
|
-
if not mute:
|
96
|
-
logger.warning(
|
97
|
-
f"{colors.red('did not create')} {name} record{s} for "
|
98
|
-
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
|
99
|
-
)
|
100
|
-
# if registry.__get_module_name__() == "bionty" or registry == ULabel:
|
101
|
-
# if isinstance(iterable, pd.Series):
|
102
|
-
# feature = iterable.name
|
103
|
-
# feature_name = None
|
104
|
-
# if isinstance(feature, str):
|
105
|
-
# feature_name = feature
|
106
|
-
# if feature_name is not None:
|
107
|
-
# if feature_name is not None:
|
108
|
-
# for record in records:
|
109
|
-
# record._feature = feature_name
|
110
|
-
# logger.debug(f"added default feature '{feature_name}'")
|
111
|
-
return RecordList(records)
|
112
|
-
finally:
|
113
|
-
settings.creation.search_names = creation_search_names
|
95
|
+
return RecordList(records)
|
114
96
|
|
115
97
|
|
116
98
|
def get_existing_records(
|
@@ -120,10 +102,10 @@ def get_existing_records(
|
|
120
102
|
mute: bool = False,
|
121
103
|
):
|
122
104
|
# NOTE: existing records matching is agnostic to the source
|
123
|
-
model = field.field.model
|
124
|
-
if organism is None and field.field.name == "ensembl_gene_id":
|
105
|
+
model = field.field.model # type: ignore
|
106
|
+
if organism is None and field.field.name == "ensembl_gene_id": # type: ignore
|
125
107
|
if len(iterable_idx) > 0:
|
126
|
-
organism = _ensembl_prefix(iterable_idx[0], field, organism)
|
108
|
+
organism = _ensembl_prefix(iterable_idx[0], field, organism) # type: ignore
|
127
109
|
organism = _get_organism_record(field, organism, force=True)
|
128
110
|
|
129
111
|
# standardize based on the DB reference
|
@@ -152,6 +134,7 @@ def get_existing_records(
|
|
152
134
|
is_validated = model.validate(
|
153
135
|
iterable_idx, field=field, organism=organism, mute=True
|
154
136
|
)
|
137
|
+
|
155
138
|
if len(is_validated) > 0:
|
156
139
|
validated = iterable_idx[is_validated]
|
157
140
|
else:
|
@@ -165,7 +148,7 @@ def get_existing_records(
|
|
165
148
|
msg = (
|
166
149
|
"loaded"
|
167
150
|
f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
|
168
|
-
f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
|
151
|
+
f" matching {colors.italic(f'{field.field.name}')}: {print_values}" # type: ignore
|
169
152
|
)
|
170
153
|
if len(syn_mapper) > 0:
|
171
154
|
s = "" if len(syn_mapper) == 1 else "s"
|
@@ -189,7 +172,7 @@ def get_existing_records(
|
|
189
172
|
# get all existing records in the db
|
190
173
|
# if necessary, create records for the values in kwargs
|
191
174
|
# k:v -> k:v_record
|
192
|
-
query = {f"{field.field.name}__in": iterable_idx.values}
|
175
|
+
query = {f"{field.field.name}__in": iterable_idx.values} # type: ignore
|
193
176
|
if organism is not None:
|
194
177
|
query["organism"] = organism
|
195
178
|
records = model.filter(**query).list()
|
@@ -209,7 +192,7 @@ def create_records_from_source(
|
|
209
192
|
msg: str = "",
|
210
193
|
mute: bool = False,
|
211
194
|
):
|
212
|
-
model = field.field.model
|
195
|
+
model = field.field.model # type: ignore
|
213
196
|
records: list = []
|
214
197
|
# populate additional fields from bionty
|
215
198
|
from bionty._bionty import get_source_record
|
@@ -232,11 +215,11 @@ def create_records_from_source(
|
|
232
215
|
# standardize in the bionty reference
|
233
216
|
# do not inspect synonyms if the field is not name field
|
234
217
|
inspect_synonyms = True
|
235
|
-
if hasattr(model, "_name_field") and field.field.name != model._name_field:
|
218
|
+
if hasattr(model, "_name_field") and field.field.name != model._name_field: # type: ignore
|
236
219
|
inspect_synonyms = False
|
237
220
|
result = public_ontology.inspect(
|
238
221
|
iterable_idx,
|
239
|
-
field=field.field.name,
|
222
|
+
field=field.field.name, # type: ignore
|
240
223
|
mute=True,
|
241
224
|
inspect_synonyms=inspect_synonyms,
|
242
225
|
)
|
@@ -257,12 +240,14 @@ def create_records_from_source(
|
|
257
240
|
|
258
241
|
# create records for values that are found in the bionty reference
|
259
242
|
# matching either field or synonyms
|
260
|
-
mapped_values = iterable_idx.intersection(bionty_df[field.field.name])
|
243
|
+
mapped_values = iterable_idx.intersection(bionty_df[field.field.name]) # type: ignore
|
261
244
|
|
262
245
|
multi_msg = ""
|
263
246
|
if len(mapped_values) > 0:
|
264
247
|
bionty_kwargs, multi_msg = _bulk_create_dicts_from_df(
|
265
|
-
keys=mapped_values,
|
248
|
+
keys=mapped_values,
|
249
|
+
column_name=field.field.name, # type: ignore
|
250
|
+
df=bionty_df,
|
266
251
|
)
|
267
252
|
|
268
253
|
if hasattr(model, "organism_id") and organism is None:
|
@@ -274,7 +259,7 @@ def create_records_from_source(
|
|
274
259
|
else {"source": source}
|
275
260
|
)
|
276
261
|
for bk in bionty_kwargs:
|
277
|
-
records.append(model(**bk, **create_kwargs))
|
262
|
+
records.append(model(**bk, **create_kwargs, _skip_validation=True))
|
278
263
|
|
279
264
|
# number of records that matches field (not synonyms)
|
280
265
|
validated = result.validated
|
@@ -288,7 +273,7 @@ def create_records_from_source(
|
|
288
273
|
logger.success(
|
289
274
|
"created"
|
290
275
|
f" {colors.purple(f'{len(validated)} {model.__name__} record{s} from Bionty')}"
|
291
|
-
f" matching {colors.italic(f'{field.field.name}')}: {print_values}"
|
276
|
+
f" matching {colors.italic(f'{field.field.name}')}: {print_values}" # type: ignore
|
292
277
|
)
|
293
278
|
|
294
279
|
# make sure that synonyms logging appears after the field logging
|
@@ -365,7 +350,7 @@ def _has_organism_field(registry: type[Record]) -> bool:
|
|
365
350
|
return False
|
366
351
|
|
367
352
|
|
368
|
-
def _get_organism_record(
|
353
|
+
def _get_organism_record( # type: ignore
|
369
354
|
field: StrField, organism: str | Record, force: bool = False
|
370
355
|
) -> Record:
|
371
356
|
"""Get organism record.
|
@@ -375,10 +360,10 @@ def _get_organism_record(
|
|
375
360
|
organism: the organism to get the record for
|
376
361
|
force: whether to force fetching the organism record
|
377
362
|
"""
|
378
|
-
registry = field.field.model
|
363
|
+
registry = field.field.model # type: ignore
|
379
364
|
check = True
|
380
365
|
if not force and hasattr(registry, "_ontology_id_field"):
|
381
|
-
check = field.field.name != registry._ontology_id_field
|
366
|
+
check = field.field.name != registry._ontology_id_field # type: ignore
|
382
367
|
# e.g. bionty.CellMarker has "name" as _ontology_id_field
|
383
368
|
if not registry._ontology_id_field.endswith("id"):
|
384
369
|
check = True
|
@@ -397,10 +382,10 @@ def _get_organism_record(
|
|
397
382
|
|
398
383
|
|
399
384
|
def _ensembl_prefix(id: str, field: StrField, organism: Record | None) -> str | None:
|
400
|
-
if field.field.name == "ensembl_gene_id" and organism is None:
|
385
|
+
if field.field.name == "ensembl_gene_id" and organism is None: # type: ignore
|
401
386
|
if id.startswith("ENSG"):
|
402
|
-
organism = "human"
|
387
|
+
organism = "human" # type: ignore
|
403
388
|
elif id.startswith("ENSMUSG"):
|
404
|
-
organism = "mouse"
|
389
|
+
organism = "mouse" # type: ignore
|
405
390
|
|
406
391
|
return organism
|
lamindb/_parents.py
CHANGED
@@ -44,7 +44,7 @@ def _query_relatives(
|
|
44
44
|
kind: Literal["parents", "children"],
|
45
45
|
cls: type[HasParents],
|
46
46
|
) -> QuerySet:
|
47
|
-
relatives = cls.objects.none()
|
47
|
+
relatives = cls.objects.none() # type: ignore
|
48
48
|
if len(records) == 0:
|
49
49
|
return relatives
|
50
50
|
for record in records:
|
@@ -350,9 +350,9 @@ def _record_label(record: Record, field: str | None = None):
|
|
350
350
|
)
|
351
351
|
elif isinstance(record, Run):
|
352
352
|
if record.transform.description:
|
353
|
-
name = f
|
353
|
+
name = f"{record.transform.description.replace('&', '&amp;')}"
|
354
354
|
elif record.transform.key:
|
355
|
-
name = f
|
355
|
+
name = f"{record.transform.key.replace('&', '&amp;')}"
|
356
356
|
else:
|
357
357
|
name = f"{record.transform.uid}"
|
358
358
|
user_display = (
|
@@ -366,7 +366,7 @@ def _record_label(record: Record, field: str | None = None):
|
|
366
366
|
rf" user={user_display}<BR/>run={format_field_value(record.started_at)}</FONT>>"
|
367
367
|
)
|
368
368
|
elif isinstance(record, Transform):
|
369
|
-
name = f
|
369
|
+
name = f"{record.name.replace('&', '&amp;')}"
|
370
370
|
return (
|
371
371
|
rf'<{TRANSFORM_EMOJIS.get(str(record.type), "💫")} {name}<BR/><FONT COLOR="GREY" POINT-SIZE="10"'
|
372
372
|
rf' FACE="Monospace">uid={record.uid}<BR/>type={record.type},'
|
lamindb/_query_set.py
CHANGED
@@ -8,6 +8,7 @@ from collections.abc import Iterable as IterableType
|
|
8
8
|
from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar
|
9
9
|
|
10
10
|
import pandas as pd
|
11
|
+
from django.core.exceptions import FieldError
|
11
12
|
from django.db import models
|
12
13
|
from django.db.models import F, ForeignKey, ManyToManyField
|
13
14
|
from django.db.models.fields.related import ForeignObjectRel
|
@@ -26,7 +27,7 @@ from lamindb.models import (
|
|
26
27
|
Transform,
|
27
28
|
)
|
28
29
|
|
29
|
-
from .
|
30
|
+
from .errors import DoesNotExist
|
30
31
|
|
31
32
|
T = TypeVar("T")
|
32
33
|
|
@@ -91,14 +92,12 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
91
92
|
"n_objects": "n_files",
|
92
93
|
"visibility": "_branch_code", # for convenience (and backward compat <1.0)
|
93
94
|
"transform": "run__transform", # for convenience (and backward compat <1.0)
|
94
|
-
"feature_sets": "_schemas_m2m",
|
95
95
|
"type": "kind",
|
96
96
|
"_accessor": "otype",
|
97
97
|
}
|
98
98
|
elif queryset.model == Schema:
|
99
99
|
name_mappings = {
|
100
100
|
"registry": "itype",
|
101
|
-
"artifacts": "_artifacts_m2m", # will raise warning when we start to migrate over
|
102
101
|
}
|
103
102
|
else:
|
104
103
|
return expressions
|
@@ -114,7 +113,6 @@ def get_backward_compat_filter_kwargs(queryset, expressions):
|
|
114
113
|
if parts[0] not in {
|
115
114
|
"transform",
|
116
115
|
"visibility",
|
117
|
-
"feature_sets",
|
118
116
|
"schemas",
|
119
117
|
"artifacts",
|
120
118
|
}:
|
@@ -203,7 +201,7 @@ def get(
|
|
203
201
|
qs = QuerySet(model=registry_or_queryset)
|
204
202
|
registry = registry_or_queryset
|
205
203
|
if isinstance(idlike, int):
|
206
|
-
return super(QuerySet, qs).get(id=idlike)
|
204
|
+
return super(QuerySet, qs).get(id=idlike) # type: ignore
|
207
205
|
elif isinstance(idlike, str):
|
208
206
|
qs = qs.filter(uid__startswith=idlike)
|
209
207
|
if issubclass(registry, IsVersioned):
|
@@ -216,7 +214,27 @@ def get(
|
|
216
214
|
else:
|
217
215
|
assert idlike is None # noqa: S101
|
218
216
|
expressions = process_expressions(qs, expressions)
|
219
|
-
|
217
|
+
# don't want _branch_code here in .get(), only in .filter()
|
218
|
+
expressions.pop("_branch_code", None)
|
219
|
+
# inject is_latest for consistency with idlike
|
220
|
+
is_latest_was_not_in_expressions = "is_latest" not in expressions
|
221
|
+
if issubclass(registry, IsVersioned) and is_latest_was_not_in_expressions:
|
222
|
+
expressions["is_latest"] = True
|
223
|
+
try:
|
224
|
+
return registry.objects.using(qs.db).get(**expressions)
|
225
|
+
except registry.DoesNotExist:
|
226
|
+
# handle the case in which the is_latest injection led to a missed query
|
227
|
+
if "is_latest" in expressions and is_latest_was_not_in_expressions:
|
228
|
+
expressions.pop("is_latest")
|
229
|
+
result = (
|
230
|
+
registry.objects.using(qs.db)
|
231
|
+
.filter(**expressions)
|
232
|
+
.order_by("-created_at")
|
233
|
+
.first()
|
234
|
+
)
|
235
|
+
if result is not None:
|
236
|
+
return result
|
237
|
+
raise registry.DoesNotExist from registry.DoesNotExist
|
220
238
|
|
221
239
|
|
222
240
|
class RecordList(UserList, Generic[T]):
|
@@ -537,13 +555,13 @@ class QuerySet(models.QuerySet):
|
|
537
555
|
elif isinstance(include, str):
|
538
556
|
include = [include]
|
539
557
|
include = get_backward_compat_filter_kwargs(self, include)
|
540
|
-
field_names = get_basic_field_names(self, include, features)
|
558
|
+
field_names = get_basic_field_names(self, include, features) # type: ignore
|
541
559
|
|
542
560
|
annotate_kwargs = {}
|
543
561
|
if features:
|
544
562
|
annotate_kwargs.update(get_feature_annotate_kwargs(features))
|
545
563
|
if include:
|
546
|
-
include = include.copy()[::-1]
|
564
|
+
include = include.copy()[::-1] # type: ignore
|
547
565
|
include_kwargs = {s: F(s) for s in include if s not in field_names}
|
548
566
|
annotate_kwargs.update(include_kwargs)
|
549
567
|
if annotate_kwargs:
|
@@ -561,12 +579,6 @@ class QuerySet(models.QuerySet):
|
|
561
579
|
pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
|
562
580
|
if pk_column_name in df_reshaped.columns:
|
563
581
|
df_reshaped = df_reshaped.set_index(pk_column_name)
|
564
|
-
|
565
|
-
# Compatibility code
|
566
|
-
df_reshaped.columns = df_reshaped.columns.str.replace(
|
567
|
-
r"_schemas_m2m", "feature_sets", regex=True
|
568
|
-
)
|
569
|
-
|
570
582
|
return df_reshaped
|
571
583
|
|
572
584
|
def delete(self, *args, **kwargs):
|
@@ -601,17 +613,65 @@ class QuerySet(models.QuerySet):
|
|
601
613
|
return None
|
602
614
|
return self[0]
|
603
615
|
|
616
|
+
def _handle_unknown_field(self, error: FieldError) -> None:
|
617
|
+
"""Suggest available fields if an unknown field was passed."""
|
618
|
+
if "Cannot resolve keyword" in str(error):
|
619
|
+
field = str(error).split("'")[1]
|
620
|
+
fields = ", ".join(
|
621
|
+
sorted(
|
622
|
+
f.name
|
623
|
+
for f in self.model._meta.get_fields()
|
624
|
+
if not f.name.startswith("_")
|
625
|
+
and not f.name.startswith("links_")
|
626
|
+
and not f.name.endswith("_id")
|
627
|
+
)
|
628
|
+
)
|
629
|
+
raise FieldError(
|
630
|
+
f"Unknown field '{field}'. Available fields: {fields}"
|
631
|
+
) from None
|
632
|
+
raise error # pragma: no cover
|
633
|
+
|
604
634
|
def get(self, idlike: int | str | None = None, **expressions) -> Record:
|
605
635
|
"""Query a single record. Raises error if there are more or none."""
|
606
|
-
|
636
|
+
try:
|
637
|
+
return get(self, idlike, **expressions)
|
638
|
+
except ValueError as e:
|
639
|
+
# Pass through original error for explicit id lookups
|
640
|
+
if "Field 'id' expected a number" in str(e):
|
641
|
+
if "id" in expressions:
|
642
|
+
raise
|
643
|
+
field = next(iter(expressions))
|
644
|
+
raise FieldError(
|
645
|
+
f"Invalid lookup '{expressions[field]}' for {field}. Did you mean {field}__name?"
|
646
|
+
) from None
|
647
|
+
raise # pragma: no cover
|
648
|
+
except FieldError as e:
|
649
|
+
self._handle_unknown_field(e)
|
650
|
+
raise # pragma: no cover
|
607
651
|
|
608
652
|
def filter(self, *queries, **expressions) -> QuerySet:
|
609
653
|
"""Query a set of records."""
|
654
|
+
# Suggest to use __name for related fields such as id when not passed
|
655
|
+
for field, value in expressions.items():
|
656
|
+
if (
|
657
|
+
isinstance(value, str)
|
658
|
+
and value.strip("-").isalpha()
|
659
|
+
and "__" not in field
|
660
|
+
and hasattr(self.model, field)
|
661
|
+
):
|
662
|
+
field_attr = getattr(self.model, field)
|
663
|
+
if hasattr(field_attr, "field") and field_attr.field.related_model:
|
664
|
+
raise FieldError(
|
665
|
+
f"Invalid lookup '{value}' for {field}. Did you mean {field}__name?"
|
666
|
+
)
|
667
|
+
|
610
668
|
expressions = process_expressions(self, expressions)
|
611
669
|
if len(expressions) > 0:
|
612
|
-
|
613
|
-
|
614
|
-
|
670
|
+
try:
|
671
|
+
return super().filter(*queries, **expressions)
|
672
|
+
except FieldError as e:
|
673
|
+
self._handle_unknown_field(e)
|
674
|
+
return self
|
615
675
|
|
616
676
|
def one(self) -> Record:
|
617
677
|
"""Exactly one result. Raises error if there are more or none."""
|