lamindb 0.77.2__py3-none-any.whl → 0.77.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +1 -1
- lamindb/_can_curate.py +6 -4
- lamindb/_curate.py +589 -45
- lamindb/_finish.py +7 -7
- lamindb/_from_values.py +7 -7
- lamindb/_record.py +7 -5
- lamindb/_save.py +9 -2
- lamindb/_view.py +2 -1
- lamindb/core/__init__.py +2 -0
- lamindb/core/_context.py +2 -4
- lamindb/core/_django.py +30 -17
- lamindb/core/_feature_manager.py +64 -41
- lamindb/core/_label_manager.py +15 -14
- lamindb/core/loaders.py +18 -1
- lamindb/core/storage/_tiledbsoma.py +2 -0
- {lamindb-0.77.2.dist-info → lamindb-0.77.4.dist-info}/METADATA +5 -4
- {lamindb-0.77.2.dist-info → lamindb-0.77.4.dist-info}/RECORD +19 -19
- {lamindb-0.77.2.dist-info → lamindb-0.77.4.dist-info}/WHEEL +1 -1
- {lamindb-0.77.2.dist-info → lamindb-0.77.4.dist-info}/LICENSE +0 -0
lamindb/_finish.py
CHANGED
@@ -18,8 +18,8 @@ if TYPE_CHECKING:
     from ._query_set import QuerySet
 
 
-def
-    return f"Please save the notebook in
+def get_save_notebook_message() -> str:
+    return f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `finish()`"
 
 
 def get_shortcut() -> str:
@@ -114,7 +114,7 @@ def clean_r_notebook_html(file_path: Path) -> tuple[str | None, Path]:
     cleaned_content = re.sub(pattern_title, "", cleaned_content)
     cleaned_content = re.sub(pattern_h1, "", cleaned_content)
     cleaned_content = cleaned_content.replace(
-        f"NotebookNotSaved: {
+        f"NotebookNotSaved: {get_save_notebook_message()}", ""
     )
     cleaned_path = file_path.parent / (f"{file_path.stem}.cleaned{file_path.suffix}")
     cleaned_path.write_text(cleaned_content)
@@ -189,6 +189,10 @@ def save_context_core(
         logger.warning(
             f"no {filepath.with_suffix('.nb.html')} found, save your manually rendered .html report via the CLI: lamin save {filepath}"
         )
+    if report_path is not None and not from_cli:
+        if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
+            # this can happen when auto-knitting an html with RStudio
+            raise NotebookNotSaved(get_save_notebook_message())
     ln.settings.creation.artifact_silence_missing_run_warning = True
     # track source code
     hash, _ = hash_file(source_code_path)  # ignore hash_type for now
@@ -249,10 +253,6 @@ def save_context_core(
 
     # track report and set is_consecutive
     if report_path is not None:
-        if not from_cli:
-            if get_seconds_since_modified(report_path) > 2 and not ln_setup._TESTING:
-                # this can happen when auto-knitting an html with RStudio
-                raise NotebookNotSaved(get_r_save_notebook_message())
        if is_r_notebook:
            title_text, report_path = clean_r_notebook_html(report_path)
        if title_text is not None:
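Note: this release factors the "save your notebook" warning into `get_save_notebook_message()` and runs the staleness check earlier in `save_context_core()`. The `get_seconds_since_modified()` helper it relies on is imported elsewhere in this release but its body is not shown in the diff; the following is a minimal, hypothetical sketch of how such a modification-time guard works:

```python
import time
from pathlib import Path


def seconds_since_modified(path: Path) -> float:
    # seconds elapsed since the file was last written to disk
    return time.time() - path.stat().st_mtime


# demo on a throwaway file; lamindb applies the same idea to the notebook
# file and raises NotebookNotSaved when it was modified more than ~2 s ago
demo = Path("demo.txt")
demo.write_text("hello")
print(seconds_since_modified(demo) < 2)  # True: just written, counts as saved
demo.unlink()
```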
lamindb/_from_values.py
CHANGED
@@ -95,7 +95,7 @@ def get_or_create_records(
         if len(msg) > 0 and not mute:
             logger.success(msg)
         s = "" if len(unmapped_values) == 1 else "s"
-        print_values = colors.yellow(
+        print_values = colors.yellow(_format_values(unmapped_values))
         name = registry.__name__
         n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
         if not mute:
@@ -167,7 +167,7 @@ def get_existing_records(
     if not mute:
         if len(validated) > 0:
             s = "" if len(validated) == 1 else "s"
-            print_values = colors.green(
+            print_values = colors.green(_format_values(validated))
             msg = (
                 "loaded"
                 f" {colors.green(f'{len(validated)} {model.__name__} record{s}')}"
@@ -176,7 +176,7 @@ def get_existing_records(
         if len(syn_mapper) > 0:
             s = "" if len(syn_mapper) == 1 else "s"
             names = list(syn_mapper.keys())
-            print_values = colors.green(
+            print_values = colors.green(_format_values(names))
             syn_msg = (
                 "loaded"
                 f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')}"
@@ -243,7 +243,7 @@ def create_records_from_source(
         if len(syn_mapper) > 0:
             s = "" if len(syn_mapper) == 1 else "s"
             names = list(syn_mapper.keys())
-            print_values = colors.purple(
+            print_values = colors.purple(_format_values(names))
             msg_syn = (
                 "created"
                 f" {colors.purple(f'{len(syn_mapper)} {model.__name__} record{s} from Bionty')}"
@@ -277,7 +277,7 @@ def create_records_from_source(
     validated = result.validated
     if len(validated) > 0:
         s = "" if len(validated) == 1 else "s"
-        print_values = colors.purple(
+        print_values = colors.purple(_format_values(validated))
         # this is the success msg for existing records in the DB
         if len(msg) > 0 and not mute:
             logger.success(msg)
@@ -307,7 +307,7 @@ def index_iterable(iterable: Iterable) -> pd.Index:
     return idx[(idx != "") & (~idx.isnull())]
 
 
-def
+def _format_values(
     names: Iterable, n: int = 20, quotes: bool = True, sep: str = "'"
 ) -> str:
     if isinstance(names, dict):
@@ -345,7 +345,7 @@ def _bulk_create_dicts_from_df(
     dup = df.index[df.index.duplicated()].unique().tolist()
     if len(dup) > 0:
         s = "" if len(dup) == 1 else "s"
-        print_values =
+        print_values = _format_values(dup)
         multi_msg = (
             f"ambiguous validation in Bionty for {len(dup)} record{s}:"
             f" {print_values}"
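The diff only shows the signature of the renamed `_format_values` helper; its body is elided above. A minimal sketch of a formatter matching that signature (an illustration, not the library's implementation) could look like:

```python
from collections.abc import Iterable


def format_values(
    names: Iterable, n: int = 20, quotes: bool = True, sep: str = "'"
) -> str:
    """Render an iterable as a short, comma-separated preview string."""
    if isinstance(names, dict):
        names = names.keys()
    values = [str(v) for v in names]
    shown = [f"{sep}{v}{sep}" if quotes else v for v in values[:n]]
    preview = ", ".join(shown)
    # append an ellipsis when more than n values were passed
    return preview + ", ..." if len(values) > n else preview


print(format_values(["CD8A", "CD4"]))                 # 'CD8A', 'CD4'
print(format_values({"a": 1, "b": 2}, quotes=False))  # a, b
```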
lamindb/_record.py
CHANGED
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import builtins
+import re
 from functools import reduce
 from typing import TYPE_CHECKING, NamedTuple
 
@@ -316,6 +317,7 @@ def _search(
         string = string[:n_80_pct]
 
     string = string.strip()
+    string_escape = re.escape(string)
 
     exact_lookup = Exact if case_sensitive else IExact
     regex_lookup = Regex if case_sensitive else IRegex
@@ -334,28 +336,28 @@ def _search(
         exact_rank = Cast(exact_expr, output_field=IntegerField()) * 200
         ranks.append(exact_rank)
         # exact synonym
-        synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){
+        synonym_expr = regex_lookup(field_expr, rf"(?:^|.*\|){string_escape}(?:\|.*|$)")
         synonym_rank = Cast(synonym_expr, output_field=IntegerField()) * 200
         ranks.append(synonym_rank)
         # match as sub-phrase
         sub_expr = regex_lookup(
-            field_expr, rf"(?:^|.*[ \|\.,;:]){
+            field_expr, rf"(?:^|.*[ \|\.,;:]){string_escape}(?:[ \|\.,;:].*|$)"
         )
         sub_rank = Cast(sub_expr, output_field=IntegerField()) * 10
         ranks.append(sub_rank)
         # startswith and avoid matching string with " " on the right
         # mostly for truncated
         startswith_expr = regex_lookup(
-            field_expr, rf"(?:^|.*\|){
+            field_expr, rf"(?:^|.*\|){string_escape}[^ ]*(?:\|.*|$)"
         )
         startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
         ranks.append(startswith_rank)
         # match as sub-phrase from the left, mostly for truncated
-        right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){
+        right_expr = regex_lookup(field_expr, rf"(?:^|.*[ \|]){string_escape}.*")
         right_rank = Cast(right_expr, output_field=IntegerField()) * 2
         ranks.append(right_rank)
         # match as sub-phrase from the right
-        left_expr = regex_lookup(field_expr, rf".*{
+        left_expr = regex_lookup(field_expr, rf".*{string_escape}(?:$|[ \|\.,;:].*)")
         left_rank = Cast(left_expr, output_field=IntegerField()) * 2
         ranks.append(left_rank)
         # simple contains filter
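The `_search` change wraps the query string in `re.escape()` before interpolating it into the ranking regexes. In lamindb the patterns are evaluated by the database via Django's regex lookups, but the motivation is the same as in plain Python: a query containing regex metacharacters would otherwise change, or break, the pattern. A small illustration with the standard `re` module:

```python
import re

query = "TNF (tumor necrosis factor)"
synonyms = "TNF (tumor necrosis factor)|TNFA|TNFSF2"

unescaped = rf"(?:^|.*\|){query}(?:\|.*|$)"
escaped = rf"(?:^|.*\|){re.escape(query)}(?:\|.*|$)"

# unescaped parentheses become a regex group, so the literal text no longer matches
print(bool(re.match(unescaped, synonyms)))  # False
print(bool(re.match(escaped, synonyms)))    # True
# a query like "IL2[" would even raise re.error without escaping
```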
lamindb/_save.py
CHANGED
@@ -184,10 +184,17 @@ def copy_or_move_to_cache(
         return None
     # non-local storage_path further
     if local_path != cache_path:
-        cache_path.
-
+        if cache_path.exists():
+            logger.warning(
+                f"The cache path {cache_path.as_posix()} already exists, replacing it."
+            )
             if cache_path.is_dir():
                 shutil.rmtree(cache_path)
+            else:
+                cache_path.unlink()
+        else:
+            cache_path.parent.mkdir(parents=True, exist_ok=True)
+        if cache_dir in local_path.parents:
             local_path.replace(cache_path)
         else:
             if is_dir:
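The new branch makes the cache move overwrite-safe: an existing destination is removed first (recursively for directories, `unlink` for files), and the parent directory is created only when the destination does not yet exist. A standalone sketch of that pattern, assuming local `pathlib` paths (the real function additionally distinguishes cached vs. copied sources via `cache_dir`):

```python
import shutil
from pathlib import Path


def move_into_cache(local_path: Path, cache_path: Path) -> None:
    """Move local_path to cache_path, replacing whatever is already there."""
    if cache_path.exists():
        # clear the slot first: rmtree for directories, unlink for files
        if cache_path.is_dir():
            shutil.rmtree(cache_path)
        else:
            cache_path.unlink()
    else:
        # ensure the destination directory exists before moving
        cache_path.parent.mkdir(parents=True, exist_ok=True)
    local_path.replace(cache_path)
```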
lamindb/_view.py
CHANGED
@@ -5,7 +5,6 @@ import importlib
 import inspect
 from typing import TYPE_CHECKING
 
-from IPython.display import HTML, display
 from lamin_utils import colors, logger
 from lamindb_setup import settings
 from lamindb_setup._init_instance import get_schema_module_name
@@ -24,6 +23,8 @@ is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
 def display_df_with_descriptions(
     df: pd.DataFrame, descriptions: dict[str, str] | None = None
 ):
+    from IPython.display import HTML, display
+
     if descriptions is None:
         display(df)
         return None
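Moving the `IPython.display` import from module level into `display_df_with_descriptions()` is the usual lazy-import pattern: the module can be imported in environments without IPython, and the dependency is only resolved when the function actually runs. In general form (hypothetical function, not part of the release):

```python
def display_html_table(df) -> None:
    # deferred import: fails only if the function is called without IPython
    # installed, not when the enclosing module is imported
    from IPython.display import HTML, display

    display(HTML(df.to_html()))
```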
lamindb/core/__init__.py
CHANGED
@@ -33,6 +33,7 @@ Curators:
    DataFrameCurator
    AnnDataCurator
    MuDataCurator
+   SOMACurator
    CurateLookup
 
 Settings & context:
@@ -86,6 +87,7 @@ from lamindb._curate import (
     CurateLookup,
     DataFrameCurator,
     MuDataCurator,
+    SOMACurator,
 )
 from lamindb._query_manager import QueryManager
 from lamindb._query_set import QuerySet, RecordList
lamindb/core/_context.py
CHANGED
@@ -580,8 +580,8 @@ class Context:
 
         """
         from lamindb._finish import (
+            get_save_notebook_message,
             get_seconds_since_modified,
-            get_shortcut,
             save_context_core,
         )
 
@@ -604,9 +604,7 @@ class Context:
                     self.transform.name = nbproject_title
                     self.transform.save()
             if get_seconds_since_modified(self._path) > 2 and not ln_setup._TESTING:
-                raise NotebookNotSaved(
-                    f"Please save the notebook in your editor (shortcut `{get_shortcut()}`) within 2 sec before calling `ln.finish()`"
-                )
+                raise NotebookNotSaved(get_save_notebook_message())
         save_context_core(
             run=self.run,
             transform=self.run.transform,
lamindb/core/_django.py
CHANGED
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from functools import reduce
+
 from django.contrib.postgres.aggregates import ArrayAgg
 from django.db import connection
 from django.db.models import F, OuterRef, Q, Subquery
@@ -81,15 +83,6 @@ def get_artifact_with_related(
             id=F(f"{fk}__id"), name=F(f"{fk}__{name_field}")
         )
 
-    for name in m2m_relations:
-        related_model = get_related_model(model, name)
-        name_field = get_name_field(related_model)
-        annotations[f"m2mfield_{name}"] = ArrayAgg(
-            JSONObject(id=F(f"{name}__id"), name=F(f"{name}__{name_field}")),
-            filter=Q(**{f"{name}__isnull": False}),
-            distinct=True,
-        )
-
     for link in link_tables:
         link_model = getattr(model, link).rel.related_model
         if not hasattr(link_model, "feature"):
@@ -137,9 +130,7 @@ def get_artifact_with_related(
 
     related_data: dict = {"m2m": {}, "fk": {}, "link": {}, "featuresets": {}}
     for k, v in artifact_meta.items():
-        if k.startswith("
-            related_data["m2m"][k[9:]] = v
-        elif k.startswith("fkfield_"):
+        if k.startswith("fkfield_"):
             related_data["fk"][k[8:]] = v
         elif k.startswith("linkfield_"):
             related_data["link"][k[10:]] = v
@@ -149,11 +140,33 @@ def get_artifact_with_related(
                 artifact, {i["featureset"]: i["slot"] for i in v}
             )
 
-
-
-
-
-
+    if len(m2m_relations) == 0:
+        m2m_any = False
+    else:
+        m2m_any_expr = reduce(
+            lambda a, b: a | b,
+            (Q(**{f"{m2m_name}__isnull": False}) for m2m_name in m2m_relations),
+        )
+        # this is needed to avoid querying all m2m relations even if they are all empty
+        # this checks if non-empty m2m relations are present in the record
+        m2m_any = (
+            model.objects.using(artifact._state.db)
+            .filter(uid=artifact.uid)
+            .filter(m2m_any_expr)
+            .exists()
+        )
+    if m2m_any:
+        m2m_data = related_data["m2m"]
+        for m2m_name in m2m_relations:
+            related_model = get_related_model(model, m2m_name)
+            name_field = get_name_field(related_model)
+            m2m_records = (
+                getattr(artifact, m2m_name).values_list("id", name_field).distinct()
+            )
+            for rec_id, rec_name in m2m_records:
+                if m2m_name not in m2m_data:
+                    m2m_data[m2m_name] = {}
+                m2m_data[m2m_name][rec_id] = rec_name
 
     return {
         **{name: artifact_meta[name] for name in ["id", "uid"]},
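Instead of aggregating every many-to-many relation with `ArrayAgg` up front, the new code first asks the database whether any of them is non-empty by OR-ing `Q` objects and issuing a single `EXISTS` query, and only then fetches values per relation. A fragment illustrating the `Q`-reduction (it assumes a configured Django project; `Artifact`, the relation names, and `artifact_uid` are placeholders):

```python
from functools import reduce

from django.db.models import Q

m2m_relations = ["ulabels", "tissues", "cell_types"]  # placeholder relation names

# one OR of "this relation is non-empty" checks, evaluated as a single EXISTS query
any_m2m_nonempty = reduce(
    lambda a, b: a | b,
    (Q(**{f"{name}__isnull": False}) for name in m2m_relations),
)
has_m2m = Artifact.objects.filter(uid=artifact_uid).filter(any_m2m_nonempty).exists()
```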
lamindb/core/_feature_manager.py
CHANGED
@@ -42,7 +42,7 @@ from lamindb._feature import (
     suggest_categorical_for_str_iterable,
 )
 from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
-from lamindb._from_values import
+from lamindb._from_values import _format_values
 from lamindb._record import (
     REGISTRY_UNIQUE_FIELD,
     get_name_field,
@@ -282,13 +282,15 @@ def _get_featuresets_postgres(
     return fs_data
 
 
-def _create_feature_table(
+def _create_feature_table(
+    name: str, registry_str: str, data: list, show_header: bool = False
+) -> Table:
     """Create a Rich table for a feature group."""
     table = Table(
         Column(name, style="", no_wrap=True, width=NAME_WIDTH),
         Column(registry_str, style="dim", no_wrap=True, width=TYPE_WIDTH),
         Column("", width=VALUES_WIDTH, no_wrap=True),
-        show_header=
+        show_header=show_header,
         box=None,
         pad_edge=False,
     )
@@ -331,7 +333,7 @@ def describe_features(
         fs_data = _get_featuresets_postgres(self, related_data=related_data)
         for fs_id, (slot, data) in fs_data.items():
             for registry_str, feature_names in data.items():
-                feature_set = FeatureSet.get(id=fs_id)
+                feature_set = FeatureSet.objects.using(self._state.db).get(id=fs_id)
                 feature_set_data[slot] = (feature_set, feature_names)
                 for feature_name in feature_names:
                     feature_data[feature_name] = (slot, registry_str)
@@ -345,15 +347,15 @@ def describe_features(
             for feature_name in feature_names:
                 feature_data[feature_name] = (slot, feature_set.registry)
 
-    internal_feature_names:
+    internal_feature_names: dict[str, str] = {}
     if isinstance(self, Artifact):
         feature_sets = self.feature_sets.filter(registry="Feature").all()
-        internal_feature_names =
+        internal_feature_names = {}
         if len(feature_sets) > 0:
             for feature_set in feature_sets:
-                internal_feature_names
-
-                )
+                internal_feature_names.update(
+                    dict(feature_set.members.values_list("name", "dtype"))
+                )
 
     # categorical feature values
     # Get the categorical data using the appropriate method
@@ -388,7 +390,7 @@ def describe_features(
 
             # Format message
             printed_values = (
-
+                _format_values(sorted(values), n=10, quotes=False)
                 if not is_list_type or not feature_dtype.startswith("list")
                 else sorted(values)
             )
@@ -407,25 +409,47 @@ def describe_features(
     if to_dict:
         return dictionary
 
-    # Dataset section
-
-
-    ] = {}  # internal features from the `Feature` registry that contain labels
+    # Dataset features section
+    # internal features that contain labels (only `Feature` features contain labels)
+    internal_feature_labels_slot: dict[str, list] = {}
     for feature_name, feature_row in internal_feature_labels.items():
         slot, _ = feature_data.get(feature_name)
-
-    dataset_tree_children = []
+        internal_feature_labels_slot.setdefault(slot, []).append(feature_row)
 
+    int_features_tree_children = []
     for slot, (feature_set, feature_names) in feature_set_data.items():
-        if slot in
-
+        if slot in internal_feature_labels_slot:
+            # add internal Feature features with labels
+            feature_rows = internal_feature_labels_slot[slot]
+            # add internal Feature features without labels
+            feature_rows += [
+                (
+                    feature_name,
+                    Text(str(internal_feature_names.get(feature_name)), style="dim"),
+                    "",
+                )
+                for feature_name in feature_names
+                if feature_name and feature_name not in internal_feature_labels
+            ]
         else:
+            # add internal non-Feature features without labels
             feature_rows = [
-                (
+                (
+                    feature_name,
+                    Text(
+                        str(
+                            internal_feature_names.get(feature_name)
+                            if feature_name in internal_feature_names
+                            else feature_set.dtype
+                        ),
+                        style="dim",
+                    ),
+                    "",
+                )
                 for feature_name in feature_names
                 if feature_name
             ]
-
+        int_features_tree_children.append(
             _create_feature_table(
                 Text.assemble(
                     (slot, "violet"),
@@ -434,46 +458,45 @@ def describe_features(
                 ),
                 Text.assemble((f"[{feature_set.registry}]", "pink1")),
                 feature_rows,
+                show_header=True,
             )
         )
     ## internal features from the non-`Feature` registry
-    if
+    if int_features_tree_children:
         dataset_tree = tree.add(
             Text.assemble(
-                ("Dataset", "bold bright_magenta"),
+                ("Dataset features", "bold bright_magenta"),
                 ("/", "dim"),
                 (".feature_sets", "dim bold"),
             )
         )
-        for child in
+        for child in int_features_tree_children:
             dataset_tree.add(child)
 
-    #
-
-    features_tree_children = []
+    # Linked features
+    ext_features_tree_children = []
     if external_data:
-
+        ext_features_tree_children.append(
             _create_feature_table(
-
-                ("Params" if print_params else "Features", "green_yellow")
-            ),
+                "",
                 "",
                 external_data,
             )
         )
-
-
-
-
-
+    # ext_features_tree = None
+    ext_features_header = Text(
+        "Params" if print_params else "Linked features", style="bold dark_orange"
+    )
+    if ext_features_tree_children:
+        ext_features_tree = tree.add(ext_features_header)
+        for child in ext_features_tree_children:
+            ext_features_tree.add(child)
     if with_labels:
-
+        # avoid querying the db if the labels were queried already
+        labels_data = related_data.get("m2m") if related_data is not None else None
+        labels_tree = describe_labels(self, labels_data=labels_data, as_subtree=True)
         if labels_tree:
-
-            annotations_tree = tree.add(
-                Text("Annotations", style="bold dark_orange")
-            )
-            annotations_tree.add(labels_tree)
+            tree.add(labels_tree)
 
     return tree
 
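`_create_feature_table()` now takes an explicit `show_header` flag, which the dataset-features tree switches on. A runnable approximation of the `rich` table it builds (column widths and the example row are made up for illustration):

```python
from rich.console import Console
from rich.table import Column, Table


def feature_table(name, registry_str, rows, show_header: bool = False) -> Table:
    # three fixed-width columns: feature name, registry/dtype, preview values
    table = Table(
        Column(name, style="", no_wrap=True, width=25),
        Column(registry_str, style="dim", no_wrap=True, width=25),
        Column("", width=40, no_wrap=True),
        show_header=show_header,
        box=None,
        pad_edge=False,
    )
    for row in rows:
        table.add_row(*row)
    return table


Console().print(
    feature_table("var", "[bionty.Gene]", [("CD8A", "str", "")], show_header=True)
)
```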
lamindb/core/_label_manager.py
CHANGED
@@ -5,12 +5,13 @@ from collections import defaultdict
 from typing import TYPE_CHECKING
 
 from django.db import connections
-from lamin_utils import
+from lamin_utils import logger
 from lnschema_core.models import CanCurate, Feature
 from rich.table import Column, Table
 from rich.text import Text
+from rich.tree import Tree
 
-from lamindb._from_values import
+from lamindb._from_values import _format_values
 from lamindb._record import (
     REGISTRY_UNIQUE_FIELD,
     get_name_field,
@@ -32,7 +33,6 @@ from .schema import dict_related_model_to_related_name
 
 if TYPE_CHECKING:
     from lnschema_core.models import Artifact, Collection, Record
-    from rich.tree import Tree
 
     from lamindb._query_set import QuerySet
 
@@ -99,15 +99,10 @@ def describe_labels(
         return tree
 
     labels_table = Table(
-        Column(
-            Text.assemble(("Labels", "green_yellow")),
-            style="",
-            no_wrap=True,
-            width=NAME_WIDTH,
-        ),
+        Column("", style="", no_wrap=True, width=NAME_WIDTH),
         Column("", style="dim", no_wrap=True, width=TYPE_WIDTH),
         Column("", width=VALUES_WIDTH, no_wrap=True),
-
+        show_header=False,
         box=None,
         pad_edge=False,
     )
@@ -115,10 +110,12 @@ def describe_labels(
         if not labels or related_name == "feature_sets":
             continue
         if isinstance(labels, dict):  # postgres, labels are a dict[id, name]
-            print_values =
+            print_values = _format_values(labels.values(), n=10, quotes=False)
         else:  # labels are a QuerySet
             field = get_name_field(labels)
-            print_values =
+            print_values = _format_values(
+                labels.values_list(field, flat=True), n=10, quotes=False
+            )
         if print_values:
             related_model = get_related_model(self, related_name)
             type_str = related_model.__get_name_with_schema__()
@@ -126,12 +123,16 @@ def describe_labels(
                 f".{related_name}", Text(type_str, style="dim"), print_values
             )
 
+    labels_header = Text("Labels", style="bold green_yellow")
     if as_subtree:
         if labels_table.rows:
-
+            labels_tree = Tree(labels_header, guide_style="dim")
+            labels_tree.add(labels_table)
+            return labels_tree
     else:
         if labels_table.rows:
-            tree.add(
+            labels_tree = tree.add(labels_header)
+            labels_tree.add(labels_table)
     return tree
 
 
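With `as_subtree=True`, `describe_labels()` now returns a standalone `rich` `Tree` whose child is the labels table, which `describe_features()` attaches to its own tree. A small self-contained sketch of that nesting (the row content is invented):

```python
from rich.console import Console
from rich.text import Text
from rich.tree import Tree

labels_header = Text("Labels", style="bold green_yellow")

# as_subtree=True branch: build a standalone subtree the caller can attach
labels_tree = Tree(labels_header, guide_style="dim")
labels_tree.add(".ulabels    ULabel    'setosa', 'versicolor'")

parent_tree = Tree("Artifact .describe()")
parent_tree.add(labels_tree)  # mirrors tree.add(labels_tree) in describe_features()
Console().print(parent_tree)
```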
lamindb/core/loaders.py
CHANGED
@@ -110,8 +110,23 @@ def load_json(path: UPathStr) -> dict:
     return data
 
 
+def load_yaml(path: UPathStr) -> dict | UPathStr:
+    """Load `.yaml` to `dict`."""
+    try:
+        import yaml  # type: ignore
+
+        with open(path) as f:
+            data = yaml.safe_load(f)
+        return data
+    except ImportError:
+        logger.warning(
+            "Please install PyYAML (`pip install PyYAML`) to load `.yaml` files."
+        )
+        return path
+
+
 def load_image(path: UPathStr) -> None | UPathStr:
-    """Display `.
+    """Display `.jpg`, `.gif` or `.png` in ipython, otherwise return path."""
     if is_run_from_ipython:
         from IPython.display import Image, display
 
@@ -147,7 +162,9 @@ FILE_LOADERS = {
     ".zarr": load_anndata_zarr,
     ".html": load_html,
     ".json": load_json,
+    ".yaml": load_yaml,
     ".h5mu": load_h5mu,
+    ".gif": load_image,
     ".jpg": load_image,
     ".png": load_image,
     ".svg": load_svg,
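`load_yaml` plugs into the same suffix-based dispatch as the other loaders: `FILE_LOADERS` maps a file extension to a callable, and unknown suffixes fall back to returning the path itself. A compact sketch of that dispatch together with a YAML loader (the `.yml` alias and file name are illustrative additions, not part of the release):

```python
from pathlib import Path

import yaml  # PyYAML; lamindb degrades gracefully when it is missing


def load_yaml(path):
    with open(path) as f:
        return yaml.safe_load(f)


LOADERS = {".yaml": load_yaml, ".yml": load_yaml}  # keyed by file suffix


def load(path):
    loader = LOADERS.get(Path(path).suffix)
    return loader(path) if loader is not None else path


p = Path("example.yaml")
p.write_text("storage: s3://my-bucket\nversion: 2\n")
print(load(p))  # {'storage': 's3://my-bucket', 'version': 2}
p.unlink()
```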
lamindb/core/storage/_tiledbsoma.py
CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import TYPE_CHECKING, Literal
 
 from anndata import AnnData, read_h5ad
+from lamin_utils import logger
 from lamindb_setup import settings as setup_settings
 from lamindb_setup.core._settings_storage import get_storage_region
 from lamindb_setup.core.upath import LocalPathClasses, create_path
@@ -178,6 +179,7 @@ def save_tiledbsoma_experiment(
             assert len(adata_objects) == 1  # noqa: S101
             n_observations = adata_objects[0].n_obs
 
+    logger.important(f"Writing the tiledbsoma store to {storepath}")
     for adata_obj in adata_objects:
         soma_io.from_anndata(
             storepath,
{lamindb-0.77.2.dist-info → lamindb-0.77.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: lamindb
-Version: 0.77.
+Version: 0.77.4
 Summary: A data framework for biology.
 Author-email: Lamin Labs <open-source@lamin.ai>
 Requires-Python: >=3.9,<3.13
@@ -10,12 +10,13 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Requires-Dist: lnschema_core==0.77.1
-Requires-Dist: lamin_utils==0.13.
+Requires-Dist: lamin_utils==0.13.10
 Requires-Dist: lamin_cli==0.22.0
-Requires-Dist: lamindb_setup==0.81.
+Requires-Dist: lamindb_setup==0.81.5
 Requires-Dist: pyarrow
 Requires-Dist: typing_extensions!=4.6.0
 Requires-Dist: python-dateutil
+Requires-Dist: pandas>=2.0.0
 Requires-Dist: anndata>=0.8.0,<=0.11.1
 Requires-Dist: fsspec
 Requires-Dist: graphviz