lamindb 0.74.1__py3-none-any.whl → 0.74.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lamindb/__init__.py +8 -5
- lamindb/_artifact.py +31 -41
- lamindb/_can_validate.py +24 -22
- lamindb/_collection.py +5 -6
- lamindb/{_annotate.py → _curate.py} +62 -40
- lamindb/_feature.py +7 -9
- lamindb/_feature_set.py +17 -18
- lamindb/_filter.py +5 -5
- lamindb/_finish.py +18 -6
- lamindb/_from_values.py +12 -12
- lamindb/_is_versioned.py +2 -2
- lamindb/_parents.py +7 -7
- lamindb/_query_manager.py +7 -7
- lamindb/_query_set.py +27 -27
- lamindb/{_registry.py → _record.py} +89 -48
- lamindb/_save.py +6 -6
- lamindb/_storage.py +1 -1
- lamindb/_view.py +4 -4
- lamindb/core/__init__.py +16 -12
- lamindb/core/_data.py +10 -10
- lamindb/core/_feature_manager.py +48 -31
- lamindb/core/_label_manager.py +5 -5
- lamindb/core/_mapped_collection.py +4 -1
- lamindb/core/_run_context.py +2 -2
- lamindb/core/_settings.py +2 -1
- lamindb/core/_sync_git.py +22 -12
- lamindb/core/_track_environment.py +5 -1
- lamindb/core/fields.py +1 -1
- lamindb/core/schema.py +6 -6
- lamindb/core/storage/_backed_access.py +56 -12
- lamindb/core/storage/paths.py +1 -1
- lamindb/core/versioning.py +1 -1
- lamindb/integrations/_vitessce.py +4 -3
- {lamindb-0.74.1.dist-info → lamindb-0.74.2.dist-info}/METADATA +5 -7
- lamindb-0.74.2.dist-info/RECORD +57 -0
- lamindb-0.74.1.dist-info/RECORD +0 -57
- {lamindb-0.74.1.dist-info → lamindb-0.74.2.dist-info}/LICENSE +0 -0
- {lamindb-0.74.1.dist-info → lamindb-0.74.2.dist-info}/WHEEL +0 -0
lamindb/_view.py
CHANGED
@@ -7,7 +7,7 @@ import inspect
|
|
7
7
|
from lamin_utils import colors, logger
|
8
8
|
from lamindb_setup import settings
|
9
9
|
from lamindb_setup._init_instance import get_schema_module_name
|
10
|
-
from lnschema_core import
|
10
|
+
from lnschema_core import Record
|
11
11
|
|
12
12
|
is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)
|
13
13
|
|
@@ -21,7 +21,7 @@ def view(
|
|
21
21
|
n: Display the last `n` rows of a registry.
|
22
22
|
schema: Schema module to view. Default's to
|
23
23
|
`None` and displays all schema modules.
|
24
|
-
registries: List of
|
24
|
+
registries: List of Record names. Defaults to
|
25
25
|
`None` and lists all registries.
|
26
26
|
|
27
27
|
Examples:
|
@@ -44,8 +44,8 @@ def view(
|
|
44
44
|
orm
|
45
45
|
for orm in schema_module.__dict__.values()
|
46
46
|
if inspect.isclass(orm)
|
47
|
-
and issubclass(orm,
|
48
|
-
and orm.__name__ != "
|
47
|
+
and issubclass(orm, Record)
|
48
|
+
and orm.__name__ != "Record"
|
49
49
|
}
|
50
50
|
if registries is not None:
|
51
51
|
filtered_registries = {
|
lamindb/core/__init__.py
CHANGED
@@ -5,7 +5,7 @@ Registries:
|
|
5
5
|
.. autosummary::
|
6
6
|
:toctree: .
|
7
7
|
|
8
|
-
|
8
|
+
Record
|
9
9
|
QuerySet
|
10
10
|
QueryManager
|
11
11
|
RecordsList
|
@@ -19,18 +19,20 @@ Registries:
|
|
19
19
|
HasParents
|
20
20
|
TracksRun
|
21
21
|
TracksUpdates
|
22
|
+
ParamValue
|
23
|
+
FeatureValue
|
22
24
|
InspectResult
|
23
25
|
fields
|
24
26
|
|
25
|
-
|
27
|
+
Curators:
|
26
28
|
|
27
29
|
.. autosummary::
|
28
30
|
:toctree: .
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
32
|
+
DataFrameCurator
|
33
|
+
AnnDataCurator
|
34
|
+
MuDataCurator
|
35
|
+
CurateLookup
|
34
36
|
|
35
37
|
Other:
|
36
38
|
|
@@ -57,20 +59,22 @@ Modules:
|
|
57
59
|
from lamin_utils._inspect import InspectResult
|
58
60
|
from lnschema_core.models import (
|
59
61
|
CanValidate,
|
62
|
+
FeatureValue,
|
60
63
|
HasFeatures,
|
61
64
|
HasParams,
|
62
65
|
HasParents,
|
63
66
|
IsVersioned,
|
64
|
-
|
67
|
+
ParamValue,
|
68
|
+
Record,
|
65
69
|
TracksRun,
|
66
70
|
TracksUpdates,
|
67
71
|
)
|
68
72
|
|
69
|
-
from lamindb.
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
73
|
+
from lamindb._curate import (
|
74
|
+
AnnDataCurator,
|
75
|
+
CurateLookup,
|
76
|
+
DataFrameCurator,
|
77
|
+
MuDataCurator,
|
74
78
|
)
|
75
79
|
from lamindb._query_manager import QueryManager
|
76
80
|
from lamindb._query_set import QuerySet, RecordsList
|
lamindb/core/_data.py
CHANGED
@@ -11,7 +11,7 @@ from lnschema_core.models import (
|
|
11
11
|
Feature,
|
12
12
|
FeatureSet,
|
13
13
|
HasFeatures,
|
14
|
-
|
14
|
+
Record,
|
15
15
|
Run,
|
16
16
|
ULabel,
|
17
17
|
__repr__,
|
@@ -20,7 +20,7 @@ from lnschema_core.models import (
|
|
20
20
|
|
21
21
|
from lamindb._parents import view_lineage
|
22
22
|
from lamindb._query_set import QuerySet
|
23
|
-
from lamindb.
|
23
|
+
from lamindb._record import get_default_str_field
|
24
24
|
from lamindb.core._settings import settings
|
25
25
|
|
26
26
|
from ._feature_manager import (
|
@@ -96,7 +96,7 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
|
|
96
96
|
|
97
97
|
@doc_args(HasFeatures.describe.__doc__)
|
98
98
|
def describe(self: HasFeatures, print_types: bool = False):
|
99
|
-
"""{}
|
99
|
+
"""{}""" # noqa: D415
|
100
100
|
# prefetch all many-to-many relationships
|
101
101
|
# doesn't work for describing using artifact
|
102
102
|
# self = (
|
@@ -166,7 +166,7 @@ def describe(self: HasFeatures, print_types: bool = False):
|
|
166
166
|
logger.print(msg)
|
167
167
|
|
168
168
|
|
169
|
-
def validate_feature(feature: Feature, records: list[
|
169
|
+
def validate_feature(feature: Feature, records: list[Record]) -> None:
|
170
170
|
"""Validate feature record, adjust feature.dtype based on labels records."""
|
171
171
|
if not isinstance(feature, Feature):
|
172
172
|
raise TypeError("feature has to be of type Feature")
|
@@ -183,7 +183,7 @@ def get_labels(
|
|
183
183
|
mute: bool = False,
|
184
184
|
flat_names: bool = False,
|
185
185
|
) -> QuerySet | dict[str, QuerySet] | list:
|
186
|
-
"""{}
|
186
|
+
"""{}""" # noqa: D415
|
187
187
|
if not isinstance(feature, Feature):
|
188
188
|
raise TypeError("feature has to be of type Feature")
|
189
189
|
if feature.dtype is None or not feature.dtype.startswith("cat["):
|
@@ -210,7 +210,7 @@ def get_labels(
|
|
210
210
|
).all()
|
211
211
|
if flat_names:
|
212
212
|
# returns a flat list of names
|
213
|
-
from lamindb.
|
213
|
+
from lamindb._record import get_default_str_field
|
214
214
|
|
215
215
|
values = []
|
216
216
|
for v in qs_by_registry.values():
|
@@ -224,18 +224,18 @@ def get_labels(
|
|
224
224
|
|
225
225
|
def add_labels(
|
226
226
|
self,
|
227
|
-
records:
|
227
|
+
records: Record | list[Record] | QuerySet | Iterable,
|
228
228
|
feature: Feature | None = None,
|
229
229
|
*,
|
230
230
|
field: StrField | None = None,
|
231
231
|
) -> None:
|
232
|
-
"""{}
|
232
|
+
"""{}""" # noqa: D415
|
233
233
|
if self._state.adding:
|
234
234
|
raise ValueError("Please save the artifact/collection before adding a label!")
|
235
235
|
|
236
236
|
if isinstance(records, (QuerySet, QuerySet.__base__)): # need to have both
|
237
237
|
records = records.list()
|
238
|
-
if isinstance(records, (str,
|
238
|
+
if isinstance(records, (str, Record)):
|
239
239
|
records = [records]
|
240
240
|
if not isinstance(records, List): # avoids warning for pd Series
|
241
241
|
records = list(records)
|
@@ -260,7 +260,7 @@ def add_labels(
|
|
260
260
|
# ask users to pass records
|
261
261
|
if len(records_validated) == 0:
|
262
262
|
raise ValueError(
|
263
|
-
"Please pass a record (a `
|
263
|
+
"Please pass a record (a `Record` object), not a string, e.g., via:"
|
264
264
|
" label"
|
265
265
|
f" = ln.ULabel(name='{records[0]}')" # type: ignore
|
266
266
|
)
|
lamindb/core/_feature_manager.py
CHANGED
@@ -30,14 +30,14 @@ from lnschema_core.models import (
|
|
30
30
|
ParamManagerArtifact,
|
31
31
|
ParamManagerRun,
|
32
32
|
ParamValue,
|
33
|
-
|
33
|
+
Record,
|
34
34
|
Run,
|
35
35
|
ULabel,
|
36
36
|
)
|
37
37
|
|
38
38
|
from lamindb._feature import FEATURE_TYPES, convert_numpy_dtype_to_lamin_feature_type
|
39
39
|
from lamindb._feature_set import DICT_KEYS_TYPE, FeatureSet
|
40
|
-
from lamindb.
|
40
|
+
from lamindb._record import (
|
41
41
|
REGISTRY_UNIQUE_FIELD,
|
42
42
|
get_default_str_field,
|
43
43
|
transfer_fk_to_default_db_bulk,
|
@@ -118,7 +118,9 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
|
|
118
118
|
|
119
119
|
def get_link_attr(link: LinkORM | type[LinkORM], data: HasFeatures) -> str:
|
120
120
|
link_model_name = link.__class__.__name__
|
121
|
-
if
|
121
|
+
if (
|
122
|
+
link_model_name == "ModelBase" or link_model_name == "RecordMeta"
|
123
|
+
): # we passed the type of the link
|
122
124
|
link_model_name = link.__name__
|
123
125
|
link_attr = link_model_name.replace(data.__class__.__name__, "")
|
124
126
|
if link_attr == "ExperimentalFactor":
|
@@ -162,6 +164,7 @@ def print_features(
|
|
162
164
|
labels_by_feature[link.feature_id].append(
|
163
165
|
getattr(link, link_attr).name
|
164
166
|
)
|
167
|
+
labels_msgs = []
|
165
168
|
for feature_id, labels_list in labels_by_feature.items():
|
166
169
|
feature = Feature.objects.using(self._state.db).get(id=feature_id)
|
167
170
|
print_values = _print_values(labels_list, n=10)
|
@@ -170,8 +173,9 @@ def print_features(
|
|
170
173
|
dictionary[feature.name] = (
|
171
174
|
labels_list if len(labels_list) > 1 else labels_list[0]
|
172
175
|
)
|
173
|
-
|
174
|
-
if
|
176
|
+
labels_msgs.append(f" '{feature.name}'{type_str} = {print_values}")
|
177
|
+
if len(labels_msgs) > 0:
|
178
|
+
labels_msg = "\n".join(sorted(labels_msgs)) + "\n"
|
175
179
|
msg += labels_msg
|
176
180
|
|
177
181
|
# non-categorical feature values
|
@@ -182,6 +186,7 @@ def print_features(
|
|
182
186
|
getattr(self, f"{attr_name}_values")
|
183
187
|
.values(f"{attr_name}__name", f"{attr_name}__dtype")
|
184
188
|
.annotate(values=custom_aggregate("value", self._state.db))
|
189
|
+
.order_by(f"{attr_name}__name")
|
185
190
|
)
|
186
191
|
if len(feature_values) > 0:
|
187
192
|
for fv in feature_values:
|
@@ -232,7 +237,7 @@ def parse_feature_sets_from_anndata(
|
|
232
237
|
var_field: FieldAttr | None = None,
|
233
238
|
obs_field: FieldAttr = Feature.name,
|
234
239
|
mute: bool = False,
|
235
|
-
organism: str |
|
240
|
+
organism: str | Record | None = None,
|
236
241
|
) -> dict:
|
237
242
|
data_parse = adata
|
238
243
|
if not isinstance(adata, AnnData): # is a path
|
@@ -322,7 +327,12 @@ def infer_feature_type_convert_json(
|
|
322
327
|
return FEATURE_TYPES["str"] + "[ULabel]", value
|
323
328
|
else:
|
324
329
|
return "list[str]", value
|
325
|
-
|
330
|
+
elif first_element_type == Record:
|
331
|
+
return (
|
332
|
+
f"cat[{first_element_type.__get_name_with_schema__()}]",
|
333
|
+
value,
|
334
|
+
)
|
335
|
+
elif isinstance(value, Record):
|
326
336
|
return (f"cat[{value.__class__.__get_name_with_schema__()}]", value)
|
327
337
|
if not mute:
|
328
338
|
logger.warning(f"cannot infer feature type of: {value}, returning '?")
|
@@ -417,7 +427,7 @@ def _add_values(
|
|
417
427
|
feature_param_field: FieldAttr,
|
418
428
|
str_as_ulabel: bool = True,
|
419
429
|
) -> None:
|
420
|
-
"""
|
430
|
+
"""Curate artifact with features & values.
|
421
431
|
|
422
432
|
Args:
|
423
433
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -430,7 +440,7 @@ def _add_values(
|
|
430
440
|
if isinstance(keys, DICT_KEYS_TYPE):
|
431
441
|
keys = list(keys) # type: ignore
|
432
442
|
# deal with other cases later
|
433
|
-
assert all(isinstance(key, str) for key in keys)
|
443
|
+
assert all(isinstance(key, str) for key in keys) # noqa: S101
|
434
444
|
registry = feature_param_field.field.model
|
435
445
|
is_param = registry == Param
|
436
446
|
model = Param if is_param else Feature
|
@@ -483,10 +493,11 @@ def _add_values(
|
|
483
493
|
f"Value for feature '{key}' with type {feature.dtype} must be a number"
|
484
494
|
)
|
485
495
|
elif feature.dtype.startswith("cat"):
|
486
|
-
if
|
487
|
-
|
488
|
-
|
489
|
-
|
496
|
+
if inferred_type != "?":
|
497
|
+
if not (inferred_type.startswith("cat") or isinstance(value, Record)):
|
498
|
+
raise TypeError(
|
499
|
+
f"Value for feature '{key}' with type '{feature.dtype}' must be a string or record."
|
500
|
+
)
|
490
501
|
elif not inferred_type == feature.dtype:
|
491
502
|
raise ValidationError(
|
492
503
|
f"Expected dtype for '{key}' is '{feature.dtype}', got '{inferred_type}'"
|
@@ -499,15 +510,21 @@ def _add_values(
|
|
499
510
|
feature_value = value_model(**filter_kwargs)
|
500
511
|
feature_values.append(feature_value)
|
501
512
|
else:
|
502
|
-
if isinstance(value,
|
503
|
-
|
504
|
-
|
505
|
-
|
513
|
+
if isinstance(value, Record) or (
|
514
|
+
isinstance(value, Iterable) and isinstance(next(iter(value)), Record)
|
515
|
+
):
|
516
|
+
if isinstance(value, Record):
|
517
|
+
label_records = [value]
|
518
|
+
else:
|
519
|
+
label_records = value # type: ignore
|
520
|
+
for record in label_records:
|
521
|
+
if record._state.adding:
|
522
|
+
raise ValidationError(
|
523
|
+
f"Please save {record} before annotation."
|
524
|
+
)
|
525
|
+
features_labels[record.__class__.__get_name_with_schema__()].append(
|
526
|
+
(feature, record)
|
506
527
|
)
|
507
|
-
label_record = value
|
508
|
-
features_labels[
|
509
|
-
label_record.__class__.__get_name_with_schema__()
|
510
|
-
].append((feature, label_record))
|
511
528
|
else:
|
512
529
|
if isinstance(value, str):
|
513
530
|
values = [value] # type: ignore
|
@@ -589,7 +606,7 @@ def _add_values(
|
|
589
606
|
links = [
|
590
607
|
LinkORM(
|
591
608
|
**{
|
592
|
-
f"{self._host.__get_name_with_schema__().lower()}_id": self._host.id,
|
609
|
+
f"{self._host.__class__.__get_name_with_schema__().lower()}_id": self._host.id,
|
593
610
|
valuefield_id: feature_value.id,
|
594
611
|
}
|
595
612
|
)
|
@@ -606,7 +623,7 @@ def add_values_features(
|
|
606
623
|
feature_field: FieldAttr = Feature.name,
|
607
624
|
str_as_ulabel: bool = True,
|
608
625
|
) -> None:
|
609
|
-
"""
|
626
|
+
"""Curate artifact with features & values.
|
610
627
|
|
611
628
|
Args:
|
612
629
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -621,7 +638,7 @@ def add_values_params(
|
|
621
638
|
self,
|
622
639
|
values: dict[str, str | int | float | bool],
|
623
640
|
) -> None:
|
624
|
-
"""
|
641
|
+
"""Curate artifact with features & values.
|
625
642
|
|
626
643
|
Args:
|
627
644
|
values: A dictionary of keys (features) & values (labels, numbers, booleans).
|
@@ -630,7 +647,7 @@ def add_values_params(
|
|
630
647
|
|
631
648
|
|
632
649
|
def add_feature_set(self, feature_set: FeatureSet, slot: str) -> None:
|
633
|
-
"""
|
650
|
+
"""Curate artifact with a feature set.
|
634
651
|
|
635
652
|
Args:
|
636
653
|
feature_set: `FeatureSet` A feature set record.
|
@@ -666,10 +683,10 @@ def _add_set_from_df(
|
|
666
683
|
):
|
667
684
|
"""Add feature set corresponding to column names of DataFrame."""
|
668
685
|
if isinstance(self._host, Artifact):
|
669
|
-
assert self._host.accessor == "DataFrame"
|
686
|
+
assert self._host.accessor == "DataFrame" # noqa: S101
|
670
687
|
else:
|
671
688
|
# Collection
|
672
|
-
assert self._host.artifact.accessor == "DataFrame"
|
689
|
+
assert self._host.artifact.accessor == "DataFrame" # noqa: S101
|
673
690
|
|
674
691
|
# parse and register features
|
675
692
|
registry = field.field.model
|
@@ -693,11 +710,11 @@ def _add_set_from_anndata(
|
|
693
710
|
var_field: FieldAttr,
|
694
711
|
obs_field: FieldAttr | None = Feature.name,
|
695
712
|
mute: bool = False,
|
696
|
-
organism: str |
|
713
|
+
organism: str | Record | None = None,
|
697
714
|
):
|
698
715
|
"""Add features from AnnData."""
|
699
716
|
if isinstance(self._host, Artifact):
|
700
|
-
assert self._host.accessor == "AnnData"
|
717
|
+
assert self._host.accessor == "AnnData" # noqa: S101
|
701
718
|
else:
|
702
719
|
raise NotImplementedError()
|
703
720
|
|
@@ -721,13 +738,13 @@ def _add_set_from_mudata(
|
|
721
738
|
var_fields: dict[str, FieldAttr],
|
722
739
|
obs_fields: dict[str, FieldAttr] = None,
|
723
740
|
mute: bool = False,
|
724
|
-
organism: str |
|
741
|
+
organism: str | Record | None = None,
|
725
742
|
):
|
726
743
|
"""Add features from MuData."""
|
727
744
|
if obs_fields is None:
|
728
745
|
obs_fields = {}
|
729
746
|
if isinstance(self._host, Artifact):
|
730
|
-
assert self._host.accessor == "MuData"
|
747
|
+
assert self._host.accessor == "MuData" # noqa: S101
|
731
748
|
else:
|
732
749
|
raise NotImplementedError()
|
733
750
|
|
lamindb/core/_label_manager.py
CHANGED
@@ -8,7 +8,7 @@ from lamin_utils import colors, logger
|
|
8
8
|
from lnschema_core.models import Feature
|
9
9
|
|
10
10
|
from lamindb._from_values import _print_values
|
11
|
-
from lamindb.
|
11
|
+
from lamindb._record import (
|
12
12
|
REGISTRY_UNIQUE_FIELD,
|
13
13
|
get_default_str_field,
|
14
14
|
transfer_fk_to_default_db_bulk,
|
@@ -20,7 +20,7 @@ from ._settings import settings
|
|
20
20
|
from .schema import dict_related_model_to_related_name
|
21
21
|
|
22
22
|
if TYPE_CHECKING:
|
23
|
-
from lnschema_core.models import Artifact, Collection, HasFeatures,
|
23
|
+
from lnschema_core.models import Artifact, Collection, HasFeatures, Record
|
24
24
|
|
25
25
|
from lamindb._query_set import QuerySet
|
26
26
|
|
@@ -66,7 +66,7 @@ def print_labels(self: HasFeatures, field: str = "name", print_types: bool = Fal
|
|
66
66
|
print_values = _print_values(labels_list, n=10)
|
67
67
|
type_str = f": {related_model}" if print_types else ""
|
68
68
|
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
|
69
|
-
except Exception:
|
69
|
+
except Exception: # noqa: S112
|
70
70
|
continue
|
71
71
|
msg = ""
|
72
72
|
if labels_msg:
|
@@ -102,7 +102,7 @@ def validate_labels(labels: QuerySet | list | dict, parents: bool = True):
|
|
102
102
|
records = registry.from_values(label_uids, field=field)
|
103
103
|
if len(records) > 0:
|
104
104
|
save(records, parents=parents)
|
105
|
-
except Exception:
|
105
|
+
except Exception: # noqa S110
|
106
106
|
pass
|
107
107
|
field = "uid"
|
108
108
|
label_uids = np.array(
|
@@ -146,7 +146,7 @@ class LabelManager:
|
|
146
146
|
|
147
147
|
def add(
|
148
148
|
self,
|
149
|
-
records:
|
149
|
+
records: Record | list[Record] | QuerySet,
|
150
150
|
feature: Feature | None = None,
|
151
151
|
) -> None:
|
152
152
|
"""Add one or several labels and associate them with a feature.
|
@@ -107,7 +107,10 @@ class MappedCollection:
|
|
107
107
|
parallel: bool = False,
|
108
108
|
dtype: str | None = None,
|
109
109
|
):
|
110
|
-
|
110
|
+
if join not in {None, "inner", "outer"}: # pragma: nocover
|
111
|
+
raise ValueError(
|
112
|
+
f"join must be one of None, 'inner, or 'outer' but was {type(join)}"
|
113
|
+
)
|
111
114
|
|
112
115
|
if layers_keys is None:
|
113
116
|
self.layers_keys = ["X"]
|
lamindb/core/_run_context.py
CHANGED
@@ -43,7 +43,7 @@ def get_uid_ext(version: str) -> str:
|
|
43
43
|
# merely zero-padding the nbproject version such that the base62 encoding is
|
44
44
|
# at least 4 characters long doesn't yields sufficiently diverse hashes and
|
45
45
|
# leads to collisions; it'd be nice because the uid_ext would be ordered
|
46
|
-
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
|
46
|
+
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324
|
47
47
|
|
48
48
|
|
49
49
|
def update_stem_uid_or_version(
|
@@ -113,7 +113,7 @@ def get_notebook_name_colab() -> str:
|
|
113
113
|
|
114
114
|
ip = gethostbyname(gethostname()) # 172.28.0.12
|
115
115
|
try:
|
116
|
-
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]
|
116
|
+
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
|
117
117
|
except Exception:
|
118
118
|
logger.warning(
|
119
119
|
"could not get notebook name from Google Colab, using: notebook.ipynb"
|
lamindb/core/_settings.py
CHANGED
@@ -108,7 +108,8 @@ class Settings:
|
|
108
108
|
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
|
109
109
|
"""
|
110
110
|
self._sync_git_repo = sanitize_git_repo_url(value)
|
111
|
-
|
111
|
+
if not self._sync_git_repo.startswith("https://"): # pragma: nocover
|
112
|
+
raise ValueError("git repository URL must start with 'https://'.")
|
112
113
|
|
113
114
|
@property
|
114
115
|
def storage(self) -> StorageSettings:
|
lamindb/core/_sync_git.py
CHANGED
@@ -24,8 +24,7 @@ def get_git_repo_from_remote() -> Path:
|
|
24
24
|
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
|
25
25
|
)
|
26
26
|
result = subprocess.run(
|
27
|
-
|
28
|
-
shell=True,
|
27
|
+
["git", "clone", "--depth", "10", f"{repo_url}.git"],
|
29
28
|
capture_output=True,
|
30
29
|
cwd=setup_settings.storage.cache_dir,
|
31
30
|
)
|
@@ -36,8 +35,7 @@ def get_git_repo_from_remote() -> Path:
|
|
36
35
|
|
37
36
|
def check_local_git_repo() -> bool:
|
38
37
|
result = subprocess.run(
|
39
|
-
"git config --get remote.origin.url",
|
40
|
-
shell=True,
|
38
|
+
["git", "config", "--get remote.origin.url"],
|
41
39
|
capture_output=True,
|
42
40
|
)
|
43
41
|
result_str = result.stdout.decode().strip()
|
@@ -55,10 +53,9 @@ def check_local_git_repo() -> bool:
|
|
55
53
|
|
56
54
|
|
57
55
|
def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
|
58
|
-
command =
|
56
|
+
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
|
59
57
|
result = subprocess.run(
|
60
58
|
command,
|
61
|
-
shell=True,
|
62
59
|
capture_output=True,
|
63
60
|
cwd=repo_dir,
|
64
61
|
)
|
@@ -68,7 +65,7 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
|
|
68
65
|
if commit_hash == "" or result.returncode == 1:
|
69
66
|
return None
|
70
67
|
else:
|
71
|
-
assert (
|
68
|
+
assert ( # noqa: S101
|
72
69
|
len(commit_hash) == 40
|
73
70
|
), f"commit hash |{commit_hash}| is not 40 characters long"
|
74
71
|
return commit_hash
|
@@ -82,21 +79,34 @@ def get_filepath_within_git_repo(
|
|
82
79
|
# from anywhere in the repo, hence, let's get the root
|
83
80
|
repo_root = (
|
84
81
|
subprocess.run(
|
85
|
-
"git rev-parse --show-toplevel",
|
86
|
-
shell=True,
|
82
|
+
["git", "rev-parse", "--show-toplevel"],
|
87
83
|
capture_output=True,
|
88
84
|
cwd=repo_dir,
|
89
85
|
)
|
90
86
|
.stdout.decode()
|
91
87
|
.strip()
|
92
88
|
)
|
93
|
-
|
89
|
+
# Run the git commands separately to circumvent spawning a shell
|
90
|
+
git_command = ["git", "ls-tree", "-r", commit_hash]
|
91
|
+
git_process = subprocess.Popen(
|
92
|
+
git_command,
|
93
|
+
stdout=subprocess.PIPE,
|
94
|
+
cwd=repo_root,
|
95
|
+
)
|
96
|
+
|
97
|
+
grep_command = ["grep", "-E", blob_hash]
|
94
98
|
result = subprocess.run(
|
95
|
-
|
96
|
-
|
99
|
+
grep_command,
|
100
|
+
stdin=git_process.stdout,
|
97
101
|
capture_output=True,
|
98
102
|
cwd=repo_root,
|
99
103
|
)
|
104
|
+
|
105
|
+
# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
|
106
|
+
git_process.stdout.close()
|
107
|
+
git_process.wait()
|
108
|
+
|
109
|
+
command = " ".join(git_command) + " | " + " ".join(grep_command)
|
100
110
|
if result.returncode != 0 and result.stderr.decode() != "":
|
101
111
|
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
|
102
112
|
if len(result.stdout.decode()) == 0:
|
@@ -15,7 +15,11 @@ def track_environment(run: Run) -> None:
|
|
15
15
|
# create a requirements.txt
|
16
16
|
# we don't create a conda environment.yml mostly for its slowness
|
17
17
|
try:
|
18
|
-
|
18
|
+
with open(filepath, "w") as f:
|
19
|
+
result = subprocess.run(
|
20
|
+
["pip", "freeze"],
|
21
|
+
stdout=f,
|
22
|
+
)
|
19
23
|
except OSError as e:
|
20
24
|
result = None
|
21
25
|
logger.warning(f"could not run pip freeze with error {e}")
|
lamindb/core/fields.py
CHANGED
lamindb/core/schema.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
from __future__ import annotations
|
2
2
|
|
3
3
|
from django.db.models import ManyToManyField
|
4
|
-
from lnschema_core.models import Feature, FeatureSet, LinkORM,
|
4
|
+
from lnschema_core.models import Feature, FeatureSet, LinkORM, Record
|
5
5
|
|
6
6
|
|
7
|
-
def dict_schema_name_to_model_name(orm: type[
|
7
|
+
def dict_schema_name_to_model_name(orm: type[Record]) -> dict[str, Record]:
|
8
8
|
d: dict = {
|
9
9
|
i.related_model.__get_name_with_schema__(): i.related_model
|
10
10
|
for i in orm._meta.related_objects
|
@@ -21,9 +21,9 @@ def dict_schema_name_to_model_name(orm: type[Registry]) -> dict[str, Registry]:
|
|
21
21
|
|
22
22
|
|
23
23
|
def dict_related_model_to_related_name(
|
24
|
-
orm: type[
|
24
|
+
orm: type[Record], links: bool = False
|
25
25
|
) -> dict[str, str]:
|
26
|
-
def include(model:
|
26
|
+
def include(model: Record):
|
27
27
|
return not links != issubclass(model, LinkORM)
|
28
28
|
|
29
29
|
related_objects = orm._meta.related_objects + orm._meta.many_to_many
|
@@ -39,7 +39,7 @@ def dict_related_model_to_related_name(
|
|
39
39
|
return d
|
40
40
|
|
41
41
|
|
42
|
-
def get_related_name(features_type: type[
|
42
|
+
def get_related_name(features_type: type[Record]) -> str:
|
43
43
|
candidates = [
|
44
44
|
field.related_name
|
45
45
|
for field in FeatureSet._meta.related_objects
|
@@ -49,7 +49,7 @@ def get_related_name(features_type: type[Registry]) -> str:
|
|
49
49
|
raise ValueError(
|
50
50
|
f"Can't create feature sets from {features_type.__name__} because it's not"
|
51
51
|
" related to it!\nYou need to create a link model between FeatureSet and"
|
52
|
-
" your
|
52
|
+
" your Record in your custom schema.\nTo do so, add a"
|
53
53
|
" line:\nfeature_sets = models.ManyToMany(FeatureSet,"
|
54
54
|
" related_name='mythings')\n"
|
55
55
|
)
|