lamindb 0.48a2__py3-none-any.whl → 0.48.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/_file.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
8
8
  from anndata import AnnData
9
9
  from appdirs import AppDirs
10
10
  from django.db.models.query_utils import DeferredAttribute as Field
11
- from lamin_logger import colors, logger
11
+ from lamin_utils import colors, logger
12
12
  from lamindb_setup import settings as setup_settings
13
13
  from lamindb_setup._init_instance import register_storage
14
14
  from lamindb_setup.dev import StorageSettings
@@ -17,6 +17,7 @@ from lnschema_core import Feature, FeatureSet, File, Run, ids
17
17
  from lnschema_core.types import AnnDataLike, DataLike, PathLike
18
18
 
19
19
  from lamindb._context import context
20
+ from lamindb.dev import FeatureManager
20
21
  from lamindb.dev._settings import settings
21
22
  from lamindb.dev.hashing import b16_to_b64, hash_file
22
23
  from lamindb.dev.storage import (
@@ -27,24 +28,15 @@ from lamindb.dev.storage import (
27
28
  size_adata,
28
29
  write_to_file,
29
30
  )
31
+ from lamindb.dev.storage._backed_access import AnnDataAccessor, BackedAccessor
30
32
  from lamindb.dev.storage.file import auto_storage_key_from_file, filepath_from_file
31
33
  from lamindb.dev.utils import attach_func_to_class_method
32
34
 
33
35
  from . import _TESTING
36
+ from ._feature import convert_numpy_dtype_to_lamin_feature_type
34
37
  from .dev._view_parents import view_lineage
35
38
  from .dev.storage.file import AUTO_KEY_PREFIX
36
39
 
37
- try:
38
- from lamindb.dev.storage._backed_access import AnnDataAccessor, BackedAccessor
39
- except ImportError:
40
-
41
- class AnnDataAccessor: # type: ignore
42
- pass
43
-
44
- class BackedAccessor: # type: ignore
45
- pass
46
-
47
-
48
40
  DIRS = AppDirs("lamindb", "laminlabs")
49
41
 
50
42
 
@@ -362,6 +354,19 @@ def data_is_anndata(data: DataLike):
362
354
  return False
363
355
 
364
356
 
357
+ def data_is_mudata(data: DataLike):
358
+ try:
359
+ from mudata import MuData
360
+ except ModuleNotFoundError:
361
+ return False
362
+
363
+ if isinstance(data, MuData):
364
+ return True
365
+ if isinstance(data, (str, Path, UPath)):
366
+ return Path(data).suffix in {".h5mu"}
367
+ return False
368
+
369
+
365
370
  def __init__(file: File, *args, **kwargs):
366
371
  # Below checks for the Django-internal call in from_db()
367
372
  # it'd be better if we could avoid this, but not being able to create a File
@@ -383,9 +388,6 @@ def __init__(file: File, *args, **kwargs):
383
388
  description: Optional[str] = (
384
389
  kwargs.pop("description") if "description" in kwargs else None
385
390
  )
386
- feature_sets: Optional[List[FeatureSet]] = (
387
- kwargs.pop("feature_sets") if "feature_sets" in kwargs else None
388
- )
389
391
  name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
390
392
  format = kwargs.pop("format") if "format" in kwargs else None
391
393
  log_hint = kwargs.pop("log_hint") if "log_hint" in kwargs else True
@@ -394,9 +396,7 @@ def __init__(file: File, *args, **kwargs):
394
396
  )
395
397
 
396
398
  if not len(kwargs) == 0:
397
- raise ValueError(
398
- "Only data, key, run, description & feature_sets can be passed."
399
- )
399
+ raise ValueError("Only data, key, run, description can be passed.")
400
400
 
401
401
  if name is not None and description is not None:
402
402
  raise ValueError("Only pass description, do not pass a name")
@@ -404,21 +404,8 @@ def __init__(file: File, *args, **kwargs):
404
404
  logger.warning("Argument `name` is deprecated, please use `description`")
405
405
  description = name
406
406
 
407
- if feature_sets is None:
408
- feature_sets = []
409
- if isinstance(data, pd.DataFrame) and log_hint:
410
- logger.hint(
411
- "This is a dataframe, consider using File.from_df() to link column"
412
- " names as features!"
413
- )
414
- elif data_is_anndata(data) and log_hint:
415
- logger.hint(
416
- "This is AnnDataLike, consider using File.from_anndata() to link var"
417
- " and obs.columns as features!"
418
- )
419
-
420
407
  provisional_id = ids.base62_20()
421
- kwargs, privates = get_file_kwargs_from_data(
408
+ kwargs_or_file, privates = get_file_kwargs_from_data(
422
409
  data=data,
423
410
  key=key,
424
411
  run=run,
@@ -426,17 +413,38 @@ def __init__(file: File, *args, **kwargs):
426
413
  provisional_id=provisional_id,
427
414
  skip_check_exists=skip_check_exists,
428
415
  )
416
+
429
417
  # an object with the same hash already exists
430
- if isinstance(kwargs, File):
418
+ if isinstance(kwargs_or_file, File):
431
419
  # this is the way Django instantiates from the DB internally
432
420
  # https://github.com/django/django/blob/549d6ffeb6d626b023acc40c3bb2093b4b25b3d6/django/db/models/base.py#LL488C1-L491C51
433
421
  new_args = [
434
- getattr(kwargs, field.attname) for field in file._meta.concrete_fields
422
+ getattr(kwargs_or_file, field.attname)
423
+ for field in file._meta.concrete_fields
435
424
  ]
436
425
  super(File, file).__init__(*new_args)
437
426
  file._state.adding = False
438
427
  file._state.db = "default"
439
428
  return None
429
+ else:
430
+ kwargs = kwargs_or_file
431
+
432
+ if isinstance(data, pd.DataFrame):
433
+ if log_hint:
434
+ logger.hint(
435
+ "This is a dataframe, consider using File.from_df() to link column"
436
+ " names as features!"
437
+ )
438
+ kwargs["accessor"] = "DataFrame"
439
+ elif data_is_anndata(data):
440
+ if log_hint:
441
+ logger.hint(
442
+ "This is AnnDataLike, consider using File.from_anndata() to link"
443
+ " var_names and obs.columns as features!"
444
+ )
445
+ kwargs["accessor"] = "AnnData"
446
+ elif data_is_mudata(data):
447
+ kwargs["accessor"] = "MuData"
440
448
 
441
449
  kwargs["id"] = provisional_id
442
450
  kwargs["description"] = description
@@ -465,9 +473,6 @@ def __init__(file: File, *args, **kwargs):
465
473
  file._cloud_filepath = privates["cloud_filepath"]
466
474
  file._memory_rep = privates["memory_rep"]
467
475
  file._to_store = not privates["check_path_in_storage"]
468
- file._feature_sets = (
469
- feature_sets if isinstance(feature_sets, list) else [feature_sets]
470
- )
471
476
 
472
477
  super(File, file).__init__(**kwargs)
473
478
 
@@ -484,9 +489,8 @@ def from_df(
484
489
  ) -> "File":
485
490
  """{}"""
486
491
  file = File(data=df, key=key, run=run, description=description, log_hint=False)
487
- features = Feature.from_df(df)
488
- feature_set = FeatureSet(features)
489
- file._feature_sets = [feature_set]
492
+ feature_set = FeatureSet.from_df(df)
493
+ file._feature_sets = {"columns": feature_set}
490
494
  return file
491
495
 
492
496
 
@@ -512,9 +516,25 @@ def from_anndata(
512
516
  data_parse = backed_access(filepath)
513
517
  else:
514
518
  data_parse = ad.read(filepath, backed="r")
515
- feature_sets = []
516
- feature_sets.append(FeatureSet.from_values(data_parse.var.index, var_ref))
517
- feature_sets.append(FeatureSet.from_values(data_parse.obs.columns))
519
+ type = "float"
520
+ else:
521
+ type = convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
522
+ feature_sets = {}
523
+ logger.info("Parsing feature names of X, stored in slot .var")
524
+ logger.indent = " "
525
+ feature_set_x = FeatureSet.from_values(
526
+ data_parse.var.index,
527
+ var_ref,
528
+ type=type,
529
+ )
530
+ feature_sets["var"] = feature_set_x
531
+ logger.indent = ""
532
+ if len(data_parse.obs.columns) > 0:
533
+ logger.info("Parsing feature names of slot .obs")
534
+ logger.indent = " "
535
+ feature_set_obs = FeatureSet.from_df(data_parse.obs)
536
+ feature_sets["obs"] = feature_set_obs
537
+ logger.indent = ""
518
538
  file._feature_sets = feature_sets
519
539
  return file
520
540
 
@@ -526,19 +546,13 @@ def from_dir(
526
546
  path: PathLike,
527
547
  *,
528
548
  run: Optional[Run] = None,
549
+ storage_root: Optional[PathLike] = None,
529
550
  ) -> List["File"]:
530
551
  """{}"""
531
552
  folderpath = UPath(path)
532
- check_path_in_storage = check_path_in_default_storage(folderpath)
533
-
534
- if check_path_in_storage:
535
- folder_key = get_relative_path_to_root(path=folderpath).as_posix()
536
- else:
537
- raise RuntimeError(
538
- "Currently, only directories in default storage can be registered!\n"
539
- "You can either move your folder into the current default storage"
540
- "or add a new default storage through `ln.settings.storage`"
541
- )
553
+ folder_key = get_relative_path_to_root(
554
+ path=folderpath, root=storage_root
555
+ ).as_posix()
542
556
  # always sanitize by stripping a trailing slash
543
557
  folder_key = folder_key.rstrip("/")
544
558
  logger.hint(f"using storage prefix = {folder_key}/")
@@ -617,17 +631,18 @@ def backed(
617
631
  " one of the following suffixes for the object name:"
618
632
  f" {', '.join(suffixes)}."
619
633
  )
620
- _track_run_input(self, is_run_input)
621
- # consider the case where an object is already locally cached
622
- local_path = setup_settings.instance.storage.cloud_to_local_no_update(
623
- filepath_from_file(self)
624
- )
625
- if local_path.exists() and self.suffix == ".h5ad":
626
- return ad.read_h5ad(local_path, backed="r")
627
634
 
628
635
  from lamindb.dev.storage._backed_access import backed_access
629
636
 
630
- return backed_access(self)
637
+ _track_run_input(self, is_run_input)
638
+
639
+ filepath = filepath_from_file(self)
640
+ # consider the case where an object is already locally cached
641
+ localpath = setup_settings.instance.storage.cloud_to_local_no_update(filepath)
642
+ if localpath.exists():
643
+ return backed_access(localpath)
644
+ else:
645
+ return backed_access(filepath)
631
646
 
632
647
 
633
648
  def _track_run_input(file: File, is_run_input: Optional[bool] = None):
@@ -638,9 +653,14 @@ def _track_run_input(file: File, is_run_input: Optional[bool] = None):
638
653
  # avoid cycles (a file is both input and output)
639
654
  if file.run != context.run:
640
655
  if settings.track_run_inputs:
656
+ transform_note = ""
657
+ if file.transform is not None:
658
+ transform_note = (
659
+ f", adding parent transform {file.transform.id}"
660
+ )
641
661
  logger.info(
642
- f"Adding file {file.id} as input for run {context.run.id},"
643
- f" adding parent transform {file.transform.id}"
662
+ f"Adding file {file.id} as input for run"
663
+ f" {context.run.id}{transform_note}"
644
664
  )
645
665
  track_run_input = True
646
666
  else:
@@ -659,7 +679,7 @@ def _track_run_input(file: File, is_run_input: Optional[bool] = None):
659
679
  if context.run is None:
660
680
  raise ValueError(
661
681
  "No global run context set. Call ln.context.track() or link input to a"
662
- " run object via `run.inputs.append(file)`"
682
+ " run object via `run.input_files.append(file)`"
663
683
  )
664
684
  # avoid adding the same run twice
665
685
  # avoid cycles (a file is both input and output)
@@ -671,6 +691,8 @@ def _track_run_input(file: File, is_run_input: Optional[bool] = None):
671
691
 
672
692
  def load(self, is_run_input: Optional[bool] = None, stream: bool = False) -> DataLike:
673
693
  _track_run_input(self, is_run_input)
694
+ if hasattr(self, "_memory_rep") and self._memory_rep is not None:
695
+ return self._memory_rep
674
696
  return load_to_memory(filepath_from_file(self), stream=stream)
675
697
 
676
698
 
@@ -718,14 +740,21 @@ def _save_skip_storage(file, *args, **kwargs) -> None:
718
740
  if file.run is not None:
719
741
  file.run.save()
720
742
  if hasattr(file, "_feature_sets"):
721
- for feature_set in file._feature_sets:
743
+ for feature_set in file._feature_sets.values():
722
744
  feature_set.save()
723
- if hasattr(file, "_feature_values"):
724
- for feature_value in file._feature_values:
725
- feature_value.save()
726
745
  super(File, file).save(*args, **kwargs)
727
746
  if hasattr(file, "_feature_sets"):
728
- file.feature_sets.set(file._feature_sets)
747
+ links = []
748
+ for slot, feature_set in file._feature_sets.items():
749
+ links.append(
750
+ File.feature_sets.through(
751
+ file_id=file.id, feature_set_id=feature_set.id, slot=slot
752
+ )
753
+ )
754
+
755
+ from lamindb._save import bulk_create
756
+
757
+ bulk_create(links)
729
758
 
730
759
 
731
760
  def path(self) -> Union[Path, UPath]:
@@ -801,13 +830,13 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
801
830
  >>> file1.save()
802
831
  >>> file2 = ln.File(pd.DataFrame(index=[2,3]))
803
832
  >>> file2.save()
804
- >>> ln.save(ln.Tag.from_values(["Tag1", "Tag2", "Tag3"], field="name"))
805
- >>> tags = ln.Tag.select(name__icontains = "tag").all()
806
- >>> file1.tags.set(tags)
807
- >>> file2.inherit_relations(file1, ["tags"])
808
- 💬 Inheriting 1 field: ['tags']
809
- >>> file2.tags.list("name")
810
- ['Tag1', 'Tag2', 'Tag3']
833
+ >>> ln.save(ln.Label.from_values(["Label1", "Label2", "Label3"], field="name"))
834
+ >>> labels = ln.Label.select(name__icontains = "label").all()
835
+ >>> file1.labels.set(labels)
836
+ >>> file2.inherit_relations(file1, ["labels"])
837
+ 💬 Inheriting 1 field: ['labels']
838
+ >>> file2.labels.list("name")
839
+ ['Label1', 'Label2', 'Label3']
811
840
  """
812
841
  if fields is None:
813
842
  # fields in the model definition
@@ -822,6 +851,9 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
822
851
  else:
823
852
  raise KeyError(f"No many-to-many relationship is found with '{field}'")
824
853
 
854
+ if None in related_names:
855
+ related_names.remove(None)
856
+
825
857
  inherit_names = [
826
858
  related_name
827
859
  for related_name in related_names
@@ -836,6 +868,15 @@ def inherit_relations(self, file: File, fields: Optional[List[str]] = None):
836
868
  )
837
869
 
838
870
 
871
+ @property # type: ignore
872
+ @doc_args(File.features.__doc__)
873
+ def features(self) -> "FeatureManager":
874
+ """{}"""
875
+ from lamindb._feature_manager import FeatureManager
876
+
877
+ return FeatureManager(self)
878
+
879
+
839
880
  METHOD_NAMES = [
840
881
  "__init__",
841
882
  "from_anndata",
@@ -866,5 +907,8 @@ for name in METHOD_NAMES:
866
907
  # privates currently dealt with separately
867
908
  File._delete_skip_storage = _delete_skip_storage
868
909
  File._save_skip_storage = _save_skip_storage
910
+ # TODO: move these to METHOD_NAMES
869
911
  setattr(File, "view_lineage", view_lineage)
870
912
  setattr(File, "inherit_relations", inherit_relations)
913
+ # property signature is not tested:
914
+ setattr(File, "features", features)
lamindb/_from_values.py CHANGED
@@ -1,10 +1,11 @@
1
- from typing import Any, Dict, Iterable, List, Tuple, Union
1
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
2
2
 
3
3
  import pandas as pd
4
4
  from django.core.exceptions import FieldDoesNotExist
5
+ from django.db.models import Case, When
5
6
  from django.db.models.query_utils import DeferredAttribute as Field
6
- from lamin_logger import colors, logger
7
- from lnschema_core.models import ORM
7
+ from lamin_utils import colors, logger
8
+ from lnschema_core.models import ORM, Feature
8
9
  from lnschema_core.types import ListLike
9
10
 
10
11
  from .dev._settings import settings
@@ -17,15 +18,26 @@ def get_or_create_records(
17
18
  *,
18
19
  from_bionty: bool = False,
19
20
  **kwargs,
20
- ) -> List:
21
+ ) -> List[ORM]:
21
22
  """Get or create records from iterables."""
22
23
  upon_create_search_names = settings.upon_create_search_names
23
24
  settings.upon_create_search_names = False
25
+ feature: Feature = None
26
+ if "feature" in kwargs:
27
+ feature = kwargs.pop("feature")
28
+ kwargs["feature_id"] = feature.id
29
+ types: Optional[Dict] = None
30
+ if "types" in kwargs:
31
+ types = kwargs.pop("types")
24
32
  try:
25
33
  field_name = field.field.name
26
- model = field.field.model
34
+ ORM = field.field.model
27
35
  iterable_idx = index_iterable(iterable)
28
36
 
37
+ if isinstance(ORM, Feature):
38
+ if types is None:
39
+ raise ValueError("Please pass types as {} or use FeatureSet.from_df()")
40
+
29
41
  # returns existing records & non-existing values
30
42
  records, nonexist_values = get_existing_records(
31
43
  iterable_idx=iterable_idx, field=field, kwargs=kwargs
@@ -43,15 +55,42 @@ def get_or_create_records(
43
55
  # unmapped new_ids will only create records with field and kwargs
44
56
  if len(unmapped_values) > 0:
45
57
  for value in unmapped_values:
46
- records.append(model(**{field_name: value}, **kwargs))
58
+ params = {field_name: value}
59
+ if types is not None:
60
+ params["type"] = str(types[value])
61
+ records.append(ORM(**params, **kwargs))
47
62
  s = "" if len(unmapped_values) == 1 else "s"
48
- print_unmapped_values = ", ".join(unmapped_values[:7])
49
- if len(unmapped_values) > 7:
63
+ print_unmapped_values = ", ".join(unmapped_values[:10])
64
+ if len(unmapped_values) > 10:
50
65
  print_unmapped_values += ", ..."
66
+ additional_info = " "
67
+ if feature is not None:
68
+ additional_info = f" Feature {feature.name} and "
51
69
  logger.warning(
52
- f"Created {colors.yellow(f'{len(unmapped_values)} {model.__name__} record{s}')} setting" # noqa
53
- f" field {colors.yellow(f'{field_name}')} to: {print_unmapped_values}" # noqa
70
+ f"Created {colors.yellow(f'{len(unmapped_values)} {ORM.__name__} record{s}')} for{additional_info}" # noqa
71
+ f"{colors.yellow(f'{field_name}{s}')}: {print_unmapped_values}" # noqa
54
72
  )
73
+ if ORM.__module__.startswith("lnschema_bionty."):
74
+ if isinstance(iterable, pd.Series):
75
+ feature = iterable.name
76
+ else:
77
+ logger.warning(
78
+ "Did not receive values as pd.Series, inferring feature from"
79
+ f" reference ORM: {ORM.__name__}"
80
+ )
81
+ feature = ORM.__name__.lower()
82
+ if isinstance(feature, str):
83
+ feature_name = feature
84
+ feature = Feature.select(name=feature).one_or_none()
85
+ elif feature is not None:
86
+ feature_name = feature.name
87
+ if feature is not None:
88
+ for record in records:
89
+ record._feature = feature
90
+ if feature_name is not None:
91
+ for record in records:
92
+ record._feature = feature_name
93
+ logger.info(f"Mapping records to feature '{feature_name}'")
55
94
  return records
56
95
  finally:
57
96
  settings.upon_create_search_names = upon_create_search_names
@@ -80,10 +119,14 @@ def get_existing_records(iterable_idx: pd.Index, field: Field, kwargs: Dict = {}
80
119
  syn_msg = ""
81
120
  if len(syn_mapper) > 0:
82
121
  s = "" if len(syn_mapper) == 1 else "s"
122
+ names = list(syn_mapper.keys())
123
+ print_values = ", ".join(names[:10])
124
+ if len(names) > 10:
125
+ print_values += ", ..."
83
126
  syn_msg = (
84
127
  "Loaded"
85
128
  f" {colors.green(f'{len(syn_mapper)} {model.__name__} record{s}')} that" # noqa
86
- f" matched {colors.green('synonyms')}"
129
+ f" matched {colors.green('synonyms')}: {print_values}"
87
130
  )
88
131
  iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
89
132
 
@@ -95,22 +138,37 @@ def get_existing_records(iterable_idx: pd.Index, field: Field, kwargs: Dict = {}
95
138
 
96
139
  from ._select import select
97
140
 
98
- stmt = select(model, **condition)
141
+ query_set = select(model, **condition)
142
+
143
+ # new we have to sort the list of queried records
144
+ preserved = Case(
145
+ *[
146
+ When(**{field_name: value}, then=pos)
147
+ for pos, value in enumerate(iterable_idx)
148
+ ]
149
+ )
150
+ records = query_set.order_by(preserved).list()
99
151
 
100
- records = stmt.list() # existing records
101
152
  n_name = len(records) - len(syn_mapper)
153
+ names = [getattr(record, field_name) for record in records]
154
+ names = [name for name in names if name not in syn_mapper.values()]
102
155
  if n_name > 0:
103
156
  s = "" if n_name == 1 else "s"
157
+ print_values = ", ".join(names[:10])
158
+ if len(names) > 10:
159
+ print_values += ", ..."
104
160
  logger.info(
105
161
  "Loaded"
106
162
  f" {colors.green(f'{n_name} {model.__name__} record{s}')} that"
107
- f" matched field {colors.green(f'{field_name}')}"
163
+ f" matched {colors.green(f'{field_name}')}: {print_values}"
108
164
  )
109
165
  # make sure that synonyms logging appears after the field logging
110
166
  if len(syn_msg) > 0:
111
167
  logger.info(syn_msg)
112
168
 
113
- existing_values = iterable_idx.intersection(stmt.values_list(field_name, flat=True))
169
+ existing_values = iterable_idx.intersection(
170
+ query_set.values_list(field_name, flat=True)
171
+ )
114
172
  nonexist_values = iterable_idx.difference(existing_values)
115
173
 
116
174
  return records, nonexist_values
@@ -144,10 +202,14 @@ def create_records_from_bionty(
144
202
  msg_syn: str = ""
145
203
  if len(syn_mapper) > 0:
146
204
  s = "" if len(syn_mapper) == 1 else "s"
205
+ names = list(syn_mapper.keys())
206
+ print_values = ", ".join(names[:10])
207
+ if len(names) > 10:
208
+ print_values += ", ..."
147
209
  msg_syn = (
148
- "Created"
210
+ "Loaded"
149
211
  f" {colors.purple(f'{len(syn_mapper)} {model.__name__} record{s} from Bionty')} that" # noqa
150
- f" matched {colors.purple('synonyms')}"
212
+ f" matched {colors.purple('synonyms')}: {print_values}"
151
213
  )
152
214
 
153
215
  iterable_idx = iterable_idx.to_frame().rename(index=syn_mapper).index
@@ -162,26 +224,24 @@ def create_records_from_bionty(
162
224
  for bk in bionty_kwargs:
163
225
  records.append(model(**bk, **kwargs))
164
226
 
165
- # logging of BiontySource linking
166
- source_msg = (
167
- ""
168
- if kwargs.get("bionty_source") is None
169
- else f" (bionty_source_id={kwargs.get('bionty_source').id})" # type:ignore # noqa
170
- )
171
-
172
227
  # number of records that matches field (not synonyms)
173
228
  n_name = len(records) - len(syn_mapper)
229
+ names = [getattr(record, field_name) for record in records]
230
+ names = [name for name in names if name not in syn_mapper.values()]
174
231
  if n_name > 0:
175
232
  s = "" if n_name == 1 else "s"
233
+ print_values = ", ".join(names[:10])
234
+ if len(names) > 10:
235
+ print_values += ", ..."
176
236
  msg = (
177
- "Created"
237
+ "Loaded"
178
238
  f" {colors.purple(f'{n_name} {model.__name__} record{s} from Bionty')} that" # noqa
179
- f" matched {colors.purple(f'{field_name}')} field"
239
+ f" matched {colors.purple(f'{field_name}')}: {print_values}"
180
240
  )
181
- logger.info(msg + source_msg)
241
+ logger.info(msg)
182
242
  # make sure that synonyms logging appears after the field logging
183
243
  if len(msg_syn) > 0:
184
- logger.info(msg_syn + source_msg)
244
+ logger.info(msg_syn)
185
245
  # warning about multi matches
186
246
  if len(multi_msg) > 0:
187
247
  logger.warning(multi_msg)
lamindb/_label.py ADDED
@@ -0,0 +1,85 @@
1
+ from typing import List, Optional, Union
2
+
3
+ import pandas as pd
4
+ from lamin_utils import logger
5
+ from lamindb_setup.dev._docs import doc_args
6
+ from lnschema_core import Feature, Label
7
+ from lnschema_core.types import ListLike
8
+
9
+ from lamindb.dev.utils import attach_func_to_class_method
10
+
11
+ from . import _TESTING
12
+ from ._from_values import get_or_create_records, index_iterable
13
+
14
+
15
+ def __init__(self, *args, **kwargs):
16
+ if len(args) == len(self._meta.concrete_fields):
17
+ super(Label, self).__init__(*args, **kwargs)
18
+ return None
19
+ # now we proceed with the user-facing constructor
20
+ if len(args) > 0:
21
+ raise ValueError("Only one non-keyword arg allowed")
22
+ name: Optional[str] = kwargs.pop("name") if "name" in kwargs else None
23
+ description: Optional[str] = (
24
+ kwargs.pop("description") if "description" in kwargs else None
25
+ )
26
+ feature: Optional[str] = kwargs.pop("feature") if "feature" in kwargs else None
27
+ feature_id: Optional[str] = (
28
+ kwargs.pop("feature_id") if "feature_id" in kwargs else None
29
+ )
30
+ if len(kwargs) > 0:
31
+ raise ValueError("Only name, description, feature are valid keyword arguments")
32
+ # continue
33
+ if feature is None and feature_id is None:
34
+ logger.warning("Consider passing a corresponding feature for your label!")
35
+ if isinstance(feature, str):
36
+ feature = Feature.select(name=feature).one_or_none()
37
+ if feature is None:
38
+ raise ValueError(
39
+ f"Feature with name {feature} does not exist, please create it:"
40
+ f" ln.Feature(name={feature}, type='float')"
41
+ )
42
+ else:
43
+ feature_id = feature.id
44
+ super(Label, self).__init__(
45
+ name=name, description=description, feature_id=feature_id
46
+ )
47
+
48
+
49
+ @classmethod # type:ignore
50
+ @doc_args(Label.from_values.__doc__)
51
+ def from_values(
52
+ cls, values: ListLike, feature: Optional[Union[Feature, str]] = None, **kwargs
53
+ ) -> List["Label"]:
54
+ """{}"""
55
+ iterable_idx = index_iterable(values)
56
+ if feature is None and isinstance(values, pd.Series):
57
+ feature = values.name
58
+ if isinstance(feature, str):
59
+ feature = Feature.select(name=feature).one()
60
+ records = get_or_create_records(
61
+ iterable=iterable_idx,
62
+ field=Label.name,
63
+ # here, feature_id is a kwarg, which is an additional condition
64
+ # in queries for potentially existing records
65
+ feature=feature,
66
+ )
67
+ return records
68
+
69
+
70
+ METHOD_NAMES = [
71
+ "__init__",
72
+ "from_values",
73
+ ]
74
+
75
+ if _TESTING:
76
+ from inspect import signature
77
+
78
+ SIGS = {
79
+ name: signature(getattr(Label, name))
80
+ for name in METHOD_NAMES
81
+ if name != "__init__"
82
+ }
83
+
84
+ for name in METHOD_NAMES:
85
+ attach_func_to_class_method(name, Label, globals())
lamindb/_logger.py CHANGED
@@ -1 +1 @@
1
- from lamin_logger import colors, logger # noqa
1
+ from lamin_utils import colors, logger # noqa