lamindb 0.69.9__py3-none-any.whl → 0.70.0__py3-none-any.whl

This diff compares the contents of two publicly available package versions, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
lamindb/_finish.py CHANGED
@@ -43,39 +43,35 @@ def finish(i_saved_the_notebook: bool = False):
43
43
  "Please pass `i_saved_the_notebook=True` to `ln.finish()`, save the notebook, and re-run this cell."
44
44
  )
45
45
  return None
46
- notebook_content = read_notebook(run_context.path) # type: ignore
47
- if not check_last_cell(notebook_content, "i_saved_the_notebook"):
48
- raise CallFinishInLastCell(
49
- "Can only run `ln.finish(i_saved_the_notebook=True)` from the last code cell of the notebook."
50
- )
51
46
  save_run_context_core(
52
47
  run=run_context.run,
53
48
  transform=run_context.transform,
54
49
  filepath=run_context.path,
55
50
  finished_at=True,
56
- notebook_content=notebook_content,
57
51
  )
58
52
  else:
59
53
  # scripts
54
+ # save_run_context_core was already called during ln.track()
60
55
  run_context.run.finished_at = datetime.now(timezone.utc) # update run time
61
56
  run_context.run.save()
62
57
 
63
58
 
64
- # do not type because we need to be aware of lnschema_core import order
65
59
  def save_run_context_core(
66
60
  *,
67
61
  run: Run,
68
62
  transform: Transform,
69
63
  filepath: Path,
70
64
  transform_family: QuerySet | None = None,
71
- is_consecutive: bool = True,
72
65
  finished_at: bool = False,
73
- notebook_content=None, # nbproject.Notebook
74
66
  ) -> str | None:
75
67
  import lamindb as ln
76
68
 
77
69
  ln.settings.verbosity = "success"
78
70
 
71
+ # for scripts, things are easy
72
+ is_consecutive = True
73
+ source_code_path = filepath
74
+ # for notebooks, we need more work
79
75
  if transform.type == TransformType.notebook:
80
76
  try:
81
77
  import nbstripout
@@ -88,62 +84,52 @@ def save_run_context_core(
88
84
  "install nbproject & nbstripout: pip install nbproject nbstripout"
89
85
  )
90
86
  return None
91
- if notebook_content is None:
92
- notebook_content = read_notebook(filepath) # type: ignore
87
+ notebook_content = read_notebook(filepath) # type: ignore
93
88
  is_consecutive = check_consecutiveness(notebook_content)
94
89
  if not is_consecutive:
90
+ msg = " Do you still want to proceed with finishing? (y/n) "
95
91
  if os.getenv("LAMIN_TESTING") is None:
96
- decide = input(
97
- " Do you still want to proceed with publishing? (y/n) "
98
- )
92
+ response = input(msg)
99
93
  else:
100
- decide = "n"
101
- if decide != "y":
102
- logger.error("Aborted (non-consecutive)!")
94
+ response = "n"
95
+ if response != "y":
103
96
  return "aborted-non-consecutive"
104
-
105
97
  # convert the notebook file to html
106
98
  # log_level is set to 40 to silence the nbconvert logging
107
- result = subprocess.run(
99
+ subprocess.run(
108
100
  "jupyter nbconvert --to html"
109
101
  f" {filepath.as_posix()} --Application.log_level=40",
110
102
  shell=True,
103
+ check=True,
111
104
  )
112
105
  # move the temporary file into the cache dir in case it's accidentally
113
106
  # in an existing storage location -> we want to move associated
114
107
  # artifacts into default storage and not register them in an existing
115
108
  # location
116
- filepath_html = filepath.with_suffix(".html") # current location
109
+ filepath_html_orig = filepath.with_suffix(".html") # current location
110
+ filepath_html = ln_setup.settings.storage.cache_dir / filepath_html_orig.name
111
+ # don't use Path.rename here because of cross-device link error
112
+ # https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
117
113
  shutil.move(
118
- filepath_html, # type: ignore
119
- ln_setup.settings.storage.cache_dir / filepath_html.name,
120
- ) # move; don't use Path.rename here because of cross-device link error
121
- # see https://laminlabs.slack.com/archives/C04A0RMA0SC/p1710259102686969
122
- filepath_html = (
123
- ln_setup.settings.storage.cache_dir / filepath_html.name
124
- ) # adjust location
125
- assert result.returncode == 0
126
- # copy the notebook file to a temporary file
114
+ filepath_html_orig, # type: ignore
115
+ filepath_html,
116
+ )
117
+ # strip the output from the notebook to create the source code file
118
+ # first, copy the notebook file to a temporary file in the cache
127
119
  source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
128
120
  shutil.copy2(filepath, source_code_path) # copy
129
- result = subprocess.run(f"nbstripout {source_code_path}", shell=True)
130
- assert result.returncode == 0
131
- else:
132
- source_code_path = filepath
121
+ subprocess.run(f"nbstripout {source_code_path}", shell=True, check=True)
133
122
  # find initial versions of source codes and html reports
134
- initial_report = None
135
- initial_source = None
123
+ prev_report = None
124
+ prev_source = None
136
125
  if transform_family is None:
137
126
  transform_family = transform.versions
138
127
  if len(transform_family) > 0:
139
128
  for prev_transform in transform_family.order_by("-created_at"):
140
- # check for id to avoid query
141
129
  if prev_transform.latest_report_id is not None:
142
- # any previous latest report of this transform is OK!
143
- initial_report = prev_transform.latest_report
130
+ prev_report = prev_transform.latest_report
144
131
  if prev_transform.source_code_id is not None:
145
- # any previous source code id is OK!
146
- initial_source = prev_transform.source_code
132
+ prev_source = prev_transform.source_code
147
133
  ln.settings.silence_file_run_transform_warning = True
148
134
  # register the source code
149
135
  if transform.source_code is not None:
@@ -173,7 +159,7 @@ def save_run_context_core(
173
159
  source_code_path,
174
160
  description=f"Source of transform {transform.uid}",
175
161
  version=transform.version,
176
- is_new_version_of=initial_source,
162
+ is_new_version_of=prev_source,
177
163
  visibility=0, # hidden file
178
164
  run=False,
179
165
  )
@@ -207,7 +193,7 @@ def save_run_context_core(
207
193
  report_file = ln.Artifact(
208
194
  filepath_html,
209
195
  description=f"Report of run {run.uid}",
210
- is_new_version_of=initial_report,
196
+ is_new_version_of=prev_report,
211
197
  visibility=0, # hidden file
212
198
  run=False,
213
199
  )
lamindb/_from_values.py CHANGED
@@ -19,19 +19,26 @@ def get_or_create_records(
19
19
  field: StrField,
20
20
  *,
21
21
  from_public: bool = False,
22
- **kwargs,
22
+ organism: Registry | str | None = None,
23
+ public_source: Registry | None = None,
23
24
  ) -> list[Registry]:
24
25
  """Get or create records from iterables."""
25
26
  upon_create_search_names = settings.upon_create_search_names
26
- settings.upon_create_search_names = False
27
27
  feature: Feature = None
28
+ organism = _get_organism_record(field, organism)
29
+ kwargs: dict = {}
30
+ if organism is not None:
31
+ kwargs["organism"] = organism
32
+ if public_source is not None:
33
+ kwargs["public_source"] = public_source
34
+ settings.upon_create_search_names = False
28
35
  try:
29
36
  Registry = field.field.model
30
37
  iterable_idx = index_iterable(iterable)
31
38
 
32
39
  # returns existing records & non-existing values
33
40
  records, nonexist_values, msg = get_existing_records(
34
- iterable_idx=iterable_idx, field=field, kwargs=kwargs
41
+ iterable_idx=iterable_idx, field=field, **kwargs
35
42
  )
36
43
 
37
44
  # new records to be created based on new values
@@ -78,26 +85,14 @@ def get_or_create_records(
78
85
  def get_existing_records(
79
86
  iterable_idx: pd.Index,
80
87
  field: StrField,
81
- kwargs: dict = None,
88
+ **kwargs,
82
89
  ):
83
- if kwargs is None:
84
- kwargs = {}
85
90
  model = field.field.model
86
91
  condition: dict = {} if len(kwargs) == 0 else kwargs.copy()
87
92
  # existing records matching is agnostic to the bionty source
88
93
  if "public_source" in condition:
89
94
  condition.pop("public_source")
90
95
 
91
- if _has_organism_field(model):
92
- from lnschema_bionty._bionty import create_or_get_organism_record
93
-
94
- organism_record = create_or_get_organism_record(
95
- organism=kwargs.get("organism"), orm=model
96
- )
97
- if organism_record is not None:
98
- kwargs.update({"organism": organism_record})
99
- condition.update({"organism": organism_record})
100
-
101
96
  # standardize based on the DB reference
102
97
  # log synonyms mapped terms
103
98
  result = model.inspect(
@@ -252,7 +247,8 @@ def index_iterable(iterable: Iterable) -> pd.Index:
252
247
 
253
248
 
254
249
  def _print_values(names: list, n: int = 20) -> str:
255
- print_values = ", ".join([f"'{name}'" for name in names[:n]])
250
+ names = list(set(names))
251
+ print_values = ", ".join([f"'{name}'" for name in names[:n] if name != "None"])
256
252
  if len(names) > n:
257
253
  print_values += ", ..."
258
254
  return print_values
@@ -322,3 +318,13 @@ def _has_organism_field(orm: Registry) -> bool:
322
318
  return True
323
319
  except FieldDoesNotExist:
324
320
  return False
321
+
322
+
323
+ def _get_organism_record(field: StrField, organism: str | Registry) -> Registry:
324
+ model = field.field.model
325
+ if _has_organism_field(model):
326
+ from lnschema_bionty._bionty import create_or_get_organism_record
327
+
328
+ organism_record = create_or_get_organism_record(organism=organism, orm=model)
329
+ if organism_record is not None:
330
+ return organism_record
lamindb/_registry.py CHANGED
@@ -129,7 +129,11 @@ def __init__(orm: Registry, *args, **kwargs):
129
129
  @classmethod # type:ignore
130
130
  @doc_args(Registry.from_values.__doc__)
131
131
  def from_values(
132
- cls, values: ListLike, field: StrField | None = None, **kwargs
132
+ cls,
133
+ values: ListLike,
134
+ field: StrField | None = None,
135
+ organism: Registry | str | None = None,
136
+ public_source: Registry | None = None,
133
137
  ) -> list[Registry]:
134
138
  """{}."""
135
139
  from_public = True if cls.__module__.startswith("lnschema_bionty.") else False
@@ -138,7 +142,8 @@ def from_values(
138
142
  iterable=values,
139
143
  field=getattr(cls, field_str),
140
144
  from_public=from_public,
141
- **kwargs,
145
+ organism=organism,
146
+ public_source=public_source,
142
147
  )
143
148
 
144
149
 
lamindb/core/__init__.py CHANGED
@@ -14,14 +14,21 @@ Registries:
14
14
  LabelManager
15
15
  IsTree
16
16
  IsVersioned
17
- DataFrameAnnotator
18
- AnnDataAnnotator
19
- AnnotateLookup
20
17
  CanValidate
21
18
  HasParents
22
19
  InspectResult
23
20
  fields
24
21
 
22
+ Annotators:
23
+
24
+ .. autosummary::
25
+ :toctree: .
26
+
27
+ DataFrameAnnotator
28
+ AnnDataAnnotator
29
+ MuDataAnnotator
30
+ AnnotateLookup
31
+
25
32
  Classes:
26
33
 
27
34
  .. autosummary::
@@ -53,7 +60,12 @@ from lnschema_core.models import (
53
60
  Registry,
54
61
  )
55
62
 
56
- from lamindb._annotate import AnnDataAnnotator, AnnotateLookup, DataFrameAnnotator
63
+ from lamindb._annotate import (
64
+ AnnDataAnnotator,
65
+ AnnotateLookup,
66
+ DataFrameAnnotator,
67
+ MuDataAnnotator,
68
+ )
57
69
  from lamindb._query_manager import QueryManager
58
70
  from lamindb._query_set import QuerySet, RecordsList
59
71
  from lamindb.core._feature_manager import FeatureManager
lamindb/core/_data.py CHANGED
@@ -94,6 +94,23 @@ def save_feature_set_links(self: Artifact | Collection) -> None:
94
94
  bulk_create(links, ignore_conflicts=True)
95
95
 
96
96
 
97
+ def format_repr(value: Registry, exclude: list[str] | str | None = None) -> str:
98
+ if isinstance(exclude, str):
99
+ exclude = [exclude]
100
+ exclude_fields = set() if exclude is None else set(exclude)
101
+ exclude_fields.update(["created_at", "updated_at"])
102
+
103
+ fields = [
104
+ f
105
+ for f in value.__repr__(include_foreign_keys=False).split(", ")
106
+ if not any(f"{excluded_field}=" in f for excluded_field in exclude_fields)
107
+ ]
108
+ repr = ", ".join(fields)
109
+ if not repr.endswith(")"):
110
+ repr += ")"
111
+ return repr
112
+
113
+
97
114
  @doc_args(Data.describe.__doc__)
98
115
  def describe(self: Data):
99
116
  """{}."""
@@ -109,17 +126,7 @@ def describe(self: Data):
109
126
  else:
110
127
  direct_fields.append(f.name)
111
128
 
112
- # Display Provenance
113
- # display line by line the foreign key fields
114
- from lamindb._parents import _transform_emoji
115
-
116
- emojis = {
117
- "storage": "🗃️",
118
- "created_by": "👤",
119
- "transform": _transform_emoji(self.transform),
120
- "run": "👣",
121
- "artifact": "📄",
122
- }
129
+ # provenance
123
130
  if len(foreign_key_fields) > 0: # always True for Artifact and Collection
124
131
  record_msg = f"{colors.green(model_name)}{__repr__(self, include_foreign_keys=False).lstrip(model_name)}"
125
132
  msg += f"{record_msg}\n\n"
@@ -127,17 +134,16 @@ def describe(self: Data):
127
134
  msg += f"{colors.green('Provenance')}:\n "
128
135
  related_msg = "".join(
129
136
  [
130
- f"{emojis.get(i, '📎')} {i}: {self.__getattribute__(i)}\n "
131
- for i in foreign_key_fields
132
- if self.__getattribute__(i) is not None
137
+ f"📎 {field}: {format_repr(self.__getattribute__(field))}\n "
138
+ for field in foreign_key_fields
139
+ if self.__getattribute__(field) is not None
133
140
  ]
134
141
  )
135
142
  msg += related_msg
136
143
  # input of
137
- # can only access many-to-many once record is saved
138
144
  if self.id is not None and self.input_of.exists():
139
145
  values = [format_field_value(i.started_at) for i in self.input_of.all()]
140
- msg += f"⬇️ input_of ({colors.italic('core.Run')}): {values}\n "
146
+ msg += f"📎 input_of ({colors.italic('core.Run')}): {values}\n "
141
147
  msg = msg.rstrip(" ") # do not use removesuffix as we need to remove 2 or 4 spaces
142
148
  msg += print_features(self)
143
149
  msg += print_labels(self)
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from itertools import compress
4
- from typing import TYPE_CHECKING, Iterable
4
+ from typing import TYPE_CHECKING, Iterable, Optional
5
5
 
6
6
  import anndata as ad
7
7
  from anndata import AnnData
@@ -91,6 +91,8 @@ def get_feature_set_links(host: Artifact | Collection) -> QuerySet:
91
91
  def print_features(self: Data) -> str:
92
92
  from lamindb._from_values import _print_values
93
93
 
94
+ from ._data import format_repr
95
+
94
96
  msg = ""
95
97
  features_lookup = Feature.objects.using(self._state.db).lookup().dict()
96
98
  for slot, feature_set in self.features._feature_set_by_slot.items():
@@ -98,12 +100,16 @@ def print_features(self: Data) -> str:
98
100
  features = feature_set.members
99
101
  name_field = get_default_str_field(features[0])
100
102
  feature_names = [getattr(feature, name_field) for feature in features]
101
- msg += f" {colors.bold(slot)}: {feature_set}\n"
103
+ msg += (
104
+ f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
105
+ )
102
106
  print_values = _print_values(feature_names, n=20)
103
107
  msg += f" {print_values}\n"
104
108
  else:
105
109
  df_slot = feature_set.features.df()
106
- msg += f" {colors.bold(slot)}: {feature_set}\n"
110
+ msg += (
111
+ f" {colors.bold(slot)}: {format_repr(feature_set, exclude='hash')}\n"
112
+ )
107
113
  for _, row in df_slot.iterrows():
108
114
  if row["type"] == "category" and row["registries"] is not None:
109
115
  labels = self.labels.get(
@@ -133,9 +139,10 @@ def print_features(self: Data) -> str:
133
139
 
134
140
  def parse_feature_sets_from_anndata(
135
141
  adata: AnnData,
136
- var_field: FieldAttr,
142
+ var_field: FieldAttr | None = None,
137
143
  obs_field: FieldAttr = Feature.name,
138
- **kwargs,
144
+ mute: bool = False,
145
+ organism: str | Registry | None = None,
139
146
  ) -> dict:
140
147
  data_parse = adata
141
148
  if not isinstance(adata, AnnData): # is a path
@@ -149,29 +156,36 @@ def parse_feature_sets_from_anndata(
149
156
  data_parse = ad.read(filepath, backed="r")
150
157
  type = "float"
151
158
  else:
152
- type = convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
159
+ type = (
160
+ "float"
161
+ if adata.X is None
162
+ else convert_numpy_dtype_to_lamin_feature_type(adata.X.dtype)
163
+ )
153
164
  feature_sets = {}
154
- logger.info("parsing feature names of X stored in slot 'var'")
155
- logger.indent = " "
156
- feature_set_var = FeatureSet.from_values(
157
- data_parse.var.index,
158
- var_field,
159
- type=type,
160
- **kwargs,
161
- )
162
- if feature_set_var is not None:
163
- feature_sets["var"] = feature_set_var
164
- logger.save(f"linked: {feature_set_var}")
165
- logger.indent = ""
166
- if feature_set_var is None:
167
- logger.warning("skip linking features to artifact in slot 'var'")
165
+ if var_field is not None:
166
+ logger.info("parsing feature names of X stored in slot 'var'")
167
+ logger.indent = " "
168
+ feature_set_var = FeatureSet.from_values(
169
+ data_parse.var.index,
170
+ var_field,
171
+ type=type,
172
+ mute=mute,
173
+ organism=organism,
174
+ )
175
+ if feature_set_var is not None:
176
+ feature_sets["var"] = feature_set_var
177
+ logger.save(f"linked: {feature_set_var}")
178
+ logger.indent = ""
179
+ if feature_set_var is None:
180
+ logger.warning("skip linking features to artifact in slot 'var'")
168
181
  if len(data_parse.obs.columns) > 0:
169
182
  logger.info("parsing feature names of slot 'obs'")
170
183
  logger.indent = " "
171
184
  feature_set_obs = FeatureSet.from_df(
172
185
  df=data_parse.obs,
173
186
  field=obs_field,
174
- **kwargs,
187
+ mute=mute,
188
+ organism=organism,
175
189
  )
176
190
  if feature_set_obs is not None:
177
191
  feature_sets["obs"] = feature_set_obs
@@ -224,7 +238,7 @@ class FeatureManager:
224
238
  slot = "columns" if slot is None else slot
225
239
  self._add_feature_set(feature_set=FeatureSet(features=features), slot=slot)
226
240
 
227
- def add_from_df(self, field: FieldAttr = Feature.name, **kwargs):
241
+ def add_from_df(self, field: FieldAttr = Feature.name, organism: str | None = None):
228
242
  """Add features from DataFrame."""
229
243
  if isinstance(self._host, Artifact):
230
244
  assert self._host.accessor == "DataFrame"
@@ -235,7 +249,7 @@ class FeatureManager:
235
249
  # parse and register features
236
250
  registry = field.field.model
237
251
  df = self._host.load()
238
- features = registry.from_values(df.columns, field=field, **kwargs)
252
+ features = registry.from_values(df.columns, field=field, organism=organism)
239
253
  if len(features) == 0:
240
254
  logger.error(
241
255
  "no validated features found in DataFrame! please register features first!"
@@ -252,7 +266,8 @@ class FeatureManager:
252
266
  self,
253
267
  var_field: FieldAttr,
254
268
  obs_field: FieldAttr | None = Feature.name,
255
- **kwargs,
269
+ mute: bool = False,
270
+ organism: str | Registry | None = None,
256
271
  ):
257
272
  """Add features from AnnData."""
258
273
  if isinstance(self._host, Artifact):
@@ -263,13 +278,53 @@ class FeatureManager:
263
278
  # parse and register features
264
279
  adata = self._host.load()
265
280
  feature_sets = parse_feature_sets_from_anndata(
266
- adata, var_field=var_field, obs_field=obs_field, **kwargs
281
+ adata,
282
+ var_field=var_field,
283
+ obs_field=obs_field,
284
+ mute=mute,
285
+ organism=organism,
267
286
  )
268
287
 
269
288
  # link feature sets
270
289
  self._host._feature_sets = feature_sets
271
290
  self._host.save()
272
291
 
292
+ def add_from_mudata(
293
+ self,
294
+ var_fields: dict[str, FieldAttr],
295
+ obs_fields: dict[str, FieldAttr] = None,
296
+ mute: bool = False,
297
+ organism: str | Registry | None = None,
298
+ ):
299
+ """Add features from MuData."""
300
+ if obs_fields is None:
301
+ obs_fields = {}
302
+ if isinstance(self._host, Artifact):
303
+ assert self._host.accessor == "MuData"
304
+ else:
305
+ raise NotImplementedError()
306
+
307
+ # parse and register features
308
+ mdata = self._host.load()
309
+ feature_sets = {}
310
+ obs_features = features = Feature.from_values(mdata.obs.columns)
311
+ if len(obs_features) > 0:
312
+ feature_sets["obs"] = FeatureSet(features=features)
313
+ for modality, field in var_fields.items():
314
+ modality_fs = parse_feature_sets_from_anndata(
315
+ mdata[modality],
316
+ var_field=field,
317
+ obs_field=obs_fields.get(modality, Feature.name),
318
+ mute=mute,
319
+ organism=organism,
320
+ )
321
+ for k, v in modality_fs.items():
322
+ feature_sets[f"['{modality}'].{k}"] = v
323
+
324
+ # link feature sets
325
+ self._host._feature_sets = feature_sets
326
+ self._host.save()
327
+
273
328
  def _add_feature_set(self, feature_set: FeatureSet, slot: str):
274
329
  """Add new feature set to a slot.
275
330
 
@@ -49,7 +49,7 @@ def print_labels(self: Data):
49
49
  n = labels.count()
50
50
  field = get_default_str_field(labels)
51
51
  print_values = _print_values(labels.list(field), n=10)
52
- labels_msg += f" 🏷️ {related_name} ({n}, {colors.italic(related_model)}): {print_values}\n"
52
+ labels_msg += f" 📎 {related_name} ({n}, {colors.italic(related_model)}): {print_values}\n"
53
53
  if len(labels_msg) > 0:
54
54
  return f"{colors.green('Labels')}:\n{labels_msg}"
55
55
  else: