lamindb 0.69.4__py3-none-any.whl → 0.69.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lamindb/__init__.py CHANGED
@@ -40,7 +40,7 @@ Modules & settings:
40
40
 
41
41
  """
42
42
 
43
- __version__ = "0.69.4" # denote a release candidate for 0.1.0 with 0.1rc1
43
+ __version__ = "0.69.5" # denote a release candidate for 0.1.0 with 0.1rc1
44
44
 
45
45
  import os as _os
46
46
 
lamindb/_annotate.py CHANGED
@@ -15,15 +15,34 @@ class ValidationError(ValueError):
15
15
 
16
16
 
17
17
  class AnnotateLookup:
18
- """Lookup features and labels from the reference instance."""
18
+ """Lookup categories from the reference instance."""
19
19
 
20
20
  def __init__(
21
- self, fields: Dict[str, FieldAttr], using: Optional[str] = None
21
+ self,
22
+ categorials: Dict[str, FieldAttr],
23
+ slots: Dict[str, FieldAttr] = None,
24
+ using: Optional[str] = None,
22
25
  ) -> None:
23
- self._fields = fields
26
+ if slots is None:
27
+ slots = {}
28
+ if slots is None:
29
+ slots = {}
30
+ self._fields = {**categorials, **slots}
24
31
  self._using = None if using == "default" else using
25
- self._using_name = using or ln_setup.settings.instance.slug
26
- logger.debug(f"Lookup objects from the {colors.italic(self._using_name)}")
32
+ self._using_name = self._using or ln_setup.settings.instance.slug
33
+ debug_message = f"Lookup objects from the " f"{colors.italic(self._using_name)}"
34
+ logger.debug(debug_message)
35
+
36
+ def __getattr__(self, name):
37
+ if name in self._fields:
38
+ registry = self._fields[name].field.model
39
+ if self._using == "public":
40
+ return registry.public().lookup()
41
+ else:
42
+ return get_registry_instance(registry, self._using).lookup()
43
+ raise AttributeError(
44
+ f"'{self.__class__.__name__}' object has no attribute '{name}'"
45
+ )
27
46
 
28
47
  def __getitem__(self, name):
29
48
  if name in self._fields:
@@ -38,10 +57,17 @@ class AnnotateLookup:
38
57
 
39
58
  def __repr__(self) -> str:
40
59
  if len(self._fields) > 0:
41
- fields = "\n ".join([str([key]) for key in self._fields.keys()])
60
+ getattr_keys = "\n ".join(
61
+ [f".{key}" for key in self._fields if key.isidentifier()]
62
+ )
63
+ getitem_keys = "\n ".join(
64
+ [str([key]) for key in self._fields if not key.isidentifier()]
65
+ )
42
66
  return (
43
- f"Lookup objects from the {colors.italic(self._using_name)}:\n {colors.green(fields)}\n\n"
44
- "Example:\n → categories = validator.lookup().['cell_type']\n"
67
+ f"Lookup objects from the {colors.italic(self._using_name)}:\n "
68
+ f"{colors.green(getattr_keys)}\n "
69
+ f"{colors.green(getitem_keys)}\n\n"
70
+ "Example:\n → categories = validator.lookup().cell_type\n"
45
71
  " → categories.alveolar_type_1_fibroblast_cell"
46
72
  )
47
73
  else:
@@ -53,10 +79,10 @@ class DataFrameAnnotator:
53
79
 
54
80
  Args:
55
81
  df: The DataFrame object to annotate.
56
- fields: A dictionary mapping column to registry_field.
82
+ columns: The field attribute for the feature column.
83
+ categoricals: A dictionary mapping column names to registry_field.
57
84
  For example:
58
- {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
59
- feature_field: The field attribute for the feature column.
85
+ ``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``.
60
86
  using: The reference instance containing registries to validate against.
61
87
  verbosity: The verbosity level.
62
88
  """
@@ -64,8 +90,8 @@ class DataFrameAnnotator:
64
90
  def __init__(
65
91
  self,
66
92
  df: pd.DataFrame,
67
- fields: Optional[Dict[str, FieldAttr]] = None,
68
- feature_field: FieldAttr = Feature.name,
93
+ columns: FieldAttr = Feature.name,
94
+ categoricals: Optional[Dict[str, FieldAttr]] = None,
69
95
  using: Optional[str] = None,
70
96
  verbosity: str = "hint",
71
97
  **kwargs,
@@ -73,15 +99,15 @@ class DataFrameAnnotator:
73
99
  from lamindb.core._settings import settings
74
100
 
75
101
  self._df = df
76
- self._fields = fields or {}
77
- self._feature_field = feature_field
102
+ self._fields = categoricals or {}
103
+ self._columns_field = columns
78
104
  self._using = using
79
105
  settings.verbosity = verbosity
80
106
  self._artifact = None
81
107
  self._collection = None
82
108
  self._validated = False
83
109
  self._kwargs: Dict = kwargs
84
- self.save_features()
110
+ self._save_columns()
85
111
 
86
112
  @property
87
113
  def fields(self) -> Dict:
@@ -96,69 +122,89 @@ class DataFrameAnnotator:
96
122
  if None (default), the lookup is performed on the instance specified in "using" parameter of the validator.
97
123
  if "public", the lookup is performed on the public reference.
98
124
  """
99
- fields = {**{"feature": self._feature_field}, **self.fields}
100
- return AnnotateLookup(fields=fields, using=using or self._using)
125
+ return AnnotateLookup(
126
+ categorials=self._fields,
127
+ slots={"columns": self._columns_field},
128
+ using=using or self._using,
129
+ )
101
130
 
102
- def save_features(self, validated_only: bool = True) -> None:
103
- """Register features records."""
131
+ def _save_columns(self, validated_only: bool = True) -> None:
132
+ """Save column name records."""
104
133
  missing_columns = set(self.fields.keys()) - set(self._df.columns)
105
134
  if missing_columns:
106
135
  raise ValueError(
107
136
  f"Columns {missing_columns} are not found in the data object!"
108
137
  )
109
138
 
110
- # Always register features specified as the fields keys
139
+ # Always save features specified as the fields keys
111
140
  update_registry(
112
141
  values=list(self.fields.keys()),
113
- field=self._feature_field,
114
- feature_name="feature",
142
+ field=self._columns_field,
143
+ key="columns",
144
+ save_function="add_new_from_columns",
115
145
  using=self._using,
116
146
  validated_only=False,
117
147
  kwargs=self._kwargs,
118
148
  )
119
149
 
120
- # Register the rest of the columns based on validated_only
150
+ # Save the rest of the columns based on validated_only
121
151
  additional_columns = set(self._df.columns) - set(self.fields.keys())
122
152
  if additional_columns:
123
153
  update_registry(
124
154
  values=list(additional_columns),
125
- field=self._feature_field,
126
- feature_name="feature",
155
+ field=self._columns_field,
156
+ key="columns",
157
+ save_function="add_new_from_columns",
127
158
  using=self._using,
128
159
  validated_only=validated_only,
129
160
  df=self._df, # Get the Feature type from df
130
161
  kwargs=self._kwargs,
131
162
  )
132
163
 
133
- def update_registry(self, feature: str, validated_only: bool = True, **kwargs):
134
- """Register labels for a feature.
164
+ def add_validated_from(self, key: str, **kwargs):
165
+ """Add validated categories.
166
+
167
+ Args:
168
+ key: The key referencing the slot in the DataFrame.
169
+ **kwargs: Additional keyword arguments.
170
+ """
171
+ self._update_registry(key, validated_only=True, **kwargs)
172
+
173
+ def add_new_from(self, key: str, **kwargs):
174
+ """Add validated & new categories.
135
175
 
136
176
  Args:
137
- feature: The name of the feature to register.
138
- validated_only: Whether to register only validated labels.
177
+ key: The key referencing the slot in the DataFrame from which to draw terms.
139
178
  **kwargs: Additional keyword arguments.
140
179
  """
141
- if feature == "all":
180
+ self._update_registry(key, validated_only=False, **kwargs)
181
+
182
+ def add_new_from_columns(self, **kwargs):
183
+ """Add validated & new column names to its registry."""
184
+ self._save_columns(validated_only=False, **kwargs)
185
+
186
+ def _update_registry(self, categorical: str, validated_only: bool = True, **kwargs):
187
+ if categorical == "all":
142
188
  self._update_registry_all(validated_only=validated_only, **kwargs)
143
- elif feature == "feature":
144
- self.save_features(validated_only=validated_only)
189
+ elif categorical == "columns":
190
+ self._save_columns(validated_only=validated_only)
145
191
  else:
146
- if feature not in self.fields:
147
- raise ValueError(f"Feature {feature} is not part of the fields!")
192
+ if categorical not in self.fields:
193
+ raise ValueError(f"Feature {categorical} is not part of the fields!")
148
194
  update_registry(
149
- values=self._df[feature].unique().tolist(),
150
- field=self.fields[feature],
151
- feature_name=feature,
195
+ values=self._df[categorical].unique().tolist(),
196
+ field=self.fields[categorical],
197
+ key=categorical,
152
198
  using=self._using,
153
199
  validated_only=validated_only,
154
200
  kwargs=kwargs,
155
201
  )
156
202
 
157
203
  def _update_registry_all(self, validated_only: bool = True, **kwargs):
158
- """Register labels for all features."""
204
+ """Save labels for all features."""
159
205
  for name in self.fields.keys():
160
- logger.info(f"registering labels for '{name}'")
161
- self.update_registry(feature=name, validated_only=validated_only, **kwargs)
206
+ logger.info(f"saving labels for '{name}'")
207
+ self._update_registry(name, validated_only=validated_only, **kwargs)
162
208
 
163
209
  def validate(self, **kwargs) -> bool:
164
210
  """Validate variables and categorical observations.
@@ -176,14 +222,14 @@ class DataFrameAnnotator:
176
222
  return self._validated
177
223
 
178
224
  def save_artifact(self, description: str, **kwargs) -> Artifact:
179
- """Register the validated DataFrame and metadata.
225
+ """Save the validated DataFrame and metadata.
180
226
 
181
227
  Args:
182
228
  description: Description of the DataFrame object.
183
229
  **kwargs: Object level metadata.
184
230
 
185
231
  Returns:
186
- A registered artifact record.
232
+ A saved artifact record.
187
233
  """
188
234
  from lamindb.core._settings import settings
189
235
 
@@ -193,17 +239,18 @@ class DataFrameAnnotator:
193
239
  f"Data object is not validated, please run {colors.yellow('validate()')}!"
194
240
  )
195
241
 
196
- # Make sure all labels are registered in the current instance
242
+ # Make sure all labels are saved in the current instance
197
243
  verbosity = settings.verbosity
198
244
  try:
199
245
  settings.verbosity = "warning"
200
- self.update_registry("all")
246
+ # save all validated records to the current instance
247
+ self.add_validated_from("all")
201
248
 
202
249
  self._artifact = save_artifact(
203
250
  self._df,
204
251
  description=description,
205
252
  fields=self.fields,
206
- feature_field=self._feature_field,
253
+ columns_field=self._columns_field,
207
254
  **self._kwargs,
208
255
  )
209
256
  finally:
@@ -219,10 +266,10 @@ class DataFrameAnnotator:
219
266
  reference: Optional[str] = None,
220
267
  reference_type: Optional[str] = None,
221
268
  ) -> Collection:
222
- """Register a collection from artifact/artifacts.
269
+ """Save a collection from artifact/artifacts.
223
270
 
224
271
  Args:
225
- artifact: One or several registered Artifacts.
272
+ artifact: One or several saved Artifacts.
226
273
  name: Title of the publication.
227
274
  description: Description of the publication.
228
275
  reference: Accession number (e.g. GSE#, E-MTAB#, etc.).
@@ -238,17 +285,16 @@ class DataFrameAnnotator:
238
285
  slug = ln_setup.settings.instance.slug
239
286
  if collection._state.adding:
240
287
  collection.save()
241
- logger.success(f"registered collection in {colors.italic(slug)}")
242
288
  else:
243
289
  collection.save()
244
290
  logger.warning(f"collection already exists in {colors.italic(slug)}!")
245
291
  if ln_setup.settings.instance.is_remote:
246
- logger.print(f"🔗 https://lamin.ai/{slug}/collection/{collection.uid}")
292
+ logger.print(f"go to https://lamin.ai/{slug}/collection/{collection.uid}")
247
293
  self._collection = collection
248
294
  return collection
249
295
 
250
296
  def clean_up_failed_runs(self):
251
- """Clean up previous failed runs that don't register any outputs."""
297
+ """Clean up previous failed runs that don't save any outputs."""
252
298
  from lamindb.core._run_context import run_context
253
299
 
254
300
  if run_context.transform is not None:
@@ -258,95 +304,93 @@ class DataFrameAnnotator:
258
304
 
259
305
 
260
306
  class AnnDataAnnotator(DataFrameAnnotator):
261
- """Annotation flow for an AnnData object.
307
+ """Annotation flow for an ``AnnData`` object.
262
308
 
263
309
  Args:
264
310
  adata: The AnnData object to annotate.
265
- var_field: The registry field to validate variables index against.
266
- obs_fields: A dictionary mapping obs_column to registry_field.
311
+ var_index: The registry field for mapping the ``.var`` index.
312
+ categoricals: A dictionary mapping ``.obs.columns`` to a registry field.
267
313
  For example:
268
- {"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}
269
- using: The reference instance containing registries to validate against.
314
+ ``{"cell_type_ontology_id": bt.CellType.ontology_id, "donor_id": ln.ULabel.name}``
315
+ using: A reference LaminDB instance.
270
316
  """
271
317
 
272
318
  def __init__(
273
319
  self,
274
320
  adata: ad.AnnData,
275
- var_field: FieldAttr,
276
- obs_fields: Dict[str, FieldAttr],
321
+ var_index: FieldAttr,
322
+ categoricals: Dict[str, FieldAttr],
277
323
  using: str = "default",
278
324
  verbosity: str = "hint",
279
325
  **kwargs,
280
326
  ) -> None:
281
327
  self._adata = adata
282
- self._var_field = var_field
328
+ self._var_field = var_index
283
329
  super().__init__(
284
330
  df=self._adata.obs,
285
- fields=obs_fields,
331
+ categoricals=categoricals,
286
332
  using=using,
287
333
  verbosity=verbosity,
288
334
  **kwargs,
289
335
  )
290
- self._obs_fields = obs_fields
291
- self._save_variables()
336
+ self._obs_fields = categoricals
337
+ self._save_from_var_index()
292
338
 
293
339
  @property
294
- def var_field(self) -> FieldAttr:
340
+ def var_index(self) -> FieldAttr:
295
341
  """Return the registry field to validate variables index against."""
296
342
  return self._var_field
297
343
 
298
344
  @property
299
- def obs_fields(self) -> Dict:
345
+ def categoricals(self) -> Dict:
300
346
  """Return the obs fields to validate against."""
301
347
  return self._obs_fields
302
348
 
303
349
  def lookup(self, using: Optional[str] = None) -> AnnotateLookup:
304
350
  """Lookup features and labels."""
305
- fields = {
306
- **{"feature": Feature.name, "variables": self.var_field},
307
- **self.obs_fields,
308
- }
309
- return AnnotateLookup(fields=fields, using=using or self._using)
310
-
311
- def _save_variables(self, validated_only: bool = True, **kwargs):
312
- """Register variable records."""
351
+ return AnnotateLookup(
352
+ categorials=self._obs_fields,
353
+ slots={"columns": self._columns_field, "var_index": self._var_field},
354
+ using=using or self._using,
355
+ )
356
+
357
+ def _save_from_var_index(self, validated_only: bool = True, **kwargs):
358
+ """Save variable records."""
313
359
  self._kwargs.update(kwargs)
314
360
  update_registry(
315
- values=self._adata.var_names,
316
- field=self.var_field,
317
- feature_name="variables",
361
+ values=self._adata.var.index,
362
+ field=self.var_index,
363
+ key="var_index",
364
+ save_function="add_new_from_var_index",
318
365
  using=self._using,
319
366
  validated_only=validated_only,
320
367
  kwargs=self._kwargs,
321
368
  )
322
369
 
370
+ def add_new_from_var_index(self, **kwargs):
371
+ """Update variable records."""
372
+ self._save_from_var_index(validated_only=False, **kwargs)
373
+
323
374
  def validate(self, **kwargs) -> bool:
324
- """Validate variables and categorical observations."""
375
+ """Validate categories."""
325
376
  self._kwargs.update(kwargs)
326
377
  self._validated = validate_anndata(
327
378
  self._adata,
328
- var_field=self.var_field,
329
- obs_fields=self.obs_fields,
379
+ var_field=self.var_index,
380
+ obs_fields=self.categoricals,
330
381
  **self._kwargs,
331
382
  )
332
383
  return self._validated
333
384
 
334
- def update_registry(self, feature: str, validated_only: bool = True, **kwargs):
335
- """Register labels for a feature."""
336
- if feature == "variables":
337
- self._save_variables(validated_only=validated_only, **kwargs)
338
- else:
339
- super().update_registry(feature, validated_only, **kwargs)
340
-
341
385
  def save_artifact(self, description: str, **kwargs) -> Artifact:
342
- """Register the validated AnnData and metadata.
386
+ """Save the validated AnnData and metadata.
343
387
 
344
388
  Args:
345
389
  description: Description of the AnnData object.
346
390
  **kwargs: Object level metadata.
347
391
 
348
392
  Returns:
349
- A registered artifact record.
393
+ A saved artifact record.
350
394
  """
351
395
  self._kwargs.update(kwargs)
352
396
  if not self._validated:
@@ -355,8 +399,8 @@ class AnnDataAnnotator(DataFrameAnnotator):
355
399
  self._artifact = save_artifact(
356
400
  self._adata,
357
401
  description=description,
358
- feature_field=self.var_field,
359
- fields=self.obs_fields,
402
+ columns_field=self.var_index,
403
+ fields=self.categoricals,
360
404
  **self._kwargs,
361
405
  )
362
406
  return self._artifact
@@ -369,16 +413,16 @@ class Annotate:
369
413
  def from_df(
370
414
  cls,
371
415
  df: pd.DataFrame,
372
- fields: Optional[Dict[str, FieldAttr]] = None,
373
- feature_field: FieldAttr = Feature.name,
416
+ categoricals: Optional[Dict[str, FieldAttr]] = None,
417
+ columns: FieldAttr = Feature.name,
374
418
  using: Optional[str] = None,
375
419
  verbosity: str = "hint",
376
420
  **kwargs,
377
421
  ) -> DataFrameAnnotator:
378
422
  return DataFrameAnnotator(
379
423
  df=df,
380
- fields=fields,
381
- feature_field=feature_field,
424
+ categoricals=categoricals,
425
+ columns=columns,
382
426
  using=using,
383
427
  verbosity=verbosity,
384
428
  **kwargs,
@@ -388,16 +432,16 @@ class Annotate:
388
432
  def from_anndata(
389
433
  cls,
390
434
  adata: ad.AnnData,
391
- var_field: FieldAttr,
392
- obs_fields: Dict[str, FieldAttr],
435
+ var_index: FieldAttr,
436
+ categoricals: Dict[str, FieldAttr],
393
437
  using: str = "default",
394
438
  verbosity: str = "hint",
395
439
  **kwargs,
396
440
  ) -> AnnDataAnnotator:
397
441
  return AnnDataAnnotator(
398
442
  adata=adata,
399
- var_field=var_field,
400
- obs_fields=obs_fields,
443
+ var_index=var_index,
444
+ categoricals=categoricals,
401
445
  using=using,
402
446
  verbosity=verbosity,
403
447
  **kwargs,
@@ -439,18 +483,17 @@ def check_registry_organism(
439
483
  def validate_categories(
440
484
  values: Iterable[str],
441
485
  field: FieldAttr,
442
- feature_name: str,
486
+ key: str,
443
487
  using: Optional[str] = None,
444
488
  **kwargs,
445
489
  ) -> bool:
446
490
  """Validate ontology terms in a pandas series using LaminDB registries."""
447
491
  from lamindb._from_values import _print_values
492
+ from lamindb.core._settings import settings
448
493
 
449
494
  model_field = f"{field.field.model.__name__}.{field.field.name}"
450
495
  logger.indent = ""
451
- logger.info(
452
- f"inspecting '{colors.bold(feature_name)}' by {colors.italic(model_field)}"
453
- )
496
+ logger.info(f"mapping {colors.italic(key)} on {colors.italic(model_field)}")
454
497
  logger.indent = " "
455
498
 
456
499
  registry = field.field.model
@@ -465,6 +508,7 @@ def validate_categories(
465
508
  )
466
509
  non_validated = inspect_result.non_validated
467
510
 
511
+ values_validated = []
468
512
  if using is not None and using != "default" and non_validated:
469
513
  registry = get_registry_instance(registry, using)
470
514
  # Inspect the using instance
@@ -472,19 +516,42 @@ def validate_categories(
472
516
  values=non_validated, field=field, registry=registry, **filter_kwargs
473
517
  )
474
518
  non_validated = inspect_result.non_validated
519
+ values_validated += inspect_result.validated
475
520
 
521
+ # Inspect from public (bionty only)
522
+ if hasattr(registry, "public"):
523
+ verbosity = settings.verbosity
524
+ try:
525
+ settings.verbosity = "error"
526
+ public_records = registry.from_values(
527
+ non_validated, field=field, **filter_kwargs
528
+ )
529
+ values_validated += [getattr(r, field.field.name) for r in public_records]
530
+ finally:
531
+ settings.verbosity = verbosity
532
+
533
+ validated_hint_print = f".add_validated_from('{key}')"
534
+ n_validated = len(values_validated)
535
+ if n_validated > 0:
536
+ logger.warning(
537
+ f"found {colors.yellow(f'{n_validated} terms')} validated terms: "
538
+ f"{colors.yellow(values_validated)}\n → save terms via "
539
+ f"{colors.yellow(validated_hint_print)}"
540
+ )
541
+
542
+ non_validated_hint_print = f".add_new_from('{key}')"
543
+ non_validated = [i for i in non_validated if i not in values_validated]
476
544
  n_non_validated = len(non_validated)
477
545
  if n_non_validated == 0:
478
- logger.success(f"all {feature_name}s are validated")
546
+ logger.success(f"{key} validated")
479
547
  return True
480
548
  else:
481
549
  are = "are" if n_non_validated > 1 else "is"
482
550
  print_values = _print_values(non_validated)
483
- feature_name_print = f".update_registry('{feature_name}')"
484
551
  warning_message = (
485
552
  f"{colors.yellow(f'{n_non_validated} terms')} {are} not validated: "
486
- f"{colors.yellow(print_values)}\n → register terms via "
487
- f"{colors.yellow(feature_name_print)}"
553
+ f"{colors.yellow(print_values)}\n → save terms via "
554
+ f"{colors.yellow(non_validated_hint_print)}"
488
555
  )
489
556
  logger.warning(warning_message)
490
557
  logger.indent = ""
@@ -499,11 +566,11 @@ def validate_categories_in_df(
499
566
  ) -> bool:
500
567
  """Validate categories in DataFrame columns using LaminDB registries."""
501
568
  validated = True
502
- for feature_name, field in fields.items():
569
+ for key, field in fields.items():
503
570
  validated &= validate_categories(
504
- df[feature_name],
571
+ df[key],
505
572
  field=field,
506
- feature_name=feature_name,
573
+ key=key,
507
574
  using=using,
508
575
  **kwargs,
509
576
  )
@@ -526,7 +593,7 @@ def validate_anndata(
526
593
  validated_var = validate_categories(
527
594
  adata.var.index,
528
595
  field=var_field,
529
- feature_name="variables",
596
+ key="var_index",
530
597
  using=using,
531
598
  **kwargs,
532
599
  )
@@ -540,20 +607,20 @@ def save_artifact(
540
607
  data: Union[pd.DataFrame, ad.AnnData],
541
608
  description: str,
542
609
  fields: Dict[str, FieldAttr],
543
- feature_field: FieldAttr,
610
+ columns_field: FieldAttr,
544
611
  **kwargs,
545
612
  ) -> Artifact:
546
- """Register all metadata with an Artifact.
613
+ """Save all metadata with an Artifact.
547
614
 
548
615
  Args:
549
- data: The DataFrame or AnnData object to register.
616
+ data: The DataFrame or AnnData object to save.
550
617
  description: A description of the artifact.
551
618
  fields: A dictionary mapping obs_column to registry_field.
552
- feature_field: The registry field to validate variables index against.
619
+ columns_field: The registry field to validate variables index against.
553
620
  kwargs: Additional keyword arguments to pass to the registry model.
554
621
 
555
622
  Returns:
556
- The registered Artifact.
623
+ The saved Artifact.
557
624
  """
558
625
  if isinstance(data, ad.AnnData):
559
626
  artifact = Artifact.from_anndata(data, description=description)
@@ -566,63 +633,61 @@ def save_artifact(
566
633
 
567
634
  feature_kwargs: Dict = {}
568
635
  organism = check_registry_organism(
569
- feature_field.field.model, kwargs.pop("organism", None)
636
+ columns_field.field.model, kwargs.pop("organism", None)
570
637
  )
571
638
  if organism is not None:
572
639
  feature_kwargs["organism"] = organism
573
640
 
574
641
  if isinstance(data, ad.AnnData):
575
- artifact.features.add_from_anndata(var_field=feature_field, **feature_kwargs)
642
+ artifact.features.add_from_anndata(var_field=columns_field, **feature_kwargs)
576
643
  else:
577
- artifact.features.add_from_df(field=feature_field, **feature_kwargs)
644
+ artifact.features.add_from_df(field=columns_field, **feature_kwargs)
578
645
 
579
646
  features = Feature.lookup().dict()
580
- for feature_name, field in fields.items():
581
- feature = features.get(feature_name)
647
+ for key, field in fields.items():
648
+ feature = features.get(key)
582
649
  registry = field.field.model
583
650
  filter_kwargs = kwargs.copy()
584
651
  organism = check_registry_organism(registry, organism)
585
652
  if organism is not None:
586
653
  filter_kwargs["organism"] = organism
587
654
  df = data.obs if isinstance(data, ad.AnnData) else data
588
- labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
655
+ labels = registry.from_values(df[key], field=field, **filter_kwargs)
589
656
  artifact.labels.add(labels, feature)
590
657
 
591
658
  slug = ln_setup.settings.instance.slug
592
- logger.success(f"registered artifact in {colors.italic(slug)}")
593
659
  if ln_setup.settings.instance.is_remote:
594
- logger.info(f"🔗 https://lamin.ai/{slug}/artifact/{artifact.uid}")
595
-
660
+ logger.important(f"go to https://lamin.ai/{slug}/artifact/{artifact.uid}")
596
661
  return artifact
597
662
 
598
663
 
599
664
  def update_registry(
600
665
  values: List[str],
601
666
  field: FieldAttr,
602
- feature_name: str,
667
+ key: str,
668
+ save_function: str = "add_new_from",
603
669
  using: Optional[str] = None,
604
670
  validated_only: bool = True,
605
671
  kwargs: Optional[Dict] = None,
606
672
  df: Optional[pd.DataFrame] = None,
607
673
  ) -> None:
608
- """Register features or labels records in the default instance from the using instance.
674
+ """Save features or labels records in the default instance from the using instance.
609
675
 
610
676
  Args:
611
- values: A list of values to be registered as labels.
612
- field: The FieldAttr object representing the field for which labels are being registered.
613
- feature_name: The name of the feature to register.
677
+ values: A list of values to be saved as labels.
678
+ field: The FieldAttr object representing the field for which labels are being saved.
679
+ key: The name of the feature to save.
680
+ save_function: The name of the function to save the labels.
614
681
  using: The name of the instance from which to transfer labels (if applicable).
615
- validated_only: If True, only register validated labels.
682
+ validated_only: If True, only save validated labels.
616
683
  kwargs: Additional keyword arguments to pass to the registry model.
617
- df: A DataFrame to register labels from.
684
+ df: A DataFrame to save labels from.
618
685
  """
619
686
  from lamindb._save import save as ln_save
620
687
  from lamindb.core._settings import settings
621
688
 
622
689
  filter_kwargs = {} if kwargs is None else kwargs.copy()
623
690
  registry = field.field.model
624
- if registry == ULabel:
625
- validated_only = False
626
691
 
627
692
  organism = check_registry_organism(registry, filter_kwargs.pop("organism", None))
628
693
  if organism is not None:
@@ -638,10 +703,10 @@ def update_registry(
638
703
  settings.verbosity = verbosity
639
704
  return
640
705
 
641
- labels_registered: Dict = {"from public": [], "without reference": []}
706
+ labels_saved: Dict = {"from public": [], "without reference": []}
642
707
 
643
708
  (
644
- labels_registered[f"from {using}"],
709
+ labels_saved[f"from {using}"],
645
710
  non_validated_labels,
646
711
  ) = update_registry_from_using_instance(
647
712
  inspect_result_current.non_validated,
@@ -656,11 +721,11 @@ def update_registry(
656
721
  else []
657
722
  )
658
723
  ln_save(public_records)
659
- labels_registered["from public"] = [
724
+ labels_saved["from public"] = [
660
725
  getattr(r, field.field.name) for r in public_records
661
726
  ]
662
- labels_registered["without reference"] = [
663
- i for i in non_validated_labels if i not in labels_registered["from public"]
727
+ labels_saved["without reference"] = [
728
+ i for i in non_validated_labels if i not in labels_saved["from public"]
664
729
  ]
665
730
 
666
731
  if not validated_only:
@@ -670,7 +735,7 @@ def update_registry(
670
735
  else:
671
736
  if "organism" in filter_kwargs:
672
737
  filter_kwargs["organism"] = _save_organism(name=organism)
673
- for value in labels_registered["without reference"]:
738
+ for value in labels_saved["without reference"]:
674
739
  filter_kwargs[field.field.name] = value
675
740
  if registry == Feature:
676
741
  filter_kwargs["type"] = "category"
@@ -678,60 +743,66 @@ def update_registry(
678
743
  ln_save(non_validated_records)
679
744
 
680
745
  if registry == ULabel and field.field.name == "name":
681
- save_ulabels_with_parent(values, field=field, feature_name=feature_name)
746
+ save_ulabels_with_parent(values, field=field, key=key)
682
747
  finally:
683
748
  settings.verbosity = verbosity
684
749
 
685
- log_registered_labels(
686
- labels_registered,
687
- feature_name=feature_name,
750
+ log_saved_labels(
751
+ labels_saved,
752
+ key=key,
753
+ save_function=save_function,
688
754
  model_field=f"{registry.__name__}.{field.field.name}",
689
755
  validated_only=validated_only,
690
756
  )
691
757
 
692
758
 
693
- def log_registered_labels(
694
- labels_registered: Dict,
695
- feature_name: str,
759
+ def log_saved_labels(
760
+ labels_saved: Dict,
761
+ key: str,
762
+ save_function: str,
696
763
  model_field: str,
697
764
  validated_only: bool = True,
698
765
  ) -> None:
699
- """Log the registered labels."""
700
- labels_type = "features" if feature_name == "feature" else "labels"
766
+ """Log the saved labels."""
701
767
  model_field = colors.italic(model_field)
702
- for key, labels in labels_registered.items():
768
+ for k, labels in labels_saved.items():
703
769
  if not labels:
704
770
  continue
705
771
 
706
- if key == "without reference" and validated_only:
772
+ if k == "without reference" and validated_only:
707
773
  msg = colors.yellow(
708
- f"{len(labels)} non-validated {labels_type} are not registered with {model_field}: {labels}!"
774
+ f"{len(labels)} non-validated categories are not saved in {model_field}: {labels}!"
775
+ )
776
+ lookup_print = (
777
+ f"lookup().{key}" if key.isidentifier() else f".lookup()['{key}']"
709
778
  )
710
- lookup_print = f".lookup().['{feature_name}']"
779
+
780
+ hint = f".add_new_from('{key}')"
711
781
  msg += f"\n → to lookup categories, use {lookup_print}"
712
782
  msg += (
713
- f"\n → to register, run {colors.yellow('save_features(validated_only=False)')}"
714
- if labels_type == "features"
715
- else f"\n → to register, set {colors.yellow('validated_only=False')}"
783
+ f"\n → to save, run {colors.yellow(hint)}"
784
+ if save_function == "add_new_from"
785
+ else f"\n → to save, run {colors.yellow(save_function)}"
716
786
  )
717
787
  logger.warning(msg)
718
788
  else:
719
- key = "" if key == "without reference" else f"{colors.green(key)} "
789
+ k = "" if k == "without reference" else f"{colors.green(k)} "
790
+ # the term "transferred" stresses that this is always in the context of transferring
791
+ # labels from a public ontology or a different instance to the present instance
792
+ s = "s" if len(labels) > 1 else ""
720
793
  logger.success(
721
- f"registered {len(labels)} {labels_type} {key}with {model_field}: {labels}"
794
+ f"added {len(labels)} record{s} {k}with {model_field} for {colors.italic(key)}: {labels}"
722
795
  )
723
796
 
724
797
 
725
- def save_ulabels_with_parent(
726
- values: List[str], field: FieldAttr, feature_name: str
727
- ) -> None:
728
- """Register a parent label for the given labels."""
798
+ def save_ulabels_with_parent(values: List[str], field: FieldAttr, key: str) -> None:
799
+ """Save a parent label for the given labels."""
729
800
  registry = field.field.model
730
801
  assert registry == ULabel
731
802
  all_records = registry.from_values(values, field=field)
732
- is_feature = registry.filter(name=f"is_{feature_name}").one_or_none()
803
+ is_feature = registry.filter(name=f"is_{key}").one_or_none()
733
804
  if is_feature is None:
734
- is_feature = registry(name=f"is_{feature_name}")
805
+ is_feature = registry(name=f"is_{key}")
735
806
  is_feature.save()
736
807
  is_feature.children.add(*all_records)
737
808
 
@@ -742,20 +813,20 @@ def update_registry_from_using_instance(
742
813
  using: Optional[str] = None,
743
814
  kwargs: Optional[Dict] = None,
744
815
  ) -> Tuple[List[str], List[str]]:
745
- """Register features or labels records from the using instance.
816
+ """Save features or labels records from the using instance.
746
817
 
747
818
  Args:
748
- values: A list of values to be registered as labels.
749
- field: The FieldAttr object representing the field for which labels are being registered.
819
+ values: A list of values to be saved as labels.
820
+ field: The FieldAttr object representing the field for which labels are being saved.
750
821
  using: The name of the instance from which to transfer labels (if applicable).
751
822
  kwargs: Additional keyword arguments to pass to the registry model.
752
823
 
753
824
  Returns:
754
- A tuple containing the list of registered labels and the list of non-registered labels.
825
+ A tuple containing the list of saved labels and the list of non-saved labels.
755
826
  """
756
827
  kwargs = kwargs or {}
757
- labels_registered = []
758
- not_registered = values
828
+ labels_saved = []
829
+ not_saved = values
759
830
 
760
831
  if using is not None and using != "default":
761
832
  registry = field.field.model
@@ -768,14 +839,14 @@ def update_registry_from_using_instance(
768
839
  ).all()
769
840
  for label_using in labels_using:
770
841
  label_using.save()
771
- labels_registered.append(getattr(label_using, field.field.name))
772
- not_registered = inspect_result_using.non_validated
842
+ labels_saved.append(getattr(label_using, field.field.name))
843
+ not_saved = inspect_result_using.non_validated
773
844
 
774
- return labels_registered, not_registered
845
+ return labels_saved, not_saved
775
846
 
776
847
 
777
848
  def _save_organism(name: str):
778
- """Register an organism record."""
849
+ """Save an organism record."""
779
850
  import bionty as bt
780
851
 
781
852
  organism = bt.Organism.filter(name=name).one_or_none()
@@ -784,7 +855,7 @@ def _save_organism(name: str):
784
855
  if organism is None:
785
856
  raise ValueError(
786
857
  f"Organism '{name}' not found\n"
787
- f" → please register it: bt.Organism(name='{name}').save()"
858
+ f" → please save it: bt.Organism(name='{name}').save()"
788
859
  )
789
860
  organism.save()
790
861
  return organism
lamindb/_finish.py CHANGED
@@ -8,6 +8,7 @@ from typing import Optional
8
8
  import lamindb_setup as ln_setup
9
9
  from lamin_utils import logger
10
10
  from lnschema_core import Run, Transform
11
+ from lnschema_core.types import TransformType
11
12
 
12
13
  from ._query_set import QuerySet
13
14
  from .core._run_context import is_run_from_ipython, run_context
@@ -69,7 +70,7 @@ def save_run_context_core(
69
70
 
70
71
  ln.settings.verbosity = "success"
71
72
 
72
- if transform.type == "notebook":
73
+ if transform.type == TransformType.notebook:
73
74
  try:
74
75
  import nbstripout
75
76
  from nbproject.dev import (
@@ -187,7 +188,7 @@ def save_run_context_core(
187
188
  run.environment = artifact
188
189
  logger.success(f"saved run.environment: {run.environment}")
189
190
  # save report file
190
- if not transform.type == "notebook":
191
+ if not transform.type == TransformType.notebook:
191
192
  run.save()
192
193
  else:
193
194
  if run.report_id is not None:
@@ -212,7 +213,7 @@ def save_run_context_core(
212
213
  run.save()
213
214
  transform.latest_report = run.report
214
215
  transform.save()
215
- if transform.type == "notebook":
216
+ if transform.type == TransformType.notebook:
216
217
  logger.success(f"saved transform.latest_report: {transform.latest_report}")
217
218
  identifier = ln_setup.settings.instance.slug
218
219
  logger.success(f"go to: https://lamin.ai/{identifier}/transform/{transform.uid}")
@@ -26,11 +26,14 @@ def save_vitessce_config(vitessce_config, description: str) -> Artifact:
26
26
  artifact = Artifact(vitesse_export, description=description)
27
27
  artifact.save()
28
28
  config_dict = vitessce_config.to_dict(base_url=artifact.path.to_url())
29
+ logger.important(f"base url: {artifact.path.to_url()}")
29
30
  config_filename = "vitessce_config.json"
30
31
  config_file_local_path = f"{vitesse_export}/{config_filename}"
31
32
  with open(config_file_local_path, "w") as file:
32
33
  json.dump(config_dict, file)
33
- (artifact.path / config_filename).upload_from(config_file_local_path)
34
+ config_file_path = artifact.path / config_filename
35
+ config_file_path.upload_from(config_file_local_path)
36
+ logger.important(f"config url: {config_file_path.to_url()}")
34
37
  slug = ln_setup.settings.instance.slug
35
38
  logger.important(f"go to: https://lamin.ai/{slug}/artifact/{artifact.uid}")
36
39
  return artifact
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: lamindb
3
- Version: 0.69.4
3
+ Version: 0.69.5
4
4
  Summary: A data framework for biology.
5
5
  Author-email: Lamin Labs <open-source@lamin.ai>
6
6
  Requires-Python: >=3.8
@@ -9,10 +9,10 @@ Classifier: Programming Language :: Python :: 3.8
9
9
  Classifier: Programming Language :: Python :: 3.9
10
10
  Classifier: Programming Language :: Python :: 3.10
11
11
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: lnschema_core==0.64.1
13
- Requires-Dist: lamindb_setup==0.68.2
12
+ Requires-Dist: lnschema_core==0.64.4
13
+ Requires-Dist: lamindb_setup==0.68.3
14
14
  Requires-Dist: lamin_utils==0.13.1
15
- Requires-Dist: lamin_cli==0.10.2
15
+ Requires-Dist: lamin_cli==0.11.0
16
16
  Requires-Dist: rapidfuzz
17
17
  Requires-Dist: pyarrow
18
18
  Requires-Dist: typing_extensions!=4.6.0
@@ -1,12 +1,12 @@
1
- lamindb/__init__.py,sha256=3kVakyBIty4I2C-69S114uW3z9lnEOgpnOz3LsZb2ro,2163
2
- lamindb/_annotate.py,sha256=BopvJvzUlNKiTzOELg6MXeXQNUVMUTk-FcRoV9mfZdo,27682
1
+ lamindb/__init__.py,sha256=cWnU0vX4YUkRJTNOMNyjJHRQzTAWQDAj5R7Vp_6EB4M,2163
2
+ lamindb/_annotate.py,sha256=mM-GCej7i9eUH0cU5AcxWZ916k8NRI41WF84dfjJfu4,29955
3
3
  lamindb/_artifact.py,sha256=RV36tcHMZ6wH6u65jOAQ_H4rfmFiIzZmAr8IY7kFhm0,35817
4
4
  lamindb/_can_validate.py,sha256=w7lrUGTWldpvwaRiXBRrjfU_ZRidA7CooOu_r5MbocY,14569
5
5
  lamindb/_collection.py,sha256=SdNNhhMh2O4q0hG4Hf_y1bcwcbkMF_sqk6MIYc-hLZo,14525
6
6
  lamindb/_feature.py,sha256=ahRv87q1tcRLQ0UM5FA3KtcMQvIjW__fZq1yAdRAV7s,6728
7
7
  lamindb/_feature_set.py,sha256=G_Ss6mKh4D0Eji-xSfLRbKVFXwgUE82YOqIUmkV0CAA,8767
8
8
  lamindb/_filter.py,sha256=_PjyQWQBR3ohDAvJbR3hMvZ-2p2GvzFxLfKGC-gPnHI,1320
9
- lamindb/_finish.py,sha256=it-fSpSmMW9ybdsylBV5Lbugh6iXRGWgIiSLwPaow_8,8590
9
+ lamindb/_finish.py,sha256=8lfJzRedTDCA_XXBUf4ECOevpPhVxKqMMj9qgVkmF8M,8672
10
10
  lamindb/_from_values.py,sha256=Ei11ml77Q1xubVekt2C4-mbox2-qnC7kP18B-LhCdSc,11886
11
11
  lamindb/_is_versioned.py,sha256=DXp5t-1DwErpqqMc9eb08kpQPCHOC2fNzaozMoBunR4,1337
12
12
  lamindb/_parents.py,sha256=pTDsW8HjQ_txFbPKrBU0WjjtCNH6sx2LASUuGWpJuYE,14742
@@ -45,10 +45,10 @@ lamindb/core/storage/_zarr.py,sha256=bMQSCsTOCtQy4Yo3KwCVpbUkKdWRApN9FM1rM-d2_G0
45
45
  lamindb/core/storage/file.py,sha256=WTeC4ENn_O6HEoinmTviB89W81UrJT3bSGtnpqPpIyE,7242
46
46
  lamindb/core/storage/object.py,sha256=MPUb2M8Fleq2j9x1Ryqr3BETmvsDKyf11Ifvbxd3NpA,1097
47
47
  lamindb/integrations/__init__.py,sha256=aH2PmO2m4-vwIifMYTB0Fyyr_gZWtVnV71jT0tVWSw0,123
48
- lamindb/integrations/_vitessce.py,sha256=NHOiDJzn2QtzfkThhHEuDJuTTcSkem9x5d_wrQ-8nPI,1474
48
+ lamindb/integrations/_vitessce.py,sha256=n85g8YRP8Y2sfU5DPJdbU84BGPrTfU3Dg2jStdmBBRI,1637
49
49
  lamindb/setup/__init__.py,sha256=OwZpZzPDv5lPPGXZP7-zK6UdO4FHvvuBh439yZvIp3A,410
50
50
  lamindb/setup/core/__init__.py,sha256=LqIIvJNcONxkqjbnP6CUaP4d45Lbd6TSMAcXFp4C7_8,231
51
- lamindb-0.69.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
- lamindb-0.69.4.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
53
- lamindb-0.69.4.dist-info/METADATA,sha256=lGw_OTGbviwuQbwwav9IVSHfblR8bwUNC1weeEx6Eok,2856
54
- lamindb-0.69.4.dist-info/RECORD,,
51
+ lamindb-0.69.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
52
+ lamindb-0.69.5.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
53
+ lamindb-0.69.5.dist-info/METADATA,sha256=meDN6DOhRAnUt2jbDcykj9Hclhos_uyO_UbtSPNukG4,2856
54
+ lamindb-0.69.5.dist-info/RECORD,,