lamindb 0.77.0__py3-none-any.whl → 0.77.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package as they appear in their public registries. It is provided for informational purposes only.
lamindb/_query_set.py CHANGED
@@ -1,19 +1,22 @@
  from __future__ import annotations

+ import re
  from collections import UserList
  from collections.abc import Iterable
  from collections.abc import Iterable as IterableType
- from typing import TYPE_CHECKING, Any, NamedTuple
+ from typing import TYPE_CHECKING, Any, Generic, NamedTuple, TypeVar

  import pandas as pd
  from django.db import models
- from django.db.models import F
- from lamin_utils import colors, logger
+ from django.db.models import F, ForeignKey, ManyToManyField
+ from django.db.models.fields.related import ForeignObjectRel
+ from lamin_utils import logger
  from lamindb_setup.core._docs import doc_args
  from lnschema_core.models import (
      Artifact,
      CanCurate,
      Collection,
+     Feature,
      IsVersioned,
      Record,
      Registry,
@@ -24,6 +27,8 @@ from lnschema_core.models import (

  from .core.exceptions import DoesNotExist

+ T = TypeVar("T")
+
  if TYPE_CHECKING:
      from collections.abc import Iterable

@@ -34,6 +39,9 @@ class MultipleResultsFound(Exception):
      pass


+ pd.set_option("display.max_columns", 200)
+
+
  # def format_and_convert_to_local_time(series: pd.Series):
  #     tzinfo = datetime.now().astimezone().tzinfo
  #     timedelta = tzinfo.utcoffset(datetime.now())  # type: ignore
@@ -155,21 +163,295 @@ def get(
          return registry.objects.using(qs.db).get(**expressions)


- class RecordsList(UserList):
+ class RecordList(UserList, Generic[T]):
      """Is ordered, can't be queried, but has `.df()`."""

-     def __init__(self, records: Iterable[Record]):
-         super().__init__(record for record in records)
+     def __init__(self, records: Iterable[T]):
+         if isinstance(records, list):
+             self.data = records  # Direct assignment if already a list, no copy
+         else:
+             super().__init__(records)  # Let UserList handle the conversion

      def df(self) -> pd.DataFrame:
          keys = get_keys_from_df(self.data, self.data[0].__class__)
          values = [record.__dict__ for record in self.data]
          return pd.DataFrame(values, columns=keys)

-     def one(self) -> Record:
+     def one(self) -> T:
          """Exactly one result. Throws error if there are more or none."""
          return one_helper(self)

+     def save(self) -> RecordList[T]:
+         """Save all records to the database."""
+         from lamindb._save import save
+
+         save(self)
+         return self
+
+
+ def get_basic_field_names(
+     qs: QuerySet, include: list[str], features: bool | list[str] = False
+ ) -> list[str]:
+     exclude_field_names = ["updated_at"]
+     field_names = [
+         field.name
+         for field in qs.model._meta.fields
+         if (
+             not isinstance(field, models.ForeignKey)
+             and field.name not in exclude_field_names
+         )
+     ]
+     field_names += [
+         f"{field.name}_id"
+         for field in qs.model._meta.fields
+         if isinstance(field, models.ForeignKey)
+     ]
+     for field_name in [
+         "version",
+         "is_latest",
+         "run_id",
+         "created_at",
+         "created_by_id",
+         "updated_at",
+     ]:
+         if field_name in field_names:
+             field_names.remove(field_name)
+             field_names.append(field_name)
+     if field_names[0] != "uid" and "uid" in field_names:
+         field_names.remove("uid")
+         field_names.insert(0, "uid")
+     if include or features:
+         subset_field_names = field_names[:4]
+         intersection = set(field_names) & set(include)
+         subset_field_names += list(intersection)
+         field_names = subset_field_names
+     return field_names
+
+
+ def get_feature_annotate_kwargs(show_features: bool | list[str]) -> dict[str, Any]:
+     features = Feature.filter()
+     if isinstance(show_features, list):
+         features.filter(name__in=show_features)
+     # Get the categorical features
+     cat_feature_types = {
+         feature.dtype.replace("cat[", "").replace("]", "")
+         for feature in features
+         if feature.dtype.startswith("cat[")
+     }
+     # Get relationships of labels and features
+     link_models_on_models = {
+         getattr(
+             Artifact, obj.related_name
+         ).through.__get_name_with_schema__(): obj.related_model.__get_name_with_schema__()
+         for obj in Artifact._meta.related_objects
+         if obj.related_model.__get_name_with_schema__() in cat_feature_types
+     }
+     link_models_on_models["ArtifactULabel"] = "ULabel"
+     link_attributes_on_models = {
+         obj.related_name: link_models_on_models[
+             obj.related_model.__get_name_with_schema__()
+         ]
+         for obj in Artifact._meta.related_objects
+         if obj.related_model.__get_name_with_schema__() in link_models_on_models
+     }
+     # Prepare Django's annotate for features
+     annotate_kwargs = {}
+     for link_attr, feature_type in link_attributes_on_models.items():
+         annotate_kwargs[f"{link_attr}__feature__name"] = F(
+             f"{link_attr}__feature__name"
+         )
+         field_name = (
+             feature_type.split(".")[1] if "." in feature_type else feature_type
+         ).lower()
+         annotate_kwargs[f"{link_attr}__{field_name}__name"] = F(
+             f"{link_attr}__{field_name}__name"
+         )
+
+     annotate_kwargs["_feature_values__feature__name"] = F(
+         "_feature_values__feature__name"
+     )
+     annotate_kwargs["_feature_values__value"] = F("_feature_values__value")
+     return annotate_kwargs
+
+
+ # https://claude.ai/share/16280046-6ae5-4f6a-99ac-dec01813dc3c
+ def analyze_lookup_cardinality(
+     model_class: Record, lookup_paths: list[str] | None
+ ) -> dict[str, str]:
+     """Analyze lookup cardinality.
+
+     Analyzes Django model lookups to determine if they will result in
+     one-to-one or one-to-many relationships when used in annotations.
+
+     Args:
+         model_class: The Django model class to analyze
+         include: List of lookup paths (e.g. ["created_by__name", "ulabels__name"])
+
+     Returns:
+         Dictionary mapping lookup paths to either 'one' or 'many'
+     """
+     result = {}  # type: ignore
+     if lookup_paths is None:
+         return result
+     for lookup_path in lookup_paths:
+         parts = lookup_path.split("__")
+         current_model = model_class
+         is_many = False
+
+         # Walk through each part of the lookup path
+         for part in parts[:-1]:  # Exclude the last part as it's an attribute
+             field = None
+
+             # Handle reverse relations
+             for f in current_model._meta.get_fields():
+                 if isinstance(f, ForeignObjectRel) and f.get_accessor_name() == part:
+                     field = f
+                     is_many = not f.one_to_one
+                     if hasattr(f, "field"):
+                         current_model = f.field.model
+                     break
+
+             # Handle forward relations
+             if field is None:
+                 field = current_model._meta.get_field(part)
+                 if isinstance(field, ManyToManyField):
+                     is_many = True
+                     current_model = field.remote_field.model
+                 elif isinstance(field, ForeignKey):
+                     current_model = field.remote_field.model
+
+         result[lookup_path] = "many" if is_many else "one"
+
+     return result
+
+
+ # https://lamin.ai/laminlabs/lamindata/transform/BblTiuKxsb2g0003
+ # https://claude.ai/chat/6ea2498c-944d-4e7a-af08-29e5ddf637d2
+ def reshape_annotate_result(
+     field_names: list[str],
+     df: pd.DataFrame,
+     extra_columns: dict[str, str] | None = None,
+     features: bool | list[str] = False,
+ ) -> pd.DataFrame:
+     """Reshapes experimental data with optional feature handling.
+
+     Parameters:
+         field_names: List of basic fields to include in result
+         df: Input dataframe with experimental data
+         extra_columns: Dict specifying additional columns to process with types ('one' or 'many')
+             e.g., {'ulabels__name': 'many', 'created_by__name': 'one'}
+         features: If False, skip feature processing. If True, process all features.
+             If list of strings, only process specified features.
+
+     Returns:
+         DataFrame with reshaped data
+     """
+     extra_columns = extra_columns or {}
+
+     # Initialize result with basic fields
+     result = df[field_names].drop_duplicates(subset=["id"])
+
+     # Process features if requested
+     if features:
+         # Handle _feature_values if columns exist
+         feature_cols = ["_feature_values__feature__name", "_feature_values__value"]
+         if all(col in df.columns for col in feature_cols):
+             feature_values = process_feature_values(df, features)
+             if not feature_values.empty:
+                 for col in feature_values.columns:
+                     if col in result.columns:
+                         continue
+                     result.insert(4, col, feature_values[col])
+
+         # Handle links features if they exist
+         links_features = [
+             col
+             for col in df.columns
+             if "feature__name" in col and col.startswith("links_")
+         ]
+
+         if links_features:
+             result = process_links_features(df, result, links_features, features)
+
+     # Process extra columns
+     if extra_columns:
+         result = process_extra_columns(df, result, extra_columns)
+
+     return result
+
+
+ def process_feature_values(
+     df: pd.DataFrame, features: bool | list[str]
+ ) -> pd.DataFrame:
+     """Process _feature_values columns."""
+     feature_values = df.groupby(["id", "_feature_values__feature__name"])[
+         "_feature_values__value"
+     ].agg(set)
+
+     # Filter features if specific ones requested
+     if isinstance(features, list):
+         feature_values = feature_values[
+             feature_values.index.get_level_values(
+                 "_feature_values__feature__name"
+             ).isin(features)
+         ]
+
+     return feature_values.unstack().reset_index()
+
+
+ def process_links_features(
+     df: pd.DataFrame,
+     result: pd.DataFrame,
+     feature_cols: list[str],
+     features: bool | list[str],
+ ) -> pd.DataFrame:
+     """Process links_XXX feature columns."""
+     # this loops over different entities that might be linked under a feature
+     for feature_col in feature_cols:
+         prefix = re.match(r"links_(.+?)__feature__name", feature_col).group(1)
+
+         value_cols = [
+             col
+             for col in df.columns
+             if col.startswith(f"links_{prefix}__")
+             and col.endswith("__name")
+             and "feature__name" not in col
+         ]
+
+         if not value_cols:
+             continue
+
+         value_col = value_cols[0]
+         feature_names = df[feature_col].unique()
+         feature_names = feature_names[~pd.isna(feature_names)]
+
+         # Filter features if specific ones requested
+         if isinstance(features, list):
+             feature_names = [f for f in feature_names if f in features]
+
+         for feature_name in feature_names:
+             mask = df[feature_col] == feature_name
+             feature_values = df[mask].groupby("id")[value_col].agg(set)
+             result.insert(4, feature_name, result["id"].map(feature_values))
+
+     return result
+
+
+ def process_extra_columns(
+     df: pd.DataFrame, result: pd.DataFrame, extra_columns: dict[str, str]
+ ) -> pd.DataFrame:
+     """Process additional columns based on their specified types."""
+     for col, col_type in extra_columns.items():
+         if col not in df.columns:
+             continue
+         if col in result.columns:
+             continue
+
+         values = df.groupby("id")[col].agg(set if col_type == "many" else "first")
+         result.insert(4, col, result["id"].map(values))
+
+     return result
+

  class QuerySet(models.QuerySet):
      """Sets of records returned by queries.
@@ -180,108 +462,45 @@ class QuerySet(models.QuerySet):

      Examples:

-         >>> ln.ULabel(name="my label").save()
-         >>> queryset = ln.ULabel.filter(name="my label")
+         >>> ULabel(name="my label").save()
+         >>> queryset = ULabel.filter(name="my label")
          >>> queryset
      """

      @doc_args(Record.df.__doc__)
      def df(
-         self, include: str | list[str] | None = None, join: str = "inner"
+         self,
+         include: str | list[str] | None = None,
+         features: bool | list[str] = False,
      ) -> pd.DataFrame:
          """{}"""  # noqa: D415
-         # re-order the columns
-         exclude_field_names = ["updated_at"]
-         field_names = [
-             field.name
-             for field in self.model._meta.fields
-             if (
-                 not isinstance(field, models.ForeignKey)
-                 and field.name not in exclude_field_names
-             )
-         ]
-         field_names += [
-             f"{field.name}_id"
-             for field in self.model._meta.fields
-             if isinstance(field, models.ForeignKey)
-         ]
-         for field_name in ["run_id", "created_at", "created_by_id", "updated_at"]:
-             if field_name in field_names:
-                 field_names.remove(field_name)
-                 field_names.append(field_name)
-         if field_names[0] != "uid" and "uid" in field_names:
-             field_names.remove("uid")
-             field_names.insert(0, "uid")
-         # create the dataframe
-         df = pd.DataFrame(self.values(), columns=field_names)
-         # if len(df) > 0 and "updated_at" in df:
-         #     df.updated_at = format_and_convert_to_local_time(df.updated_at)
-         # if len(df) > 0 and "started_at" in df:
-         #     df.started_at = format_and_convert_to_local_time(df.started_at)
-         pk_name = self.model._meta.pk.name
-         pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
-         if pk_column_name in df.columns:
-             df = df.set_index(pk_column_name)
+         if include is None:
+             include = []
+         elif isinstance(include, str):
+             include = [include]
+         field_names = get_basic_field_names(self, include, features)
+         annotate_kwargs = {}
+         if features:
+             annotate_kwargs.update(get_feature_annotate_kwargs(features))
+         if include:
+             include = include.copy()[::-1]
+             include_kwargs = {s: F(s) for s in include if s not in field_names}
+             annotate_kwargs.update(include_kwargs)
+         if annotate_kwargs:
+             queryset = self.annotate(**annotate_kwargs)
+         else:
+             queryset = self
+         df = pd.DataFrame(queryset.values(*field_names, *list(annotate_kwargs.keys())))
          if len(df) == 0:
-             logger.warning(colors.yellow("No records found"))
+             df = pd.DataFrame({}, columns=field_names)
              return df
-         if include is not None:
-             if isinstance(include, str):
-                 include = [include]
-             # fix ordering
-             include = include[::-1]
-             for expression in include:
-                 split = expression.split("__")
-                 field_name = split[0]
-                 if len(split) > 1:
-                     lookup_str = "__".join(split[1:])
-                 else:
-                     lookup_str = "id"
-                 Record = self.model
-                 field = getattr(Record, field_name)
-                 if isinstance(field.field, models.ManyToManyField):
-                     related_ORM = (
-                         field.field.model
-                         if field.field.model != Record
-                         else field.field.related_model
-                     )
-                     if Record == related_ORM:
-                         left_side_link_model = f"from_{Record.__name__.lower()}"
-                         values_expression = (
-                             f"to_{Record.__name__.lower()}__{lookup_str}"
-                         )
-                     else:
-                         left_side_link_model = f"{Record.__name__.lower()}"
-                         values_expression = (
-                             f"{related_ORM.__name__.lower()}__{lookup_str}"
-                         )
-                     link_df = pd.DataFrame(
-                         field.through.objects.using(self.db).values(
-                             left_side_link_model, values_expression
-                         )
-                     )
-                     if link_df.shape[0] == 0:
-                         logger.warning(
-                             f"{colors.yellow(expression)} is not shown because no values are found"
-                         )
-                         continue
-                     link_groupby = link_df.groupby(left_side_link_model)[
-                         values_expression
-                     ].apply(list)
-                     df = pd.concat((link_groupby, df), axis=1, join=join)
-                     df.rename(columns={values_expression: expression}, inplace=True)
-                 else:
-                     # the F() based implementation could also work for many-to-many,
-                     # would need to test what is faster
-                     df_anno = pd.DataFrame(
-                         self.annotate(expression=F(expression)).values(
-                             pk_column_name, "expression"
-                         )
-                     )
-                     df_anno = df_anno.set_index(pk_column_name)
-                     df_anno.rename(columns={"expression": expression}, inplace=True)
-                     df = pd.concat((df_anno, df), axis=1, join=join)
-         return df
+         extra_cols = analyze_lookup_cardinality(self.model, include)  # type: ignore
+         df_reshaped = reshape_annotate_result(field_names, df, extra_cols, features)
+         pk_name = self.model._meta.pk.name
+         pk_column_name = pk_name if pk_name in df.columns else f"{pk_name}_id"
+         if pk_column_name in df_reshaped.columns:
+             df_reshaped = df_reshaped.set_index(pk_column_name)
+         return df_reshaped

      def delete(self, *args, **kwargs):
          """Delete all records in the query set."""
@@ -335,8 +554,8 @@ class QuerySet(models.QuerySet):
          """At most one result. Returns it if there is one, otherwise returns ``None``.

          Examples:
-             >>> ln.ULabel.filter(name="benchmark").one_or_none()
-             >>> ln.ULabel.filter(name="non existing label").one_or_none()
+             >>> ULabel.filter(name="benchmark").one_or_none()
+             >>> ULabel.filter(name="non existing label").one_or_none()
          """
          if len(self) == 0:
              return None
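
For orientation, a hedged usage sketch of the reworked `QuerySet.df()` follows; the registry, lookup, and feature names are illustrative and not taken from the diff. "Many" lookups such as `ulabels__name` are classified by `analyze_lookup_cardinality` and aggregated into sets per record by `reshape_annotate_result`, while feature columns are requested through the new `features` argument:

    import lamindb as ln

    # plain table: latest fields of each record, indexed by primary key
    ln.Artifact.filter().df()

    # related fields via Django-style lookups; many-to-many lookups come back as sets
    ln.Artifact.filter().df(include=["created_by__name", "ulabels__name"])

    # annotate with feature values; True adds all features, a list restricts them
    ln.Artifact.filter().df(features=["cell_type"])  # "cell_type" is a made-up feature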
lamindb/_record.py CHANGED
@@ -264,14 +264,14 @@ def get(
  def df(
      cls,
      include: str | list[str] | None = None,
-     join: str = "inner",
+     features: bool | list[str] = False,
      limit: int = 100,
  ) -> pd.DataFrame:
      """{}"""  # noqa: D415
      query_set = cls.filter()
      if hasattr(cls, "updated_at"):
          query_set = query_set.order_by("-updated_at")
-     return query_set[:limit].df(include=include, join=join)
+     return query_set[:limit].df(include=include, features=features)


  def _search(
@@ -345,7 +345,9 @@ def _search(
              ranks.append(sub_rank)
          # startswith and avoid matching string with " " on the right
          # mostly for truncated
-         startswith_expr = regex_lookup(field_expr, rf"(?:^|\|){string}[^ ]*(\||$)")
+         startswith_expr = regex_lookup(
+             field_expr, rf"(?:^|.*\|){string}[^ ]*(?:\|.*|$)"
+         )
          startswith_rank = Cast(startswith_expr, output_field=IntegerField()) * 8
          ranks.append(startswith_rank)
          # match as sub-phrase from the left, mostly for truncated
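
To see what the loosened startswith pattern accepts, here is a purely illustrative comparison of the old and new regexes against a pipe-separated synonyms string (the search string and value are made up; the real code evaluates the pattern through a database regex lookup, while `re.fullmatch` is used here only to mimic a backend that anchors the pattern to the whole value):

    import re

    string = "monocy"  # hypothetical, possibly truncated, search string
    old_pattern = rf"(?:^|\|){string}[^ ]*(\||$)"
    new_pattern = rf"(?:^|.*\|){string}[^ ]*(?:\|.*|$)"

    value = "classical monocyte|CD14-positive monocyte|monocyte"

    # the old pattern leaves no room for text around the matching synonym,
    # whereas the new one absorbs it with the added `.*` on both sides
    print(bool(re.fullmatch(old_pattern, value)))  # False
    print(bool(re.fullmatch(new_pattern, value)))  # True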
lamindb/_save.py CHANGED
@@ -112,6 +112,7 @@ def bulk_create(records: Iterable[Record], ignore_conflicts: bool | None = False
          records_by_orm[record.__class__].append(record)
      for registry, records in records_by_orm.items():
          registry.objects.bulk_create(records, ignore_conflicts=ignore_conflicts)
+         # records[:] = created  # In-place list update; does not seem to be necessary


  def bulk_update(records: Iterable[Record], ignore_conflicts: bool | None = False):
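
A brief note on why that in-place update stays commented out: Django's `bulk_create` returns the very instances it was given, populating primary keys where the backend supports it (e.g. PostgreSQL), so rebinding the list to the return value should not change what the caller observes. A minimal sketch, assuming a configured instance and using `ULabel` purely for illustration:

    # hypothetical illustration, not part of the diff
    from lnschema_core.models import ULabel

    records = [ULabel(name=f"label-{i}") for i in range(3)]
    created = ULabel.objects.bulk_create(records)
    # `created` holds the same Python objects as `records`; on backends that
    # return IDs, their primary keys are now populated on both references
    assert all(a is b for a, b in zip(records, created))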
lamindb/_view.py CHANGED
@@ -3,22 +3,107 @@ from __future__ import annotations
  import builtins
  import importlib
  import inspect
+ from typing import TYPE_CHECKING

+ from IPython.display import HTML, display
  from lamin_utils import colors, logger
  from lamindb_setup import settings
  from lamindb_setup._init_instance import get_schema_module_name
- from lnschema_core import Record
+ from lnschema_core import Feature, Record
+
+ from lamindb.core import FeatureValue, ParamValue
+
+ from ._feature import convert_pandas_dtype_to_lamin_dtype
+
+ if TYPE_CHECKING:
+     import pandas as pd

  is_run_from_ipython = getattr(builtins, "__IPYTHON__", False)


+ def display_df_with_descriptions(
+     df: pd.DataFrame, descriptions: dict[str, str] | None = None
+ ):
+     if descriptions is None:
+         display(df)
+         return None
+
+     # Start building HTML table
+     html = '<table class="dataframe">'
+
+     # Create header with title and description rows
+     html += "<thead>"
+
+     # Column names row
+     html += "<tr>"
+     html += '<th class="header-title index-header"></th>'  # Index header
+     for col in df.columns:
+         html += f'<th class="header-title">{col}</th>'
+     html += "</tr>"
+
+     # Descriptions row
+     html += "<tr>"
+     html += f'<th class="header-desc index-header">{df.index.name or ""}</th>'  # Index column
+     for col in df.columns:
+         desc = descriptions.get(col, "")
+         html += f'<th class="header-desc">{desc}</th>'
+     html += "</tr>"
+
+     html += "</thead>"
+
+     # Add body rows
+     html += "<tbody>"
+     for idx, row in df.iterrows():
+         html += "<tr>"
+         html += f'<th class="row-index">{idx}</th>'  # Index value
+         for col in df.columns:
+             html += f"<td>{row[col]}</td>"
+         html += "</tr>"
+     html += "</tbody>"
+     html += "</table>"
+
+     # Add CSS styles
+     styled_html = f"""
+     <style>
+         .dataframe {{
+             border-collapse: collapse;
+             margin: 10px 0;
+         }}
+         .dataframe th, .dataframe td {{
+             border: 1px solid #ddd;
+             padding: 8px;
+             text-align: left;
+         }}
+         .header-title {{
+             font-weight: bold;
+         }}
+         .header-desc {{
+             color: #666;
+             font-weight: normal;
+         }}
+         .row-index {{
+             font-weight: bold;
+         }}
+         .index-header {{
+             font-weight: bold;
+         }}
+     </style>
+     {html}
+     """
+     return display(HTML(styled_html))
+
+
  def view(
-     n: int = 7, schema: str | None = None, registries: list[str] | None = None
+     df: pd.DataFrame | None = None,
+     limit: int = 7,
+     schema: str | None = None,
+     registries: list[str] | None = None,
  ) -> None:
-     """View latest metadata state.
+     """View metadata.

      Args:
-         n: Display the last `n` rows of a registry.
+         df: A DataFrame to display.
+         limit: Display the latest `n` records
          schema: Schema module to view. Default's to
              `None` and displays all schema modules.
          registries: List of Record names. Defaults to
@@ -27,6 +112,16 @@ def view(
      Examples:
          >>> ln.view()
      """
+     if df is not None:
+         descriptions = {
+             col_name: convert_pandas_dtype_to_lamin_dtype(dtype)
+             for col_name, dtype in df.dtypes.to_dict().items()
+         }
+         feature_dtypes = dict(Feature.objects.values_list("name", "dtype"))
+         descriptions.update(feature_dtypes)
+         display_df_with_descriptions(df, descriptions)
+         return None
+
      if is_run_from_ipython:
          from IPython.display import display as show
      else:
@@ -39,6 +134,9 @@ def view(

      for schema_name in schema_names:
          schema_module = importlib.import_module(get_schema_module_name(schema_name))
+         # the below is necessary because a schema module might not have been
+         # explicitly accessed
+         importlib.reload(schema_module)

          all_registries = {
              registry
@@ -47,6 +145,8 @@ def view(
              and issubclass(registry, Record)
              and registry is not Record
          }
+         if schema_name == "core":
+             all_registries.update({FeatureValue, ParamValue})
          if registries is not None:
              filtered_registries = {
                  registry
@@ -62,11 +162,7 @@ def view(
          logger.print(section)
          logger.print("*" * len(section_no_color))
          for registry in sorted(filtered_registries, key=lambda x: x.__name__):
-             if hasattr(registry, "updated_at"):
-                 df = registry.filter().order_by("-updated_at")[:n].df()
-             else:
-                 # need to adjust in the future
-                 df = registry.df().iloc[-n:]
+             df = registry.df(limit=limit)
              if df.shape[0] > 0:
                  logger.print(colors.blue(colors.bold(registry.__name__)))
                  show(df)
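
Taken together, the new `view()` signature supports both modes shown in this diff: listing the latest records per registry, or rendering a caller-supplied DataFrame with a dtype/feature description header row. A hedged usage sketch, with made-up DataFrame contents:

    import lamindb as ln
    import pandas as pd

    # registry mode: print the latest records of each registry, 7 per registry by default
    ln.view(limit=5)

    # dataframe mode: column descriptions come from pandas dtypes plus any
    # registered Feature dtypes with matching names
    df = pd.DataFrame({"cell_type": ["monocyte", "T cell"], "count": [12, 7]})
    ln.view(df=df)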