metaxy 0.0.1.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. metaxy/__init__.py +170 -0
  2. metaxy/_packaging.py +96 -0
  3. metaxy/_testing/__init__.py +55 -0
  4. metaxy/_testing/config.py +43 -0
  5. metaxy/_testing/metaxy_project.py +780 -0
  6. metaxy/_testing/models.py +111 -0
  7. metaxy/_testing/parametric/__init__.py +13 -0
  8. metaxy/_testing/parametric/metadata.py +664 -0
  9. metaxy/_testing/pytest_helpers.py +74 -0
  10. metaxy/_testing/runbook.py +533 -0
  11. metaxy/_utils.py +35 -0
  12. metaxy/_version.py +1 -0
  13. metaxy/cli/app.py +97 -0
  14. metaxy/cli/console.py +13 -0
  15. metaxy/cli/context.py +167 -0
  16. metaxy/cli/graph.py +610 -0
  17. metaxy/cli/graph_diff.py +290 -0
  18. metaxy/cli/list.py +46 -0
  19. metaxy/cli/metadata.py +317 -0
  20. metaxy/cli/migrations.py +999 -0
  21. metaxy/cli/utils.py +268 -0
  22. metaxy/config.py +680 -0
  23. metaxy/entrypoints.py +296 -0
  24. metaxy/ext/__init__.py +1 -0
  25. metaxy/ext/dagster/__init__.py +54 -0
  26. metaxy/ext/dagster/constants.py +10 -0
  27. metaxy/ext/dagster/dagster_type.py +156 -0
  28. metaxy/ext/dagster/io_manager.py +200 -0
  29. metaxy/ext/dagster/metaxify.py +512 -0
  30. metaxy/ext/dagster/observable.py +115 -0
  31. metaxy/ext/dagster/resources.py +27 -0
  32. metaxy/ext/dagster/selection.py +73 -0
  33. metaxy/ext/dagster/table_metadata.py +417 -0
  34. metaxy/ext/dagster/utils.py +462 -0
  35. metaxy/ext/sqlalchemy/__init__.py +23 -0
  36. metaxy/ext/sqlalchemy/config.py +29 -0
  37. metaxy/ext/sqlalchemy/plugin.py +353 -0
  38. metaxy/ext/sqlmodel/__init__.py +13 -0
  39. metaxy/ext/sqlmodel/config.py +29 -0
  40. metaxy/ext/sqlmodel/plugin.py +499 -0
  41. metaxy/graph/__init__.py +29 -0
  42. metaxy/graph/describe.py +325 -0
  43. metaxy/graph/diff/__init__.py +21 -0
  44. metaxy/graph/diff/diff_models.py +446 -0
  45. metaxy/graph/diff/differ.py +769 -0
  46. metaxy/graph/diff/models.py +443 -0
  47. metaxy/graph/diff/rendering/__init__.py +18 -0
  48. metaxy/graph/diff/rendering/base.py +323 -0
  49. metaxy/graph/diff/rendering/cards.py +188 -0
  50. metaxy/graph/diff/rendering/formatter.py +805 -0
  51. metaxy/graph/diff/rendering/graphviz.py +246 -0
  52. metaxy/graph/diff/rendering/mermaid.py +326 -0
  53. metaxy/graph/diff/rendering/rich.py +169 -0
  54. metaxy/graph/diff/rendering/theme.py +48 -0
  55. metaxy/graph/diff/traversal.py +247 -0
  56. metaxy/graph/status.py +329 -0
  57. metaxy/graph/utils.py +58 -0
  58. metaxy/metadata_store/__init__.py +32 -0
  59. metaxy/metadata_store/_ducklake_support.py +419 -0
  60. metaxy/metadata_store/base.py +1792 -0
  61. metaxy/metadata_store/bigquery.py +354 -0
  62. metaxy/metadata_store/clickhouse.py +184 -0
  63. metaxy/metadata_store/delta.py +371 -0
  64. metaxy/metadata_store/duckdb.py +446 -0
  65. metaxy/metadata_store/exceptions.py +61 -0
  66. metaxy/metadata_store/ibis.py +542 -0
  67. metaxy/metadata_store/lancedb.py +391 -0
  68. metaxy/metadata_store/memory.py +292 -0
  69. metaxy/metadata_store/system/__init__.py +57 -0
  70. metaxy/metadata_store/system/events.py +264 -0
  71. metaxy/metadata_store/system/keys.py +9 -0
  72. metaxy/metadata_store/system/models.py +129 -0
  73. metaxy/metadata_store/system/storage.py +957 -0
  74. metaxy/metadata_store/types.py +10 -0
  75. metaxy/metadata_store/utils.py +104 -0
  76. metaxy/metadata_store/warnings.py +36 -0
  77. metaxy/migrations/__init__.py +32 -0
  78. metaxy/migrations/detector.py +291 -0
  79. metaxy/migrations/executor.py +516 -0
  80. metaxy/migrations/generator.py +319 -0
  81. metaxy/migrations/loader.py +231 -0
  82. metaxy/migrations/models.py +528 -0
  83. metaxy/migrations/ops.py +447 -0
  84. metaxy/models/__init__.py +0 -0
  85. metaxy/models/bases.py +12 -0
  86. metaxy/models/constants.py +139 -0
  87. metaxy/models/feature.py +1335 -0
  88. metaxy/models/feature_spec.py +338 -0
  89. metaxy/models/field.py +263 -0
  90. metaxy/models/fields_mapping.py +307 -0
  91. metaxy/models/filter_expression.py +297 -0
  92. metaxy/models/lineage.py +285 -0
  93. metaxy/models/plan.py +232 -0
  94. metaxy/models/types.py +475 -0
  95. metaxy/py.typed +0 -0
  96. metaxy/utils/__init__.py +1 -0
  97. metaxy/utils/constants.py +2 -0
  98. metaxy/utils/exceptions.py +23 -0
  99. metaxy/utils/hashing.py +230 -0
  100. metaxy/versioning/__init__.py +31 -0
  101. metaxy/versioning/engine.py +656 -0
  102. metaxy/versioning/feature_dep_transformer.py +151 -0
  103. metaxy/versioning/ibis.py +249 -0
  104. metaxy/versioning/lineage_handler.py +205 -0
  105. metaxy/versioning/polars.py +189 -0
  106. metaxy/versioning/renamed_df.py +35 -0
  107. metaxy/versioning/types.py +63 -0
  108. metaxy-0.0.1.dev3.dist-info/METADATA +96 -0
  109. metaxy-0.0.1.dev3.dist-info/RECORD +111 -0
  110. metaxy-0.0.1.dev3.dist-info/WHEEL +4 -0
  111. metaxy-0.0.1.dev3.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,73 @@
+"""Asset selection helpers for Metaxy assets."""
+
+import dagster as dg
+
+import metaxy as mx
+from metaxy.ext.dagster.constants import (
+    DAGSTER_METAXY_FEATURE_METADATA_KEY,
+    DAGSTER_METAXY_PROJECT_TAG_KEY,
+)
+
+
+def select_metaxy_assets(
+    *,
+    project: str | None = None,
+    feature: mx.CoercibleToFeatureKey | None = None,
+) -> dg.AssetSelection:
+    """Select Metaxy assets by project and/or feature.
+
+    This helper creates an `AssetSelection` that filters assets tagged by `@metaxify`.
+
+    Args:
+        project: Filter by project name. If None, uses `MetaxyConfig.get().project`.
+        feature: Filter by specific feature key. If provided, further narrows the selection.
+
+    Returns:
+        An `AssetSelection` that can be used with `dg.define_asset_job`,
+        `dg.materialize`, or `AssetSelection` operations like `|` and `&`.
+
+    Example: Select all Metaxy assets in current project
+        ```python
+        import metaxy.ext.dagster as mxd
+
+        all_metaxy = mxd.select_metaxy_assets()
+        ```
+
+    Example: Select assets for a specific project
+        ```python
+        prod_assets = mxd.select_metaxy_assets(project="production")
+        ```
+
+    Example: Select a specific feature's assets
+        ```python
+        feature_assets = mxd.select_metaxy_assets(feature="my/feature/key")
+        ```
+
+    Example: Use with asset jobs
+        ```python
+        metaxy_job = dg.define_asset_job(
+            name="materialize_metaxy",
+            selection=mxd.select_metaxy_assets(),
+        )
+        ```
+
+    Example: Combine with other selections
+        ```python
+        # All metaxy assets plus some other assets
+        combined = mxd.select_metaxy_assets() | dg.AssetSelection.keys("other_asset")
+
+        # Metaxy assets that are also in a specific group
+        filtered = mxd.select_metaxy_assets() & dg.AssetSelection.groups("my_group")
+        ```
+    """
+    resolved_project = project if project is not None else mx.MetaxyConfig.get().project
+
+    selection = dg.AssetSelection.tag(DAGSTER_METAXY_PROJECT_TAG_KEY, resolved_project)
+
+    if feature is not None:
+        feature_key = mx.coerce_to_feature_key(feature)
+        selection = selection & dg.AssetSelection.tag(
+            DAGSTER_METAXY_FEATURE_METADATA_KEY, str(feature_key)
+        )
+
+    return selection
@@ -0,0 +1,417 @@
+"""Table metadata utilities for Dagster integration.
+
+This module provides utilities for building Dagster table metadata
+(column schema, column lineage, table previews, etc.) from Metaxy feature definitions.
+"""
+
+import types
+from typing import Any, Union, get_args, get_origin
+
+import dagster as dg
+import narwhals as nw
+import polars as pl
+
+import metaxy as mx
+from metaxy.ext.dagster.utils import get_asset_key_for_metaxy_feature_spec
+from metaxy.models.constants import ALL_SYSTEM_COLUMNS, SYSTEM_COLUMNS_WITH_LINEAGE
+
+
+def build_column_schema(feature_cls: type[mx.BaseFeature]) -> dg.TableSchema:
+    """Build a Dagster TableSchema from a Metaxy feature class.
+
+    Creates column definitions from Pydantic model fields, including inherited
+    system columns. Field types are converted to strings and field descriptions
+    are used as column descriptions.
+
+    Args:
+        feature_cls: The Metaxy feature class to extract schema from.
+
+    Returns:
+        A TableSchema with columns derived from Pydantic model fields,
+        sorted alphabetically by name.
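+
+    Example: Build and inspect a schema (illustrative; `MyFeature` stands in
+        for any Metaxy feature class)
+        ```python
+        schema = build_column_schema(MyFeature)  # MyFeature: hypothetical feature
+        print([(col.name, col.type) for col in schema.columns])
+        ```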
+
+    !!! tip
+        This is automatically injected by [`@metaxify`][metaxy.ext.dagster.metaxify.metaxify]
+    """
+    columns: list[dg.TableColumn] = []
+    for field_name, field_info in feature_cls.model_fields.items():
+        columns.append(
+            dg.TableColumn(
+                name=field_name,
+                type=_get_type_string(field_info.annotation),
+                description=field_info.description,
+            )
+        )
+
+    # Sort columns alphabetically by name
+    columns.sort(key=lambda col: col.name)
+    return dg.TableSchema(columns=columns)
+
+
+def _get_type_string(annotation: Any) -> str:
+    """Get a clean string representation of a type annotation.
+
+    For generic types (list[str], dict[str, int], etc.), str() works well.
+    For simple types (str, int, etc.), use __name__ to avoid "<class 'str'>" output.
+
+    Special handling:
+    - Pydantic datetime types show cleaner representations
+    - None is stripped from union types (nullability is handled separately via DB constraints)
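+
+    For instance, `str | None` becomes `"str"`, and `dict[str, list[int]]`
+    becomes `"dict[str, list[int]]"`.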
+    """
+    from pydantic import AwareDatetime, NaiveDatetime
+
+    # Map Pydantic datetime types to cleaner representations
+    pydantic_type_names = {
+        AwareDatetime: "datetime (UTC)",
+        NaiveDatetime: "datetime (naive)",
+    }
+
+    # For generic types (list[str], dict[str, int], Union, etc.), handle args recursively
+    origin = get_origin(annotation)
+    if origin is not None:
+        args = get_args(annotation)
+        if args:
+            # Handle Union types (X | Y syntax uses types.UnionType, typing.Union is different)
+            if origin is Union or isinstance(annotation, types.UnionType):
+                # Filter out None - nullability is handled via DB constraints, not Pydantic types
+                non_none_args = [arg for arg in args if arg is not type(None)]
+                if len(non_none_args) == 1:
+                    # Simple optional type like `str | None` -> just return the base type
+                    return _get_type_string(non_none_args[0])
+                # Multiple non-None types in union
+                clean_args = [_get_type_string(arg) for arg in non_none_args]
+                return " | ".join(clean_args)
+            # Handle other generic types
+            clean_args = [_get_type_string(arg) for arg in args]
+            origin_name = getattr(origin, "__name__", str(origin))
+            return f"{origin_name}[{', '.join(clean_args)}]"
+        return str(annotation)
+
+    # Check for Pydantic special types
+    if annotation in pydantic_type_names:
+        return pydantic_type_names[annotation]
+
+    # For simple types, use __name__ if available
+    if hasattr(annotation, "__name__"):
+        return annotation.__name__
+
+    # Fallback to str()
+    return str(annotation)
+
+
+def build_column_lineage(
+    feature_cls: type[mx.BaseFeature],
+    feature_spec: mx.FeatureSpec | None = None,
+) -> dg.TableColumnLineage | None:
+    """Build column-level lineage from feature dependencies.
+
+    Tracks column provenance by analyzing:
+    - `FeatureDep.rename` mappings: renamed columns trace back to their upstream source
+    - `FeatureSpec.lineage`: ID column relationships between features
+    - Direct pass-through: columns with same name in both upstream and downstream
+    - System columns: `metaxy_provenance_by_field` and `metaxy_provenance` have lineage
+      from corresponding upstream columns
+
+    Args:
+        feature_cls: The downstream feature class.
+        feature_spec: The downstream feature specification. If None, uses `feature_cls.spec()`.
+
+    Returns:
+        TableColumnLineage mapping downstream columns to their upstream sources,
+        or None if no column lineage can be determined.
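+
+    Example: Inspect a feature's column lineage (illustrative; `ChildFeature`
+        stands in for any Metaxy feature with at least one dependency)
+        ```python
+        lineage = build_column_lineage(ChildFeature)  # ChildFeature: hypothetical
+        if lineage is not None:
+            for column, deps in lineage.deps_by_column.items():
+                print(column, [dep.column_name for dep in deps])
+        ```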
+
+    !!! tip
+        This is automatically injected by [`@metaxify`][metaxy.ext.dagster.metaxify.metaxify]
+    """
+    if feature_spec is None:
+        feature_spec = feature_cls.spec()
+
+    if not feature_spec.deps:
+        return None
+
+    deps_by_column: dict[str, list[dg.TableColumnDep]] = {}
+    downstream_columns = set(feature_cls.model_fields.keys())
+
+    for dep in feature_spec.deps:
+        upstream_feature_cls = mx.get_feature_by_key(dep.feature)
+        upstream_feature_spec = upstream_feature_cls.spec()
+        upstream_asset_key = get_asset_key_for_metaxy_feature_spec(
+            upstream_feature_spec
+        )
+        upstream_columns = set(upstream_feature_cls.model_fields.keys())
+
+        # Build reverse rename map: downstream_name -> upstream_name
+        # FeatureDep.rename is {old_upstream_name: new_downstream_name}
+        reverse_rename: dict[str, str] = {}
+        if dep.rename:
+            reverse_rename = {v: k for k, v in dep.rename.items()}
+
+        # Track columns based on lineage relationship
+        lineage = feature_spec.lineage
+
+        # Get ID column mappings based on lineage type
+        id_column_mapping = _get_id_column_mapping(
+            downstream_id_columns=feature_spec.id_columns,
+            upstream_id_columns=upstream_feature_spec.id_columns,
+            lineage=lineage,
+            rename=reverse_rename,
+        )
+
+        # Process ID columns
+        for downstream_col, upstream_col in id_column_mapping.items():
+            if downstream_col in downstream_columns:
+                if downstream_col not in deps_by_column:
+                    deps_by_column[downstream_col] = []
+                deps_by_column[downstream_col].append(
+                    dg.TableColumnDep(
+                        asset_key=upstream_asset_key,
+                        column_name=upstream_col,
+                    )
+                )
+
+        # Process renamed columns (that aren't ID columns)
+        for downstream_col, upstream_col in reverse_rename.items():
+            if (
+                downstream_col in downstream_columns
+                and downstream_col not in id_column_mapping
+            ):
+                if upstream_col in upstream_columns:
+                    if downstream_col not in deps_by_column:
+                        deps_by_column[downstream_col] = []
+                    deps_by_column[downstream_col].append(
+                        dg.TableColumnDep(
+                            asset_key=upstream_asset_key,
+                            column_name=upstream_col,
+                        )
+                    )
+
+        # Process direct pass-through columns (same name in both, not renamed, ID, or system)
+        # System columns are handled separately below since only some have lineage
+        handled_columns = (
+            set(id_column_mapping.keys())
+            | set(reverse_rename.keys())
+            | ALL_SYSTEM_COLUMNS
+        )
+        for col in downstream_columns - handled_columns:
+            if col in upstream_columns:
+                if col not in deps_by_column:
+                    deps_by_column[col] = []
+                deps_by_column[col].append(
+                    dg.TableColumnDep(
+                        asset_key=upstream_asset_key,
+                        column_name=col,
+                    )
+                )
+
+        # Process system columns with lineage (metaxy_provenance_by_field, metaxy_provenance)
+        # These columns are always present in both upstream and downstream features
+        # and have a direct lineage relationship (downstream values are computed from upstream)
+        for sys_col in SYSTEM_COLUMNS_WITH_LINEAGE:
+            if sys_col not in deps_by_column:
+                deps_by_column[sys_col] = []
+            deps_by_column[sys_col].append(
+                dg.TableColumnDep(
+                    asset_key=upstream_asset_key,
+                    column_name=sys_col,
+                )
+            )
+
+    if not deps_by_column:
+        return None
+
+    # Sort columns alphabetically
+    sorted_deps = {k: deps_by_column[k] for k in sorted(deps_by_column)}
+    return dg.TableColumnLineage(deps_by_column=sorted_deps)
+
+
+def _get_id_column_mapping(
+    downstream_id_columns: tuple[str, ...],
+    upstream_id_columns: tuple[str, ...],
+    lineage: mx.LineageRelationship,
+    rename: dict[str, str],
+) -> dict[str, str]:
+    """Get mapping of downstream ID columns to upstream ID columns.
+
+    Args:
+        downstream_id_columns: ID columns of the downstream feature.
+        upstream_id_columns: ID columns of the upstream feature.
+        lineage: The lineage relationship between features.
+        rename: Reverse rename map (downstream_name -> upstream_name).
+
+    Returns:
+        Mapping of downstream ID column names to upstream ID column names.
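+
+    For instance, with an identity relationship, an upstream ID column `"id"`
+    renamed downstream to `"user_id"` yields `{"user_id": "id"}`.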
+    """
+    from metaxy.models.lineage import (
+        AggregationRelationship,
+        ExpansionRelationship,
+        IdentityRelationship,
+    )
+
+    mapping: dict[str, str] = {}
+    rel = lineage.relationship
+
+    if isinstance(rel, IdentityRelationship):
+        # 1:1 - downstream ID columns map to same-named upstream ID columns
+        # (accounting for any renames)
+        for downstream_col in downstream_id_columns:
+            # Check if this column was renamed from upstream
+            upstream_col = rename.get(downstream_col, downstream_col)
+            if upstream_col in upstream_id_columns:
+                mapping[downstream_col] = upstream_col
+
+    elif isinstance(rel, AggregationRelationship):
+        # N:1 - aggregation columns map to upstream
+        # Use `on` columns if specified, otherwise use all downstream ID columns
+        agg_columns = rel.on if rel.on is not None else downstream_id_columns
+        for downstream_col in agg_columns:
+            if downstream_col in downstream_id_columns:
+                upstream_col = rename.get(downstream_col, downstream_col)
+                if upstream_col in upstream_id_columns:
+                    mapping[downstream_col] = upstream_col
+
+    elif isinstance(rel, ExpansionRelationship):
+        # 1:N - `on` columns (parent ID columns) map to upstream ID columns
+        for downstream_col in rel.on:
+            if downstream_col in downstream_id_columns:
+                upstream_col = rename.get(downstream_col, downstream_col)
+                if upstream_col in upstream_id_columns:
+                    mapping[downstream_col] = upstream_col
+
+    return mapping
+
+
+def build_table_preview_metadata(
+    lazy_df: nw.LazyFrame[Any],
+    schema: dg.TableSchema,
+    *,
+    n_rows: int = 5,
+) -> dg.TableMetadataValue:
+    """Build a Dagster table preview from the last N rows of a LazyFrame.
+
+    Collects the last `n_rows` from the LazyFrame and converts them to
+    Dagster TableRecord objects suitable for display in the Dagster UI.
+    Complex types (Struct, List, Array) are converted to JSON strings;
+    primitive types (str, int, float, bool, None) are kept as-is.
+
+    Args:
+        lazy_df: A narwhals LazyFrame to preview.
+        schema: The TableSchema for the table. Use `build_column_schema()` to
+            create this from a Metaxy feature class.
+        n_rows: Number of rows to include in the preview (from the end). Defaults to 5.
+
+    Returns:
+        A TableMetadataValue containing the preview rows as TableRecord objects.
+        Returns an empty table if the DataFrame is empty.
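+
+    Example: Build a preview from a Polars LazyFrame (a minimal sketch; the
+        schema would normally come from `build_column_schema()`)
+        ```python
+        import dagster as dg
+        import narwhals as nw
+        import polars as pl
+
+        schema = dg.TableSchema(columns=[dg.TableColumn(name="id", type="int")])
+        lazy_df = nw.from_native(pl.LazyFrame({"id": [1, 2, 3]}))
+        preview = build_table_preview_metadata(lazy_df, schema, n_rows=2)
+        ```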
+
+    !!! tip
+
+        This is automatically injected by [`MetaxyIOManager`][metaxy.ext.dagster.io_manager.MetaxyIOManager]
+    """
+    # Collect the last n_rows from the LazyFrame
+    collected_df = lazy_df.tail(n_rows).collect()
+    df_polars: pl.DataFrame = collected_df.to_native()  # pyright: ignore[reportAssignmentType]
+
+    # Handle empty DataFrames
+    if df_polars.is_empty():
+        return dg.MetadataValue.table(records=[], schema=schema)
+
+    # Convert complex types to strings, keep primitives as-is
+    df_processed = _prepare_dataframe_for_table_record(df_polars)
+
+    # Convert to TableRecord objects
+    records = [dg.TableRecord(data=row) for row in df_processed.to_dicts()]
+
+    return dg.MetadataValue.table(records=records, schema=schema)
+
+
+def _prepare_dataframe_for_table_record(df: pl.DataFrame) -> pl.DataFrame:
+    """Prepare a Polars DataFrame for conversion to Dagster TableRecord objects.
+
+    Complex types (Struct, List, Array) and temporal types are converted to strings.
+    Lists/Arrays longer than the truncation threshold are shortened to show the
+    first and last items with ".." in between (see `_truncate_list_expr`).
+    Primitive types (str, int, float, bool, None) are kept as-is since
+    Dagster's TableRecord accepts them directly.
+
+    Args:
+        df: The Polars DataFrame to prepare.
+
+    Returns:
+        A DataFrame with complex/temporal types converted to strings.
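+
+    For instance, a Struct value `{"x": 1}` becomes the string `'{"x":1}'`, and
+    a Datetime value is rendered as its ISO-format string.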
+    """
+    exprs: list[pl.Expr] = []
+
+    for col_name in df.columns:
+        dtype = df[col_name].dtype
+        if isinstance(dtype, pl.Struct):
+            # Struct types: use json_encode for a clean JSON representation
+            exprs.append(pl.col(col_name).struct.json_encode())
+        elif isinstance(dtype, pl.List):
+            # List types: truncate and convert to string
+            exprs.append(_truncate_list_expr(pl.col(col_name), alias=col_name))
+        elif isinstance(dtype, pl.Array):
+            # Array types: convert to list first, then truncate
+            exprs.append(
+                _truncate_list_expr(pl.col(col_name).arr.to_list(), alias=col_name)
+            )
+        elif dtype in (pl.Datetime, pl.Date, pl.Time, pl.Duration) or isinstance(
+            dtype, (pl.Datetime, pl.Date, pl.Time, pl.Duration)
+        ):
+            # Temporal types: cast to string (ISO format)
+            exprs.append(pl.col(col_name).cast(pl.String))
+        else:
+            # Primitive types: keep as-is
+            exprs.append(pl.col(col_name))
+
+    return df.select(exprs)
+
+
+def _truncate_list_expr(list_expr: pl.Expr, alias: str, max_items: int = 2) -> pl.Expr:
+    """Truncate a list expression and convert it to a string.
+
+    Lists with more than `max_items` items show the first `max_items // 2` and
+    last `max_items // 2` items with ".." between them.
+
+    Args:
+        list_expr: A Polars expression that evaluates to a List type.
+        alias: The output column name.
+        max_items: Maximum number of items to show without truncation. Defaults to 2.
+
+    Returns:
+        A Polars expression that truncates and converts the list to string.
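+
+    For instance, with the default `max_items=2`, `[1, 2, 3, 4, 5]` is rendered
+    as `"[1,..,5]"`, while `[1, 2]` stays `"[1,2]"`.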
+    """
+    list_len = list_expr.list.len()
+    half = max_items // 2
+
+    # For short lists: just json_encode the whole thing
+    # For long lists: concat the first `half` + last `half` items and json_encode, then insert ".."
+    truncated = pl.concat_list(
+        list_expr.list.head(half),
+        list_expr.list.tail(half),
+    )
+
+    # Convert to JSON string via struct wrapper
+    def to_json(expr: pl.Expr) -> pl.Expr:
+        return (
+            pl.struct(expr.alias("_"))
+            .struct.json_encode()
+            .str.extract(r'\{"_":(.*)\}', 1)
+        )
+
+    short_result = to_json(list_expr)
+    # For truncated: insert ".." after the first `half` elements
+    # e.g., [1,10] -> [1,..,10]
+    # Match: opening bracket, then `half` comma-separated values
+    # The pattern matches values that may contain nested brackets
+    value_pattern = r"[^\[\],]+(?:\[[^\]]*\])?"  # matches value or value[...]
+    first_n_values = ",".join([value_pattern] * half)
+    long_result = to_json(truncated).str.replace(
+        r"^(\[" + first_n_values + r"),",
+        "$1,..,",
+    )
+
+    return (
+        pl.when(list_len <= max_items)
+        .then(short_result)
+        .otherwise(long_result)
+        .alias(alias)
+    )