Flowfile 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +2 -1
- flowfile/api.py +5 -3
- flowfile/web/__init__.py +3 -0
- flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
- flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
- flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
- flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
- flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
- flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
- flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
- flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
- flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
- flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
- flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
- flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
- flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
- flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
- flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
- flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
- flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
- flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
- flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
- flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
- flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
- flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
- flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
- flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
- flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
- flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
- flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
- flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
- flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
- flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
- flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
- flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
- flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
- flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
- flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
- flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
- flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -3
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +97 -88
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/node_store/nodes.py +2 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +84 -29
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +55 -18
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +34 -2
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_graph_utils.py +320 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +12 -14
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +616 -627
- flowfile_frame/flow_frame.pyi +336 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
- flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_frame/expr.py
CHANGED
|
@@ -6,9 +6,15 @@ import polars as pl
|
|
|
6
6
|
from polars.expr.string import ExprStringNameSpace
|
|
7
7
|
|
|
8
8
|
from flowfile_core.schemas import transform_schema
|
|
9
|
+
from functools import wraps
|
|
9
10
|
|
|
10
11
|
from builtins import len as built_in_len
|
|
11
12
|
|
|
13
|
+
from flowfile_frame.config import logger
|
|
14
|
+
from flowfile_frame.expr_name import ExprNameNameSpace
|
|
15
|
+
from flowfile_frame.adding_expr import add_expr_methods
|
|
16
|
+
from flowfile_frame.list_name_space import ExprListNameSpace
|
|
17
|
+
|
|
12
18
|
# --- TYPE CHECKING IMPORTS ---
|
|
13
19
|
if TYPE_CHECKING:
|
|
14
20
|
from flowfile_frame.selectors import Selector
|
|
@@ -22,9 +28,11 @@ ExprStrOrList = Union[ExprOrStr, ExprOrStrList]
|
|
|
22
28
|
|
|
23
29
|
def _repr_args(*args, **kwargs):
|
|
24
30
|
"""Helper to represent arguments for __repr__."""
|
|
25
|
-
arg_reprs = [
|
|
31
|
+
arg_reprs = [a.__repr__() for a in args]
|
|
26
32
|
kwarg_reprs = []
|
|
27
33
|
for k, v in kwargs.items():
|
|
34
|
+
if k == '_function_sources':
|
|
35
|
+
continue
|
|
28
36
|
if isinstance(v, pl.DataType):
|
|
29
37
|
kwarg_reprs.append(f"{k}={v!s}")
|
|
30
38
|
elif isinstance(v, type) and issubclass(v, pl.DataType):
|
|
@@ -52,24 +60,32 @@ def _get_expr_and_repr(value: Any) -> tuple[Optional[pl.Expr], str]:
|
|
|
52
60
|
return pl.lit(value), repr(value)
|
|
53
61
|
|
|
54
62
|
|
|
55
|
-
# --- Namespaces ---
|
|
56
|
-
|
|
57
63
|
class StringMethods:
|
|
58
64
|
expr: Optional[ExprStringNameSpace]
|
|
65
|
+
convertable_to_code: bool
|
|
66
|
+
_function_sources: Optional[List[str]]
|
|
59
67
|
|
|
60
|
-
def __init__(self, parent_expr: 'Expr', parent_repr_str: str
|
|
68
|
+
def __init__(self, parent_expr: 'Expr', parent_repr_str: str, convertable_to_code: bool = True,
|
|
69
|
+
_function_sources: Optional[List[str]] = None):
|
|
61
70
|
self.parent = parent_expr
|
|
62
71
|
self.expr = parent_expr.expr.str if parent_expr.expr is not None else None
|
|
63
72
|
self.parent_repr_str = parent_repr_str
|
|
73
|
+
self.convertable_to_code = convertable_to_code
|
|
74
|
+
self._function_sources = _function_sources or []
|
|
64
75
|
|
|
65
|
-
def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool,
|
|
76
|
+
def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool,
|
|
77
|
+
convertable_to_code: bool = None, **kwargs) -> 'Expr':
|
|
66
78
|
args_repr = _repr_args(*args, **kwargs)
|
|
67
79
|
new_repr = f"{self.parent_repr_str}.str.{method_name}({args_repr})"
|
|
68
|
-
|
|
80
|
+
if convertable_to_code is None:
|
|
81
|
+
convertable_to_code = self.convertable_to_code
|
|
82
|
+
new_expr = Expr(result_expr, self.parent.column_name, repr_str=new_repr,
|
|
69
83
|
initial_column_name=self.parent._initial_column_name,
|
|
70
84
|
selector=None,
|
|
71
85
|
agg_func=self.parent.agg_func,
|
|
72
|
-
is_complex=is_complex
|
|
86
|
+
is_complex=is_complex,
|
|
87
|
+
convertable_to_code=convertable_to_code,
|
|
88
|
+
_function_sources=self._function_sources)
|
|
73
89
|
return new_expr
|
|
74
90
|
|
|
75
91
|
# ... (String methods remain unchanged from your provided code) ...
|
|
@@ -131,24 +147,31 @@ class StringMethods:
|
|
|
131
147
|
|
|
132
148
|
class DateTimeMethods:
|
|
133
149
|
expr: Optional[Any]
|
|
150
|
+
convertable_to_code: bool
|
|
151
|
+
_function_sources: Optional[List[str]]
|
|
134
152
|
|
|
135
|
-
def __init__(self, parent_expr: 'Expr', parent_repr_str: str
|
|
153
|
+
def __init__(self, parent_expr: 'Expr', parent_repr_str: str, convertable_to_code: bool = True,
|
|
154
|
+
_function_sources: Optional[List[str]] = None):
|
|
136
155
|
self.parent = parent_expr
|
|
137
156
|
self.expr = parent_expr.expr.dt if parent_expr.expr is not None else None
|
|
138
157
|
self.parent_repr_str = parent_repr_str
|
|
158
|
+
self.convertable_to_code = convertable_to_code
|
|
159
|
+
self._function_sources = _function_sources or []
|
|
139
160
|
|
|
140
|
-
def _create_next_expr(self, method_name: str, result_expr: Optional[pl.Expr], *args, **kwargs) -> 'Expr':
|
|
161
|
+
def _create_next_expr(self, method_name: str, result_expr: Optional[pl.Expr], convertable_to_code: bool = None, *args, **kwargs) -> 'Expr':
|
|
141
162
|
args_repr = _repr_args(*args, **kwargs)
|
|
142
163
|
new_repr = f"{self.parent_repr_str}.dt.{method_name}({args_repr})"
|
|
143
|
-
|
|
144
|
-
|
|
164
|
+
if convertable_to_code is None:
|
|
165
|
+
convertable_to_code = self.convertable_to_code
|
|
166
|
+
new_expr = Expr(result_expr, self.parent.column_name, repr_str=new_repr,
|
|
145
167
|
initial_column_name=self.parent._initial_column_name,
|
|
146
168
|
selector=None,
|
|
147
169
|
agg_func=self.parent.agg_func,
|
|
148
|
-
is_complex=True
|
|
170
|
+
is_complex=True,
|
|
171
|
+
convertable_to_code=convertable_to_code,
|
|
172
|
+
_function_sources=self._function_sources)
|
|
149
173
|
return new_expr
|
|
150
174
|
|
|
151
|
-
# ... (DateTime methods remain unchanged from your provided code) ...
|
|
152
175
|
def year(self):
|
|
153
176
|
res_expr = self.expr.year() if self.expr is not None else None
|
|
154
177
|
return self._create_next_expr("year", res_expr)
|
|
@@ -198,8 +221,11 @@ class Expr:
|
|
|
198
221
|
expr: Optional[pl.Expr]
|
|
199
222
|
agg_func: Optional[str]
|
|
200
223
|
_repr_str: str
|
|
201
|
-
|
|
224
|
+
_name_namespace: Optional[ExprNameNameSpace]
|
|
225
|
+
column_name: Optional[str]
|
|
202
226
|
is_complex: bool = False
|
|
227
|
+
convertable_to_code: bool
|
|
228
|
+
_function_sources: List[str] # Add this attribute
|
|
203
229
|
|
|
204
230
|
def __init__(self,
|
|
205
231
|
expr: Optional[pl.Expr],
|
|
@@ -209,14 +235,18 @@ class Expr:
|
|
|
209
235
|
selector: Optional['Selector'] = None,
|
|
210
236
|
agg_func: Optional[str] = None,
|
|
211
237
|
ddof: Optional[int] = None,
|
|
212
|
-
is_complex: bool = False
|
|
238
|
+
is_complex: bool = False,
|
|
239
|
+
convertable_to_code: bool = True,
|
|
240
|
+
_function_sources: Optional[List[str]] = None):
|
|
213
241
|
|
|
214
242
|
self.expr = expr
|
|
215
|
-
self.
|
|
243
|
+
self.column_name = column_name
|
|
216
244
|
self.agg_func = agg_func
|
|
217
245
|
self.selector = selector
|
|
218
246
|
self._initial_column_name = initial_column_name or column_name
|
|
219
247
|
self.is_complex = is_complex
|
|
248
|
+
self.convertable_to_code = convertable_to_code
|
|
249
|
+
self._function_sources = _function_sources or []
|
|
220
250
|
# --- Determine Representation String ---
|
|
221
251
|
if repr_str is not None:
|
|
222
252
|
self._repr_str = repr_str
|
|
@@ -238,17 +268,18 @@ class Expr:
|
|
|
238
268
|
else:
|
|
239
269
|
raise ValueError("Cannot initialize Expr without expr, repr_str, or selector+agg_func")
|
|
240
270
|
|
|
241
|
-
if self.
|
|
271
|
+
if self.column_name is None and self.selector is None and self.expr is not None:
|
|
242
272
|
try:
|
|
243
|
-
self.
|
|
273
|
+
self.column_name = self.expr._output_name
|
|
244
274
|
except AttributeError:
|
|
245
275
|
try:
|
|
246
|
-
self.
|
|
276
|
+
self.column_name = self.expr._name
|
|
247
277
|
except AttributeError:
|
|
248
278
|
pass
|
|
249
|
-
|
|
279
|
+
self._list_namespace: Optional['ExprListNameSpace'] = None
|
|
250
280
|
self._str_namespace: Optional['StringMethods'] = None
|
|
251
281
|
self._dt_namespace: Optional['DateTimeMethods'] = None
|
|
282
|
+
self._name_namespace: Optional['ExprNameNameSpace'] = None
|
|
252
283
|
|
|
253
284
|
def __repr__(self) -> str:
|
|
254
285
|
return self._repr_str
|
|
@@ -308,19 +339,55 @@ class Expr:
|
|
|
308
339
|
# If we reach here, it's a simple expression (just column reference and maybe aggregation)
|
|
309
340
|
return True
|
|
310
341
|
|
|
311
|
-
def
|
|
342
|
+
def arg_unique(self) -> "Expr":
|
|
343
|
+
result_expr = self.expr.arg_unique() if self.expr is not None else None
|
|
344
|
+
return self._create_next_expr(method_name="arg_unique", result_expr=result_expr, is_complex=True)
|
|
345
|
+
|
|
346
|
+
def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> "Expr":
|
|
347
|
+
result_expr = self.expr.arg_sort(descending=descending, nulls_last=nulls_last) if self.expr is not None else None
|
|
348
|
+
return self._create_next_expr(descending=descending, nulls_last=nulls_last, method_name="arg_sort",
|
|
349
|
+
result_expr=result_expr, is_complex=True)
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr],
|
|
353
|
+
convertable_to_code: bool = None, is_complex: bool,
|
|
354
|
+
_function_sources: Optional[List[str]] = None, **kwargs) -> 'Expr':
|
|
312
355
|
"""Creates a new Expr instance, appending method call to repr string."""
|
|
313
|
-
|
|
356
|
+
# Filter out _function_sources from kwargs to avoid passing it to _repr_args
|
|
357
|
+
filtered_kwargs = {k: v for k, v in kwargs.items() if k != '_function_sources'}
|
|
358
|
+
args_repr = _repr_args(*args, **filtered_kwargs)
|
|
314
359
|
new_repr = f"{self._repr_str}.{method_name}({args_repr})"
|
|
315
360
|
|
|
361
|
+
if convertable_to_code is None:
|
|
362
|
+
convertable_to_code = self.convertable_to_code
|
|
363
|
+
|
|
364
|
+
# Combine function sources from current expression and new ones
|
|
365
|
+
combined_function_sources = self._function_sources.copy()
|
|
366
|
+
if _function_sources:
|
|
367
|
+
combined_function_sources.extend(_function_sources)
|
|
368
|
+
|
|
316
369
|
# Create new instance, inheriting current agg_func status by default
|
|
317
|
-
new_expr_instance = Expr(
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
370
|
+
new_expr_instance = Expr(
|
|
371
|
+
result_expr,
|
|
372
|
+
self.column_name,
|
|
373
|
+
repr_str=new_repr,
|
|
374
|
+
initial_column_name=self._initial_column_name,
|
|
375
|
+
selector=None,
|
|
376
|
+
agg_func=self.agg_func,
|
|
377
|
+
is_complex=is_complex,
|
|
378
|
+
convertable_to_code=convertable_to_code,
|
|
379
|
+
_function_sources=combined_function_sources # Pass combined function sources
|
|
380
|
+
)
|
|
322
381
|
return new_expr_instance
|
|
323
382
|
|
|
383
|
+
|
|
384
|
+
@property
|
|
385
|
+
def name(self) -> ExprNameNameSpace:
|
|
386
|
+
"""Access the name namespace for expression name operations."""
|
|
387
|
+
if self._name_namespace is None:
|
|
388
|
+
self._name_namespace = ExprNameNameSpace(self, self._repr_str)
|
|
389
|
+
return self._name_namespace
|
|
390
|
+
|
|
324
391
|
def _create_binary_op_expr(
|
|
325
392
|
self, op_symbol: str, other: Any, result_expr: Optional[pl.Expr]
|
|
326
393
|
) -> "Expr":
|
|
@@ -346,7 +413,7 @@ class Expr:
|
|
|
346
413
|
return Expr(
|
|
347
414
|
result_expr,
|
|
348
415
|
None,
|
|
349
|
-
repr_str=f"({new_repr})",
|
|
416
|
+
repr_str=f"({new_repr})",
|
|
350
417
|
initial_column_name=self._initial_column_name,
|
|
351
418
|
selector=None,
|
|
352
419
|
agg_func=None,
|
|
@@ -356,7 +423,7 @@ class Expr:
|
|
|
356
423
|
@property
|
|
357
424
|
def str(self) -> StringMethods:
|
|
358
425
|
if self._str_namespace is None:
|
|
359
|
-
self._str_namespace = StringMethods(self, self._repr_str)
|
|
426
|
+
self._str_namespace = StringMethods(self, self._repr_str, convertable_to_code=self.convertable_to_code)
|
|
360
427
|
return self._str_namespace
|
|
361
428
|
|
|
362
429
|
@property
|
|
@@ -365,12 +432,30 @@ class Expr:
|
|
|
365
432
|
self._dt_namespace = DateTimeMethods(self, self._repr_str)
|
|
366
433
|
return self._dt_namespace
|
|
367
434
|
|
|
435
|
+
@property
|
|
436
|
+
def list(self) -> ExprListNameSpace:
|
|
437
|
+
if self._list_namespace is None:
|
|
438
|
+
self._list_namespace = ExprListNameSpace(self, self._repr_str)
|
|
439
|
+
return self._list_namespace
|
|
440
|
+
|
|
368
441
|
def sum(self):
|
|
369
442
|
result_expr = self.expr.sum() if self.expr is not None else None
|
|
370
443
|
result = self._create_next_expr(method_name="sum", result_expr=result_expr, is_complex=self.is_complex)
|
|
371
444
|
result.agg_func = "sum"
|
|
372
445
|
return result
|
|
373
446
|
|
|
447
|
+
def implode(self):
|
|
448
|
+
result_expr = self.expr.implode() if self.expr is not None else None
|
|
449
|
+
result = self._create_next_expr(method_name="implode", result_expr=result_expr, is_complex=self.is_complex)
|
|
450
|
+
result.agg_func = "implode"
|
|
451
|
+
return result
|
|
452
|
+
|
|
453
|
+
def explode(self):
|
|
454
|
+
result_expr = self.expr.explode() if self.expr is not None else None
|
|
455
|
+
result = self._create_next_expr(method_name="explode", result_expr=result_expr, is_complex=self.is_complex)
|
|
456
|
+
result.agg_func = "explode"
|
|
457
|
+
return result
|
|
458
|
+
|
|
374
459
|
def mean(self):
|
|
375
460
|
result_expr = self.expr.mean() if self.expr is not None else None
|
|
376
461
|
result = self._create_next_expr(method_name="mean", result_expr=result_expr, is_complex=self.is_complex)
|
|
@@ -490,44 +575,43 @@ class Expr:
|
|
|
490
575
|
# --- Right-side Arithmetic ---
|
|
491
576
|
def __radd__(self, other):
|
|
492
577
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
493
|
-
new_repr = f"{other_repr} + {self._repr_str}"
|
|
578
|
+
new_repr = f"({other_repr} + {self._repr_str})"
|
|
494
579
|
res_expr = other_expr + self.expr if other_expr is not None and self.expr is not None else None
|
|
495
|
-
# Right-side ops also clear agg_func
|
|
496
580
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
497
581
|
|
|
498
582
|
def __rsub__(self, other):
|
|
499
583
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
500
|
-
new_repr = f"{other_repr} - {self._repr_str}"
|
|
584
|
+
new_repr = f"({other_repr} - {self._repr_str})"
|
|
501
585
|
res_expr = other_expr - self.expr if other_expr is not None and self.expr is not None else None
|
|
502
586
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
503
587
|
|
|
504
588
|
def __rmul__(self, other):
|
|
505
589
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
506
|
-
new_repr = f"{other_repr} * {self._repr_str}"
|
|
590
|
+
new_repr = f"({other_repr} * {self._repr_str})"
|
|
507
591
|
res_expr = other_expr * self.expr if other_expr is not None and self.expr is not None else None
|
|
508
592
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
509
593
|
|
|
510
594
|
def __rtruediv__(self, other):
|
|
511
595
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
512
|
-
new_repr = f"{other_repr} / {self._repr_str}"
|
|
596
|
+
new_repr = f"({other_repr} / {self._repr_str})"
|
|
513
597
|
res_expr = other_expr / self.expr if other_expr is not None and self.expr is not None else None
|
|
514
598
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
515
599
|
|
|
516
600
|
def __rfloordiv__(self, other):
|
|
517
601
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
518
|
-
new_repr = f"{other_repr} // {self._repr_str}"
|
|
602
|
+
new_repr = f"({other_repr} // {self._repr_str})"
|
|
519
603
|
res_expr = other_expr // self.expr if other_expr is not None and self.expr is not None else None
|
|
520
604
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
521
605
|
|
|
522
606
|
def __rmod__(self, other):
|
|
523
607
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
524
|
-
new_repr = f"{other_repr} % {self._repr_str}"
|
|
608
|
+
new_repr = f"({other_repr} % {self._repr_str})"
|
|
525
609
|
res_expr = other_expr % self.expr if other_expr is not None and self.expr is not None else None
|
|
526
610
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
527
611
|
|
|
528
612
|
def __rpow__(self, other):
|
|
529
613
|
other_expr, other_repr = _get_expr_and_repr(other)
|
|
530
|
-
new_repr = f"{other_repr} ** {self._repr_str}"
|
|
614
|
+
new_repr = f"({other_repr} ** {self._repr_str})"
|
|
531
615
|
base_expr = pl.lit(other) if not isinstance(other, (Expr, pl.Expr)) else other_expr
|
|
532
616
|
res_expr = base_expr.pow(self.expr) if self.expr is not None and base_expr is not None else None
|
|
533
617
|
return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
|
|
@@ -553,18 +637,18 @@ class Expr:
|
|
|
553
637
|
res_expr = self.expr < other_expr if self.expr is not None and other_expr is not None else None
|
|
554
638
|
return self._create_binary_op_expr("<", other, res_expr)
|
|
555
639
|
|
|
556
|
-
def __ge__(self, other):
|
|
640
|
+
def __ge__(self, other) -> "Expr":
|
|
557
641
|
other_expr, _ = _get_expr_and_repr(other)
|
|
558
642
|
res_expr = self.expr >= other_expr if self.expr is not None and other_expr is not None else None
|
|
559
643
|
return self._create_binary_op_expr(">=", other, res_expr)
|
|
560
644
|
|
|
561
|
-
def __le__(self, other):
|
|
645
|
+
def __le__(self, other) -> "Expr":
|
|
562
646
|
other_expr, _ = _get_expr_and_repr(other)
|
|
563
647
|
res_expr = self.expr <= other_expr if self.expr is not None and other_expr is not None else None
|
|
564
648
|
return self._create_binary_op_expr("<=", other, res_expr)
|
|
565
649
|
|
|
566
650
|
# --- Logical operations ---
|
|
567
|
-
def __and__(self, other):
|
|
651
|
+
def __and__(self, other) -> "Expr":
|
|
568
652
|
from flowfile_frame.selectors import Selector
|
|
569
653
|
if isinstance(other, Selector):
|
|
570
654
|
raise TypeError("Unsupported operation: Expr & Selector")
|
|
@@ -572,7 +656,7 @@ class Expr:
|
|
|
572
656
|
res_expr = self.expr & other_expr if self.expr is not None and other_expr is not None else None
|
|
573
657
|
return self._create_binary_op_expr("&", other, res_expr)
|
|
574
658
|
|
|
575
|
-
def __or__(self, other):
|
|
659
|
+
def __or__(self, other) -> "Expr":
|
|
576
660
|
from flowfile_frame.selectors import Selector
|
|
577
661
|
if isinstance(other, Selector):
|
|
578
662
|
raise TypeError("Unsupported operation: Expr | Selector")
|
|
@@ -580,14 +664,19 @@ class Expr:
|
|
|
580
664
|
res_expr = self.expr | other_expr if self.expr is not None and other_expr is not None else None
|
|
581
665
|
return self._create_binary_op_expr("|", other, res_expr)
|
|
582
666
|
|
|
583
|
-
def __invert__(self):
|
|
667
|
+
def __invert__(self) -> "Expr":
|
|
584
668
|
new_repr = f"~({self._repr_str})"
|
|
585
669
|
res_expr = ~self.expr if self.expr is not None else None
|
|
586
670
|
# Invert clears agg_func
|
|
587
671
|
return Expr(res_expr, None, repr_str=new_repr,
|
|
588
672
|
initial_column_name=self._initial_column_name, agg_func=None)
|
|
589
673
|
|
|
590
|
-
|
|
674
|
+
def __neg__(self) -> "Expr":
|
|
675
|
+
new_repr = f"-{self._repr_str}"
|
|
676
|
+
res_expr = -self.expr if self.expr is not None else None
|
|
677
|
+
return Expr(res_expr, None, repr_str=new_repr,
|
|
678
|
+
initial_column_name=self._initial_column_name, agg_func=None)
|
|
679
|
+
|
|
591
680
|
def is_null(self):
|
|
592
681
|
result_expr = self.expr.is_null() if self.expr is not None else None
|
|
593
682
|
# is_null is not an aggregation, resets agg_func
|
|
@@ -636,16 +725,17 @@ class Expr:
|
|
|
636
725
|
try:
|
|
637
726
|
res_expr = self.expr.filter(*processed_predicates)
|
|
638
727
|
except Exception as e:
|
|
639
|
-
|
|
728
|
+
logger.warning("Could not create polars expression for filter(): {e}")
|
|
640
729
|
pass # res_expr will remain None
|
|
641
730
|
|
|
642
731
|
return Expr(
|
|
643
732
|
res_expr,
|
|
644
|
-
self.
|
|
733
|
+
self.column_name,
|
|
645
734
|
repr_str=f"{self._repr_str}.filter({all_args_str})",
|
|
646
735
|
initial_column_name=self._initial_column_name,
|
|
647
736
|
selector=None, # Filter typically removes selector link
|
|
648
737
|
agg_func=self.agg_func, # Preserve aggregation status
|
|
738
|
+
convertable_to_code=self.convertable_to_code
|
|
649
739
|
)
|
|
650
740
|
|
|
651
741
|
def is_not_null(self):
|
|
@@ -670,7 +760,9 @@ class Expr:
|
|
|
670
760
|
initial_column_name=self._initial_column_name,
|
|
671
761
|
selector=None,
|
|
672
762
|
agg_func=self.agg_func,
|
|
673
|
-
is_complex=self.is_complex
|
|
763
|
+
is_complex=self.is_complex,
|
|
764
|
+
convertable_to_code=self.convertable_to_code,
|
|
765
|
+
_function_sources = self._function_sources)
|
|
674
766
|
return new_instance
|
|
675
767
|
|
|
676
768
|
def fill_null(self, value):
|
|
@@ -780,7 +872,7 @@ class Expr:
|
|
|
780
872
|
res_expr = None
|
|
781
873
|
if self.expr is not None:
|
|
782
874
|
try:
|
|
783
|
-
if
|
|
875
|
+
if built_in_len(processed_partition_cols) == 1:
|
|
784
876
|
partition_arg = (
|
|
785
877
|
processed_partition_cols[0].expr
|
|
786
878
|
if hasattr(processed_partition_cols[0], "expr")
|
|
@@ -817,23 +909,49 @@ class Expr:
|
|
|
817
909
|
|
|
818
910
|
except Exception as e:
|
|
819
911
|
|
|
820
|
-
|
|
912
|
+
logger.warning("Could not create polars expression for over(): {e}")
|
|
821
913
|
pass
|
|
822
914
|
|
|
823
915
|
return Expr(
|
|
824
916
|
res_expr,
|
|
825
|
-
self.
|
|
917
|
+
self.column_name,
|
|
826
918
|
repr_str=f"{self._repr_str}.over({args_str_for_repr})",
|
|
827
919
|
initial_column_name=self._initial_column_name,
|
|
828
920
|
selector=None,
|
|
829
921
|
agg_func=None,
|
|
922
|
+
_function_sources = self._function_sources
|
|
830
923
|
)
|
|
831
924
|
|
|
925
|
+
def get_polars_code(self) -> str:
|
|
926
|
+
"""
|
|
927
|
+
Get the Polars code representation of this expression, including any function definitions.
|
|
928
|
+
|
|
929
|
+
Returns
|
|
930
|
+
-------
|
|
931
|
+
str
|
|
932
|
+
The complete Polars code including function definitions if any.
|
|
933
|
+
"""
|
|
934
|
+
if not self._function_sources:
|
|
935
|
+
return self._repr_str
|
|
936
|
+
|
|
937
|
+
# Remove duplicates while preserving order
|
|
938
|
+
unique_sources = []
|
|
939
|
+
seen = set()
|
|
940
|
+
for source in self._function_sources:
|
|
941
|
+
if source not in seen:
|
|
942
|
+
seen.add(source)
|
|
943
|
+
unique_sources.append(source)
|
|
944
|
+
|
|
945
|
+
# Build the complete code with function definitions
|
|
946
|
+
functions_section = "# Function definitions\n" + "\n\n".join(unique_sources)
|
|
947
|
+
return functions_section + "\n#─────SPLIT─────\n\n" + self._repr_str
|
|
948
|
+
|
|
832
949
|
def sort(self, *, descending=False, nulls_last=False):
|
|
833
950
|
res_expr = self.expr.sort(descending=descending, nulls_last=nulls_last) if self.expr is not None else None
|
|
834
|
-
return Expr(res_expr, self.
|
|
951
|
+
return Expr(res_expr, self.column_name,
|
|
835
952
|
repr_str=f"{self._repr_str}.sort(descending={descending}, nulls_last={nulls_last})",
|
|
836
|
-
initial_column_name=self._initial_column_name, agg_func=None
|
|
953
|
+
initial_column_name=self._initial_column_name, agg_func=None,
|
|
954
|
+
_function_sources=self._function_sources)
|
|
837
955
|
|
|
838
956
|
def cast(self, dtype: Union[pl.DataType, str, pl.datatypes.classes.DataTypeClass], *, strict=True):
|
|
839
957
|
""" Casts the Expr to a specified data type. """
|
|
@@ -853,12 +971,14 @@ class Expr:
|
|
|
853
971
|
|
|
854
972
|
res_expr = self.expr.cast(pl_dtype, strict=strict) if self.expr is not None else None
|
|
855
973
|
# Cast preserves aggregation status (e.g., cast(col('a').sum()))
|
|
856
|
-
new_expr = Expr(res_expr, self.
|
|
974
|
+
new_expr = Expr(res_expr, self.column_name,
|
|
857
975
|
repr_str=f"{self._repr_str}.cast({dtype_repr}, strict={strict})",
|
|
858
976
|
initial_column_name=self._initial_column_name,
|
|
859
977
|
selector=None,
|
|
860
978
|
agg_func=self.agg_func,
|
|
861
|
-
is_complex=True
|
|
979
|
+
is_complex=True,
|
|
980
|
+
convertable_to_code=self.convertable_to_code,
|
|
981
|
+
_function_sources=self._function_sources)
|
|
862
982
|
return new_expr
|
|
863
983
|
|
|
864
984
|
|
|
@@ -872,7 +992,7 @@ class Column(Expr):
|
|
|
872
992
|
repr_str=f"pl.col('{name}')",
|
|
873
993
|
initial_column_name=select_input.old_name if select_input else name,
|
|
874
994
|
selector=None,
|
|
875
|
-
agg_func=None)
|
|
995
|
+
agg_func=None,)
|
|
876
996
|
self._select_input = select_input or transform_schema.SelectInput(old_name=name)
|
|
877
997
|
|
|
878
998
|
def alias(self, new_name: str) -> "Column":
|
|
@@ -946,7 +1066,7 @@ class Column(Expr):
|
|
|
946
1066
|
def to_select_input(self) -> transform_schema.SelectInput:
|
|
947
1067
|
"""Convert Column state back to a SelectInput schema object."""
|
|
948
1068
|
# This logic seems correct based on your previous version
|
|
949
|
-
current_name = self.
|
|
1069
|
+
current_name = self.column_name
|
|
950
1070
|
original_name = self._select_input.old_name
|
|
951
1071
|
new_name_attr = self._select_input.new_name
|
|
952
1072
|
|
|
@@ -972,6 +1092,9 @@ class Column(Expr):
|
|
|
972
1092
|
return super().dt
|
|
973
1093
|
|
|
974
1094
|
|
|
1095
|
+
# Executed at module level, after the Column class has been defined.
# NOTE(review): presumably attaches generated methods to Expr — confirm in add_expr_methods.
add_expr_methods(Expr)
|
|
1096
|
+
|
|
1097
|
+
|
|
975
1098
|
class When(Expr):
|
|
976
1099
|
"""Class that represents a when-then-otherwise expression chain."""
|
|
977
1100
|
|
|
@@ -1006,7 +1129,7 @@ class When(Expr):
|
|
|
1006
1129
|
try:
|
|
1007
1130
|
self._branch_expr = pl.when(self.condition).then(value_expr)
|
|
1008
1131
|
except Exception as e:
|
|
1009
|
-
|
|
1132
|
+
logger.warning(f"Error in then() creation: {e}")
|
|
1010
1133
|
|
|
1011
1134
|
return self
|
|
1012
1135
|
|
|
@@ -1021,14 +1144,14 @@ class When(Expr):
|
|
|
1021
1144
|
if self._branch_expr is not None:
|
|
1022
1145
|
pl_expr = self._branch_expr.otherwise(value_expr)
|
|
1023
1146
|
except Exception as e:
|
|
1024
|
-
|
|
1147
|
+
logger.warning(f"Could not create when-then-otherwise expression: {e}")
|
|
1025
1148
|
|
|
1026
1149
|
return Expr(pl_expr, repr_str=final_repr)
|
|
1027
1150
|
|
|
1028
1151
|
def when(self, condition):
|
|
1029
1152
|
"""Create a new branch in the chain."""
|
|
1030
1153
|
if self._branch_expr is None:
|
|
1031
|
-
|
|
1154
|
+
logger.warning("Cannot add new branch without a then() first")
|
|
1032
1155
|
return self
|
|
1033
1156
|
|
|
1034
1157
|
condition_expr, condition_repr = self._get_expr_and_repr(condition)
|
|
@@ -1038,7 +1161,7 @@ class When(Expr):
|
|
|
1038
1161
|
try:
|
|
1039
1162
|
self._branch_expr = self._branch_expr.when(condition_expr)
|
|
1040
1163
|
except Exception as e:
|
|
1041
|
-
|
|
1164
|
+
logger.warning(f"Error adding new when() branch: {e}")
|
|
1042
1165
|
|
|
1043
1166
|
# Return self for chaining
|
|
1044
1167
|
return self
|
|
@@ -1058,41 +1181,100 @@ def column(name: str) -> Column:
|
|
|
1058
1181
|
def lit(value: Any) -> Expr:
    """Build a literal expression from a plain Python value.

    The value is passed to ``pl.lit`` with ``allow_object=True`` and the
    resulting Polars expression is wrapped in an ``Expr`` whose repr string
    is ``pl.lit(<repr of value>)``.
    """
    literal = pl.lit(value, allow_object=True)
    code = f"pl.lit({repr(value)})"
    # A literal is not an aggregation, so agg_func is explicitly left unset.
    return Expr(literal, repr_str=code, agg_func=None)
|
|
1062
1185
|
|
|
1063
1186
|
|
|
1064
1187
|
def len() -> Expr:
    """Return an ``Expr`` wrapping ``pl.len()`` with repr string ``pl.len()``.

    NOTE: this definition shadows the builtin ``len`` inside this module,
    which is why other code here calls ``built_in_len`` instead.
    """
    row_count = pl.len()
    return Expr(row_count, repr_str="pl.len()")
|
|
1066
1189
|
|
|
1067
1190
|
|
|
1068
|
-
def agg_function(func):
|
|
1191
|
+
def agg_function(func=None, *, customize_repr=True):
    """
    Decorate an aggregation helper so that it returns a configured ``Expr``.

    The decorated function is expected to return a Polars expression. The
    wrapper calls it with all positional and keyword arguments, then wraps
    the result in an ``Expr`` carrying:

    * ``agg_func``: the decorated function's ``__name__``;
    * ``repr_str``: ``pl.<name>(<args>)`` built from the call arguments
      (or ``None`` when ``customize_repr`` is false);
    * ``initial_column_name``: the first positional argument if it is a
      string, or its ``column_name`` attribute if it has one;
    * ``is_complex``: ``False`` only for a call with exactly one string
      positional argument and no keyword arguments.

    Parameters
    ----------
    func : function, optional
        The aggregation function to decorate. Omitted when the decorator is
        used with arguments, e.g. ``@agg_function(customize_repr=False)``.
    customize_repr : bool, default True
        Whether to create a custom representation string for the function.

    Returns
    -------
    function
        The wrapped function when ``func`` is given, otherwise a decorator
        awaiting the function.
    """

    def _positional_repr(arg):
        """Render one positional argument as Python source text."""
        if isinstance(arg, str):
            # repr() escapes quotes/backslashes; hand-wrapping in '...' did not.
            return repr(arg)
        if hasattr(arg, '_repr_str'):
            return arg._repr_str
        return repr(arg)

    def _keyword_repr(key, value):
        """Render one keyword argument as ``key=<source text>``."""
        if isinstance(value, pl.DataType):
            # Prefix with ``pl.`` so instances are rendered like DataType
            # classes below. NOTE(review): nested dtypes (e.g. List(Int64))
            # still render their inner type without the prefix.
            return f"{key}=pl.{value!s}"
        if isinstance(value, type) and issubclass(value, pl.DataType):
            return f"{key}=pl.{value.__name__}"
        # Strings included: repr() yields a correctly quoted literal.
        return f"{key}={value!r}"

    def decorator(func):
        agg_func_name = func.__name__  # Use the function name as the agg_func

        @wraps(func)
        def wrapper(*args, **kwargs):
            from flowfile_frame.expr import Expr
            # Get the Polars expression from the original function
            pl_expr = func(*args, **kwargs)

            # Generate representation string
            if customize_repr:
                rendered = [_positional_repr(arg) for arg in args]
                rendered.extend(_keyword_repr(k, v) for k, v in kwargs.items())
                repr_str = f"pl.{agg_func_name}({', '.join(rendered)})"
            else:
                # Use default representation (rarely needed)
                repr_str = None

            # Determine initial column name for tracking (if applicable)
            initial_column_name = None
            if args:
                first_arg = args[0]
                if isinstance(first_arg, str):
                    initial_column_name = first_arg
                elif hasattr(first_arg, 'column_name'):
                    initial_column_name = first_arg.column_name

            # Only a bare single-column call such as sum("a") counts as simple.
            # ``built_in_len`` is used because this module defines its own ``len``.
            is_simple = built_in_len(args) == 1 and isinstance(args[0], str) and not kwargs

            # Create the expression with all necessary properties
            return Expr(
                pl_expr,
                repr_str=repr_str,
                initial_column_name=initial_column_name,
                agg_func=agg_func_name,
                is_complex=not is_simple,
            )

        return wrapper

    # Handle both @agg_function and @agg_function(customize_repr=True)
    if func is None:
        return decorator
    return decorator(func)
|
|
1096
1278
|
|
|
1097
1279
|
@agg_function
|
|
1098
1280
|
def max(*names) -> Expr:
|
|
@@ -1111,6 +1293,8 @@ def first(*names) -> Expr:
|
|
|
1111
1293
|
|
|
1112
1294
|
@agg_function
def last(*names) -> Expr:
    """Delegate to ``pl.last`` for the given column names.

    The Polars expression returned here is wrapped into an ``Expr`` by the
    ``agg_function`` decorator.
    """
    # With no column names, use the argument-less form of pl.last.
    return pl.last(*names) if names else pl.last()
|
|
1115
1299
|
|
|
1116
1300
|
|
|
@@ -1124,11 +1308,44 @@ def count(*names) -> Expr:
|
|
|
1124
1308
|
return pl.count(*names)
|
|
1125
1309
|
|
|
1126
1310
|
|
|
1311
|
+
@agg_function
def implode(*names) -> Expr:
    """Delegate to ``pl.implode`` for the given column names.

    The Polars expression returned here is wrapped into an ``Expr`` by the
    ``agg_function`` decorator.
    """
    imploded = pl.implode(*names)
    return imploded
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
@agg_function
def explode(*names) -> Expr:
    """Explode the given columns via ``pl.col(*names).explode()``.

    The Polars expression returned here is wrapped into an ``Expr`` by the
    ``agg_function`` decorator.

    NOTE(review): polars documents ``explode`` as an expression method, not
    as a top-level function, so ``pl.explode(...)`` would raise
    ``AttributeError``. The decorator still renders the repr string as
    ``pl.explode(...)``; confirm whether code generation relies on it.
    """
    return pl.col(*names).explode()
|
|
1319
|
+
|
|
1320
|
+
|
|
1127
1321
|
@agg_function
def sum(*names) -> Expr:
    """Delegate to ``pl.sum`` for the given column names.

    The Polars expression returned here is wrapped into an ``Expr`` by the
    ``agg_function`` decorator. Note that this name shadows the builtin
    ``sum`` inside this module.
    """
    total = pl.sum(*names)
    return total
|
|
1130
1324
|
|
|
1131
1325
|
|
|
1326
|
+
@agg_function
def corr(a: Union[str, Expr], b: Union[str, Expr], *,
         method: str = "pearson", ddof: int = None, propagate_nans: bool = False) -> Expr:
    """
    Compute the correlation between two columns.

    Each operand may be an ``Expr`` (its underlying ``.expr`` is used), a
    column name (turned into ``pl.col(name)``), or anything else, which is
    passed to ``pl.corr`` unchanged. ``method``, ``ddof`` and
    ``propagate_nans`` are forwarded to ``pl.corr`` as given.
    """
    def _to_polars(operand):
        # Unwrap our Expr, resolve plain names, pass everything else through.
        if isinstance(operand, Expr):
            return operand.expr
        if isinstance(operand, str):
            return pl.col(operand)
        return operand

    left = _to_polars(a)
    right = _to_polars(b)
    return pl.corr(left, right, method=method, ddof=ddof, propagate_nans=propagate_nans)
|
|
1336
|
+
|
|
1337
|
+
|
|
1338
|
+
@agg_function
def cov(a: Union[str, Expr], b: Union[str, Expr], ddof: int = 1) -> Expr:
    """
    Compute the covariance between two columns.

    Each operand may be an ``Expr`` (its underlying ``.expr`` is used), a
    column name (turned into ``pl.col(name)``), or anything else, which is
    passed to ``pl.cov`` unchanged together with ``ddof``.
    """
    resolved = []
    for operand in (a, b):
        if isinstance(operand, Expr):
            resolved.append(operand.expr)
        elif isinstance(operand, str):
            resolved.append(pl.col(operand))
        else:
            resolved.append(operand)

    first_expr, second_expr = resolved
    return pl.cov(first_expr, second_expr, ddof=ddof)
|
|
1347
|
+
|
|
1348
|
+
|
|
1132
1349
|
def std(column, ddof) -> Expr:
    """Construct an ``Expr`` from ``column`` tagged with the ``'std'`` aggregation.

    ``column`` is passed as the first positional argument and ``ddof`` as a
    keyword argument to the ``Expr`` constructor.

    NOTE(review): unlike the neighbouring helpers this one is not wrapped by
    ``agg_function`` — confirm that ``Expr`` accepts a ``ddof`` keyword.
    """
    options = {"ddof": ddof, "agg_func": 'std'}
    return Expr(column, **options)
|
|
1134
1351
|
|
|
@@ -1161,3 +1378,4 @@ def cum_count(expr, reverse: bool = False) -> Expr:
|
|
|
1161
1378
|
def when(condition):
    """Begin a when-then-otherwise chain by wrapping ``condition`` in a ``When``."""
    chain = When(condition)
    return chain
|
|
1381
|
+
|