Flowfile 0.3.2__py3-none-any.whl → 0.3.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. flowfile/__init__.py +3 -2
  2. flowfile/web/__init__.py +3 -0
  3. {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/METADATA +4 -3
  4. {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/RECORD +46 -35
  5. flowfile_core/configs/__init__.py +15 -4
  6. flowfile_core/configs/settings.py +5 -3
  7. flowfile_core/configs/utils.py +18 -0
  8. flowfile_core/flowfile/FlowfileFlow.py +13 -18
  9. flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
  10. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
  11. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
  12. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
  13. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
  14. flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
  15. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
  16. flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
  17. flowfile_core/flowfile/flow_node/flow_node.py +2 -1
  18. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
  19. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
  20. flowfile_core/flowfile/utils.py +34 -3
  21. flowfile_core/main.py +2 -3
  22. flowfile_core/routes/secrets.py +1 -1
  23. flowfile_core/schemas/input_schema.py +10 -4
  24. flowfile_core/schemas/transform_schema.py +25 -47
  25. flowfile_frame/__init__.py +11 -4
  26. flowfile_frame/adding_expr.py +280 -0
  27. flowfile_frame/config.py +9 -0
  28. flowfile_frame/expr.py +301 -83
  29. flowfile_frame/expr.pyi +2174 -0
  30. flowfile_frame/expr_name.py +258 -0
  31. flowfile_frame/flow_frame.py +584 -1002
  32. flowfile_frame/flow_frame.pyi +368 -0
  33. flowfile_frame/flow_frame_methods.py +617 -0
  34. flowfile_frame/group_frame.py +89 -42
  35. flowfile_frame/join.py +1 -2
  36. flowfile_frame/lazy.py +704 -0
  37. flowfile_frame/lazy_methods.py +201 -0
  38. flowfile_frame/list_name_space.py +324 -0
  39. flowfile_frame/selectors.py +3 -0
  40. flowfile_frame/series.py +70 -0
  41. flowfile_frame/utils.py +80 -4
  42. {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/LICENSE +0 -0
  43. {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/WHEEL +0 -0
  44. {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/entry_points.txt +0 -0
  45. /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
  46. /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_frame/expr.py CHANGED
@@ -6,9 +6,15 @@ import polars as pl
6
6
  from polars.expr.string import ExprStringNameSpace
7
7
 
8
8
  from flowfile_core.schemas import transform_schema
9
+ from functools import wraps
9
10
 
10
11
  from builtins import len as built_in_len
11
12
 
13
+ from flowfile_frame.config import logger
14
+ from flowfile_frame.expr_name import ExprNameNameSpace
15
+ from flowfile_frame.adding_expr import add_expr_methods
16
+ from flowfile_frame.list_name_space import ExprListNameSpace
17
+
12
18
  # --- TYPE CHECKING IMPORTS ---
13
19
  if TYPE_CHECKING:
14
20
  from flowfile_frame.selectors import Selector
@@ -22,9 +28,11 @@ ExprStrOrList = Union[ExprOrStr, ExprOrStrList]
22
28
 
23
29
  def _repr_args(*args, **kwargs):
24
30
  """Helper to represent arguments for __repr__."""
25
- arg_reprs = [repr(a) for a in args]
31
+ arg_reprs = [a.__repr__() for a in args]
26
32
  kwarg_reprs = []
27
33
  for k, v in kwargs.items():
34
+ if k == '_function_sources':
35
+ continue
28
36
  if isinstance(v, pl.DataType):
29
37
  kwarg_reprs.append(f"{k}={v!s}")
30
38
  elif isinstance(v, type) and issubclass(v, pl.DataType):
@@ -52,24 +60,32 @@ def _get_expr_and_repr(value: Any) -> tuple[Optional[pl.Expr], str]:
52
60
  return pl.lit(value), repr(value)
53
61
 
54
62
 
55
- # --- Namespaces ---
56
-
57
63
  class StringMethods:
58
64
  expr: Optional[ExprStringNameSpace]
65
+ convertable_to_code: bool
66
+ _function_sources: Optional[List[str]]
59
67
 
60
- def __init__(self, parent_expr: 'Expr', parent_repr_str: str):
68
+ def __init__(self, parent_expr: 'Expr', parent_repr_str: str, convertable_to_code: bool = True,
69
+ _function_sources: Optional[List[str]] = None):
61
70
  self.parent = parent_expr
62
71
  self.expr = parent_expr.expr.str if parent_expr.expr is not None else None
63
72
  self.parent_repr_str = parent_repr_str
73
+ self.convertable_to_code = convertable_to_code
74
+ self._function_sources = _function_sources or []
64
75
 
65
- def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool, **kwargs) -> 'Expr':
76
+ def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool,
77
+ convertable_to_code: bool = None, **kwargs) -> 'Expr':
66
78
  args_repr = _repr_args(*args, **kwargs)
67
79
  new_repr = f"{self.parent_repr_str}.str.{method_name}({args_repr})"
68
- new_expr = Expr(result_expr, self.parent.name, repr_str=new_repr,
80
+ if convertable_to_code is None:
81
+ convertable_to_code = self.convertable_to_code
82
+ new_expr = Expr(result_expr, self.parent.column_name, repr_str=new_repr,
69
83
  initial_column_name=self.parent._initial_column_name,
70
84
  selector=None,
71
85
  agg_func=self.parent.agg_func,
72
- is_complex=is_complex)
86
+ is_complex=is_complex,
87
+ convertable_to_code=convertable_to_code,
88
+ _function_sources=self._function_sources)
73
89
  return new_expr
74
90
 
75
91
  # ... (String methods remain unchanged from your provided code) ...
@@ -131,24 +147,31 @@ class StringMethods:
131
147
 
132
148
  class DateTimeMethods:
133
149
  expr: Optional[Any]
150
+ convertable_to_code: bool
151
+ _function_sources: Optional[List[str]]
134
152
 
135
- def __init__(self, parent_expr: 'Expr', parent_repr_str: str):
153
+ def __init__(self, parent_expr: 'Expr', parent_repr_str: str, convertable_to_code: bool = True,
154
+ _function_sources: Optional[List[str]] = None):
136
155
  self.parent = parent_expr
137
156
  self.expr = parent_expr.expr.dt if parent_expr.expr is not None else None
138
157
  self.parent_repr_str = parent_repr_str
158
+ self.convertable_to_code = convertable_to_code
159
+ self._function_sources = _function_sources or []
139
160
 
140
- def _create_next_expr(self, method_name: str, result_expr: Optional[pl.Expr], *args, **kwargs) -> 'Expr':
161
+ def _create_next_expr(self, method_name: str, result_expr: Optional[pl.Expr], convertable_to_code: bool = None, *args, **kwargs) -> 'Expr':
141
162
  args_repr = _repr_args(*args, **kwargs)
142
163
  new_repr = f"{self.parent_repr_str}.dt.{method_name}({args_repr})"
143
-
144
- new_expr = Expr(result_expr, self.parent.name, repr_str=new_repr,
164
+ if convertable_to_code is None:
165
+ convertable_to_code = self.convertable_to_code
166
+ new_expr = Expr(result_expr, self.parent.column_name, repr_str=new_repr,
145
167
  initial_column_name=self.parent._initial_column_name,
146
168
  selector=None,
147
169
  agg_func=self.parent.agg_func,
148
- is_complex=True)
170
+ is_complex=True,
171
+ convertable_to_code=convertable_to_code,
172
+ _function_sources=self._function_sources)
149
173
  return new_expr
150
174
 
151
- # ... (DateTime methods remain unchanged from your provided code) ...
152
175
  def year(self):
153
176
  res_expr = self.expr.year() if self.expr is not None else None
154
177
  return self._create_next_expr("year", res_expr)
@@ -198,8 +221,11 @@ class Expr:
198
221
  expr: Optional[pl.Expr]
199
222
  agg_func: Optional[str]
200
223
  _repr_str: str
201
- name: Optional[str]
224
+ _name_namespace: Optional[ExprNameNameSpace]
225
+ column_name: Optional[str]
202
226
  is_complex: bool = False
227
+ convertable_to_code: bool
228
+ _function_sources: List[str] # Add this attribute
203
229
 
204
230
  def __init__(self,
205
231
  expr: Optional[pl.Expr],
@@ -209,14 +235,18 @@ class Expr:
209
235
  selector: Optional['Selector'] = None,
210
236
  agg_func: Optional[str] = None,
211
237
  ddof: Optional[int] = None,
212
- is_complex: bool = False):
238
+ is_complex: bool = False,
239
+ convertable_to_code: bool = True,
240
+ _function_sources: Optional[List[str]] = None):
213
241
 
214
242
  self.expr = expr
215
- self.name = column_name
243
+ self.column_name = column_name
216
244
  self.agg_func = agg_func
217
245
  self.selector = selector
218
246
  self._initial_column_name = initial_column_name or column_name
219
247
  self.is_complex = is_complex
248
+ self.convertable_to_code = convertable_to_code
249
+ self._function_sources = _function_sources or []
220
250
  # --- Determine Representation String ---
221
251
  if repr_str is not None:
222
252
  self._repr_str = repr_str
@@ -238,17 +268,18 @@ class Expr:
238
268
  else:
239
269
  raise ValueError("Cannot initialize Expr without expr, repr_str, or selector+agg_func")
240
270
 
241
- if self.name is None and self.selector is None and self.expr is not None:
271
+ if self.column_name is None and self.selector is None and self.expr is not None:
242
272
  try:
243
- self.name = self.expr._output_name
273
+ self.column_name = self.expr._output_name
244
274
  except AttributeError:
245
275
  try:
246
- self.name = self.expr._name
276
+ self.column_name = self.expr._name
247
277
  except AttributeError:
248
278
  pass
249
-
279
+ self._list_namespace: Optional['ExprListNameSpace'] = None
250
280
  self._str_namespace: Optional['StringMethods'] = None
251
281
  self._dt_namespace: Optional['DateTimeMethods'] = None
282
+ self._name_namespace: Optional['ExprNameNameSpace'] = None
252
283
 
253
284
  def __repr__(self) -> str:
254
285
  return self._repr_str
@@ -308,19 +339,55 @@ class Expr:
308
339
  # If we reach here, it's a simple expression (just column reference and maybe aggregation)
309
340
  return True
310
341
 
311
- def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool, **kwargs) -> 'Expr':
342
+ def arg_unique(self) -> "Expr":
343
+ result_expr = self.expr.arg_unique() if self.expr is not None else None
344
+ return self._create_next_expr(method_name="arg_unique", result_expr=result_expr, is_complex=True)
345
+
346
+ def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> "Expr":
347
+ result_expr = self.expr.arg_sort(descending=descending, nulls_last=nulls_last) if self.expr is not None else None
348
+ return self._create_next_expr(descending=descending, nulls_last=nulls_last, method_name="arg_sort",
349
+ result_expr=result_expr, is_complex=True)
350
+
351
+
352
+ def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr],
353
+ convertable_to_code: bool = None, is_complex: bool,
354
+ _function_sources: Optional[List[str]] = None, **kwargs) -> 'Expr':
312
355
  """Creates a new Expr instance, appending method call to repr string."""
313
- args_repr = _repr_args(*args, **kwargs)
356
+ # Filter out _function_sources from kwargs to avoid passing it to _repr_args
357
+ filtered_kwargs = {k: v for k, v in kwargs.items() if k != '_function_sources'}
358
+ args_repr = _repr_args(*args, **filtered_kwargs)
314
359
  new_repr = f"{self._repr_str}.{method_name}({args_repr})"
315
360
 
361
+ if convertable_to_code is None:
362
+ convertable_to_code = self.convertable_to_code
363
+
364
+ # Combine function sources from current expression and new ones
365
+ combined_function_sources = self._function_sources.copy()
366
+ if _function_sources:
367
+ combined_function_sources.extend(_function_sources)
368
+
316
369
  # Create new instance, inheriting current agg_func status by default
317
- new_expr_instance = Expr(result_expr, self.name, repr_str=new_repr,
318
- initial_column_name=self._initial_column_name,
319
- selector=None,
320
- agg_func=self.agg_func,
321
- is_complex=is_complex)
370
+ new_expr_instance = Expr(
371
+ result_expr,
372
+ self.column_name,
373
+ repr_str=new_repr,
374
+ initial_column_name=self._initial_column_name,
375
+ selector=None,
376
+ agg_func=self.agg_func,
377
+ is_complex=is_complex,
378
+ convertable_to_code=convertable_to_code,
379
+ _function_sources=combined_function_sources # Pass combined function sources
380
+ )
322
381
  return new_expr_instance
323
382
 
383
+
384
+ @property
385
+ def name(self) -> ExprNameNameSpace:
386
+ """Access the name namespace for expression name operations."""
387
+ if self._name_namespace is None:
388
+ self._name_namespace = ExprNameNameSpace(self, self._repr_str)
389
+ return self._name_namespace
390
+
324
391
  def _create_binary_op_expr(
325
392
  self, op_symbol: str, other: Any, result_expr: Optional[pl.Expr]
326
393
  ) -> "Expr":
@@ -346,7 +413,7 @@ class Expr:
346
413
  return Expr(
347
414
  result_expr,
348
415
  None,
349
- repr_str=f"({new_repr})", # Add parentheses around the ENTIRE expression
416
+ repr_str=f"({new_repr})",
350
417
  initial_column_name=self._initial_column_name,
351
418
  selector=None,
352
419
  agg_func=None,
@@ -356,7 +423,7 @@ class Expr:
356
423
  @property
357
424
  def str(self) -> StringMethods:
358
425
  if self._str_namespace is None:
359
- self._str_namespace = StringMethods(self, self._repr_str)
426
+ self._str_namespace = StringMethods(self, self._repr_str, convertable_to_code=self.convertable_to_code)
360
427
  return self._str_namespace
361
428
 
362
429
  @property
@@ -365,12 +432,30 @@ class Expr:
365
432
  self._dt_namespace = DateTimeMethods(self, self._repr_str)
366
433
  return self._dt_namespace
367
434
 
435
+ @property
436
+ def list(self) -> ExprListNameSpace:
437
+ if self._list_namespace is None:
438
+ self._list_namespace = ExprListNameSpace(self, self._repr_str)
439
+ return self._list_namespace
440
+
368
441
  def sum(self):
369
442
  result_expr = self.expr.sum() if self.expr is not None else None
370
443
  result = self._create_next_expr(method_name="sum", result_expr=result_expr, is_complex=self.is_complex)
371
444
  result.agg_func = "sum"
372
445
  return result
373
446
 
447
+ def implode(self):
448
+ result_expr = self.expr.implode() if self.expr is not None else None
449
+ result = self._create_next_expr(method_name="implode", result_expr=result_expr, is_complex=self.is_complex)
450
+ result.agg_func = "implode"
451
+ return result
452
+
453
+ def explode(self):
454
+ result_expr = self.expr.explode() if self.expr is not None else None
455
+ result = self._create_next_expr(method_name="explode", result_expr=result_expr, is_complex=self.is_complex)
456
+ result.agg_func = "explode"
457
+ return result
458
+
374
459
  def mean(self):
375
460
  result_expr = self.expr.mean() if self.expr is not None else None
376
461
  result = self._create_next_expr(method_name="mean", result_expr=result_expr, is_complex=self.is_complex)
@@ -490,44 +575,43 @@ class Expr:
490
575
  # --- Right-side Arithmetic ---
491
576
  def __radd__(self, other):
492
577
  other_expr, other_repr = _get_expr_and_repr(other)
493
- new_repr = f"{other_repr} + {self._repr_str}"
578
+ new_repr = f"({other_repr} + {self._repr_str})"
494
579
  res_expr = other_expr + self.expr if other_expr is not None and self.expr is not None else None
495
- # Right-side ops also clear agg_func
496
580
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
497
581
 
498
582
  def __rsub__(self, other):
499
583
  other_expr, other_repr = _get_expr_and_repr(other)
500
- new_repr = f"{other_repr} - {self._repr_str}"
584
+ new_repr = f"({other_repr} - {self._repr_str})"
501
585
  res_expr = other_expr - self.expr if other_expr is not None and self.expr is not None else None
502
586
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
503
587
 
504
588
  def __rmul__(self, other):
505
589
  other_expr, other_repr = _get_expr_and_repr(other)
506
- new_repr = f"{other_repr} * {self._repr_str}"
590
+ new_repr = f"({other_repr} * {self._repr_str})"
507
591
  res_expr = other_expr * self.expr if other_expr is not None and self.expr is not None else None
508
592
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
509
593
 
510
594
  def __rtruediv__(self, other):
511
595
  other_expr, other_repr = _get_expr_and_repr(other)
512
- new_repr = f"{other_repr} / {self._repr_str}"
596
+ new_repr = f"({other_repr} / {self._repr_str})"
513
597
  res_expr = other_expr / self.expr if other_expr is not None and self.expr is not None else None
514
598
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
515
599
 
516
600
  def __rfloordiv__(self, other):
517
601
  other_expr, other_repr = _get_expr_and_repr(other)
518
- new_repr = f"{other_repr} // {self._repr_str}"
602
+ new_repr = f"({other_repr} // {self._repr_str})"
519
603
  res_expr = other_expr // self.expr if other_expr is not None and self.expr is not None else None
520
604
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
521
605
 
522
606
  def __rmod__(self, other):
523
607
  other_expr, other_repr = _get_expr_and_repr(other)
524
- new_repr = f"{other_repr} % {self._repr_str}"
608
+ new_repr = f"({other_repr} % {self._repr_str})"
525
609
  res_expr = other_expr % self.expr if other_expr is not None and self.expr is not None else None
526
610
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
527
611
 
528
612
  def __rpow__(self, other):
529
613
  other_expr, other_repr = _get_expr_and_repr(other)
530
- new_repr = f"{other_repr} ** {self._repr_str}"
614
+ new_repr = f"({other_repr} ** {self._repr_str})"
531
615
  base_expr = pl.lit(other) if not isinstance(other, (Expr, pl.Expr)) else other_expr
532
616
  res_expr = base_expr.pow(self.expr) if self.expr is not None and base_expr is not None else None
533
617
  return Expr(res_expr, None, repr_str=new_repr, agg_func=None, is_complex=True)
@@ -553,18 +637,18 @@ class Expr:
553
637
  res_expr = self.expr < other_expr if self.expr is not None and other_expr is not None else None
554
638
  return self._create_binary_op_expr("<", other, res_expr)
555
639
 
556
- def __ge__(self, other):
640
+ def __ge__(self, other) -> "Expr":
557
641
  other_expr, _ = _get_expr_and_repr(other)
558
642
  res_expr = self.expr >= other_expr if self.expr is not None and other_expr is not None else None
559
643
  return self._create_binary_op_expr(">=", other, res_expr)
560
644
 
561
- def __le__(self, other):
645
+ def __le__(self, other) -> "Expr":
562
646
  other_expr, _ = _get_expr_and_repr(other)
563
647
  res_expr = self.expr <= other_expr if self.expr is not None and other_expr is not None else None
564
648
  return self._create_binary_op_expr("<=", other, res_expr)
565
649
 
566
650
  # --- Logical operations ---
567
- def __and__(self, other):
651
+ def __and__(self, other) -> "Expr":
568
652
  from flowfile_frame.selectors import Selector
569
653
  if isinstance(other, Selector):
570
654
  raise TypeError("Unsupported operation: Expr & Selector")
@@ -572,7 +656,7 @@ class Expr:
572
656
  res_expr = self.expr & other_expr if self.expr is not None and other_expr is not None else None
573
657
  return self._create_binary_op_expr("&", other, res_expr)
574
658
 
575
- def __or__(self, other):
659
+ def __or__(self, other) -> "Expr":
576
660
  from flowfile_frame.selectors import Selector
577
661
  if isinstance(other, Selector):
578
662
  raise TypeError("Unsupported operation: Expr | Selector")
@@ -580,14 +664,19 @@ class Expr:
580
664
  res_expr = self.expr | other_expr if self.expr is not None and other_expr is not None else None
581
665
  return self._create_binary_op_expr("|", other, res_expr)
582
666
 
583
- def __invert__(self):
667
+ def __invert__(self) -> "Expr":
584
668
  new_repr = f"~({self._repr_str})"
585
669
  res_expr = ~self.expr if self.expr is not None else None
586
670
  # Invert clears agg_func
587
671
  return Expr(res_expr, None, repr_str=new_repr,
588
672
  initial_column_name=self._initial_column_name, agg_func=None)
589
673
 
590
- # --- Other useful methods ---
674
+ def __neg__(self) -> "Expr":
675
+ new_repr = f"-{self._repr_str}"
676
+ res_expr = -self.expr if self.expr is not None else None
677
+ return Expr(res_expr, None, repr_str=new_repr,
678
+ initial_column_name=self._initial_column_name, agg_func=None)
679
+
591
680
  def is_null(self):
592
681
  result_expr = self.expr.is_null() if self.expr is not None else None
593
682
  # is_null is not an aggregation, resets agg_func
@@ -636,16 +725,17 @@ class Expr:
636
725
  try:
637
726
  res_expr = self.expr.filter(*processed_predicates)
638
727
  except Exception as e:
639
- print(f"Warning: Could not create polars expression for filter(): {e}")
728
+ logger.warning("Could not create polars expression for filter(): {e}")
640
729
  pass # res_expr will remain None
641
730
 
642
731
  return Expr(
643
732
  res_expr,
644
- self.name,
733
+ self.column_name,
645
734
  repr_str=f"{self._repr_str}.filter({all_args_str})",
646
735
  initial_column_name=self._initial_column_name,
647
736
  selector=None, # Filter typically removes selector link
648
737
  agg_func=self.agg_func, # Preserve aggregation status
738
+ convertable_to_code=self.convertable_to_code
649
739
  )
650
740
 
651
741
  def is_not_null(self):
@@ -670,7 +760,9 @@ class Expr:
670
760
  initial_column_name=self._initial_column_name,
671
761
  selector=None,
672
762
  agg_func=self.agg_func,
673
- is_complex=self.is_complex)
763
+ is_complex=self.is_complex,
764
+ convertable_to_code=self.convertable_to_code,
765
+ _function_sources = self._function_sources)
674
766
  return new_instance
675
767
 
676
768
  def fill_null(self, value):
@@ -780,7 +872,7 @@ class Expr:
780
872
  res_expr = None
781
873
  if self.expr is not None:
782
874
  try:
783
- if len(processed_partition_cols) == 1:
875
+ if built_in_len(processed_partition_cols) == 1:
784
876
  partition_arg = (
785
877
  processed_partition_cols[0].expr
786
878
  if hasattr(processed_partition_cols[0], "expr")
@@ -817,23 +909,49 @@ class Expr:
817
909
 
818
910
  except Exception as e:
819
911
 
820
- print(f"Warning: Could not create polars expression for over(): {e}")
912
+ logger.warning("Could not create polars expression for over(): {e}")
821
913
  pass
822
914
 
823
915
  return Expr(
824
916
  res_expr,
825
- self.name,
917
+ self.column_name,
826
918
  repr_str=f"{self._repr_str}.over({args_str_for_repr})",
827
919
  initial_column_name=self._initial_column_name,
828
920
  selector=None,
829
921
  agg_func=None,
922
+ _function_sources = self._function_sources
830
923
  )
831
924
 
925
+ def get_polars_code(self) -> str:
926
+ """
927
+ Get the Polars code representation of this expression, including any function definitions.
928
+
929
+ Returns
930
+ -------
931
+ str
932
+ The complete Polars code including function definitions if any.
933
+ """
934
+ if not self._function_sources:
935
+ return self._repr_str
936
+
937
+ # Remove duplicates while preserving order
938
+ unique_sources = []
939
+ seen = set()
940
+ for source in self._function_sources:
941
+ if source not in seen:
942
+ seen.add(source)
943
+ unique_sources.append(source)
944
+
945
+ # Build the complete code with function definitions
946
+ functions_section = "# Function definitions\n" + "\n\n".join(unique_sources)
947
+ return functions_section + "\n#─────SPLIT─────\n\n" + self._repr_str
948
+
832
949
  def sort(self, *, descending=False, nulls_last=False):
833
950
  res_expr = self.expr.sort(descending=descending, nulls_last=nulls_last) if self.expr is not None else None
834
- return Expr(res_expr, self.name,
951
+ return Expr(res_expr, self.column_name,
835
952
  repr_str=f"{self._repr_str}.sort(descending={descending}, nulls_last={nulls_last})",
836
- initial_column_name=self._initial_column_name, agg_func=None)
953
+ initial_column_name=self._initial_column_name, agg_func=None,
954
+ _function_sources=self._function_sources)
837
955
 
838
956
  def cast(self, dtype: Union[pl.DataType, str, pl.datatypes.classes.DataTypeClass], *, strict=True):
839
957
  """ Casts the Expr to a specified data type. """
@@ -853,12 +971,14 @@ class Expr:
853
971
 
854
972
  res_expr = self.expr.cast(pl_dtype, strict=strict) if self.expr is not None else None
855
973
  # Cast preserves aggregation status (e.g., cast(col('a').sum()))
856
- new_expr = Expr(res_expr, self.name,
974
+ new_expr = Expr(res_expr, self.column_name,
857
975
  repr_str=f"{self._repr_str}.cast({dtype_repr}, strict={strict})",
858
976
  initial_column_name=self._initial_column_name,
859
977
  selector=None,
860
978
  agg_func=self.agg_func,
861
- is_complex=True)
979
+ is_complex=True,
980
+ convertable_to_code=self.convertable_to_code,
981
+ _function_sources=self._function_sources)
862
982
  return new_expr
863
983
 
864
984
 
@@ -872,7 +992,7 @@ class Column(Expr):
872
992
  repr_str=f"pl.col('{name}')",
873
993
  initial_column_name=select_input.old_name if select_input else name,
874
994
  selector=None,
875
- agg_func=None)
995
+ agg_func=None,)
876
996
  self._select_input = select_input or transform_schema.SelectInput(old_name=name)
877
997
 
878
998
  def alias(self, new_name: str) -> "Column":
@@ -946,7 +1066,7 @@ class Column(Expr):
946
1066
  def to_select_input(self) -> transform_schema.SelectInput:
947
1067
  """Convert Column state back to a SelectInput schema object."""
948
1068
  # This logic seems correct based on your previous version
949
- current_name = self.name
1069
+ current_name = self.column_name
950
1070
  original_name = self._select_input.old_name
951
1071
  new_name_attr = self._select_input.new_name
952
1072
 
@@ -972,6 +1092,9 @@ class Column(Expr):
972
1092
  return super().dt
973
1093
 
974
1094
 
1095
+ add_expr_methods(Expr)
1096
+
1097
+
975
1098
  class When(Expr):
976
1099
  """Class that represents a when-then-otherwise expression chain."""
977
1100
 
@@ -1006,7 +1129,7 @@ class When(Expr):
1006
1129
  try:
1007
1130
  self._branch_expr = pl.when(self.condition).then(value_expr)
1008
1131
  except Exception as e:
1009
- print(f"Warning: Error in then() creation: {e}")
1132
+ logger.warning(f"Error in then() creation: {e}")
1010
1133
 
1011
1134
  return self
1012
1135
 
@@ -1021,14 +1144,14 @@ class When(Expr):
1021
1144
  if self._branch_expr is not None:
1022
1145
  pl_expr = self._branch_expr.otherwise(value_expr)
1023
1146
  except Exception as e:
1024
- print(f"Warning: Could not create when-then-otherwise expression: {e}")
1147
+ logger.warning(f"Could not create when-then-otherwise expression: {e}")
1025
1148
 
1026
1149
  return Expr(pl_expr, repr_str=final_repr)
1027
1150
 
1028
1151
  def when(self, condition):
1029
1152
  """Create a new branch in the chain."""
1030
1153
  if self._branch_expr is None:
1031
- print("Warning: Cannot add new branch without a then() first")
1154
+ logger.warning("Cannot add new branch without a then() first")
1032
1155
  return self
1033
1156
 
1034
1157
  condition_expr, condition_repr = self._get_expr_and_repr(condition)
@@ -1038,7 +1161,7 @@ class When(Expr):
1038
1161
  try:
1039
1162
  self._branch_expr = self._branch_expr.when(condition_expr)
1040
1163
  except Exception as e:
1041
- print(f"Warning: Error adding new when() branch: {e}")
1164
+ logger.warning(f"Error adding new when() branch: {e}")
1042
1165
 
1043
1166
  # Return self for chaining
1044
1167
  return self
@@ -1058,41 +1181,100 @@ def column(name: str) -> Column:
1058
1181
  def lit(value: Any) -> Expr:
1059
1182
  """Creates a Literal expression."""
1060
1183
  # Literals don't have an agg_func
1061
- return Expr(pl.lit(value), repr_str=f"pl.lit({repr(value)})", agg_func=None)
1184
+ return Expr(pl.lit(value, allow_object=True), repr_str=f"pl.lit({repr(value)})", agg_func=None)
1062
1185
 
1063
1186
 
1064
1187
  def len() -> Expr:
1065
- return Expr(pl.len()).alias('number_of_records')
1188
+ return Expr(pl.len(), repr_str="pl.len()")
1066
1189
 
1067
1190
 
1068
- def agg_function(func):
1191
+ def agg_function(func=None, *, customize_repr=True):
1069
1192
  """
1070
- Decorator for aggregation functions that sets appropriate properties based on number of arguments.
1071
- Uses the function name as the aggregation function name.
1193
+ Enhanced decorator for aggregation functions that sets appropriate properties
1194
+ and handles representation issues, now supporting all args and kwargs.
1072
1195
 
1073
- Parameters:
1074
- -----------
1075
- func : function
1196
+ Parameters
1197
+ ----------
1198
+ func : function, optional
1076
1199
  The aggregation function to decorate
1200
+ customize_repr : bool, default True
1201
+ Whether to create a custom representation string for the function
1077
1202
 
1078
- Returns:
1079
- --------
1080
- wrapper
1081
- A wrapped function that returns the properly configured Expr
1203
+ Returns
1204
+ -------
1205
+ function
1206
+ A wrapped function that returns a properly configured Expr
1082
1207
  """
1083
- agg_func_name = func.__name__ # Use the function name as the agg_func
1084
-
1085
- def wrapper(*names):
1086
- # Get the Polars expression from the original function
1087
- pl_expr = func(*names)
1088
- if built_in_len(names) == 1 and isinstance(names[0], str):
1089
- return Expr(pl_expr, agg_func=agg_func_name, initial_column_name=names[0], is_complex=False)
1090
- elif built_in_len(names) == 1 and isinstance(names[0], Expr):
1091
- return Expr(pl_expr, agg_func=agg_func_name, initial_column_name=names[0].name, is_complex=names[0].is_complex)
1092
- else:
1093
- return Expr(pl_expr, agg_func=agg_func_name, is_complex=True)
1094
- return wrapper
1208
+ def decorator(func):
1209
+ agg_func_name = func.__name__ # Use the function name as the agg_func
1210
+
1211
+ @wraps(func)
1212
+ def wrapper(*args, **kwargs):
1213
+ from flowfile_frame.expr import Expr
1214
+ # Get the Polars expression from the original function
1215
+ pl_expr = func(*args, **kwargs)
1216
+
1217
+ # Generate representation string
1218
+ if customize_repr:
1219
+ # Process positional arguments
1220
+ args_reprs = []
1221
+ for arg in args:
1222
+ if isinstance(arg, str):
1223
+ args_reprs.append(f"'{arg}'")
1224
+ elif hasattr(arg, '_repr_str'):
1225
+ args_reprs.append(arg._repr_str)
1226
+ else:
1227
+ args_reprs.append(repr(arg))
1228
+
1229
+ # Process keyword arguments
1230
+ kwargs_reprs = []
1231
+ for k, v in kwargs.items():
1232
+ if isinstance(v, str) and not (k == 'method' or k == 'mapping_strategy'):
1233
+ kwargs_reprs.append(f"{k}='{v}'")
1234
+ elif isinstance(v, pl.DataType):
1235
+ kwargs_reprs.append(f"{k}={v!s}")
1236
+ elif isinstance(v, type) and issubclass(v, pl.DataType):
1237
+ kwargs_reprs.append(f"{k}=pl.{v.__name__}")
1238
+ else:
1239
+ kwargs_reprs.append(f"{k}={repr(v)}")
1240
+
1241
+ # Combine into final representation
1242
+ all_args = args_reprs + kwargs_reprs
1243
+ args_str = ", ".join(all_args)
1244
+ repr_str = f"pl.{agg_func_name}({args_str})"
1245
+ else:
1246
+ # Use default representation (rarely needed)
1247
+ repr_str = None
1248
+
1249
+ # Determine initial column name for tracking (if applicable)
1250
+ initial_column_name = None
1251
+ if built_in_len(args) > 0:
1252
+ first_arg = args[0]
1253
+ if isinstance(first_arg, str):
1254
+ initial_column_name = first_arg
1255
+ elif hasattr(first_arg, 'column_name'):
1256
+ initial_column_name = first_arg.column_name
1257
+
1258
+ # Determine if this is a complex expression
1259
+ is_complex = True
1260
+ if built_in_len(args) == 1 and isinstance(args[0], str) and not kwargs:
1261
+ is_complex = False
1262
+
1263
+ # Create the expression with all necessary properties
1264
+ return Expr(
1265
+ pl_expr,
1266
+ repr_str=repr_str,
1267
+ initial_column_name=initial_column_name,
1268
+ agg_func=agg_func_name,
1269
+ is_complex=is_complex,
1270
+ )
1095
1271
 
1272
+ return wrapper
1273
+
1274
+ # Handle both @agg_function and @agg_function(customize_repr=True)
1275
+ if func is None:
1276
+ return decorator
1277
+ return decorator(func)
1096
1278
 
1097
1279
  @agg_function
1098
1280
  def max(*names) -> Expr:
@@ -1111,6 +1293,8 @@ def first(*names) -> Expr:
1111
1293
 
1112
1294
  @agg_function
1113
1295
  def last(*names) -> Expr:
1296
+ if built_in_len(names) == 0:
1297
+ return pl.last()
1114
1298
  return pl.last(*names)
1115
1299
 
1116
1300
 
@@ -1124,11 +1308,44 @@ def count(*names) -> Expr:
1124
1308
  return pl.count(*names)
1125
1309
 
1126
1310
 
1311
+ @agg_function
1312
+ def implode(*names) -> Expr:
1313
+ return pl.implode(*names)
1314
+
1315
+
1316
+ @agg_function
1317
+ def explode(*names) -> Expr:
1318
+ return pl.explode(*names)
1319
+
1320
+
1127
1321
  @agg_function
1128
1322
  def sum(*names) -> Expr:
1129
1323
  return pl.sum(*names)
1130
1324
 
1131
1325
 
1326
+ @agg_function
1327
+ def corr(a: Union[str, Expr], b: Union[str, Expr], *,
1328
+ method: str = "pearson", ddof: int = None, propagate_nans: bool = False) -> Expr:
1329
+ """
1330
+ Compute the correlation between two columns.
1331
+ """
1332
+ a_expr = a.expr if isinstance(a, Expr) else pl.col(a) if isinstance(a, str) else a
1333
+ b_expr = b.expr if isinstance(b, Expr) else pl.col(b) if isinstance(b, str) else b
1334
+
1335
+ return pl.corr(a_expr, b_expr, method=method, ddof=ddof, propagate_nans=propagate_nans)
1336
+
1337
+
1338
+ @agg_function
1339
+ def cov(a: Union[str, Expr], b: Union[str, Expr], ddof: int = 1) -> Expr:
1340
+ """
1341
+ Compute the covariance between two columns.
1342
+ """
1343
+ a_expr = a.expr if isinstance(a, Expr) else pl.col(a) if isinstance(a, str) else a
1344
+ b_expr = b.expr if isinstance(b, Expr) else pl.col(b) if isinstance(b, str) else b
1345
+
1346
+ return pl.cov(a_expr, b_expr, ddof=ddof)
1347
+
1348
+
1132
1349
  def std(column, ddof) -> Expr:
1133
1350
  return Expr(column, ddof=ddof, agg_func='std')
1134
1351
 
@@ -1161,3 +1378,4 @@ def cum_count(expr, reverse: bool = False) -> Expr:
1161
1378
  def when(condition):
1162
1379
  """Start a when-then-otherwise expression."""
1163
1380
  return When(condition)
1381
+