Flowfile 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (46)
  1. flowfile/__init__.py +2 -1
  2. flowfile/web/__init__.py +3 -0
  3. {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -1
  4. {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +46 -35
  5. flowfile_core/configs/__init__.py +15 -4
  6. flowfile_core/configs/settings.py +5 -3
  7. flowfile_core/configs/utils.py +18 -0
  8. flowfile_core/flowfile/FlowfileFlow.py +13 -18
  9. flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
  10. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
  11. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
  12. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
  13. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
  14. flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
  15. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
  16. flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
  17. flowfile_core/flowfile/flow_node/flow_node.py +2 -1
  18. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
  19. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
  20. flowfile_core/flowfile/utils.py +34 -3
  21. flowfile_core/main.py +2 -3
  22. flowfile_core/routes/secrets.py +1 -1
  23. flowfile_core/schemas/input_schema.py +10 -4
  24. flowfile_core/schemas/transform_schema.py +25 -47
  25. flowfile_frame/__init__.py +11 -4
  26. flowfile_frame/adding_expr.py +280 -0
  27. flowfile_frame/config.py +9 -0
  28. flowfile_frame/expr.py +301 -83
  29. flowfile_frame/expr.pyi +2174 -0
  30. flowfile_frame/expr_name.py +258 -0
  31. flowfile_frame/flow_frame.py +587 -1002
  32. flowfile_frame/flow_frame.pyi +336 -0
  33. flowfile_frame/flow_frame_methods.py +617 -0
  34. flowfile_frame/group_frame.py +89 -42
  35. flowfile_frame/join.py +1 -2
  36. flowfile_frame/lazy.py +704 -0
  37. flowfile_frame/lazy_methods.py +201 -0
  38. flowfile_frame/list_name_space.py +324 -0
  39. flowfile_frame/selectors.py +3 -0
  40. flowfile_frame/series.py +70 -0
  41. flowfile_frame/utils.py +80 -4
  42. {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
  43. {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
  44. {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
  45. /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
  46. /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_frame/lazy.py ADDED
@@ -0,0 +1,704 @@
1
+ import inspect
2
+ from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
3
+ import polars as pl
4
+ from flowfile_frame.flow_frame import FlowFrame, can_be_expr, generate_node_id
5
+ from flowfile_core.flowfile.FlowfileFlow import FlowGraph
6
+ from flowfile_frame.expr import Expr
7
+ from flowfile_frame.utils import _get_function_source
8
+ from typing import cast
9
+ from functools import wraps
10
+
11
+
12
+ def _determine_return_type(func_signature: inspect.Signature) -> Literal["FlowFrame", "Expr"]:
13
+ """
14
+ Determine the return type based on the function signature.
15
+
16
+ Args:
17
+ func_signature: The inspect.Signature of the polars function
18
+
19
+ Returns:
20
+ Either "FlowFrame" or "Expr" based on the return annotation
21
+
22
+ Raises:
23
+ ValueError: If the function doesn't return a Frame or Expr
24
+ """
25
+ return_annotation = str(func_signature.return_annotation)
26
+
27
+ if return_annotation in ("DataFrame", "LazyFrame"):
28
+ return "FlowFrame"
29
+ elif return_annotation == "Expr":
30
+ return "Expr"
31
+ else:
32
+ # Allow for type aliases or Union types that might include DataFrame/LazyFrame/Expr
33
+ if "DataFrame" in return_annotation or "LazyFrame" in return_annotation:
34
+ return "FlowFrame"
35
+ if "Expr" in return_annotation and "DataFrame" not in return_annotation and "LazyFrame" not in return_annotation:
36
+ return "Expr"
37
+ raise ValueError(
38
+ f"Function does not return a Frame or Expr. "
39
+ f"Got return annotation: {return_annotation}"
40
+ )
41
+
42
+
43
+ def _analyze_parameters(func_signature: inspect.Signature) -> Tuple[
44
+ Dict[str, bool], List[Tuple[str, inspect.Parameter]]]:
45
+ """
46
+ Analyze function parameters to determine which can accept Expr types.
47
+
48
+ Args:
49
+ func_signature: The inspect.Signature of the polars function
50
+
51
+ Returns:
52
+ Tuple of (param_can_be_expr dict, param_list)
53
+ """
54
+ param_can_be_expr = {}
55
+ param_list = list(func_signature.parameters.items())
56
+
57
+ for param_name, param in param_list:
58
+ param_can_be_expr[param_name] = can_be_expr(param)
59
+
60
+ return param_can_be_expr, param_list
61
+
62
+
63
def _deep_convert_to_polars_expr(obj: Any) -> Any:
    """
    Replace every FlowFile Expr inside ``obj`` with its underlying ``.expr`` value.

    Lists, tuples and dict values are walked recursively; dict keys are kept as-is.

    Args:
        obj: Object to convert (Expr, list, tuple, dict, or anything else)

    Returns:
        A new list/tuple/dict with converted contents, the ``.expr`` of an Expr,
        or ``obj`` itself for any other type (including pl.Expr)
    """
    if isinstance(obj, Expr):
        return obj.expr
    if isinstance(obj, (list, tuple)):
        converted_items = [_deep_convert_to_polars_expr(element) for element in obj]
        # Subclasses collapse to the plain built-in container type.
        return converted_items if isinstance(obj, list) else tuple(converted_items)
    if isinstance(obj, dict):
        return {key: _deep_convert_to_polars_expr(value) for key, value in obj.items()}
    return obj
88
+
89
+
90
def _deep_get_repr(obj: Any, can_be_expr: bool = False) -> str:
    """
    Get string representation of an object, handling nested structures with Expr objects.

    Args:
        obj: Object to get representation for
        can_be_expr: Whether this parameter can accept Expr types

    Returns:
        String representation suitable for code generation. Containers are rendered
        recursively; a one-element tuple is rendered with a trailing comma so the
        generated source still evaluates to a tuple.
    """
    from flowfile_frame.expr import _get_expr_and_repr

    if isinstance(obj, (Expr, pl.Expr)):
        # FlowFile and Polars expressions share the same repr helper.
        _, repr_str = _get_expr_and_repr(obj)
        return repr_str
    if isinstance(obj, list):
        inner_reprs = [_deep_get_repr(item, can_be_expr) for item in obj]
        return f"[{', '.join(inner_reprs)}]"
    if isinstance(obj, tuple):
        inner_reprs = [_deep_get_repr(item, can_be_expr) for item in obj]
        if len(inner_reprs) == 1:
            # "(x)" is just a parenthesised x in Python; a 1-tuple needs "(x,)".
            return f"({inner_reprs[0]},)"
        return f"({', '.join(inner_reprs)})"
    if isinstance(obj, dict):
        items = [f"{repr(k)}: {_deep_get_repr(v, can_be_expr)}" for k, v in obj.items()]
        return f"{{{', '.join(items)}}}"
    if callable(obj) and hasattr(obj, "__name__") and obj.__name__ != "<lambda>":
        # Named function: reference it by name in the generated code.
        return obj.__name__
    if can_be_expr:
        # Let the expression helper decide how to render the value.
        _, repr_str = _get_expr_and_repr(obj)
        return repr_str
    return repr(obj)
132
+
133
+
134
+ def _process_callable_arg(arg: Any) -> Tuple[str, Any, bool, Optional[str]]:
135
+ """
136
+ Process a callable argument for representation and conversion.
137
+
138
+ Args:
139
+ arg: The callable argument
140
+
141
+ Returns:
142
+ Tuple of (repr_string, processed_arg, convertible_to_code, function_source)
143
+ """
144
+ function_source = None
145
+ if hasattr(arg, "__name__") and arg.__name__ != "<lambda>":
146
+ # Try to get function source
147
+ try:
148
+ function_source, _ = _get_function_source(arg)
149
+ except:
150
+ pass
151
+ return arg.__name__, arg, True, function_source
152
+ else:
153
+ # For lambdas or callables without a proper name
154
+ return repr(arg), arg, False, None
155
+
156
+
157
def _contains_lambda(value: Any) -> bool:
    """Return True if value is a lambda or a list/tuple/dict that (recursively) holds one."""
    if isinstance(value, (list, tuple)):
        return any(_contains_lambda(item) for item in value)
    if isinstance(value, dict):
        return any(_contains_lambda(item) for item in value.values())
    return callable(value) and getattr(value, "__name__", None) == "<lambda>"


def _process_argument(arg: Any, can_be_expr: bool) -> Tuple[str, Any, bool, Optional[str]]:
    """
    Process a single argument, handling all types including nested structures.

    Args:
        arg: The argument to process
        can_be_expr: Whether this parameter can accept Expr types

    Returns:
        Tuple of (repr_string, processed_arg_for_polars, convertible_to_code, function_source).
        ``convertible_to_code`` is False when the argument is a lambda or a container
        holding one at any depth, because a lambda's repr is not valid source.
    """
    # Special handling for callables (but not Expr objects which might be callable)
    if callable(arg) and not isinstance(arg, (Expr, pl.Expr)) and not hasattr(arg, 'expr'):
        return _process_callable_arg(arg)

    repr_str = _deep_get_repr(arg, can_be_expr)
    processed_arg = _deep_convert_to_polars_expr(arg)

    # Lambdas nested inside containers must also mark the call as not convertible,
    # not only a lambda passed as the top-level argument.
    convertible = not _contains_lambda(arg)

    return repr_str, processed_arg, convertible, None
178
+
179
+
180
+ def _process_arguments(args: Tuple[Any, ...], param_can_be_expr: Dict[str, bool],
181
+ param_list: List[Tuple[str, inspect.Parameter]]) -> Tuple[List[str], List[Any], bool, List[str]]:
182
+ """
183
+ Process positional arguments for the wrapper function.
184
+
185
+ Args:
186
+ args: Positional arguments passed to the wrapper
187
+ param_can_be_expr: Dictionary indicating which parameters can be Expr
188
+ param_list: List of parameter names and objects from the original Polars function
189
+
190
+ Returns:
191
+ Tuple of (args_repr, pl_args, convertible_to_code, function_sources)
192
+ """
193
+ args_repr = []
194
+ pl_args = []
195
+ convertible_to_code = True
196
+ function_sources = []
197
+
198
+ for i, arg in enumerate(args):
199
+ can_be_expr_arg = False
200
+ if i < len(param_list):
201
+ param_name = param_list[i][0]
202
+ if param_list[i][1].kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.POSITIONAL_ONLY):
203
+ can_be_expr_arg = param_can_be_expr.get(param_name, False)
204
+
205
+ repr_str, processed_arg, is_convertible, func_source = _process_argument(arg, can_be_expr_arg)
206
+ args_repr.append(repr_str)
207
+ pl_args.append(processed_arg)
208
+ if not is_convertible:
209
+ convertible_to_code = False
210
+ if func_source:
211
+ function_sources.append(func_source)
212
+
213
+ return args_repr, pl_args, convertible_to_code, function_sources
214
+
215
+
216
+ def _process_keyword_arguments(kwargs: Dict[str, Any],
217
+ param_can_be_expr: Dict[str, bool]) -> Tuple[List[str], Dict[str, Any], bool, List[str]]:
218
+ """
219
+ Process keyword arguments for the wrapper function.
220
+
221
+ Args:
222
+ kwargs: Keyword arguments passed to the wrapper
223
+ param_can_be_expr: Dictionary indicating which parameters can be Expr
224
+
225
+ Returns:
226
+ Tuple of (kwargs_repr, pl_kwargs, convertible_to_code, function_sources)
227
+ """
228
+ kwargs_repr = []
229
+ pl_kwargs = {}
230
+ convertible_to_code = True
231
+ function_sources = []
232
+
233
+ for key, value in kwargs.items():
234
+ can_be_expr_kwarg = param_can_be_expr.get(key, False)
235
+
236
+ repr_str, processed_value, is_convertible, func_source = _process_argument(value, can_be_expr_kwarg)
237
+ kwargs_repr.append(f"{key}={repr_str}")
238
+ pl_kwargs[key] = processed_value
239
+ if not is_convertible:
240
+ convertible_to_code = False
241
+ if func_source:
242
+ function_sources.append(func_source)
243
+
244
+ return kwargs_repr, pl_kwargs, convertible_to_code, function_sources
245
+
246
+
247
+ def _build_repr_string(polars_func_name: str, args_repr: List[str], kwargs_repr: List[str],
248
+ function_sources: List[str] = None) -> str:
249
+ """
250
+ Build the string representation of the function call.
251
+
252
+ Args:
253
+ polars_func_name: Name of the polars function
254
+ args_repr: List of argument representations
255
+ kwargs_repr: List of keyword argument representations
256
+ function_sources: List of function source code strings
257
+
258
+ Returns:
259
+ Complete function call representation string
260
+ """
261
+ prefix = "pl."
262
+ if polars_func_name.startswith("pl."):
263
+ prefix = ""
264
+
265
+ all_args_str = ", ".join(args_repr)
266
+ all_kwargs_str = ", ".join(kwargs_repr)
267
+
268
+ if all_args_str and all_kwargs_str:
269
+ call_repr = f"{prefix}{polars_func_name}({all_args_str}, {all_kwargs_str})"
270
+ elif all_args_str:
271
+ call_repr = f"{prefix}{polars_func_name}({all_args_str})"
272
+ elif all_kwargs_str:
273
+ call_repr = f"{prefix}{polars_func_name}({all_kwargs_str})"
274
+ else:
275
+ call_repr = f"{prefix}{polars_func_name}()"
276
+
277
+ # If we have function sources, prepend them with separator
278
+ if function_sources:
279
+ # Remove duplicates while preserving order
280
+ unique_sources = []
281
+ seen = set()
282
+ for source in function_sources:
283
+ if source not in seen:
284
+ seen.add(source)
285
+ unique_sources.append(source)
286
+
287
+ functions = "# Function definitions\n" + "\n\n".join(unique_sources)
288
+ return functions + "\n\n─────SPLIT─────\n\noutput_df = " + call_repr
289
+ else:
290
+ return call_repr
291
+
292
+
293
def _create_flowframe_result(polars_func_name: str, full_repr: str, flow_graph: Optional[Any]) -> "FlowFrame":
    """
    Register the call as a polars-code node on a flow graph and wrap it in a FlowFrame.

    Args:
        polars_func_name: Name of the polars function (used in the node description)
        full_repr: String representation of the function call
        flow_graph: Optional flow graph to use; a new one is created when falsy

    Returns:
        FlowFrame bound to the graph and the newly generated node id. Its data is
        whatever the graph node reports, or an empty pl.DataFrame if that lookup fails.
    """
    from flowfile_core.schemas import input_schema, transform_schema
    from flowfile_frame.utils import create_flow_graph

    node_id = generate_node_id()
    if not flow_graph:
        flow_graph = create_flow_graph()

    # A SPLIT separator means full_repr already carries function definitions and
    # its own "output_df = ..." assignment.
    already_assigned = "─────SPLIT─────" in full_repr
    polars_code = full_repr if already_assigned else f"output_df = {full_repr}"

    flow_graph.add_polars_code(
        input_schema.NodePolarsCode(
            flow_id=flow_graph.flow_id,
            node_id=node_id,
            depending_on_ids=[],
            description=f"Execute: {polars_func_name}",
            polars_code_input=transform_schema.PolarsCodeInput(polars_code),
        )
    )

    try:
        if not hasattr(flow_graph, 'get_node'):
            # Fallback for graph objects without get_node: install a stand-in
            # that always yields an empty DataFrame.
            class _StandInNode:
                def get_resulting_data(self):
                    class _StandInData:
                        data_frame = pl.DataFrame()

                    return _StandInData()

            flow_graph.get_node = lambda nid: _StandInNode()

        actual_data = flow_graph.get_node(node_id).get_resulting_data().data_frame
    except Exception as e:
        print(f"Warning: Could not simulate DataFrame creation for graph node {node_id} for {polars_func_name}: {e}")
        actual_data = pl.DataFrame()

    return FlowFrame(
        data=actual_data,
        flow_graph=flow_graph,
        node_id=node_id,
    )
349
+
350
+
351
+ def _check_for_non_serializable_functions(args: List[Any], kwargs: Dict[str, Any]) -> List[str]:
352
+ """
353
+ Check for non-serializable functions in arguments and return warnings.
354
+
355
+ Args:
356
+ args: Processed arguments
357
+ kwargs: Processed keyword arguments
358
+
359
+ Returns:
360
+ List of warning messages for non-serializable functions
361
+ """
362
+ warnings = []
363
+
364
+ def check_value(value: Any, path: str) -> None:
365
+ """Recursively check for non-serializable functions."""
366
+ if callable(value) and not isinstance(value, (type, pl.Expr)):
367
+ # Check if it's a lambda or local function
368
+ if hasattr(value, '__name__'):
369
+ if value.__name__ == '<lambda>':
370
+ warnings.append(
371
+ f"Lambda function found at {path}. "
372
+ "This will cause 'serialization not supported for this opaque function' error. "
373
+ "Consider using a named function at module level instead."
374
+ )
375
+ elif hasattr(value, '__code__') and value.__code__.co_flags & 0x10: # CO_NESTED flag
376
+ # Check if it's a local/nested function (excluding top-level module functions)
377
+ if value.__code__.co_name != '<module>': # Ensure it's not a module itself
378
+ warnings.append(
379
+ f"Local function '{value.__name__}' found at {path}. "
380
+ "This may cause serialization issues. "
381
+ "Consider defining it at module level instead."
382
+ )
383
+ elif isinstance(value, list):
384
+ for i, item in enumerate(value):
385
+ check_value(item, f"{path}[{i}]")
386
+ elif isinstance(value, tuple):
387
+ for i, item in enumerate(value):
388
+ check_value(item, f"{path}[{i}]")
389
+ elif isinstance(value, dict):
390
+ for k, v in value.items():
391
+ check_value(v, f"{path}[{k!r}]")
392
+
393
+ # Check positional arguments
394
+ for i, arg in enumerate(args):
395
+ check_value(arg, f"argument {i}")
396
+
397
+ # Check keyword arguments
398
+ for key, value in kwargs.items():
399
+ check_value(value, f"keyword argument '{key}'")
400
+
401
+ return warnings
402
+
403
+
404
def _create_expr_result(polars_func: Callable, pl_args: List[Any], pl_kwargs: Dict[str, Any],
                        polars_func_name: str, full_repr: str, is_agg: bool,
                        convertible_to_code: bool, function_sources: List[str] = None) -> "Expr":
    """
    Call the polars function and wrap its result in a FlowFile Expr.

    Note: pl_args and pl_kwargs should already have all Expr objects converted to pl.Expr

    Args:
        polars_func: The actual polars function
        pl_args: Processed positional arguments (already converted)
        pl_kwargs: Processed keyword arguments (already converted)
        polars_func_name: Name of the polars function
        full_repr: String representation of the function call
        is_agg: Whether this is an aggregation function
        convertible_to_code: Whether the expression can be converted to code
        function_sources: List of function source code strings

    Returns:
        Expr instance wrapping the polars expression. If the polars call raised, the
        wrapped expression is None and the Expr is marked as not convertible to code.
    """
    from flowfile_frame.expr import Expr
    import warnings

    flagged_callables = _check_for_non_serializable_functions(pl_args, pl_kwargs)

    pl_expr = None
    polars_error_text = None

    try:
        pl_expr = polars_func(*pl_args, **pl_kwargs)

        # Only probe serialization when a suspicious callable was found.
        if pl_expr is not None and flagged_callables:
            try:
                import io
                pl_expr.serialize(file=io.BytesIO(), format='json')
            except Exception as serialize_exc:
                polars_error_text = str(serialize_exc)

    except Exception as build_exc:
        print(
            f"Warning: Polars function '{polars_func_name}' failed to create an expression with provided arguments. Error: {build_exc}")
        if "serialization not supported" in str(build_exc).lower():
            polars_error_text = str(build_exc)

    if flagged_callables:
        bullet_block = "\n".join(f" • {w}" for w in flagged_callables)
        polars_detail = f"\nActual error from Polars: {polars_error_text}" if polars_error_text else ""
        # warn() stays in this function so stacklevel=3 refers to the same frame as before.
        warnings.warn(
            f"\n⚠️ SERIALIZATION WARNING for {polars_func_name}:\n"
            f"{bullet_block}"
            "\n\nThis expression cannot be saved to a FlowFile format and will need to be "
            "recreated from scratch when loading the flow. The expression will work in the "
            "current session but won't persist.\n"
            f"{polars_detail}",
            category=UserWarning,
            stacklevel=3
        )

    # Keep only the call expression: drop function definitions and the assignment prefix.
    repr_str = full_repr
    if function_sources and "─────SPLIT─────" in full_repr:
        repr_str = full_repr.rpartition("─────SPLIT─────")[2].strip()
        assignment_prefix = "output_df = "
        if repr_str.startswith(assignment_prefix):
            repr_str = repr_str[len(assignment_prefix):]

    return Expr(
        pl_expr,
        repr_str=repr_str,
        agg_func=polars_func_name if is_agg else None,
        is_complex=True,
        convertable_to_code=convertible_to_code and (pl_expr is not None),
        _function_sources=function_sources
    )
484
+
485
+
486
+ def _copy_function_metadata(original_func: Callable, polars_func_name: str) -> Tuple[str, str]:
487
+ """
488
+ Copy metadata from the original polars function.
489
+
490
+ Args:
491
+ original_func: The original polars function
492
+ polars_func_name: Name of the polars function
493
+
494
+ Returns:
495
+ Tuple of (function_name, docstring)
496
+ """
497
+ original_doc = getattr(original_func, '__doc__', None) or ""
498
+ enhanced_doc = f"""FlowFile wrapper for pl.{polars_func_name}.
499
+
500
+ Original Polars documentation:
501
+ {original_doc}
502
+
503
+ Note: This is a FlowFile wrapper. If it returns a FlowFrame, it may accept an additional
504
+ 'flow_graph: Optional[FlowGraph]' keyword argument to associate the operation with a specific graph.
505
+ Otherwise, a new graph is implicitly created or an existing one is used if chained from a FlowFrame method.
506
+ Wrapped functions returning Exprs will produce FlowFile Expr objects.
507
+ """
508
+ return polars_func_name, enhanced_doc.strip()
509
+
510
+
511
def _build_polars_wrapper(
        polars_func_name: str,
        is_agg: bool,
        return_type: Optional[Literal["FlowFrame", "Expr"]],
        not_found_message: str,
) -> Callable:
    """
    Build the FlowFile wrapper function around ``pl.<polars_func_name>``.

    Shared by both usage forms of polars_function_wrapper so the wrapper logic
    exists only once.

    Args:
        polars_func_name: Attribute name looked up on the polars module.
        is_agg: Whether this is an aggregation function (relevant for Expr results).
        return_type: Expected return type; inferred from the polars signature when None.
        not_found_message: Message for the ValueError raised when polars has no such attribute.

    Returns:
        The wrapper, carrying the polars function's name, an extended docstring and
        a signature that gains a keyword-only ``flow_graph`` for FlowFrame results.

    Raises:
        ValueError: If the polars function is not found or its return type cannot be inferred.
    """
    polars_f = getattr(pl, polars_func_name, None)
    if polars_f is None:
        raise ValueError(not_found_message)

    original_polars_sig = inspect.signature(polars_f)
    determined_rt = return_type or _determine_return_type(original_polars_sig)
    param_can_be_expr_map, param_list_for_processing = _analyze_parameters(original_polars_sig)
    wrapper_name, wrapper_doc = _copy_function_metadata(polars_f, polars_func_name)

    final_params_for_sig = list(original_polars_sig.parameters.values())
    wrapper_return_annotation_str: str

    if determined_rt == "FlowFrame":
        wrapper_return_annotation_str = 'FlowFrame'
        if not any(p.name == 'flow_graph' for p in final_params_for_sig):
            flow_graph_param = inspect.Parameter(
                name='flow_graph',
                kind=inspect.Parameter.KEYWORD_ONLY,
                default=None,
                annotation=Optional[FlowGraph]
            )
            # A keyword-only parameter must come before **kwargs in a Signature.
            var_kw_idx = next(
                (i for i, p in enumerate(final_params_for_sig) if p.kind == inspect.Parameter.VAR_KEYWORD), -1)
            if var_kw_idx != -1:
                final_params_for_sig.insert(var_kw_idx, flow_graph_param)
            else:
                final_params_for_sig.append(flow_graph_param)
    elif determined_rt == "Expr":
        wrapper_return_annotation_str = 'Expr'
    else:
        wrapper_return_annotation_str = str(original_polars_sig.return_annotation)

    wrapper_signature = inspect.Signature(
        parameters=final_params_for_sig,
        return_annotation=wrapper_return_annotation_str
    )

    @wraps(polars_f)
    def wrapper(*args, **kwargs):
        flow_graph_val = None
        if determined_rt == "FlowFrame":
            # flow_graph is a FlowFile-only keyword; keep it out of the polars call repr.
            flow_graph_val = kwargs.pop('flow_graph', None)

        args_repr_val, pl_args_val, args_convertible_val, args_func_sources = _process_arguments(
            args, param_can_be_expr_map, param_list_for_processing
        )
        kwargs_repr_val, pl_kwargs_val, kwargs_convertible_val, kwargs_func_sources = _process_keyword_arguments(
            kwargs, param_can_be_expr_map
        )

        convertible_to_code_val = args_convertible_val and kwargs_convertible_val
        all_func_sources = args_func_sources + kwargs_func_sources

        full_repr_val = _build_repr_string(
            polars_func_name, args_repr_val, kwargs_repr_val, all_func_sources
        )

        if determined_rt == 'FlowFrame':
            return _create_flowframe_result(polars_func_name, full_repr_val, flow_graph_val)
        # Anything that is not a FlowFrame result is handled as an Expr.
        return _create_expr_result(
            polars_f, pl_args_val, pl_kwargs_val, polars_func_name,
            full_repr_val, is_agg, convertible_to_code_val, all_func_sources
        )

    wrapper.__name__ = wrapper_name
    wrapper.__doc__ = wrapper_doc
    wrapper.__signature__ = wrapper_signature
    return wrapper


def polars_function_wrapper(
        polars_func_name_or_callable: Union[str, Callable],
        is_agg: bool = False,
        return_type: Optional[Literal["FlowFrame", "Expr"]] = None
):
    """
    Create a wrapper for a polars function that returns either a FlowFrame or Expr.

    Args:
        polars_func_name_or_callable: Name of the polars function to wrap (str) or
            the function itself if using @polars_function_wrapper directly; in that
            case the polars function is looked up by the callable's ``__name__``.
        is_agg: Whether this is an aggregation function (relevant for Expr results).
        return_type: Expected return type ("FlowFrame" or "Expr"). If None, will be inferred.

    Returns:
        When given a callable: the wrapped function. When given a name: a decorator
        that ignores the function it decorates and returns the wrapped function.

    Raises:
        ValueError: If the polars function is not found or doesn't return Frame/Expr.
            With a name, this is raised when the returned decorator is applied.
    """
    # Handle the case where the decorator is used as @polars_function_wrapper directly
    if callable(polars_func_name_or_callable) and not isinstance(polars_func_name_or_callable, str):
        inferred_name = polars_func_name_or_callable.__name__
        return _build_polars_wrapper(
            inferred_name, is_agg, return_type,
            f"Polars function '{inferred_name}' (inferred) not found."
        )

    # Used as @polars_function_wrapper("name", ...) or assigned
    actual_polars_func_name = cast(str, polars_func_name_or_callable)

    def decorator(func: Optional[Callable] = None):  # func is the decorated placeholder
        # The placeholder is replaced entirely, so it is not used here.
        return _build_polars_wrapper(
            actual_polars_func_name, is_agg, return_type,
            f"Polars function '{actual_polars_func_name}' not found."
        )

    return decorator
682
+
683
+
684
+ # Example usage with the new decorator (from original snippet):
685
+
686
+ # For functions that return FlowFrames
687
@polars_function_wrapper('read_json', return_type="FlowFrame")
def read_json(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    """Placeholder; the decorator replaces it with the FlowFrame wrapper for pl.read_json."""
690
+
691
+
692
@polars_function_wrapper('read_avro', return_type="FlowFrame")
def read_avro(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    """Placeholder; the decorator replaces it with the FlowFrame wrapper for pl.read_avro."""
695
+
696
+
697
@polars_function_wrapper('read_ndjson', return_type="FlowFrame")
def read_ndjson(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    """Placeholder; the decorator replaces it with the FlowFrame wrapper for pl.read_ndjson."""
700
+
701
+
702
@polars_function_wrapper('fold', return_type="Expr")
def fold(*args, **kwargs) -> 'Expr':  # Type hint 'Expr' refers to flowfile_frame.expr.Expr
    """Placeholder; the decorator replaces it with the Expr wrapper for pl.fold."""