Flowfile 0.3.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +2 -1
- flowfile/web/__init__.py +3 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -1
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +46 -35
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +13 -18
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +10 -4
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +587 -1002
- flowfile_frame/flow_frame.pyi +336 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_frame/lazy.py
ADDED
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
|
|
3
|
+
import polars as pl
|
|
4
|
+
from flowfile_frame.flow_frame import FlowFrame, can_be_expr, generate_node_id
|
|
5
|
+
from flowfile_core.flowfile.FlowfileFlow import FlowGraph
|
|
6
|
+
from flowfile_frame.expr import Expr
|
|
7
|
+
from flowfile_frame.utils import _get_function_source
|
|
8
|
+
from typing import cast
|
|
9
|
+
from functools import wraps
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _determine_return_type(func_signature: inspect.Signature) -> Literal["FlowFrame", "Expr"]:
|
|
13
|
+
"""
|
|
14
|
+
Determine the return type based on the function signature.
|
|
15
|
+
|
|
16
|
+
Args:
|
|
17
|
+
func_signature: The inspect.Signature of the polars function
|
|
18
|
+
|
|
19
|
+
Returns:
|
|
20
|
+
Either "FlowFrame" or "Expr" based on the return annotation
|
|
21
|
+
|
|
22
|
+
Raises:
|
|
23
|
+
ValueError: If the function doesn't return a Frame or Expr
|
|
24
|
+
"""
|
|
25
|
+
return_annotation = str(func_signature.return_annotation)
|
|
26
|
+
|
|
27
|
+
if return_annotation in ("DataFrame", "LazyFrame"):
|
|
28
|
+
return "FlowFrame"
|
|
29
|
+
elif return_annotation == "Expr":
|
|
30
|
+
return "Expr"
|
|
31
|
+
else:
|
|
32
|
+
# Allow for type aliases or Union types that might include DataFrame/LazyFrame/Expr
|
|
33
|
+
if "DataFrame" in return_annotation or "LazyFrame" in return_annotation:
|
|
34
|
+
return "FlowFrame"
|
|
35
|
+
if "Expr" in return_annotation and "DataFrame" not in return_annotation and "LazyFrame" not in return_annotation:
|
|
36
|
+
return "Expr"
|
|
37
|
+
raise ValueError(
|
|
38
|
+
f"Function does not return a Frame or Expr. "
|
|
39
|
+
f"Got return annotation: {return_annotation}"
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _analyze_parameters(func_signature: inspect.Signature) -> Tuple[
        Dict[str, bool], List[Tuple[str, inspect.Parameter]]]:
    """
    Record, for every parameter of a signature, whether it may receive an Expr.

    Args:
        func_signature: The inspect.Signature of the polars function

    Returns:
        Tuple of (mapping of parameter name -> result of can_be_expr for that
        parameter, list of (name, Parameter) pairs in declaration order)
    """
    named_parameters = list(func_signature.parameters.items())
    expr_flags = {name: can_be_expr(parameter) for name, parameter in named_parameters}
    return expr_flags, named_parameters
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _deep_convert_to_polars_expr(obj: Any) -> Any:
    """
    Unwrap FlowFile Expr objects into their underlying polars expressions.

    Lists, tuples and dict values are walked recursively; dict keys are left
    untouched.

    Args:
        obj: Object to convert (can be Expr, list, dict, tuple, or any other type)

    Returns:
        A structure of the same shape with every FlowFile Expr replaced by its
        ``.expr`` attribute
    """
    if isinstance(obj, Expr):
        return obj.expr

    if isinstance(obj, dict):
        return {key: _deep_convert_to_polars_expr(value) for key, value in obj.items()}

    if isinstance(obj, list):
        return list(map(_deep_convert_to_polars_expr, obj))

    if isinstance(obj, tuple):
        return tuple(map(_deep_convert_to_polars_expr, obj))

    # Anything else (including an existing pl.Expr) passes through unchanged.
    return obj
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _deep_get_repr(obj: Any, can_be_expr: bool = False) -> str:
    """
    Build a source-code style string for an argument, recursing into containers.

    Args:
        obj: Object to get representation for
        can_be_expr: Whether this parameter can accept Expr types

    Returns:
        String representation suitable for code generation
    """
    # Function-level import kept from the original; presumably avoids an import
    # cycle with flowfile_frame.expr -- TODO confirm.
    from flowfile_frame.expr import _get_expr_and_repr

    if isinstance(obj, (Expr, pl.Expr)):
        # Both FlowFile and polars expressions are rendered by the same helper.
        _, repr_str = _get_expr_and_repr(obj)
        return repr_str

    if isinstance(obj, list):
        inner_reprs = [_deep_get_repr(item, can_be_expr) for item in obj]
        return f"[{', '.join(inner_reprs)}]"

    if isinstance(obj, tuple):
        inner_reprs = [_deep_get_repr(item, can_be_expr) for item in obj]
        if len(inner_reprs) == 1:
            # "(x)" is just a parenthesised x in Python source; a one-element
            # tuple needs the trailing comma to survive code generation.
            return f"({inner_reprs[0]},)"
        return f"({', '.join(inner_reprs)})"

    if isinstance(obj, dict):
        items = [f"{repr(k)}: {_deep_get_repr(v, can_be_expr)}" for k, v in obj.items()]
        return f"{{{', '.join(items)}}}"

    if callable(obj) and hasattr(obj, "__name__") and obj.__name__ != "<lambda>":
        # Named functions are referenced by name in the generated code.
        return obj.__name__

    if can_be_expr:
        # Let the expression helper decide how to render the value.
        _, repr_str = _get_expr_and_repr(obj)
        return repr_str

    return repr(obj)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _process_callable_arg(arg: Any) -> Tuple[str, Any, bool, Optional[str]]:
|
|
135
|
+
"""
|
|
136
|
+
Process a callable argument for representation and conversion.
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
arg: The callable argument
|
|
140
|
+
|
|
141
|
+
Returns:
|
|
142
|
+
Tuple of (repr_string, processed_arg, convertible_to_code, function_source)
|
|
143
|
+
"""
|
|
144
|
+
function_source = None
|
|
145
|
+
if hasattr(arg, "__name__") and arg.__name__ != "<lambda>":
|
|
146
|
+
# Try to get function source
|
|
147
|
+
try:
|
|
148
|
+
function_source, _ = _get_function_source(arg)
|
|
149
|
+
except:
|
|
150
|
+
pass
|
|
151
|
+
return arg.__name__, arg, True, function_source
|
|
152
|
+
else:
|
|
153
|
+
# For lambdas or callables without a proper name
|
|
154
|
+
return repr(arg), arg, False, None
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def _process_argument(arg: Any, can_be_expr: bool) -> Tuple[str, Any, bool, Optional[str]]:
    """
    Turn one call argument into its code representation and polars-ready value.

    Args:
        arg: The argument to process
        can_be_expr: Whether this parameter can accept Expr types

    Returns:
        Tuple of (repr_string, processed_arg_for_polars, convertible_to_code, function_source)
    """
    # Plain callables get dedicated handling; expression objects (or anything
    # exposing an ``expr`` attribute) are excluded even if they are callable.
    if callable(arg) and not (isinstance(arg, (Expr, pl.Expr)) or hasattr(arg, 'expr')):
        return _process_callable_arg(arg)

    text = _deep_get_repr(arg, can_be_expr)
    polars_ready = _deep_convert_to_polars_expr(arg)

    looks_like_lambda = callable(arg) and getattr(arg, "__name__", None) == "<lambda>"
    return text, polars_ready, not looks_like_lambda, None
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def _process_arguments(args: Tuple[Any, ...], param_can_be_expr: Dict[str, bool],
                       param_list: List[Tuple[str, inspect.Parameter]]) -> Tuple[List[str], List[Any], bool, List[str]]:
    """
    Convert the positional arguments of a wrapped call.

    Args:
        args: Positional arguments passed to the wrapper
        param_can_be_expr: Dictionary indicating which parameters can be Expr
        param_list: List of parameter names and objects from the original Polars function

    Returns:
        Tuple of (args_repr, pl_args, convertible_to_code, function_sources)
    """
    positional_kinds = (inspect.Parameter.POSITIONAL_OR_KEYWORD, inspect.Parameter.POSITIONAL_ONLY)

    rendered: List[str] = []
    converted: List[Any] = []
    sources: List[str] = []
    all_convertible = True

    for position, value in enumerate(args):
        # Only arguments that line up with a plain positional parameter can
        # inherit that parameter's Expr flag; everything else defaults to False.
        accepts_expr = False
        if position < len(param_list):
            name, parameter = param_list[position]
            if parameter.kind in positional_kinds:
                accepts_expr = param_can_be_expr.get(name, False)

        text, polars_value, ok, source = _process_argument(value, accepts_expr)
        rendered.append(text)
        converted.append(polars_value)
        all_convertible = all_convertible and bool(ok)
        if source:
            sources.append(source)

    return rendered, converted, all_convertible, sources
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def _process_keyword_arguments(kwargs: Dict[str, Any],
                               param_can_be_expr: Dict[str, bool]) -> Tuple[List[str], Dict[str, Any], bool, List[str]]:
    """
    Convert the keyword arguments of a wrapped call.

    Args:
        kwargs: Keyword arguments passed to the wrapper
        param_can_be_expr: Dictionary indicating which parameters can be Expr

    Returns:
        Tuple of (kwargs_repr, pl_kwargs, convertible_to_code, function_sources)
    """
    rendered: List[str] = []
    converted: Dict[str, Any] = {}
    sources: List[str] = []
    all_convertible = True

    for name, raw_value in kwargs.items():
        # Keywords the signature does not know about are treated as non-Expr.
        accepts_expr = param_can_be_expr.get(name, False)
        text, polars_value, ok, source = _process_argument(raw_value, accepts_expr)

        rendered.append(f"{name}={text}")
        converted[name] = polars_value
        all_convertible = all_convertible and bool(ok)
        if source:
            sources.append(source)

    return rendered, converted, all_convertible, sources
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def _build_repr_string(polars_func_name: str, args_repr: List[str], kwargs_repr: List[str],
|
|
248
|
+
function_sources: List[str] = None) -> str:
|
|
249
|
+
"""
|
|
250
|
+
Build the string representation of the function call.
|
|
251
|
+
|
|
252
|
+
Args:
|
|
253
|
+
polars_func_name: Name of the polars function
|
|
254
|
+
args_repr: List of argument representations
|
|
255
|
+
kwargs_repr: List of keyword argument representations
|
|
256
|
+
function_sources: List of function source code strings
|
|
257
|
+
|
|
258
|
+
Returns:
|
|
259
|
+
Complete function call representation string
|
|
260
|
+
"""
|
|
261
|
+
prefix = "pl."
|
|
262
|
+
if polars_func_name.startswith("pl."):
|
|
263
|
+
prefix = ""
|
|
264
|
+
|
|
265
|
+
all_args_str = ", ".join(args_repr)
|
|
266
|
+
all_kwargs_str = ", ".join(kwargs_repr)
|
|
267
|
+
|
|
268
|
+
if all_args_str and all_kwargs_str:
|
|
269
|
+
call_repr = f"{prefix}{polars_func_name}({all_args_str}, {all_kwargs_str})"
|
|
270
|
+
elif all_args_str:
|
|
271
|
+
call_repr = f"{prefix}{polars_func_name}({all_args_str})"
|
|
272
|
+
elif all_kwargs_str:
|
|
273
|
+
call_repr = f"{prefix}{polars_func_name}({all_kwargs_str})"
|
|
274
|
+
else:
|
|
275
|
+
call_repr = f"{prefix}{polars_func_name}()"
|
|
276
|
+
|
|
277
|
+
# If we have function sources, prepend them with separator
|
|
278
|
+
if function_sources:
|
|
279
|
+
# Remove duplicates while preserving order
|
|
280
|
+
unique_sources = []
|
|
281
|
+
seen = set()
|
|
282
|
+
for source in function_sources:
|
|
283
|
+
if source not in seen:
|
|
284
|
+
seen.add(source)
|
|
285
|
+
unique_sources.append(source)
|
|
286
|
+
|
|
287
|
+
functions = "# Function definitions\n" + "\n\n".join(unique_sources)
|
|
288
|
+
return functions + "\n\n─────SPLIT─────\n\noutput_df = " + call_repr
|
|
289
|
+
else:
|
|
290
|
+
return call_repr
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
def _create_flowframe_result(polars_func_name: str, full_repr: str, flow_graph: Optional[Any]) -> "FlowFrame":
    """
    Register a polars-code node for a frame-returning call and wrap it in a FlowFrame.

    Args:
        polars_func_name: Name of the polars function
        full_repr: String representation of the function call
        flow_graph: Optional flow graph to use; a new one is created when falsy

    Returns:
        FlowFrame instance with the operation added to the graph
    """
    from flowfile_core.schemas import input_schema, transform_schema
    from flowfile_frame.utils import create_flow_graph

    node_id = generate_node_id()
    if not flow_graph:
        flow_graph = create_flow_graph()

    # A repr containing the SPLIT separator already carries its own
    # "output_df = ..." assignment (plus function definitions).
    has_function_definitions = "─────SPLIT─────" in full_repr
    polars_code = full_repr if has_function_definitions else f"output_df = {full_repr}"

    code_input = transform_schema.PolarsCodeInput(polars_code)
    flow_graph.add_polars_code(
        input_schema.NodePolarsCode(
            flow_id=flow_graph.flow_id,
            node_id=node_id,
            depending_on_ids=[],
            description=f"Execute: {polars_func_name}",
            polars_code_input=code_input,
        )
    )

    try:
        # Stand-ins used only when the graph object exposes no get_node.
        class MockNode:
            def get_resulting_data(self):
                class MockData:
                    data_frame = pl.DataFrame()

                return MockData()

        if not hasattr(flow_graph, 'get_node'):
            flow_graph.get_node = lambda nid: MockNode()

        created_node = flow_graph.get_node(node_id)
        actual_data = created_node.get_resulting_data().data_frame
    except Exception as e:
        # Fall back to an empty frame rather than failing the whole call.
        print(f"Warning: Could not simulate DataFrame creation for graph node {node_id} for {polars_func_name}: {e}")
        actual_data = pl.DataFrame()

    return FlowFrame(data=actual_data, flow_graph=flow_graph, node_id=node_id)
|
|
349
|
+
|
|
350
|
+
|
|
351
|
+
def _check_for_non_serializable_functions(args: List[Any], kwargs: Dict[str, Any]) -> List[str]:
|
|
352
|
+
"""
|
|
353
|
+
Check for non-serializable functions in arguments and return warnings.
|
|
354
|
+
|
|
355
|
+
Args:
|
|
356
|
+
args: Processed arguments
|
|
357
|
+
kwargs: Processed keyword arguments
|
|
358
|
+
|
|
359
|
+
Returns:
|
|
360
|
+
List of warning messages for non-serializable functions
|
|
361
|
+
"""
|
|
362
|
+
warnings = []
|
|
363
|
+
|
|
364
|
+
def check_value(value: Any, path: str) -> None:
|
|
365
|
+
"""Recursively check for non-serializable functions."""
|
|
366
|
+
if callable(value) and not isinstance(value, (type, pl.Expr)):
|
|
367
|
+
# Check if it's a lambda or local function
|
|
368
|
+
if hasattr(value, '__name__'):
|
|
369
|
+
if value.__name__ == '<lambda>':
|
|
370
|
+
warnings.append(
|
|
371
|
+
f"Lambda function found at {path}. "
|
|
372
|
+
"This will cause 'serialization not supported for this opaque function' error. "
|
|
373
|
+
"Consider using a named function at module level instead."
|
|
374
|
+
)
|
|
375
|
+
elif hasattr(value, '__code__') and value.__code__.co_flags & 0x10: # CO_NESTED flag
|
|
376
|
+
# Check if it's a local/nested function (excluding top-level module functions)
|
|
377
|
+
if value.__code__.co_name != '<module>': # Ensure it's not a module itself
|
|
378
|
+
warnings.append(
|
|
379
|
+
f"Local function '{value.__name__}' found at {path}. "
|
|
380
|
+
"This may cause serialization issues. "
|
|
381
|
+
"Consider defining it at module level instead."
|
|
382
|
+
)
|
|
383
|
+
elif isinstance(value, list):
|
|
384
|
+
for i, item in enumerate(value):
|
|
385
|
+
check_value(item, f"{path}[{i}]")
|
|
386
|
+
elif isinstance(value, tuple):
|
|
387
|
+
for i, item in enumerate(value):
|
|
388
|
+
check_value(item, f"{path}[{i}]")
|
|
389
|
+
elif isinstance(value, dict):
|
|
390
|
+
for k, v in value.items():
|
|
391
|
+
check_value(v, f"{path}[{k!r}]")
|
|
392
|
+
|
|
393
|
+
# Check positional arguments
|
|
394
|
+
for i, arg in enumerate(args):
|
|
395
|
+
check_value(arg, f"argument {i}")
|
|
396
|
+
|
|
397
|
+
# Check keyword arguments
|
|
398
|
+
for key, value in kwargs.items():
|
|
399
|
+
check_value(value, f"keyword argument '{key}'")
|
|
400
|
+
|
|
401
|
+
return warnings
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
def _create_expr_result(polars_func: Callable, pl_args: List[Any], pl_kwargs: Dict[str, Any],
                        polars_func_name: str, full_repr: str, is_agg: bool,
                        convertible_to_code: bool, function_sources: List[str] = None) -> "Expr":
    """
    Call the polars function and wrap its result in a FlowFile Expr.

    Note: pl_args and pl_kwargs should already have all Expr objects converted to pl.Expr

    Args:
        polars_func: The actual polars function
        pl_args: Processed positional arguments (already converted)
        pl_kwargs: Processed keyword arguments (already converted)
        polars_func_name: Name of the polars function
        full_repr: String representation of the function call
        is_agg: Whether this is an aggregation function
        convertible_to_code: Whether the expression can be converted to code
        function_sources: List of function source code strings

    Returns:
        Expr instance wrapping the polars expression. If the polars call fails,
        a message is printed and the Expr wraps None and is marked as not
        convertible to code.
    """
    import io
    import warnings
    from flowfile_frame.expr import Expr

    split_marker = "─────SPLIT─────"
    assignment_prefix = "output_df = "

    # Lambdas / nested functions in the arguments that are unlikely to serialize.
    problems = _check_for_non_serializable_functions(pl_args, pl_kwargs)

    built_expr = None
    polars_error_text = None
    try:
        built_expr = polars_func(*pl_args, **pl_kwargs)

        # Only probe serialization when suspicious callables were found.
        if problems and built_expr is not None:
            try:
                built_expr.serialize(file=io.BytesIO(), format='json')
            except Exception as probe_error:
                polars_error_text = str(probe_error)
    except Exception as build_error:
        print(
            f"Warning: Polars function '{polars_func_name}' failed to create an expression with provided arguments. Error: {build_error}")
        if "serialization not supported" in str(build_error).lower():
            polars_error_text = str(build_error)

    if problems:
        bullet_lines = "\n".join(f" • {item}" for item in problems)
        polars_detail = f"\nActual error from Polars: {polars_error_text}" if polars_error_text else ""
        warnings.warn(
            f"\n⚠️ SERIALIZATION WARNING for {polars_func_name}:\n"
            f"{bullet_lines}"
            "\n\nThis expression cannot be saved to a FlowFile format and will need to be "
            "recreated from scratch when loading the flow. The expression will work in the "
            "current session but won't persist.\n"
            f"{polars_detail}",
            category=UserWarning,
            stacklevel=3
        )

    # The Expr repr should hold only the call expression, not the prepended
    # function definitions or the "output_df = " assignment.
    if function_sources and split_marker in full_repr:
        expression_text = full_repr.rpartition(split_marker)[2].strip()
        if expression_text.startswith(assignment_prefix):
            expression_text = expression_text[len(assignment_prefix):]
    else:
        expression_text = full_repr

    return Expr(
        built_expr,
        repr_str=expression_text,
        agg_func=polars_func_name if is_agg else None,
        is_complex=True,
        convertable_to_code=convertible_to_code and (built_expr is not None),
        _function_sources=function_sources
    )
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
def _copy_function_metadata(original_func: Callable, polars_func_name: str) -> Tuple[str, str]:
|
|
487
|
+
"""
|
|
488
|
+
Copy metadata from the original polars function.
|
|
489
|
+
|
|
490
|
+
Args:
|
|
491
|
+
original_func: The original polars function
|
|
492
|
+
polars_func_name: Name of the polars function
|
|
493
|
+
|
|
494
|
+
Returns:
|
|
495
|
+
Tuple of (function_name, docstring)
|
|
496
|
+
"""
|
|
497
|
+
original_doc = getattr(original_func, '__doc__', None) or ""
|
|
498
|
+
enhanced_doc = f"""FlowFile wrapper for pl.{polars_func_name}.
|
|
499
|
+
|
|
500
|
+
Original Polars documentation:
|
|
501
|
+
{original_doc}
|
|
502
|
+
|
|
503
|
+
Note: This is a FlowFile wrapper. If it returns a FlowFrame, it may accept an additional
|
|
504
|
+
'flow_graph: Optional[FlowGraph]' keyword argument to associate the operation with a specific graph.
|
|
505
|
+
Otherwise, a new graph is implicitly created or an existing one is used if chained from a FlowFrame method.
|
|
506
|
+
Wrapped functions returning Exprs will produce FlowFile Expr objects.
|
|
507
|
+
"""
|
|
508
|
+
return polars_func_name, enhanced_doc.strip()
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _build_polars_wrapper(
        actual_polars_func_name: str,
        is_agg: bool,
        return_type: Optional[Literal["FlowFrame", "Expr"]],
        not_found_message: str,
) -> Callable:
    """Build the FlowFile wrapper around ``pl.<actual_polars_func_name>``; shared by both decorator forms."""
    polars_f = getattr(pl, actual_polars_func_name, None)
    if polars_f is None:
        raise ValueError(not_found_message)

    original_polars_sig = inspect.signature(polars_f)
    determined_rt = return_type or _determine_return_type(original_polars_sig)

    param_can_be_expr_map, param_list_for_processing = _analyze_parameters(original_polars_sig)
    wrapper_name, wrapper_doc = _copy_function_metadata(polars_f, actual_polars_func_name)

    final_params_for_sig = list(original_polars_sig.parameters.values())
    wrapper_return_annotation_str: str

    if determined_rt == "FlowFrame":
        wrapper_return_annotation_str = 'FlowFrame'
        if not any(p.name == 'flow_graph' for p in final_params_for_sig):
            flow_graph_param = inspect.Parameter(
                name='flow_graph',
                kind=inspect.Parameter.KEYWORD_ONLY,
                default=None,
                annotation=Optional[FlowGraph]
            )
            # A keyword-only parameter must come before **kwargs, if present.
            var_kw_idx = next(
                (i for i, p in enumerate(final_params_for_sig) if p.kind == inspect.Parameter.VAR_KEYWORD), -1)
            if var_kw_idx != -1:
                final_params_for_sig.insert(var_kw_idx, flow_graph_param)
            else:
                final_params_for_sig.append(flow_graph_param)
    elif determined_rt == "Expr":
        wrapper_return_annotation_str = 'Expr'
    else:
        wrapper_return_annotation_str = str(original_polars_sig.return_annotation)

    wrapper_signature = inspect.Signature(
        parameters=final_params_for_sig,
        return_annotation=wrapper_return_annotation_str
    )

    @wraps(polars_f)
    def wrapper(*args, **kwargs):
        # flow_graph is a FlowFile-only keyword; strip it before the remaining
        # kwargs are rendered and forwarded.
        flow_graph_val = None
        if determined_rt == "FlowFrame":
            flow_graph_val = kwargs.pop('flow_graph', None)

        args_repr_val, pl_args_val, args_convertible_val, args_func_sources = _process_arguments(
            args, param_can_be_expr_map, param_list_for_processing
        )
        kwargs_repr_val, pl_kwargs_val, kwargs_convertible_val, kwargs_func_sources = _process_keyword_arguments(
            kwargs, param_can_be_expr_map
        )

        convertible_to_code_val = args_convertible_val and kwargs_convertible_val
        all_func_sources = args_func_sources + kwargs_func_sources

        full_repr_val = _build_repr_string(
            actual_polars_func_name, args_repr_val, kwargs_repr_val, all_func_sources
        )

        if determined_rt == 'FlowFrame':
            return _create_flowframe_result(actual_polars_func_name, full_repr_val, flow_graph_val)
        # Anything that is not a FlowFrame is built as an Expr.
        return _create_expr_result(
            polars_f, pl_args_val, pl_kwargs_val, actual_polars_func_name,
            full_repr_val, is_agg, convertible_to_code_val, all_func_sources
        )

    # Applied after @wraps so they override the metadata copied from polars_f.
    wrapper.__name__ = wrapper_name
    wrapper.__doc__ = wrapper_doc
    wrapper.__signature__ = wrapper_signature
    return wrapper


def polars_function_wrapper(
        polars_func_name_or_callable: Union[str, Callable],
        is_agg: bool = False,
        return_type: Optional[Literal["FlowFrame", "Expr"]] = None
):
    """
    Create a wrapper for a polars function that returns either a FlowFrame or Expr.

    Args:
        polars_func_name_or_callable: Name of the polars function to wrap (str) or
            the function itself if using @polars_function_wrapper directly; in
            that case the polars function is looked up by the callable's __name__.
        is_agg: Whether this is an aggregation function (relevant for Expr results).
        return_type: Expected return type ("FlowFrame" or "Expr"). If None, will be inferred.

    Returns:
        When given a callable: the wrapper itself. When given a name: a decorator
        that ignores the function it decorates and returns the wrapper.

    Raises:
        ValueError: If the polars function is not found or doesn't return Frame/Expr.
    """
    # Handle the case where the decorator is used as @polars_function_wrapper directly
    if callable(polars_func_name_or_callable) and not isinstance(polars_func_name_or_callable, str):
        inferred_name = polars_func_name_or_callable.__name__
        return _build_polars_wrapper(
            inferred_name, is_agg, return_type,
            f"Polars function '{inferred_name}' (inferred) not found."
        )

    # Used as @polars_function_wrapper("name", ...) or assigned
    actual_polars_func_name = cast(str, polars_func_name_or_callable)

    def decorator(func: Optional[Callable] = None):
        # func is only a placeholder; the generated wrapper replaces it entirely.
        return _build_polars_wrapper(
            actual_polars_func_name, is_agg, return_type,
            f"Polars function '{actual_polars_func_name}' not found."
        )

    return decorator
|
|
682
|
+
|
|
683
|
+
|
|
684
|
+
# Example usage with the new decorator (from original snippet):
|
|
685
|
+
|
|
686
|
+
# For functions that return FlowFrames
|
|
687
|
+
@polars_function_wrapper('read_json', return_type="FlowFrame")
def read_json(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    # Placeholder only: the decorator ignores this body and binds the name to a
    # generated wrapper around pl.read_json that returns a FlowFrame.
    pass
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
@polars_function_wrapper('read_avro', return_type="FlowFrame")
def read_avro(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    # Placeholder only: the decorator ignores this body and binds the name to a
    # generated wrapper around pl.read_avro that returns a FlowFrame.
    pass
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
@polars_function_wrapper('read_ndjson', return_type="FlowFrame")
def read_ndjson(*args, flow_graph: Optional[FlowGraph] = None, **kwargs) -> FlowFrame:
    # Placeholder only: the decorator ignores this body and binds the name to a
    # generated wrapper around pl.read_ndjson that returns a FlowFrame.
    pass
|
|
700
|
+
|
|
701
|
+
|
|
702
|
+
@polars_function_wrapper('fold', return_type="Expr")
def fold(*args, **kwargs) -> 'Expr':  # Type hint 'Expr' refers to flowfile_frame.expr.Expr
    # Placeholder only: the decorator ignores this body and binds the name to a
    # generated wrapper around pl.fold that returns a FlowFile Expr.
    pass
|