Flowfile 0.3.2__py3-none-any.whl → 0.3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +3 -2
- flowfile/web/__init__.py +3 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/METADATA +4 -3
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/RECORD +46 -35
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +13 -18
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +10 -4
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +584 -1002
- flowfile_frame/flow_frame.pyi +368 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/LICENSE +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/WHEEL +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import polars as pl
|
|
2
|
+
from functools import wraps
|
|
3
|
+
from typing import Callable, TypeVar, Type
|
|
4
|
+
from flowfile_frame.utils import _get_function_source
|
|
5
|
+
from flowfile_frame.config import logger
|
|
6
|
+
|
|
7
|
+
# Generic type variable; not referenced in the visible part of this module.
T = TypeVar('T')
|
|
8
|
+
# Type variable for the class passed to (and returned by) add_expr_methods;
# bound to the forward reference 'Expr'.
ExprT = TypeVar('ExprT', bound='Expr')
|
|
9
|
+
# Names of polars Expr methods that add_expr_methods wraps with its passthrough
# wrapper (which tracks whether callable arguments can be converted to code)
# instead of the standard wrapper from create_expr_method_wrapper.
PASSTHROUGH_METHODS = {"map_elements", "map_batches"}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _collect_function_sources(values) -> list:
    """
    Gather the source code of named functions found among call arguments.

    Parameters
    ----------
    values : iterable
        Positional and keyword argument values of a wrapped method call.

    Returns
    -------
    list
        Source strings, in argument order, for every callable value that is
        not a class, is not a lambda, and for which ``_get_function_source``
        returned a non-empty source.
    """
    sources = []
    for value in values:
        # Classes are callable as well, but they are never treated as user functions.
        if not callable(value) or isinstance(value, type):
            continue
        try:
            source, _is_module_level = _get_function_source(value)
            if source and getattr(value, '__name__', '<lambda>') != '<lambda>':
                sources.append(source)
        except Exception:
            # Best effort: a callable whose source cannot be retrieved simply
            # contributes nothing. Unlike a bare ``except:``, this lets
            # KeyboardInterrupt / SystemExit propagate.
            continue
    return sources


def create_expr_method_wrapper(method_name: str, original_method: Callable) -> Callable:
    """
    Creates a wrapper for a polars Expr method that properly integrates with your custom Expr class.

    The wrapper calls the method of the same name on ``self.expr`` and hands the
    outcome to ``self._create_next_expr`` together with the original arguments,
    the ``is_complex`` flag and the sources of any named functions passed in.

    Parameters
    ----------
    method_name : str
        Name of the polars Expr method.
    original_method : Callable
        The original polars Expr method. Only its metadata (name, docstring, ...)
        is used, via ``functools.wraps``; the call itself is dispatched by name
        on ``self.expr``.

    Returns
    -------
    Callable
        A wrapper method appropriate for your Expr class.

    Raises
    ------
    ValueError
        Raised by the returned wrapper when ``self.expr`` is ``None``.
    """
    # Methods that typically change the aggregation status or complexity
    agg_methods = frozenset({
        "sum", "mean", "min", "max", "median", "first", "last", "std", "var",
        "count", "n_unique", "quantile", "implode", "explode"
    })
    # Methods that typically make expressions complex
    complex_methods = frozenset({
        "filter", "map", "shift", "fill_null", "fill_nan", "round", "abs", "alias",
        "cast", "is_between", "over", "sort", "arg_sort", "arg_unique", "arg_min",
        "arg_max", "rolling", "interpolate", "ewm_mean", "ewm_std", "ewm_var",
        "backward_fill", "forward_fill", "rank", "diff", "clip", "dot", "mode",
        "drop_nulls", "drop_nans", "take", "gather", "shift_and_fill"
    })
    # Both answers depend only on method_name, so decide them once per wrapped
    # method instead of rebuilding the sets on every call.
    sets_agg_func = method_name in agg_methods
    marks_complex = method_name in complex_methods

    @wraps(original_method)
    def wrapper(self: "Expr", *args, **kwargs):
        # Check if we have a valid underlying expression
        if self.expr is None:
            raise ValueError(
                f"Cannot call '{method_name}' on Expr with no underlying polars expression."
            )

        # Positional arguments first, then keyword arguments.
        function_sources = _collect_function_sources((*args, *kwargs.values()))

        # Call the method on the underlying polars expression. A failure is only
        # logged at debug level; the next Expr is still created, with
        # result_expr=None.
        try:
            result_expr = getattr(self.expr, method_name)(*args, **kwargs)
        except Exception as e:
            logger.debug(f"Warning: Error in {method_name}() call: {e}")
            result_expr = None

        # Determine new agg_func status
        new_agg_func = method_name if sets_agg_func else self.agg_func

        # Determine if this makes the expression complex
        is_complex = self.is_complex or marks_complex

        # Pass function sources to _create_next_expr
        result = self._create_next_expr(
            *args,
            **kwargs,
            result_expr=result_expr,
            is_complex=is_complex,
            method_name=method_name,
            _function_sources=function_sources
        )

        # Set the agg_func if needed
        if new_agg_func != self.agg_func:
            result.agg_func = new_agg_func

        return result

    return wrapper
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _lookup_named_function_source(func):
    """
    Look up the source of a callable argument given to a passthrough method.

    Parameters
    ----------
    func : Callable
        A callable argument that is not a class.

    Returns
    -------
    tuple
        ``(source, lookup_failed)``. ``source`` is the source string when
        ``func`` has a ``__name__`` other than ``'<lambda>'`` and
        ``_get_function_source`` returned a non-empty source, otherwise ``None``.
        ``lookup_failed`` is ``True`` when the lookup itself raised.
    """
    try:
        source, _is_module_level = _get_function_source(func)
        if source and getattr(func, '__name__', '<lambda>') != '<lambda>':
            return source, False
        return None, False
    except Exception:
        # Narrower than a bare ``except:`` so KeyboardInterrupt / SystemExit
        # still propagate.
        return None, True


def _make_passthrough_method(method_name: str, method_attr: Callable) -> Callable:
    """
    Build the wrapper used for the methods listed in PASSTHROUGH_METHODS.

    The wrapper calls ``method_name`` on ``self.expr`` and passes the result to
    ``self._create_next_expr`` with ``is_complex=True``, the collected function
    sources, and a ``convertable_to_code`` flag that is ``False`` as soon as one
    callable argument is a lambda or has no retrievable source.

    Parameters
    ----------
    method_name : str
        Name of the polars Expr method.
    method_attr : Callable
        The polars Expr method; only its metadata is used (``functools.wraps``).

    Returns
    -------
    Callable
        The passthrough method to attach to the custom Expr class.
    """
    @wraps(method_attr)
    def passthrough_method(self, *args, **kwargs):
        if getattr(self, "expr", None) is None:
            raise ValueError(
                f"Cannot call '{method_name}' on Expr with no underlying polars expression."
            )

        function_sources = []
        convertable_to_code = True

        # Positional arguments first, then keyword arguments.
        candidates = [(arg, True) for arg in args]
        candidates.extend((value, False) for value in kwargs.values())

        for value, is_positional in candidates:
            # Classes are callable too, but are not treated as user functions.
            if not callable(value) or isinstance(value, type):
                continue

            source, lookup_failed = _lookup_named_function_source(value)
            if lookup_failed:
                # Kept as before: a failed lookup adds no source and leaves
                # convertable_to_code untouched.
                continue

            if source is None:
                # Lambda or unnamed function - not convertible. The warning is
                # now emitted for keyword arguments too, not only positional ones.
                logger.warning(
                    f"Warning: Using anonymous functions in {method_name} is not convertable to UI code")
                logger.warning("Consider using defined functions (def abc(a, b, c): return ...), "
                               "In a separate script")
                convertable_to_code = False
                continue

            function_sources.append(source)
            if is_positional:
                # NOTE(review): this instance attribute does not change what
                # repr(value) returns (special methods are looked up on the
                # type); it only matters if other code calls value.__repr__()
                # directly - confirm it is still needed.
                # The name is bound eagerly: the previous closure read the loop
                # variable late and so reported the name of the *last*
                # positional argument.
                try:
                    value.__repr__ = lambda _name=value.__name__: _name
                except (AttributeError, TypeError):
                    # Deliberately ignored: some callables reject attribute
                    # assignment, and the source has already been recorded.
                    pass

        # Call the underlying polars method
        result_expr = getattr(self.expr, method_name)(*args, **kwargs)

        # Return a new expression with the convertable_to_code flag set appropriately
        return self._create_next_expr(
            *args,
            method_name=method_name,
            result_expr=result_expr,
            is_complex=True,
            convertable_to_code=convertable_to_code,
            _function_sources=function_sources,
            **kwargs
        )

    return passthrough_method


def add_expr_methods(cls: Type[ExprT]) -> Type[ExprT]:
    """
    Class decorator that adds all polars Expr methods to a custom Expr class.

    Every public, callable, non-property attribute of ``pl.Expr`` that ``cls``
    does not already expose is attached with ``setattr`` when the decorator runs.
    Names in PASSTHROUGH_METHODS get the passthrough wrapper; all others get the
    wrapper from ``create_expr_method_wrapper``.
    Methods already defined in the class are not overwritten.

    Parameters
    ----------
    cls : Type[ExprT]
        The class to which the methods will be added.

    Returns
    -------
    Type[ExprT]
        The modified class.
    """
    # Get methods already defined in the class (including inherited methods)
    existing_methods = set(dir(cls))
    # Listed once; the original re-evaluated dir(pl.Expr) for every name when
    # computing the overlap below.
    polars_names = dir(pl.Expr)

    # Add all public Expr methods that don't already exist
    for name in polars_names:
        if name in existing_methods or name.startswith('_'):
            continue

        attr = getattr(pl.Expr, name)
        if isinstance(attr, property) or not callable(attr):
            continue

        if name in PASSTHROUGH_METHODS:
            # Passthrough method that marks whether the expression is convertible to code
            setattr(cls, name, _make_passthrough_method(name, attr))
        else:
            # Use standard wrapper for other methods
            setattr(cls, name, create_expr_method_wrapper(name, attr))

    # Public polars methods that the class already had and that were kept as-is.
    overlap = {
        name for name in existing_methods.intersection(polars_names)
        if not name.startswith('_') and callable(getattr(pl.Expr, name))
    }
    if overlap:
        logger.debug(f"Preserved existing methods in {cls.__name__}: {', '.join(sorted(overlap))}")

    return cls
|