Flowfile: flowfile-0.3.2-py3-none-any.whl → flowfile-0.3.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +3 -2
- flowfile/web/__init__.py +3 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/METADATA +4 -3
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/RECORD +46 -35
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +13 -18
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +54 -17
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +2 -1
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +10 -4
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +584 -1002
- flowfile_frame/flow_frame.pyi +368 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/LICENSE +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/WHEEL +0 -0
- {flowfile-0.3.2.dist-info → flowfile-0.3.3.1.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_frame/flow_frame.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
|
-
import
|
|
1
|
+
import inspect
|
|
2
2
|
import os
|
|
3
|
-
from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable
|
|
4
|
-
from pathlib import Path
|
|
3
|
+
from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable, get_args, get_origin
|
|
5
4
|
|
|
6
|
-
import io
|
|
7
5
|
import re
|
|
6
|
+
|
|
8
7
|
import polars as pl
|
|
9
|
-
from polars._typing import (FrameInitTypes, SchemaDefinition, SchemaDict, Orientation, IO, Mapping, PolarsDataType,
|
|
10
|
-
Sequence, CsvEncoding)
|
|
11
8
|
|
|
12
|
-
|
|
9
|
+
from flowfile_frame.lazy_methods import add_lazyframe_methods
|
|
10
|
+
|
|
11
|
+
from polars._typing import (FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
|
|
12
|
+
from collections.abc import Iterator
|
|
13
13
|
from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
|
|
14
14
|
from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
|
|
15
15
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
@@ -19,19 +19,35 @@ from flowfile_core.schemas import input_schema, transform_schema
|
|
|
19
19
|
from flowfile_frame.expr import Expr, Column, lit, col
|
|
20
20
|
from flowfile_frame.selectors import Selector
|
|
21
21
|
from flowfile_frame.group_frame import GroupByFrame
|
|
22
|
-
from flowfile_frame.utils import _parse_inputs_as_iterable, create_flow_graph
|
|
22
|
+
from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
|
|
23
|
+
ensure_inputs_as_iterable)
|
|
23
24
|
from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
|
|
25
|
+
from flowfile_frame.utils import _check_if_convertible_to_code
|
|
26
|
+
from flowfile_frame.config import logger
|
|
27
|
+
|
|
24
28
|
|
|
25
29
|
node_id_counter = 0
|
|
26
30
|
|
|
27
31
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
+
def can_be_expr(param: inspect.Parameter) -> bool:
|
|
33
|
+
"""Check if a parameter can be of type pl.Expr"""
|
|
34
|
+
if param.annotation == inspect.Parameter.empty:
|
|
35
|
+
return False
|
|
36
|
+
|
|
37
|
+
# Check direct match or in Union args
|
|
38
|
+
types = get_args(param.annotation) if get_origin(param.annotation) is Union else [param.annotation]
|
|
39
|
+
return any(t in (pl.Expr, pl.expr.expr.Expr) for t in types)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _contains_lambda_pattern(text: str) -> bool:
|
|
43
|
+
return "<lambda> at" in text
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_method_name_from_code(code: str) -> str | None:
|
|
47
|
+
split_code = code.split("input_df.")
|
|
48
|
+
if len(split_code) > 1:
|
|
49
|
+
return split_code[1].split("(")[0]
|
|
32
50
|
|
|
33
|
-
# Create and export the logger
|
|
34
|
-
logger = logging.getLogger('flow_frame')
|
|
35
51
|
|
|
36
52
|
def _to_string_val(v) -> str:
|
|
37
53
|
if isinstance(v, str):
|
|
@@ -40,12 +56,72 @@ def _to_string_val(v) -> str:
|
|
|
40
56
|
return v
|
|
41
57
|
|
|
42
58
|
|
|
59
|
+
def _extract_expr_parts(expr_obj) -> tuple[str, str]:
|
|
60
|
+
"""
|
|
61
|
+
Extract the pure expression string and any raw definitions (including function sources) from an Expr object.
|
|
62
|
+
|
|
63
|
+
Parameters
|
|
64
|
+
----------
|
|
65
|
+
expr_obj : Expr
|
|
66
|
+
The expression object to extract parts from
|
|
67
|
+
|
|
68
|
+
Returns
|
|
69
|
+
-------
|
|
70
|
+
tuple[str, str]
|
|
71
|
+
A tuple of (pure_expr_str, raw_definitions_str)
|
|
72
|
+
"""
|
|
73
|
+
if not isinstance(expr_obj, Expr):
|
|
74
|
+
# If it's not an Expr, just return its string representation
|
|
75
|
+
return str(expr_obj), ""
|
|
76
|
+
|
|
77
|
+
# Get the basic representation
|
|
78
|
+
pure_expr_str = expr_obj._repr_str
|
|
79
|
+
|
|
80
|
+
# Collect all definitions (function sources)
|
|
81
|
+
raw_definitions = []
|
|
82
|
+
|
|
83
|
+
# Add function sources if any
|
|
84
|
+
if hasattr(expr_obj, '_function_sources') and expr_obj._function_sources:
|
|
85
|
+
# Remove duplicates while preserving order
|
|
86
|
+
unique_sources = []
|
|
87
|
+
seen = set()
|
|
88
|
+
for source in expr_obj._function_sources:
|
|
89
|
+
if source not in seen:
|
|
90
|
+
seen.add(source)
|
|
91
|
+
unique_sources.append(source)
|
|
92
|
+
|
|
93
|
+
if unique_sources:
|
|
94
|
+
raw_definitions.extend(unique_sources)
|
|
95
|
+
|
|
96
|
+
# Join all definitions
|
|
97
|
+
raw_defs_str = "\n\n".join(raw_definitions) if raw_definitions else ""
|
|
98
|
+
|
|
99
|
+
return pure_expr_str, raw_defs_str
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr | None = None,
|
|
103
|
+
group_expr: pl.Expr | None = None) -> None:
|
|
104
|
+
if method_name is None:
|
|
105
|
+
raise NotImplemented("Cannot create a polars lambda expression without the method")
|
|
106
|
+
if polars_expr is None:
|
|
107
|
+
raise NotImplemented("Cannot create polars expressions with lambda function")
|
|
108
|
+
method_ref = getattr(pl.LazyFrame, method_name)
|
|
109
|
+
if method_ref is None:
|
|
110
|
+
raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
|
|
111
|
+
if method_name == 'group_by':
|
|
112
|
+
if group_expr is None:
|
|
113
|
+
raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
|
|
114
|
+
if not all(isinstance(ge, pl.Expr) for ge in group_expr):
|
|
115
|
+
raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
|
|
116
|
+
|
|
117
|
+
|
|
43
118
|
def generate_node_id() -> int:
|
|
44
119
|
global node_id_counter
|
|
45
120
|
node_id_counter += 1
|
|
46
121
|
return node_id_counter
|
|
47
122
|
|
|
48
123
|
|
|
124
|
+
@add_lazyframe_methods
|
|
49
125
|
class FlowFrame:
|
|
50
126
|
"""Main class that wraps FlowDataEngine and maintains the ETL graph."""
|
|
51
127
|
flow_graph: FlowGraph
|
|
@@ -100,13 +176,11 @@ class FlowFrame:
|
|
|
100
176
|
# Extract flow-specific parameters
|
|
101
177
|
node_id = node_id or generate_node_id()
|
|
102
178
|
description = "Data imported from Python object"
|
|
103
|
-
|
|
104
179
|
# Create a new flow graph if none is provided
|
|
105
180
|
if flow_graph is None:
|
|
106
181
|
flow_graph = create_flow_graph()
|
|
107
182
|
|
|
108
183
|
flow_id = flow_graph.flow_id
|
|
109
|
-
|
|
110
184
|
# Convert data to a polars DataFrame/LazyFrame
|
|
111
185
|
try:
|
|
112
186
|
# Use polars to convert from various types
|
|
@@ -121,25 +195,23 @@ class FlowFrame:
|
|
|
121
195
|
)
|
|
122
196
|
pl_data = pl_df.lazy()
|
|
123
197
|
except Exception as e:
|
|
124
|
-
raise ValueError(f"Could not
|
|
125
|
-
|
|
198
|
+
raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
|
|
126
199
|
# Create a FlowDataEngine to get data in the right format for manual input
|
|
127
200
|
flow_table = FlowDataEngine(raw_data=pl_data)
|
|
128
|
-
|
|
201
|
+
raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
|
|
202
|
+
columns=[c.get_minimal_field_info() for c in flow_table.schema])
|
|
129
203
|
# Create a manual input node
|
|
130
204
|
input_node = input_schema.NodeManualInput(
|
|
131
205
|
flow_id=flow_id,
|
|
132
206
|
node_id=node_id,
|
|
133
|
-
|
|
207
|
+
raw_data_format=raw_data_format,
|
|
134
208
|
pos_x=100,
|
|
135
209
|
pos_y=100,
|
|
136
210
|
is_setup=True,
|
|
137
211
|
description=description,
|
|
138
212
|
)
|
|
139
|
-
|
|
140
213
|
# Add to graph
|
|
141
214
|
flow_graph.add_manual_input(input_node)
|
|
142
|
-
|
|
143
215
|
# Return new frame
|
|
144
216
|
return FlowFrame(
|
|
145
217
|
data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
|
|
@@ -163,7 +235,6 @@ class FlowFrame:
|
|
|
163
235
|
parent_node_id=None,
|
|
164
236
|
):
|
|
165
237
|
"""Create a new FlowFrame instance."""
|
|
166
|
-
|
|
167
238
|
# If data is not a LazyFrame, use the factory method
|
|
168
239
|
if data is not None and not isinstance(data, pl.LazyFrame):
|
|
169
240
|
return cls.create_from_any_type(
|
|
@@ -179,7 +250,6 @@ class FlowFrame:
|
|
|
179
250
|
parent_node_id=parent_node_id,
|
|
180
251
|
)
|
|
181
252
|
|
|
182
|
-
# Otherwise create the instance normally
|
|
183
253
|
instance = super().__new__(cls)
|
|
184
254
|
return instance
|
|
185
255
|
|
|
@@ -198,7 +268,6 @@ class FlowFrame:
|
|
|
198
268
|
parent_node_id=None,
|
|
199
269
|
):
|
|
200
270
|
"""Initialize the FlowFrame with data and graph references."""
|
|
201
|
-
|
|
202
271
|
if data is None:
|
|
203
272
|
data = pl.LazyFrame()
|
|
204
273
|
if not isinstance(data, pl.LazyFrame):
|
|
@@ -230,205 +299,235 @@ class FlowFrame:
|
|
|
230
299
|
def _create_child_frame(self, new_node_id):
|
|
231
300
|
"""Helper method to create a new FlowFrame that's a child of this one"""
|
|
232
301
|
self._add_connection(self.node_id, new_node_id)
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
302
|
+
try:
|
|
303
|
+
return FlowFrame(
|
|
304
|
+
data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
|
|
305
|
+
flow_graph=self.flow_graph,
|
|
306
|
+
node_id=new_node_id,
|
|
307
|
+
parent_node_id=self.node_id,
|
|
308
|
+
)
|
|
309
|
+
except AttributeError:
|
|
310
|
+
raise ValueError('Could not execute the function')
|
|
239
311
|
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
description: str = None,
|
|
249
|
-
):
|
|
312
|
+
@staticmethod
|
|
313
|
+
def _generate_sort_polars_code(
|
|
314
|
+
pure_sort_expr_strs: List[str],
|
|
315
|
+
descending_values: List[bool],
|
|
316
|
+
nulls_last_values: List[bool],
|
|
317
|
+
multithreaded: bool,
|
|
318
|
+
maintain_order: bool,
|
|
319
|
+
) -> str:
|
|
250
320
|
"""
|
|
251
|
-
|
|
321
|
+
Generates the `input_df.sort(...)` Polars code string using pure expression strings.
|
|
322
|
+
"""
|
|
323
|
+
kwargs_for_code: Dict[str, Any] = {}
|
|
324
|
+
if any(descending_values):
|
|
325
|
+
kwargs_for_code["descending"] = descending_values[0] if len(descending_values) == 1 else descending_values
|
|
326
|
+
if any(nulls_last_values):
|
|
327
|
+
kwargs_for_code["nulls_last"] = nulls_last_values[0] if len(nulls_last_values) == 1 else nulls_last_values
|
|
328
|
+
if not multithreaded:
|
|
329
|
+
kwargs_for_code["multithreaded"] = multithreaded
|
|
330
|
+
if maintain_order:
|
|
331
|
+
kwargs_for_code["maintain_order"] = maintain_order
|
|
252
332
|
|
|
253
|
-
|
|
254
|
-
-----------
|
|
255
|
-
by : Expr, str, or list of Expr/str
|
|
256
|
-
Column(s) to sort by. Accepts expression input. Strings are parsed as column names.
|
|
257
|
-
*more_by : Expr or str
|
|
258
|
-
Additional columns to sort by, specified as positional arguments.
|
|
259
|
-
descending : bool or list of bool, default False
|
|
260
|
-
Sort in descending order. When sorting by multiple columns, can be specified per column.
|
|
261
|
-
nulls_last : bool or list of bool, default False
|
|
262
|
-
Place null values last; can specify a single boolean or a sequence for per-column control.
|
|
263
|
-
multithreaded : bool, default True
|
|
264
|
-
Sort using multiple threads.
|
|
265
|
-
maintain_order : bool, default False
|
|
266
|
-
Whether the order should be maintained if elements are equal.
|
|
267
|
-
description : str, optional
|
|
268
|
-
Description of this operation for the ETL graph.
|
|
333
|
+
kwargs_str_for_code = ", ".join(f"{k}={repr(v)}" for k, v in kwargs_for_code.items())
|
|
269
334
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
335
|
+
by_arg_for_code = pure_sort_expr_strs[0] if len(
|
|
336
|
+
pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
|
|
337
|
+
return f"input_df.sort({by_arg_for_code}{', ' + kwargs_str_for_code if kwargs_str_for_code else ''})"
|
|
338
|
+
|
|
339
|
+
def sort(
|
|
340
|
+
self,
|
|
341
|
+
by: Union[List[Union[Expr, str]], Expr, str],
|
|
342
|
+
*more_by: Union[Expr, str],
|
|
343
|
+
descending: Union[bool, List[bool]] = False,
|
|
344
|
+
nulls_last: Union[bool, List[bool]] = False,
|
|
345
|
+
multithreaded: bool = True,
|
|
346
|
+
maintain_order: bool = False,
|
|
347
|
+
description: Optional[str] = None,
|
|
348
|
+
) -> "FlowFrame":
|
|
349
|
+
"""
|
|
350
|
+
Sort the dataframe by the given columns.
|
|
274
351
|
"""
|
|
275
|
-
|
|
352
|
+
initial_by_args = list(_parse_inputs_as_iterable((by,)))
|
|
276
353
|
new_node_id = generate_node_id()
|
|
277
|
-
|
|
354
|
+
|
|
355
|
+
sort_expressions_input: list = initial_by_args
|
|
278
356
|
if more_by:
|
|
279
|
-
|
|
357
|
+
sort_expressions_input.extend(list(_parse_inputs_as_iterable(more_by)))
|
|
280
358
|
|
|
281
|
-
|
|
282
|
-
|
|
359
|
+
all_processed_expr_objects: List[Expr] = []
|
|
360
|
+
pure_polars_expr_strings_for_sort: List[str] = []
|
|
361
|
+
collected_raw_definitions: List[str] = []
|
|
362
|
+
column_names_for_native_node: List[str] = []
|
|
283
363
|
|
|
284
|
-
|
|
285
|
-
for expr in sort_expressions:
|
|
286
|
-
if not isinstance(expr, (str, Column)) or (
|
|
287
|
-
isinstance(expr, Column) and expr._select_input.is_altered
|
|
288
|
-
):
|
|
289
|
-
needs_polars_code = True
|
|
290
|
-
break
|
|
364
|
+
use_polars_code_path = False
|
|
291
365
|
|
|
292
|
-
# Also need polars code if we're using maintain_order or multithreaded params
|
|
293
366
|
if maintain_order or not multithreaded:
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
if
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
needs_polars_code = True
|
|
317
|
-
else:
|
|
318
|
-
nulls_last_values = [nulls_last] * len(sort_expressions)
|
|
319
|
-
# Non-default nulls_last needs polars code
|
|
320
|
-
if nulls_last:
|
|
321
|
-
needs_polars_code = True
|
|
322
|
-
|
|
323
|
-
if needs_polars_code:
|
|
324
|
-
# Generate polars code for complex cases
|
|
325
|
-
code = self._generate_sort_polars_code(
|
|
326
|
-
sort_expressions,
|
|
327
|
-
descending_values,
|
|
328
|
-
nulls_last_values,
|
|
329
|
-
multithreaded,
|
|
330
|
-
maintain_order,
|
|
331
|
-
)
|
|
332
|
-
self._add_polars_code(new_node_id, code, description)
|
|
333
|
-
else:
|
|
334
|
-
# Use native implementation for simple cases
|
|
335
|
-
sort_inputs = []
|
|
336
|
-
for i, expr in enumerate(sort_expressions):
|
|
337
|
-
# Convert expr to column name
|
|
338
|
-
if isinstance(expr, Column):
|
|
339
|
-
column_name = expr.name
|
|
340
|
-
elif isinstance(expr, str):
|
|
341
|
-
column_name = expr
|
|
367
|
+
use_polars_code_path = True
|
|
368
|
+
|
|
369
|
+
is_nulls_last_list = isinstance(nulls_last, (list, tuple))
|
|
370
|
+
if is_nulls_last_list and any(val for val in nulls_last if val is not False):
|
|
371
|
+
use_polars_code_path = True
|
|
372
|
+
elif not is_nulls_last_list and nulls_last is not False:
|
|
373
|
+
use_polars_code_path = True
|
|
374
|
+
|
|
375
|
+
for expr_input in sort_expressions_input:
|
|
376
|
+
current_expr_obj: Expr
|
|
377
|
+
is_simple_col_for_native = False
|
|
378
|
+
|
|
379
|
+
if isinstance(expr_input, str):
|
|
380
|
+
current_expr_obj = col(expr_input)
|
|
381
|
+
column_names_for_native_node.append(expr_input)
|
|
382
|
+
is_simple_col_for_native = True
|
|
383
|
+
elif isinstance(expr_input, Column):
|
|
384
|
+
current_expr_obj = expr_input
|
|
385
|
+
# Type ignore below due to simplified Column stub
|
|
386
|
+
if not expr_input._select_input.is_altered: # type: ignore
|
|
387
|
+
column_names_for_native_node.append(expr_input.column_name) # type: ignore
|
|
388
|
+
is_simple_col_for_native = True
|
|
342
389
|
else:
|
|
343
|
-
|
|
390
|
+
use_polars_code_path = True # Altered Column implies complex expression
|
|
391
|
+
elif isinstance(expr_input, Expr):
|
|
392
|
+
current_expr_obj = expr_input
|
|
393
|
+
use_polars_code_path = True # General Expr implies complex expression
|
|
394
|
+
else: # Convert other types to lit
|
|
395
|
+
current_expr_obj = lit(expr_input)
|
|
396
|
+
use_polars_code_path = True # Literal might be part of a complex sort for Polars code
|
|
397
|
+
|
|
398
|
+
all_processed_expr_objects.append(current_expr_obj)
|
|
399
|
+
|
|
400
|
+
pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
|
|
401
|
+
pure_polars_expr_strings_for_sort.append(pure_expr_str)
|
|
402
|
+
|
|
403
|
+
if raw_defs_str:
|
|
404
|
+
if raw_defs_str not in collected_raw_definitions:
|
|
405
|
+
collected_raw_definitions.append(raw_defs_str)
|
|
406
|
+
use_polars_code_path = True
|
|
407
|
+
|
|
408
|
+
if not is_simple_col_for_native: # If it wasn't a simple string or unaltered Column
|
|
409
|
+
use_polars_code_path = True
|
|
410
|
+
|
|
411
|
+
desc_values = list(descending) if isinstance(descending, list) else [descending] * len(
|
|
412
|
+
all_processed_expr_objects)
|
|
413
|
+
null_last_values = list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(
|
|
414
|
+
all_processed_expr_objects)
|
|
415
|
+
|
|
416
|
+
if len(desc_values) != len(all_processed_expr_objects):
|
|
417
|
+
raise ValueError("Length of 'descending' does not match the number of sort expressions.")
|
|
418
|
+
if len(null_last_values) != len(all_processed_expr_objects):
|
|
419
|
+
raise ValueError("Length of 'nulls_last' does not match the number of sort expressions.")
|
|
420
|
+
|
|
421
|
+
if use_polars_code_path:
|
|
422
|
+
polars_operation_code = self._generate_sort_polars_code(
|
|
423
|
+
pure_polars_expr_strings_for_sort, desc_values, null_last_values, multithreaded, maintain_order
|
|
424
|
+
)
|
|
344
425
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
426
|
+
final_code_for_node: str
|
|
427
|
+
if collected_raw_definitions:
|
|
428
|
+
unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
|
|
429
|
+
definitions_section = "\n\n".join(unique_raw_definitions)
|
|
430
|
+
final_code_for_node = definitions_section + \
|
|
431
|
+
"\#─────SPLIT─────\n\n" + \
|
|
432
|
+
f"output_df = {polars_operation_code}"
|
|
433
|
+
else:
|
|
434
|
+
final_code_for_node = polars_operation_code
|
|
435
|
+
|
|
436
|
+
pl_expressions_for_fallback = [e.expr for e in all_processed_expr_objects if
|
|
437
|
+
hasattr(e, 'expr') and e.expr is not None]
|
|
438
|
+
kwargs_for_fallback = {
|
|
439
|
+
"descending": desc_values[0] if len(desc_values) == 1 else desc_values,
|
|
440
|
+
"nulls_last": null_last_values[0] if len(null_last_values) == 1 else null_last_values,
|
|
441
|
+
"multithreaded": multithreaded, "maintain_order": maintain_order}
|
|
442
|
+
|
|
443
|
+
self._add_polars_code(new_node_id, final_code_for_node, description, method_name="sort",
|
|
444
|
+
convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
|
|
445
|
+
polars_expr=pl_expressions_for_fallback,
|
|
446
|
+
kwargs_expr=kwargs_for_fallback)
|
|
447
|
+
else:
|
|
448
|
+
sort_inputs_for_node = []
|
|
449
|
+
for i, col_name_for_native in enumerate(column_names_for_native_node):
|
|
450
|
+
sort_inputs_for_node.append(
|
|
451
|
+
transform_schema.SortByInput(column=col_name_for_native, how="desc" if desc_values[i] else "asc")
|
|
452
|
+
# type: ignore
|
|
351
453
|
)
|
|
352
|
-
|
|
353
454
|
sort_settings = input_schema.NodeSort(
|
|
354
|
-
flow_id=self.flow_graph.flow_id,
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
pos_x=200,
|
|
358
|
-
pos_y=150,
|
|
359
|
-
is_setup=True,
|
|
360
|
-
depending_on_id=self.node_id,
|
|
361
|
-
description=description
|
|
362
|
-
or f"Sort by {', '.join(str(e) for e in sort_expressions)}",
|
|
363
|
-
)
|
|
455
|
+
flow_id=self.flow_graph.flow_id, node_id=new_node_id, sort_input=sort_inputs_for_node, # type: ignore
|
|
456
|
+
pos_x=200, pos_y=150, is_setup=True, depending_on_id=self.node_id,
|
|
457
|
+
description=description or f"Sort by {', '.join(column_names_for_native_node)}")
|
|
364
458
|
self.flow_graph.add_sort(sort_settings)
|
|
365
459
|
|
|
366
460
|
return self._create_child_frame(new_node_id)
|
|
367
461
|
|
|
368
|
-
def
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
kwargs["descending"] = descending_values
|
|
402
|
-
|
|
403
|
-
# Only add nulls_last if it's non-default
|
|
404
|
-
if any(nl for nl in nulls_last_values):
|
|
405
|
-
if len(nulls_last_values) == 1:
|
|
406
|
-
kwargs["nulls_last"] = nulls_last_values[0]
|
|
462
|
+
def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
|
|
463
|
+
depending_on_ids: List[str] | None = None, convertable_to_code: bool = True,
|
|
464
|
+
method_name: str = None, polars_expr: Expr | List[Expr] | None = None,
|
|
465
|
+
group_expr: Expr | List[Expr] | None = None,
|
|
466
|
+
kwargs_expr: Dict | None = None,
|
|
467
|
+
group_kwargs: Dict | None = None, ):
|
|
468
|
+
polars_code_for_node: str
|
|
469
|
+
if not convertable_to_code or _contains_lambda_pattern(code):
|
|
470
|
+
|
|
471
|
+
effective_method_name = get_method_name_from_code(
|
|
472
|
+
code) if method_name is None and "input_df." in code else method_name
|
|
473
|
+
|
|
474
|
+
pl_expr_list = ensure_inputs_as_iterable(polars_expr) if polars_expr is not None else []
|
|
475
|
+
group_expr_list = ensure_inputs_as_iterable(group_expr) if group_expr is not None else []
|
|
476
|
+
|
|
477
|
+
_check_ok_for_serialization(polars_expr=pl_expr_list, method_name=effective_method_name,
|
|
478
|
+
group_expr=group_expr_list)
|
|
479
|
+
|
|
480
|
+
current_kwargs_expr = kwargs_expr if kwargs_expr is not None else {}
|
|
481
|
+
result_lazyframe_or_expr: Any
|
|
482
|
+
|
|
483
|
+
if effective_method_name == "group_by":
|
|
484
|
+
group_kwargs = {} if group_kwargs is None else group_kwargs
|
|
485
|
+
if not group_expr_list:
|
|
486
|
+
raise ValueError("group_expr is required for group_by method in serialization fallback.")
|
|
487
|
+
target_obj = getattr(self.data, effective_method_name)(*group_expr_list, **group_kwargs)
|
|
488
|
+
if not pl_expr_list:
|
|
489
|
+
raise ValueError(
|
|
490
|
+
"Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback.")
|
|
491
|
+
result_lazyframe_or_expr = target_obj.agg(*pl_expr_list, **current_kwargs_expr)
|
|
492
|
+
elif effective_method_name:
|
|
493
|
+
result_lazyframe_or_expr = getattr(self.data, effective_method_name)(*pl_expr_list,
|
|
494
|
+
**current_kwargs_expr)
|
|
407
495
|
else:
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
496
|
+
raise ValueError(
|
|
497
|
+
"Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback.")
|
|
498
|
+
try:
|
|
499
|
+
if isinstance(result_lazyframe_or_expr, pl.LazyFrame):
|
|
500
|
+
serialized_value_for_code = result_lazyframe_or_expr.serialize(format='json')
|
|
501
|
+
polars_code_for_node = "\n".join([
|
|
502
|
+
f"serialized_value = r'''{serialized_value_for_code}'''",
|
|
503
|
+
"buffer = BytesIO(serialized_value.encode('utf-8'))",
|
|
504
|
+
"output_df = pl.LazyFrame.deserialize(buffer, format='json')",
|
|
505
|
+
])
|
|
506
|
+
logger.warning(
|
|
507
|
+
f"Transformation '{effective_method_name}' uses non-serializable elements. "
|
|
508
|
+
"Falling back to serializing the resulting Polars LazyFrame object."
|
|
509
|
+
"This will result in a breaking graph when using the the ui."
|
|
510
|
+
)
|
|
511
|
+
else:
|
|
512
|
+
logger.error(
|
|
513
|
+
f"Fallback for non-convertible code for method '{effective_method_name}' "
|
|
514
|
+
f"resulted in a '{type(result_lazyframe_or_expr).__name__}' instead of a Polars LazyFrame. "
|
|
515
|
+
"This type cannot be persisted as a LazyFrame node via this fallback."
|
|
516
|
+
)
|
|
517
|
+
return FlowFrame(result_lazyframe_or_expr, flow_graph=self.flow_graph, node_id=new_node_id)
|
|
518
|
+
except Exception as e:
|
|
519
|
+
logger.warning(
|
|
520
|
+
f"Critical error: Could not serialize the result of operation '{effective_method_name}' "
|
|
521
|
+
f"during fallback for non-convertible code. Error: {e}."
|
|
522
|
+
"When using a lambda function, consider defining the function first"
|
|
523
|
+
)
|
|
524
|
+
return FlowFrame(result_lazyframe_or_expr, flow_graph=self.flow_graph, node_id=new_node_id)
|
|
423
525
|
else:
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
|
|
427
|
-
depending_on_ids: List[str] | None = None):
|
|
526
|
+
polars_code_for_node = code
|
|
428
527
|
polars_code_settings = input_schema.NodePolarsCode(
|
|
429
528
|
flow_id=self.flow_graph.flow_id,
|
|
430
529
|
node_id=new_node_id,
|
|
431
|
-
polars_code_input=transform_schema.PolarsCodeInput(polars_code=
|
|
530
|
+
polars_code_input=transform_schema.PolarsCodeInput(polars_code=polars_code_for_node),
|
|
432
531
|
is_setup=True,
|
|
433
532
|
depending_on_ids=depending_on_ids if depending_on_ids is not None else [self.node_id],
|
|
434
533
|
description=description,
|
|
@@ -469,14 +568,17 @@ class FlowFrame:
|
|
|
469
568
|
validate : {"1:1", "1:m", "m:1", "m:m"}, optional
|
|
470
569
|
Validate join relationship.
|
|
471
570
|
nulls_equal:
|
|
472
|
-
Join on null values. By default null values will never produce matches.
|
|
571
|
+
Join on null values. By default, null values will never produce matches.
|
|
473
572
|
coalesce:
|
|
474
573
|
None: -> join specific.
|
|
475
574
|
True: -> Always coalesce join columns.
|
|
476
575
|
False: -> Never coalesce join columns.
|
|
477
576
|
maintain_order:
|
|
478
|
-
Which DataFrame row order to preserve, if any. Do not rely on any observed ordering without explicitly
|
|
479
|
-
|
|
577
|
+
Which DataFrame row order to preserve, if any. Do not rely on any observed ordering without explicitly
|
|
578
|
+
setting this parameter, as your code may break in a future release.
|
|
579
|
+
Not specifying any ordering can improve performance Supported for inner, left, right and full joins
|
|
580
|
+
None: No specific ordering is desired. The ordering might differ across Polars versions or even between
|
|
581
|
+
different runs.
|
|
480
582
|
left: Preserves the order of the left DataFrame.
|
|
481
583
|
right: Preserves the order of the right DataFrame.
|
|
482
584
|
left_right: First preserves the order of the left DataFrame, then the right.
|
|
@@ -494,6 +596,7 @@ class FlowFrame:
|
|
|
494
596
|
nulls_equal is False and
|
|
495
597
|
validate is None and
|
|
496
598
|
suffix == '_right')
|
|
599
|
+
|
|
497
600
|
join_mappings = None
|
|
498
601
|
if self.flow_graph.flow_id != other.flow_graph.flow_id:
|
|
499
602
|
combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
|
|
@@ -508,6 +611,7 @@ class FlowFrame:
|
|
|
508
611
|
global node_id_counter
|
|
509
612
|
node_id_counter += len(combined_graph.nodes)
|
|
510
613
|
new_node_id = generate_node_id()
|
|
614
|
+
|
|
511
615
|
if on is not None:
|
|
512
616
|
left_columns = right_columns = _normalize_columns_to_list(on)
|
|
513
617
|
elif left_on is not None and right_on is not None:
|
|
@@ -526,10 +630,11 @@ class FlowFrame:
|
|
|
526
630
|
)
|
|
527
631
|
if not use_polars_code:
|
|
528
632
|
join_mappings, use_polars_code = _create_join_mappings(
|
|
529
|
-
left_columns, right_columns
|
|
633
|
+
left_columns or [], right_columns or []
|
|
530
634
|
)
|
|
531
635
|
|
|
532
636
|
if use_polars_code or suffix != '_right':
|
|
637
|
+
|
|
533
638
|
_on = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(on)) + "]" if on else None
|
|
534
639
|
_left = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in left_columns) + "]" if left_on else None
|
|
535
640
|
_right = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in right_columns) + "]" if right_on else None
|
|
@@ -549,31 +654,50 @@ class FlowFrame:
|
|
|
549
654
|
parent_node_id=self.node_id,
|
|
550
655
|
)
|
|
551
656
|
|
|
552
|
-
elif join_mappings:
|
|
657
|
+
elif join_mappings or how == 'cross':
|
|
658
|
+
|
|
553
659
|
left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
|
|
554
660
|
right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
|
|
555
661
|
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
662
|
+
if how == 'cross':
|
|
663
|
+
join_input = transform_schema.CrossJoinInput(left_select=left_select.renames,
|
|
664
|
+
right_select=right_select.renames,)
|
|
665
|
+
else:
|
|
666
|
+
join_input = transform_schema.JoinInput(
|
|
667
|
+
join_mapping=join_mappings,
|
|
668
|
+
left_select=left_select.renames,
|
|
669
|
+
right_select=right_select.renames,
|
|
670
|
+
how=how,
|
|
671
|
+
)
|
|
672
|
+
|
|
562
673
|
join_input.auto_rename()
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
674
|
+
if how == 'cross':
|
|
675
|
+
cross_join_settings = input_schema.NodeCrossJoin(
|
|
676
|
+
flow_id=self.flow_graph.flow_id,
|
|
677
|
+
node_id=new_node_id,
|
|
678
|
+
cross_join_input=join_input,
|
|
679
|
+
is_setup=True,
|
|
680
|
+
depending_on_ids=[self.node_id, other.node_id],
|
|
681
|
+
description=description or f"Join with {how} strategy",
|
|
682
|
+
auto_generate_selection=True,
|
|
683
|
+
verify_integrity=True,
|
|
684
|
+
)
|
|
685
|
+
|
|
686
|
+
self.flow_graph.add_cross_join(cross_join_settings)
|
|
687
|
+
else:
|
|
688
|
+
join_settings = input_schema.NodeJoin(
|
|
689
|
+
flow_id=self.flow_graph.flow_id,
|
|
690
|
+
node_id=new_node_id,
|
|
691
|
+
join_input=join_input,
|
|
692
|
+
auto_generate_selection=True,
|
|
693
|
+
verify_integrity=True,
|
|
694
|
+
pos_x=200,
|
|
695
|
+
pos_y=150,
|
|
696
|
+
is_setup=True,
|
|
697
|
+
depending_on_ids=[self.node_id, other.node_id],
|
|
698
|
+
description=description or f"Join with {how} strategy",
|
|
699
|
+
)
|
|
700
|
+
self.flow_graph.add_join(join_settings)
|
|
577
701
|
self._add_connection(self.node_id, new_node_id, "main")
|
|
578
702
|
other._add_connection(other.node_id, new_node_id, "right")
|
|
579
703
|
result_frame = FlowFrame(
|
|
@@ -600,38 +724,65 @@ class FlowFrame:
|
|
|
600
724
|
self.flow_graph.add_record_count(node_number_of_records)
|
|
601
725
|
return self._create_child_frame(new_node_id)
|
|
602
726
|
|
|
603
|
-
def select(self, *columns, description: str = None):
|
|
727
|
+
def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None) -> "FlowFrame":
|
|
604
728
|
"""
|
|
605
729
|
Select columns from the frame.
|
|
606
|
-
|
|
607
|
-
Args:
|
|
608
|
-
*columns: Column names or expressions
|
|
609
|
-
description: Description of the step, this will be shown in the flowfile file
|
|
610
|
-
|
|
611
|
-
Returns:
|
|
612
|
-
A new FlowFrame with selected columns
|
|
613
730
|
"""
|
|
614
|
-
|
|
615
|
-
columns = _parse_inputs_as_iterable(columns)
|
|
731
|
+
columns_iterable = list(_parse_inputs_as_iterable(columns))
|
|
616
732
|
new_node_id = generate_node_id()
|
|
617
|
-
existing_columns = self.columns
|
|
618
733
|
|
|
619
|
-
if (len(
|
|
620
|
-
and str(
|
|
734
|
+
if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
|
|
735
|
+
and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
|
|
621
736
|
return self._add_number_of_records(new_node_id, description)
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
737
|
+
|
|
738
|
+
all_input_expr_objects: List[Expr] = []
|
|
739
|
+
pure_polars_expr_strings_for_select: List[str] = []
|
|
740
|
+
collected_raw_definitions: List[str] = []
|
|
741
|
+
selected_col_names_for_native: List[transform_schema.SelectInput] = [] # For native node
|
|
742
|
+
|
|
743
|
+
can_use_native_node = True
|
|
744
|
+
if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == '*':
|
|
745
|
+
effective_columns_iterable = [col(c_name) for c_name in self.columns]
|
|
746
|
+
else:
|
|
747
|
+
effective_columns_iterable = columns_iterable
|
|
748
|
+
for expr_input in effective_columns_iterable:
|
|
749
|
+
current_expr_obj = expr_input
|
|
750
|
+
is_simple_col_for_native = False
|
|
751
|
+
|
|
752
|
+
if isinstance(expr_input, str):
|
|
753
|
+
current_expr_obj = col(expr_input)
|
|
754
|
+
selected_col_names_for_native.append(transform_schema.SelectInput(old_name=expr_input))
|
|
755
|
+
is_simple_col_for_native = True
|
|
756
|
+
elif isinstance(expr_input, Column):
|
|
757
|
+
selected_col_names_for_native.append(expr_input.to_select_input())
|
|
758
|
+
is_simple_col_for_native = True
|
|
759
|
+
elif isinstance(expr_input, Selector):
|
|
760
|
+
can_use_native_node = False
|
|
761
|
+
elif not isinstance(expr_input, Expr):
|
|
762
|
+
current_expr_obj = lit(expr_input)
|
|
763
|
+
|
|
764
|
+
all_input_expr_objects.append(current_expr_obj) # type: ignore
|
|
765
|
+
|
|
766
|
+
pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
|
|
767
|
+
|
|
768
|
+
pure_polars_expr_strings_for_select.append(pure_expr_str)
|
|
769
|
+
if raw_defs_str and raw_defs_str not in collected_raw_definitions:
|
|
770
|
+
collected_raw_definitions.append(raw_defs_str)
|
|
771
|
+
|
|
772
|
+
if not is_simple_col_for_native and not isinstance(expr_input, Selector):
|
|
773
|
+
can_use_native_node = False
|
|
774
|
+
if collected_raw_definitions: # Has to use Polars code if there are definitions
|
|
775
|
+
can_use_native_node = False
|
|
776
|
+
if can_use_native_node:
|
|
777
|
+
existing_cols = self.columns
|
|
778
|
+
selected_col_names = {select_col.old_name for select_col in selected_col_names_for_native}
|
|
779
|
+
dropped_columns = [transform_schema.SelectInput(c, keep=False) for c in existing_cols if
|
|
780
|
+
c not in selected_col_names]
|
|
781
|
+
selected_col_names_for_native.extend(dropped_columns)
|
|
631
782
|
select_settings = input_schema.NodeSelect(
|
|
632
783
|
flow_id=self.flow_graph.flow_id,
|
|
633
784
|
node_id=new_node_id,
|
|
634
|
-
select_input=
|
|
785
|
+
select_input=selected_col_names_for_native,
|
|
635
786
|
keep_missing=False,
|
|
636
787
|
pos_x=200,
|
|
637
788
|
pos_y=100,
|
|
@@ -639,60 +790,97 @@ class FlowFrame:
|
|
|
639
790
|
depending_on_id=self.node_id,
|
|
640
791
|
description=description
|
|
641
792
|
)
|
|
642
|
-
|
|
643
|
-
# Add to graph
|
|
644
793
|
self.flow_graph.add_select(select_settings)
|
|
645
|
-
return self._create_child_frame(new_node_id)
|
|
646
|
-
|
|
647
794
|
else:
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
print('warning this cannot be converted to flowfile frontend. Make sure you use the flowfile expr')
|
|
657
|
-
is_readable = False
|
|
658
|
-
elif isinstance(col_, str) and col_ in self.columns:
|
|
659
|
-
col_expr = Column(col_)
|
|
660
|
-
readable_exprs.append(col_expr)
|
|
661
|
-
else:
|
|
662
|
-
lit_expr = lit(col_)
|
|
663
|
-
readable_exprs.append(lit_expr)
|
|
664
|
-
if is_readable:
|
|
665
|
-
code = f"input_df.select([{', '.join(str(e) for e in readable_exprs)}])"
|
|
795
|
+
polars_operation_code = f"input_df.select([{', '.join(pure_polars_expr_strings_for_select)}])"
|
|
796
|
+
final_code_for_node: str
|
|
797
|
+
if collected_raw_definitions:
|
|
798
|
+
unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
|
|
799
|
+
definitions_section = "\n\n".join(unique_raw_definitions)
|
|
800
|
+
final_code_for_node = definitions_section + \
|
|
801
|
+
"\#─────SPLIT─────\n\n" + \
|
|
802
|
+
f"output_df = {polars_operation_code}"
|
|
666
803
|
else:
|
|
667
|
-
|
|
804
|
+
final_code_for_node = polars_operation_code
|
|
668
805
|
|
|
669
|
-
|
|
670
|
-
|
|
806
|
+
pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
|
|
807
|
+
isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
|
|
808
|
+
self._add_polars_code(new_node_id, final_code_for_node, description,
|
|
809
|
+
method_name="select",
|
|
810
|
+
convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
|
|
811
|
+
polars_expr=pl_expressions_for_fallback)
|
|
671
812
|
|
|
672
|
-
|
|
813
|
+
return self._create_child_frame(new_node_id)
|
|
814
|
+
|
|
815
|
+
def filter(self, *predicates: Union[Expr, Any], flowfile_formula: Optional[str] = None,
|
|
816
|
+
description: Optional[str] = None, **constraints: Any) -> "FlowFrame":
|
|
673
817
|
"""
|
|
674
818
|
Filter rows based on a predicate.
|
|
675
|
-
|
|
676
|
-
Args:
|
|
677
|
-
predicate: Filter condition
|
|
678
|
-
flowfile_formula: Native support in frontend
|
|
679
|
-
description: Description of the step that is performed
|
|
680
|
-
Returns:
|
|
681
|
-
A new FlowFrame with filtered rows
|
|
682
819
|
"""
|
|
820
|
+
if (len(predicates) > 0 or len(constraints) > 0) and flowfile_formula:
|
|
821
|
+
raise ValueError("You can only use one of the following: predicates, constraints or flowfile_formula")
|
|
822
|
+
available_columns = self.columns
|
|
683
823
|
new_node_id = generate_node_id()
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
824
|
+
if len(predicates) > 0 or len(constraints) > 0:
|
|
825
|
+
all_input_expr_objects: List[Expr] = []
|
|
826
|
+
pure_polars_expr_strings: List[str] = []
|
|
827
|
+
collected_raw_definitions: List[str] = []
|
|
828
|
+
|
|
829
|
+
processed_predicates = []
|
|
830
|
+
for pred_item in predicates:
|
|
831
|
+
if isinstance(pred_item, (tuple, list, Iterator)):
|
|
832
|
+
# If it's a sequence, extend the processed_predicates with its elements
|
|
833
|
+
processed_predicates.extend(list(pred_item))
|
|
834
|
+
else:
|
|
835
|
+
# Otherwise, just add the item
|
|
836
|
+
processed_predicates.append(pred_item)
|
|
837
|
+
|
|
838
|
+
for pred_input in processed_predicates: # Loop over the processed_predicates
|
|
839
|
+
# End of the new/modified section
|
|
840
|
+
current_expr_obj = None # Initialize current_expr_obj
|
|
841
|
+
if isinstance(pred_input, Expr):
|
|
842
|
+
current_expr_obj = pred_input
|
|
843
|
+
elif isinstance(pred_input, str) and pred_input in available_columns:
|
|
844
|
+
current_expr_obj = col(pred_input)
|
|
845
|
+
else:
|
|
846
|
+
current_expr_obj = lit(pred_input)
|
|
847
|
+
|
|
848
|
+
all_input_expr_objects.append(current_expr_obj)
|
|
849
|
+
|
|
850
|
+
pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
|
|
851
|
+
pure_polars_expr_strings.append(f"({pure_expr_str})")
|
|
852
|
+
if raw_defs_str and raw_defs_str not in collected_raw_definitions:
|
|
853
|
+
collected_raw_definitions.append(raw_defs_str)
|
|
854
|
+
|
|
855
|
+
for k, v_val in constraints.items():
|
|
856
|
+
constraint_expr_obj = (col(k) == lit(v_val))
|
|
857
|
+
all_input_expr_objects.append(constraint_expr_obj)
|
|
858
|
+
pure_expr_str, raw_defs_str = _extract_expr_parts(
|
|
859
|
+
constraint_expr_obj) # Constraint exprs are unlikely to have defs
|
|
860
|
+
pure_polars_expr_strings.append(f"({pure_expr_str})")
|
|
861
|
+
if raw_defs_str and raw_defs_str not in collected_raw_definitions: # Should be rare here
|
|
862
|
+
collected_raw_definitions.append(raw_defs_str)
|
|
863
|
+
|
|
864
|
+
filter_conditions_str = " & ".join(pure_polars_expr_strings) if pure_polars_expr_strings else "pl.lit(True)"
|
|
865
|
+
polars_operation_code = f"input_df.filter({filter_conditions_str})"
|
|
866
|
+
|
|
867
|
+
final_code_for_node: str
|
|
868
|
+
if collected_raw_definitions:
|
|
869
|
+
unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions)) # Order-preserving unique
|
|
870
|
+
definitions_section = "\n\n".join(unique_raw_definitions)
|
|
871
|
+
final_code_for_node = definitions_section + \
|
|
872
|
+
"\#─────SPLIT─────\n\n" + \
|
|
873
|
+
f"output_df = {polars_operation_code}"
|
|
689
874
|
else:
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
875
|
+
final_code_for_node = polars_operation_code
|
|
876
|
+
|
|
877
|
+
convertable_to_code = _check_if_convertible_to_code(all_input_expr_objects)
|
|
878
|
+
pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
|
|
879
|
+
isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
|
|
880
|
+
self._add_polars_code(new_node_id, final_code_for_node, description, method_name="filter",
|
|
881
|
+
convertable_to_code=convertable_to_code,
|
|
882
|
+
polars_expr=pl_expressions_for_fallback)
|
|
694
883
|
elif flowfile_formula:
|
|
695
|
-
# Create node settings
|
|
696
884
|
filter_settings = input_schema.NodeFilter(
|
|
697
885
|
flow_id=self.flow_graph.flow_id,
|
|
698
886
|
node_id=new_node_id,
|
|
@@ -706,8 +894,10 @@ class FlowFrame:
|
|
|
706
894
|
depending_on_id=self.node_id,
|
|
707
895
|
description=description
|
|
708
896
|
)
|
|
709
|
-
|
|
710
897
|
self.flow_graph.add_filter(filter_settings)
|
|
898
|
+
else:
|
|
899
|
+
logger.info("Filter called with no arguments; creating a pass-through Polars code node.")
|
|
900
|
+
self._add_polars_code(new_node_id, "output_df = input_df", description or "No-op filter", method_name=None)
|
|
711
901
|
|
|
712
902
|
return self._create_child_frame(new_node_id)
|
|
713
903
|
|
|
@@ -792,7 +982,7 @@ class FlowFrame:
|
|
|
792
982
|
if convert_to_absolute_path:
|
|
793
983
|
output_settings.directory = output_settings.abs_file_path
|
|
794
984
|
except Exception as e:
|
|
795
|
-
|
|
985
|
+
logger.warning(f"Could not determine absolute path for {file_str}: {e}")
|
|
796
986
|
|
|
797
987
|
if not use_polars_code:
|
|
798
988
|
node_output = input_schema.NodeOutput(
|
|
@@ -820,7 +1010,7 @@ class FlowFrame:
|
|
|
820
1010
|
|
|
821
1011
|
# Use sink_parquet for LazyFrames
|
|
822
1012
|
code = f"input_df.sink_parquet({args_str})"
|
|
823
|
-
|
|
1013
|
+
logger.debug(f"Generated Polars Code: {code}")
|
|
824
1014
|
self._add_polars_code(new_node_id, code, description)
|
|
825
1015
|
|
|
826
1016
|
return self._create_child_frame(new_node_id)
|
|
@@ -868,7 +1058,7 @@ class FlowFrame:
|
|
|
868
1058
|
if convert_to_absolute_path:
|
|
869
1059
|
output_settings.directory = output_settings.abs_file_path
|
|
870
1060
|
except Exception as e:
|
|
871
|
-
|
|
1061
|
+
logger.warning(f"Could not determine absolute path for {file_str}: {e}")
|
|
872
1062
|
|
|
873
1063
|
if not use_polars_code:
|
|
874
1064
|
node_output = input_schema.NodeOutput(
|
|
@@ -901,7 +1091,7 @@ class FlowFrame:
|
|
|
901
1091
|
args_str += f", {kwargs_repr}"
|
|
902
1092
|
|
|
903
1093
|
code = f"input_df.collect().write_csv({args_str})"
|
|
904
|
-
|
|
1094
|
+
logger.debug(f"Generated Polars Code: {code}")
|
|
905
1095
|
self._add_polars_code(new_node_id, code, description)
|
|
906
1096
|
|
|
907
1097
|
return self._create_child_frame(new_node_id)
|
|
@@ -954,10 +1144,10 @@ class FlowFrame:
|
|
|
954
1144
|
self.flow_graph.apply_layout()
|
|
955
1145
|
self.flow_graph.save_flow(file_path)
|
|
956
1146
|
|
|
957
|
-
def collect(self):
|
|
1147
|
+
def collect(self, *args, **kwargs):
|
|
958
1148
|
"""Collect lazy data into memory."""
|
|
959
1149
|
if hasattr(self.data, "collect"):
|
|
960
|
-
return self.data.collect()
|
|
1150
|
+
return self.data.collect(*args, **kwargs)
|
|
961
1151
|
return self.data
|
|
962
1152
|
|
|
963
1153
|
def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> "FlowFrame":
|
|
@@ -1278,9 +1468,10 @@ class FlowFrame:
|
|
|
1278
1468
|
f.node_id = node_mappings.get((f.flow_graph.flow_id, f.node_id), None)
|
|
1279
1469
|
global node_id_counter
|
|
1280
1470
|
node_id_counter += len(combined_graph.nodes)
|
|
1471
|
+
else:
|
|
1472
|
+
combined_graph = self.flow_graph
|
|
1281
1473
|
new_node_id = generate_node_id()
|
|
1282
1474
|
use_native = how == "diagonal_relaxed" and parallel and not rechunk
|
|
1283
|
-
|
|
1284
1475
|
if use_native:
|
|
1285
1476
|
# Create union input for the transform schema
|
|
1286
1477
|
union_input = transform_schema.UnionInput(
|
|
@@ -1314,7 +1505,6 @@ class FlowFrame:
|
|
|
1314
1505
|
input_vars.append(f"input_df_{i+2}")
|
|
1315
1506
|
|
|
1316
1507
|
frames_list = f"[{', '.join(input_vars)}]"
|
|
1317
|
-
|
|
1318
1508
|
code = f"""
|
|
1319
1509
|
# Perform concat operation
|
|
1320
1510
|
output_df = pl.concat(
|
|
@@ -1324,19 +1514,20 @@ class FlowFrame:
|
|
|
1324
1514
|
parallel={parallel}
|
|
1325
1515
|
)
|
|
1326
1516
|
"""
|
|
1327
|
-
|
|
1517
|
+
self.flow_graph = combined_graph
|
|
1328
1518
|
|
|
1329
1519
|
# Add polars code node with dependencies on all input frames
|
|
1330
1520
|
depending_on_ids = [self.node_id] + [frame.node_id for frame in others]
|
|
1331
1521
|
self._add_polars_code(
|
|
1332
1522
|
new_node_id, code, description, depending_on_ids=depending_on_ids
|
|
1333
1523
|
)
|
|
1334
|
-
|
|
1335
1524
|
# Add connections to ensure all frames are available
|
|
1336
1525
|
self._add_connection(self.node_id, new_node_id, "main")
|
|
1526
|
+
|
|
1337
1527
|
for other_frame in others:
|
|
1338
|
-
other_frame._add_connection(other_frame.node_id, new_node_id, "main")
|
|
1339
1528
|
|
|
1529
|
+
other_frame.flow_graph = combined_graph
|
|
1530
|
+
other_frame._add_connection(other_frame.node_id, new_node_id, "main")
|
|
1340
1531
|
# Create and return the new frame
|
|
1341
1532
|
return FlowFrame(
|
|
1342
1533
|
data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
|
|
@@ -1373,7 +1564,7 @@ class FlowFrame:
|
|
|
1373
1564
|
return False, None
|
|
1374
1565
|
|
|
1375
1566
|
# Extract the output name
|
|
1376
|
-
output_name = expr.
|
|
1567
|
+
output_name = expr.column_name
|
|
1377
1568
|
|
|
1378
1569
|
if ".over(" not in expr._repr_str:
|
|
1379
1570
|
# Simple cumulative count can be implemented as a record ID with offset=1
|
|
@@ -1456,62 +1647,70 @@ class FlowFrame:
|
|
|
1456
1647
|
return False, None
|
|
1457
1648
|
|
|
1458
1649
|
def with_columns(
|
|
1459
|
-
|
|
1460
|
-
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1650
|
+
self,
|
|
1651
|
+
*exprs: Union[Expr, Iterable[Expr], Any], # Allow Any for implicit lit conversion
|
|
1652
|
+
flowfile_formulas: Optional[List[str]] = None,
|
|
1653
|
+
output_column_names: Optional[List[str]] = None,
|
|
1654
|
+
description: Optional[str] = None,
|
|
1655
|
+
**named_exprs: Union[Expr, Any], # Allow Any for implicit lit conversion
|
|
1465
1656
|
) -> "FlowFrame":
|
|
1466
1657
|
"""
|
|
1467
|
-
Add
|
|
1468
|
-
|
|
1469
|
-
Parameters
|
|
1470
|
-
----------
|
|
1471
|
-
exprs : Expr or List[Expr], optional
|
|
1472
|
-
Expressions to evaluate as new columns
|
|
1473
|
-
flowfile_formulas : List[str], optional
|
|
1474
|
-
Alternative approach using flowfile formula syntax
|
|
1475
|
-
output_column_names : List[str], optional
|
|
1476
|
-
Column names for the flowfile formulas
|
|
1477
|
-
description : str, optional
|
|
1478
|
-
Description of this operation for the ETL graph
|
|
1479
|
-
|
|
1480
|
-
Returns
|
|
1481
|
-
-------
|
|
1482
|
-
FlowFrame
|
|
1483
|
-
A new FlowFrame with the columns added
|
|
1484
|
-
|
|
1485
|
-
Raises
|
|
1486
|
-
------
|
|
1487
|
-
ValueError
|
|
1488
|
-
If neither exprs nor flowfile_formulas with output_column_names are provided,
|
|
1489
|
-
or if the lengths of flowfile_formulas and output_column_names don't match
|
|
1658
|
+
Add or replace columns in the DataFrame.
|
|
1490
1659
|
"""
|
|
1491
|
-
|
|
1492
|
-
new_node_id = generate_node_id()
|
|
1493
|
-
exprs_iterable = _parse_inputs_as_iterable((exprs,))
|
|
1660
|
+
new_node_id = generate_node_id()
|
|
1494
1661
|
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
|
|
1503
|
-
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1662
|
+
all_input_expr_objects: List[Expr] = []
|
|
1663
|
+
pure_polars_expr_strings_for_wc: List[str] = []
|
|
1664
|
+
collected_raw_definitions: List[str] = []
|
|
1665
|
+
|
|
1666
|
+
has_exprs_or_named_exprs = bool(exprs or named_exprs)
|
|
1667
|
+
if has_exprs_or_named_exprs:
|
|
1668
|
+
actual_exprs_to_process: List[Expr] = []
|
|
1669
|
+
temp_exprs_iterable = list(_parse_inputs_as_iterable(exprs))
|
|
1670
|
+
|
|
1671
|
+
for item in temp_exprs_iterable:
|
|
1672
|
+
if isinstance(item, Expr):
|
|
1673
|
+
actual_exprs_to_process.append(item)
|
|
1674
|
+
else: # auto-lit for non-Expr positional args
|
|
1675
|
+
actual_exprs_to_process.append(lit(item))
|
|
1676
|
+
|
|
1677
|
+
for name, val_expr in named_exprs.items():
|
|
1678
|
+
if isinstance(val_expr, Expr):
|
|
1679
|
+
actual_exprs_to_process.append(val_expr.alias(name)) # type: ignore # Assuming Expr has alias
|
|
1680
|
+
else: # auto-lit for named args and then alias
|
|
1681
|
+
actual_exprs_to_process.append(lit(val_expr).alias(name)) # type: ignore
|
|
1682
|
+
|
|
1683
|
+
if len(actual_exprs_to_process) == 1 and isinstance(actual_exprs_to_process[0], Expr):
|
|
1684
|
+
pass
|
|
1685
|
+
|
|
1686
|
+
for current_expr_obj in actual_exprs_to_process:
|
|
1687
|
+
all_input_expr_objects.append(current_expr_obj)
|
|
1688
|
+
pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
|
|
1689
|
+
pure_polars_expr_strings_for_wc.append(pure_expr_str) # with_columns takes individual expressions
|
|
1690
|
+
if raw_defs_str and raw_defs_str not in collected_raw_definitions:
|
|
1691
|
+
collected_raw_definitions.append(raw_defs_str)
|
|
1692
|
+
|
|
1693
|
+
polars_operation_code = f"input_df.with_columns([{', '.join(pure_polars_expr_strings_for_wc)}])"
|
|
1694
|
+
|
|
1695
|
+
final_code_for_node: str
|
|
1696
|
+
if collected_raw_definitions:
|
|
1697
|
+
unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
|
|
1698
|
+
definitions_section = "\n\n".join(unique_raw_definitions)
|
|
1699
|
+
final_code_for_node = definitions_section + \
|
|
1700
|
+
"\n#─────SPLIT─────\n\n" + \
|
|
1701
|
+
f"output_df = {polars_operation_code}"
|
|
1702
|
+
else:
|
|
1703
|
+
final_code_for_node = polars_operation_code
|
|
1507
1704
|
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1511
|
-
|
|
1705
|
+
pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
|
|
1706
|
+
isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
|
|
1707
|
+
self._add_polars_code(new_node_id, final_code_for_node, description, method_name='with_columns',
|
|
1708
|
+
convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
|
|
1709
|
+
polars_expr=pl_expressions_for_fallback)
|
|
1512
1710
|
return self._create_child_frame(new_node_id)
|
|
1513
1711
|
|
|
1514
1712
|
elif flowfile_formulas is not None and output_column_names is not None:
|
|
1713
|
+
|
|
1515
1714
|
if len(output_column_names) != len(flowfile_formulas):
|
|
1516
1715
|
raise ValueError(
|
|
1517
1716
|
"Length of both the formulas and the output columns names must be identical"
|
|
@@ -1524,9 +1723,7 @@ class FlowFrame:
|
|
|
1524
1723
|
ff = ff._with_flowfile_formula(flowfile_formula, output_column_name, f"{i}: {description}")
|
|
1525
1724
|
return ff
|
|
1526
1725
|
else:
|
|
1527
|
-
raise ValueError(
|
|
1528
|
-
"Either exprs or flowfile_formulas with output_column_names must be provided"
|
|
1529
|
-
)
|
|
1726
|
+
raise ValueError("Either exprs/named_exprs or flowfile_formulas with output_column_names must be provided")
|
|
1530
1727
|
|
|
1531
1728
|
def with_row_index(
|
|
1532
1729
|
self, name: str = "index", offset: int = 0, description: str = None
|
|
@@ -1614,26 +1811,27 @@ class FlowFrame:
|
|
|
1614
1811
|
|
|
1615
1812
|
if isinstance(columns, (list, tuple)):
|
|
1616
1813
|
all_columns.extend(
|
|
1617
|
-
[col.
|
|
1814
|
+
[col.column_name if isinstance(col, Column) else col for col in columns]
|
|
1618
1815
|
)
|
|
1619
1816
|
else:
|
|
1620
|
-
all_columns.append(columns.
|
|
1817
|
+
all_columns.append(columns.column_name if isinstance(columns, Column) else columns)
|
|
1621
1818
|
|
|
1622
1819
|
if more_columns:
|
|
1623
1820
|
for col in more_columns:
|
|
1624
|
-
all_columns.append(col.
|
|
1821
|
+
all_columns.append(col.column_name if isinstance(col, Column) else col)
|
|
1625
1822
|
|
|
1626
1823
|
if len(all_columns) == 1:
|
|
1627
|
-
|
|
1824
|
+
|
|
1825
|
+
columns_str = stringify_values(all_columns[0])
|
|
1628
1826
|
else:
|
|
1629
|
-
columns_str = "[" + ", ".join([
|
|
1827
|
+
columns_str = "[" + ", ".join([ stringify_values(col) for col in all_columns]) + "]"
|
|
1630
1828
|
|
|
1631
1829
|
code = f"""
|
|
1632
1830
|
# Explode columns into multiple rows
|
|
1633
1831
|
output_df = input_df.explode({columns_str})
|
|
1634
1832
|
"""
|
|
1635
1833
|
|
|
1636
|
-
cols_desc = ", ".join(all_columns)
|
|
1834
|
+
cols_desc = ", ".join(str(s) for s in all_columns)
|
|
1637
1835
|
desc = description or f"Explode column(s): {cols_desc}"
|
|
1638
1836
|
|
|
1639
1837
|
# Add polars code node
|
|
@@ -1676,7 +1874,7 @@ class FlowFrame:
|
|
|
1676
1874
|
new_node_id = generate_node_id()
|
|
1677
1875
|
|
|
1678
1876
|
if isinstance(column, Column):
|
|
1679
|
-
column_name = column.
|
|
1877
|
+
column_name = column.column_name
|
|
1680
1878
|
else:
|
|
1681
1879
|
column_name = column
|
|
1682
1880
|
|
|
@@ -1760,7 +1958,7 @@ class FlowFrame:
|
|
|
1760
1958
|
if col_expr._select_input.is_altered:
|
|
1761
1959
|
can_use_native = False
|
|
1762
1960
|
break
|
|
1763
|
-
processed_subset.append(col_expr.
|
|
1961
|
+
processed_subset.append(col_expr.column_name)
|
|
1764
1962
|
else:
|
|
1765
1963
|
can_use_native = False
|
|
1766
1964
|
break
|
|
@@ -1848,650 +2046,34 @@ class FlowFrame:
         """Get the number of columns."""
         return self.data.width
 
+    def __contains__(self, key):
+        """This special method enables the 'in' operator to work with FlowFrame objects."""
+        return key in self.data
 
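Illustration (not part of the diff): `__contains__` simply forwards membership tests to the wrapped frame in `self.data`, so `in` follows whatever semantics that object defines (for polars eager frames this is a column-name check; behavior for other wrapped objects is an assumption):

    if "price" in df:        # delegates to: "price" in df.data
        df = df.select("price")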
-def _add_delegated_methods():
-
-
-    delegate_methods = [
-        "profile",
-        "describe",
-        "explain",
-        "show_graph",
-        "serialize",
-        "fetch",
-        "get_meta",
-        "columns",
-        "dtypes",
-        "schema",
-        "estimated_size",
-        "n_chunks",
-        "is_empty",
-        "chunk_lengths",
-        "optimization_toggle",
-        "set_polars_options",
-        "collect_schema"
-    ]
-
-    already_implemented = set(dir(FlowFrame))
-
-    for method_name in delegate_methods:
-        if method_name not in already_implemented and hasattr(
-            pl.LazyFrame, method_name
-        ):
-            # Create a simple delegate method
-            def make_delegate(name):
-                def delegate_method(self, *args, **kwargs):
-                    return getattr(self.data, name)(*args, **kwargs)
-
-                # Set docstring and name
-                delegate_method.__doc__ = (
-                    f"See pl.LazyFrame.{name} for full documentation."
-                )
-                delegate_method.__name__ = name
-                return delegate_method
-
-            # Add the method to the class
-            setattr(FlowFrame, method_name, make_delegate(method_name))
-
-
-_add_delegated_methods()
-
-
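Illustration (not part of the diff): the removed `_add_delegated_methods` relies on a closure factory so that each generated method captures its own name. A minimal standalone sketch of that pattern (class and method names here are invented for demonstration):

    import polars as pl

    class Wrapper:
        def __init__(self, data: pl.LazyFrame):
            self.data = data

    def make_delegate(name):
        # Binding `name` through a factory avoids the classic late-binding
        # bug of defining the inner function directly inside the loop.
        def delegate(self, *args, **kwargs):
            return getattr(self.data, name)(*args, **kwargs)
        delegate.__name__ = name
        return delegate

    for method_name in ("explain", "collect_schema"):
        setattr(Wrapper, method_name, make_delegate(method_name))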
-def sum(expr):
-    """Sum aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.sum()
-
-
-def mean(expr):
-    """Mean aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.mean()
-
-
-def min(expr):
-    """Min aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.min()
-
-
-def max(expr):
-    """Max aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.max()
-
-
-def count(expr):
-    """Count aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.count()
-
-
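Illustration (not part of the diff): the five removed helpers share one coercion idiom — accept a column name or an expression, promote strings, then call the matching expression method. A generic sketch of the idiom, using `pl.col` as a stand-in for the module's own `col`:

    import polars as pl

    def make_agg(method: str):
        def agg(expr):
            if isinstance(expr, str):
                expr = pl.col(expr)          # promote bare column names
            return getattr(expr, method)()   # expr.sum(), expr.mean(), ...
        agg.__name__ = method
        return agg

    sum_agg, mean_agg = make_agg("sum"), make_agg("mean")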
-def read_csv(
-    source: Union[str, Path, IO[bytes], bytes, List[Union[str, Path, IO[bytes], bytes]]],
-    *,
-    flow_graph: Optional[Any] = None,  # Using Any for FlowGraph placeholder
-    separator: str = ',',
-    convert_to_absolute_path: bool = True,
-    description: Optional[str] = None,
-    has_header: bool = True,
-    new_columns: Optional[List[str]] = None,
-    comment_prefix: Optional[str] = None,
-    quote_char: Optional[str] = '"',
-    skip_rows: int = 0,
-    skip_lines: int = 0,
-    schema: Optional[SchemaDict] = None,
-    schema_overrides: Optional[Union[SchemaDict, Sequence[PolarsDataType]]] = None,
-    null_values: Optional[Union[str, List[str], Dict[str, str]]] = None,
-    missing_utf8_is_empty_string: bool = False,
-    ignore_errors: bool = False,
-    try_parse_dates: bool = False,
-    infer_schema: bool = True,
-    infer_schema_length: Optional[int] = 100,
-    n_rows: Optional[int] = None,
-    encoding: CsvEncoding = 'utf8',
-    low_memory: bool = False,
-    rechunk: bool = False,
-    storage_options: Optional[Dict[str, Any]] = None,
-    skip_rows_after_header: int = 0,
-    row_index_name: Optional[str] = None,
-    row_index_offset: int = 0,
-    eol_char: str = '\n',
-    raise_if_empty: bool = True,
-    truncate_ragged_lines: bool = False,
-    decimal_comma: bool = False,
-    glob: bool = True,
-    cache: bool = True,
-    with_column_names: Optional[Callable[[List[str]], List[str]]] = None,
-    **other_options: Any
-) -> FlowFrame:
-    """
-    Read a CSV file into a FlowFrame.
-
-    This function uses the native FlowGraph implementation when the parameters
-    fall within the supported range, and falls back to using Polars' scan_csv implementation
-    for more advanced features.
-
-    Args:
-        source: Path(s) to CSV file(s), or a file-like object.
-        flow_graph: if you want to add it to an existing graph
-        separator: Single byte character to use as separator in the file.
-        convert_to_absolute_path: If the path needs to be set to a fixed location
-        description: if you want to add a readable name in the frontend (advised)
-
-        # Polars.scan_csv aligned parameters
-        has_header: Indicate if the first row of the dataset is a header or not.
-        new_columns: Rename columns after selection.
-        comment_prefix: String that indicates a comment line if found at beginning of line.
-        quote_char: Character used for quoting. None to disable.
-        skip_rows: Start reading after this many rows.
-        skip_lines: Skip this many lines by newline char only.
-        schema: Schema to use when reading the CSV.
-        schema_overrides: Schema overrides for specific columns.
-        null_values: Values to interpret as null.
-        missing_utf8_is_empty_string: Treat missing utf8 values as empty strings.
-        ignore_errors: Try to keep reading lines if some parsing errors occur.
-        try_parse_dates: Try to automatically parse dates.
-        infer_schema: Boolean flag. If False, `infer_schema_length` for Polars is set to 0.
-        infer_schema_length: Number of rows to use for schema inference. Polars default is 100.
-        n_rows: Stop reading after this many rows.
-        encoding: Character encoding to use.
-        low_memory: Reduce memory usage at the cost of performance.
-        rechunk: Ensure data is in contiguous memory layout after parsing.
-        storage_options: Options for fsspec for cloud storage.
-        skip_rows_after_header: Skip rows after header.
-        row_index_name: Name of the row index column.
-        row_index_offset: Start value for the row index.
-        eol_char: End of line character.
-        raise_if_empty: Raise error if file is empty.
-        truncate_ragged_lines: Truncate lines with too many values.
-        decimal_comma: Parse floats with decimal comma.
-        glob: Use glob pattern for file path (if source is a string).
-        cache: Cache the result after reading (Polars default True).
-        with_column_names: Apply a function over the column names.
-        other_options: Any other options to pass to polars.scan_csv (e.g. retries, file_cache_ttl).
-
-    Returns:
-        A FlowFrame with the CSV data.
-    """
-    node_id = generate_node_id()  # Assuming generate_node_id is defined
-    if flow_graph is None:
-        flow_graph = create_flow_graph()  # Assuming create_flow_graph is defined
-    flow_id = flow_graph.flow_id
-
-    current_source_path_for_native = None
-    if isinstance(source, (str, os.PathLike)):
-        current_source_path_for_native = str(source)
-        if '~' in current_source_path_for_native:
-            current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
-    elif isinstance(source, list) and all(isinstance(s, (str, os.PathLike)) for s in source):
-        current_source_path_for_native = str(source[0]) if source else None
-        if current_source_path_for_native and '~' in current_source_path_for_native:
-            current_source_path_for_native = os.path.expanduser(current_source_path_for_native)
-    elif isinstance(source, (io.BytesIO, io.StringIO)):
-        logger.warning("Read from bytes io from csv not supported, converting data to raw data")
-        return from_dict(pl.read_csv(source), flow_graph=flow_graph, description=description)
-    actual_infer_schema_length: Optional[int]
-    if not infer_schema:
-        actual_infer_schema_length = 0
-    else:
-        actual_infer_schema_length = infer_schema_length
-    can_use_native = (
-        current_source_path_for_native is not None and
-        comment_prefix is None and
-        skip_lines == 0 and
-        schema is None and
-        schema_overrides is None and
-        null_values is None and
-        not missing_utf8_is_empty_string and
-        not try_parse_dates and
-        n_rows is None and
-        not low_memory and
-        not rechunk and
-        storage_options is None and
-        skip_rows_after_header == 0 and
-        row_index_name is None and
-        row_index_offset == 0 and
-        eol_char == '\n' and
-        not decimal_comma and
-        new_columns is None and
-        glob is True
-    )
-    if can_use_native and current_source_path_for_native:
-        received_table = input_schema.ReceivedTable(
-            file_type='csv',
-            path=current_source_path_for_native,
-            name=Path(current_source_path_for_native).name,
-            delimiter=separator,
-            has_headers=has_header,
-            encoding=encoding,
-            starting_from_line=skip_rows,
-            quote_char=quote_char if quote_char is not None else '"',
-            infer_schema_length=actual_infer_schema_length if actual_infer_schema_length is not None else 10000,
-            truncate_ragged_lines=truncate_ragged_lines,
-            ignore_errors=ignore_errors,
-            row_delimiter=eol_char
-        )
-        if convert_to_absolute_path:
-            try:
-                received_table.set_absolute_filepath()
-                received_table.path = received_table.abs_file_path
-            except Exception as e:
-                print(f"Warning: Could not determine absolute path for {current_source_path_for_native}: {e}")
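Illustration (not part of the diff): the removed `read_csv` routes between two implementations, as its docstring above describes. A hedged usage sketch of calls that would take each path; the module alias and file names are assumptions:

    import flowfile_frame as ff

    # Plain parameters: every can_use_native condition holds, so a
    # native FlowGraph read node is created.
    df = ff.read_csv("sales.csv", separator=";", has_header=True)

    # n_rows is not None fails the check, so a pl.scan_csv(...) string
    # is generated and attached as a polars-code node instead.
    df = ff.read_csv("sales.csv", n_rows=1000)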
+    def __bool__(self):
+        """This special method determines how the object behaves in boolean contexts.
+        Returns True if the FlowFrame contains any data, False otherwise."""
+        return bool(self.data)
 
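Illustration (not part of the diff): `__bool__` forwards truthiness to `bool(self.data)`, so the docstring's "contains any data" claim holds only where the wrapped object defines truthiness that way; a bare `if frame:` otherwise follows the underlying frame's behavior:

    if df:                        # evaluates bool(df.data)
        print("frame is truthy")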
-        read_node_description = description or f"Read CSV from {Path(current_source_path_for_native).name}"
-        read_node = input_schema.NodeRead(
-            flow_id=flow_id,
-            node_id=node_id,
-            received_file=received_table,
-            pos_x=100,
-            pos_y=100,
-            is_setup=True,
-            description=read_node_description
-        )
-        flow_graph.add_read(read_node)
-        result_frame = FlowFrame(
-            data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-            flow_graph=flow_graph,
-            node_id=node_id
-        )
-        return result_frame
-    else:
-        polars_source_arg = source
-        polars_code = _build_polars_code_args(
-            source=polars_source_arg,
-            separator=separator,
-            has_header=has_header,
-            new_columns=new_columns,
-            comment_prefix=comment_prefix,
-            quote_char=quote_char,
-            skip_rows=skip_rows,
-            skip_lines=skip_lines,
-            schema=schema,
-            schema_overrides=schema_overrides,
-            null_values=null_values,
-            missing_utf8_is_empty_string=missing_utf8_is_empty_string,
-            ignore_errors=ignore_errors,
-            try_parse_dates=try_parse_dates,
-            infer_schema_length=actual_infer_schema_length,
-            n_rows=n_rows,
-            encoding=encoding,
-            low_memory=low_memory,
-            rechunk=rechunk,
-            storage_options=storage_options,
-            skip_rows_after_header=skip_rows_after_header,
-            row_index_name=row_index_name,
-            row_index_offset=row_index_offset,
-            eol_char=eol_char,
-            raise_if_empty=raise_if_empty,
-            truncate_ragged_lines=truncate_ragged_lines,
-            decimal_comma=decimal_comma,
-            glob=glob,
-            cache=cache,
-            with_column_names=with_column_names,
-            **other_options
-        )
-        polars_code_node_description = description or "Read CSV with Polars scan_csv"
-        if isinstance(source, (str, os.PathLike)):
-            polars_code_node_description = description or f"Read CSV with Polars scan_csv from {Path(source).name}"
-        elif isinstance(source, list) and source and isinstance(source[0], (str, os.PathLike)):
-            polars_code_node_description = description or f"Read CSV with Polars scan_csv from {Path(source[0]).name} (and possibly others)"
-
-        # Assuming input_schema.NodePolarsCode, transform_schema.PolarsCodeInput are defined
-        polars_code_settings = input_schema.NodePolarsCode(
-            flow_id=flow_id,
-            node_id=node_id,
-            polars_code_input=transform_schema.PolarsCodeInput(polars_code=polars_code),
-            is_setup=True,
-            description=polars_code_node_description
-        )
-        flow_graph.add_polars_code(polars_code_settings)
-        return FlowFrame(
-            data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-            flow_graph=flow_graph,
-            node_id=node_id,
-        )
-
-
-def _build_polars_code_args(
-    source: Union[str, Path, IO[bytes], bytes, List[Union[str, Path, IO[bytes], bytes]]],
-    separator: str,
-    has_header: bool,
-    new_columns: Optional[List[str]],
-    comment_prefix: Optional[str],
-    quote_char: Optional[str],
-    skip_rows: int,
-    skip_lines: int,
-    schema: Optional[SchemaDict],
-    schema_overrides: Optional[Union[SchemaDict, Sequence[PolarsDataType]]],
-    null_values: Optional[Union[str, List[str], Dict[str, str]]],
-    missing_utf8_is_empty_string: bool,
-    ignore_errors: bool,
-    try_parse_dates: bool,
-    infer_schema_length: Optional[int],
-    n_rows: Optional[int],
-    encoding: CsvEncoding,
-    low_memory: bool,
-    rechunk: bool,
-    storage_options: Optional[Dict[str, Any]],
-    skip_rows_after_header: int,
-    row_index_name: Optional[str],
-    row_index_offset: int,
-    eol_char: str,
-    raise_if_empty: bool,
-    truncate_ragged_lines: bool,
-    decimal_comma: bool,
-    glob: bool,
-    cache: bool,
-    with_column_names: Optional[Callable[[List[str]], List[str]]],
-    **other_options: Any
-) -> str:
-    source_repr: str
-    if isinstance(source, (str, Path)):
-        source_repr = repr(str(source))
-    elif isinstance(source, list):
-        source_repr = repr([str(p) for p in source])
-    elif isinstance(source, bytes):
-        source_repr = "source_bytes_obj"
-    elif hasattr(source, 'read'):
-        source_repr = "source_file_like_obj"
-    else:
-        source_repr = repr(source)
-
-    param_mapping = {
-        'has_header': (True, lambda x: str(x)),
-        'separator': (',', lambda x: repr(str(x))),
-        'comment_prefix': (None, lambda x: repr(str(x)) if x is not None else 'None'),
-        'quote_char': ('"', lambda x: repr(str(x)) if x is not None else 'None'),
-        'skip_rows': (0, str),
-        'skip_lines': (0, str),
-        'schema': (None, lambda x: repr(x) if x is not None else 'None'),
-        'schema_overrides': (None, lambda x: repr(x) if x is not None else 'None'),
-        'null_values': (None, lambda x: repr(x) if x is not None else 'None'),
-        'missing_utf8_is_empty_string': (False, str),
-        'ignore_errors': (False, str),
-        'cache': (True, str),
-        'with_column_names': (None, lambda x: repr(x) if x is not None else 'None'),
-        'infer_schema_length': (100, lambda x: str(x) if x is not None else 'None'),
-        'n_rows': (None, lambda x: str(x) if x is not None else 'None'),
-        'encoding': ('utf8', lambda x: repr(str(x))),
-        'low_memory': (False, str),
-        'rechunk': (False, str),
-        'skip_rows_after_header': (0, str),
-        'row_index_name': (None, lambda x: repr(str(x)) if x is not None else 'None'),
-        'row_index_offset': (0, str),
-        'try_parse_dates': (False, str),
-        'eol_char': ('\n', lambda x: repr(str(x))),
-        'new_columns': (None, lambda x: repr(x) if x is not None else 'None'),
-        'raise_if_empty': (True, str),
-        'truncate_ragged_lines': (False, str),
-        'decimal_comma': (False, str),
-        'glob': (True, str),
-        'storage_options': (None, lambda x: repr(x) if x is not None else 'None'),
-    }
-
-    all_vars = locals()
-    kwargs_list = []
-
-    for param_name_key, (default_value, format_func) in param_mapping.items():
-        value = all_vars.get(param_name_key)
-        formatted_value = format_func(value)
-        kwargs_list.append(f"{param_name_key}={formatted_value}")
-
-    if other_options:
-        for k, v in other_options.items():
-            kwargs_list.append(f"{k}={repr(v)}")
-
-    kwargs_str = ",\n ".join(kwargs_list)
-
-    if kwargs_str:
-        polars_code = f"output_df = pl.scan_csv(\n {source_repr},\n {kwargs_str}\n)"
-    else:
-        polars_code = f"output_df = pl.scan_csv({source_repr})"
-
-    return polars_code
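Illustration (not part of the diff): `_build_polars_code_args` (deleted above) renders keyword arguments through a `name -> (default, formatter)` table. A minimal sketch of the same table-driven technique, extended with the obvious refinement of skipping values still at their default:

    param_mapping = {
        "separator": (",", lambda x: repr(str(x))),
        "has_header": (True, str),
        "n_rows": (None, lambda x: str(x) if x is not None else "None"),
    }

    def render_kwargs(values: dict) -> str:
        parts = []
        for name, (default, fmt) in param_mapping.items():
            value = values.get(name, default)
            if value != default:          # omit arguments left at the default
                parts.append(f"{name}={fmt(value)}")
        return ", ".join(parts)

    print(render_kwargs({"separator": ";"}))   # separator=';'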
-
-
-def read_parquet(file_path, *, flow_graph: FlowGraph = None, description: str = None,
-                 convert_to_absolute_path: bool = True, **options) -> FlowFrame:
-    """
-    Read a Parquet file into a FlowFrame.
-
-    Args:
-        file_path: Path to Parquet file
-        flow_graph: if you want to add it to an existing graph
-        description: if you want to add a readable name in the frontend (advised)
-        convert_to_absolute_path: If the path needs to be set to a fixed location
-        **options: Options for polars.read_parquet
-
-    Returns:
-        A FlowFrame with the Parquet data
-    """
-    if '~' in file_path:
-        file_path = os.path.expanduser(file_path)
-    node_id = generate_node_id()
-
-    if flow_graph is None:
-        flow_graph = create_flow_graph()
-
-    flow_id = flow_graph.flow_id
-
-    received_table = input_schema.ReceivedTable(
-        file_type='parquet',
-        path=file_path,
-        name=Path(file_path).name,
-    )
-    if convert_to_absolute_path:
-        received_table.path = received_table.abs_file_path
-
-    read_node = input_schema.NodeRead(
-        flow_id=flow_id,
-        node_id=node_id,
-        received_file=received_table,
-        pos_x=100,
-        pos_y=100,
-        is_setup=True,
-        description=description
-    )
-
-    flow_graph.add_read(read_node)
-
-    return FlowFrame(
-        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-        flow_graph=flow_graph,
-        node_id=node_id
-    )
-
-
-def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) -> FlowFrame:
-    """
-    Create a FlowFrame from a dictionary or list of dictionaries.
-
-    Args:
-        data: Dictionary of lists or list of dictionaries
-        flow_graph: if you want to add it to an existing graph
-        description: if you want to add a readable name in the frontend (advised)
-    Returns:
-        A FlowFrame with the data
-    """
-    # Create new node ID
-    node_id = generate_node_id()
-
-    if not flow_graph:
-        flow_graph = create_flow_graph()
-    flow_id = flow_graph.flow_id
-
-    input_node = input_schema.NodeManualInput(
-        flow_id=flow_id,
-        node_id=node_id,
-        raw_data=FlowDataEngine(data).to_pylist(),
-        pos_x=100,
-        pos_y=100,
-        is_setup=True,
-        description=description
-    )
-
-    # Add to graph
-    flow_graph.add_manual_input(input_node)
-
-    # Return new frame
-    return FlowFrame(
-        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-        flow_graph=flow_graph,
-        node_id=node_id
-    )
-
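Illustration (not part of the diff): the removed `from_dict` wraps the input in a manual-input node, so the seed data travels with the graph. A hedged usage sketch; the module alias and column names are assumptions:

    import flowfile_frame as ff

    df = ff.from_dict(
        {"city": ["NY", "LA"], "population": [8.4, 3.9]},
        description="seed data",
    )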
-
-def concat(frames: List['FlowFrame'],
-           how: str = 'vertical',
-           rechunk: bool = False,
-           parallel: bool = True,
-           description: str = None) -> 'FlowFrame':
-    """
-    Concatenate multiple FlowFrames into one.
-
-    Parameters
-    ----------
-    frames : List[FlowFrame]
-        List of FlowFrames to concatenate
-    how : str, default 'vertical'
-        How to combine the FlowFrames (see concat method documentation)
-    rechunk : bool, default False
-        Whether to ensure contiguous memory in result
-    parallel : bool, default True
-        Whether to use parallel processing for the operation
-    description : str, optional
-        Description of this operation
-
-    Returns
-    -------
-    FlowFrame
-        A new FlowFrame with the concatenated data
-    """
-    if not frames:
-        raise ValueError("No frames provided to concat_frames")
-
-    if len(frames) == 1:
-        return frames[0]
-
-    # Use first frame's concat method with remaining frames
-    first_frame = frames[0]
-    remaining_frames = frames[1:]
-
-    return first_frame.concat(remaining_frames, how=how,
-                              rechunk=rechunk, parallel=parallel,
-                              description=description)
-
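Illustration (not part of the diff): the removed module-level `concat` validates its input and then defers to the first frame's own `concat` method, so the two spellings below are equivalent (frames `a` and `b` assumed to exist):

    combined = concat([a, b], how="vertical")   # module-level helper
    combined = a.concat([b], how="vertical")    # what it delegates to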
-
-def scan_csv(
-    source: Union[str, Path, IO[bytes], bytes, List[Union[str, Path, IO[bytes], bytes]]],
-    *,
-    flow_graph: Optional[Any] = None,  # Using Any for FlowGraph placeholder
-    separator: str = ',',
-    convert_to_absolute_path: bool = True,
-    description: Optional[str] = None,
-    has_header: bool = True,
-    new_columns: Optional[List[str]] = None,
-    comment_prefix: Optional[str] = None,
-    quote_char: Optional[str] = '"',
-    skip_rows: int = 0,
-    skip_lines: int = 0,
-    schema: Optional[SchemaDict] = None,
-    schema_overrides: Optional[Union[SchemaDict, Sequence[PolarsDataType]]] = None,
-    null_values: Optional[Union[str, List[str], Dict[str, str]]] = None,
-    missing_utf8_is_empty_string: bool = False,
-    ignore_errors: bool = False,
-    try_parse_dates: bool = False,
-    infer_schema: bool = True,
-    infer_schema_length: Optional[int] = 100,
-    n_rows: Optional[int] = None,
-    encoding: CsvEncoding = 'utf8',
-    low_memory: bool = False,
-    rechunk: bool = False,
-    storage_options: Optional[Dict[str, Any]] = None,
-    skip_rows_after_header: int = 0,
-    row_index_name: Optional[str] = None,
-    row_index_offset: int = 0,
-    eol_char: str = '\n',
-    raise_if_empty: bool = True,
-    truncate_ragged_lines: bool = False,
-    decimal_comma: bool = False,
-    glob: bool = True,
-    cache: bool = True,
-    with_column_names: Optional[Callable[[List[str]], List[str]]] = None,
-    **other_options: Any
-) -> FlowFrame:
-    """
-    Scan a CSV file into a FlowFrame. This function is an alias for read_csv.
+    @staticmethod
+    def _comparison_error(operator: str) -> pl.lazyframe.frame.NoReturn:
+        msg = f'"{operator!r}" comparison not supported for LazyFrame objects'
+        raise TypeError(msg)
 
-
-
+    def __eq__(self, other: object) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("==")
 
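Illustration (not part of the diff): the new comparison dunders copy `pl.LazyFrame`'s stance that frame comparisons are ambiguous, so every operator routes to `_comparison_error` and raises rather than guessing element-wise semantics:

    try:
        df == other          # ==, !=, <, <=, >, >= all behave the same
    except TypeError as exc:
        print(exc)           # comparison not supported for LazyFrame objects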
-
-    """
-    return read_csv(
-        source=source,
-        flow_graph=flow_graph,
-        separator=separator,
-        convert_to_absolute_path=convert_to_absolute_path,
-        description=description,
-        has_header=has_header,
-        new_columns=new_columns,
-        comment_prefix=comment_prefix,
-        quote_char=quote_char,
-        skip_rows=skip_rows,
-        skip_lines=skip_lines,
-        schema=schema,
-        schema_overrides=schema_overrides,
-        null_values=null_values,
-        missing_utf8_is_empty_string=missing_utf8_is_empty_string,
-        ignore_errors=ignore_errors,
-        try_parse_dates=try_parse_dates,
-        infer_schema=infer_schema,
-        infer_schema_length=infer_schema_length,
-        n_rows=n_rows,
-        encoding=encoding,
-        low_memory=low_memory,
-        rechunk=rechunk,
-        storage_options=storage_options,
-        skip_rows_after_header=skip_rows_after_header,
-        row_index_name=row_index_name,
-        row_index_offset=row_index_offset,
-        eol_char=eol_char,
-        raise_if_empty=raise_if_empty,
-        truncate_ragged_lines=truncate_ragged_lines,
-        decimal_comma=decimal_comma,
-        glob=glob,
-        cache=cache,
-        with_column_names=with_column_names,
-        **other_options
-    )
+    def __ne__(self, other: object) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("!=")
 
+    def __gt__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error(">")
 
-def scan_parquet(
-    file_path,
-    *,
-    flow_graph: FlowGraph = None,
-    description: str = None,
-    convert_to_absolute_path: bool = True,
-    **options
-) -> FlowFrame:
-    """
-    Scan a Parquet file into a FlowFrame. This function is an alias for read_parquet.
+    def __lt__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("<")
 
-
-    """
+    def __ge__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error(">=")
 
-
-
-    return read_parquet(
-        file_path=file_path,
-        flow_graph=flow_graph,
-        description=description,
-        convert_to_absolute_path=convert_to_absolute_path,
-        **options
-    )
+    def __le__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("<=")