PyPI - Flowfile - Versions diffs - 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend - Supply Chain Defender

Flowfile 0.3.1.2py3-none-any.whl → 0.3.3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of Flowfile might be problematic. Click here for more details.

Files changed (100) hide show

flowfile_frame/flow_frame.py CHANGED Viewed

@@ -1,14 +1,17 @@
-import uuid
+import inspect
 import os
-from typing import Any, Iterable, List, Literal, Optional, Tuple, Union
-from pathlib import Path
+from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable, get_args, get_origin
 import re
 import polars as pl
-from polars._typing import FrameInitTypes, SchemaDefinition, SchemaDict, Orientation
-# Assume these imports are correct from your original context
+from flowfile_frame.lazy_methods import add_lazyframe_methods
+from polars._typing import (FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
+from collections.abc import Iterator
 from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
+from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
 from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
 from flowfile_core.schemas import input_schema, transform_schema
@@ -16,12 +19,36 @@ from flowfile_core.schemas import input_schema, transform_schema
 from flowfile_frame.expr import Expr, Column, lit, col
 from flowfile_frame.selectors import Selector
 from flowfile_frame.group_frame import GroupByFrame
-from flowfile_frame.utils import _parse_inputs_as_iterable, create_flow_graph
+from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
+                                  ensure_inputs_as_iterable)
 from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
+from flowfile_frame.utils import _check_if_convertible_to_code
+from flowfile_frame.config import logger
 node_id_counter = 0
+def can_be_expr(param: inspect.Parameter) -> bool:
+    """Check if a parameter can be of type pl.Expr"""
+    if param.annotation == inspect.Parameter.empty:
+        return False
+    # Check direct match or in Union args
+    types = get_args(param.annotation) if get_origin(param.annotation) is Union else [param.annotation]
+    return any(t in (pl.Expr, pl.expr.expr.Expr) for t in types)
+def _contains_lambda_pattern(text: str) -> bool:
+    return "<lambda> at" in text
+def get_method_name_from_code(code: str) -> str | None:
+    split_code = code.split("input_df.")
+    if len(split_code) > 1:
+        return split_code[1].split("(")[0]
 def _to_string_val(v) -> str:
     if isinstance(v, str):
         return f"'{v}'"
@@ -29,12 +56,72 @@ def _to_string_val(v) -> str:
         return v
+def _extract_expr_parts(expr_obj) -> tuple[str, str]:
+    """
+    Extract the pure expression string and any raw definitions (including function sources) from an Expr object.
+    Parameters
+    ----------
+    expr_obj : Expr
+        The expression object to extract parts from
+    Returns
+    -------
+    tuple[str, str]
+        A tuple of (pure_expr_str, raw_definitions_str)
+    """
+    if not isinstance(expr_obj, Expr):
+        # If it's not an Expr, just return its string representation
+        return str(expr_obj), ""
+    # Get the basic representation
+    pure_expr_str = expr_obj._repr_str
+    # Collect all definitions (function sources)
+    raw_definitions = []
+    # Add function sources if any
+    if hasattr(expr_obj, '_function_sources') and expr_obj._function_sources:
+        # Remove duplicates while preserving order
+        unique_sources = []
+        seen = set()
+        for source in expr_obj._function_sources:
+            if source not in seen:
+                seen.add(source)
+                unique_sources.append(source)
+        if unique_sources:
+            raw_definitions.extend(unique_sources)
+    # Join all definitions
+    raw_defs_str = "\n\n".join(raw_definitions) if raw_definitions else ""
+    return pure_expr_str, raw_defs_str
+def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr | None = None,
+                                group_expr: pl.Expr | None = None) -> None:
+    if method_name is None:
+        raise NotImplemented("Cannot create a polars lambda expression without the method")
+    if polars_expr is None:
+        raise NotImplemented("Cannot create polars expressions with lambda function")
+    method_ref = getattr(pl.LazyFrame, method_name)
+    if method_ref is None:
+        raise ModuleNotFoundError(f"Could not find the method {method_name} in polars lazyframe")
+    if method_name == 'group_by':
+        if group_expr is None:
+            raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
+        if not all(isinstance(ge, pl.Expr) for ge in group_expr):
+            raise NotImplemented("Cannot create a polars lambda expression without the groupby expression")
 def generate_node_id() -> int:
     global node_id_counter
     node_id_counter += 1
     return node_id_counter
+@add_lazyframe_methods
 class FlowFrame:
     """Main class that wraps FlowDataEngine and maintains the ETL graph."""
     flow_graph: FlowGraph
@@ -89,13 +176,11 @@ class FlowFrame:
         # Extract flow-specific parameters
         node_id = node_id or generate_node_id()
         description = "Data imported from Python object"
         # Create a new flow graph if none is provided
         if flow_graph is None:
             flow_graph = create_flow_graph()
         flow_id = flow_graph.flow_id
         # Convert data to a polars DataFrame/LazyFrame
         try:
             # Use polars to convert from various types
@@ -110,25 +195,23 @@ class FlowFrame:
             )
             pl_data = pl_df.lazy()
         except Exception as e:
-            raise ValueError(f"Could not convert data to a polars DataFrame: {e}")
+            raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
         # Create a FlowDataEngine to get data in the right format for manual input
         flow_table = FlowDataEngine(raw_data=pl_data)
+        raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
+                                               columns=[c.get_minimal_field_info() for c in flow_table.schema])
         # Create a manual input node
         input_node = input_schema.NodeManualInput(
             flow_id=flow_id,
             node_id=node_id,
-            raw_data=flow_table.to_pylist(),  # Convert to list of dicts
+            raw_data_format=raw_data_format,
             pos_x=100,
             pos_y=100,
             is_setup=True,
             description=description,
         )
         # Add to graph
         flow_graph.add_manual_input(input_node)
         # Return new frame
         return FlowFrame(
             data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
@@ -152,7 +235,6 @@ class FlowFrame:
         parent_node_id=None,
     ):
         """Create a new FlowFrame instance."""
         # If data is not a LazyFrame, use the factory method
         if data is not None and not isinstance(data, pl.LazyFrame):
             return cls.create_from_any_type(
@@ -168,7 +250,6 @@ class FlowFrame:
                 parent_node_id=parent_node_id,
             )
-        # Otherwise create the instance normally
         instance = super().__new__(cls)
         return instance
@@ -187,7 +268,6 @@ class FlowFrame:
         parent_node_id=None,
     ):
         """Initialize the FlowFrame with data and graph references."""
         if data is None:
             data = pl.LazyFrame()
         if not isinstance(data, pl.LazyFrame):
@@ -219,205 +299,235 @@ class FlowFrame:
     def _create_child_frame(self, new_node_id):
         """Helper method to create a new FlowFrame that's a child of this one"""
         self._add_connection(self.node_id, new_node_id)
-        return FlowFrame(
-            data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
-            flow_graph=self.flow_graph,
-            node_id=new_node_id,
-            parent_node_id=self.node_id,
-        )
+        try:
+            return FlowFrame(
+                data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
+                flow_graph=self.flow_graph,
+                node_id=new_node_id,
+                parent_node_id=self.node_id,
+            )
+        except AttributeError:
+            raise ValueError('Could not execute the function')
-    def sort(
-        self,
-        by: List[Expr | str] | Expr | str,
-        *more_by,
-        descending: bool | List[bool] = False,
-        nulls_last: bool = False,
-        multithreaded: bool = True,
-        maintain_order: bool = False,
-        description: str = None,
-    ):
+    @staticmethod
+    def _generate_sort_polars_code(
+            pure_sort_expr_strs: List[str],
+            descending_values: List[bool],
+            nulls_last_values: List[bool],
+            multithreaded: bool,
+            maintain_order: bool,
+    ) -> str:
         """
-        Sort the dataframe by the given columns.
+        Generates the `input_df.sort(...)` Polars code string using pure expression strings.
+        """
+        kwargs_for_code: Dict[str, Any] = {}
+        if any(descending_values):
+            kwargs_for_code["descending"] = descending_values[0] if len(descending_values) == 1 else descending_values
+        if any(nulls_last_values):
+            kwargs_for_code["nulls_last"] = nulls_last_values[0] if len(nulls_last_values) == 1 else nulls_last_values
+        if not multithreaded:
+            kwargs_for_code["multithreaded"] = multithreaded
+        if maintain_order:
+            kwargs_for_code["maintain_order"] = maintain_order
-        Parameters:
-        -----------
-        by : Expr, str, or list of Expr/str
-            Column(s) to sort by. Accepts expression input. Strings are parsed as column names.
-        *more_by : Expr or str
-            Additional columns to sort by, specified as positional arguments.
-        descending : bool or list of bool, default False
-            Sort in descending order. When sorting by multiple columns, can be specified per column.
-        nulls_last : bool or list of bool, default False
-            Place null values last; can specify a single boolean or a sequence for per-column control.
-        multithreaded : bool, default True
-            Sort using multiple threads.
-        maintain_order : bool, default False
-            Whether the order should be maintained if elements are equal.
-        description : str, optional
-            Description of this operation for the ETL graph.
+        kwargs_str_for_code = ", ".join(f"{k}={repr(v)}" for k, v in kwargs_for_code.items())
-        Returns:
-        --------
-        FlowFrame
-            A new FlowFrame with sorted data.
+        by_arg_for_code = pure_sort_expr_strs[0] if len(
+            pure_sort_expr_strs) == 1 else f"[{', '.join(pure_sort_expr_strs)}]"
+        return f"input_df.sort({by_arg_for_code}{', ' + kwargs_str_for_code if kwargs_str_for_code else ''})"
+    def sort(
+            self,
+            by: Union[List[Union[Expr, str]], Expr, str],
+            *more_by: Union[Expr, str],
+            descending: Union[bool, List[bool]] = False,
+            nulls_last: Union[bool, List[bool]] = False,
+            multithreaded: bool = True,
+            maintain_order: bool = False,
+            description: Optional[str] = None,
+    ) -> "FlowFrame":
+        """
+        Sort the dataframe by the given columns.
         """
-        by = list(_parse_inputs_as_iterable((by,)))
+        initial_by_args = list(_parse_inputs_as_iterable((by,)))
         new_node_id = generate_node_id()
-        sort_expressions = by
+        sort_expressions_input: list = initial_by_args
         if more_by:
-            sort_expressions.extend(more_by)
+            sort_expressions_input.extend(list(_parse_inputs_as_iterable(more_by)))
-        # Determine if we need to use polars code fallback
-        needs_polars_code = False
+        all_processed_expr_objects: List[Expr] = []
+        pure_polars_expr_strings_for_sort: List[str] = []
+        collected_raw_definitions: List[str] = []
+        column_names_for_native_node: List[str] = []
-        # Check for any expressions that are not simple columns
-        for expr in sort_expressions:
-            if not isinstance(expr, (str, Column)) or (
-                isinstance(expr, Column) and expr._select_input.is_altered
-            ):
-                needs_polars_code = True
-                break
+        use_polars_code_path = False
-        # Also need polars code if we're using maintain_order or multithreaded params
         if maintain_order or not multithreaded:
-            needs_polars_code = True
-        # Standardize descending parameter
-        if isinstance(descending, (list, tuple)):
-            # Ensure descending list has the same length as sort_expressions
-            if len(descending) != len(sort_expressions):
-                raise ValueError(
-                    f"Length of descending ({len(descending)}) must match number of sort columns ({len(sort_expressions)})"
-                )
-            descending_values = descending
-        else:
-            descending_values = [descending] * len(sort_expressions)
-        # Standardize nulls_last parameter
-        if isinstance(nulls_last, (list, tuple)):
-            if len(nulls_last) != len(sort_expressions):
-                raise ValueError(
-                    f"Length of nulls_last ({len(nulls_last)}) must match number of sort columns ({len(sort_expressions)})"
-                )
-            nulls_last_values = nulls_last
-            # Any non-default nulls_last needs polars code
-            if any(val is not False for val in nulls_last_values):
-                needs_polars_code = True
-        else:
-            nulls_last_values = [nulls_last] * len(sort_expressions)
-            # Non-default nulls_last needs polars code
-            if nulls_last:
-                needs_polars_code = True
-        if needs_polars_code:
-            # Generate polars code for complex cases
-            code = self._generate_sort_polars_code(
-                sort_expressions,
-                descending_values,
-                nulls_last_values,
-                multithreaded,
-                maintain_order,
-            )
-            self._add_polars_code(new_node_id, code, description)
-        else:
-            # Use native implementation for simple cases
-            sort_inputs = []
-            for i, expr in enumerate(sort_expressions):
-                # Convert expr to column name
-                if isinstance(expr, Column):
-                    column_name = expr.name
-                elif isinstance(expr, str):
-                    column_name = expr
+            use_polars_code_path = True
+        is_nulls_last_list = isinstance(nulls_last, (list, tuple))
+        if is_nulls_last_list and any(val for val in nulls_last if val is not False):
+            use_polars_code_path = True
+        elif not is_nulls_last_list and nulls_last is not False:
+            use_polars_code_path = True
+        for expr_input in sort_expressions_input:
+            current_expr_obj: Expr
+            is_simple_col_for_native = False
+            if isinstance(expr_input, str):
+                current_expr_obj = col(expr_input)
+                column_names_for_native_node.append(expr_input)
+                is_simple_col_for_native = True
+            elif isinstance(expr_input, Column):
+                current_expr_obj = expr_input
+                # Type ignore below due to simplified Column stub
+                if not expr_input._select_input.is_altered:  # type: ignore
+                    column_names_for_native_node.append(expr_input.column_name)  # type: ignore
+                    is_simple_col_for_native = True
                 else:
-                    column_name = str(expr)
+                    use_polars_code_path = True  # Altered Column implies complex expression
+            elif isinstance(expr_input, Expr):
+                current_expr_obj = expr_input
+                use_polars_code_path = True  # General Expr implies complex expression
+            else:  # Convert other types to lit
+                current_expr_obj = lit(expr_input)
+                use_polars_code_path = True  # Literal might be part of a complex sort for Polars code
+            all_processed_expr_objects.append(current_expr_obj)
+            pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
+            pure_polars_expr_strings_for_sort.append(pure_expr_str)
+            if raw_defs_str:
+                if raw_defs_str not in collected_raw_definitions:
+                    collected_raw_definitions.append(raw_defs_str)
+                use_polars_code_path = True
+            if not is_simple_col_for_native:  # If it wasn't a simple string or unaltered Column
+                use_polars_code_path = True
+        desc_values = list(descending) if isinstance(descending, list) else [descending] * len(
+            all_processed_expr_objects)
+        null_last_values = list(nulls_last) if isinstance(nulls_last, list) else [nulls_last] * len(
+            all_processed_expr_objects)
+        if len(desc_values) != len(all_processed_expr_objects):
+            raise ValueError("Length of 'descending' does not match the number of sort expressions.")
+        if len(null_last_values) != len(all_processed_expr_objects):
+            raise ValueError("Length of 'nulls_last' does not match the number of sort expressions.")
+        if use_polars_code_path:
+            polars_operation_code = self._generate_sort_polars_code(
+                pure_polars_expr_strings_for_sort, desc_values, null_last_values, multithreaded, maintain_order
+            )
-                # Create SortByInput with appropriate settings
-                sort_inputs.append(
-                    transform_schema.SortByInput(
-                        column=column_name,
-                        how="desc" if descending_values[i] else "asc",
-                    )
+            final_code_for_node: str
+            if collected_raw_definitions:
+                unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))  # Order-preserving unique
+                definitions_section = "\n\n".join(unique_raw_definitions)
+                final_code_for_node = definitions_section + \
+                                      "\#─────SPLIT─────\n\n" + \
+                                      f"output_df = {polars_operation_code}"
+            else:
+                final_code_for_node = polars_operation_code
+            pl_expressions_for_fallback = [e.expr for e in all_processed_expr_objects if
+                                           hasattr(e, 'expr') and e.expr is not None]
+            kwargs_for_fallback = {
+                "descending": desc_values[0] if len(desc_values) == 1 else desc_values,
+                "nulls_last": null_last_values[0] if len(null_last_values) == 1 else null_last_values,
+                "multithreaded": multithreaded, "maintain_order": maintain_order}
+            self._add_polars_code(new_node_id, final_code_for_node, description, method_name="sort",
+                                  convertable_to_code=_check_if_convertible_to_code(all_processed_expr_objects),
+                                  polars_expr=pl_expressions_for_fallback,
+                                  kwargs_expr=kwargs_for_fallback)
+        else:
+            sort_inputs_for_node = []
+            for i, col_name_for_native in enumerate(column_names_for_native_node):
+                sort_inputs_for_node.append(
+                    transform_schema.SortByInput(column=col_name_for_native, how="desc" if desc_values[i] else "asc")
+                    # type: ignore
                 )
             sort_settings = input_schema.NodeSort(
-                flow_id=self.flow_graph.flow_id,
-                node_id=new_node_id,
-                sort_input=sort_inputs,
-                pos_x=200,
-                pos_y=150,
-                is_setup=True,
-                depending_on_id=self.node_id,
-                description=description
-                or f"Sort by {', '.join(str(e) for e in sort_expressions)}",
-            )
+                flow_id=self.flow_graph.flow_id, node_id=new_node_id, sort_input=sort_inputs_for_node,  # type: ignore
+                pos_x=200, pos_y=150, is_setup=True, depending_on_id=self.node_id,
+                description=description or f"Sort by {', '.join(column_names_for_native_node)}")
             self.flow_graph.add_sort(sort_settings)
         return self._create_child_frame(new_node_id)
-    def _generate_sort_polars_code(
-        self,
-        sort_expressions: list,
-        descending_values: list,
-        nulls_last_values: list,
-        multithreaded: bool,
-        maintain_order: bool,
-    ) -> str:
-        """Generate Polars code for sort operations that need fallback."""
-        # Format expressions for code
-        expr_strs = []
-        for expr in sort_expressions:
-            if isinstance(expr, (Expr, Column)):
-                expr_strs.append(str(expr))
-            elif isinstance(expr, str):
-                expr_strs.append(f"'{expr}'")
-            else:
-                expr_strs.append(str(expr))
-        # Format parameters
-        if len(sort_expressions) == 1:
-            by_arg = expr_strs[0]
-        else:
-            by_arg = f"[{', '.join(expr_strs)}]"
-        # Build kwargs
-        kwargs = {}
-        # Only add descending if it's non-default
-        if any(d for d in descending_values):
-            if len(descending_values) == 1:
-                kwargs["descending"] = descending_values[0]
-            else:
-                kwargs["descending"] = descending_values
-        # Only add nulls_last if it's non-default
-        if any(nl for nl in nulls_last_values):
-            if len(nulls_last_values) == 1:
-                kwargs["nulls_last"] = nulls_last_values[0]
+    def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
+                         depending_on_ids: List[str] | None = None, convertable_to_code: bool = True,
+                         method_name: str = None, polars_expr: Expr | List[Expr] | None = None,
+                         group_expr: Expr | List[Expr] | None = None,
+                         kwargs_expr: Dict | None = None,
+                         group_kwargs: Dict | None = None, ):
+        polars_code_for_node: str
+        if not convertable_to_code or _contains_lambda_pattern(code):
+            effective_method_name = get_method_name_from_code(
+                code) if method_name is None and "input_df." in code else method_name
+            pl_expr_list = ensure_inputs_as_iterable(polars_expr) if polars_expr is not None else []
+            group_expr_list = ensure_inputs_as_iterable(group_expr) if group_expr is not None else []
+            _check_ok_for_serialization(polars_expr=pl_expr_list, method_name=effective_method_name,
+                                        group_expr=group_expr_list)
+            current_kwargs_expr = kwargs_expr if kwargs_expr is not None else {}
+            result_lazyframe_or_expr: Any
+            if effective_method_name == "group_by":
+                group_kwargs = {} if group_kwargs is None else group_kwargs
+                if not group_expr_list:
+                    raise ValueError("group_expr is required for group_by method in serialization fallback.")
+                target_obj = getattr(self.data, effective_method_name)(*group_expr_list, **group_kwargs)
+                if not pl_expr_list:
+                    raise ValueError(
+                        "Aggregation expressions (polars_expr) are required for group_by().agg() in serialization fallback.")
+                result_lazyframe_or_expr = target_obj.agg(*pl_expr_list, **current_kwargs_expr)
+            elif effective_method_name:
+                result_lazyframe_or_expr = getattr(self.data, effective_method_name)(*pl_expr_list,
+                                                                                     **current_kwargs_expr)
             else:
-                kwargs["nulls_last"] = nulls_last_values
-        # Add other parameters if they're non-default
-        if not multithreaded:
-            kwargs["multithreaded"] = multithreaded
-        if maintain_order:
-            kwargs["maintain_order"] = maintain_order
-        # Build kwargs string
-        kwargs_str = ", ".join(f"{k}={v}" for k, v in kwargs.items())
-        # Build final code
-        if kwargs_str:
-            return f"input_df.sort({by_arg}, {kwargs_str})"
+                raise ValueError(
+                    "Cannot execute Polars operation: method_name is missing and could not be inferred for serialization fallback.")
+            try:
+                if isinstance(result_lazyframe_or_expr, pl.LazyFrame):
+                    serialized_value_for_code = result_lazyframe_or_expr.serialize(format='json')
+                    polars_code_for_node = "\n".join([
+                        f"serialized_value = r'''{serialized_value_for_code}'''",
+                        "buffer = BytesIO(serialized_value.encode('utf-8'))",
+                        "output_df = pl.LazyFrame.deserialize(buffer, format='json')",
+                    ])
+                    logger.warning(
+                        f"Transformation '{effective_method_name}' uses non-serializable elements. "
+                        "Falling back to serializing the resulting Polars LazyFrame object."
+                        "This will result in a breaking graph when using the the ui."
+                    )
+                else:
+                    logger.error(
+                        f"Fallback for non-convertible code for method '{effective_method_name}' "
+                        f"resulted in a '{type(result_lazyframe_or_expr).__name__}' instead of a Polars LazyFrame. "
+                        "This type cannot be persisted as a LazyFrame node via this fallback."
+                    )
+                    return FlowFrame(result_lazyframe_or_expr, flow_graph=self.flow_graph, node_id=new_node_id)
+            except Exception as e:
+                logger.warning(
+                    f"Critical error: Could not serialize the result of operation '{effective_method_name}' "
+                    f"during fallback for non-convertible code. Error: {e}."
+                    "When using a lambda function, consider defining the function first"
+                )
+                return FlowFrame(result_lazyframe_or_expr, flow_graph=self.flow_graph, node_id=new_node_id)
         else:
-            return f"input_df.sort({by_arg})"
-    def _add_polars_code(self, new_node_id: int, code: str, description: str = None,
-                         depending_on_ids: List[str] | None = None):
+            polars_code_for_node = code
         polars_code_settings = input_schema.NodePolarsCode(
             flow_id=self.flow_graph.flow_id,
             node_id=new_node_id,
-            polars_code_input=transform_schema.PolarsCodeInput(polars_code=code),
+            polars_code_input=transform_schema.PolarsCodeInput(polars_code=polars_code_for_node),
             is_setup=True,
             depending_on_ids=depending_on_ids if depending_on_ids is not None else [self.node_id],
             description=description,
@@ -458,14 +568,17 @@ class FlowFrame:
         validate : {"1:1", "1:m", "m:1", "m:m"}, optional
             Validate join relationship.
         nulls_equal:
-            Join on null values. By default null values will never produce matches.
+            Join on null values. By default, null values will never produce matches.
         coalesce:
             None: -> join specific.
             True: -> Always coalesce join columns.
             False: -> Never coalesce join columns.
         maintain_order:
-            Which DataFrame row order to preserve, if any. Do not rely on any observed ordering without explicitly setting this parameter, as your code may break in a future release. Not specifying any ordering can improve performance Supported for inner, left, right and full joins
-            None: No specific ordering is desired. The ordering might differ across Polars versions or even between different runs.
+            Which DataFrame row order to preserve, if any. Do not rely on any observed ordering without explicitly
+            setting this parameter, as your code may break in a future release.
+            Not specifying any ordering can improve performance Supported for inner, left, right and full joins
+            None: No specific ordering is desired. The ordering might differ across Polars versions or even between
+            different runs.
             left: Preserves the order of the left DataFrame.
             right: Preserves the order of the right DataFrame.
             left_right: First preserves the order of the left DataFrame, then the right.
@@ -478,14 +591,27 @@ class FlowFrame:
         FlowFrame
             New FlowFrame with join operation applied.
         """
-        new_node_id = generate_node_id()
-        print('new node id', new_node_id)
         use_polars_code = not(maintain_order is None and
                               coalesce is None and
                               nulls_equal is False and
                               validate is None and
                               suffix == '_right')
         join_mappings = None
+        if self.flow_graph.flow_id != other.flow_graph.flow_id:
+            combined_graph, node_mappings = combine_flow_graphs_with_mapping(self.flow_graph, other.flow_graph)
+            new_self_node_id = node_mappings.get((self.flow_graph.flow_id, self.node_id), None)
+            new_other_node_id = node_mappings.get((other.flow_graph.flow_id, other.node_id), None)
+            if new_other_node_id is None or new_self_node_id is None:
+                raise ValueError("Cannot remap the nodes")
+            self.node_id = new_self_node_id
+            other.node_id = new_other_node_id
+            self.flow_graph = combined_graph
+            other.flow_graph = combined_graph
+            global node_id_counter
+            node_id_counter += len(combined_graph.nodes)
+        new_node_id = generate_node_id()
         if on is not None:
             left_columns = right_columns = _normalize_columns_to_list(on)
         elif left_on is not None and right_on is not None:
@@ -504,10 +630,11 @@ class FlowFrame:
             )
         if not use_polars_code:
             join_mappings, use_polars_code = _create_join_mappings(
-                left_columns, right_columns
+                left_columns or [], right_columns or []
             )
         if use_polars_code or suffix != '_right':
             _on = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in _normalize_columns_to_list(on)) + "]" if on else None
             _left = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in left_columns) + "]" if left_on else None
             _right = "["+', '.join(f"'{v}'" if isinstance(v, str) else str(v) for v in right_columns) + "]" if right_on else None
@@ -527,31 +654,50 @@ class FlowFrame:
                 parent_node_id=self.node_id,
             )
-        elif join_mappings:
+        elif join_mappings or how == 'cross':
             left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
             right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
-            join_input = transform_schema.JoinInput(
-                join_mapping=join_mappings,
-                left_select=left_select.renames,
-                right_select=right_select.renames,
-                how=how,
-            )
+            if how == 'cross':
+                join_input = transform_schema.CrossJoinInput(left_select=left_select.renames,
+                                                             right_select=right_select.renames,)
+            else:
+                join_input = transform_schema.JoinInput(
+                    join_mapping=join_mappings,
+                    left_select=left_select.renames,
+                    right_select=right_select.renames,
+                    how=how,
+                )
             join_input.auto_rename()
-            # Create node settings
-            join_settings = input_schema.NodeJoin(
-                flow_id=self.flow_graph.flow_id,
-                node_id=new_node_id,
-                join_input=join_input,
-                auto_generate_selection=True,
-                verify_integrity=True,
-                pos_x=200,
-                pos_y=150,
-                is_setup=True,
-                depending_on_ids=[self.node_id, other.node_id],
-                description=description or f"Join with {how} strategy",
-            )
-            self.flow_graph.add_join(join_settings)
+            if how == 'cross':
+                cross_join_settings = input_schema.NodeCrossJoin(
+                    flow_id=self.flow_graph.flow_id,
+                    node_id=new_node_id,
+                    cross_join_input=join_input,
+                    is_setup=True,
+                    depending_on_ids=[self.node_id, other.node_id],
+                    description=description or f"Join with {how} strategy",
+                    auto_generate_selection=True,
+                    verify_integrity=True,
+                )
+                self.flow_graph.add_cross_join(cross_join_settings)
+            else:
+                join_settings = input_schema.NodeJoin(
+                    flow_id=self.flow_graph.flow_id,
+                    node_id=new_node_id,
+                    join_input=join_input,
+                    auto_generate_selection=True,
+                    verify_integrity=True,
+                    pos_x=200,
+                    pos_y=150,
+                    is_setup=True,
+                    depending_on_ids=[self.node_id, other.node_id],
+                    description=description or f"Join with {how} strategy",
+                )
+                self.flow_graph.add_join(join_settings)
             self._add_connection(self.node_id, new_node_id, "main")
             other._add_connection(other.node_id, new_node_id, "right")
             result_frame = FlowFrame(
@@ -578,40 +724,68 @@ class FlowFrame:
         self.flow_graph.add_record_count(node_number_of_records)
         return self._create_child_frame(new_node_id)
-    def select(self, *columns, description: str = None):
+    def select(self, *columns: Union[str, Expr, Selector], description: Optional[str] = None) -> "FlowFrame":
         """
         Select columns from the frame.
-        Args:
-            *columns: Column names or expressions
-            description: Description of the step, this will be shown in the flowfile file
-        Returns:
-            A new FlowFrame with selected columns
         """
-        # Create new node ID
-        columns = _parse_inputs_as_iterable(columns)
+        columns_iterable = list(_parse_inputs_as_iterable(columns))
         new_node_id = generate_node_id()
-        existing_columns = self.columns
-        if (len(columns) == 1 and isinstance(columns[0], Expr)
-                and str(columns[0]) == "pl.Expr(len()).alias('number_of_records')"):
+        if (len(columns_iterable) == 1 and isinstance(columns_iterable[0], Expr)
+                and str(columns_iterable[0]) == "pl.Expr(len()).alias('number_of_records')"):
             return self._add_number_of_records(new_node_id, description)
-        # Handle simple column names
-        if all(isinstance(col_, (str, Column)) for col_ in columns):
-            # Create select inputs
-            select_inputs = [
-                transform_schema.SelectInput(old_name=col_) if isinstance(col_, str) else col_.to_select_input()
-                for col_ in columns
-            ]
-            dropped_columns = [transform_schema.SelectInput(c, keep=False) for c in existing_columns if
-                               c not in [s.old_name for s in select_inputs]]
-            select_inputs.extend(dropped_columns)
+        all_input_expr_objects: List[Expr] = []
+        pure_polars_expr_strings_for_select: List[str] = []
+        collected_raw_definitions: List[str] = []
+        selected_col_names_for_native: List[str] = []  # For native node
+        can_use_native_node = True
+        if len(columns_iterable) == 1 and isinstance(columns_iterable[0], str) and columns_iterable[0] == '*':
+            effective_columns_iterable = [col(c_name) for c_name in self.columns]
+        else:
+            effective_columns_iterable = columns_iterable
+        for expr_input in effective_columns_iterable:
+            current_expr_obj = expr_input
+            is_simple_col_for_native = False
+            if isinstance(expr_input, str):
+                current_expr_obj = col(expr_input)
+                selected_col_names_for_native.append(expr_input)
+                is_simple_col_for_native = True
+            elif isinstance(expr_input, Column) and not expr_input._select_input.is_altered:  # type: ignore
+                selected_col_names_for_native.append(expr_input.column_name)  # type: ignore
+                is_simple_col_for_native = True
+            elif isinstance(expr_input, Selector):  # Selectors imply Polars code path
+                can_use_native_node = False
+                # current_expr_obj = expr_input # Already an Expr-like via selector
+            elif not isinstance(expr_input, Expr):  # Includes Column
+                current_expr_obj = lit(expr_input)
+            all_input_expr_objects.append(current_expr_obj)  # type: ignore
+            pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
+            pure_polars_expr_strings_for_select.append(pure_expr_str)
+            if raw_defs_str and raw_defs_str not in collected_raw_definitions:
+                collected_raw_definitions.append(raw_defs_str)
+            if not is_simple_col_for_native and not isinstance(expr_input, Selector):
+                can_use_native_node = False  # Complex expressions require Polars code
+        if collected_raw_definitions:  # Has to use Polars code if there are definitions
+            can_use_native_node = False
+        if can_use_native_node:
+            select_inputs_for_node = [transform_schema.SelectInput(old_name=name) for name in
+                                      selected_col_names_for_native]
+            existing_cols = self.columns
+            dropped_columns = [transform_schema.SelectInput(c, keep=False) for c in existing_cols if
+                               c not in selected_col_names_for_native]
+            select_inputs_for_node.extend(dropped_columns)
             select_settings = input_schema.NodeSelect(
                 flow_id=self.flow_graph.flow_id,
                 node_id=new_node_id,
-                select_input=select_inputs,
+                select_input=select_inputs_for_node,
                 keep_missing=False,
                 pos_x=200,
                 pos_y=100,
@@ -619,60 +793,97 @@ class FlowFrame:
                 depending_on_id=self.node_id,
                 description=description
             )
-            # Add to graph
             self.flow_graph.add_select(select_settings)
-            return self._create_child_frame(new_node_id)
         else:
-            readable_exprs = []
-            is_readable: bool = True
-            for col_ in columns:
-                if isinstance(col_, Expr):
-                    readable_exprs.append(col_)
-                elif isinstance(col_, Selector):
-                    readable_exprs.append(col_)
-                elif isinstance(col_, pl.expr.Expr):
-                    print('warning this cannot be converted to flowfile frontend. Make sure you use the flowfile expr')
-                    is_readable = False
-                elif isinstance(col_, str) and col_ in self.columns:
-                    col_expr = Column(col_)
-                    readable_exprs.append(col_expr)
-                else:
-                    lit_expr = lit(col_)
-                    readable_exprs.append(lit_expr)
-            if is_readable:
-                code = f"input_df.select([{', '.join(str(e) for e in readable_exprs)}])"
+            polars_operation_code = f"input_df.select([{', '.join(pure_polars_expr_strings_for_select)}])"
+            final_code_for_node: str
+            if collected_raw_definitions:
+                unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
+                definitions_section = "\n\n".join(unique_raw_definitions)
+                final_code_for_node = definitions_section + \
+                                      "\#─────SPLIT─────\n\n" + \
+                                      f"output_df = {polars_operation_code}"
             else:
-                raise ValueError('Not supported')
+                final_code_for_node = polars_operation_code
-            self._add_polars_code(new_node_id, code, description)
-            return self._create_child_frame(new_node_id)
+            pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
+                                           isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
+            self._add_polars_code(new_node_id, final_code_for_node, description,
+                                  method_name="select",
+                                  convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
+                                  polars_expr=pl_expressions_for_fallback)
+        return self._create_child_frame(new_node_id)
-    def filter(self, predicate: Expr | Any = None, *, flowfile_formula: str = None, description: str = None):
+    def filter(self, *predicates: Union[Expr, Any], flowfile_formula: Optional[str] = None,
+               description: Optional[str] = None, **constraints: Any) -> "FlowFrame":
         """
         Filter rows based on a predicate.
-        Args:
-            predicate: Filter condition
-            flowfile_formula: Native support in frontend
-            description: Description of the step that is performed
-        Returns:
-            A new FlowFrame with filtered rows
         """
+        if (len(predicates) > 0 or len(constraints) > 0) and flowfile_formula:
+            raise ValueError("You can only use one of the following: predicates, constraints or flowfile_formula")
+        available_columns = self.columns
         new_node_id = generate_node_id()
-        # Create new node ID
-        if predicate:
-            # we use for now the fallback on polars code.
-            if isinstance(predicate, Expr):
-                predicate_expr = predicate
+        if len(predicates) > 0 or len(constraints) > 0:
+            all_input_expr_objects: List[Expr] = []
+            pure_polars_expr_strings: List[str] = []
+            collected_raw_definitions: List[str] = []
+            processed_predicates = []
+            for pred_item in predicates:
+                if isinstance(pred_item, (tuple, list, Iterator)):
+                    # If it's a sequence, extend the processed_predicates with its elements
+                    processed_predicates.extend(list(pred_item))
+                else:
+                    # Otherwise, just add the item
+                    processed_predicates.append(pred_item)
+            for pred_input in processed_predicates:  # Loop over the processed_predicates
+                # End of the new/modified section
+                current_expr_obj = None  # Initialize current_expr_obj
+                if isinstance(pred_input, Expr):
+                    current_expr_obj = pred_input
+                elif isinstance(pred_input, str) and pred_input in available_columns:
+                    current_expr_obj = col(pred_input)
+                else:
+                    current_expr_obj = lit(pred_input)
+                all_input_expr_objects.append(current_expr_obj)
+                pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
+                pure_polars_expr_strings.append(f"({pure_expr_str})")
+                if raw_defs_str and raw_defs_str not in collected_raw_definitions:
+                    collected_raw_definitions.append(raw_defs_str)
+            for k, v_val in constraints.items():
+                constraint_expr_obj = (col(k) == lit(v_val))
+                all_input_expr_objects.append(constraint_expr_obj)
+                pure_expr_str, raw_defs_str = _extract_expr_parts(
+                    constraint_expr_obj)  # Constraint exprs are unlikely to have defs
+                pure_polars_expr_strings.append(f"({pure_expr_str})")
+                if raw_defs_str and raw_defs_str not in collected_raw_definitions:  # Should be rare here
+                    collected_raw_definitions.append(raw_defs_str)
+            filter_conditions_str = " & ".join(pure_polars_expr_strings) if pure_polars_expr_strings else "pl.lit(True)"
+            polars_operation_code = f"input_df.filter({filter_conditions_str})"
+            final_code_for_node: str
+            if collected_raw_definitions:
+                unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))  # Order-preserving unique
+                definitions_section = "\n\n".join(unique_raw_definitions)
+                final_code_for_node = definitions_section + \
+                                      "\#─────SPLIT─────\n\n" + \
+                                      f"output_df = {polars_operation_code}"
             else:
-                predicate_expr = lit(predicate)
-            code = f"input_df.filter({str(predicate_expr)})"
-            self._add_polars_code(new_node_id, code, description)
+                final_code_for_node = polars_operation_code
+            convertable_to_code = _check_if_convertible_to_code(all_input_expr_objects)
+            pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
+                                           isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
+            self._add_polars_code(new_node_id, final_code_for_node, description, method_name="filter",
+                                  convertable_to_code=convertable_to_code,
+                                  polars_expr=pl_expressions_for_fallback)
         elif flowfile_formula:
-            # Create node settings
             filter_settings = input_schema.NodeFilter(
                 flow_id=self.flow_graph.flow_id,
                 node_id=new_node_id,
@@ -686,8 +897,10 @@ class FlowFrame:
                 depending_on_id=self.node_id,
                 description=description
             )
             self.flow_graph.add_filter(filter_settings)
+        else:
+            logger.info("Filter called with no arguments; creating a pass-through Polars code node.")
+            self._add_polars_code(new_node_id, "output_df = input_df", description or "No-op filter", method_name=None)
         return self._create_child_frame(new_node_id)
@@ -772,7 +985,7 @@ class FlowFrame:
                 if convert_to_absolute_path:
                     output_settings.directory = output_settings.abs_file_path
             except Exception as e:
-                print(f"Warning: Could not determine absolute path for {file_str}: {e}")
+                logger.warning(f"Could not determine absolute path for {file_str}: {e}")
         if not use_polars_code:
             node_output = input_schema.NodeOutput(
@@ -800,7 +1013,7 @@ class FlowFrame:
             # Use sink_parquet for LazyFrames
             code = f"input_df.sink_parquet({args_str})"
-            print(f"Generated Polars Code: {code}")
+            logger.debug(f"Generated Polars Code: {code}")
             self._add_polars_code(new_node_id, code, description)
         return self._create_child_frame(new_node_id)
@@ -848,7 +1061,7 @@ class FlowFrame:
                 if convert_to_absolute_path:
                     output_settings.directory = output_settings.abs_file_path
             except Exception as e:
-                print(f"Warning: Could not determine absolute path for {file_str}: {e}")
+                logger.warning(f"Could not determine absolute path for {file_str}: {e}")
         if not use_polars_code:
             node_output = input_schema.NodeOutput(
@@ -881,7 +1094,7 @@ class FlowFrame:
                 args_str += f", {kwargs_repr}"
             code = f"input_df.collect().write_csv({args_str})"
-            print(f"Generated Polars Code: {code}")
+            logger.debug(f"Generated Polars Code: {code}")
             self._add_polars_code(new_node_id, code, description)
         return self._create_child_frame(new_node_id)
@@ -934,10 +1147,10 @@ class FlowFrame:
             self.flow_graph.apply_layout()
         self.flow_graph.save_flow(file_path)
-    def collect(self):
+    def collect(self, *args, **kwargs):
         """Collect lazy data into memory."""
         if hasattr(self.data, "collect"):
-            return self.data.collect()
+            return self.data.collect(*args, **kwargs)
         return self.data
     def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> "FlowFrame":
@@ -946,7 +1159,7 @@ class FlowFrame:
             input_schema.NodeFormula(flow_id=self.flow_graph.flow_id, node_id=new_node_id, depending_on_id=self.node_id,
                                      function=transform_schema.FunctionInput(
                                          function=flowfile_formula,
-                                         field=transform_schema.FieldInput(name=output_column_name)),
+                                         field=transform_schema.FieldInput(name=output_column_name, data_type='Auto')),
                                      description=description))
         self.flow_graph.add_formula(function_settings)
         return self._create_child_frame(new_node_id)
@@ -1241,16 +1454,27 @@ class FlowFrame:
         FlowFrame
             A new FlowFrame with the concatenated data
         """
-        new_node_id = generate_node_id()
         # Convert single FlowFrame to list
         if isinstance(other, FlowFrame):
             others = [other]
         else:
             others = other
+        all_graphs = []
+        all_graph_ids = []
+        for g in [self.flow_graph] + [f.flow_graph for f in others]:
+            if g.flow_id not in all_graph_ids:
+                all_graph_ids.append(g.flow_id)
+                all_graphs.append(g)
+        if len(all_graphs) > 1:
+            combined_graph, node_mappings = combine_flow_graphs_with_mapping(*all_graphs)
+            for f in [self] + other:
+                f.node_id = node_mappings.get((f.flow_graph.flow_id, f.node_id), None)
+            global node_id_counter
+            node_id_counter += len(combined_graph.nodes)
+        else:
+            combined_graph = self.flow_graph
+        new_node_id = generate_node_id()
         use_native = how == "diagonal_relaxed" and parallel and not rechunk
         if use_native:
             # Create union input for the transform schema
             union_input = transform_schema.UnionInput(
@@ -1284,7 +1508,6 @@ class FlowFrame:
                 input_vars.append(f"input_df_{i+2}")
             frames_list = f"[{', '.join(input_vars)}]"
             code = f"""
             # Perform concat operation
             output_df = pl.concat(
@@ -1294,19 +1517,20 @@ class FlowFrame:
                 parallel={parallel}
             )
             """
+            self.flow_graph = combined_graph
             # Add polars code node with dependencies on all input frames
             depending_on_ids = [self.node_id] + [frame.node_id for frame in others]
             self._add_polars_code(
                 new_node_id, code, description, depending_on_ids=depending_on_ids
             )
             # Add connections to ensure all frames are available
             self._add_connection(self.node_id, new_node_id, "main")
             for other_frame in others:
-                other_frame._add_connection(other_frame.node_id, new_node_id, "main")
+                other_frame.flow_graph = combined_graph
+                other_frame._add_connection(other_frame.node_id, new_node_id, "main")
         # Create and return the new frame
         return FlowFrame(
             data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
@@ -1343,7 +1567,7 @@ class FlowFrame:
             return False, None
         # Extract the output name
-        output_name = expr.name
+        output_name = expr.column_name
         if ".over(" not in expr._repr_str:
             # Simple cumulative count can be implemented as a record ID with offset=1
@@ -1426,62 +1650,70 @@ class FlowFrame:
         return False, None
     def with_columns(
-        self,
-        exprs: Expr | List[Expr | None] = None,
-        *,
-        flowfile_formulas: Optional[List[str]] = None,
-        output_column_names: Optional[List[str]] = None,
-        description: Optional[str] = None,
+            self,
+            *exprs: Union[Expr, Iterable[Expr], Any],  # Allow Any for implicit lit conversion
+            flowfile_formulas: Optional[List[str]] = None,
+            output_column_names: Optional[List[str]] = None,
+            description: Optional[str] = None,
+            **named_exprs: Union[Expr, Any],  # Allow Any for implicit lit conversion
     ) -> "FlowFrame":
         """
-        Add multiple columns to the DataFrame.
-        Parameters
-        ----------
-        exprs : Expr or List[Expr], optional
-            Expressions to evaluate as new columns
-        flowfile_formulas : List[str], optional
-            Alternative approach using flowfile formula syntax
-        output_column_names : List[str], optional
-            Column names for the flowfile formulas
-        description : str, optional
-            Description of this operation for the ETL graph
-        Returns
-        -------
-        FlowFrame
-            A new FlowFrame with the columns added
-        Raises
-        ------
-        ValueError
-            If neither exprs nor flowfile_formulas with output_column_names are provided,
-            or if the lengths of flowfile_formulas and output_column_names don't match
+        Add or replace columns in the DataFrame.
         """
-        if exprs is not None:
-            new_node_id = generate_node_id()
-            exprs_iterable = _parse_inputs_as_iterable((exprs,))
+        new_node_id = generate_node_id()
-            if len(exprs_iterable) == 1:
-                detected, result = self._detect_cum_count_record_id(
-                    exprs_iterable[0], new_node_id, description
-                )
-                if detected:
-                    return result
-            all_expressions = []
-            for expression in exprs_iterable:
-                if not isinstance(expression, (Expr, Column)):
-                    all_expressions.append(lit(expression))
-                else:
-                    all_expressions.append(expression)
+        all_input_expr_objects: List[Expr] = []
+        pure_polars_expr_strings_for_wc: List[str] = []
+        collected_raw_definitions: List[str] = []
+        has_exprs_or_named_exprs = bool(exprs or named_exprs)
+        if has_exprs_or_named_exprs:
+            actual_exprs_to_process: List[Expr] = []
+            temp_exprs_iterable = list(_parse_inputs_as_iterable(exprs))
+            for item in temp_exprs_iterable:
+                if isinstance(item, Expr):
+                    actual_exprs_to_process.append(item)
+                else:  # auto-lit for non-Expr positional args
+                    actual_exprs_to_process.append(lit(item))
+            for name, val_expr in named_exprs.items():
+                if isinstance(val_expr, Expr):
+                    actual_exprs_to_process.append(val_expr.alias(name))  # type: ignore # Assuming Expr has alias
+                else:  # auto-lit for named args and then alias
+                    actual_exprs_to_process.append(lit(val_expr).alias(name))  # type: ignore
+            if len(actual_exprs_to_process) == 1 and isinstance(actual_exprs_to_process[0], Expr):
+                pass
+            for current_expr_obj in actual_exprs_to_process:
+                all_input_expr_objects.append(current_expr_obj)
+                pure_expr_str, raw_defs_str = _extract_expr_parts(current_expr_obj)
+                pure_polars_expr_strings_for_wc.append(pure_expr_str)  # with_columns takes individual expressions
+                if raw_defs_str and raw_defs_str not in collected_raw_definitions:
+                    collected_raw_definitions.append(raw_defs_str)
+            polars_operation_code = f"input_df.with_columns([{', '.join(pure_polars_expr_strings_for_wc)}])"
+            final_code_for_node: str
+            if collected_raw_definitions:
+                unique_raw_definitions = list(dict.fromkeys(collected_raw_definitions))
+                definitions_section = "\n\n".join(unique_raw_definitions)
+                final_code_for_node = definitions_section + \
+                                      "\n#─────SPLIT─────\n\n" + \
+                                      f"output_df = {polars_operation_code}"
+            else:
+                final_code_for_node = polars_operation_code
-            code = (
-                f"input_df.with_columns({', '.join(str(e) for e in all_expressions)})"
-            )
-            self._add_polars_code(new_node_id, code, description)
+            pl_expressions_for_fallback = [e.expr for e in all_input_expr_objects if
+                                           isinstance(e, Expr) and hasattr(e, 'expr') and e.expr is not None]
+            self._add_polars_code(new_node_id, final_code_for_node, description, method_name='with_columns',
+                                  convertable_to_code=_check_if_convertible_to_code(all_input_expr_objects),
+                                  polars_expr=pl_expressions_for_fallback)
             return self._create_child_frame(new_node_id)
         elif flowfile_formulas is not None and output_column_names is not None:
             if len(output_column_names) != len(flowfile_formulas):
                 raise ValueError(
                     "Length of both the formulas and the output columns names must be identical"
@@ -1494,9 +1726,7 @@ class FlowFrame:
                 ff = ff._with_flowfile_formula(flowfile_formula, output_column_name, f"{i}: {description}")
             return ff
         else:
-            raise ValueError(
-                "Either exprs or flowfile_formulas with output_column_names must be provided"
-            )
+            raise ValueError("Either exprs/named_exprs or flowfile_formulas with output_column_names must be provided")
     def with_row_index(
         self, name: str = "index", offset: int = 0, description: str = None
@@ -1584,26 +1814,27 @@ class FlowFrame:
         if isinstance(columns, (list, tuple)):
             all_columns.extend(
-                [col.name if isinstance(col, Column) else col for col in columns]
+                [col.column_name if isinstance(col, Column) else col for col in columns]
             )
         else:
-            all_columns.append(columns.name if isinstance(columns, Column) else columns)
+            all_columns.append(columns.column_name if isinstance(columns, Column) else columns)
         if more_columns:
             for col in more_columns:
-                all_columns.append(col.name if isinstance(col, Column) else col)
+                all_columns.append(col.column_name if isinstance(col, Column) else col)
         if len(all_columns) == 1:
-            columns_str = f"'{all_columns[0]}'"
+            columns_str = stringify_values(all_columns[0])
         else:
-            columns_str = "[" + ", ".join([f"'{col}'" for col in all_columns]) + "]"
+            columns_str = "[" + ", ".join([ stringify_values(col) for col in all_columns]) + "]"
         code = f"""
         # Explode columns into multiple rows
         output_df = input_df.explode({columns_str})
         """
-        cols_desc = ", ".join(all_columns)
+        cols_desc = ", ".join(str(s) for s in all_columns)
         desc = description or f"Explode column(s): {cols_desc}"
         # Add polars code node
@@ -1646,7 +1877,7 @@ class FlowFrame:
         new_node_id = generate_node_id()
         if isinstance(column, Column):
-            column_name = column.name
+            column_name = column.column_name
         else:
             column_name = column
@@ -1730,7 +1961,7 @@ class FlowFrame:
                     if col_expr._select_input.is_altered:
                         can_use_native = False
                         break
-                    processed_subset.append(col_expr.name)
+                    processed_subset.append(col_expr.column_name)
                 else:
                     can_use_native = False
                     break
@@ -1818,276 +2049,34 @@ class FlowFrame:
         """Get the number of columns."""
         return self.data.width
+    def __contains__(self, key):
+        """This special method enables the 'in' operator to work with FlowFrame objects."""
+        return key in self.data
-def _add_delegated_methods():
-    """Add delegated methods from polars LazyFrame."""
-    delegate_methods = [
-        "collect_async",
-        "profile",
-        "describe",
-        "explain",
-        "show_graph",
-        "serialize",
-        "fetch",
-        "get_meta",
-        "columns",
-        "dtypes",
-        "schema",
-        "estimated_size",
-        "n_chunks",
-        "is_empty",
-        "chunk_lengths",
-        "optimization_toggle",
-        "set_polars_options",
-        "collect_schema"
-    ]
-    already_implemented = set(dir(FlowFrame))
-    for method_name in delegate_methods:
-        if method_name not in already_implemented and hasattr(
-            pl.LazyFrame, method_name
-        ):
-            # Create a simple delegate method
-            def make_delegate(name):
-                def delegate_method(self, *args, **kwargs):
-                    return getattr(self.data, name)(*args, **kwargs)
-                # Set docstring and name
-                delegate_method.__doc__ = (
-                    f"See pl.LazyFrame.{name} for full documentation."
-                )
-                delegate_method.__name__ = name
-                return delegate_method
-            # Add the method to the class
-            setattr(FlowFrame, method_name, make_delegate(method_name))
-_add_delegated_methods()
-def sum(expr):
-    """Sum aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.sum()
-def mean(expr):
-    """Mean aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.mean()
-def min(expr):
-    """Min aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.min()
-def max(expr):
-    """Max aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.max()
-def count(expr):
-    """Count aggregation function."""
-    if isinstance(expr, str):
-        expr = col(expr)
-    return expr.count()
-def read_csv(file_path, *, flow_graph: FlowGraph = None, separator: str = ';',
-             convert_to_absolute_path: bool = True,
-             description: str = None, **options):
-    """
-    Read a CSV file into a FlowFrame.
-    Args:
-        file_path: Path to CSV file
-        flow_graph: if you want to add it to an existing graph
-        separator: Single byte character to use as separator in the file.
-        convert_to_absolute_path: If the path needs to be set to a fixed location
-        description: if you want to add a readable name in the frontend (advised)
-        **options: Options for polars.read_csv
-    Returns:
-        A FlowFrame with the CSV data
-    """
-    # Create new node ID
-    node_id = generate_node_id()
-    if flow_graph is None:
-        flow_graph = create_flow_graph()
-    flow_id = flow_graph.flow_id
-    has_headers = options.get('has_header', True)
-    encoding = options.get('encoding', 'utf-8')
-    if '~' in file_path:
-        file_path = os.path.expanduser(file_path)
-    received_table = input_schema.ReceivedTable(
-        file_type='csv',
-        path=file_path,
-        name=Path(file_path).name,
-        delimiter=separator,
-        has_headers=has_headers,
-        encoding=encoding
-    )
-    if convert_to_absolute_path:
-        received_table.path = received_table.abs_file_path
-    read_node = input_schema.NodeRead(
-        flow_id=flow_id,
-        node_id=node_id,
-        received_file=received_table,
-        pos_x=100,
-        pos_y=100,
-        is_setup=True
-    )
-    flow_graph.add_read(read_node)
-    return FlowFrame(
-        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-        flow_graph=flow_graph,
-        node_id=node_id
-    )
-def read_parquet(file_path, *, flow_graph: FlowGraph = None, description: str = None,
-                 convert_to_absolute_path: bool = True, **options) -> FlowFrame:
-    """
-    Read a Parquet file into a FlowFrame.
-    Args:
-        file_path: Path to Parquet file
-        flow_graph: if you want to add it to an existing graph
-        description: if you want to add a readable name in the frontend (advised)
-        convert_to_absolute_path: If the path needs to be set to a fixed location
-        **options: Options for polars.read_parquet
-    Returns:
-        A FlowFrame with the Parquet data
-    """
-    if '~' in file_path:
-        file_path = os.path.expanduser(file_path)
-    node_id = generate_node_id()
-    if flow_graph is None:
-        flow_graph = create_flow_graph()
-    flow_id = flow_graph.flow_id
-    received_table = input_schema.ReceivedTable(
-        file_type='parquet',
-        path=file_path,
-        name=Path(file_path).name,
-    )
-    if convert_to_absolute_path:
-        received_table.path = received_table.abs_file_path
-    read_node = input_schema.NodeRead(
-        flow_id=flow_id,
-        node_id=node_id,
-        received_file=received_table,
-        pos_x=100,
-        pos_y=100,
-        is_setup=True,
-        description=description
-    )
-    flow_graph.add_read(read_node)
-    return FlowFrame(
-        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-        flow_graph=flow_graph,
-        node_id=node_id
-    )
-def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) -> FlowFrame:
-    """
-    Create a FlowFrame from a dictionary or list of dictionaries.
-    Args:
-        data: Dictionary of lists or list of dictionaries
-        flow_graph: if you want to add it to an existing graph
-        description: if you want to add a readable name in the frontend (advised)
-    Returns:
-        A FlowFrame with the data
-    """
-    # Create new node ID
-    node_id = generate_node_id()
-    if not flow_graph:
-        flow_graph = create_flow_graph()
-    flow_id = flow_graph.flow_id
-    input_node = input_schema.NodeManualInput(
-        flow_id=flow_id,
-        node_id=node_id,
-        raw_data=FlowDataEngine(data).to_pylist(),
-        pos_x=100,
-        pos_y=100,
-        is_setup=True,
-        description=description
-    )
-    # Add to graph
-    flow_graph.add_manual_input(input_node)
-    # Return new frame
-    return FlowFrame(
-        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
-        flow_graph=flow_graph,
-        node_id=node_id
-    )
+    def __bool__(self):
+        """This special method determines how the object behaves in boolean contexts.
+        Returns True if the FlowFrame contains any data, False otherwise."""
+        return bool(self.data)
+    @staticmethod
+    def _comparison_error(operator: str) -> pl.lazyframe.frame.NoReturn:
+        msg = f'"{operator!r}" comparison not supported for LazyFrame objects'
+        raise TypeError(msg)
-def concat(frames: List['FlowFrame'],
-                  how: str = 'vertical',
-                  rechunk: bool = False,
-                  parallel: bool = True,
-                  description: str = None) -> 'FlowFrame':
-    """
-    Concatenate multiple FlowFrames into one.
+    def __eq__(self, other: object) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("==")
-    Parameters
-    ----------
-    frames : List[FlowFrame]
-        List of FlowFrames to concatenate
-    how : str, default 'vertical'
-        How to combine the FlowFrames (see concat method documentation)
-    rechunk : bool, default False
-        Whether to ensure contiguous memory in result
-    parallel : bool, default True
-        Whether to use parallel processing for the operation
-    description : str, optional
-        Description of this operation
+    def __ne__(self, other: object) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("!=")
-    Returns
-    -------
-    FlowFrame
-        A new FlowFrame with the concatenated data
-    """
-    if not frames:
-        raise ValueError("No frames provided to concat_frames")
+    def __gt__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error(">")
-    if len(frames) == 1:
-        return frames[0]
+    def __lt__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("<")
-    # Use first frame's concat method with remaining frames
-    first_frame = frames[0]
-    remaining_frames = frames[1:]
+    def __ge__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error(">=")
-    return first_frame.concat(remaining_frames, how=how,
-                              rechunk=rechunk, parallel=parallel,
-                              description=description)
+    def __le__(self, other: Any) -> pl.lazyframe.frame.NoReturn:
+        self._comparison_error("<=")