Flowfile 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (100):
  1. flowfile/__init__.py +2 -1
  2. flowfile/api.py +5 -3
  3. flowfile/web/__init__.py +3 -0
  4. flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
  5. flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
  6. flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
  7. flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
  8. flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
  9. flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
  10. flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
  11. flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
  12. flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
  13. flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
  14. flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
  15. flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
  16. flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
  17. flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
  18. flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
  19. flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
  20. flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
  21. flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
  22. flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
  23. flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
  24. flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
  26. flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
  27. flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
  28. flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
  29. flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
  31. flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
  33. flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
  34. flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
  35. flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
  37. flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
  38. flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
  39. flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
  40. flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
  41. flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
  42. flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
  43. flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
  44. flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
  45. flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
  49. flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -3
  52. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +97 -88
  53. flowfile_core/configs/__init__.py +15 -4
  54. flowfile_core/configs/node_store/nodes.py +2 -4
  55. flowfile_core/configs/settings.py +5 -3
  56. flowfile_core/configs/utils.py +18 -0
  57. flowfile_core/flowfile/FlowfileFlow.py +84 -29
  58. flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
  59. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +55 -18
  60. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
  61. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
  62. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +34 -2
  63. flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
  64. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
  65. flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
  66. flowfile_core/flowfile/flow_graph_utils.py +320 -0
  67. flowfile_core/flowfile/flow_node/flow_node.py +2 -1
  68. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
  69. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
  70. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
  71. flowfile_core/flowfile/utils.py +34 -3
  72. flowfile_core/main.py +2 -3
  73. flowfile_core/routes/secrets.py +1 -1
  74. flowfile_core/schemas/input_schema.py +12 -14
  75. flowfile_core/schemas/transform_schema.py +25 -47
  76. flowfile_frame/__init__.py +11 -4
  77. flowfile_frame/adding_expr.py +280 -0
  78. flowfile_frame/config.py +9 -0
  79. flowfile_frame/expr.py +301 -83
  80. flowfile_frame/expr.pyi +2174 -0
  81. flowfile_frame/expr_name.py +258 -0
  82. flowfile_frame/flow_frame.py +616 -627
  83. flowfile_frame/flow_frame.pyi +336 -0
  84. flowfile_frame/flow_frame_methods.py +617 -0
  85. flowfile_frame/group_frame.py +89 -42
  86. flowfile_frame/join.py +1 -2
  87. flowfile_frame/lazy.py +704 -0
  88. flowfile_frame/lazy_methods.py +201 -0
  89. flowfile_frame/list_name_space.py +324 -0
  90. flowfile_frame/selectors.py +3 -0
  91. flowfile_frame/series.py +70 -0
  92. flowfile_frame/utils.py +80 -4
  93. flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
  94. flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
  95. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
  96. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
  97. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
  98. {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
  99. /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
  100. /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
@@ -0,0 +1,201 @@
1
+ import polars as pl
2
+ from functools import wraps
3
+ from typing import Optional, TypeVar, Type, Callable
4
+ from flowfile_frame.utils import _get_function_source
5
+ from flowfile_frame.config import logger
6
+
7
# Generic type variables available for annotations in this module.
T = TypeVar("T")
FlowFrameT = TypeVar("FlowFrameT", bound="FlowFrame")

# LazyFrame attribute names that ``add_lazyframe_methods`` forwards directly to
# ``self.data`` instead of wrapping them with ``create_lazyframe_method_wrapper``.
PASSTHROUGH_METHODS = {
    "chunk_lengths",
    "collect",
    "collect_async",
    "collect_schema",
    "columns",
    "describe",
    "dtypes",
    "estimated_size",
    "explain",
    "fetch",
    "get_meta",
    "is_empty",
    "n_chunks",
    "profile",
    "schema",
    "show_graph",
    "width",
}
16
+
17
+
18
def _render_argument(value, function_sources: list) -> str:
    """
    Return the text used for ``value`` inside the generated method call.

    A named (non-lambda) callable whose source ``_get_function_source`` can
    retrieve is rendered by its ``__name__`` and its source is appended to
    ``function_sources``. Every other value is rendered with ``repr``.
    """
    if callable(value) and not isinstance(value, type):
        try:
            source, _ = _get_function_source(value)
            if source and hasattr(value, '__name__') and value.__name__ != '<lambda>':
                function_sources.append(source)
                return value.__name__
        except Exception:
            # Best effort: source lookup is optional, so fall through to repr().
            # Only ``Exception`` is caught so KeyboardInterrupt/SystemExit propagate.
            logger.debug("Could not retrieve the source of a callable argument; using repr instead")
    return repr(value)


def create_lazyframe_method_wrapper(method_name: str, original_method: Callable) -> Callable:
    """
    Creates a wrapper for a LazyFrame method that properly integrates with FlowFrame.

    The wrapper renders the call as the text ``input_df.<method_name>(...)``,
    registers it through ``self._add_polars_code`` and returns either a child
    frame or the direct result of the underlying call.

    Parameters
    ----------
    method_name : str
        Name of the LazyFrame method.
    original_method : Callable
        The original LazyFrame method. Only used for ``functools.wraps``.

    Returns
    -------
    Callable
        A wrapper method appropriate for FlowFrame. It accepts an extra
        keyword-only ``description`` argument that is not forwarded to the
        underlying method.
    """
    # Determine if the original method returns a LazyFrame based on known method names
    lazyframe_returning_methods = {
        "drop", "select", "with_columns", "sort", "filter", "join", "head", "tail",
        "limit", "drop_nulls", "fill_null", "with_row_index", "group_by", "explode",
        "unique", "slice", "shift", "reverse", "max", "min", "sum", "mean", "median",
        "std", "var", "drop_nans", "fill_nan", "interpolate", "null_count", "quantile",
        "unpivot", "melt", "first", "last"
    }

    non_lazyframe_methods = {
        "collect", "collect_schema", "fetch", "columns", "dtypes", "schema", "width",
        "describe", "explain", "profile", "show_graph"
    }

    # Unknown public methods are assumed to return a LazyFrame.
    returns_lazyframe = (
        method_name in lazyframe_returning_methods or
        (method_name not in non_lazyframe_methods and not method_name.startswith("_"))
    )

    @wraps(original_method)
    def wrapper(self, *args, description: Optional[str] = None, **kwargs):
        # Import here to avoid circular imports
        from flowfile_frame.flow_frame import generate_node_id
        new_node_id = generate_node_id()

        # An argument that declares ``convertable_to_code = False`` cannot be
        # rendered as source text, so call the underlying method directly.
        if not all(getattr(arg, "convertable_to_code", True) for arg in args):
            logger.debug("Warning, could not create a good node")
            # Unwrap ``.expr`` only where it exists, and forward keyword arguments too.
            # Previously any argument without ``.expr`` raised AttributeError and
            # keyword arguments were silently dropped.
            native_args = [getattr(arg, "expr", arg) for arg in args]
            native_kwargs = {key: getattr(value, "expr", value) for key, value in kwargs.items()}
            result = getattr(self.data, method_name)(*native_args, **native_kwargs)
            return self.__class__(result, flow_graph=self.flow_graph)

        # Render positional arguments first, then keyword arguments, so the
        # collected function sources keep the order in which they are used.
        function_sources = []
        rendered = [_render_argument(arg, function_sources) for arg in args]
        rendered.extend(
            f"{key}={_render_argument(value, function_sources)}" for key, value in kwargs.items()
        )
        params_str = ", ".join(rendered)

        # Build the code
        operation_code = f"input_df.{method_name}({params_str})"

        if function_sources:
            # Drop duplicate sources while keeping first-seen order.
            unique_sources = list(dict.fromkeys(function_sources))
            functions_section = "# Function definitions\n" + "\n\n".join(unique_sources)
            code = functions_section + "\n#─────SPLIT─────\n\noutput_df = " + operation_code
        else:
            code = "output_df = " + operation_code

        # Use provided description or generate a default one
        if description is None:
            description = f"{method_name.replace('_', ' ').title()} operation"

        self._add_polars_code(new_node_id, code, description)

        if returns_lazyframe:
            # Return a new FlowFrame with the result
            return self._create_child_frame(new_node_id)
        # For methods that don't return a LazyFrame, return the result directly
        return getattr(self.data, method_name)(*args, **kwargs)

    return wrapper
145
+
146
+
147
def add_lazyframe_methods(cls):
    """
    Class decorator that adds all LazyFrame methods to a class.

    This adds the methods at class creation time, so they are visible to static type checkers.
    Methods already defined in the class are not overwritten. Private names and
    properties of ``pl.LazyFrame`` are skipped.

    Parameters
    ----------
    cls : Type
        The class to which the methods will be added.

    Returns
    -------
    Type
        The modified class.
    """
    # Get methods already defined in the class (including inherited methods)
    existing_methods = set(dir(cls))

    # Computed once: it is needed for the main loop and for the overlap report.
    lazyframe_names = dir(pl.LazyFrame)

    def create_passthrough_method(method_name, method_attr):
        """Build a method that forwards the call unchanged to ``self.data``."""

        @wraps(method_attr)
        def passthrough_method(self, *args, **kwargs):
            return getattr(self.data, method_name)(*args, **kwargs)

        return passthrough_method

    # Add all public LazyFrame methods that don't already exist
    for name in lazyframe_names:
        if name in existing_methods or name.startswith('_'):
            continue
        attr = getattr(pl.LazyFrame, name)
        if isinstance(attr, property):
            continue
        if name in PASSTHROUGH_METHODS:
            setattr(cls, name, create_passthrough_method(name, attr))
        elif callable(attr):
            setattr(cls, name, create_lazyframe_method_wrapper(name, attr))

    # Report which public LazyFrame callables were left untouched because the
    # class already defined an attribute of the same name.
    lazyframe_name_set = set(lazyframe_names)
    overlap = {
        name for name in existing_methods
        if name in lazyframe_name_set and not name.startswith('_') and callable(getattr(pl.LazyFrame, name))
    }
    if overlap:
        logger.debug("Preserved existing methods in %s: %s", cls.__name__, ', '.join(sorted(overlap)))
    return cls
@@ -0,0 +1,324 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Optional, Union, TYPE_CHECKING, List, TypeVar, Callable, Sequence, Literal
4
+
5
+ import polars as pl
6
+
7
+
8
+ # --- TYPE CHECKING IMPORTS ---
9
+ if TYPE_CHECKING:
10
+ from flowfile_frame.expr import Expr, _get_expr_and_repr, col, lit
11
+ from polars._typing import IntoExprColumn, NullBehavior, ListToStructWidthStrategy
12
+ from datetime import date, datetime, time
13
+
14
+
15
class ExprListNameSpace:
    """Namespace for list related expressions."""

    def __init__(self, parent_expr: 'Expr', parent_repr_str: str):
        """Store the parent expression, its polars ``.list`` namespace (if any) and its repr."""
        self.parent = parent_expr
        self.expr = parent_expr.expr.list if parent_expr.expr is not None else None
        self.parent_repr_str = parent_repr_str

    @staticmethod
    def _unwrap(value: Any) -> Any:
        """Return ``value.expr`` when ``value`` has an ``expr`` attribute, else ``value``."""
        return value.expr if hasattr(value, 'expr') else value

    def _call_polars(self, method_name: str, *args, **kwargs) -> Optional[pl.Expr]:
        """Call ``method_name`` on the polars list namespace, or return None when there is none."""
        if self.expr is None:
            return None
        return getattr(self.expr, method_name)(*args, **kwargs)

    def _create_next_expr(self, *args, method_name: str, result_expr: Optional[pl.Expr], is_complex: bool = True, **kwargs) -> 'Expr':
        """Creates a new Expr instance, appending method call to repr string."""
        # Imported here rather than at module level, where Expr is only
        # available under TYPE_CHECKING.
        from flowfile_frame.expr import Expr

        rendered = [repr(a) for a in args]
        rendered.extend(f"{k}={repr(v)}" for k, v in kwargs.items())
        args_str = ", ".join(rendered)

        new_repr = f"{self.parent_repr_str}.list.{method_name}({args_str})"

        # Create new instance, inheriting current agg_func status by default
        return Expr(
            result_expr,
            self.parent.column_name,
            repr_str=new_repr,
            initial_column_name=self.parent._initial_column_name,
            selector=None,
            agg_func=self.parent.agg_func,
            is_complex=is_complex,
            convertable_to_code=self.parent.convertable_to_code
        )

    def all(self) -> Expr:
        """Append ``.list.all()`` to the expression."""
        return self._create_next_expr(method_name="all", result_expr=self._call_polars("all"))

    def any(self) -> Expr:
        """Append ``.list.any()`` to the expression."""
        return self._create_next_expr(method_name="any", result_expr=self._call_polars("any"))

    def len(self) -> Expr:
        """Append ``.list.len()`` to the expression."""
        return self._create_next_expr(method_name="len", result_expr=self._call_polars("len"))

    def drop_nulls(self) -> Expr:
        """Append ``.list.drop_nulls()`` to the expression."""
        return self._create_next_expr(method_name="drop_nulls", result_expr=self._call_polars("drop_nulls"))

    def sample(
        self,
        n: int | IntoExprColumn | None = None,
        *,
        fraction: float | IntoExprColumn | None = None,
        with_replacement: bool = False,
        shuffle: bool = False,
        seed: int | None = None,
    ) -> Expr:
        """
        Append ``.list.sample(...)`` to the expression.

        Raises ``ValueError`` when both ``n`` and ``fraction`` are given. If the
        polars expression cannot be built, a warning is printed and the
        returned Expr carries no polars expression.
        """
        if n is not None and fraction is not None:
            raise ValueError("cannot specify both `n` and `fraction`")

        res_expr = None
        if self.expr is not None:
            try:
                if fraction is not None:
                    res_expr = self.expr.sample(n=None, fraction=self._unwrap(fraction),
                                                with_replacement=with_replacement,
                                                shuffle=shuffle, seed=seed)
                else:
                    # With neither n nor fraction given, one element is sampled.
                    expr_n = n.expr if hasattr(n, 'expr') else (1 if n is None else n)
                    res_expr = self.expr.sample(n=expr_n, fraction=None,
                                                with_replacement=with_replacement,
                                                shuffle=shuffle, seed=seed)
            except Exception as e:
                print(f"Warning: Could not create polars expression for list.sample(): {e}")

        return self._create_next_expr(
            n,
            method_name="sample",
            result_expr=res_expr,
            fraction=fraction,
            with_replacement=with_replacement,
            shuffle=shuffle,
            seed=seed
        )

    def sum(self) -> Expr:
        """Append ``.list.sum()`` to the expression."""
        return self._create_next_expr(method_name="sum", result_expr=self._call_polars("sum"))

    def max(self) -> Expr:
        """Append ``.list.max()`` to the expression."""
        return self._create_next_expr(method_name="max", result_expr=self._call_polars("max"))

    def min(self) -> Expr:
        """Append ``.list.min()`` to the expression."""
        return self._create_next_expr(method_name="min", result_expr=self._call_polars("min"))

    def mean(self) -> Expr:
        """Append ``.list.mean()`` to the expression."""
        return self._create_next_expr(method_name="mean", result_expr=self._call_polars("mean"))

    def median(self) -> Expr:
        """Append ``.list.median()`` to the expression."""
        return self._create_next_expr(method_name="median", result_expr=self._call_polars("median"))

    def std(self, ddof: int = 1) -> Expr:
        """Append ``.list.std(ddof=...)`` to the expression."""
        res_expr = self._call_polars("std", ddof=ddof)
        return self._create_next_expr(method_name="std", result_expr=res_expr, ddof=ddof)

    def var(self, ddof: int = 1) -> Expr:
        """Append ``.list.var(ddof=...)`` to the expression."""
        res_expr = self._call_polars("var", ddof=ddof)
        return self._create_next_expr(method_name="var", result_expr=res_expr, ddof=ddof)

    def sort(self, *, descending: bool = False, nulls_last: bool = False) -> Expr:
        """Append ``.list.sort(...)`` to the expression."""
        res_expr = self._call_polars("sort", descending=descending, nulls_last=nulls_last)
        return self._create_next_expr(method_name="sort", result_expr=res_expr, descending=descending, nulls_last=nulls_last)

    def reverse(self) -> Expr:
        """Append ``.list.reverse()`` to the expression."""
        return self._create_next_expr(method_name="reverse", result_expr=self._call_polars("reverse"))

    def unique(self, *, maintain_order: bool = False) -> Expr:
        """Append ``.list.unique(...)`` to the expression."""
        res_expr = self._call_polars("unique", maintain_order=maintain_order)
        return self._create_next_expr(method_name="unique", result_expr=res_expr, maintain_order=maintain_order)

    def n_unique(self) -> Expr:
        """Append ``.list.n_unique()`` to the expression."""
        return self._create_next_expr(method_name="n_unique", result_expr=self._call_polars("n_unique"))

    def concat(self, other: list[Expr | str] | Expr | str | pl.Series | list[Any]) -> Expr:
        """
        Append ``.list.concat(other)`` to the expression.

        ``other`` may be an Expr, a column name, a polars Series, a list of
        those, or a list of plain values. If the polars expression cannot be
        built, a warning is printed and the returned Expr carries no polars
        expression.
        """
        # Expr is only imported under TYPE_CHECKING at module level, so the
        # isinstance checks below need a runtime import to avoid a NameError.
        from flowfile_frame.expr import Expr

        res_expr = None
        other_expr = None

        # Handle different types of 'other'
        if isinstance(other, Expr):
            other_expr = other.expr
        elif isinstance(other, str):
            other_expr = pl.col(other)
        elif isinstance(other, pl.Series):
            other_expr = pl.lit(other)
        elif isinstance(other, list):
            if len(other) > 0 and isinstance(other[0], (Expr, str, pl.Series)):
                # List of expressions
                other_expr = [o.expr if hasattr(o, 'expr') else (pl.col(o) if isinstance(o, str) else o) for o in other]
            else:
                # List of values
                other_expr = pl.lit(other)

        # Create the polars expression if possible
        if self.expr is not None and other_expr is not None:
            try:
                if isinstance(other_expr, list):
                    # Put the parent expression at the beginning
                    all_exprs = [self.parent.expr] + other_expr
                    res_expr = pl.concat_list(all_exprs)
                else:
                    res_expr = self.expr.concat(other_expr)
            except Exception as e:
                print(f"Warning: Could not create polars expression for list.concat(): {e}")

        return self._create_next_expr(other, method_name="concat", result_expr=res_expr)

    def get(self, index: int | Expr | str, *, null_on_oob: bool = False) -> Expr:
        """Append ``.list.get(index, ...)`` to the expression."""
        res_expr = self._call_polars("get", self._unwrap(index), null_on_oob=null_on_oob)
        return self._create_next_expr(index, method_name="get", result_expr=res_expr, null_on_oob=null_on_oob)

    def gather(self, indices: Expr | pl.Series | list[int] | list[list[int]], *, null_on_oob: bool = False) -> Expr:
        """Append ``.list.gather(indices, ...)`` to the expression."""
        if isinstance(indices, list):
            # Plain lists are handed to polars as a Series.
            indices_expr = pl.Series(indices)
        else:
            indices_expr = self._unwrap(indices)

        res_expr = self._call_polars("gather", indices_expr, null_on_oob=null_on_oob)
        return self._create_next_expr(indices, method_name="gather", result_expr=res_expr, null_on_oob=null_on_oob)

    def gather_every(self, n: int | IntoExprColumn, offset: int | IntoExprColumn = 0) -> Expr:
        """Append ``.list.gather_every(n, offset=...)`` to the expression."""
        res_expr = self._call_polars("gather_every", self._unwrap(n), self._unwrap(offset))
        return self._create_next_expr(n, method_name="gather_every", result_expr=res_expr, offset=offset)

    def first(self) -> Expr:
        """Append ``.list.first()`` to the expression."""
        return self._create_next_expr(method_name="first", result_expr=self._call_polars("first"))

    def last(self) -> Expr:
        """Append ``.list.last()`` to the expression."""
        return self._create_next_expr(method_name="last", result_expr=self._call_polars("last"))

    def contains(self, item: float | str | bool | int | date | datetime | time | IntoExprColumn) -> Expr:
        """Append ``.list.contains(item)`` to the expression."""
        res_expr = self._call_polars("contains", self._unwrap(item))
        return self._create_next_expr(item, method_name="contains", result_expr=res_expr)

    def join(self, separator: IntoExprColumn, *, ignore_nulls: bool = True) -> Expr:
        """Append ``.list.join(separator, ...)`` to the expression."""
        res_expr = self._call_polars("join", self._unwrap(separator), ignore_nulls=ignore_nulls)
        return self._create_next_expr(separator, method_name="join", result_expr=res_expr, ignore_nulls=ignore_nulls)

    def arg_min(self) -> Expr:
        """Append ``.list.arg_min()`` to the expression."""
        return self._create_next_expr(method_name="arg_min", result_expr=self._call_polars("arg_min"))

    def arg_max(self) -> Expr:
        """Append ``.list.arg_max()`` to the expression."""
        return self._create_next_expr(method_name="arg_max", result_expr=self._call_polars("arg_max"))

    def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Expr:
        """Append ``.list.diff(n=..., null_behavior=...)`` to the expression."""
        res_expr = self._call_polars("diff", n, null_behavior)
        return self._create_next_expr(method_name="diff", result_expr=res_expr, n=n, null_behavior=null_behavior)

    def shift(self, n: int | IntoExprColumn = 1) -> Expr:
        """Append ``.list.shift(n)`` to the expression."""
        res_expr = self._call_polars("shift", self._unwrap(n))
        return self._create_next_expr(n, method_name="shift", result_expr=res_expr)

    def slice(self, offset: int | str | Expr, length: int | str | Expr | None = None) -> Expr:
        """Append ``.list.slice(offset, length)`` to the expression."""
        res_expr = self._call_polars("slice", self._unwrap(offset), self._unwrap(length))
        return self._create_next_expr(offset, length, method_name="slice", result_expr=res_expr)

    def head(self, n: int | str | Expr = 5) -> Expr:
        """Append ``.list.head(n)`` to the expression."""
        res_expr = self._call_polars("head", self._unwrap(n))
        return self._create_next_expr(n, method_name="head", result_expr=res_expr)

    def tail(self, n: int | str | Expr = 5) -> Expr:
        """Append ``.list.tail(n)`` to the expression."""
        res_expr = self._call_polars("tail", self._unwrap(n))
        return self._create_next_expr(n, method_name="tail", result_expr=res_expr)

    def explode(self) -> Expr:
        """Append ``.list.explode()`` to the expression."""
        return self._create_next_expr(method_name="explode", result_expr=self._call_polars("explode"))

    def count_matches(self, element: Any) -> Expr:
        """Append ``.list.count_matches(element)`` to the expression."""
        res_expr = self._call_polars("count_matches", self._unwrap(element))
        return self._create_next_expr(element, method_name="count_matches", result_expr=res_expr)

    def to_array(self, width: int) -> Expr:
        """Append ``.list.to_array(width)`` to the expression."""
        res_expr = self._call_polars("to_array", width)
        return self._create_next_expr(width, method_name="to_array", result_expr=res_expr)

    def to_struct(
        self,
        n_field_strategy: ListToStructWidthStrategy = "first_non_null",
        fields: Sequence[str] | Callable[[int], str] | None = None,
        upper_bound: int = 0,
    ) -> Expr:
        """
        Append ``.list.to_struct(...)`` to the expression.

        When ``fields`` is a sequence, only ``fields`` is passed to polars.
        Otherwise all three arguments are passed. If the polars expression
        cannot be built, a warning is printed and the returned Expr carries no
        polars expression.
        """
        res_expr = None

        if self.expr is not None:
            try:
                if isinstance(fields, Sequence):
                    res_expr = self.expr.to_struct(fields=fields)
                else:
                    res_expr = self.expr.to_struct(
                        n_field_strategy=n_field_strategy,
                        fields=fields,
                        upper_bound=upper_bound
                    )
            except Exception as e:
                print(f"Warning: Could not create polars expression for list.to_struct(): {e}")

        return self._create_next_expr(
            method_name="to_struct",
            result_expr=res_expr,
            n_field_strategy=n_field_strategy,
            fields=fields,
            upper_bound=upper_bound,
        )

    def eval(self, expr: Expr, *, parallel: bool = False) -> Expr:
        """Append ``.list.eval(expr, parallel=...)`` to the expression."""
        res_expr = self._call_polars("eval", self._unwrap(expr), parallel=parallel)
        return self._create_next_expr(expr, method_name="eval", result_expr=res_expr, parallel=parallel)

    def set_union(self, other: Any) -> Expr:
        """Append ``.list.set_union(other)`` to the expression."""
        res_expr = self._call_polars("set_union", self._unwrap(other))
        return self._create_next_expr(other, method_name="set_union", result_expr=res_expr)

    def set_difference(self, other: Any) -> Expr:
        """Append ``.list.set_difference(other)`` to the expression."""
        res_expr = self._call_polars("set_difference", self._unwrap(other))
        return self._create_next_expr(other, method_name="set_difference", result_expr=res_expr)

    def set_intersection(self, other: Any) -> Expr:
        """Append ``.list.set_intersection(other)`` to the expression."""
        res_expr = self._call_polars("set_intersection", self._unwrap(other))
        return self._create_next_expr(other, method_name="set_intersection", result_expr=res_expr)

    def set_symmetric_difference(self, other: Any) -> Expr:
        """Append ``.list.set_symmetric_difference(other)`` to the expression."""
        res_expr = self._call_polars("set_symmetric_difference", self._unwrap(other))
        return self._create_next_expr(other, method_name="set_symmetric_difference", result_expr=res_expr)
@@ -51,6 +51,9 @@ class Selector:
51
51
  # Expr init will handle creating the 'pl.sum(selector)' repr
52
52
  return Expr(expr=None, selector=self, agg_func="sum")
53
53
 
54
def expr(self):
    """Evaluate ``self.repr_str`` as Python code and return the result."""
    # NOTE(review): eval() executes whatever text repr_str holds, resolving names through
    # this method's locals and the module's globals. Presumably repr_str is always
    # generated internally — confirm it can never carry user-supplied text.
    return eval(self.repr_str)
56
+
54
57
  def mean(self) -> 'Expr':
55
58
  """Create an expression to average columns selected by this selector."""
56
59
  return Expr(expr=None, selector=self, agg_func="mean")
@@ -0,0 +1,70 @@
1
+ from __future__ import annotations
2
+ import polars as pl
3
+ from typing import Any, Optional, Union, List
4
+
5
+
6
class Series:
    """
    A wrapper around polars.Series that represents itself as the code to create it.
    """

    def __init__(
        self,
        name: str | list | pl.Series | None = None,
        values: list | None = None,
        dtype: Any = None,
        **kwargs  # Ignored parameters
    ):
        """
        Initialize a FlowSeries with the same API as pl.Series.

        Three call forms are handled: an existing ``pl.Series`` as the first
        argument, a list/tuple of values as the first argument (with no
        ``values``), or the regular ``name`` / ``values`` / ``dtype`` form.
        """
        # Store the original arguments for proper representation
        self._name = name
        self._values = values
        self._dtype = dtype

        # Handle the different initialization forms
        if isinstance(name, pl.Series):
            self._s = name
            # Update our attributes to match the series
            self._name = name.name
            self._values = name.to_list()
            self._dtype = name.dtype
        elif isinstance(name, (list, tuple)) and values is None:
            # The first positional argument holds the values, not a name.
            self._s = pl.Series(values=name, dtype=dtype)
            self._name = ""  # Default name is empty string
            self._values = name
        else:
            self._s = pl.Series(name=name, values=values, dtype=dtype)

    def __repr__(self) -> str:
        """
        Return a string that looks like the code to create this Series.
        Example: pl.Series("c", [1, 2, 3])

        More than 10 values are abbreviated to the first three followed by
        ``...``, so the text for long series is a preview only.
        """
        # Imported locally; only needed to quote the name.
        import json

        # Format name: json.dumps emits a double-quoted literal and escapes embedded
        # quotes, backslashes and control characters, so the output stays valid Python.
        if self._name:
            name_str = json.dumps(str(self._name), ensure_ascii=False)
        else:
            name_str = '""'

        # Format values
        if self._values is None:
            values_str = "[]"
        elif len(self._values) <= 10:
            values_str = str(self._values)
        else:
            # Show first few elements for long lists. repr() keeps string values
            # quoted, matching how the short form renders its elements.
            sample = self._values[:3]
            values_str = f"[{', '.join(map(repr, sample))}, ...]"

        # Format dtype if provided
        # NOTE(review): the dtype is rendered with str() and no "pl." prefix — confirm
        # that consumers of this text do not need a resolvable dtype name.
        dtype_str = ""
        if self._dtype is not None:
            dtype_str = f", dtype={self._dtype}"

        return f"pl.Series({name_str}, {values_str}{dtype_str})"

    def __str__(self) -> str:
        """Same as __repr__."""
        return self.__repr__()