Flowfile 0.3.5-py3-none-any.whl → 0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of Flowfile might be problematic.

Files changed (145)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  5. flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
  6. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  7. flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
  8. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  9. flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
  10. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
  11. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
  14. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
  15. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
  16. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
  17. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
  20. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
  21. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
  22. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
  23. flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
  24. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  25. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
  26. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  27. flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
  28. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
  29. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
  30. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
  31. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
  32. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
  33. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
  34. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
  35. flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
  36. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
  37. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
  38. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
  39. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
  40. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
  41. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
  42. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
  43. flowfile/web/static/assets/api-6ef0dcef.js +80 -0
  44. flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
  45. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  46. flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
  47. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
  48. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
  49. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
  50. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
  51. flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
  52. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
  53. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
  54. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
  55. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
  56. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
  57. flowfile/web/static/index.html +1 -1
  58. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
  59. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
  60. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
  61. flowfile_core/__init__.py +3 -0
  62. flowfile_core/auth/jwt.py +39 -0
  63. flowfile_core/configs/node_store/nodes.py +9 -6
  64. flowfile_core/configs/settings.py +6 -5
  65. flowfile_core/database/connection.py +63 -15
  66. flowfile_core/database/init_db.py +0 -1
  67. flowfile_core/database/models.py +49 -2
  68. flowfile_core/flowfile/code_generator/code_generator.py +472 -17
  69. flowfile_core/flowfile/connection_manager/models.py +1 -1
  70. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  71. flowfile_core/flowfile/extensions.py +1 -1
  72. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  73. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  74. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
  75. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  76. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  77. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  78. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  79. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  80. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  81. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  82. flowfile_core/flowfile/flow_graph.py +718 -253
  83. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  84. flowfile_core/flowfile/flow_node/flow_node.py +563 -117
  85. flowfile_core/flowfile/flow_node/models.py +154 -20
  86. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  87. flowfile_core/flowfile/handler.py +2 -33
  88. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  89. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  90. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  91. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  92. flowfile_core/flowfile/utils.py +35 -26
  93. flowfile_core/main.py +35 -15
  94. flowfile_core/routes/cloud_connections.py +77 -0
  95. flowfile_core/routes/logs.py +2 -7
  96. flowfile_core/routes/public.py +1 -0
  97. flowfile_core/routes/routes.py +130 -90
  98. flowfile_core/routes/secrets.py +72 -14
  99. flowfile_core/schemas/__init__.py +8 -0
  100. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  101. flowfile_core/schemas/input_schema.py +121 -71
  102. flowfile_core/schemas/output_model.py +19 -3
  103. flowfile_core/schemas/schemas.py +150 -12
  104. flowfile_core/schemas/transform_schema.py +175 -35
  105. flowfile_core/utils/utils.py +40 -1
  106. flowfile_core/utils/validate_setup.py +41 -0
  107. flowfile_frame/__init__.py +9 -1
  108. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  109. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  110. flowfile_frame/expr.py +28 -1
  111. flowfile_frame/expr.pyi +76 -61
  112. flowfile_frame/flow_frame.py +481 -208
  113. flowfile_frame/flow_frame.pyi +140 -91
  114. flowfile_frame/flow_frame_methods.py +160 -22
  115. flowfile_frame/group_frame.py +3 -0
  116. flowfile_frame/utils.py +25 -3
  117. flowfile_worker/external_sources/s3_source/main.py +216 -0
  118. flowfile_worker/external_sources/s3_source/models.py +142 -0
  119. flowfile_worker/funcs.py +51 -6
  120. flowfile_worker/models.py +22 -2
  121. flowfile_worker/routes.py +40 -38
  122. flowfile_worker/utils.py +1 -1
  123. test_utils/s3/commands.py +46 -0
  124. test_utils/s3/data_generator.py +292 -0
  125. test_utils/s3/demo_data_generator.py +186 -0
  126. test_utils/s3/fixtures.py +214 -0
  127. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  128. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  129. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  130. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  131. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  132. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  133. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  134. flowfile_core/schemas/defaults.py +0 -9
  135. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  136. flowfile_core/schemas/models.py +0 -193
  137. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  138. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  139. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  140. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  141. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  142. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  143. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
  144. {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  145. {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
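
Note on the stub diff that follows (apparently flowfile_frame/flow_frame.pyi, file 113 above; the per-file header was lost in extraction): the recurring change is that FlowFrame methods previously annotated as returning polars' LazyFrame are re-annotated to return 'FlowFrame', and collect() now returns DataFrame instead of Any. A minimal type-level sketch of what that implies for callers, assuming the stubbed signatures match the runtime API:

    import polars as pl
    from flowfile_frame.flow_frame import FlowFrame

    def summarize(df: FlowFrame) -> pl.DataFrame:
        # Each step below is annotated as 'FlowFrame' in 0.3.7 (most were
        # LazyFrame in 0.3.5), so the chain keeps extending the flow graph.
        trimmed = df.drop_nulls().unique(keep="any").tail(5)
        # collect() is now annotated to return a polars DataFrame (was Any).
        return trimmed.collect()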
@@ -5,16 +5,22 @@ import os
  import sys
  import typing
  from io import IOBase
- from typing import List, Optional, ForwardRef
+ from typing import List, Optional, ForwardRef, TypeVar, Any, Iterable, Sequence, Mapping, Collection, Callable, Literal, IO, Union
+ from datetime import timedelta
+ from pathlib import Path
  from collections.abc import Awaitable

  # Third-party imports
  import polars as pl
- from polars._typing import * # Consider specifying needed imports
+ from polars._typing import *
+ from polars._typing import ParquetMetadata, PlanStage
  from polars._utils.async_ import _GeventDataFrameResult
  from polars.dependencies import polars_cloud as pc
  from polars.io.cloud import CredentialProviderFunction
  from polars.lazyframe.frame import LazyGroupBy
+ from polars import LazyFrame, DataFrame, QueryOptFlags
+ from polars.io.parquet import ParquetFieldOverwrites
+ from polars.lazyframe.opt_flags import DEFAULT_QUERY_OPT_FLAGS
  from polars.type_aliases import (Schema, IntoExpr, ClosedInterval, Label, StartBy, RollingInterpolationMethod, IpcCompression, CompatLevel, SyncOnCloseMethod, ExplainFormat, EngineType, SerializationFormat, AsofJoinStrategy)

  # Local application/library specific imports
@@ -23,10 +29,11 @@ from flowfile_core.flowfile.flow_graph import FlowGraph
  from flowfile_core.flowfile.flow_node.flow_node import FlowNode
  from flowfile_frame import group_frame
  from flowfile_frame.expr import Expr
+ from flowfile_core.schemas import transform_schema

  # Conditional imports
  if sys.version_info >= (3, 10):
-     pass # from typing import ParamSpec, Concatenate (if needed)
+     from typing import Concatenate
  else:
      from typing_extensions import Concatenate

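The conditional-import fix above matters because the pipe() stub further down types its callback as Callable[Concatenate[LazyFrame, P], T]; on Python < 3.10, Concatenate exists only in typing_extensions. A generic sketch of the pattern (not Flowfile's implementation):

    import sys
    from typing import Callable, TypeVar

    if sys.version_info >= (3, 10):
        from typing import Concatenate, ParamSpec
    else:
        from typing_extensions import Concatenate, ParamSpec

    P = ParamSpec("P")
    T = TypeVar("T")

    class Frame:
        # Concatenate tells the type checker: the callback takes the frame
        # first, then whatever extra parameters arrive via *args/**kwargs.
        def pipe(self, function: Callable[Concatenate["Frame", P], T], *args: P.args, **kwargs: P.kwargs) -> T:
            return function(self, *args, **kwargs)
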
@@ -64,8 +71,8 @@ class FlowFrame:

  def __gt__(self, other: Any) -> typing.NoReturn: ...

- # Initialize the FlowFrame with data and graph references.
- def __init__(self, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable')]=None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType]=None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None=None, strict: bool=True, orient: typing.Optional[typing.Literal['col', 'row']]=None, infer_schema_length: int | None=100, nan_to_null: bool=False, flow_graph=None, node_id=None, parent_node_id=None) -> None: ...
+ # The __init__ method is intentionally left empty.
+ def __init__(self, *args, **kwargs) -> None: ...

  def __le__(self, other: Any) -> typing.NoReturn: ...

@@ -73,17 +80,26 @@ class FlowFrame:

  def __ne__(self, other: object) -> typing.NoReturn: ...

- # Create a new FlowFrame instance.
- def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable')]=None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType]=None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None=None, strict: bool=True, orient: typing.Optional[typing.Literal['col', 'row']]=None, infer_schema_length: int | None=100, nan_to_null: bool=False, flow_graph=None, node_id=None, parent_node_id=None) -> Self: ...
+ # Create and configure a new FlowFrame instance, mimicking Polars' flexible constructor.
+ def __new__(cls, data: typing.Union[LazyFrame, collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph: typing.Optional[flowfile_core.flowfile.flow_graph.FlowGraph] = None, node_id: typing.Optional[int] = None, parent_node_id: typing.Optional[int] = None, override_initial: bool = False) -> Self: ...

  def __repr__(self) -> Any: ...

  # Helper method to add a connection between nodes
- def _add_connection(self, from_id, to_id, input_type: typing.Literal['main', 'left', 'right']='main') -> Any: ...
+ def _add_connection(self, from_id, to_id, input_type: typing.Literal['main', 'left', 'right'] = 'main') -> Any: ...

- def _add_number_of_records(self, new_node_id: int, description: str=None) -> 'FlowFrame': ...
+ # Add a cross join node to the graph.
+ def _add_cross_join_node(self, new_node_id: int, join_input: transform_schema.CrossJoinInput, description: str, other: FlowFrame) -> None: ...

- def _add_polars_code(self, new_node_id: int, code: str, depending_on_ids: typing.Optional[typing.List[str]]=None, convertable_to_code: bool=True, method_name: str=None, polars_expr: typing.Union[flowfile_frame.expr.Expr, typing.List[flowfile_frame.expr.Expr], NoneType]=None, group_expr: typing.Union[flowfile_frame.expr.Expr, typing.List[flowfile_frame.expr.Expr], NoneType]=None, kwargs_expr: typing.Optional[typing.Dict]=None, group_kwargs: typing.Optional[typing.Dict]=None, description: str=None) -> Any: ...
+ def _add_number_of_records(self, new_node_id: int, description: str = None) -> 'FlowFrame': ...
+
+ def _add_polars_code(self, new_node_id: int, code: str, depending_on_ids: typing.Optional[typing.List[str]] = None, convertable_to_code: bool = True, method_name: str = None, polars_expr: typing.Union[flowfile_frame.expr.Expr, typing.List[flowfile_frame.expr.Expr], NoneType] = None, group_expr: typing.Union[flowfile_frame.expr.Expr, typing.List[flowfile_frame.expr.Expr], NoneType] = None, kwargs_expr: typing.Optional[typing.Dict] = None, group_kwargs: typing.Optional[typing.Dict] = None, description: str = None) -> Any: ...
+
+ # Add a regular join node to the graph.
+ def _add_regular_join_node(self, new_node_id: int, join_input: transform_schema.JoinInput, description: str, other: FlowFrame) -> None: ...
+
+ # Build kwargs dictionary for Polars join code.
+ def _build_polars_join_kwargs(self, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_columns: typing.Optional[typing.List[str]], right_columns: typing.Optional[typing.List[str]], how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left']) -> dict: ...

  def _comparison_error(self, operator: str) -> typing.NoReturn: ...

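The join helpers added in this hunk and the next split join handling into two paths: a native FlowFile join node when the arguments map onto it, and generated Polars code otherwise. The stubs do not reveal the decision logic; a speculative sketch of what _should_use_polars_code_for_join might check, inferred only from join()'s defaults (suffix='_right', validate=None, nulls_equal=False, coalesce=None, maintain_order=None), not the library's actual implementation:

    def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool:
        # Hypothetical: any option the native join node does not model
        # forces the generated-Polars-code fallback.
        return (maintain_order is not None
                or coalesce is not None
                or nulls_equal
                or validate is not None
                or suffix != "_right")
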
@@ -91,35 +107,53 @@
  def _create_child_frame(self, new_node_id) -> 'FlowFrame': ...

  # Detect if the expression is a cum_count operation and use record_id if possible.
- def _detect_cum_count_record_id(self, expr: Any, new_node_id: int, description: typing.Optional[str]=None) -> 'FlowFrame': ...
+ def _detect_cum_count_record_id(self, expr: Any, new_node_id: int, description: typing.Optional[str] = None) -> 'FlowFrame': ...
+
+ # Ensure both FlowFrames are in the same graph, combining if necessary.
+ def _ensure_same_graph(self, other: FlowFrame) -> None: ...
+
+ # Execute join using native FlowFile join nodes.
+ def _execute_native_join(self, other: FlowFrame, new_node_id: int, join_mappings: typing.Optional[typing.List], how: str, description: str) -> 'FlowFrame': ...
+
+ # Execute join using Polars code approach.
+ def _execute_polars_code_join(self, other: FlowFrame, new_node_id: int, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_columns: typing.Optional[typing.List[str]], right_columns: typing.Optional[typing.List[str]], how: str, suffix: str, validate: str, nulls_equal: bool, coalesce: bool, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'], description: str) -> 'FlowFrame': ...
+
+ # Internal constructor to create a FlowFrame instance that wraps an
+ def _from_existing_node(self, data: LazyFrame, flow_graph: FlowGraph, node_id: int, parent_node_id: typing.Optional[int] = None) -> 'FlowFrame': ...

  # Generates the `input_df.sort(...)` Polars code string using pure expression strings.
  def _generate_sort_polars_code(self, pure_sort_expr_strs: typing.List[str], descending_values: typing.List[bool], nulls_last_values: typing.List[bool], multithreaded: bool, maintain_order: bool) -> str: ...

- def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str=None) -> 'FlowFrame': ...
+ # Parse and validate join column specifications.
+ def _parse_join_columns(self, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column], how: str) -> tuple[typing.Optional[typing.List[str]], typing.Optional[typing.List[str]]]: ...
+
+ # Determine if we should use Polars code instead of native join.
+ def _should_use_polars_code_for_join(self, maintain_order, coalesce, nulls_equal, validate, suffix) -> bool: ...
+
+ def _with_flowfile_formula(self, flowfile_formula: str, output_column_name, description: str = None) -> 'FlowFrame': ...

  # Approximate count of unique values.
- def approx_n_unique(self, description: Optional[str] = None) -> LazyFrame: ...
+ def approx_n_unique(self, description: Optional[str] = None) -> 'FlowFrame': ...

  # Return the `k` smallest rows.
- def bottom_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool]=False, description: Optional[str] = None) -> LazyFrame: ...
+ def bottom_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description: Optional[str] = None) -> 'FlowFrame': ...

  def cache(self, description: Optional[str] = None) -> 'FlowFrame': ...

  # Cast LazyFrame column(s) to the specified dtype(s).
- def cast(self, dtypes: Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType] | PolarsDataType, strict: bool=True, description: Optional[str] = None) -> LazyFrame: ...
+ def cast(self, dtypes: Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType | PythonDataType] | PolarsDataType | pl.DataTypeExpr, strict: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...

  # Create an empty copy of the current LazyFrame, with zero to 'n' rows.
- def clear(self, n: int=0, description: Optional[str] = None) -> LazyFrame: ...
+ def clear(self, n: int = 0, description: Optional[str] = None) -> 'FlowFrame': ...

  # Create a copy of this LazyFrame.
- def clone(self, description: Optional[str] = None) -> LazyFrame: ...
+ def clone(self, description: Optional[str] = None) -> 'FlowFrame': ...

  # Collect lazy data into memory.
- def collect(self, *args, **kwargs) -> Any: ...
+ def collect(self, *args, **kwargs) -> DataFrame: ...

  # Collect DataFrame asynchronously in thread pool.
- def collect_async(self, gevent: bool=False, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, no_optimization: bool=False, slice_pushdown: bool=True, comm_subplan_elim: bool=True, comm_subexpr_elim: bool=True, cluster_with_columns: bool=True, collapse_joins: bool=True, engine: EngineType='auto', _check_order: bool=True) -> Awaitable[DataFrame] | _GeventDataFrameResult[DataFrame]: ...
+ def collect_async(self, gevent: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS) -> Awaitable[DataFrame] | _GeventDataFrameResult[DataFrame]: ...

  # Resolve the schema of this LazyFrame.
  def collect_schema(self) -> Schema: ...
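
A pattern visible in collect_async above and in explain, profile, show_graph, and the sink_* methods below: the long lists of individual optimization toggles (predicate_pushdown, slice_pushdown, ...) are collapsed into a single optimizations: QueryOptFlags parameter defaulting to DEFAULT_QUERY_OPT_FLAGS, tracking the same consolidation in recent Polars releases. An illustrative call under the new stub, assuming the runtime accepts what the stub declares:

    from flowfile_frame.flow_frame import FlowFrame
    from polars.lazyframe.opt_flags import DEFAULT_QUERY_OPT_FLAGS

    df = FlowFrame({"a": [1, 2, 3]})
    # One QueryOptFlags object replaces the dozen boolean keyword arguments
    # the 0.3.5 stub listed individually.
    awaitable = df.collect_async(gevent=False, engine="auto", optimizations=DEFAULT_QUERY_OPT_FLAGS)
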
@@ -129,56 +163,56 @@ class FlowFrame:
129
163
  def columns(self) -> typing.List[str]: ...
130
164
 
131
165
  # Combine multiple FlowFrames into a single FlowFrame.
132
- def concat(self, other: typing.Union[ForwardRef('FlowFrame'), typing.List[ForwardRef('FlowFrame')]], how: str='vertical', rechunk: bool=False, parallel: bool=True, description: str=None) -> 'FlowFrame': ...
166
+ def concat(self, other: typing.Union[ForwardRef('FlowFrame'), typing.List[ForwardRef('FlowFrame')]], how: str = 'vertical', rechunk: bool = False, parallel: bool = True, description: str = None) -> 'FlowFrame': ...
133
167
 
134
168
  # Return the number of non-null elements for each column.
135
- def count(self, description: Optional[str] = None) -> LazyFrame: ...
169
+ def count(self, description: Optional[str] = None) -> 'FlowFrame': ...
136
170
 
137
171
  # Simple naive implementation of creating the frame from any type. It converts the data to a polars frame,
138
- def create_from_any_type(self, data: typing.Union[collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable')]=None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType]=None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None=None, strict: bool=True, orient: typing.Optional[typing.Literal['col', 'row']]=None, infer_schema_length: int | None=100, nan_to_null: bool=False, flow_graph=None, node_id=None, parent_node_id=None, description: Optional[str] = None) -> Any: ...
172
+ def create_from_any_type(self, data: typing.Union[collections.abc.Mapping[str, typing.Union[collections.abc.Sequence[object], collections.abc.Mapping[str, collections.abc.Sequence[object]], ForwardRef('Series')]], collections.abc.Sequence[typing.Any], ForwardRef('np.ndarray[Any, Any]'), ForwardRef('pa.Table'), ForwardRef('pd.DataFrame'), ForwardRef('ArrowArrayExportable'), ForwardRef('ArrowStreamExportable'), ForwardRef('torch.Tensor')] = None, schema: typing.Union[collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]], collections.abc.Sequence[typing.Union[str, tuple[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType'), type[int], type[float], type[bool], type[str], type['date'], type['time'], type['datetime'], type['timedelta'], type[list[typing.Any]], type[tuple[typing.Any, ...]], type[bytes], type[object], type['Decimal'], type[None], NoneType]]]], NoneType] = None, schema_overrides: collections.abc.Mapping[str, typing.Union[ForwardRef('DataTypeClass'), ForwardRef('DataType')]] | None = None, strict: bool = True, orient: typing.Optional[typing.Literal['col', 'row']] = None, infer_schema_length: int | None = 100, nan_to_null: bool = False, flow_graph = None, node_id = None, parent_node_id = None, description: Optional[str] = None) -> Any: ...
139
173
 
140
174
  # Creates a summary of statistics for a LazyFrame, returning a DataFrame.
141
- def describe(self, percentiles: Sequence[float] | float | None=(0.25, 0.5, 0.75), interpolation: RollingInterpolationMethod='nearest') -> DataFrame: ...
175
+ def describe(self, percentiles: Sequence[float] | float | None = ..., interpolation: QuantileMethod = 'nearest') -> DataFrame: ...
142
176
 
143
177
  # Read a logical plan from a file to construct a LazyFrame.
144
- def deserialize(self, source: str | Path | IOBase, format: SerializationFormat='binary', description: Optional[str] = None) -> LazyFrame: ...
178
+ def deserialize(self, source: str | Path | IOBase, format: SerializationFormat = 'binary', description: Optional[str] = None) -> 'FlowFrame': ...
145
179
 
146
180
  # Remove columns from the DataFrame.
147
- def drop(self, *columns, strict: bool=True, description: Optional[str] = None) -> LazyFrame: ...
181
+ def drop(self, *columns, strict: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
148
182
 
149
183
  # Drop all rows that contain one or more NaN values.
150
- def drop_nans(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None=None, description: Optional[str] = None) -> LazyFrame: ...
184
+ def drop_nans(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description: Optional[str] = None) -> 'FlowFrame': ...
151
185
 
152
186
  # Drop all rows that contain one or more null values.
153
- def drop_nulls(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None=None, description: Optional[str] = None) -> LazyFrame: ...
187
+ def drop_nulls(self, subset: ColumnNameOrSelector | Collection[ColumnNameOrSelector] | None = None, description: Optional[str] = None) -> 'FlowFrame': ...
154
188
 
155
189
  # Get the column data types.
156
190
  @property
157
191
  def dtypes(self) -> typing.List[pl.classes.DataType]: ...
158
192
 
159
193
  # Create a string representation of the query plan.
160
- def explain(self, format: ExplainFormat='plain', optimized: bool=True, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, slice_pushdown: bool=True, comm_subplan_elim: bool=True, comm_subexpr_elim: bool=True, cluster_with_columns: bool=True, collapse_joins: bool=True, streaming: bool=False, engine: EngineType='auto', tree_format: bool | None=None, _check_order: bool=True) -> str: ...
194
+ def explain(self, format: ExplainFormat = 'plain', optimized: bool = True, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, streaming: bool = False, engine: EngineType = 'auto', tree_format: bool | None = None, optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS) -> str: ...
161
195
 
162
196
  # Explode the dataframe to long format by exploding the given columns.
163
- def explode(self, columns: typing.Union[str, flowfile_frame.expr.Column, typing.Iterable[str | flowfile_frame.expr.Column]], *more_columns, description: str=None) -> 'FlowFrame': ...
197
+ def explode(self, columns: typing.Union[str, flowfile_frame.expr.Column, typing.Iterable[str | flowfile_frame.expr.Column]], *more_columns, description: str = None) -> 'FlowFrame': ...
164
198
 
165
199
  # Collect a small number of rows for debugging purposes.
166
- def fetch(self, n_rows: int=500, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, no_optimization: bool=False, slice_pushdown: bool=True, comm_subplan_elim: bool=True, comm_subexpr_elim: bool=True, cluster_with_columns: bool=True, collapse_joins: bool=True) -> DataFrame: ...
200
+ def fetch(self, n_rows: int = 500, type_coercion: bool = True, _type_check: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, no_optimization: bool = False, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True) -> DataFrame: ...
167
201
 
168
202
  # Fill floating point NaN values.
169
- def fill_nan(self, value: int | float | Expr | None, description: Optional[str] = None) -> LazyFrame: ...
203
+ def fill_nan(self, value: int | float | Expr | None, description: Optional[str] = None) -> 'FlowFrame': ...
170
204
 
171
205
  # Fill null values using the specified value or strategy.
172
- def fill_null(self, value: Any | Expr | None=None, strategy: FillNullStrategy | None=None, limit: int | None=None, matches_supertype: bool=True, description: Optional[str] = None) -> LazyFrame: ...
206
+ def fill_null(self, value: Any | Expr | None = None, strategy: FillNullStrategy | None = None, limit: int | None = None, matches_supertype: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
173
207
 
174
208
  # Filter rows based on a predicate.
175
- def filter(self, *predicates, flowfile_formula: typing.Optional[str]=None, description: typing.Optional[str]=None, **constraints) -> 'FlowFrame': ...
209
+ def filter(self, *predicates, flowfile_formula: typing.Optional[str] = None, description: typing.Optional[str] = None, **constraints) -> 'FlowFrame': ...
176
210
 
177
211
  # Get the first row of the DataFrame.
178
- def first(self, description: Optional[str] = None) -> LazyFrame: ...
212
+ def first(self, description: Optional[str] = None) -> 'FlowFrame': ...
179
213
 
180
214
  # Take every nth row in the LazyFrame and return as a new LazyFrame.
181
- def gather_every(self, n: int, offset: int=0, description: Optional[str] = None) -> LazyFrame: ...
215
+ def gather_every(self, n: int, offset: int = 0, description: Optional[str] = None) -> 'FlowFrame': ...
182
216
 
183
217
  def get_node_settings(self, description: Optional[str] = None) -> FlowNode: ...
184
218
 
@@ -186,162 +220,165 @@ class FlowFrame:
186
220
  def group_by(self, *by, description: Optional[str] = None, maintain_order: bool = False, **named_by) -> group_frame.GroupByFrame: ...
187
221
 
188
222
  # Group based on a time value (or index value of type Int32, Int64).
189
- def group_by_dynamic(self, index_column: IntoExpr, every: str | timedelta, period: str | timedelta | None=None, offset: str | timedelta | None=None, include_boundaries: bool=False, closed: ClosedInterval='left', label: Label='left', group_by: IntoExpr | Iterable[IntoExpr] | None=None, start_by: StartBy='window', description: Optional[str] = None) -> LazyGroupBy: ...
223
+ def group_by_dynamic(self, index_column: IntoExpr, every: str | timedelta, period: str | timedelta | None = None, offset: str | timedelta | None = None, include_boundaries: bool = False, closed: ClosedInterval = 'left', label: Label = 'left', group_by: IntoExpr | Iterable[IntoExpr] | None = None, start_by: StartBy = 'window', description: Optional[str] = None) -> LazyGroupBy: ...
190
224
 
191
- def head(self, n: int, description: str=None) -> Any: ...
225
+ def head(self, n: int, description: str = None) -> Any: ...
192
226
 
193
227
  # Inspect a node in the computation graph.
194
- def inspect(self, fmt: str='{}', description: Optional[str] = None) -> LazyFrame: ...
228
+ def inspect(self, fmt: str = '{}', description: Optional[str] = None) -> 'FlowFrame': ...
195
229
 
196
230
  # Interpolate intermediate values. The interpolation method is linear.
197
- def interpolate(self, description: Optional[str] = None) -> LazyFrame: ...
231
+ def interpolate(self, description: Optional[str] = None) -> 'FlowFrame': ...
198
232
 
199
233
  # Add a join operation to the Logical Plan.
200
- def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column]=None, how: str='inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column]=None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column]=None, suffix: str='_right', validate: str=None, nulls_equal: bool=False, coalesce: bool=None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left']=None, description: str=None) -> Any: ...
234
+ def join(self, other, on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, how: str = 'inner', left_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, right_on: typing.Union[typing.List[str | flowfile_frame.expr.Column], str, flowfile_frame.expr.Column] = None, suffix: str = '_right', validate: str = None, nulls_equal: bool = False, coalesce: bool = None, maintain_order: typing.Literal[None, 'left', 'right', 'left_right', 'right_left'] = None, description: str = None) -> Any: ...
201
235
 
202
236
  # Perform an asof join.
203
- def join_asof(self, other: LazyFrame, left_on: str | None | Expr=None, right_on: str | None | Expr=None, on: str | None | Expr=None, by_left: str | Sequence[str] | None=None, by_right: str | Sequence[str] | None=None, by: str | Sequence[str] | None=None, strategy: AsofJoinStrategy='backward', suffix: str='_right', tolerance: str | int | float | timedelta | None=None, allow_parallel: bool=True, force_parallel: bool=False, coalesce: bool=True, allow_exact_matches: bool=True, check_sortedness: bool=True, description: Optional[str] = None) -> LazyFrame: ...
237
+ def join_asof(self, other: FlowFrame, left_on: str | None | Expr = None, right_on: str | None | Expr = None, on: str | None | Expr = None, by_left: str | Sequence[str] | None = None, by_right: str | Sequence[str] | None = None, by: str | Sequence[str] | None = None, strategy: AsofJoinStrategy = 'backward', suffix: str = '_right', tolerance: str | int | float | timedelta | None = None, allow_parallel: bool = True, force_parallel: bool = False, coalesce: bool = True, allow_exact_matches: bool = True, check_sortedness: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
204
238
 
205
239
  # Perform a join based on one or multiple (in)equality predicates.
206
- def join_where(self, other: LazyFrame, *predicates, suffix: str='_right', description: Optional[str] = None) -> LazyFrame: ...
240
+ def join_where(self, other: FlowFrame, *predicates, suffix: str = '_right', description: Optional[str] = None) -> 'FlowFrame': ...
207
241
 
208
242
  # Get the last row of the DataFrame.
209
- def last(self, description: Optional[str] = None) -> LazyFrame: ...
243
+ def last(self, description: Optional[str] = None) -> 'FlowFrame': ...
210
244
 
211
245
  # Return lazy representation, i.e. itself.
212
- def lazy(self, description: Optional[str] = None) -> LazyFrame: ...
246
+ def lazy(self, description: Optional[str] = None) -> 'FlowFrame': ...
213
247
 
214
- def limit(self, n: int, description: str=None) -> Any: ...
248
+ def limit(self, n: int, description: str = None) -> Any: ...
215
249
 
216
250
  # Apply a custom function.
217
- def map_batches(self, function: Callable[[DataFrame], DataFrame], predicate_pushdown: bool=True, projection_pushdown: bool=True, slice_pushdown: bool=True, no_optimizations: bool=False, schema: None | SchemaDict=None, validate_output_schema: bool=True, streamable: bool=False, description: Optional[str] = None) -> LazyFrame: ...
251
+ def map_batches(self, function: Callable[[DataFrame], DataFrame], predicate_pushdown: bool = True, projection_pushdown: bool = True, slice_pushdown: bool = True, no_optimizations: bool = False, schema: None | SchemaDict = None, validate_output_schema: bool = True, streamable: bool = False, description: Optional[str] = None) -> 'FlowFrame': ...
252
+
253
+ # Match or evolve the schema of a LazyFrame into a specific schema.
254
+ def match_to_schema(self, schema: SchemaDict | Schema, missing_columns: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise'] | Expr] = 'raise', missing_struct_fields: Literal['insert', 'raise'] | Mapping[str, Literal['insert', 'raise']] = 'raise', extra_columns: Literal['ignore', 'raise'] = 'raise', extra_struct_fields: Literal['ignore', 'raise'] | Mapping[str, Literal['ignore', 'raise']] = 'raise', integer_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', float_cast: Literal['upcast', 'forbid'] | Mapping[str, Literal['upcast', 'forbid']] = 'forbid', description: Optional[str] = None) -> 'FlowFrame': ...
218
255
 
219
256
  # Aggregate the columns in the LazyFrame to their maximum value.
220
- def max(self, description: Optional[str] = None) -> LazyFrame: ...
257
+ def max(self, description: Optional[str] = None) -> 'FlowFrame': ...
221
258
 
222
259
  # Aggregate the columns in the LazyFrame to their mean value.
223
- def mean(self, description: Optional[str] = None) -> LazyFrame: ...
260
+ def mean(self, description: Optional[str] = None) -> 'FlowFrame': ...
224
261
 
225
262
  # Aggregate the columns in the LazyFrame to their median value.
226
- def median(self, description: Optional[str] = None) -> LazyFrame: ...
263
+ def median(self, description: Optional[str] = None) -> 'FlowFrame': ...
227
264
 
228
265
  # Unpivot a DataFrame from wide to long format.
229
- def melt(self, id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None=None, value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None=None, variable_name: str | None=None, value_name: str | None=None, streamable: bool=True, description: Optional[str] = None) -> LazyFrame: ...
266
+ def melt(self, id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, streamable: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
230
267
 
231
268
  # Take two sorted DataFrames and merge them by the sorted key.
232
- def merge_sorted(self, other: LazyFrame, key: str, description: Optional[str] = None) -> LazyFrame: ...
269
+ def merge_sorted(self, other: FlowFrame, key: str, description: Optional[str] = None) -> 'FlowFrame': ...
233
270
 
234
271
  # Aggregate the columns in the LazyFrame to their minimum value.
235
- def min(self, description: Optional[str] = None) -> LazyFrame: ...
272
+ def min(self, description: Optional[str] = None) -> 'FlowFrame': ...
236
273
 
237
274
  # Aggregate the columns in the LazyFrame as the sum of their null value count.
238
- def null_count(self, description: Optional[str] = None) -> LazyFrame: ...
275
+ def null_count(self, description: Optional[str] = None) -> 'FlowFrame': ...
239
276
 
240
277
  # Offers a structured way to apply a sequence of user-defined functions (UDFs).
241
278
  def pipe(self, function: Callable[Concatenate[LazyFrame, P], T], *args, description: Optional[str] = None, **kwargs) -> T: ...
242
279
 
243
280
  # Pivot a DataFrame from long to wide format.
244
- def pivot(self, on: str | list[str], index: str | list[str] | None=None, values: str | list[str] | None=None, aggregate_function: str | None='first', maintain_order: bool=True, sort_columns: bool=False, separator: str='_', description: str=None) -> 'FlowFrame': ...
281
+ def pivot(self, on: str | list[str], index: str | list[str] | None = None, values: str | list[str] | None = None, aggregate_function: str | None = 'first', maintain_order: bool = True, sort_columns: bool = False, separator: str = '_', description: str = None) -> 'FlowFrame': ...
245
282
 
246
283
  # Profile a LazyFrame.
247
- def profile(self, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, no_optimization: bool=False, slice_pushdown: bool=True, comm_subplan_elim: bool=True, comm_subexpr_elim: bool=True, cluster_with_columns: bool=True, collapse_joins: bool=True, show_plot: bool=False, truncate_nodes: int=0, figsize: tuple[int, int]=(18, 8), engine: EngineType='auto', _check_order: bool=True, **_kwargs) -> tuple[DataFrame, DataFrame]: ...
284
+ def profile(self, type_coercion: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, no_optimization: bool = False, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, show_plot: bool = False, truncate_nodes: int = 0, figsize: tuple[int, int] = ..., engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, **_kwargs) -> tuple[DataFrame, DataFrame]: ...
248
285
 
249
286
  # Aggregate the columns in the LazyFrame to their quantile value.
250
- def quantile(self, quantile: float | Expr, interpolation: RollingInterpolationMethod='nearest', description: Optional[str] = None) -> LazyFrame: ...
287
+ def quantile(self, quantile: float | Expr, interpolation: QuantileMethod = 'nearest', description: Optional[str] = None) -> 'FlowFrame': ...
251
288
 
252
289
  # Run a query remotely on Polars Cloud.
253
- def remote(self, context: pc.ComputeContext | None=None, plan_type: pc._typing.PlanTypePreference='dot', description: Optional[str] = None) -> pc.LazyFrameExt: ...
290
+ def remote(self, context: pc.ComputeContext | None = None, plan_type: pc._typing.PlanTypePreference = 'dot', description: Optional[str] = None) -> pc.LazyFrameExt: ...
254
291
 
255
292
  # Remove rows, dropping those that match the given predicate expression(s).
256
- def remove(self, *predicates, description: Optional[str] = None, **constraints) -> LazyFrame: ...
293
+ def remove(self, *predicates, description: Optional[str] = None, **constraints) -> 'FlowFrame': ...
257
294
 
258
295
  # Rename column names.
259
- def rename(self, mapping: dict[str, str] | Callable[[str], str], strict: bool=True, description: Optional[str] = None) -> LazyFrame: ...
296
+ def rename(self, mapping: Mapping[str, str] | Callable[[str], str], strict: bool = True, description: Optional[str] = None) -> 'FlowFrame': ...
260
297
 
261
298
  # Reverse the DataFrame.
262
- def reverse(self, description: Optional[str] = None) -> LazyFrame: ...
299
+ def reverse(self, description: Optional[str] = None) -> 'FlowFrame': ...
263
300
 
264
301
  # Create rolling groups based on a temporal or integer column.
265
- def rolling(self, index_column: IntoExpr, period: str | timedelta, offset: str | timedelta | None=None, closed: ClosedInterval='right', group_by: IntoExpr | Iterable[IntoExpr] | None=None, description: Optional[str] = None) -> LazyGroupBy: ...
302
+ def rolling(self, index_column: IntoExpr, period: str | timedelta, offset: str | timedelta | None = None, closed: ClosedInterval = 'right', group_by: IntoExpr | Iterable[IntoExpr] | None = None, description: Optional[str] = None) -> LazyGroupBy: ...
266
303
 
267
304
  # Save the graph
268
- def save_graph(self, file_path: str, auto_arrange: bool=True, description: Optional[str] = None) -> Any: ...
305
+ def save_graph(self, file_path: str, auto_arrange: bool = True, description: Optional[str] = None) -> Any: ...
269
306
 
270
307
  # Get an ordered mapping of column names to their data type.
271
308
  @property
272
309
  def schema(self) -> pl.Schema: ...
273
310
 
274
311
  # Select columns from the frame.
275
- def select(self, *columns, description: typing.Optional[str]=None) -> 'FlowFrame': ...
312
+ def select(self, *columns, description: typing.Optional[str] = None) -> 'FlowFrame': ...
276
313
 
277
314
  # Select columns from this LazyFrame.
278
- def select_seq(self, *exprs, description: Optional[str] = None, **named_exprs) -> LazyFrame: ...
315
+ def select_seq(self, *exprs, description: Optional[str] = None, **named_exprs) -> 'FlowFrame': ...
279
316
 
280
317
  # Serialize the logical plan of this LazyFrame to a file or string in JSON format.
281
- def serialize(self, file: IOBase | str | Path | None=None, format: SerializationFormat='binary', description: Optional[str] = None) -> bytes | str | None: ...
318
+ def serialize(self, file: IOBase | str | Path | None = None, format: SerializationFormat = 'binary', description: Optional[str] = None) -> bytes | str | None: ...
282
319
 
283
320
  # Flag a column as sorted.
284
- def set_sorted(self, column: str, descending: bool=False, description: Optional[str] = None) -> LazyFrame: ...
321
+ def set_sorted(self, column: str, descending: bool = False, description: Optional[str] = None) -> 'FlowFrame': ...
285
322
 
286
323
  # Shift values by the given number of indices.
287
- def shift(self, n: int | IntoExprColumn=1, fill_value: IntoExpr | None=None, description: Optional[str] = None) -> LazyFrame: ...
324
+ def shift(self, n: int | IntoExprColumn = 1, fill_value: IntoExpr | None = None, description: Optional[str] = None) -> 'FlowFrame': ...
288
325
 
289
326
  # Show a plot of the query plan.
290
- def show_graph(self, optimized: bool=True, show: bool=True, output_path: str | Path | None=None, raw_output: bool=False, figsize: tuple[float, float]=(16.0, 12.0), type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, slice_pushdown: bool=True, comm_subplan_elim: bool=True, comm_subexpr_elim: bool=True, cluster_with_columns: bool=True, collapse_joins: bool=True, streaming: bool=False, engine: EngineType='auto', _check_order: bool=True) -> str | None: ...
327
+ def show_graph(self, optimized: bool = True, show: bool = True, output_path: str | Path | None = None, raw_output: bool = False, figsize: tuple[float, float] = ..., type_coercion: bool = True, _type_check: bool = True, predicate_pushdown: bool = True, projection_pushdown: bool = True, simplify_expression: bool = True, slice_pushdown: bool = True, comm_subplan_elim: bool = True, comm_subexpr_elim: bool = True, cluster_with_columns: bool = True, collapse_joins: bool = True, engine: EngineType = 'auto', plan_stage: PlanStage = 'ir', _check_order: bool = True, optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS) -> str | None: ...
291
328
 
292
329
  # Write the data to a CSV file.
293
- def sink_csv(self, file: str, *args, separator: str=',', encoding: str='utf-8', description: str=None) -> 'FlowFrame': ...
330
+ def sink_csv(self, file: str, *args, separator: str = ',', encoding: str = 'utf-8', description: str = None) -> 'FlowFrame': ...
294
331
 
295
332
  # Evaluate the query in streaming mode and write to an IPC file.
296
- def sink_ipc(self, path: str | Path, compression: IpcCompression | None='zstd', compat_level: CompatLevel | None=None, maintain_order: bool=True, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, slice_pushdown: bool=True, collapse_joins: bool=True, no_optimization: bool=False, storage_options: dict[str, Any] | None=None, credential_provider: CredentialProviderFunction | Literal['auto'] | None='auto', retries: int=2, sync_on_close: SyncOnCloseMethod | None=None, mkdir: bool=False, lazy: bool=False, engine: EngineType='auto', description: Optional[str] = None) -> LazyFrame | None: ...
333
+ def sink_ipc(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: IpcCompression | None = 'uncompressed', compat_level: CompatLevel | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: Optional[str] = None) -> LazyFrame | None: ...
297
334
 
298
335
  # Evaluate the query in streaming mode and write to an NDJSON file.
299
- def sink_ndjson(self, path: str | Path, maintain_order: bool=True, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, slice_pushdown: bool=True, collapse_joins: bool=True, no_optimization: bool=False, storage_options: dict[str, Any] | None=None, credential_provider: CredentialProviderFunction | Literal['auto'] | None='auto', retries: int=2, sync_on_close: SyncOnCloseMethod | None=None, mkdir: bool=False, lazy: bool=False, engine: EngineType='auto', description: Optional[str] = None) -> LazyFrame | None: ...
336
+ def sink_ndjson(self, path: str | Path | IO[bytes] | IO[str] | PartitioningScheme, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, mkdir: bool = False, lazy: bool = False, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: Optional[str] = None) -> LazyFrame | None: ...
300
337
 
301
338
  # Evaluate the query in streaming mode and write to a Parquet file.
302
- def sink_parquet(self, path: str | Path, compression: str='zstd', compression_level: int | None=None, statistics: bool | str | dict[str, bool]=True, row_group_size: int | None=None, data_page_size: int | None=None, maintain_order: bool=True, type_coercion: bool=True, _type_check: bool=True, predicate_pushdown: bool=True, projection_pushdown: bool=True, simplify_expression: bool=True, slice_pushdown: bool=True, collapse_joins: bool=True, no_optimization: bool=False, storage_options: dict[str, Any] | None=None, credential_provider: CredentialProviderFunction | Literal['auto'] | None='auto', retries: int=2, sync_on_close: SyncOnCloseMethod | None=None, mkdir: bool=False, lazy: bool=False, engine: EngineType='auto', description: Optional[str] = None) -> LazyFrame | None: ...
339
+ def sink_parquet(self, path: str | Path | IO[bytes] | PartitioningScheme, compression: str = 'zstd', compression_level: int | None = None, statistics: bool | str | dict[str, bool] = True, row_group_size: int | None = None, data_page_size: int | None = None, maintain_order: bool = True, storage_options: dict[str, Any] | None = None, credential_provider: CredentialProviderFunction | Literal['auto'] | None = 'auto', retries: int = 2, sync_on_close: SyncOnCloseMethod | None = None, metadata: ParquetMetadata | None = None, mkdir: bool = False, lazy: bool = False, field_overwrites: ParquetFieldOverwrites | Sequence[ParquetFieldOverwrites] | Mapping[str, ParquetFieldOverwrites] | None = None, engine: EngineType = 'auto', optimizations: QueryOptFlags = DEFAULT_QUERY_OPT_FLAGS, description: Optional[str] = None) -> LazyFrame | None: ...
 
  # Get a slice of this DataFrame.
- def slice(self, offset: int, length: int | None=None, description: Optional[str] = None) -> LazyFrame: ...
+ def slice(self, offset: int, length: int | None = None, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Sort the dataframe by the given columns.
- def sort(self, by: typing.Union[typing.List[typing.Union[flowfile_frame.expr.Expr, str]], flowfile_frame.expr.Expr, str], *more_by, descending: typing.Union[bool, typing.List[bool]]=False, nulls_last: typing.Union[bool, typing.List[bool]]=False, multithreaded: bool=True, maintain_order: bool=False, description: typing.Optional[str]=None) -> 'FlowFrame': ...
+ def sort(self, by: typing.Union[typing.List[typing.Union[flowfile_frame.expr.Expr, str]], flowfile_frame.expr.Expr, str], *more_by, descending: typing.Union[bool, typing.List[bool]] = False, nulls_last: typing.Union[bool, typing.List[bool]] = False, multithreaded: bool = True, maintain_order: bool = False, description: typing.Optional[str] = None) -> 'FlowFrame': ...
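  (Example — illustrative sketch; `df` and the column names are invented. Per the signature, `descending` and `nulls_last` accept a list of bools matching the sort columns.)
      df.sort("country", "sales", descending=[False, True], nulls_last=True, description="rank sales per country")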
 
  # Execute a SQL query against the LazyFrame.
- def sql(self, query: str, table_name: str='self', description: Optional[str] = None) -> LazyFrame: ...
+ def sql(self, query: str, table_name: str = 'self', description: Optional[str] = None) -> 'FlowFrame': ...
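  (Example — illustrative sketch; `df` and the column names are invented. The frame is addressable as 'self' by default, and the result is now typed as 'FlowFrame' rather than LazyFrame.)
      df.sql("SELECT country, SUM(sales) AS total FROM self GROUP BY country")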
 
  # Aggregate the columns in the LazyFrame to their standard deviation value.
- def std(self, ddof: int=1, description: Optional[str] = None) -> LazyFrame: ...
+ def std(self, ddof: int = 1, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Aggregate the columns in the LazyFrame to their sum value.
- def sum(self, description: Optional[str] = None) -> LazyFrame: ...
+ def sum(self, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Get the last `n` rows.
- def tail(self, n: int=5, description: Optional[str] = None) -> LazyFrame: ...
+ def tail(self, n: int = 5, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Split text in a column into multiple rows.
- def text_to_rows(self, column: str | flowfile_frame.expr.Column, output_column: str=None, delimiter: str=None, split_by_column: str=None, description: str=None) -> 'FlowFrame': ...
+ def text_to_rows(self, column: str | flowfile_frame.expr.Column, output_column: str = None, delimiter: str = None, split_by_column: str = None, description: str = None) -> 'FlowFrame': ...
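  (Example — illustrative sketch; `df` and the column names are invented. Splits a delimited string column into one row per value.)
      df.text_to_rows("tags", output_column="tag", delimiter=",")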
 
  # Get the underlying ETL graph.
  def to_graph(self, description: Optional[str] = None) -> Any: ...
 
  # Return the `k` largest rows.
- def top_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool]=False, description: Optional[str] = None) -> LazyFrame: ...
+ def top_k(self, k: int, by: IntoExpr | Iterable[IntoExpr], reverse: bool | Sequence[bool] = False, description: Optional[str] = None) -> 'FlowFrame': ...
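  (Example — illustrative sketch; `df` and the column name are invented. Returns the three largest rows by `sales`.)
      df.top_k(3, by="sales")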
 
  # Drop duplicate rows from this dataframe.
- def unique(self, subset: typing.Union[str, ForwardRef('Expr'), typing.List[typing.Union[ForwardRef('Expr'), str]]]=None, keep: typing.Literal['first', 'last', 'any', 'none']='any', maintain_order: bool=False, description: str=None) -> 'FlowFrame': ...
+ def unique(self, subset: typing.Union[str, ForwardRef('Expr'), typing.List[typing.Union[ForwardRef('Expr'), str]]] = None, keep: typing.Literal['first', 'last', 'any', 'none'] = 'any', maintain_order: bool = False, description: str = None) -> 'FlowFrame': ...
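  (Example — illustrative sketch; `df` and the key column are invented. Keeps the first occurrence per key while preserving input order.)
      df.unique(subset=["customer_id"], keep="first", maintain_order=True)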
 
  # Decompose struct columns into separate columns for each of their fields.
- def unnest(self, columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector], *more_columns, description: Optional[str] = None) -> LazyFrame: ...
+ def unnest(self, columns: ColumnNameOrSelector | Collection[ColumnNameOrSelector], *more_columns, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Unpivot a DataFrame from wide to long format.
- def unpivot(self, on: list[str | flowfile_frame.selectors.Selector] | str | None | flowfile_frame.selectors.Selector=None, index: list[str] | str | None=None, variable_name: str='variable', value_name: str='value', description: str=None) -> 'FlowFrame': ...
+ def unpivot(self, on: list[str | flowfile_frame.selectors.Selector] | str | None | flowfile_frame.selectors.Selector = None, index: list[str] | str | None = None, variable_name: str = 'variable', value_name: str = 'value', description: str = None) -> 'FlowFrame': ...
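  (Example — illustrative sketch; `df` and the column names are invented. Reshapes wide quarterly columns into long format.)
      df.unpivot(on=["q1", "q2"], index="id", variable_name="quarter", value_name="revenue")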
 
  # Update the values in this `LazyFrame` with the values in `other`.
- def update(self, other: LazyFrame, on: str | Sequence[str] | None=None, how: Literal['left', 'inner', 'full']='left', left_on: str | Sequence[str] | None=None, right_on: str | Sequence[str] | None=None, include_nulls: bool=False, description: Optional[str] = None) -> LazyFrame: ...
+ def update(self, other: FlowFrame, on: str | Sequence[str] | None = None, how: Literal['left', 'inner', 'full'] = 'left', left_on: str | Sequence[str] | None = None, right_on: str | Sequence[str] | None = None, include_nulls: bool = False, maintain_order: MaintainOrderJoin | None = 'left', description: Optional[str] = None) -> 'FlowFrame': ...
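  (Example — illustrative sketch; `df`, `prices`, and the key column are invented. In 0.3.7 `other` is typed as FlowFrame and a `maintain_order` argument is added, defaulting to 'left'.)
      df.update(prices, on="sku", how="left", include_nulls=False)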
 
  # Aggregate the columns in the LazyFrame to their variance value.
- def var(self, ddof: int=1, description: Optional[str] = None) -> LazyFrame: ...
+ def var(self, ddof: int = 1, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Get the number of columns.
  @property
@@ -351,18 +388,30 @@ class FlowFrame:
  def with_columns(self, *exprs: Union[Expr, Iterable[Expr], Any], flowfile_formulas: Optional[List[str]] = None, output_column_names: Optional[List[str]] = None, description: Optional[str] = None, **named_exprs: Union[Expr, Any]) -> 'FlowFrame': ...
 
  # Add columns to this LazyFrame.
- def with_columns_seq(self, *exprs, description: Optional[str] = None, **named_exprs) -> LazyFrame: ...
+ def with_columns_seq(self, *exprs, description: Optional[str] = None, **named_exprs) -> 'FlowFrame': ...
 
  # Add an external context to the computation graph.
- def with_context(self, other: Self | list[Self], description: Optional[str] = None) -> LazyFrame: ...
+ def with_context(self, other: Self | list[Self], description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Add a column at index 0 that counts the rows.
- def with_row_count(self, name: str='row_nr', offset: int=0, description: Optional[str] = None) -> LazyFrame: ...
+ def with_row_count(self, name: str = 'row_nr', offset: int = 0, description: Optional[str] = None) -> 'FlowFrame': ...
 
  # Add a row index as the first column in the DataFrame.
- def with_row_index(self, name: str='index', offset: int=0, description: str=None) -> 'FlowFrame': ...
+ def with_row_index(self, name: str = 'index', offset: int = 0, description: str = None) -> 'FlowFrame': ...
+

- def write_csv(self, file: str | os.PathLike, separator: str=',', encoding: str='utf-8', convert_to_absolute_path: bool=True, description: str=None, **kwargs) -> 'FlowFrame': ...
+ def write_csv(self, file: str | os.PathLike, separator: str = ',', encoding: str = 'utf-8', convert_to_absolute_path: bool = True, description: str = None, **kwargs) -> 'FlowFrame': ...
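  (Example — illustrative sketch; `df`, the path, and the separator are invented. Both methods return a 'FlowFrame', so an output node can be chained directly.)
      df.with_row_index(name="row_id").write_csv("output.csv", separator=";")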
+ # Write the data frame to cloud storage in CSV format.
+ def write_csv_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, delimiter: str = ';', encoding: typing.Literal['utf8', 'utf8-lossy'] = 'utf8', description: typing.Optional[str] = None) -> 'FlowFrame': ...
+
+ # Write the data frame to cloud storage in Delta Lake format.
+ def write_delta(self, path: str, connection_name: typing.Optional[str] = None, write_mode: typing.Literal['overwrite', 'append'] = 'overwrite', description: typing.Optional[str] = None) -> 'FlowFrame': ...
+
+ # Write the data frame to cloud storage in JSON format.
+ def write_json_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, description: typing.Optional[str] = None) -> 'FlowFrame': ...
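  (Example — illustrative sketch; the bucket paths and connection name are invented, and `connection_name` presumably refers to a connection registered via the new cloud connection manager shipped in this release. The new cloud writers all follow the same pattern.)
      df.write_csv_to_cloud_storage("s3://my-bucket/exports/data.csv", connection_name="my_s3", delimiter=";")
      df.write_delta("s3://my-bucket/tables/sales", connection_name="my_s3", write_mode="append")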
 
  # Write the data to a Parquet file. Creates a standard Output node if only
- def write_parquet(self, path: str | os.PathLike, convert_to_absolute_path: bool=True, description: str=None, **kwargs) -> 'FlowFrame': ...
+ def write_parquet(self, path: str | os.PathLike, convert_to_absolute_path: bool = True, description: str = None, **kwargs) -> 'FlowFrame': ...
+
+ # Write the data frame to cloud storage in Parquet format.
+ def write_parquet_to_cloud_storage(self, path: str, connection_name: typing.Optional[str] = None, compression: typing.Literal['snappy', 'gzip', 'brotli', 'lz4', 'zstd'] = 'snappy', description: typing.Optional[str] = None) -> 'FlowFrame': ...
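  (Example — illustrative sketch; the bucket path and connection name are invented. Writes Parquet to cloud storage with a non-default compression codec.)
      df.write_parquet_to_cloud_storage("s3://my-bucket/exports/data.parquet", connection_name="my_s3", compression="zstd")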