flowfile-0.3.6-py3-none-any.whl → flowfile-0.3.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +5 -2
  3. flowfile/web/__init__.py +4 -2
  4. flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
  5. flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
  6. flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
  7. flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
  8. flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  9. flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
  10. flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
  11. flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
  12. flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
  13. flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
  14. flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
  15. flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
  16. flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
  17. flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
  18. flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
  19. flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
  20. flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
  21. flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
  22. flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
  23. flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
  24. flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
  26. flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
  27. flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
  28. flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
  29. flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
  31. flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
  33. flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
  34. flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
  35. flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
  37. flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
  38. flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
  39. flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
  40. flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
  41. flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
  42. flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
  43. flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
  44. flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
  45. flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
  49. flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/METADATA +2 -2
  52. {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/RECORD +100 -98
  53. flowfile_core/__init__.py +1 -0
  54. flowfile_core/auth/jwt.py +39 -0
  55. flowfile_core/configs/node_store/nodes.py +1 -0
  56. flowfile_core/configs/settings.py +6 -5
  57. flowfile_core/configs/utils.py +5 -0
  58. flowfile_core/database/connection.py +1 -3
  59. flowfile_core/flowfile/code_generator/code_generator.py +71 -0
  60. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -2
  61. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +598 -310
  62. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  63. flowfile_core/flowfile/flow_graph.py +620 -192
  64. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  65. flowfile_core/flowfile/flow_node/flow_node.py +510 -89
  66. flowfile_core/flowfile/flow_node/models.py +125 -20
  67. flowfile_core/flowfile/handler.py +2 -33
  68. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  69. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  70. flowfile_core/flowfile/utils.py +36 -5
  71. flowfile_core/main.py +32 -13
  72. flowfile_core/routes/cloud_connections.py +7 -11
  73. flowfile_core/routes/logs.py +2 -6
  74. flowfile_core/routes/public.py +1 -0
  75. flowfile_core/routes/routes.py +127 -51
  76. flowfile_core/routes/secrets.py +72 -14
  77. flowfile_core/schemas/__init__.py +8 -0
  78. flowfile_core/schemas/input_schema.py +92 -64
  79. flowfile_core/schemas/output_model.py +19 -3
  80. flowfile_core/schemas/schemas.py +144 -11
  81. flowfile_core/schemas/transform_schema.py +82 -17
  82. flowfile_core/utils/arrow_reader.py +8 -3
  83. flowfile_core/utils/validate_setup.py +0 -2
  84. flowfile_frame/__init__.py +9 -1
  85. flowfile_frame/cloud_storage/__init__.py +0 -0
  86. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  87. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  88. flowfile_frame/expr.py +42 -1
  89. flowfile_frame/expr.pyi +76 -61
  90. flowfile_frame/flow_frame.py +233 -111
  91. flowfile_frame/flow_frame.pyi +137 -91
  92. flowfile_frame/flow_frame_methods.py +150 -12
  93. flowfile_frame/group_frame.py +3 -0
  94. flowfile_frame/utils.py +25 -3
  95. test_utils/s3/data_generator.py +1 -0
  96. test_utils/s3/demo_data_generator.py +186 -0
  97. test_utils/s3/fixtures.py +6 -1
  98. flowfile_core/schemas/defaults.py +0 -9
  99. flowfile_core/schemas/models.py +0 -193
  100. {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/LICENSE +0 -0
  101. {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/WHEEL +0 -0
  102. {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/entry_points.txt +0 -0
@@ -5,7 +5,7 @@ from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, C
  import re

  import polars as pl
-
+ from polars._typing import (CsvEncoding)
  from flowfile_frame.lazy_methods import add_lazyframe_methods

  from polars._typing import (FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
@@ -20,13 +20,12 @@ from flowfile_frame.expr import Expr, Column, lit, col
  from flowfile_frame.selectors import Selector
  from flowfile_frame.group_frame import GroupByFrame
  from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
-                                   ensure_inputs_as_iterable)
+                                   ensure_inputs_as_iterable, generate_node_id,
+                                   set_node_id, data as node_id_data)
  from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
  from flowfile_frame.utils import _check_if_convertible_to_code
  from flowfile_frame.config import logger
-
-
- node_id_counter = 0
+ from flowfile_frame.cloud_storage.frame_helpers import add_write_ff_to_cloud_storage


  def can_be_expr(param: inspect.Parameter) -> bool:
@@ -115,12 +114,6 @@ def _check_ok_for_serialization(method_name: str = None, polars_expr: pl.Expr |
          raise NotImplementedError("Cannot create a polars lambda expression without the groupby expression")


- def generate_node_id() -> int:
-     global node_id_counter
-     node_id_counter += 1
-     return node_id_counter
-
-
  @add_lazyframe_methods
  class FlowFrame:
      """Main class that wraps FlowDataEngine and maintains the ETL graph."""
@@ -181,38 +174,41 @@ class FlowFrame:
          flow_graph = create_flow_graph()

          flow_id = flow_graph.flow_id
-         # Convert data to a polars DataFrame/LazyFrame
-         try:
-             # Use polars to convert from various types
-             pl_df = pl.DataFrame(
-                 data,
-                 schema=schema,
-                 schema_overrides=schema_overrides,
-                 strict=strict,
-                 orient=orient,
-                 infer_schema_length=infer_schema_length,
-                 nan_to_null=nan_to_null,
+         # Convert data to a polars DataFrame/LazyFram
+         if isinstance(data, pl.LazyFrame):
+             flow_graph.add_dependency_on_polars_lazy_frame(data.lazy(), node_id)
+         else:
+             try:
+                 # Use polars to convert from various types
+                 pl_df = pl.DataFrame(
+                     data,
+                     schema=schema,
+                     schema_overrides=schema_overrides,
+                     strict=strict,
+                     orient=orient,
+                     infer_schema_length=infer_schema_length,
+                     nan_to_null=nan_to_null,
+                 )
+                 pl_data = pl_df.lazy()
+             except Exception as e:
+                 raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
+             # Create a FlowDataEngine to get data in the right format for manual input
+             flow_table = FlowDataEngine(raw_data=pl_data)
+             raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
+                                                    columns=[c.get_minimal_field_info() for c in flow_table.schema])
+             # Create a manual input node
+             input_node = input_schema.NodeManualInput(
+                 flow_id=flow_id,
+                 node_id=node_id,
+                 raw_data_format=raw_data_format,
+                 pos_x=100,
+                 pos_y=100,
+                 is_setup=True,
+                 description=description,
              )
-             pl_data = pl_df.lazy()
-         except Exception as e:
-             raise ValueError(f"Could not dconvert data to a polars DataFrame: {e}")
-         # Create a FlowDataEngine to get data in the right format for manual input
-         flow_table = FlowDataEngine(raw_data=pl_data)
-         raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
-                                                columns=[c.get_minimal_field_info() for c in flow_table.schema])
-         # Create a manual input node
-         input_node = input_schema.NodeManualInput(
-             flow_id=flow_id,
-             node_id=node_id,
-             raw_data_format=raw_data_format,
-             pos_x=100,
-             pos_y=100,
-             is_setup=True,
-             description=description,
-         )
-         # Add to graph
-         flow_graph.add_manual_input(input_node)
-         # Return new frame
+             # Add to graph
+             flow_graph.add_manual_input(input_node)
+         # Return new fram
          return FlowFrame(
              data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
              flow_graph=flow_graph,
@@ -221,69 +217,92 @@ class FlowFrame:
          )

      def __new__(
-         cls,
-         data: pl.LazyFrame | FrameInitTypes = None,
-         schema: SchemaDefinition | None = None,
-         *,
-         schema_overrides: SchemaDict | None = None,
-         strict: bool = True,
-         orient: Orientation | None = None,
-         infer_schema_length: int | None = 100,
-         nan_to_null: bool = False,
-         flow_graph=None,
-         node_id=None,
-         parent_node_id=None,
-     ):
-         """Create a new FlowFrame instance."""
-         # If data is not a LazyFrame, use the factory method
-         if data is not None and not isinstance(data, pl.LazyFrame):
-             return cls.create_from_any_type(
-                 data=data,
-                 schema=schema,
-                 schema_overrides=schema_overrides,
-                 strict=strict,
-                 orient=orient,
-                 infer_schema_length=infer_schema_length,
-                 nan_to_null=nan_to_null,
-                 flow_graph=flow_graph,
-                 node_id=node_id,
-                 parent_node_id=parent_node_id,
-             )
+             cls,
+             data: pl.LazyFrame | FrameInitTypes = None,
+             schema: SchemaDefinition | None = None,
+             *,
+             schema_overrides: SchemaDict | None = None,
+             strict: bool = True,
+             orient: Orientation | None = None,
+             infer_schema_length: int | None = 100,
+             nan_to_null: bool = False,
+             flow_graph: Optional[FlowGraph] = None,
+             node_id: Optional[int] = None,
+             parent_node_id: Optional[int] = None,
+             **kwargs,  # Accept and ignore any other kwargs for API compatibility
+     ) -> "FlowFrame":
+         """
+         Unified constructor for FlowFrame.

-         instance = super().__new__(cls)
-         return instance
+         - If `flow_graph` and `node_id` are provided, it creates a lightweight Python
+           wrapper around an existing node in the graph.
+         - Otherwise, it creates a new source node in a new or existing graph
+           from the provided data.
+         """
+         # --- Path 1: Internal Wrapper Creation ---
+         # This path is taken by methods like .join(), .sort(), etc., which provide an existing graph.
+         if flow_graph is not None and node_id is not None:
+             instance = super().__new__(cls)
+             instance.data = data
+             instance.flow_graph = flow_graph
+             instance.node_id = node_id
+             instance.parent_node_id = parent_node_id
+             return instance
+         elif flow_graph is not None and not isinstance(data, pl.LazyFrame):
+             instance = cls.create_from_any_type(data=data, schema=schema, schema_overrides=schema_overrides,
+                                                 strict=strict, orient=orient, infer_schema_length=infer_schema_length,
+                                                 nan_to_null=nan_to_null, flow_graph=flow_graph, node_id=node_id,
+                                                 parent_node_id=parent_node_id
+                                                 )
+             return instance
+
+         source_graph = create_flow_graph()
+         source_node_id = generate_node_id()

-     def __init__(
-         self,
-         data: pl.LazyFrame | FrameInitTypes = None,
-         schema: SchemaDefinition | None = None,
-         *,
-         schema_overrides: SchemaDict | None = None,
-         strict: bool = True,
-         orient: Orientation | None = None,
-         infer_schema_length: int | None = 100,
-         nan_to_null: bool = False,
-         flow_graph=None,
-         node_id=None,
-         parent_node_id=None,
-     ):
-         """Initialize the FlowFrame with data and graph references."""
          if data is None:
              data = pl.LazyFrame()
          if not isinstance(data, pl.LazyFrame):
-             return
-         self.node_id = node_id or generate_node_id()
-         self.parent_node_id = parent_node_id

-         # Initialize graph
-         if flow_graph is None:
-             flow_graph = create_flow_graph()
-         self.flow_graph = flow_graph
-         # Set up data
-         if isinstance(data, FlowDataEngine):
-             self.data = data.data_frame
+             description = "Data imported from Python object"
+             try:
+                 pl_df = pl.DataFrame(
+                     data, schema=schema, schema_overrides=schema_overrides,
+                     strict=strict, orient=orient, infer_schema_length=infer_schema_length,
+                     nan_to_null=nan_to_null
+                 )
+                 pl_data = pl_df.lazy()
+             except Exception as e:
+                 raise ValueError(f"Could not convert data to a Polars DataFrame: {e}")
+
+             flow_table = FlowDataEngine(raw_data=pl_data)
+             raw_data_format = input_schema.RawData(data=list(flow_table.to_dict().values()),
+                                                    columns=[c.get_minimal_field_info() for c in flow_table.schema])
+             input_node = input_schema.NodeManualInput(
+                 flow_id=source_graph.flow_id, node_id=source_node_id,
+                 raw_data_format=raw_data_format, pos_x=100, pos_y=100,
+                 is_setup=True, description=description
+             )
+             source_graph.add_manual_input(input_node)
          else:
-             self.data = data
+             source_graph.add_dependency_on_polars_lazy_frame(data, source_node_id)
+
+         final_data = source_graph.get_node(source_node_id).get_resulting_data().data_frame
+         return cls(
+             data=final_data,
+             flow_graph=source_graph,
+             node_id=source_node_id,
+             parent_node_id=parent_node_id
+         )
+
+     def __init__(self, *args, **kwargs):
+         """
+         The __init__ method is intentionally left empty.
+         All initialization logic is handled in the `__new__` method to support
+         the flexible factory pattern and prevent state from being overwritten.
+         Python automatically calls __init__ after __new__, so this empty
+         method catches that call and safely does nothing.
+         """
+         pass

      def __repr__(self):
          return str(self.data)
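
The new docstring above describes two construction paths for the rewritten __new__. A hedged usage sketch of the public path, assuming FlowFrame is imported from flowfile_frame.flow_frame (where this diff shows it defined); the column names and CSV path are placeholders:

    import polars as pl
    from flowfile_frame.flow_frame import FlowFrame

    # Plain Python data: __new__ builds a fresh graph with a manual-input source node.
    ff = FlowFrame({"id": [1, 2, 3], "name": ["a", "b", "c"]})

    # A Polars LazyFrame: per the hunk above, the graph takes a dependency on the
    # LazyFrame directly instead of round-tripping through manual input.
    lf = pl.scan_csv("example.csv")  # placeholder path
    ff_lazy = FlowFrame(lf)

    # Internal path: methods such as join() pass flow_graph= and node_id=, so
    # __new__ only wraps the existing node; the now-empty __init__ leaves that
    # state untouched when Python calls it automatically afterwards.

Because all initialization happens in __new__, the automatic __init__ call can no longer overwrite the wrapper's data, flow_graph, or node_id attributes.
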
@@ -546,7 +565,7 @@ class FlowFrame:
          coalesce: bool = None,
          maintain_order: Literal[None, "left", "right", "left_right", "right_left"] = None,
          description: str = None,
-     ):
+     ) -> "FlowFrame":
          """
          Add a join operation to the Logical Plan.

@@ -594,7 +613,6 @@ class FlowFrame:
          use_polars_code = self._should_use_polars_code_for_join(
              maintain_order, coalesce, nulls_equal, validate, suffix
          )
-
          # Step 2: Ensure both FlowFrames are in the same graph
          self._ensure_same_graph(other)

@@ -662,9 +680,7 @@ class FlowFrame:
          other.node_id = new_other_node_id
          self.flow_graph = combined_graph
          other.flow_graph = combined_graph
-
-         global node_id_counter
-         node_id_counter += len(combined_graph.nodes)
+         node_id_data["c"] = node_id_data["c"] + len(combined_graph.nodes)

      def _parse_join_columns(
          self,
@@ -781,7 +797,6 @@ class FlowFrame:
          # Create select inputs for both frames
          left_select = transform_schema.SelectInputs.create_from_pl_df(self.data)
          right_select = transform_schema.SelectInputs.create_from_pl_df(other.data)
-
          # Create appropriate join input based on join type
          if how == 'cross':
              join_input = transform_schema.CrossJoinInput(
@@ -811,7 +826,6 @@ class FlowFrame:
          # Add connections
          self._add_connection(self.node_id, new_node_id, "main")
          other._add_connection(other.node_id, new_node_id, "right")
-
          # Create and return result frame
          return FlowFrame(
              data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
@@ -1074,7 +1088,7 @@ class FlowFrame:

      def write_parquet(
          self,
-         path: str|os.PathLike,
+         path: str | os.PathLike,
          *,
          description: str = None,
          convert_to_absolute_path: bool = True,
@@ -1244,6 +1258,117 @@ class FlowFrame:

          return self._create_child_frame(new_node_id)

+     def write_parquet_to_cloud_storage(self,
+                                        path: str,
+                                        connection_name: Optional[str] = None,
+                                        compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy",
+                                        description: Optional[str] = None,
+                                        ) -> "FlowFrame":
+         """
+         Write the data frame to cloud storage in Parquet format.
+
+         Args:
+             path (str): The destination path in cloud storage where the Parquet file will be written.
+             connection_name (Optional[str], optional): The name of the storage connection
+                 that a user can create. If None, uses the default connection. Defaults to None.
+             compression (Literal["snappy", "gzip", "brotli", "lz4", "zstd"], optional):
+                 The compression algorithm to use for the Parquet file. Defaults to "snappy".
+             description (Optional[str], optional): Description of this operation for the ETL graph.
+
+         Returns:
+             FlowFrame: A new child data frame representing the written data.
+         """
+
+         new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
+                                                     connection_name=connection_name,
+                                                     depends_on_node_id=self.node_id,
+                                                     parquet_compression=compression,
+                                                     file_format="parquet",
+                                                     description=description)
+         return self._create_child_frame(new_node_id)
+
+     def write_csv_to_cloud_storage(self,
+                                    path: str,
+                                    connection_name: Optional[str] = None,
+                                    delimiter: str = ";",
+                                    encoding: CsvEncoding = "utf8",
+                                    description: Optional[str] = None,
+                                    ) -> "FlowFrame":
+         """
+         Write the data frame to cloud storage in CSV format.
+
+         Args:
+             path (str): The destination path in cloud storage where the CSV file will be written.
+             connection_name (Optional[str], optional): The name of the storage connection
+                 that a user can create. If None, uses the default connection. Defaults to None.
+             delimiter (str, optional): The character used to separate fields in the CSV file.
+                 Defaults to ";".
+             encoding (CsvEncoding, optional): The character encoding to use for the CSV file.
+                 Defaults to "utf8".
+             description (Optional[str], optional): Description of this operation for the ETL graph.
+
+         Returns:
+             FlowFrame: A new child data frame representing the written data.
+         """
+         new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
+                                                     connection_name=connection_name,
+                                                     depends_on_node_id=self.node_id,
+                                                     csv_delimiter=delimiter,
+                                                     csv_encoding=encoding,
+                                                     file_format="csv",
+                                                     description=description)
+         return self._create_child_frame(new_node_id)
+
+     def write_delta(self,
+                     path: str,
+                     connection_name: Optional[str] = None,
+                     write_mode: Literal["overwrite", "append"] = "overwrite",
+                     description: Optional[str] = None,
+                     ) -> "FlowFrame":
+         """
+         Write the data frame to cloud storage in Delta Lake format.
+
+         Args:
+             path (str): The destination path in cloud storage where the Delta table will be written.
+             connection_name (Optional[str], optional): The name of the storage connection
+                 that a user can create. If None, uses the default connection. Defaults to None.
+             write_mode (Literal["overwrite", "append"], optional): The write mode for the Delta table.
+                 "overwrite" replaces existing data, "append" adds to existing data. Defaults to "overwrite".
+             description (Optional[str], optional): Description of this operation for the ETL graph.
+         Returns:
+             FlowFrame: A new child data frame representing the written data.
+         """
+         new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
+                                                     connection_name=connection_name,
+                                                     depends_on_node_id=self.node_id,
+                                                     write_mode=write_mode,
+                                                     file_format="delta",
+                                                     description=description)
+         return self._create_child_frame(new_node_id)
+
+     def write_json_to_cloud_storage(self,
+                                     path: str,
+                                     connection_name: Optional[str] = None,
+                                     description: Optional[str] = None,
+                                     ) -> "FlowFrame":
+         """
+         Write the data frame to cloud storage in JSON format.
+
+         Args:
+             path (str): The destination path in cloud storage where the JSON file will be written.
+             connection_name (Optional[str], optional): The name of the storage connection
+                 that a user can create. If None, uses the default connection. Defaults to None.
+             description (Optional[str], optional): Description of this operation for the ETL graph.
+         Returns:
+             FlowFrame: A new child data frame representing the written data.
+         """
+         new_node_id = add_write_ff_to_cloud_storage(path, flow_graph=self.flow_graph,
+                                                     connection_name=connection_name,
+                                                     depends_on_node_id=self.node_id,
+                                                     file_format="json",
+                                                     description=description)
+         return self._create_child_frame(new_node_id)
+
      def group_by(self, *by, description: str = None, maintain_order=False, **named_by) -> GroupByFrame:
          """
          Start a group by operation.
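
The four writers added above all delegate to add_write_ff_to_cloud_storage and return a child frame. A hedged usage sketch; the connection name and s3:// paths are placeholders and assume a cloud storage connection has already been configured in Flowfile:

    from flowfile_frame.flow_frame import FlowFrame

    ff = FlowFrame({"id": [1, 2], "value": [10.5, 20.0]})

    # Parquet with the default "snappy" compression.
    ff.write_parquet_to_cloud_storage(
        "s3://my-bucket/out/data.parquet",   # placeholder destination
        connection_name="my-s3-connection",  # placeholder connection
    )

    # CSV: the delimiter defaults to ";", so pass "," explicitly if needed.
    ff.write_csv_to_cloud_storage(
        "s3://my-bucket/out/data.csv",
        connection_name="my-s3-connection",
        delimiter=",",
    )

    # Delta Lake table, appending instead of the default "overwrite".
    ff.write_delta(
        "s3://my-bucket/out/events_delta",
        connection_name="my-s3-connection",
        write_mode="append",
    )

Each call returns a new child FlowFrame node in the ETL graph, so a write can be chained with further transformations.
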
@@ -1275,7 +1400,6 @@ class FlowFrame:
                  by_cols.append(col(col_expr).alias(new_name))
              elif isinstance(col_expr, Expr):
                  by_cols.append(col_expr.alias(new_name))
-
          # Create a GroupByFrame
          return GroupByFrame(
              node_id=new_node_id,
@@ -1292,7 +1416,7 @@ class FlowFrame:
          self.flow_graph.apply_layout()
          self.flow_graph.save_flow(file_path)

-     def collect(self, *args, **kwargs):
+     def collect(self, *args, **kwargs) -> pl.DataFrame:
          """Collect lazy data into memory."""
          if hasattr(self.data, "collect"):
              return self.data.collect(*args, **kwargs)
@@ -1614,8 +1738,7 @@ class FlowFrame:
              combined_graph, node_mappings = combine_flow_graphs_with_mapping(*all_graphs)
              for f in [self] + other:
                  f.node_id = node_mappings.get((f.flow_graph.flow_id, f.node_id), None)
-             global node_id_counter
-             node_id_counter += len(combined_graph.nodes)
+             node_id_data["c"] = node_id_data["c"] + len(combined_graph.nodes)
          else:
              combined_graph = self.flow_graph
          new_node_id = generate_node_id()
@@ -1810,7 +1933,6 @@ class FlowFrame:
          all_input_expr_objects: List[Expr] = []
          pure_polars_expr_strings_for_wc: List[str] = []
          collected_raw_definitions: List[str] = []
-
          has_exprs_or_named_exprs = bool(exprs or named_exprs)
          if has_exprs_or_named_exprs:
              actual_exprs_to_process: List[Expr] = []