Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
@@ -1,22 +1,21 @@
-import 
+import io
 import os
-from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, Callable
 from pathlib import Path
+from typing import Any, List, Optional, Union, Dict, Callable, Literal
 
-import io
 import polars as pl
-from polars._typing import (SchemaDict, IO,PolarsDataType,
+from polars._typing import (SchemaDict, IO, PolarsDataType,
                             Sequence, CsvEncoding)
 
-from flowfile_core.flowfile.flow_graph import FlowGraph
 from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
-from flowfile_core.
-
+from flowfile_core.flowfile.flow_graph import FlowGraph
+from flowfile_core.schemas import input_schema, transform_schema, cloud_storage_schemas
+from flowfile_frame.config import logger
 from flowfile_frame.expr import col
-
+from flowfile_frame.flow_frame import FlowFrame
 from flowfile_frame.utils import create_flow_graph
-from flowfile_frame.
-from flowfile_frame.
+from flowfile_frame.cloud_storage.secret_manager import get_current_user_id
+from flowfile_frame.utils import generate_node_id
 
 def sum(expr):
     """Sum aggregation function."""

@@ -140,11 +139,10 @@ def read_csv(
     Returns:
         A FlowFrame with the CSV data.
     """
-    node_id = generate_node_id()
+    node_id = generate_node_id()
     if flow_graph is None:
-        flow_graph = create_flow_graph()
+        flow_graph = create_flow_graph()
     flow_id = flow_graph.flow_id
-
     current_source_path_for_native = None
     if isinstance(source, (str, os.PathLike)):
         current_source_path_for_native = str(source)

@@ -216,11 +214,14 @@ def read_csv(
             description=read_node_description
         )
         flow_graph.add_read(read_node)
+        flow_graph.get_node(1)
+
         result_frame = FlowFrame(
             data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
             flow_graph=flow_graph,
             node_id=node_id
         )
+        flow_graph.get_node(1)
         return result_frame
     else:
         polars_source_arg = source

@@ -278,6 +279,7 @@ def read_csv(
         node_id=node_id,
     )
 
+
 def _build_polars_code_args(
         source: Union[str, Path, IO[bytes], bytes, List[Union[str, Path, IO[bytes], bytes]]],
         separator: str,

@@ -377,13 +379,13 @@ def _build_polars_code_args(
     return polars_code
 
 
-def read_parquet(
+def read_parquet(source, *, flow_graph: FlowGraph = None, description: str = None,
                  convert_to_absolute_path: bool = True, **options) -> FlowFrame:
     """
     Read a Parquet file into a FlowFrame.
 
     Args:
-
+        source: Path to Parquet file
         flow_graph: if you want to add it to an existing graph
         description: if you want to add a readable name in the frontend (advised)
         convert_to_absolute_path: If the path needs to be set to a fixed location

@@ -392,8 +394,8 @@ def read_parquet(file_path, *, flow_graph: FlowGraph = None, description: str =
     Returns:
         A FlowFrame with the Parquet data
     """
-    if '~' in 
-        file_path = os.path.expanduser(
+    if '~' in source:
+        file_path = os.path.expanduser(source)
     node_id = generate_node_id()
 
     if flow_graph is None:

@@ -403,8 +405,8 @@ def read_parquet(file_path, *, flow_graph: FlowGraph = None, description: str =
 
     received_table = input_schema.ReceivedTable(
         file_type='parquet',
-        path=
-        name=Path(
+        path=source,
+        name=Path(source).name,
     )
     if convert_to_absolute_path:
         received_table.path = received_table.abs_file_path

@@ -449,7 +451,7 @@ def from_dict(data, *, flow_graph: FlowGraph = None, description: str = None) ->
     input_node = input_schema.NodeManualInput(
         flow_id=flow_id,
         node_id=node_id,
-
+        raw_data_format=FlowDataEngine(data).to_raw_data(),
         pos_x=100,
         pos_y=100,
         is_setup=True,

@@ -592,7 +594,7 @@ def scan_csv(
 
 
 def scan_parquet(
-
+        source,
         *,
         flow_graph: FlowGraph = None,
         description: str = None,

@@ -608,10 +610,146 @@ def scan_parquet(
     See read_parquet for full documentation.
     """
     return read_parquet(
-
+        source=source,
         flow_graph=flow_graph,
         description=description,
         convert_to_absolute_path=convert_to_absolute_path,
         **options
     )
 
+
+def scan_parquet_from_cloud_storage(
+        source: str,
+        *,
+        flow_graph: Optional[FlowGraph] = None,
+        connection_name: Optional[str] = None,
+        scan_mode: Literal["single_file", "directory", None] = None,
+        description: Optional[str] = None
+) -> FlowFrame:
+    node_id = generate_node_id()
+
+    if scan_mode is None:
+        if source[-1] in ("*", "/"):
+            scan_mode: Literal["single_file", "directory"] = "directory"
+        else:
+            scan_mode: Literal["single_file", "directory"] = "single_file"
+
+    if flow_graph is None:
+        flow_graph = create_flow_graph()
+
+    flow_id = flow_graph.flow_id
+    settings = input_schema.NodeCloudStorageReader(
+        flow_id=flow_id,
+        node_id=node_id,
+        cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(resource_path=source,
+            scan_mode=scan_mode,
+            connection_name=connection_name,
+            file_format="parquet"),
+        user_id=get_current_user_id(),
+        description=description)
+    flow_graph.add_cloud_storage_reader(settings)
+    return FlowFrame(
+        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
+        flow_graph=flow_graph,
+        node_id=node_id
+    )
+
+
+def scan_csv_from_cloud_storage(
+        source: str,
+        *,
+        flow_graph: Optional[FlowGraph] = None,
+        connection_name: Optional[str] = None,
+        scan_mode: Literal["single_file", "directory", None] = None,
+        delimiter: str = ";",
+        has_header: Optional[bool] = True,
+        encoding: Optional[CsvEncoding] = "utf8") -> FlowFrame:
+    node_id = generate_node_id()
+
+    if scan_mode is None:
+        if source[-1] in ("*", "/"):
+            scan_mode: Literal["single_file", "directory"] = "directory"
+        else:
+            scan_mode: Literal["single_file", "directory"] = "single_file"
+
+    if flow_graph is None:
+        flow_graph = create_flow_graph()
+    flow_id = flow_graph.flow_id
+    settings = input_schema.NodeCloudStorageReader(
+        flow_id=flow_id,
+        node_id=node_id,
+        cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(resource_path=source,
+            scan_mode=scan_mode,
+            connection_name=connection_name,
+            csv_delimiter=delimiter,
+            csv_encoding=encoding,
+            csv_has_header=has_header,
+            file_format="csv"),
+        user_id=get_current_user_id())
+    flow_graph.add_cloud_storage_reader(settings)
+    return FlowFrame(
+        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
+        flow_graph=flow_graph,
+        node_id=node_id
+    )
+
+
+def scan_delta(
+        source: str,
+        *,
+        flow_graph: Optional[FlowGraph] = None,
+        connection_name: Optional[str] = None,
+        version: int = None) -> FlowFrame:
+    node_id = generate_node_id()
+    if flow_graph is None:
+        flow_graph = create_flow_graph()
+    flow_id = flow_graph.flow_id
+    settings = input_schema.NodeCloudStorageReader(
+        flow_id=flow_id,
+        node_id=node_id,
+        cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(resource_path=source,
+            connection_name=connection_name,
+            file_format="delta",
+            delta_version=version),
+        user_id=get_current_user_id())
+    flow_graph.add_cloud_storage_reader(settings)
+    return FlowFrame(
+        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
+        flow_graph=flow_graph,
+        node_id=node_id
+    )
+
+
+def scan_json_from_cloud_storage(
+        source: str,
+        *,
+        flow_graph: Optional[FlowGraph] = None,
+        connection_name: Optional[str] = None,
+        scan_mode: Literal["single_file", "directory", None] = None,
+) -> FlowFrame:
+    node_id = generate_node_id()
+
+    if scan_mode is None:
+        if source[-1] in ("*", "/"):
+            scan_mode: Literal["single_file", "directory"] = "directory"
+        else:
+            scan_mode: Literal["single_file", "directory"] = "single_file"
+
+    if flow_graph is None:
+        flow_graph = create_flow_graph()
+    flow_id = flow_graph.flow_id
+    settings = input_schema.NodeCloudStorageReader(
+        flow_id=flow_id,
+        node_id=node_id,
+        cloud_storage_settings=cloud_storage_schemas.CloudStorageReadSettings(resource_path=source,
+            scan_mode=scan_mode,
+            connection_name=connection_name,
+            file_format="json"),
+        user_id=get_current_user_id())
+    flow_graph.add_cloud_storage_reader(settings)
+    return FlowFrame(
+        data=flow_graph.get_node(node_id).get_resulting_data().data_frame,
+        flow_graph=flow_graph,
+        node_id=node_id
+    )
+
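The hunk above adds a set of cloud-storage scan helpers (scan_parquet_from_cloud_storage, scan_csv_from_cloud_storage, scan_delta, scan_json_from_cloud_storage) next to the existing read_csv/read_parquet entry points. A minimal usage sketch based only on the signatures shown in this diff; the top-level flowfile_frame import, the bucket paths and the connection name are assumptions/placeholders:

import flowfile_frame as ff  # assumption: these helpers are re-exported at package level

# Single object: source does not end in "*" or "/", so scan_mode defaults to "single_file".
frame = ff.scan_parquet_from_cloud_storage(
    "s3://example-bucket/data/sales.parquet",  # placeholder path
    connection_name="my-s3-connection",        # placeholder stored connection
    description="sales parquet from S3",
)

# A trailing "/" (or "*") makes the helper fall back to scan_mode="directory".
csv_frame = ff.scan_csv_from_cloud_storage(
    "s3://example-bucket/exports/",
    delimiter=";",
    has_header=True,
)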
flowfile_frame/group_frame.py CHANGED

@@ -91,6 +91,8 @@ class GroupByFrame:
             if isinstance(col_expr, str):
                 agg_cols.append(transform_schema.AggColl(old_name=col_expr, agg="groupby"))
             elif isinstance(col_expr, Expr):
+                if col_expr.is_complex:
+                    return False
                 agg_cols.append(transform_schema.AggColl(old_name=col_expr.column_name, agg="groupby"))
             elif isinstance(col_expr, Selector):
                 return False

@@ -151,6 +153,7 @@ class GroupByFrame:
     def _create_agg_node(self, node_id_to_use: int, can_be_converted: bool, agg_cols: list, agg_expressions,
                          named_agg_exprs, convertable_to_code: bool, description: str):
         """Create node for explicit aggregations via self.agg()."""
+
         if can_be_converted:
             group_by_settings = input_schema.NodeGroupBy(
                 flow_id=self.parent.flow_graph.flow_id,
flowfile_frame/utils.py CHANGED

@@ -88,14 +88,23 @@ def _generate_id() -> int:
     return int(uuid.uuid4().int % 100000)
 
 
-def create_flow_graph() -> FlowGraph:
-
+def create_flow_graph(flow_id: int = None) -> FlowGraph:
+    """
+    Create a new FlowGraph instance with a unique flow ID.
+    Parameters
+    - flow_id (int): Optional flow ID. If not provided, a new unique ID will be generated.
+    Returns
+    - FlowGraph: A new instance of FlowGraph with the specified or generated flow ID.
+
+    """
+    if flow_id is None:
+        flow_id = _generate_id()
     flow_settings = schemas.FlowSettings(
         flow_id=flow_id,
         name=f"Flow_{flow_id}",
         path=f"flow_{flow_id}"
     )
-    flow_graph = FlowGraph(
+    flow_graph = FlowGraph(flow_settings=flow_settings)
     flow_graph.flow_settings.execution_location = 'local'  # always create a local frame so that the run time does not attempt to use the flowfile_worker process
     return flow_graph
 

@@ -119,3 +128,16 @@ def stringify_values(v: Any) -> str:
     else:
         # Handle any other types
         return str(v)
+
+
+data = {"c": 0}
+
+
+def generate_node_id() -> int:
+    data["c"] += 1
+    return data["c"]
+
+
+def set_node_id(node_id):
+    """Set the node ID to a specific value."""
+    data["c"] = node_id
@@ -0,0 +1,216 @@
+"""Cloud storage writer module for FlowFile Worker.
+
+This module provides functionality to write Polars LazyFrames to various cloud storage
+services (S3, Azure ADLS, Google Cloud Storage) in different file formats.
+"""
+
+import polars as pl
+from typing import Dict, Any
+from logging import Logger
+
+from flowfile_worker.external_sources.s3_source.models import (
+    CloudStorageWriteSettings,
+    WriteSettings
+)
+from flowfile_worker.utils import collect_lazy_frame
+
+
+def _write_parquet_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a Parquet file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including compression settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {
+            "path": resource_path,
+            "compression": write_settings.parquet_compression,
+        }
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        try:
+            # Try to use sink_parquet for lazy execution
+            df.sink_parquet(**sink_kwargs)
+        except Exception as e:
+            # Fall back to collecting and writing if sink fails
+            logger.warning(f"Failed to use sink_parquet, falling back to collect and write: {str(e)}")
+            pl_df = collect_lazy_frame(df)
+            sink_kwargs['file'] = sink_kwargs.pop("path")
+            pl_df.write_parquet(**sink_kwargs)
+
+    except Exception as e:
+        logger.error(f"Failed to write Parquet to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write Parquet to cloud storage: {str(e)}")
+
+
+def _write_delta_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to Delta Lake format in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the Delta table will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including write mode.
+        logger: Logger instance for logging operations.
+    """
+    sink_kwargs = {
+        "target": resource_path,
+        "mode": write_settings.write_mode,
+    }
+    if storage_options:
+        sink_kwargs["storage_options"] = storage_options
+
+    # Delta format requires collecting the LazyFrame first
+    collect_lazy_frame(df).write_delta(**sink_kwargs)
+
+
+def _write_csv_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a CSV file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the CSV file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration including delimiter settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {
+            "path": resource_path,
+            "separator": write_settings.csv_delimiter,
+        }
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        # sink_csv executes the lazy query and writes the result
+        df.sink_csv(**sink_kwargs)
+
+    except Exception as e:
+        logger.error(f"Failed to write CSV to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write CSV to cloud storage: {str(e)}")
+
+
+def _write_json_to_cloud(
+        df: pl.LazyFrame,
+        resource_path: str,
+        storage_options: Dict[str, Any],
+        write_settings: WriteSettings,
+        logger: Logger
+) -> None:
+    """Write LazyFrame to a line-delimited JSON (NDJSON) file in cloud storage.
+
+    Args:
+        df: Polars LazyFrame to write.
+        resource_path: Cloud storage path where the NDJSON file will be written.
+        storage_options: Storage-specific options for authentication and configuration.
+        write_settings: Write configuration settings.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        Exception: If writing fails, wrapped with a descriptive error message.
+    """
+    try:
+        sink_kwargs = {"path": resource_path}
+        if storage_options:
+            sink_kwargs["storage_options"] = storage_options
+
+        try:
+            # Try to use sink_ndjson for lazy execution
+            df.sink_ndjson(**sink_kwargs)
+        except Exception as e:
+            # Fall back to collecting and writing if sink fails
+            pl_df = collect_lazy_frame(df)
+            sink_kwargs['file'] = sink_kwargs.pop("path")
+            pl_df.write_ndjson(**sink_kwargs)
+            logger.error(f"Failed to use sink_ndjson, falling back to collect and write: {str(e)}")
+
+    except Exception as e:
+        logger.error(f"Failed to write JSON to {resource_path}: {str(e)}")
+        raise Exception(f"Failed to write JSON to cloud storage: {str(e)}")
+
+writers = {
+    "parquet": _write_parquet_to_cloud,
+    "delta": _write_delta_to_cloud,
+    "csv": _write_csv_to_cloud,
+    "json": _write_json_to_cloud,
+}
+
+
+def write_df_to_cloud(
+        df: pl.LazyFrame,
+        settings: CloudStorageWriteSettings,
+        logger: Logger
+) -> None:
+    """Write a Polars LazyFrame to an object in cloud storage.
+
+    Supports writing to S3, Azure ADLS, and Google Cloud Storage. Currently supports
+    'overwrite' write mode. The 'append' mode is not yet implemented for most formats.
+
+    Args:
+        df: Polars LazyFrame to write to cloud storage.
+        settings: Cloud storage write settings containing connection details and write options.
+        logger: Logger instance for logging operations.
+
+    Raises:
+        ValueError: If the specified file format is not supported.
+        NotImplementedError: If 'append' write mode is used for non-delta formats.
+        Exception: If writing to cloud storage fails.
+    """
+    connection = settings.connection
+    write_settings = settings.write_settings
+    logger.info(
+        f"Writing to {connection.storage_type} storage: {write_settings.resource_path}"
+    )
+    # Validate write mode
+    if write_settings.write_mode == 'append' and write_settings.file_format != "delta":
+        raise NotImplementedError(
+            "The 'append' write mode is not yet supported for this destination."
+        )
+
+    storage_options = connection.get_storage_options()
+
+    # Dispatch to the appropriate writer
+    writer_func = writers.get(write_settings.file_format)
+    if not writer_func:
+        raise ValueError(
+            f"Unsupported file format for writing: {write_settings.file_format}"
+        )
+
+    writer_func(
+        df,
+        write_settings.resource_path,
+        storage_options,
+        write_settings,
+        logger
+    )
+
+    logger.info(f"Successfully wrote data to {write_settings.resource_path}")
@@ -0,0 +1,142 @@
+"""Cloud storage connection schemas for S3, ADLS, and other cloud providers."""
+
+from typing import Optional, Literal, Dict, Any
+import boto3
+from pydantic import BaseModel, SecretStr
+from flowfile_worker.secrets import decrypt_secret
+
+CloudStorageType = Literal["s3", "adls", "gcs"]
+AuthMethod = Literal["access_key", "iam_role", "service_principal", "managed_identity", "sas_token", "aws-cli", "env_vars"]
+
+
+def create_storage_options_from_boto_credentials(profile_name: Optional[str],
+                                                 region_name: Optional[str] = None) -> Dict[str, Any]:
+    """
+    Create a storage options dictionary from AWS credentials using a boto3 profile.
+    This is the most robust way to handle profile-based authentication as it
+    bypasses Polars' internal credential provider chain, avoiding conflicts.
+
+    Parameters
+    ----------
+    profile_name
+        The name of the AWS profile in ~/.aws/credentials.
+    region_name
+        The AWS region to use.
+
+    Returns
+    -------
+    Dict[str, Any]
+        A storage options dictionary for Polars with explicit credentials.
+    """
+    session = boto3.Session(profile_name=profile_name, region_name=region_name)
+    credentials = session.get_credentials()
+    frozen_creds = credentials.get_frozen_credentials()
+
+    storage_options = {
+        "aws_access_key_id": frozen_creds.access_key,
+        "aws_secret_access_key": frozen_creds.secret_key,
+        "aws_session_token": frozen_creds.token,
+    }
+    # Use the session's region if one was resolved, otherwise use the provided one
+    if session.region_name:
+        storage_options["aws_region"] = session.region_name
+
+    print("Boto3: Successfully created storage options with explicit credentials.")
+    return storage_options
+
+
+class FullCloudStorageConnection(BaseModel):
+    """Internal model with decrypted secrets"""
+    storage_type: CloudStorageType
+    auth_method: AuthMethod
+    connection_name: Optional[str] = "None"  # This is the reference to the item we will fetch that contains the data
+
+    # AWS S3
+    aws_region: Optional[str] = None
+    aws_access_key_id: Optional[str] = None
+    aws_secret_access_key: Optional[SecretStr] = None
+    aws_role_arn: Optional[str] = None
+    aws_allow_unsafe_html: Optional[bool] = None
+
+    # Azure ADLS
+    azure_account_name: Optional[str] = None
+    azure_account_key: Optional[SecretStr] = None
+    azure_tenant_id: Optional[str] = None
+    azure_client_id: Optional[str] = None
+    azure_client_secret: Optional[SecretStr] = None
+
+    # Common
+    endpoint_url: Optional[str] = None
+    verify_ssl: bool = True
+
+    def get_storage_options(self) -> Dict[str, Any]:
+        """
+        Build storage options dict based on the connection type and auth method.
+
+        Returns:
+            Dict containing appropriate storage options for the provider
+        """
+        if self.storage_type == "s3":
+            return self._get_s3_storage_options()
+
+    def _get_s3_storage_options(self) -> Dict[str, Any]:
+        """Build S3-specific storage options."""
+        auth_method = self.auth_method
+        print(f"Building S3 storage options for auth_method: '{auth_method}'")
+
+        if auth_method == "aws-cli":
+            return create_storage_options_from_boto_credentials(
+                profile_name=self.connection_name,
+                region_name=self.aws_region
+            )
+
+        storage_options = {}
+        if self.aws_region:
+            storage_options["aws_region"] = self.aws_region
+        if self.endpoint_url:
+            storage_options["endpoint_url"] = self.endpoint_url
+        if not self.verify_ssl:
+            storage_options["verify"] = "False"
+        if self.aws_allow_unsafe_html:  # Note: Polars uses aws_allow_http
+            storage_options["aws_allow_http"] = "true"
+
+        if auth_method == "access_key":
+            storage_options["aws_access_key_id"] = self.aws_access_key_id
+            storage_options["aws_secret_access_key"] = decrypt_secret(
+                self.aws_secret_access_key.get_secret_value()).get_secret_value()
+            # Explicitly clear any session token from the environment
+            storage_options["aws_session_token"] = ""
+
+        elif auth_method == "iam_role":
+            # Correctly implement IAM role assumption using boto3 STS client.
+            sts_client = boto3.client('sts', region_name=self.aws_region)
+            assumed_role_object = sts_client.assume_role(
+                RoleArn=self.aws_role_arn,
+                RoleSessionName="PolarsCloudStorageReaderSession"  # A descriptive session name
+            )
+            credentials = assumed_role_object['Credentials']
+            storage_options["aws_access_key_id"] = credentials['AccessKeyId']
+            storage_options["aws_secret_access_key"] = decrypt_secret(credentials['SecretAccessKey']).get_secret_value()
+            storage_options["aws_session_token"] = decrypt_secret(credentials['SessionToken']).get_secret_value()
+
+        return storage_options
+
+
+class WriteSettings(BaseModel):
+    """Settings for writing to cloud storage"""
+    resource_path: str  # s3://bucket/path/to/file.csv
+
+    write_mode: Literal["overwrite", "append"] = "overwrite"
+    file_format: Literal["csv", "parquet", "json", "delta"] = "parquet"
+
+    parquet_compression: Literal["snappy", "gzip", "brotli", "lz4", "zstd"] = "snappy"
+
+    csv_delimiter: str = ","
+    csv_encoding: str = "utf8"
+
+
+class CloudStorageWriteSettings(BaseModel):
+    write_settings: WriteSettings
+    connection: FullCloudStorageConnection
+    flowfile_flow_id: int = 1
+    flowfile_node_id: int | str = -1