Flowfile 0.3.1.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registry.
- flowfile/api.py +10 -4
- flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
- flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
- flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
- flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
- flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
- flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
- flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
- flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
- flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
- flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
- flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
- flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
- flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
- flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
- flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
- flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
- flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
- flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
- flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
- flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
- flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
- flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
- flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
- flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
- flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
- flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
- flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
- flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
- flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
- flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
- flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
- flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
- flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
- flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
- flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
- flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
- flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.1.1.dist-info → flowfile-0.3.2.dist-info}/METADATA +1 -3
- {flowfile-0.3.1.1.dist-info → flowfile-0.3.2.dist-info}/RECORD +62 -64
- flowfile_core/configs/node_store/nodes.py +2 -4
- flowfile_core/flowfile/FlowfileFlow.py +72 -12
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1 -1
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +32 -1
- flowfile_core/flowfile/flow_graph_utils.py +320 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
- flowfile_core/schemas/input_schema.py +2 -10
- flowfile_frame/__init__.py +1 -1
- flowfile_frame/flow_frame.py +455 -51
- flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
- flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
- {flowfile-0.3.1.1.dist-info → flowfile-0.3.2.dist-info}/LICENSE +0 -0
- {flowfile-0.3.1.1.dist-info → flowfile-0.3.2.dist-info}/WHEEL +0 -0
- {flowfile-0.3.1.1.dist-info → flowfile-0.3.2.dist-info}/entry_points.txt +0 -0
flowfile_core/flowfile/FlowfileFlow.py
CHANGED

@@ -2,6 +2,8 @@ import datetime
 import pickle
 import polars as pl
 import fastexcel
+import copy
+
 from fastapi.exceptions import HTTPException
 from time import time
 from functools import partial
@@ -203,8 +205,7 @@ class FlowGraph:
         sample_size: int = 10000

         def analysis_preparation(flowfile_table: FlowDataEngine):
-
-            if flowfile_table.number_of_records<0:
+            if flowfile_table.number_of_records < 0:

                 number_of_records = ExternalDfFetcher(
                     lf=flowfile_table.data_frame,
@@ -219,7 +220,7 @@ class FlowGraph:

             external_sampler = ExternalDfFetcher(
                 lf=flowfile_table.data_frame,
-                file_ref=node.hash,
+                file_ref="__gf_walker"+node.hash,
                 wait_on_completion=True,
                 node_id=node.node_id,
                 flow_id=self.flow_id,
@@ -439,11 +440,11 @@ class FlowGraph:

     def add_formula(self, function_settings: input_schema.NodeFormula):
         error = ""
-        if function_settings.function.field.data_type
+        if function_settings.function.field.data_type not in (None, "Auto"):
             output_type = type_to_polars_str(function_settings.function.field.data_type)
         else:
             output_type = None
-        if output_type
+        if output_type not in (None, "Auto"):
             new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
                                                  data_type=str(output_type))]
         else:
@@ -587,6 +588,8 @@ class FlowGraph:
         input_cols = set(f.name for f in table.schema)
         ids_to_remove = []
         for i, select_col in enumerate(select_cols):
+            if select_col.data_type is None:
+                select_col.data_type = table.get_schema_column(select_col.old_name).data_type
             if select_col.old_name not in input_cols:
                 select_col.is_available = False
             if not select_col.keep:
@@ -900,9 +903,6 @@ class FlowGraph:
         if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
             logger.info('Using provided schema in the node')

-    def add_google_sheet(self, external_source_input: input_schema.NodeExternalSource):
-        logger.info('Adding google sheet reader')
-        self.add_external_source(external_source_input)

     def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
         logger.info('Adding sql source')
@@ -1083,7 +1083,7 @@ class FlowGraph:
         self._output_cols += cols_available

     @property
-    def input_data_columns(self) -> List[str]:
+    def input_data_columns(self) -> List[str] | None:
         if self._input_cols:
             return list(set([col for col in self._input_cols if
                              col in [table_col.name for table_col in self._input_data.schema]]))
@@ -1102,7 +1102,7 @@ class FlowGraph:
         return implicit_starting_nodes

     @execution_mode.setter
-    def execution_mode(self, mode:
+    def execution_mode(self, mode: schemas.ExecutionModeLiteral):
         self.flow_settings.execution_mode = mode

     @property
@@ -1158,13 +1158,13 @@ class FlowGraph:
                     continue
                 node_result.success = node.results.errors is None
                 node_result.end_timestamp = time()
-                node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
                 node_result.is_running = False
             except Exception as e:
                 node_result.error = 'Node did not run'
                 node_result.success = False
                 node_result.end_timestamp = time()
-                node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
                 node_result.is_running = False
                 node_logger.error(f'Error in node {node.node_id}: {e}')
             if not node_result.success:
@@ -1352,6 +1352,66 @@ class FlowGraph:
         getattr(self, f"add_{node_type}")(combined_settings)


+def combine_flow_graphs(*flow_graphs: FlowGraph) -> FlowGraph:
+    """
+    Combine multiple flow graphs into a single graph, ensuring node IDs don't overlap.
+
+    Args:
+        *flow_graphs: Multiple FlowGraph instances to combine
+
+    Returns:
+        A new FlowGraph containing all nodes and edges from the input graphs with remapped IDs
+
+    Raises:
+        ValueError: If any flow_ids overlap
+    """
+    # Validate flow IDs are unique
+    _validate_unique_flow_ids(flow_graphs)
+
+    # Create ID mapping for all nodes
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+
+    # Remap and combine nodes
+    all_nodes = _remap_nodes(flow_graphs, node_id_mapping)
+
+    # Create a new combined flow graph
+    combined_flow_id = hash(tuple(fg.flow_id for fg in flow_graphs))
+    # return FlowGraph(flow_id=combined_flow_id, nodes=all_nodes, edges=all_edges)
+
+
+def _validate_unique_flow_ids(flow_graphs: Tuple[FlowGraph, ...]) -> None:
+    """Ensure all flow graphs have unique flow_ids."""
+    all_flow_ids = [fg.flow_id for fg in flow_graphs]
+    if len(all_flow_ids) != len(set(all_flow_ids)):
+        raise ValueError("Cannot combine overlapping graphs, make sure the graphs have a unique identifier")
+
+
+def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[int, Dict[int, int]]:
+    """Create a mapping from original node IDs to new unique node IDs."""
+    node_id_mapping: Dict[int, Dict[int, int]] = {}
+    next_node_id = 0
+
+    for fg in flow_graphs:
+        node_id_mapping[fg.flow_id] = {}
+        for node in fg.nodes:
+            node_id_mapping[fg.flow_id][node.node_id] = next_node_id
+            next_node_id += 1
+
+    return node_id_mapping
+
+
+def _remap_nodes(flow_graphs: Tuple[FlowGraph, ...],
+                 node_id_mapping: Dict[int, Dict[int, int]]) -> List:
+    """Create new nodes with remapped IDs."""
+    all_nodes = []
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            new_node = copy.deepcopy(node)
+            new_node.node_id = node_id_mapping[fg.flow_id][node.node_id]
+            all_nodes.append(new_node)
+    return all_nodes
+
+
 def combine_existing_settings_and_new_settings(setting_input: Any, new_settings: input_schema.NodePromise) -> Any:
     """Combine excopy_nodeisting settings with new settings from a NodePromise."""
     copied_setting_input = deepcopy(setting_input)
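The module-level `combine_flow_graphs` added here stops short of returning anything (its final `return` is commented out); the completed implementation ships in the new `flow_graph_utils.py` later in this diff. The remapping scheme itself is worth seeing in isolation: every node gets a fresh sequential ID, keyed by the owning graph's `flow_id`, so two graphs that each contain a node 1 no longer collide. Below is a minimal standalone sketch of that scheme; `_Graph` and `_Node` are illustrative stand-ins, not Flowfile classes.

```python
from dataclasses import dataclass
from typing import Dict, List

@dataclass
class _Node:            # stand-in for a FlowGraph node
    node_id: int

@dataclass
class _Graph:           # stand-in for a FlowGraph
    flow_id: int
    nodes: List[_Node]

def create_node_id_mapping(graphs: List[_Graph]) -> Dict[int, Dict[int, int]]:
    # Same walk as _create_node_id_mapping above: one counter across
    # all graphs, one sub-dict of old -> new IDs per flow_id.
    mapping: Dict[int, Dict[int, int]] = {}
    next_node_id = 0
    for g in graphs:
        mapping[g.flow_id] = {}
        for node in g.nodes:
            mapping[g.flow_id][node.node_id] = next_node_id
            next_node_id += 1
    return mapping

g1 = _Graph(flow_id=1, nodes=[_Node(1), _Node(2)])
g2 = _Graph(flow_id=2, nodes=[_Node(1)])   # node_id 1 collides with g1's
print(create_node_id_mapping([g1, g2]))
# {1: {1: 0, 2: 1}, 2: {1: 2}} -- the collision is resolved per flow_id
```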
flowfile_core/flowfile/flow_data_engine/flow_data_engine.py
CHANGED

@@ -1345,7 +1345,7 @@ class FlowDataEngine:
             FlowDataEngine: New instance with added column
         """
         expr = to_expr(func)
-        if output_data_type
+        if output_data_type not in (None, "Auto"):
             df = self.data_frame.with_columns(expr.cast(output_data_type).alias(col_name))
         else:
             df = self.data_frame.with_columns(expr.alias(col_name))
flowfile_core/flowfile/flow_data_engine/polars_code_parser.py
CHANGED

@@ -126,6 +126,37 @@ class PolarsCodeParser:
             'col': pl.col,
             'lit': pl.lit,
             'expr': pl.expr,
+
+            # Polars datatypes - added directly
+            'Int8': pl.Int8,
+            'Int16': pl.Int16,
+            'Int32': pl.Int32,
+            'Int64': pl.Int64,
+            'Int128': pl.Int128,
+            'UInt8': pl.UInt8,
+            'UInt16': pl.UInt16,
+            'UInt32': pl.UInt32,
+            'UInt64': pl.UInt64,
+            'Float32': pl.Float32,
+            'Float64': pl.Float64,
+            'Boolean': pl.Boolean,
+            'String': pl.String,
+            'Utf8': pl.Utf8,
+            'Binary': pl.Binary,
+            'Null': pl.Null,
+            'List': pl.List,
+            'Array': pl.Array,
+            'Struct': pl.Struct,
+            'Object': pl.Object,
+            'Date': pl.Date,
+            'Time': pl.Time,
+            'Datetime': pl.Datetime,
+            'Duration': pl.Duration,
+            'Categorical': pl.Categorical,
+            'Decimal': pl.Decimal,
+            'Enum': pl.Enum,
+            'Unknown': pl.Unknown,
+
             # Basic Python built-ins
             'print': print,
             'len': len,
@@ -142,7 +173,7 @@ class PolarsCodeParser:
             'True': True,
             'False': False,
             'None': None,
-            'time': time
+            'time': time,
         }

     @staticmethod
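With these names injected, user snippets evaluated against the parser's namespace can reference dtypes like `Int64` or `Float64` directly, with no `import polars` inside the snippet. A rough sketch of the mechanism, assuming the parser executes user code against a namespace dict like the one above (the exact evaluation internals are not shown in this diff):

```python
import polars as pl

# Pruned stand-in for the parser namespace built above.
namespace = {
    "col": pl.col,
    "lit": pl.lit,
    "Int64": pl.Int64,
    "Float64": pl.Float64,
}

# A user-supplied snippet: the dtype name resolves via the namespace,
# so the snippet itself needs no polars import.
user_code = 'expr = col("amount").cast(Float64) + lit(1)'
exec(user_code, namespace)
print(namespace["expr"])   # the constructed polars expression
```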
flowfile_core/flowfile/flow_graph_utils.py
ADDED

@@ -0,0 +1,320 @@
+from typing import Dict, Tuple, Optional, List, Set, Callable
+from copy import deepcopy
+from flowfile_core.schemas import input_schema, schemas
+from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
+
+
+def combine_flow_graphs_with_mapping(
+        *flow_graphs: FlowGraph,
+        target_flow_id: Optional[int] = None) -> Tuple[FlowGraph, Dict[Tuple[int, int], int]]:
+    # Validate input parameters
+    _validate_input(flow_graphs)
+
+    # Generate a unique flow ID if not provided
+    if target_flow_id is None:
+        target_flow_id = _generate_unique_flow_id(flow_graphs)
+
+    flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
+    combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+    _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
+    _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
+    return combined_graph, node_id_mapping
+
+
+def combine_flow_graphs(*flow_graphs: FlowGraph, target_flow_id: Optional[int] = None) -> FlowGraph:
+    """
+    Combine multiple flow graphs into a single graph, ensuring node IDs don't overlap.
+
+    Args:
+        *flow_graphs: Multiple FlowGraph instances to combine
+        target_flow_id: Optional ID for the new combined graph. If None, a new ID will be generated.
+
+    Returns:
+        A new FlowGraph containing all nodes and edges from the input graphs with remapped IDs
+
+    Raises:
+        ValueError: If no flow graphs are provided
+    """
+    # Validate input parameters
+    _validate_input(flow_graphs)
+
+    # Generate a unique flow ID if not provided
+    if target_flow_id is None:
+        target_flow_id = _generate_unique_flow_id(flow_graphs)
+
+    flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
+    combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+    _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
+    _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
+
+    return combined_graph
+
+
+def _validate_input(flow_graphs: Tuple[FlowGraph, ...]) -> None:
+    """
+    Validate input parameters.
+
+    Args:
+        flow_graphs: Flow graphs to validate
+
+    Raises:
+        ValueError: If validation fails
+    """
+    if not flow_graphs:
+        raise ValueError("At least one FlowGraph must be provided")
+
+    # Check for duplicate flow IDs
+    flow_ids = [fg.flow_id for fg in flow_graphs]
+    if len(flow_ids) != len(set(flow_ids)):
+        raise ValueError("Cannot combine flows with duplicate flow IDs")
+
+
+def _generate_unique_flow_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
+    """
+    Generate a unique flow ID based on the input flow graphs.
+
+    Args:
+        flow_graphs: Flow graphs to generate ID from
+
+    Returns:
+        int: A new unique flow ID
+    """
+    return abs(hash(tuple(fg.flow_id for fg in flow_graphs))) % 1000000
+
+
+def _create_flow_settings(base_flow_graph: FlowGraph, target_flow_id: int) -> schemas.FlowSettings:
+    """
+    Create flow settings for the combined graph based on an existing graph.
+
+    Args:
+        base_flow_graph: Flow graph to base settings on
+        target_flow_id: The new flow ID
+
+    Returns:
+        schemas.FlowSettings: Flow settings for the combined graph
+    """
+    flow_settings = deepcopy(base_flow_graph.flow_settings)
+    flow_settings.flow_id = target_flow_id
+    flow_settings.name = f"Combined Flow {target_flow_id}"
+    return flow_settings
+
+
+def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[Tuple[int, int], int]:
+    """
+    Create a mapping from (flow_id, original_node_id) to new unique node IDs.
+
+    Args:
+        flow_graphs: Flow graphs to process
+
+    Returns:
+        Dict: Mapping from (flow_id, node_id) to new node ID
+    """
+    node_id_mapping = {}
+    next_node_id = _get_next_available_node_id(flow_graphs)
+
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            node_id_mapping[(fg.flow_id, node.node_id)] = next_node_id
+            next_node_id += 1
+
+    return node_id_mapping
+
+
+def _get_next_available_node_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
+    """
+    Find the next available node ID.
+
+    Args:
+        flow_graphs: Flow graphs to examine
+
+    Returns:
+        int: Next available node ID
+    """
+    max_id = 0
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            max_id = max(max_id, node.node_id)
+    return max_id + 1
+
+
+def _add_nodes_to_combined_graph(
+        flow_graphs: Tuple[FlowGraph, ...],
+        combined_graph: FlowGraph,
+        node_id_mapping: Dict[Tuple[int, int], int],
+        target_flow_id: int
+) -> None:
+    """
+    Add all nodes from source graphs to the combined graph.
+
+    Args:
+        flow_graphs: Source flow graphs
+        combined_graph: Target combined graph
+        node_id_mapping: Mapping of node IDs
+        target_flow_id: Target flow ID
+    """
+    processed_nodes = set()
+
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            # Skip if already processed
+            if (fg.flow_id, node.node_id) in processed_nodes:
+                continue
+
+            # Generate new node ID
+            new_node_id = node_id_mapping[(fg.flow_id, node.node_id)]
+
+            # Create and update setting input
+            setting_input = _create_updated_setting_input(
+                node.setting_input,
+                new_node_id,
+                target_flow_id,
+                fg.flow_id,
+                node_id_mapping
+            )
+
+            # Add node to combined graph
+            _add_node_to_graph(combined_graph, new_node_id, target_flow_id, node.node_type, setting_input)
+
+            processed_nodes.add((fg.flow_id, node.node_id))
+
+
+def _create_updated_setting_input(
+        original_setting_input: any,
+        new_node_id: int,
+        target_flow_id: int,
+        source_flow_id: int,
+        node_id_mapping: Dict[Tuple[int, int], int]
+) -> any:
+    """
+    Create an updated setting input with new node and flow IDs.
+
+    Args:
+        original_setting_input: Original setting input
+        new_node_id: New node ID
+        target_flow_id: Target flow ID
+        source_flow_id: Source flow ID
+        node_id_mapping: Mapping of node IDs
+
+    Returns:
+        Updated setting input
+    """
+    setting_input = deepcopy(original_setting_input)
+
+    # Update node ID
+    if hasattr(setting_input, 'node_id'):
+        setting_input.node_id = new_node_id
+
+    # Update flow ID
+    if hasattr(setting_input, 'flow_id'):
+        setting_input.flow_id = target_flow_id
+
+    # Update depending_on_id if present
+    if hasattr(setting_input, 'depending_on_id') and setting_input.depending_on_id != -1:
+        orig_depending_id = setting_input.depending_on_id
+        setting_input.depending_on_id = node_id_mapping.get((source_flow_id, orig_depending_id), -1)
+
+    # Update depending_on_ids list if present
+    if hasattr(setting_input, 'depending_on_ids'):
+        setting_input.depending_on_ids = [
+            node_id_mapping.get((source_flow_id, dep_id), -1)
+            for dep_id in setting_input.depending_on_ids
+            if dep_id != -1
+        ]
+
+    return setting_input
+
+
+def _add_node_to_graph(
+        graph: FlowGraph,
+        node_id: int,
+        flow_id: int,
+        node_type: str,
+        setting_input: any
+) -> None:
+    """
+    Add a node to the graph.
+
+    Args:
+        graph: Target graph
+        node_id: Node ID
+        flow_id: Flow ID
+        node_type: Node type
+        setting_input: Setting input
+    """
+    # Add node promise to graph
+    node_promise = input_schema.NodePromise(
+        node_id=node_id,
+        flow_id=flow_id,
+        node_type=node_type,
+        is_setup=True,
+        pos_x=getattr(setting_input, 'pos_x', 0),
+        pos_y=getattr(setting_input, 'pos_y', 0),
+        description=getattr(setting_input, 'description', '')
+    )
+    graph.add_node_promise(node_promise)
+
+    # Get node type-specific add method
+    add_method_name = f"add_{node_type}"
+    if hasattr(graph, add_method_name):
+        add_method = getattr(graph, add_method_name)
+        add_method(setting_input)
+
+
+def _add_connections_to_combined_graph(
+        flow_graphs: Tuple[FlowGraph, ...],
+        combined_graph: FlowGraph,
+        node_id_mapping: Dict[Tuple[int, int], int]
+) -> None:
+    """
+    Add all connections from source graphs to the combined graph.
+
+    Args:
+        flow_graphs: Source flow graphs
+        combined_graph: Target combined graph
+        node_id_mapping: Mapping of node IDs
+    """
+    for fg in flow_graphs:
+        for connection in fg.node_connections:
+            source_id, target_id = connection
+            new_source_id = node_id_mapping.get((fg.flow_id, source_id))
+            new_target_id = node_id_mapping.get((fg.flow_id, target_id))
+
+            if new_source_id is not None and new_target_id is not None:
+                input_type = _determine_connection_input_type(fg, source_id, target_id)
+
+                # Create connection in combined graph
+                node_connection = input_schema.NodeConnection.create_from_simple_input(
+                    from_id=new_source_id,
+                    to_id=new_target_id,
+                    input_type=input_type
+                )
+                add_connection(combined_graph, node_connection)
+
+
+def _determine_connection_input_type(
+        flow_graph: FlowGraph,
+        source_id: int,
+        target_id: int
+) -> str:
+    """
+    Determine the input type for a connection.
+
+    Args:
+        flow_graph: Source flow graph
+        source_id: Source node ID
+        target_id: Target node ID
+
+    Returns:
+        str: Input type (main, left, right)
+    """
+    from_node = flow_graph.get_node(source_id)
+    to_node = flow_graph.get_node(target_id)
+
+    if from_node and to_node:
+        input_types = to_node.get_input_type(from_node.node_id)
+        if input_types:
+            return input_types[0]
+
+    return "main"
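The new module exposes two entry points: `combine_flow_graphs` when only the merged graph is needed, and `combine_flow_graphs_with_mapping` when the caller also needs to translate references to the original node IDs. A usage sketch, assuming `graph_a` and `graph_b` are pre-built `FlowGraph` instances with distinct `flow_id`s (construction elided):

```python
from flowfile_core.flowfile.flow_graph_utils import (
    combine_flow_graphs,
    combine_flow_graphs_with_mapping,
)

# graph_a and graph_b are assumed to be existing FlowGraph instances;
# _validate_input rejects duplicate flow_ids.
combined, mapping = combine_flow_graphs_with_mapping(graph_a, graph_b)

# mapping is keyed by (flow_id, original_node_id); look up where a
# node from graph_a landed in the combined graph:
new_id = mapping[(graph_a.flow_id, 1)]

# Or, when the mapping is not needed:
merged = combine_flow_graphs(graph_a, graph_b, target_flow_id=42)
```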
flowfile_core/schemas/input_schema.py
CHANGED

@@ -15,6 +15,7 @@ InputConnectionClass = Literal['input-0', 'input-1', 'input-2', 'input-3', 'inpu

 InputType = Literal["main", "left", "right"]

+
 class NewDirectory(BaseModel):
     source_path: str
     dir_name: str
@@ -341,15 +342,6 @@ class SampleUsers(ExternalSource):
     size: int = 100


-class GoogleSheet(ExternalSource):
-    GOOGLE_SHEET: bool
-    class_name: str = "google_sheet"
-    access_token: SecretStr = None
-    sheet_id: str
-    worksheet_name: str
-    sheet_name: str
-
-
 class AirbyteReader(AirbyteConfig):
     class_name: Optional[str] = "airbyte_reader"
     fields: Optional[List[MinimalFieldInfo]] = None
@@ -362,7 +354,7 @@ class AccessToken(BaseModel):

 class NodeExternalSource(NodeBase):
     identifier: str
-    source_settings: SampleUsers
+    source_settings: SampleUsers


 class NodeAirbyteReader(NodeExternalSource):
flowfile_frame/__init__.py
CHANGED

@@ -23,7 +23,7 @@ from flowfile_frame.selectors import ( # noqa: F401

 # File I/O
 from flowfile_frame.flow_frame import ( # noqa: F401
-    read_csv, read_parquet, from_dict, concat
+    read_csv, read_parquet, from_dict, concat, scan_csv, scan_parquet
 )

 from polars.datatypes import ( # noqa: F401