flowfile-0.3.5-py3-none-any.whl → flowfile-0.3.7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile might be problematic.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
--- flowfile_core/flowfile/flow_graph.py (0.3.5)
+++ flowfile_core/flowfile/flow_graph.py (0.3.7)
@@ -2,7 +2,7 @@ import datetime
 import pickle
 import polars as pl
 import fastexcel
-import
+import re
 from fastapi.exceptions import HTTPException
 from time import time
 from functools import partial
@@ -11,37 +11,58 @@ from uuid import uuid1
 from copy import deepcopy
 from pyarrow.parquet import ParquetFile
 from flowfile_core.configs import logger
+from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
 from flowfile_core.configs.flow_logger import FlowLogger
 from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
-from flowfile_core.flowfile.sources.external_sources.airbyte_sources.settings import airbyte_settings_from_config
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import cast_str_to_polars_type, FlowfileColumn
 from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.settings_validator import (calculate_fuzzy_match_schema,
                                                                                        pre_calculate_pivot_schema)
+from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
 from flowfile_core.utils.arrow_reader import get_read_top_n
 from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
 from flowfile_core.flowfile.flow_data_engine.read_excel_tables import get_open_xlsx_datatypes, \
     get_calamine_xlsx_data_types
 from flowfile_core.flowfile.sources import external_sources
 from flowfile_core.schemas import input_schema, schemas, transform_schema
-from flowfile_core.schemas.output_model import
-from flowfile_core.
+from flowfile_core.schemas.output_model import NodeData, NodeResult, RunInformation
+from flowfile_core.schemas.cloud_storage_schemas import (CloudStorageReadSettingsInternal,
+                                                         CloudStorageWriteSettingsInternal,
+                                                         FullCloudStorageConnection,
+                                                         get_cloud_storage_write_settings_worker_interface, AuthMethod)
+from flowfile_core.flowfile.utils import snake_case_to_camel_case
 from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
 from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
 from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
-from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
-    ExternalDatabaseFetcher,
+from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalDatabaseFetcher,
                                                                                                  ExternalDatabaseWriter,
-                                                                                                 ExternalDfFetcher
+                                                                                                 ExternalDfFetcher,
+                                                                                                 ExternalCloudWriter)
 from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
 from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
 from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
-from flowfile_core.flowfile.database_connection_manager.db_connections import get_local_database_connection
+from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
+                                                                               get_local_cloud_connection)
 from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout


 def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
                     end_row: int, end_column: int, has_headers: bool):
+    """Calculates the schema of an XLSX file by reading a sample of rows.
+
+    Args:
+        engine: The engine to use for reading ('openpyxl' or 'calamine').
+        file_path: The path to the XLSX file.
+        sheet_name: The name of the sheet to read.
+        start_row: The starting row for data reading.
+        start_column: The starting column for data reading.
+        end_row: The ending row for data reading.
+        end_column: The ending column for data reading.
+        has_headers: A boolean indicating if the file has a header row.
+
+    Returns:
+        A list of FlowfileColumn objects representing the schema.
+    """
     try:
         logger.info('Starting to calculate the schema')
         if engine == 'openpyxl':
@@ -64,35 +85,80 @@ def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int


 def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
+    """Logs a warning message listing all nodes that will be skipped during execution.
+
+    Args:
+        flow_logger: The logger instance for the flow.
+        nodes: A list of FlowNode objects to be skipped.
+    """
     if len(nodes) > 0:
         msg = "\n".join(str(node) for node in nodes)
         flow_logger.warning(f'skipping nodes:\n{msg}')


 def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
+    """Logs an informational message showing the determined execution order of nodes.
+
+    Args:
+        flow_logger: The logger instance for the flow.
+        nodes: A list of FlowNode objects in the order they will be executed.
+    """
     msg = "\n".join(str(node) for node in nodes)
     flow_logger.info(f'execution order:\n{msg}')


 def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
                              end_row: int, end_column: int, has_headers: bool):
+    """Creates a partially applied function for lazy calculation of an XLSX schema.
+
+    Args:
+        engine: The engine to use for reading.
+        file_path: The path to the XLSX file.
+        sheet_name: The name of the sheet.
+        start_row: The starting row.
+        start_column: The starting column.
+        end_row: The ending row.
+        end_column: The ending column.
+        has_headers: A boolean indicating if the file has headers.
+
+    Returns:
+        A callable function that, when called, will execute `get_xlsx_schema`.
+    """
     return partial(get_xlsx_schema, engine=engine, file_path=file_path, sheet_name=sheet_name, start_row=start_row,
                    start_column=start_column, end_row=end_row, end_column=end_column, has_headers=has_headers)


-
+def get_cloud_connection_settings(connection_name: str,
+                                  user_id: int, auth_mode: AuthMethod) -> FullCloudStorageConnection:
+    """Retrieves cloud storage connection settings, falling back to environment variables if needed.
+
+    Args:
+        connection_name: The name of the saved connection.
+        user_id: The ID of the user owning the connection.
+        auth_mode: The authentication method specified by the user.
+
+    Returns:
+        A FullCloudStorageConnection object with the connection details.
+
+    Raises:
+        HTTPException: If the connection settings cannot be found.
     """
-
-
-
+    cloud_connection_settings = get_local_cloud_connection(connection_name, user_id)
+    if cloud_connection_settings is None and auth_mode in ("env_vars", "auto"):
+        # If the auth mode is aws-cli, we do not need connection settings
+        cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="env_vars")
+    elif cloud_connection_settings is None and auth_mode == "aws-cli":
+        cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="aws-cli")
+    if cloud_connection_settings is None:
+        raise HTTPException(status_code=400, detail="Cloud connection settings not found")
+    return cloud_connection_settings

-The class offers methods to add transformations and data sources, as well as
-methods to run the transformations and generate results.

-
-
-
-
+class FlowGraph:
+    """A class representing a Directed Acyclic Graph (DAG) for data processing pipelines.
+
+    It manages nodes, connections, and the execution of the entire flow.
+    """
     uuid: str
     depends_on: Dict[int, Union[ParquetFile, FlowDataEngine, "FlowGraph", pl.DataFrame,]]
     _flow_id: int
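The `get_cloud_connection_settings` helper above resolves credentials in a fixed order: a saved connection first, then environment variables for `auth_mode` values `"env_vars"` and `"auto"`, then the AWS CLI for `"aws-cli"`; anything else without a saved connection raises an HTTP 400. A minimal behavioral sketch (connection name and user id are hypothetical):

    # No saved connection named "unsaved" exists for this user:
    s = get_cloud_connection_settings("unsaved", user_id=1, auth_mode="auto")
    assert s.storage_type == "s3" and s.auth_method == "env_vars"

    s = get_cloud_connection_settings("unsaved", user_id=1, auth_mode="aws-cli")
    assert s.auth_method == "aws-cli"
    # Any other auth_mode without a saved connection -> HTTPException(status_code=400)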
@@ -114,13 +180,27 @@ class FlowGraph:
     flow_settings: schemas.FlowSettings = None
     flow_logger: FlowLogger

-    def __init__(self,
-                 flow_settings: schemas.FlowSettings,
+    def __init__(self,
+                 flow_settings: schemas.FlowSettings | schemas.FlowGraphConfig,
                  name: str = None, input_cols: List[str] = None,
                  output_cols: List[str] = None,
                  path_ref: str = None,
                  input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
                  cache_results: bool = False):
+        """Initializes a new FlowGraph instance.
+
+        Args:
+            flow_settings: The configuration settings for the flow.
+            name: The name of the flow.
+            input_cols: A list of input column names.
+            output_cols: A list of output column names.
+            path_ref: An optional path to an initial data source.
+            input_flow: An optional existing data object to start the flow with.
+            cache_results: A global flag to enable or disable result caching.
+        """
+        if isinstance(flow_settings, schemas.FlowGraphConfig):
+            flow_settings = schemas.FlowSettings.from_flow_settings_input(flow_settings)
+
         self.flow_settings = flow_settings
         self.uuid = str(uuid1())
         self.nodes_completed = 0
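The constructor change above means callers can now pass either a `schemas.FlowSettings` or a `schemas.FlowGraphConfig`; the latter is normalized via `FlowSettings.from_flow_settings_input` before use. A hedged construction sketch (treating `flow_id` as the only required `FlowGraphConfig` field is an assumption):

    from flowfile_core.schemas import schemas
    from flowfile_core.flowfile.flow_graph import FlowGraph

    config = schemas.FlowGraphConfig(flow_id=1)  # assumed minimal construction
    graph = FlowGraph(flow_settings=config)      # converted to FlowSettings internally
    assert graph.flow_id == 1                    # flow_id now comes from the settings (next hunk)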
@@ -128,8 +208,8 @@ class FlowGraph:
         self.end_datetime = None
         self.latest_run_info = None
         self.node_results = []
-        self._flow_id = flow_id
-        self.flow_logger = FlowLogger(flow_id)
+        self._flow_id = flow_settings.flow_id
+        self.flow_logger = FlowLogger(flow_settings.flow_id)
         self._flow_starts: List[FlowNode] = []
         self._results = None
         self.schema = None
@@ -147,7 +227,13 @@ class FlowGraph:
             self.add_datasource(input_file=input_flow)

     def add_node_promise(self, node_promise: input_schema.NodePromise):
+        """Adds a placeholder node to the graph that is not yet fully configured.
+
+        Useful for building the graph structure before all settings are available.

+        Args:
+            node_promise: A promise object containing basic node information.
+        """
         def placeholder(n: FlowNode = None):
             if n is None:
                 return FlowDataEngine()
@@ -156,10 +242,75 @@ class FlowGraph:
         self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=placeholder,
                            setting_input=node_promise)

-    def
+    def print_tree(self, show_schema=False, show_descriptions=False):
+        """
+        Print flow_graph as a tree.
         """
-
-
+        max_node_id = max(self._node_db.keys())
+
+        tree = ""
+        tabs = 0
+        tab_counter = 0
+        for node in self.nodes:
+            tab_counter += 1
+            node_input = node.setting_input
+            operation = str(self._node_db[node_input.node_id]).split("(")[1][:-1].replace("_", " ").title()
+
+            if operation == "Formula":
+                operation = "With Columns"
+
+            tree += str(operation) + " (id=" + str(node_input.node_id) + ")"
+
+            if show_descriptions & show_schema:
+                raise ValueError('show_descriptions and show_schema cannot be True simultaneously')
+            if show_descriptions:
+                tree += ": " + str(node_input.description)
+            elif show_schema:
+                tree += " -> ["
+                if operation == "Manual Input":
+                    schema = ", ".join([str(i.name) + ": " + str(i.data_type) for i in node_input.raw_data_format.columns])
+                    tree += schema
+                elif operation == "With Columns":
+                    tree_with_col_schema = ", " + node_input.function.field.name + ": " + node_input.function.field.data_type
+                    tree += schema + tree_with_col_schema
+                elif operation == "Filter":
+                    index = node_input.filter_input.advanced_filter.find("]")
+                    filtered_column = str(node_input.filter_input.advanced_filter[1:index])
+                    schema = re.sub('({str(filtered_column)}: [A-Za-z0-9]+\,\s)', "", schema)
+                    tree += schema
+                elif operation == "Group By":
+                    for col in node_input.groupby_input.agg_cols:
+                        schema = re.sub(str(col.old_name) + ': [a-z0-9]+\, ', "", schema)
+                    tree += schema
+                tree += "]"
+            else:
+                if operation == "Manual Input":
+                    tree += ": " + str(node_input.raw_data_format.data)
+                elif operation == "With Columns":
+                    tree += ": " + str(node_input.function)
+                elif operation == "Filter":
+                    tree += ": " + str(node_input.filter_input.advanced_filter)
+                elif operation == "Group By":
+                    tree += ": groupby=[" + ", ".join([col.old_name for col in node_input.groupby_input.agg_cols if col.agg == "groupby"]) + "], "
+                    tree += "agg=[" + ", ".join([str(col.agg) + "(" + str(col.old_name) + ")" for col in node_input.groupby_input.agg_cols if col.agg != "groupby"]) + "]"
+
+            if node_input.node_id < max_node_id:
+                tree += "\n" + "# " + " "*3*(tabs-1) + "|___ "
+        print("\n"*2)
+
+        return print(tree)
+
+
+
+    def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
+        """Calculates and applies a layered layout to all nodes in the graph.
+
+        This updates their x and y positions for UI rendering.
+
+        Args:
+            y_spacing: The vertical spacing between layers.
+            x_spacing: The horizontal spacing between nodes in the same layer.
+            initial_y: The initial y-position for the first layer.
         """
         self.flow_logger.info("Applying layered layout...")
         start_time = time()
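The `print_tree` addition above renders the graph as indented text. Two details worth noting when reading it: the mutual-exclusion guard uses bitwise `&` (which is valid for two bools), and the Filter branch's `re.sub` pattern is a plain string containing `{str(filtered_column)}` rather than an f-string, so that placeholder is matched literally. A usage sketch against an already-built graph:

    graph.print_tree()                        # operation names and node ids
    graph.print_tree(show_descriptions=True)  # append each node's description
    graph.print_tree(show_schema=True)        # append the propagated schema per node
    # Passing both flags raises ValueError.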
@@ -186,7 +337,7 @@ class FlowGraph:
                 else:
                     self.flow_logger.warning(f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes.")
             elif node:
-
+                self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
             # else: Node not found, already warned by calculate_layered_layout

         end_time = time()
@@ -194,51 +345,20 @@ class FlowGraph:

         except Exception as e:
             self.flow_logger.error(f"Error applying layout: {e}")
-            raise
-
-    def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
-        node_analysis = create_graphic_walker_node_from_node_promise(node_promise)
-        self.add_explore_data(node_analysis)
-
-    def add_explore_data(self, node_analysis: input_schema.NodeExploreData):
-        sample_size: int = 10000
-
-        def analysis_preparation(flowfile_table: FlowDataEngine):
-            if flowfile_table.number_of_records <= 0:
-                number_of_records = flowfile_table.get_number_of_records(calculate_in_worker_process=True)
-            else:
-                number_of_records = flowfile_table.number_of_records
-            if number_of_records > sample_size:
-                flowfile_table = flowfile_table.get_sample(sample_size, random=True)
-            external_sampler = ExternalDfFetcher(
-                lf=flowfile_table.data_frame,
-                file_ref="__gf_walker"+node.hash,
-                wait_on_completion=True,
-                node_id=node.node_id,
-                flow_id=self.flow_id,
-            )
-            node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref)
-            return flowfile_table
-
-        def schema_callback():
-            node = self.get_node(node_analysis.node_id)
-            if len(node.all_inputs) == 1:
-                input_node = node.all_inputs[0]
-                return input_node.schema
-            else:
-                return [FlowfileColumn.from_input('col_1', 'na')]
-
-        self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
-                           function=analysis_preparation,
-                           setting_input=node_analysis, schema_callback=schema_callback)
-        node = self.get_node(node_analysis.node_id)
+            raise  # Optional: re-raise the exception

     @property
     def flow_id(self) -> int:
+        """Gets the unique identifier of the flow."""
         return self._flow_id

     @flow_id.setter
     def flow_id(self, new_id: int):
+        """Sets the unique identifier for the flow and updates all child nodes.
+
+        Args:
+            new_id: The new flow ID.
+        """
         self._flow_id = new_id
         for node in self.nodes:
             if hasattr(node.setting_input, 'flow_id'):
@@ -246,23 +366,35 @@ class FlowGraph:
         self.flow_settings.flow_id = new_id

     def __repr__(self):
-        """
-        Official string representation of the FlowGraph class.
-        """
+        """Provides the official string representation of the FlowGraph instance."""
         settings_str = " -" + '\n -'.join(f"{k}: {v}" for k, v in self.flow_settings)
         return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"

     def get_nodes_overview(self):
+        """Gets a list of dictionary representations for all nodes in the graph."""
         output = []
         for v in self._node_db.values():
             output.append(v.get_repr())
         return output

     def remove_from_output_cols(self, columns: List[str]):
+        """Removes specified columns from the list of expected output columns.
+
+        Args:
+            columns: A list of column names to remove.
+        """
         cols = set(columns)
         self._output_cols = [c for c in self._output_cols if c not in cols]

-    def get_node(self, node_id: Union[int, str] = None) -> FlowNode:
+    def get_node(self, node_id: Union[int, str] = None) -> FlowNode | None:
+        """Retrieves a node from the graph by its ID.
+
+        Args:
+            node_id: The ID of the node to retrieve. If None, retrieves the last added node.
+
+        Returns:
+            The FlowNode object, or None if not found.
+        """
         if node_id is None:
             node_id = self._node_ids[-1]
         node = self._node_db.get(node_id)
@@ -270,6 +402,12 @@ class FlowGraph:
         return node

     def add_pivot(self, pivot_settings: input_schema.NodePivot):
+        """Adds a pivot node to the graph.
+
+        Args:
+            pivot_settings: The settings for the pivot operation.
+        """
+
         def _func(fl: FlowDataEngine):
             return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))

@@ -289,6 +427,11 @@ class FlowGraph:
         node.schema_callback = schema_callback

     def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
+        """Adds an unpivot node to the graph.
+
+        Args:
+            unpivot_settings: The settings for the unpivot operation.
+        """

         def _func(fl: FlowDataEngine) -> FlowDataEngine:
             return fl.unpivot(unpivot_settings.unpivot_input)
@@ -300,6 +443,12 @@ class FlowGraph:
                            input_node_ids=[unpivot_settings.depending_on_id])

     def add_union(self, union_settings: input_schema.NodeUnion):
+        """Adds a union node to combine multiple data streams.
+
+        Args:
+            union_settings: The settings for the union operation.
+        """
+
         def _func(*flowfile_tables: FlowDataEngine):
             dfs: List[pl.LazyFrame] | List[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
             return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
@@ -310,7 +459,59 @@ class FlowGraph:
                            setting_input=union_settings,
                            input_node_ids=union_settings.depending_on_ids)

+    def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
+        """Adds a data exploration/analysis node based on a node promise.
+
+        Args:
+            node_promise: The promise representing the node to be analyzed.
+        """
+        node_analysis = create_graphic_walker_node_from_node_promise(node_promise)
+        self.add_explore_data(node_analysis)
+
+    def add_explore_data(self, node_analysis: input_schema.NodeExploreData):
+        """Adds a specialized node for data exploration and visualization.
+
+        Args:
+            node_analysis: The settings for the data exploration node.
+        """
+        sample_size: int = 10000
+
+        def analysis_preparation(flowfile_table: FlowDataEngine):
+            if flowfile_table.number_of_records <= 0:
+                number_of_records = flowfile_table.get_number_of_records(calculate_in_worker_process=True)
+            else:
+                number_of_records = flowfile_table.number_of_records
+            if number_of_records > sample_size:
+                flowfile_table = flowfile_table.get_sample(sample_size, random=True)
+            external_sampler = ExternalDfFetcher(
+                lf=flowfile_table.data_frame,
+                file_ref="__gf_walker"+node.hash,
+                wait_on_completion=True,
+                node_id=node.node_id,
+                flow_id=self.flow_id,
+            )
+            node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref)
+            return flowfile_table
+
+        def schema_callback():
+            node = self.get_node(node_analysis.node_id)
+            if len(node.all_inputs) == 1:
+                input_node = node.all_inputs[0]
+                return input_node.schema
+            else:
+                return [FlowfileColumn.from_input('col_1', 'na')]
+
+        self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
+                           function=analysis_preparation,
+                           setting_input=node_analysis, schema_callback=schema_callback)
+        node = self.get_node(node_analysis.node_id)
+
     def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
+        """Adds a group-by aggregation node to the graph.
+
+        Args:
+            group_by_settings: The settings for the group-by operation.
+        """

         def _func(fl: FlowDataEngine) -> FlowDataEngine:
             return fl.do_group_by(group_by_settings.groupby_input, False)
@@ -324,6 +525,7 @@ class FlowGraph:
         node = self.get_node(group_by_settings.node_id)

         def schema_callback():
+
             output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
             depends_on = node.node_inputs.main_inputs[0]
             input_schema_dict: Dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
@@ -335,22 +537,13 @@ class FlowGraph:

         node.schema_callback = schema_callback

-    def add_or_update_column_func(self, col_name: str, pl_dtype: pl.DataType, depends_on: FlowNode):
-        col_output = FlowfileColumn.from_input(column_name=col_name, data_type=str(pl_dtype))
-        schema = depends_on.schema
-        col_exist = depends_on.get_flow_file_column_schema(col_name)
-        if col_exist is None:
-            new_schema = schema + [col_output]
-        else:
-            new_schema = []
-            for s in self.schema:
-                if s.name == col_name:
-                    new_schema.append(col_output)
-                else:
-                    new_schema.append(s)
-        return new_schema
-
     def add_filter(self, filter_settings: input_schema.NodeFilter):
+        """Adds a filter node to the graph.
+
+        Args:
+            filter_settings: The settings for the filter operation.
+        """
+
         is_advanced = filter_settings.filter_input.filter_type == 'advanced'
         if is_advanced:
             predicate = filter_settings.filter_input.advanced_filter
@@ -384,6 +577,12 @@ class FlowGraph:
         )

     def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
+        """Adds a filter node to the graph.
+
+        Args:
+            node_number_of_records: The settings for the record count operation.
+        """
+
         def _func(fl: FlowDataEngine) -> FlowDataEngine:
             return fl.get_record_count()

@@ -394,9 +593,14 @@ class FlowGraph:
                           input_node_ids=[node_number_of_records.depending_on_id])

     def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
+        """Adds a node that executes custom Polars code.
+
+        Args:
+            node_polars_code: The settings for the Polars code node.
+        """
+
         def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
             return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
-
         self.add_node_step(node_id=node_polars_code.node_id,
                            function=_func,
                            node_type='polars_code',
@@ -409,7 +613,31 @@ class FlowGraph:
             node = self.get_node(node_id=node_polars_code.node_id)
             node.results.errors = str(e)

+    def add_dependency_on_polars_lazy_frame(self,
+                                            lazy_frame: pl.LazyFrame,
+                                            node_id: int):
+        """Adds a special node that directly injects a Polars LazyFrame into the graph.
+
+        Note: This is intended for backend use and will not work in the UI editor.
+
+        Args:
+            lazy_frame: The Polars LazyFrame to inject.
+            node_id: The ID for the new node.
+        """
+        def _func():
+            return FlowDataEngine(lazy_frame)
+        node_promise = input_schema.NodePromise(flow_id=self.flow_id,
+                                                node_id=node_id, node_type="polars_lazy_frame",
+                                                is_setup=True)
+        self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=_func,
+                           setting_input=node_promise)
+
     def add_unique(self, unique_settings: input_schema.NodeUnique):
+        """Adds a node to find and remove duplicate rows.
+
+        Args:
+            unique_settings: The settings for the unique operation.
+        """

         def _func(fl: FlowDataEngine) -> FlowDataEngine:
             return fl.make_unique(unique_settings.unique_input)
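The new `add_dependency_on_polars_lazy_frame` above gives backend code a way to seed a graph from an in-memory Polars LazyFrame instead of a file-based source. A minimal sketch:

    import polars as pl

    lf = pl.LazyFrame({"id": [1, 2, 3], "value": [10.0, 20.0, 30.0]})
    graph.add_dependency_on_polars_lazy_frame(lazy_frame=lf, node_id=1)
    # Behaves like any other source node; per its docstring it is not
    # available from the UI editor.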
@@ -422,6 +650,16 @@ class FlowGraph:
                            input_node_ids=[unique_settings.depending_on_id])

     def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
+        """Adds a node that solves graph-like problems within the data.
+
+        This node can be used for operations like finding network paths,
+        calculating connected components, or performing other graph algorithms
+        on relational data that represents nodes and edges.
+
+        Args:
+            graph_solver_settings: The settings object defining the graph inputs
+                and the specific algorithm to apply.
+        """
         def _func(fl: FlowDataEngine) -> FlowDataEngine:
             return fl.solve_graph(graph_solver_settings.graph_solver_input)

@@ -432,6 +670,12 @@ class FlowGraph:
                            input_node_ids=[graph_solver_settings.depending_on_id])

     def add_formula(self, function_settings: input_schema.NodeFormula):
+        """Adds a node that applies a formula to create or modify a column.
+
+        Args:
+            function_settings: The settings for the formula operation.
+        """
+
         error = ""
         if function_settings.function.field.data_type not in (None, "Auto"):
             output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
@@ -463,6 +707,14 @@ class FlowGraph:
             return True, ""

     def add_cross_join(self, cross_join_settings: input_schema.NodeCrossJoin) -> "FlowGraph":
+        """Adds a cross join node to the graph.
+
+        Args:
+            cross_join_settings: The settings for the cross join operation.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """

         def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
             for left_select in cross_join_settings.cross_join_input.left_select.renames:
@@ -484,6 +736,15 @@ class FlowGraph:
         return self

     def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
+        """Adds a join node to combine two data streams based on key columns.
+
+        Args:
+            join_settings: The settings for the join operation.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
+
         def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
             for left_select in join_settings.join_input.left_select.renames:
                 left_select.is_available = True if left_select.old_name in main.schema else False
@@ -504,6 +765,15 @@ class FlowGraph:
         return self

     def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
+        """Adds a fuzzy matching node to join data on approximate string matches.
+
+        Args:
+            fuzzy_settings: The settings for the fuzzy match operation.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
+
         def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
             f = main.start_fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input, other=right, file_ref=node.hash,
                                       flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
@@ -528,6 +798,18 @@ class FlowGraph:
         return self

     def add_text_to_rows(self, node_text_to_rows: input_schema.NodeTextToRows) -> "FlowGraph":
+        """Adds a node that splits cell values into multiple rows.
+
+        This is useful for un-nesting data where a single field contains multiple
+        values separated by a delimiter.
+
+        Args:
+            node_text_to_rows: The settings object that specifies the column to split
+                and the delimiter to use.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
         def _func(table: FlowDataEngine) -> FlowDataEngine:
             return table.split(node_text_to_rows.text_to_rows_input)

@@ -539,6 +821,15 @@ class FlowGraph:
         return self

     def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
+        """Adds a node to sort the data based on one or more columns.
+
+        Args:
+            sort_settings: The settings for the sort operation.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
+
         def _func(table: FlowDataEngine) -> FlowDataEngine:
             return table.do_sort(sort_settings.sort_input)

@@ -550,6 +841,14 @@ class FlowGraph:
         return self

     def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
+        """Adds a node to take a random or top-N sample of the data.
+
+        Args:
+            sample_settings: The settings object specifying the size of the sample.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
         def _func(table: FlowDataEngine) -> FlowDataEngine:
             return table.get_sample(sample_settings.sample_size)

@@ -562,6 +861,15 @@ class FlowGraph:
         return self

     def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
+        """Adds a node to create a new column with a unique ID for each record.
+
+        Args:
+            record_id_settings: The settings object specifying the name of the
+                new record ID column.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """

         def _func(table: FlowDataEngine) -> FlowDataEngine:
             return table.add_record_id(record_id_settings.record_id_input)
@@ -575,6 +883,15 @@ class FlowGraph:
         return self

     def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
+        """Adds a node to select, rename, reorder, or drop columns.
+
+        Args:
+            select_settings: The settings for the select operation.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
+
         select_cols = select_settings.select_input
         drop_cols = tuple(s.old_name for s in select_settings.select_input)

@@ -608,9 +925,18 @@ class FlowGraph:

     @property
     def graph_has_functions(self) -> bool:
+        """Checks if the graph has any nodes."""
         return len(self._node_ids) > 0

     def delete_node(self, node_id: Union[int, str]):
+        """Deletes a node from the graph and updates all its connections.
+
+        Args:
+            node_id: The ID of the node to delete.
+
+        Raises:
+            Exception: If the node with the given ID does not exist.
+        """
         logger.info(f"Starting deletion of node with ID: {node_id}")

         node = self._node_db.get(node_id)
@@ -643,6 +969,7 @@ class FlowGraph:

     @property
     def graph_has_input_data(self) -> bool:
+        """Checks if the graph has an initial input data source."""
         return self._input_data is not None

     def add_node_step(self,
@@ -656,7 +983,25 @@ class FlowGraph:
                       setting_input: Any = None,
                       cache_results: bool = None,
                       schema_callback: Callable = None,
-                      input_node_ids: List[int] = None):
+                      input_node_ids: List[int] = None) -> FlowNode:
+        """The core method for adding or updating a node in the graph.
+
+        Args:
+            node_id: The unique ID for the node.
+            function: The core processing function for the node.
+            input_columns: A list of input column names required by the function.
+            output_schema: A predefined schema for the node's output.
+            node_type: A string identifying the type of node (e.g., 'filter', 'join').
+            drop_columns: A list of columns to be dropped after the function executes.
+            renew_schema: If True, the schema is recalculated after execution.
+            setting_input: A configuration object containing settings for the node.
+            cache_results: If True, the node's results are cached for future runs.
+            schema_callback: A function that dynamically calculates the output schema.
+            input_node_ids: A list of IDs for the nodes that this node depends on.
+
+        Returns:
+            The created or updated FlowNode object.
+        """
         existing_node = self.get_node(node_id)
         if existing_node is not None:
             if existing_node.node_type != node_type:
@@ -668,15 +1013,13 @@ class FlowGraph:
             input_nodes = [self.get_node(node_id) for node_id in input_node_ids]
         else:
             input_nodes = None
-        if cache_results is None:
-            if hasattr(setting_input, 'cache_results'):
-                cache_results = getattr(setting_input, 'cache_results')
-            cache_results = False if cache_results is None else cache_results
         if isinstance(input_columns, str):
             input_columns = [input_columns]
-
-
-
+        if (
+                input_nodes is not None or
+                function.__name__ in ('placeholder', 'analysis_preparation') or
+                node_type in ("cloud_storage_reader", "polars_lazy_frame", "input_data")
+        ):
             if not existing_node:
                 node = FlowNode(node_id=node_id,
                                 function=function,
@@ -697,14 +1040,18 @@ class FlowGraph:
                                 setting_input=setting_input,
                                 schema_callback=schema_callback)
                 node = existing_node
-        elif node_type == 'input_data':
-            node = None
         else:
             raise Exception("No data initialized")
         self._node_db[node_id] = node
         self._node_ids.append(node_id)
+        return node

     def add_include_cols(self, include_columns: List[str]):
+        """Adds columns to both the input and output column lists.
+
+        Args:
+            include_columns: A list of column names to include.
+        """
         for column in include_columns:
             if column not in self._input_cols:
                 self._input_cols.append(column)
@@ -713,6 +1060,12 @@ class FlowGraph:
         return self

     def add_output(self, output_file: input_schema.NodeOutput):
+        """Adds an output node to write the final data to a destination.
+
+        Args:
+            output_file: The settings for the output file.
+        """
+
         def _func(df: FlowDataEngine):
             output_file.output_settings.populate_abs_file_path()
             execute_remote = self.execution_location != 'local'
@@ -734,7 +1087,12 @@ class FlowGraph:
                            input_node_ids=[input_node_id])

     def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
-
+        """Adds a node to write data to a database.
+
+        Args:
+            node_database_writer: The settings for the database writer node.
+        """
+
         node_type = 'database_writer'
         database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
         database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
@@ -782,6 +1140,12 @@ class FlowGraph:
         node = self.get_node(node_database_writer.node_id)

     def add_database_reader(self, node_database_reader: input_schema.NodeDatabaseReader):
+        """Adds a node to read data from a database.
+
+        Args:
+            node_database_reader: The settings for the database reader node.
+        """
+
         logger.info("Adding database reader")
         node_type = 'database_reader'
         database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
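The final hunk below replaces the Airbyte reader with cloud-storage reader/writer nodes; both resolve credentials through `get_cloud_connection_settings` and, when execution is remote, hand off to the worker via `ExternalCloudWriter`. A hedged wiring sketch of the reader, shown ahead of the hunk; the settings model's exact fields are assumptions inferred from the attributes the code reads (`connection_name`, `auth_mode`, `user_id`, `cache_results`):

    # Hypothetical values; the settings models ship in the new
    # flowfile_core/schemas/cloud_storage_schemas.py added in this release.
    reader = input_schema.NodeCloudStorageReader(
        flow_id=graph.flow_id,
        node_id=1,
        user_id=1,
        cache_results=False,
        cloud_storage_settings=...,  # supplies connection_name and auth_mode
    )
    graph.add_cloud_storage_reader(reader)
    # Reader nodes register themselves as flow starts if not already present.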
@@ -854,80 +1218,129 @@ class FlowGraph:
|
|
|
854
1218
|
self._flow_starts.append(node)
|
|
855
1219
|
self._node_ids.append(node_database_reader.node_id)
|
|
856
1220
|
|
|
857
|
-
def
|
|
858
|
-
|
|
859
|
-
node_type = 'airbyte_reader'
|
|
860
|
-
source_settings: input_schema.AirbyteReader = external_source_input.source_settings
|
|
861
|
-
airbyte_settings = airbyte_settings_from_config(source_settings, flow_id=self.flow_id,
|
|
862
|
-
node_id=external_source_input.node_id)
|
|
1221
|
+
def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
|
|
1222
|
+
"""Adds a node that reads data from a SQL source.
|
|
863
1223
|
|
|
864
|
-
|
|
865
|
-
airbyte_settings.fields = source_settings.fields
|
|
866
|
-
external_source = data_source_factory(source_type='airbyte', airbyte_settings=airbyte_settings)
|
|
1224
|
+
This is a convenience alias for `add_external_source`.
|
|
867
1225
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
1226
|
+
Args:
|
|
1227
|
+
external_source_input: The settings for the external SQL source node.
|
|
1228
|
+
"""
|
|
1229
|
+
logger.info('Adding sql source')
|
|
1230
|
+
self.add_external_source(external_source_input)
|
|
1231
|
+
|
|
1232
|
+
def add_cloud_storage_writer(self, node_cloud_storage_writer: input_schema.NodeCloudStorageWriter) -> None:
|
|
1233
|
+        """Adds a node to write data to a cloud storage provider.
+
+        Args:
+            node_cloud_storage_writer: The settings for the cloud storage writer node.
+        """
+
+        node_type = "cloud_storage_writer"
+        def _func(df: FlowDataEngine):
+            df.lazy = True
+            execute_remote = self.execution_location != 'local'
+            cloud_connection_settings = get_cloud_connection_settings(
+                connection_name=node_cloud_storage_writer.cloud_storage_settings.connection_name,
+                user_id=node_cloud_storage_writer.user_id,
+                auth_mode=node_cloud_storage_writer.cloud_storage_settings.auth_mode
+            )
+            full_cloud_storage_connection = FullCloudStorageConnection(
+                storage_type=cloud_connection_settings.storage_type,
+                auth_method=cloud_connection_settings.auth_method,
+                aws_allow_unsafe_html=cloud_connection_settings.aws_allow_unsafe_html,
+                **CloudStorageReader.get_storage_options(cloud_connection_settings)
+            )
+            if execute_remote:
+                settings = get_cloud_storage_write_settings_worker_interface(
+                    write_settings=node_cloud_storage_writer.cloud_storage_settings,
+                    connection=full_cloud_storage_connection,
+                    lf=df.data_frame,
+                    flowfile_node_id=node_cloud_storage_writer.node_id,
+                    flowfile_flow_id=self.flow_id)
+                external_database_writer = ExternalCloudWriter(settings, wait_on_completion=False)
+                node._fetch_cached_df = external_database_writer
+                external_database_writer.get_result()
+            else:
+                cloud_storage_write_settings_internal = CloudStorageWriteSettingsInternal(
+                    connection=full_cloud_storage_connection,
+                    write_settings=node_cloud_storage_writer.cloud_storage_settings,
+                )
+                df.to_cloud_storage_obj(cloud_storage_write_settings_internal)
+            return df

         def schema_callback():
-
+            logger.info("Starting to run the schema callback for cloud storage writer")
+            if self.get_node(node_cloud_storage_writer.node_id).is_correct:
+                return self.get_node(node_cloud_storage_writer.node_id).node_inputs.main_inputs[0].schema
+            else:
+                return [FlowfileColumn.from_input(column_name="__error__", data_type="String")]

-
-
-
-
-
-
-
-
-
-            node.schema_callback = schema_callback
-        else:
-            node = FlowNode(external_source_input.node_id, function=_func,
-                            setting_input=external_source_input,
-                            name=node_type, node_type=node_type, parent_uuid=self.uuid,
-                            schema_callback=schema_callback)
-            self._node_db[external_source_input.node_id] = node
-            self._flow_starts.append(node)
-            self._node_ids.append(external_source_input.node_id)
-        if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
-            logger.info('Using provided schema in the node')
+        self.add_node_step(
+            node_id=node_cloud_storage_writer.node_id,
+            function=_func,
+            input_columns=[],
+            node_type=node_type,
+            setting_input=node_cloud_storage_writer,
+            schema_callback=schema_callback,
+            input_node_ids=[node_cloud_storage_writer.depending_on_id]
+        )

+        node = self.get_node(node_cloud_storage_writer.node_id)

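Usage sketch (not from the package): driving this writer from an existing FlowGraph. The method name follows the add_{node_type} dispatch convention visible later in this diff; every constructor field below that is not shown in the diff (the exact shape of NodeCloudStorageWriter and its nested settings) is an assumption.

    # Hypothetical sketch: wiring a cloud-storage writer node into a graph.
    # Field names beyond connection_name/auth_mode/user_id/node_id/depending_on_id
    # (all visible in this diff) are assumptions.
    writer_settings = input_schema.NodeCloudStorageWriter(
        node_id=2,
        depending_on_id=1,
        user_id=1,
        cloud_storage_settings={
            "connection_name": "my-s3-connection",
            "auth_mode": "access_key",
        },
    )
    graph.add_cloud_storage_writer(writer_settings)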
-    def
-
-
+    def add_cloud_storage_reader(self, node_cloud_storage_reader: input_schema.NodeCloudStorageReader) -> None:
+        """Adds a cloud storage read node to the flow graph.
+
+        Args:
+            node_cloud_storage_reader: The settings for the cloud storage read node.
+        """
+        node_type = "cloud_storage_reader"
+        logger.info("Adding cloud storage reader")
+        cloud_storage_read_settings = node_cloud_storage_reader.cloud_storage_settings
+
+        def _func():
+            logger.info("Starting to run the schema callback for cloud storage reader")
+            self.flow_logger.info("Starting to run the schema callback for cloud storage reader")
+            settings = CloudStorageReadSettingsInternal(read_settings=cloud_storage_read_settings,
+                                                        connection=get_cloud_connection_settings(
+                                                            connection_name=cloud_storage_read_settings.connection_name,
+                                                            user_id=node_cloud_storage_reader.user_id,
+                                                            auth_mode=cloud_storage_read_settings.auth_mode
+                                                        ))
+            fl = FlowDataEngine.from_cloud_storage_obj(settings)
+            return fl
+
+        node = self.add_node_step(node_id=node_cloud_storage_reader.node_id,
+                                  function=_func,
+                                  cache_results=node_cloud_storage_reader.cache_results,
+                                  setting_input=node_cloud_storage_reader,
+                                  node_type=node_type,
+                                  )
+        if node_cloud_storage_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
+            self._flow_starts.append(node)

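Usage sketch (same caveats as above; only connection_name, auth_mode, user_id, node_id, and cache_results appear in this diff):

    # Hypothetical sketch: a reader node that pulls data from cloud storage.
    reader_settings = input_schema.NodeCloudStorageReader(
        node_id=1,
        user_id=1,
        cache_results=True,
        cloud_storage_settings={
            "connection_name": "my-s3-connection",
            "auth_mode": "access_key",
        },
    )
    graph.add_cloud_storage_reader(reader_settings)
    # Per the code above, the node registers itself as a flow start
    # if it is not already one.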
     def add_external_source(self,
-                            external_source_input: input_schema.NodeExternalSource
-
-
-
-
-
-
-
-
-
-
-
-        external_source = data_source_factory(source_type='custom',
-                                              data_getter=data_getter,
-                                              initial_data_getter=initial_getter,
-                                              orientation=external_source_input.source_settings.orientation,
-                                              schema=None)
+                            external_source_input: input_schema.NodeExternalSource):
+        """Adds a node for a custom external data source.
+
+        Args:
+            external_source_input: The settings for the external source node.
+        """
+
+        node_type = 'external_source'
+        external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
+        source_settings = (getattr(input_schema, snake_case_to_camel_case(external_source_input.identifier)).
+                           model_validate(external_source_input.source_settings))
+        if hasattr(external_source_script, 'initial_getter'):
+            initial_getter = getattr(external_source_script, 'initial_getter')(source_settings)
         else:
-
-
-
-
-
+            initial_getter = None
+        data_getter = external_source_script.getter(source_settings)
+        external_source = data_source_factory(source_type='custom',
+                                              data_getter=data_getter,
+                                              initial_data_getter=initial_getter,
+                                              orientation=external_source_input.source_settings.orientation,
+                                              schema=None)

         def _func():
             logger.info('Calling external source')
@@ -969,6 +1382,12 @@ class FlowGraph:
                       setting_input=external_source_input)

     def add_read(self, input_file: input_schema.NodeRead):
+        """Adds a node to read data from a local file (e.g., CSV, Parquet, Excel).
+
+        Args:
+            input_file: The settings for the read operation.
+        """
+
         if input_file.received_file.file_type in ('xlsx', 'excel') and input_file.received_file.sheet_name == '':
             sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
             input_file.received_file.sheet_name = sheet_name
@@ -984,8 +1403,8 @@ class FlowGraph:
             input_data = FlowDataEngine.create_from_path(input_file.received_file)
         else:
             input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
-
-
+                                                                node_id=input_file.node_id,
+                                                                flow_id=self.flow_id)
         input_data.name = input_file.received_file.name
         return input_data

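The default-sheet rule in add_read can be exercised on its own; this standalone snippet mirrors it (the file path is illustrative):

    # Mirrors add_read's fallback: an empty sheet_name means
    # "use the workbook's first sheet".
    import fastexcel

    path = "sales.xlsx"  # illustrative path
    sheet_name = ""      # empty means the user did not pick a sheet
    if sheet_name == "":
        sheet_name = fastexcel.read_excel(path).sheet_names[0]
    print(f"Reading sheet: {sheet_name}")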
@@ -1037,9 +1456,19 @@ class FlowGraph:
         node.schema_callback = schema_callback
         return self

-    def add_datasource(self, input_file: input_schema.NodeDatasource
+    def add_datasource(self, input_file: Union[input_schema.NodeDatasource, input_schema.NodeManualInput]) -> "FlowGraph":
+        """Adds a data source node to the graph.
+
+        This method serves as a factory for creating starting nodes, handling both
+        file-based sources and direct manual data entry.
+
+        Args:
+            input_file: The configuration object for the data source.
+
+        Returns:
+            The `FlowGraph` instance for method chaining.
+        """
         if isinstance(input_file, input_schema.NodeManualInput):
-            _handle_raw_data(input_file)
             input_data = FlowDataEngine(input_file.raw_data_format)
             ref = 'manual_input'
         else:
@@ -1051,10 +1480,8 @@ class FlowGraph:
             node.name = ref
             node.function = input_data
             node.setting_input = input_file
-
             if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
                 self._flow_starts.append(node)
-
         else:
             input_data.collect()
             node = FlowNode(input_file.node_id, function=input_data,
@@ -1066,29 +1493,35 @@ class FlowGraph:
         return self

     def add_manual_input(self, input_file: input_schema.NodeManualInput):
+        """Adds a node for manual data entry.
+
+        This is a convenience alias for `add_datasource`.
+
+        Args:
+            input_file: The settings and data for the manual input node.
+        """
         self.add_datasource(input_file)

     @property
     def nodes(self) -> List[FlowNode]:
-
-
-    def check_for_missed_cols(self, expected_cols: List):
-        not_filled_cols = set(expected_cols) - set(self._output_cols)
-        cols_available = list(not_filled_cols & set([c.name for c in self._input_data.schema]))
-        self._output_cols += cols_available
+        """Gets a list of all FlowNode objects in the graph."""

-
-    def input_data_columns(self) -> List[str] | None:
-        if self._input_cols:
-            return list(set([col for col in self._input_cols if
-                             col in [table_col.name for table_col in self._input_data.schema]]))
+        return list(self._node_db.values())

     @property
-    def execution_mode(self) ->
+    def execution_mode(self) -> schemas.ExecutionModeLiteral:
+        """Gets the current execution mode ('Development' or 'Performance')."""
         return self.flow_settings.execution_mode

     def get_implicit_starter_nodes(self) -> List[FlowNode]:
-        """
+        """Finds nodes that can act as starting points but are not explicitly defined as such.
+
+        Some nodes, like the Polars Code node, can function without an input. This
+        method identifies such nodes if they have no incoming connections.
+
+        Returns:
+            A list of `FlowNode` objects that are implicit starting nodes.
+        """
         starting_node_ids = [node.node_id for node in self._flow_starts]
         implicit_starting_nodes = []
         for node in self.nodes:
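Usage sketch for the manual-input path (the raw_data_format payload shape is an assumption; only the field name is visible in this diff):

    # Hypothetical sketch: seed a graph with manual data, then inspect its nodes.
    manual = input_schema.NodeManualInput(
        node_id=1,
        raw_data_format=[{"name": "alice", "age": 30}],  # payload shape assumed
    )
    graph.add_manual_input(manual)           # convenience alias for add_datasource
    print([n.node_id for n in graph.nodes])  # nodes lists every FlowNode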
@@ -1098,17 +1531,39 @@ class FlowGraph:

     @execution_mode.setter
     def execution_mode(self, mode: schemas.ExecutionModeLiteral):
+        """Sets the execution mode for the flow.
+
+        Args:
+            mode: The execution mode to set.
+        """
         self.flow_settings.execution_mode = mode

     @property
     def execution_location(self) -> schemas.ExecutionLocationsLiteral:
+        """Gets the current execution location."""
         return self.flow_settings.execution_location

     @execution_location.setter
     def execution_location(self, execution_location: schemas.ExecutionLocationsLiteral):
+        """Sets the execution location for the flow.
+
+        Args:
+            execution_location: The execution location to set.
+        """
         self.flow_settings.execution_location = execution_location

-    def run_graph(self):
+    def run_graph(self) -> RunInformation | None:
+        """Executes the entire data flow graph from start to finish.
+
+        It determines the correct execution order, runs each node,
+        collects results, and handles errors and cancellations.
+
+        Returns:
+            A RunInformation object summarizing the execution results.
+
+        Raises:
+            Exception: If the flow is already running.
+        """
         if self.flow_settings.is_running:
             raise Exception('Flow is already running')
         try:
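Usage sketch combining the setters above with run_graph:

    # Sketch: configure where and how the flow runs, then execute it.
    graph.execution_mode = 'Performance'  # 'Development' or 'Performance'
    graph.execution_location = 'local'    # run in-process rather than on the worker
    run_info = graph.run_graph()          # raises if the flow is already running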
@@ -1130,6 +1585,8 @@ class FlowGraph:
             skip_node_message(self.flow_logger, skip_nodes)
             execution_order_message(self.flow_logger, execution_order)
             performance_mode = self.flow_settings.execution_mode == 'Performance'
+            if self.flow_settings.execution_location == 'local':
+                OFFLOAD_TO_WORKER.value = False
             for node in execution_order:
                 node_logger = self.flow_logger.get_node_logger(node.node_id)
                 if self.flow_settings.is_canceled:
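The OFFLOAD_TO_WORKER.value assignment implies a module-level mutable holder rather than a bare boolean, so the toggle stays visible to every module that imported it. A minimal sketch of that pattern (names illustrative, not Flowfile's actual definition):

    # Illustrative: a mutable holder shares state across importers, which a
    # plain `OFFLOAD_TO_WORKER = False` rebinding would not.
    class _Flag:
        def __init__(self, value: bool):
            self.value = value

    OFFLOAD_TO_WORKER = _Flag(True)    # module A: from settings import OFFLOAD_TO_WORKER
    OFFLOAD_TO_WORKER.value = False    # module B sees the change via the shared object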
@@ -1178,6 +1635,11 @@ class FlowGraph:
         self.flow_settings.is_running = False

     def get_run_info(self) -> RunInformation:
+        """Gets a summary of the most recent graph execution.
+
+        Returns:
+            A RunInformation object with details about the last run.
+        """
         if self.latest_run_info is None:
             node_results = self.node_results
             success = all(nr.success for nr in node_results)
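Usage sketch:

    # Sketch: run the flow, then fetch the summary of that run.
    run_info = graph.run_graph()
    summary = graph.get_run_info()  # recomputes from node results if nothing is cached
    print(summary)                  # RunInformation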
@@ -1197,6 +1659,11 @@ class FlowGraph:

     @property
     def node_connections(self) -> List[Tuple[int, int]]:
+        """Computes and returns a list of all connections in the graph.
+
+        Returns:
+            A list of tuples, where each tuple is a (source_id, target_id) pair.
+        """
         connections = set()
         for node in self.nodes:
             outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
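The (source_id, target_id) pairs are enough to rebuild an execution order outside the library; a small sketch using only the standard library:

    # Sketch: topologically order nodes from the edge list node_connections returns.
    from graphlib import TopologicalSorter

    deps: dict[int, set[int]] = {}
    for source_id, target_id in graph.node_connections:
        deps.setdefault(target_id, set()).add(source_id)
    order = list(TopologicalSorter(deps).static_order())  # sources come first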
@@ -1208,28 +1675,30 @@ class FlowGraph:
                 connections.add(node_connection)
         return list(connections)

-    def
-
-        if len(self._node_ids) > 0:
-            self.schema = self._node_db[self._node_ids[0]].schema
-            return self.schema
+    def get_node_data(self, node_id: int, include_example: bool = True) -> NodeData:
+        """Retrieves all data needed to render a node in the UI.

-
-
-
+        Args:
+            node_id: The ID of the node.
+            include_example: Whether to include data samples in the result.

-
+        Returns:
+            A NodeData object, or None if the node is not found.
+        """
         node = self._node_db[node_id]
         return node.get_node_data(flow_id=self.flow_id, include_example=include_example)

     def get_node_storage(self) -> schemas.FlowInformation:
+        """Serializes the entire graph's state into a storable format.

+        Returns:
+            A FlowInformation object representing the complete graph.
+        """
         node_information = {node.node_id: node.get_node_information() for
                             node in self.nodes if node.is_setup and node.is_correct}

         return schemas.FlowInformation(flow_id=self.flow_id,
                                        flow_name=self.__name__,
-                                       storage_location=self.flow_settings.path,
                                        flow_settings=self.flow_settings,
                                        data=node_information,
                                        node_starts=[v.node_id for v in self._flow_starts],
@@ -1237,6 +1706,8 @@ class FlowGraph:
                                        )

     def cancel(self):
+        """Cancels an ongoing graph execution."""
+
         if not self.flow_settings.is_running:
             return
         self.flow_settings.is_canceled = True
@@ -1244,15 +1715,30 @@ class FlowGraph:
             node.cancel()

     def close_flow(self):
+        """Performs cleanup operations, such as clearing node caches."""
+
         for node in self.nodes:
             node.remove_cache()

     def save_flow(self, flow_path: str):
+        """Saves the current state of the flow graph to a file.
+
+        Args:
+            flow_path: The path where the flow file will be saved.
+        """
         with open(flow_path, 'wb') as f:
             pickle.dump(self.get_node_storage(), f)
         self.flow_settings.path = flow_path

-    def get_frontend_data(self):
+    def get_frontend_data(self) -> dict:
+        """Formats the graph structure into a JSON-like dictionary for a specific legacy frontend.
+
+        This method transforms the graph's state into a format compatible with the
+        Drawflow.js library.
+
+        Returns:
+            A dictionary representing the graph in Drawflow format.
+        """
         result = {
             'Home': {
                 "data": {}
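Because save_flow pickles the FlowInformation from get_node_storage, a saved flow can be reopened with plain pickle (sketch; the file name is illustrative, and unpickling untrusted files is unsafe):

    # Sketch: save a flow, then load the pickled FlowInformation back.
    import pickle

    graph.save_flow("my_flow.flowfile")    # writes pickle.dump(graph.get_node_storage())
    with open("my_flow.flowfile", "rb") as f:
        flow_information = pickle.load(f)  # schemas.FlowInformation
    print(flow_information.flow_name)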
@@ -1323,6 +1809,11 @@ class FlowGraph:
         return result

     def get_vue_flow_input(self) -> schemas.VueFlowInput:
+        """Formats the graph's nodes and edges into a schema suitable for the VueFlow frontend.
+
+        Returns:
+            A VueFlowInput object.
+        """
         edges: List[schemas.NodeEdge] = []
         nodes: List[schemas.NodeInput] = []
         for node in self.nodes:
@@ -1331,11 +1822,19 @@ class FlowGraph:
         return schemas.VueFlowInput(node_edges=edges, node_inputs=nodes)

     def reset(self):
+        """Forces a deep reset on all nodes in the graph."""
+
         for node in self.nodes:
             node.reset(True)

     def copy_node(self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str) -> None:
-        """
+        """Creates a copy of an existing node.
+
+        Args:
+            new_node_settings: The promise containing new settings (like ID and position).
+            existing_setting_input: The settings object from the node being copied.
+            node_type: The type of the node being copied.
+        """
         self.add_node_promise(new_node_settings)

         if isinstance(existing_setting_input, input_schema.NodePromise):
@@ -1346,69 +1845,26 @@ class FlowGraph:
         )
         getattr(self, f"add_{node_type}")(combined_settings)

+    def generate_code(self):
+        """Generates code for the flow graph.
+        This method exports the flow graph to a Polars-compatible format.
+        """
+        from flowfile_core.flowfile.code_generator.code_generator import export_flow_to_polars
+        print(export_flow_to_polars(self))

-
-
-
+
+def combine_existing_settings_and_new_settings(setting_input: Any, new_settings: input_schema.NodePromise) -> Any:
+    """Merges settings from an existing object with new settings from a NodePromise.
+
+    Typically used when copying a node to apply a new ID and position.

     Args:
-
+        setting_input: The original settings object.
+        new_settings: The NodePromise with new positional and ID data.

     Returns:
-        A new
-
-    Raises:
-        ValueError: If any flow_ids overlap
+        A new settings object with the merged properties.
     """
-    # Validate flow IDs are unique
-    _validate_unique_flow_ids(flow_graphs)
-
-    # Create ID mapping for all nodes
-    node_id_mapping = _create_node_id_mapping(flow_graphs)
-
-    # Remap and combine nodes
-    all_nodes = _remap_nodes(flow_graphs, node_id_mapping)
-
-    # Create a new combined flow graph
-    combined_flow_id = hash(tuple(fg.flow_id for fg in flow_graphs))
-    # return FlowGraph(flow_id=combined_flow_id, nodes=all_nodes, edges=all_edges)
-
-
-def _validate_unique_flow_ids(flow_graphs: Tuple[FlowGraph, ...]) -> None:
-    """Ensure all flow graphs have unique flow_ids."""
-    all_flow_ids = [fg.flow_id for fg in flow_graphs]
-    if len(all_flow_ids) != len(set(all_flow_ids)):
-        raise ValueError("Cannot combine overlapping graphs, make sure the graphs have a unique identifier")
-
-
-def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[int, Dict[int, int]]:
-    """Create a mapping from original node IDs to new unique node IDs."""
-    node_id_mapping: Dict[int, Dict[int, int]] = {}
-    next_node_id = 0
-
-    for fg in flow_graphs:
-        node_id_mapping[fg.flow_id] = {}
-        for node in fg.nodes:
-            node_id_mapping[fg.flow_id][node.node_id] = next_node_id
-            next_node_id += 1
-
-    return node_id_mapping
-
-
-def _remap_nodes(flow_graphs: Tuple[FlowGraph, ...],
-                 node_id_mapping: Dict[int, Dict[int, int]]) -> List:
-    """Create new nodes with remapped IDs."""
-    all_nodes = []
-    for fg in flow_graphs:
-        for node in fg.nodes:
-            new_node = copy.deepcopy(node)
-            new_node.node_id = node_id_mapping[fg.flow_id][node.node_id]
-            all_nodes.append(new_node)
-    return all_nodes
-
-
-def combine_existing_settings_and_new_settings(setting_input: Any, new_settings: input_schema.NodePromise) -> Any:
-    """Combine excopy_nodeisting settings with new settings from a NodePromise."""
     copied_setting_input = deepcopy(setting_input)

     # Update only attributes that exist on new_settings
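The merge described in this docstring (deep-copy the original, then overlay only what the NodePromise carries) can be restated generically; this is a simplified sketch, not Flowfile's exact code, and the field list is an assumption:

    # Simplified restatement: copy the old settings, then overwrite only the
    # attributes the NodePromise actually provides (new ID and position).
    from copy import deepcopy

    def overlay(original, promise, fields=("node_id", "pos_x", "pos_y")):
        merged = deepcopy(original)
        for field in fields:  # field list assumed for illustration
            if hasattr(promise, field):
                setattr(merged, field, getattr(promise, field))
        return merged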
@@ -1427,7 +1883,13 @@ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings:
     return copied_setting_input


-def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection):
+def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection) -> None:
+    """Adds a connection between two nodes in the flow graph.
+
+    Args:
+        flow: The FlowGraph instance to modify.
+        node_connection: An object defining the source and target of the connection.
+    """
     logger.info('adding a connection')
     from_node = flow.get_node(node_connection.output_connection.node_id)
     to_node = flow.get_node(node_connection.input_connection.node_id)
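Usage sketch; the constructor shape of NodeConnection beyond the attributes visible in this diff (output_connection.node_id, input_connection.node_id, input_connection.connection_class) is an assumption:

    # Hypothetical sketch: connect node 1's output to node 2's input, then undo it.
    connection = input_schema.NodeConnection(
        output_connection={"node_id": 1},
        input_connection={"node_id": 2, "connection_class": "input-0"},  # class assumed
    )
    add_connection(graph, connection)
    delete_connection(graph, connection)  # validates the connection exists first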
@@ -1439,7 +1901,12 @@ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection


 def delete_connection(graph, node_connection: input_schema.NodeConnection):
-    """
+    """Deletes a connection between two nodes in the flow graph.
+
+    Args:
+        graph: The FlowGraph instance to modify.
+        node_connection: An object defining the connection to be removed.
+    """
     from_node = graph.get_node(node_connection.output_connection.node_id)
     to_node = graph.get_node(node_connection.input_connection.node_id)
     connection_valid = to_node.node_inputs.validate_if_input_connection_exists(
@@ -1455,6 +1922,4 @@ def delete_connection(graph, node_connection: input_schema.NodeConnection):
         to_node.delete_input_node(
             node_connection.output_connection.node_id,
             connection_type=node_connection.input_connection.connection_class,
-        )
-
-
+        )