Flowfile 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +27 -6
- flowfile/api.py +5 -2
- flowfile/web/__init__.py +4 -2
- flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
- flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
- flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
- flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
- flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
- flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
- flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
- flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
- flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
- flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
- flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/METADATA +2 -2
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/RECORD +100 -98
- flowfile_core/__init__.py +1 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +1 -0
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/configs/utils.py +5 -0
- flowfile_core/database/connection.py +1 -3
- flowfile_core/flowfile/code_generator/code_generator.py +71 -0
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -2
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +598 -310
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_graph.py +620 -192
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +510 -89
- flowfile_core/flowfile/flow_node/models.py +125 -20
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +36 -5
- flowfile_core/main.py +32 -13
- flowfile_core/routes/cloud_connections.py +7 -11
- flowfile_core/routes/logs.py +2 -6
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +127 -51
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/input_schema.py +92 -64
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +144 -11
- flowfile_core/schemas/transform_schema.py +82 -17
- flowfile_core/utils/arrow_reader.py +8 -3
- flowfile_core/utils/validate_setup.py +0 -2
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/__init__.py +0 -0
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +42 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +233 -111
- flowfile_frame/flow_frame.pyi +137 -91
- flowfile_frame/flow_frame_methods.py +150 -12
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- test_utils/s3/data_generator.py +1 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +6 -1
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/models.py +0 -193
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/LICENSE +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/WHEEL +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
from typing import List, Union, Callable, Any, Optional, Generator, Literal
|
|
3
2
|
from flowfile_core.configs import logger
|
|
4
3
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
@@ -6,6 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
|
|
|
6
5
|
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
7
6
|
from flowfile_core.schemas import input_schema, schemas
|
|
8
7
|
from flowfile_core.configs.flow_logger import NodeLogger
|
|
8
|
+
from flowfile_core.configs.settings import SINGLE_FILE_MODE, OFFLOAD_TO_WORKER
|
|
9
9
|
|
|
10
10
|
from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
|
|
11
11
|
from flowfile_core.flowfile.utils import get_hash
|
|
@@ -13,13 +13,19 @@ from flowfile_core.configs.node_store import nodes as node_interface
|
|
|
13
13
|
from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
|
|
14
14
|
from time import sleep
|
|
15
15
|
from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
|
|
16
|
-
ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
|
|
16
|
+
ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
|
|
17
|
+
ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
|
|
17
18
|
from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
|
|
18
19
|
NodeStepStats, NodeResults)
|
|
19
20
|
from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class FlowNode:
|
|
24
|
+
"""Represents a single node in a data flow graph.
|
|
25
|
+
|
|
26
|
+
This class manages the node's state, its data processing function,
|
|
27
|
+
and its connections to other nodes within the graph.
|
|
28
|
+
"""
|
|
23
29
|
parent_uuid: str
|
|
24
30
|
node_type: str
|
|
25
31
|
node_template: node_interface.NodeTemplate
|
|
@@ -35,12 +41,62 @@ class FlowNode:
|
|
|
35
41
|
_setting_input: Any = None
|
|
36
42
|
_hash: Optional[str] = None # host this for caching results
|
|
37
43
|
_function: Callable = None # the function that needs to be executed when triggered
|
|
44
|
+
_name: str = None # name of the node, used for display
|
|
38
45
|
_schema_callback: Optional[SingleExecutionFuture] = None # Function that calculates the schema without executing
|
|
39
46
|
_state_needs_reset: bool = False
|
|
40
47
|
_fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
|
|
41
48
|
_cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
|
|
42
49
|
|
|
50
|
+
def __init__(self, node_id: Union[str, int], function: Callable,
|
|
51
|
+
parent_uuid: str,
|
|
52
|
+
setting_input: Any,
|
|
53
|
+
name: str,
|
|
54
|
+
node_type: str,
|
|
55
|
+
input_columns: List[str] = None,
|
|
56
|
+
output_schema: List[FlowfileColumn] = None,
|
|
57
|
+
drop_columns: List[str] = None,
|
|
58
|
+
renew_schema: bool = True,
|
|
59
|
+
pos_x: float = 0,
|
|
60
|
+
pos_y: float = 0,
|
|
61
|
+
schema_callback: Callable = None,
|
|
62
|
+
):
|
|
63
|
+
"""Initializes a FlowNode instance.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
node_id: Unique identifier for the node.
|
|
67
|
+
function: The core data processing function for the node.
|
|
68
|
+
parent_uuid: The UUID of the parent flow.
|
|
69
|
+
setting_input: The configuration/settings object for the node.
|
|
70
|
+
name: The name of the node.
|
|
71
|
+
node_type: The type identifier of the node (e.g., 'join', 'filter').
|
|
72
|
+
input_columns: List of column names expected as input.
|
|
73
|
+
output_schema: The schema of the columns to be added.
|
|
74
|
+
drop_columns: List of column names to be dropped.
|
|
75
|
+
renew_schema: Flag to indicate if the schema should be renewed.
|
|
76
|
+
pos_x: The x-coordinate on the canvas.
|
|
77
|
+
pos_y: The y-coordinate on the canvas.
|
|
78
|
+
schema_callback: A custom function to calculate the output schema.
|
|
79
|
+
"""
|
|
80
|
+
self._name = None
|
|
81
|
+
self.parent_uuid = parent_uuid
|
|
82
|
+
self.post_init()
|
|
83
|
+
self.active = True
|
|
84
|
+
self.node_information.id = node_id
|
|
85
|
+
self.node_type = node_type
|
|
86
|
+
self.node_settings.renew_schema = renew_schema
|
|
87
|
+
self.update_node(function=function,
|
|
88
|
+
input_columns=input_columns,
|
|
89
|
+
output_schema=output_schema,
|
|
90
|
+
drop_columns=drop_columns,
|
|
91
|
+
setting_input=setting_input,
|
|
92
|
+
name=name,
|
|
93
|
+
pos_x=pos_x,
|
|
94
|
+
pos_y=pos_y,
|
|
95
|
+
schema_callback=schema_callback,
|
|
96
|
+
)
|
|
97
|
+
|
|
43
98
|
def post_init(self):
|
|
99
|
+
"""Initializes or resets the node's attributes to their default states."""
|
|
44
100
|
self.node_inputs = NodeStepInputs()
|
|
45
101
|
self.node_stats = NodeStepStats()
|
|
46
102
|
self.node_settings = NodeStepSettings()
|
|
@@ -54,19 +110,32 @@ class FlowNode:
|
|
|
54
110
|
self._state_needs_reset = False
|
|
55
111
|
|
|
56
112
|
@property
|
|
57
|
-
def state_needs_reset(self):
|
|
113
|
+
def state_needs_reset(self) -> bool:
|
|
114
|
+
"""Checks if the node's state needs to be reset.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
True if a reset is required, False otherwise.
|
|
118
|
+
"""
|
|
58
119
|
return self._state_needs_reset
|
|
59
120
|
|
|
60
121
|
@state_needs_reset.setter
|
|
61
122
|
def state_needs_reset(self, v: bool):
|
|
123
|
+
"""Sets the flag indicating that the node's state needs to be reset.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
v: The boolean value to set.
|
|
127
|
+
"""
|
|
62
128
|
self._state_needs_reset = v
|
|
63
129
|
|
|
64
130
|
@staticmethod
|
|
65
131
|
def create_schema_callback_from_function(f: Callable) -> Callable[[], List[FlowfileColumn]]:
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
:
|
|
69
|
-
|
|
132
|
+
"""Wraps a node's function to create a schema callback that extracts the schema.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
f: The node's core function that returns a FlowDataEngine instance.
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
A callable that, when executed, returns the output schema.
|
|
70
139
|
"""
|
|
71
140
|
def schema_callback() -> List[FlowfileColumn]:
|
|
72
141
|
try:
|
|
@@ -79,6 +148,13 @@ class FlowNode:
|
|
|
79
148
|
|
|
80
149
|
@property
|
|
81
150
|
def schema_callback(self) -> SingleExecutionFuture:
|
|
151
|
+
"""Gets the schema callback function, creating one if it doesn't exist.
|
|
152
|
+
|
|
153
|
+
The callback is used for predicting the output schema without full execution.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
A SingleExecutionFuture instance wrapping the schema function.
|
|
157
|
+
"""
|
|
82
158
|
if self._schema_callback is None:
|
|
83
159
|
if self.user_provided_schema_callback is not None:
|
|
84
160
|
self.schema_callback = self.user_provided_schema_callback
|
|
@@ -88,6 +164,11 @@ class FlowNode:
|
|
|
88
164
|
|
|
89
165
|
@schema_callback.setter
|
|
90
166
|
def schema_callback(self, f: Callable):
|
|
167
|
+
"""Sets the schema callback function for the node.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
f: The function to be used for schema calculation.
|
|
171
|
+
"""
|
|
91
172
|
if f is None:
|
|
92
173
|
return
|
|
93
174
|
|
|
@@ -101,9 +182,24 @@ class FlowNode:
|
|
|
101
182
|
|
|
102
183
|
@property
|
|
103
184
|
def is_start(self) -> bool:
|
|
185
|
+
"""Determines if the node is a starting node in the flow.
|
|
186
|
+
|
|
187
|
+
A starting node requires no inputs.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
True if the node is a start node, False otherwise.
|
|
191
|
+
"""
|
|
104
192
|
return not self.has_input and self.node_template.input == 0
|
|
105
193
|
|
|
106
194
|
def get_input_type(self, node_id: int) -> List:
|
|
195
|
+
"""Gets the type of connection ('main', 'left', 'right') for a given input node ID.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
node_id: The ID of the input node.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
A list of connection types for that node ID.
|
|
202
|
+
"""
|
|
107
203
|
relation_type = []
|
|
108
204
|
if node_id in [n.node_id for n in self.node_inputs.main_inputs]:
|
|
109
205
|
relation_type.append('main')
|
|
@@ -113,36 +209,6 @@ class FlowNode:
|
|
|
113
209
|
relation_type.append('right')
|
|
114
210
|
return list(set(relation_type))
|
|
115
211
|
|
|
116
|
-
def __init__(self, node_id: Union[str, int], function: Callable,
|
|
117
|
-
parent_uuid: str,
|
|
118
|
-
setting_input: Any,
|
|
119
|
-
name: str,
|
|
120
|
-
node_type: str,
|
|
121
|
-
input_columns: List[str] = None,
|
|
122
|
-
output_schema: List[FlowfileColumn] = None,
|
|
123
|
-
drop_columns: List[str] = None,
|
|
124
|
-
renew_schema: bool = True,
|
|
125
|
-
pos_x: float = 0,
|
|
126
|
-
pos_y: float = 0,
|
|
127
|
-
schema_callback: Callable = None,
|
|
128
|
-
):
|
|
129
|
-
self.parent_uuid = parent_uuid
|
|
130
|
-
self.post_init()
|
|
131
|
-
self.active = True
|
|
132
|
-
self.node_information.id = node_id
|
|
133
|
-
self.node_type = node_type
|
|
134
|
-
self.node_settings.renew_schema = renew_schema
|
|
135
|
-
self.update_node(function=function,
|
|
136
|
-
input_columns=input_columns,
|
|
137
|
-
output_schema=output_schema,
|
|
138
|
-
drop_columns=drop_columns,
|
|
139
|
-
setting_input=setting_input,
|
|
140
|
-
name=name,
|
|
141
|
-
pos_x=pos_x,
|
|
142
|
-
pos_y=pos_y,
|
|
143
|
-
schema_callback=schema_callback,
|
|
144
|
-
)
|
|
145
|
-
|
|
146
212
|
def update_node(self,
|
|
147
213
|
function: Callable,
|
|
148
214
|
input_columns: List[str] = None,
|
|
@@ -154,9 +220,24 @@ class FlowNode:
|
|
|
154
220
|
pos_y: float = 0,
|
|
155
221
|
schema_callback: Callable = None,
|
|
156
222
|
):
|
|
223
|
+
"""Updates the properties of the node.
|
|
224
|
+
|
|
225
|
+
This is called during initialization and when settings are changed.
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
function: The new core data processing function.
|
|
229
|
+
input_columns: The new list of input columns.
|
|
230
|
+
output_schema: The new schema of added columns.
|
|
231
|
+
drop_columns: The new list of dropped columns.
|
|
232
|
+
name: The new name for the node.
|
|
233
|
+
setting_input: The new settings object.
|
|
234
|
+
pos_x: The new x-coordinate.
|
|
235
|
+
pos_y: The new y-coordinate.
|
|
236
|
+
schema_callback: The new custom schema callback function.
|
|
237
|
+
"""
|
|
157
238
|
self.user_provided_schema_callback = schema_callback
|
|
158
|
-
self.node_information.y_position = pos_y
|
|
159
|
-
self.node_information.x_position = pos_x
|
|
239
|
+
self.node_information.y_position = int(pos_y)
|
|
240
|
+
self.node_information.x_position = int(pos_x)
|
|
160
241
|
self.node_information.setting_input = setting_input
|
|
161
242
|
self.name = self.node_type if name is None else name
|
|
162
243
|
self._function = function
|
|
@@ -178,20 +259,40 @@ class FlowNode:
|
|
|
178
259
|
self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
|
|
179
260
|
|
|
180
261
|
@property
|
|
181
|
-
def name(self):
|
|
262
|
+
def name(self) -> str:
|
|
263
|
+
"""Gets the name of the node.
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
The node's name.
|
|
267
|
+
"""
|
|
182
268
|
return self._name
|
|
183
269
|
|
|
184
270
|
@name.setter
|
|
185
271
|
def name(self, name: str):
|
|
272
|
+
"""Sets the name of the node.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
name: The new name.
|
|
276
|
+
"""
|
|
186
277
|
self._name = name
|
|
187
278
|
self.__name__ = name
|
|
188
279
|
|
|
189
280
|
@property
|
|
190
|
-
def setting_input(self):
|
|
281
|
+
def setting_input(self) -> Any:
|
|
282
|
+
"""Gets the node's specific configuration settings.
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
The settings object.
|
|
286
|
+
"""
|
|
191
287
|
return self._setting_input
|
|
192
288
|
|
|
193
289
|
@setting_input.setter
|
|
194
290
|
def setting_input(self, setting_input: Any):
|
|
291
|
+
"""Sets the node's configuration and triggers a reset if necessary.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
setting_input: The new settings object.
|
|
295
|
+
"""
|
|
195
296
|
is_manual_input = (self.node_type == 'manual_input' and
|
|
196
297
|
isinstance(setting_input, input_schema.NodeManualInput) and
|
|
197
298
|
isinstance(self._setting_input, input_schema.NodeManualInput)
|
|
@@ -209,24 +310,48 @@ class FlowNode:
|
|
|
209
310
|
self.reset()
|
|
210
311
|
|
|
211
312
|
@property
|
|
212
|
-
def node_id(self):
|
|
313
|
+
def node_id(self) -> Union[str, int]:
|
|
314
|
+
"""Gets the unique identifier of the node.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
The node's ID.
|
|
318
|
+
"""
|
|
213
319
|
return self.node_information.id
|
|
214
320
|
|
|
215
321
|
@property
|
|
216
|
-
def left_input(self):
|
|
322
|
+
def left_input(self) -> Optional["FlowNode"]:
|
|
323
|
+
"""Gets the node connected to the left input port.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
The left input FlowNode, or None.
|
|
327
|
+
"""
|
|
217
328
|
return self.node_inputs.left_input
|
|
218
329
|
|
|
219
330
|
@property
|
|
220
|
-
def right_input(self):
|
|
331
|
+
def right_input(self) -> Optional["FlowNode"]:
|
|
332
|
+
"""Gets the node connected to the right input port.
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
The right input FlowNode, or None.
|
|
336
|
+
"""
|
|
221
337
|
return self.node_inputs.right_input
|
|
222
338
|
|
|
223
339
|
@property
|
|
224
340
|
def main_input(self) -> List["FlowNode"]:
|
|
341
|
+
"""Gets the list of nodes connected to the main input port(s).
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
A list of main input FlowNodes.
|
|
345
|
+
"""
|
|
225
346
|
return self.node_inputs.main_inputs
|
|
226
347
|
|
|
227
348
|
@property
|
|
228
|
-
def is_correct(self):
|
|
229
|
-
|
|
349
|
+
def is_correct(self) -> bool:
|
|
350
|
+
"""Checks if the node's input connections satisfy its template requirements.
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
True if connections are valid, False otherwise.
|
|
354
|
+
"""
|
|
230
355
|
if isinstance(self.setting_input, input_schema.NodePromise):
|
|
231
356
|
return False
|
|
232
357
|
return (self.node_template.input == len(self.node_inputs.get_all_inputs()) or
|
|
@@ -234,6 +359,10 @@ class FlowNode:
|
|
|
234
359
|
(self.node_template.multi and self.node_template.can_be_start))
|
|
235
360
|
|
|
236
361
|
def set_node_information(self):
|
|
362
|
+
"""Populates the `node_information` attribute with the current state.
|
|
363
|
+
|
|
364
|
+
This includes the node's connections, settings, and position.
|
|
365
|
+
"""
|
|
237
366
|
logger.info('setting node information')
|
|
238
367
|
node_information = self.node_information
|
|
239
368
|
node_information.left_input_id = self.node_inputs.left_input.node_id if self.left_input else None
|
|
@@ -248,43 +377,76 @@ class FlowNode:
|
|
|
248
377
|
node_information.type = self.node_type
|
|
249
378
|
|
|
250
379
|
def get_node_information(self) -> schemas.NodeInformation:
|
|
380
|
+
"""Updates and returns the node's information object.
|
|
381
|
+
|
|
382
|
+
Returns:
|
|
383
|
+
The `NodeInformation` object for this node.
|
|
384
|
+
"""
|
|
251
385
|
self.set_node_information()
|
|
252
386
|
return self.node_information
|
|
253
387
|
|
|
254
388
|
@property
|
|
255
|
-
def function(self):
|
|
389
|
+
def function(self) -> Callable:
|
|
390
|
+
"""Gets the core processing function of the node.
|
|
391
|
+
|
|
392
|
+
Returns:
|
|
393
|
+
The callable function.
|
|
394
|
+
"""
|
|
256
395
|
return self._function
|
|
257
396
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
397
|
+
@function.setter
|
|
398
|
+
def function(self, function: Callable):
|
|
399
|
+
"""Sets the core processing function of the node.
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
function: The new callable function.
|
|
403
|
+
"""
|
|
404
|
+
self._function = function
|
|
266
405
|
|
|
267
406
|
@property
|
|
268
407
|
def all_inputs(self) -> List["FlowNode"]:
|
|
408
|
+
"""Gets a list of all nodes connected to any input port.
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
A list of all input FlowNodes.
|
|
412
|
+
"""
|
|
269
413
|
return self.node_inputs.get_all_inputs()
|
|
270
414
|
|
|
271
|
-
def calculate_hash(self, setting_input: Any):
|
|
415
|
+
def calculate_hash(self, setting_input: Any) -> str:
|
|
416
|
+
"""Calculates a hash based on settings and input node hashes.
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
setting_input: The node's settings object to be included in the hash.
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
A string hash value.
|
|
423
|
+
"""
|
|
272
424
|
depends_on_hashes = [_node.hash for _node in self.all_inputs]
|
|
273
425
|
node_data_hash = get_hash(setting_input)
|
|
274
426
|
return get_hash(depends_on_hashes + [node_data_hash, self.parent_uuid])
|
|
275
427
|
|
|
276
428
|
@property
|
|
277
|
-
def hash(self):
|
|
429
|
+
def hash(self) -> str:
|
|
430
|
+
"""Gets the cached hash for the node, calculating it if it doesn't exist.
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
The string hash value.
|
|
434
|
+
"""
|
|
278
435
|
if not self._hash:
|
|
279
436
|
self._hash = self.calculate_hash(self.setting_input)
|
|
280
437
|
return self._hash
|
|
281
438
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
439
|
+
def add_node_connection(self, from_node: "FlowNode",
|
|
440
|
+
insert_type: Literal['main', 'left', 'right'] = 'main') -> None:
|
|
441
|
+
"""Adds a connection from a source node to this node.
|
|
442
|
+
|
|
443
|
+
Args:
|
|
444
|
+
from_node: The node to connect from.
|
|
445
|
+
insert_type: The type of input to connect to ('main', 'left', 'right').
|
|
286
446
|
|
|
287
|
-
|
|
447
|
+
Raises:
|
|
448
|
+
Exception: If the insert_type is invalid.
|
|
449
|
+
"""
|
|
288
450
|
from_node.leads_to_nodes.append(self)
|
|
289
451
|
if insert_type == 'main':
|
|
290
452
|
if self.node_template.input <= 2 or self.node_inputs.main_inputs is None:
|
|
@@ -303,20 +465,39 @@ class FlowNode:
|
|
|
303
465
|
self.reset()
|
|
304
466
|
from_node.reset()
|
|
305
467
|
|
|
306
|
-
def evaluate_nodes(self, deep: bool = False):
|
|
468
|
+
def evaluate_nodes(self, deep: bool = False) -> None:
|
|
469
|
+
"""Triggers a state reset for all directly connected downstream nodes.
|
|
470
|
+
|
|
471
|
+
Args:
|
|
472
|
+
deep: If True, the reset propagates recursively through the entire downstream graph.
|
|
473
|
+
"""
|
|
307
474
|
for node in self.leads_to_nodes:
|
|
308
475
|
self.print(f'resetting node: {node.node_id}')
|
|
309
476
|
node.reset(deep)
|
|
310
477
|
|
|
311
|
-
def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn:
|
|
478
|
+
def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn | None:
|
|
479
|
+
"""Retrieves the schema for a specific column from the output schema.
|
|
480
|
+
|
|
481
|
+
Args:
|
|
482
|
+
col_name: The name of the column.
|
|
483
|
+
|
|
484
|
+
Returns:
|
|
485
|
+
The FlowfileColumn object for that column, or None if not found.
|
|
486
|
+
"""
|
|
312
487
|
for s in self.schema:
|
|
313
488
|
if s.column_name == col_name:
|
|
314
489
|
return s
|
|
315
490
|
|
|
316
|
-
def get_predicted_schema(self, force: bool = False):
|
|
317
|
-
"""
|
|
318
|
-
|
|
319
|
-
|
|
491
|
+
def get_predicted_schema(self, force: bool = False) -> List[FlowfileColumn] | None:
|
|
492
|
+
"""Predicts the output schema of the node without full execution.
|
|
493
|
+
|
|
494
|
+
It uses the schema_callback or infers from predicted data.
|
|
495
|
+
|
|
496
|
+
Args:
|
|
497
|
+
force: If True, forces recalculation even if a predicted schema exists.
|
|
498
|
+
|
|
499
|
+
Returns:
|
|
500
|
+
A list of FlowfileColumn objects representing the predicted schema.
|
|
320
501
|
"""
|
|
321
502
|
if self.node_schema.predicted_schema and not force:
|
|
322
503
|
return self.node_schema.predicted_schema
|
|
@@ -338,6 +519,11 @@ class FlowNode:
|
|
|
338
519
|
|
|
339
520
|
@property
|
|
340
521
|
def is_setup(self) -> bool:
|
|
522
|
+
"""Checks if the node has been properly configured and is ready for execution.
|
|
523
|
+
|
|
524
|
+
Returns:
|
|
525
|
+
True if the node is set up, False otherwise.
|
|
526
|
+
"""
|
|
341
527
|
if not self.node_information.is_setup:
|
|
342
528
|
if self.function.__name__ != 'placeholder':
|
|
343
529
|
self.node_information.is_setup = True
|
|
@@ -345,9 +531,24 @@ class FlowNode:
|
|
|
345
531
|
return self.node_information.is_setup
|
|
346
532
|
|
|
347
533
|
def print(self, v: Any):
|
|
534
|
+
"""Helper method to log messages with node context.
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
v: The message or value to log.
|
|
538
|
+
"""
|
|
348
539
|
logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
|
|
349
540
|
|
|
350
541
|
def get_resulting_data(self) -> FlowDataEngine | None:
|
|
542
|
+
"""Executes the node's function to produce the actual output data.
|
|
543
|
+
|
|
544
|
+
Handles both regular functions and external data sources.
|
|
545
|
+
|
|
546
|
+
Returns:
|
|
547
|
+
A FlowDataEngine instance containing the result, or None on error.
|
|
548
|
+
|
|
549
|
+
Raises:
|
|
550
|
+
Exception: Propagates exceptions from the node's function execution.
|
|
551
|
+
"""
|
|
351
552
|
if self.is_setup:
|
|
352
553
|
if self.results.resulting_data is None and self.results.errors is None:
|
|
353
554
|
self.print('getting resulting data')
|
|
@@ -375,6 +576,13 @@ class FlowNode:
|
|
|
375
576
|
return self.results.resulting_data
|
|
376
577
|
|
|
377
578
|
def _predicted_data_getter(self) -> FlowDataEngine | None:
|
|
579
|
+
"""Internal helper to get a predicted data result.
|
|
580
|
+
|
|
581
|
+
This calls the function with predicted data from input nodes.
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
A FlowDataEngine instance with predicted data, or an empty one on error.
|
|
585
|
+
"""
|
|
378
586
|
try:
|
|
379
587
|
fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
|
|
380
588
|
return fl
|
|
@@ -391,6 +599,13 @@ class FlowNode:
|
|
|
391
599
|
logger.warning(e)
|
|
392
600
|
|
|
393
601
|
def get_predicted_resulting_data(self) -> FlowDataEngine:
|
|
602
|
+
"""Creates a `FlowDataEngine` instance based on the predicted schema.
|
|
603
|
+
|
|
604
|
+
This avoids executing the node's full logic.
|
|
605
|
+
|
|
606
|
+
Returns:
|
|
607
|
+
A FlowDataEngine instance with a schema but no data.
|
|
608
|
+
"""
|
|
394
609
|
if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
|
|
395
610
|
self.print('Getting data based on the schema')
|
|
396
611
|
|
|
@@ -404,17 +619,28 @@ class FlowNode:
|
|
|
404
619
|
return fl
|
|
405
620
|
|
|
406
621
|
def add_lead_to_in_depend_source(self):
|
|
622
|
+
"""Ensures this node is registered in the `leads_to_nodes` list of its inputs."""
|
|
407
623
|
for input_node in self.all_inputs:
|
|
408
624
|
if self.node_id not in [n.node_id for n in input_node.leads_to_nodes]:
|
|
409
625
|
input_node.leads_to_nodes.append(self)
|
|
410
626
|
|
|
411
627
|
def get_all_dependent_nodes(self) -> Generator["FlowNode", None, None]:
|
|
628
|
+
"""Yields all downstream nodes recursively.
|
|
629
|
+
|
|
630
|
+
Returns:
|
|
631
|
+
A generator of all dependent FlowNode objects.
|
|
632
|
+
"""
|
|
412
633
|
for node in self.leads_to_nodes:
|
|
413
634
|
yield node
|
|
414
635
|
for n in node.get_all_dependent_nodes():
|
|
415
636
|
yield n
|
|
416
637
|
|
|
417
638
|
def get_all_dependent_node_ids(self) -> Generator[int, None, None]:
|
|
639
|
+
"""Yields the IDs of all downstream nodes recursively.
|
|
640
|
+
|
|
641
|
+
Returns:
|
|
642
|
+
A generator of all dependent node IDs.
|
|
643
|
+
"""
|
|
418
644
|
for node in self.leads_to_nodes:
|
|
419
645
|
yield node.node_id
|
|
420
646
|
for n in node.get_all_dependent_node_ids():
|
|
@@ -422,6 +648,13 @@ class FlowNode:
|
|
|
422
648
|
|
|
423
649
|
@property
|
|
424
650
|
def schema(self) -> List[FlowfileColumn]:
|
|
651
|
+
"""Gets the definitive output schema of the node.
|
|
652
|
+
|
|
653
|
+
If not already run, it falls back to the predicted schema.
|
|
654
|
+
|
|
655
|
+
Returns:
|
|
656
|
+
A list of FlowfileColumn objects.
|
|
657
|
+
"""
|
|
425
658
|
try:
|
|
426
659
|
if self.is_setup and self.results.errors is None:
|
|
427
660
|
if self.node_schema.result_schema is not None and len(self.node_schema.result_schema) > 0:
|
|
@@ -434,31 +667,42 @@ class FlowNode:
|
|
|
434
667
|
return self.node_schema.result_schema
|
|
435
668
|
else:
|
|
436
669
|
return []
|
|
437
|
-
except:
|
|
670
|
+
except Exception as e:
|
|
671
|
+
logger.error(e)
|
|
438
672
|
return []
|
|
439
673
|
|
|
440
|
-
def load_from_cache(self) -> FlowDataEngine:
|
|
441
|
-
if results_exists(self.hash):
|
|
442
|
-
try:
|
|
443
|
-
return FlowDataEngine(self._fetch_cached_df.get_result())
|
|
444
|
-
except Exception as e:
|
|
445
|
-
logger.error(e)
|
|
446
|
-
|
|
447
674
|
def remove_cache(self):
    """Placeholder for dropping this node's cached results.

    Nothing is deleted. When `results_exists` reports a cached result for
    the node's hash, a 'Not implemented' warning is logged; otherwise the
    call does nothing.
    """
    has_cached_result = results_exists(self.hash)
    if not has_cached_result:
        return
    logger.warning('Not implemented')
|
|
450
682
|
|
|
451
683
|
def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
|
|
452
684
|
execution_location: schemas.ExecutionLocationsLiteral = "auto") -> bool:
|
|
453
|
-
if
|
|
685
|
+
"""Determines if the node needs to be executed.
|
|
686
|
+
|
|
687
|
+
The decision is based on its run state, caching settings, and execution mode.
|
|
688
|
+
|
|
689
|
+
Args:
|
|
690
|
+
performance_mode: True if the flow is in performance mode.
|
|
691
|
+
node_logger: The logger instance for this node.
|
|
692
|
+
execution_location: The target execution location.
|
|
693
|
+
|
|
694
|
+
Returns:
|
|
695
|
+
True if the node should be run, False otherwise.
|
|
696
|
+
"""
|
|
697
|
+
if execution_location == "local" or SINGLE_FILE_MODE:
|
|
454
698
|
return False
|
|
699
|
+
|
|
455
700
|
flow_logger = logger if node_logger is None else node_logger
|
|
456
701
|
cache_result_exists = results_exists(self.hash)
|
|
457
702
|
if not self.node_stats.has_run_with_current_setup:
|
|
458
703
|
flow_logger.info('Node has not run, needs to run')
|
|
459
704
|
return True
|
|
460
705
|
if self.node_settings.cache_results and cache_result_exists:
|
|
461
|
-
|
|
462
706
|
return False
|
|
463
707
|
elif self.node_settings.cache_results and not cache_result_exists:
|
|
464
708
|
return True
|
|
@@ -468,9 +712,44 @@ class FlowNode:
|
|
|
468
712
|
return True
|
|
469
713
|
|
|
470
714
|
def __call__(self, *args, **kwargs):
    """Run the node by forwarding all arguments to `execute_node`.

    Whatever `execute_node` returns is discarded; calling the node always
    evaluates to None.
    """
    run = self.execute_node
    run(*args, **kwargs)
|
|
472
717
|
|
|
718
|
+
def execute_full_local(self, performance_mode: bool = False) -> None:
|
|
719
|
+
"""Executes the node's logic locally, including example data generation.
|
|
720
|
+
|
|
721
|
+
Args:
|
|
722
|
+
performance_mode: If True, skips generating example data.
|
|
723
|
+
|
|
724
|
+
Raises:
|
|
725
|
+
Exception: Propagates exceptions from the execution.
|
|
726
|
+
"""
|
|
727
|
+
def example_data_generator():
|
|
728
|
+
example_data = None
|
|
729
|
+
|
|
730
|
+
def get_example_data():
|
|
731
|
+
nonlocal example_data
|
|
732
|
+
if example_data is None:
|
|
733
|
+
example_data = resulting_data.get_sample(100).to_arrow()
|
|
734
|
+
return example_data
|
|
735
|
+
return get_example_data
|
|
736
|
+
resulting_data = self.get_resulting_data()
|
|
737
|
+
|
|
738
|
+
if not performance_mode:
|
|
739
|
+
self.results.example_data_generator = example_data_generator()
|
|
740
|
+
self.node_schema.result_schema = self.results.resulting_data.schema
|
|
741
|
+
self.node_stats.has_completed_last_run = True
|
|
742
|
+
|
|
473
743
|
def execute_local(self, flow_id: int, performance_mode: bool = False):
|
|
744
|
+
"""Executes the node's logic locally.
|
|
745
|
+
|
|
746
|
+
Args:
|
|
747
|
+
flow_id: The ID of the parent flow.
|
|
748
|
+
performance_mode: If True, skips generating example data.
|
|
749
|
+
|
|
750
|
+
Raises:
|
|
751
|
+
Exception: Propagates exceptions from the execution.
|
|
752
|
+
"""
|
|
474
753
|
try:
|
|
475
754
|
resulting_data = self.get_resulting_data()
|
|
476
755
|
if not performance_mode:
|
|
@@ -495,7 +774,15 @@ class FlowNode:
|
|
|
495
774
|
step.node_settings.streamable = self.node_settings.streamable
|
|
496
775
|
|
|
497
776
|
def execute_remote(self, performance_mode: bool = False, node_logger: NodeLogger = None):
|
|
498
|
-
|
|
777
|
+
"""Executes the node's logic remotely or handles cached results.
|
|
778
|
+
|
|
779
|
+
Args:
|
|
780
|
+
performance_mode: If True, skips generating example data.
|
|
781
|
+
node_logger: The logger for this node execution.
|
|
782
|
+
|
|
783
|
+
Raises:
|
|
784
|
+
Exception: If the node_logger is not provided or if execution fails.
|
|
785
|
+
"""
|
|
499
786
|
if node_logger is None:
|
|
500
787
|
raise Exception('Node logger is not defined')
|
|
501
788
|
if self.node_settings.cache_results and results_exists(self.hash):
|
|
@@ -552,11 +839,15 @@ class FlowNode:
|
|
|
552
839
|
self._fetch_cached_df = None
|
|
553
840
|
|
|
554
841
|
def prepare_before_run(self):
    """Clear what a previous run left behind, ahead of a new execution.

    Sets `errors`, `resulting_data` and `example_data` on `self.results`
    to None, in that order.
    """
    for stale_field in ("errors", "resulting_data", "example_data"):
        setattr(self.results, stale_field, None)
|
|
558
847
|
|
|
559
848
|
def cancel(self):
|
|
849
|
+
"""Cancels an ongoing external process if one is running."""
|
|
850
|
+
|
|
560
851
|
if self._fetch_cached_df is not None:
|
|
561
852
|
self._fetch_cached_df.cancel()
|
|
562
853
|
self.node_stats.is_canceled = True
|
|
@@ -566,6 +857,18 @@ class FlowNode:
|
|
|
566
857
|
|
|
567
858
|
def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
|
|
568
859
|
performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
|
|
860
|
+
"""Orchestrates the execution, handling location, caching, and retries.
|
|
861
|
+
|
|
862
|
+
Args:
|
|
863
|
+
run_location: The location for execution ('local', 'remote').
|
|
864
|
+
reset_cache: If True, forces removal of any existing cache.
|
|
865
|
+
performance_mode: If True, optimizes for speed over diagnostics.
|
|
866
|
+
retry: If True, allows retrying execution on recoverable errors.
|
|
867
|
+
node_logger: The logger for this node execution.
|
|
868
|
+
|
|
869
|
+
Raises:
|
|
870
|
+
Exception: If the node_logger is not defined.
|
|
871
|
+
"""
|
|
569
872
|
if node_logger is None:
|
|
570
873
|
raise Exception('Flow logger is not defined')
|
|
571
874
|
# node_logger = flow_logger.get_node_logger(self.node_id)
|
|
@@ -575,7 +878,8 @@ class FlowNode:
|
|
|
575
878
|
self.node_stats.has_completed_last_run = False
|
|
576
879
|
if self.is_setup:
|
|
577
880
|
node_logger.info(f'Starting to run {self.__name__}')
|
|
578
|
-
if self.needs_run(performance_mode, node_logger, run_location)
|
|
881
|
+
if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
|
|
882
|
+
and not (run_location == 'local' or SINGLE_FILE_MODE)):
|
|
579
883
|
self.prepare_before_run()
|
|
580
884
|
try:
|
|
581
885
|
if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
|
|
@@ -605,13 +909,28 @@ class FlowNode:
|
|
|
605
909
|
else:
|
|
606
910
|
self.results.errors = str(e)
|
|
607
911
|
node_logger.error(f'Error with running the node: {e}')
|
|
608
|
-
|
|
912
|
+
elif ((run_location == 'local' or SINGLE_FILE_MODE) and
|
|
913
|
+
(not self.node_stats.has_run_with_current_setup or self.node_template.node_group == "output")):
|
|
914
|
+
try:
|
|
915
|
+
node_logger.info('Executing fully locally')
|
|
916
|
+
self.execute_full_local(performance_mode)
|
|
917
|
+
except Exception as e:
|
|
918
|
+
self.results.errors = str(e)
|
|
919
|
+
node_logger.error(f'Error with running the node: {e}')
|
|
920
|
+
self.node_stats.error = str(e)
|
|
921
|
+
self.node_stats.has_completed_last_run = False
|
|
922
|
+
self.node_stats.has_run_with_current_setup = True
|
|
609
923
|
else:
|
|
610
924
|
node_logger.info('Node has already run, not running the node')
|
|
611
925
|
else:
|
|
612
926
|
node_logger.warning(f'Node {self.__name__} is not setup, cannot run the node')
|
|
613
927
|
|
|
614
928
|
def store_example_data_generator(self, external_df_fetcher: ExternalDfFetcher | ExternalSampler):
|
|
929
|
+
"""Stores a generator function for fetching a sample of the result data.
|
|
930
|
+
|
|
931
|
+
Args:
|
|
932
|
+
external_df_fetcher: The process that generated the sample data.
|
|
933
|
+
"""
|
|
615
934
|
if external_df_fetcher.status is not None:
|
|
616
935
|
file_ref = external_df_fetcher.status.file_ref
|
|
617
936
|
self.results.example_data_path = file_ref
|
|
@@ -620,9 +939,21 @@ class FlowNode:
|
|
|
620
939
|
logger.error('Could not get the sample data, the external process is not ready')
|
|
621
940
|
|
|
622
941
|
def needs_reset(self) -> bool:
    """Report whether the stored hash no longer matches the current settings.

    Returns:
        True when `self._hash` differs from the hash freshly computed by
        `calculate_hash` for `self.setting_input`, False when they are equal.
    """
    stored_hash = self._hash
    current_hash = self.calculate_hash(self.setting_input)
    return stored_hash != current_hash
|
|
624
948
|
|
|
625
949
|
def reset(self, deep: bool = False):
|
|
950
|
+
"""Resets the node's execution state and schema information.
|
|
951
|
+
|
|
952
|
+
This also triggers a reset on all downstream nodes.
|
|
953
|
+
|
|
954
|
+
Args:
|
|
955
|
+
deep: If True, forces a reset even if the hash hasn't changed.
|
|
956
|
+
"""
|
|
626
957
|
needs_reset = self.needs_reset() or deep
|
|
627
958
|
if needs_reset:
|
|
628
959
|
logger.info(f'{self.node_id}: Node needs reset')
|
|
@@ -637,10 +968,19 @@ class FlowNode:
|
|
|
637
968
|
self.node_schema.predicted_schema = None
|
|
638
969
|
self._hash = None
|
|
639
970
|
self.node_information.is_setup = None
|
|
971
|
+
self.results.errors = None
|
|
640
972
|
self.evaluate_nodes()
|
|
641
973
|
_ = self.hash # Recalculate the hash after reset
|
|
642
974
|
|
|
643
975
|
def delete_lead_to_node(self, node_id: int) -> bool:
|
|
976
|
+
"""Removes a connection to a specific downstream node.
|
|
977
|
+
|
|
978
|
+
Args:
|
|
979
|
+
node_id: The ID of the downstream node to disconnect.
|
|
980
|
+
|
|
981
|
+
Returns:
|
|
982
|
+
True if the connection was found and removed, False otherwise.
|
|
983
|
+
"""
|
|
644
984
|
logger.info(f'Deleting lead to node: {node_id}')
|
|
645
985
|
for i, lead_to_node in enumerate(self.leads_to_nodes):
|
|
646
986
|
logger.info(f'Checking lead to node: {lead_to_node.node_id}')
|
|
@@ -652,7 +992,16 @@ class FlowNode:
|
|
|
652
992
|
|
|
653
993
|
def delete_input_node(self, node_id: int, connection_type: input_schema.InputConnectionClass = 'input-0',
|
|
654
994
|
complete: bool = False) -> bool:
|
|
655
|
-
|
|
995
|
+
"""Removes a connection from a specific input node.
|
|
996
|
+
|
|
997
|
+
Args:
|
|
998
|
+
node_id: The ID of the input node to disconnect.
|
|
999
|
+
connection_type: The specific input handle (e.g., 'input-0', 'input-1').
|
|
1000
|
+
complete: If True, tries to delete from all input types.
|
|
1001
|
+
|
|
1002
|
+
Returns:
|
|
1003
|
+
True if a connection was found and removed, False otherwise.
|
|
1004
|
+
"""
|
|
656
1005
|
deleted: bool = False
|
|
657
1006
|
if connection_type == 'input-0':
|
|
658
1007
|
for i, node in enumerate(self.node_inputs.main_inputs):
|
|
@@ -675,17 +1024,32 @@ class FlowNode:
|
|
|
675
1024
|
self.reset()
|
|
676
1025
|
return deleted
|
|
677
1026
|
|
|
678
|
-
def __repr__(self):
|
|
1027
|
+
def __repr__(self) -> str:
    """Describe the node by its ID and type.

    Returns:
        A string of the form "Node id: <node_id> (<node_type>)".
    """
    node_id, node_type = self.node_id, self.node_type
    return f"Node id: {node_id} ({node_type})"
|
|
680
1034
|
|
|
681
|
-
def _get_readable_schema(self):
|
|
1035
|
+
def _get_readable_schema(self) -> List[dict] | None:
|
|
1036
|
+
"""Helper to get a simplified, dictionary representation of the output schema.
|
|
1037
|
+
|
|
1038
|
+
Returns:
|
|
1039
|
+
A list of dictionaries, each with 'column_name' and 'data_type'.
|
|
1040
|
+
"""
|
|
682
1041
|
if self.is_setup:
|
|
683
1042
|
output = []
|
|
684
1043
|
for s in self.schema:
|
|
685
1044
|
output.append(dict(column_name=s.column_name, data_type=s.data_type))
|
|
686
1045
|
return output
|
|
687
1046
|
|
|
688
|
-
def get_repr(self):
|
|
1047
|
+
def get_repr(self) -> dict:
|
|
1048
|
+
"""Gets a detailed dictionary representation of the node's state.
|
|
1049
|
+
|
|
1050
|
+
Returns:
|
|
1051
|
+
A dictionary containing key information about the node.
|
|
1052
|
+
"""
|
|
689
1053
|
return dict(FlowNode=
|
|
690
1054
|
dict(node_id=self.node_id,
|
|
691
1055
|
step_name=self.__name__,
|
|
@@ -693,30 +1057,66 @@ class FlowNode:
|
|
|
693
1057
|
output_schema=self._get_readable_schema()))
|
|
694
1058
|
|
|
695
1059
|
@property
|
|
696
|
-
def number_of_leads_to_nodes(self) -> int:
|
|
1060
|
+
def number_of_leads_to_nodes(self) -> int | None:
|
|
1061
|
+
"""Counts the number of downstream node connections.
|
|
1062
|
+
|
|
1063
|
+
Returns:
|
|
1064
|
+
The number of nodes this node leads to.
|
|
1065
|
+
"""
|
|
697
1066
|
if self.is_setup:
|
|
698
1067
|
return len(self.leads_to_nodes)
|
|
699
1068
|
|
|
700
1069
|
@property
|
|
701
1070
|
def has_next_step(self) -> bool:
    """Tell whether any node is connected downstream of this one.

    Returns:
        True when `self.leads_to_nodes` holds at least one node.
    """
    downstream_count = len(self.leads_to_nodes)
    return downstream_count > 0
|
|
703
1077
|
|
|
704
1078
|
@property
|
|
705
1079
|
def has_input(self) -> bool:
    """Tell whether any input node is connected to this one.

    Returns:
        True when `self.all_inputs` holds at least one node.
    """
    input_count = len(self.all_inputs)
    return input_count >= 1
|
|
707
1086
|
|
|
708
1087
|
@property
|
|
709
1088
|
def singular_input(self) -> bool:
    """Tell whether the node template declares exactly one input.

    Returns:
        True when `self.node_template.input` equals 1.
    """
    declared_inputs = self.node_template.input
    return declared_inputs == 1
|
|
711
1095
|
|
|
712
1096
|
@property
|
|
713
1097
|
def singular_main_input(self) -> "FlowNode | None":
    """Return the node's only input when the node is a single-input type.

    Returns:
        The first entry of `self.all_inputs` when `self.singular_input` is
        true and at least one input is connected; otherwise None.
    """
    if not self.singular_input:
        return None
    connected = self.all_inputs
    # A single-input node may not have its input wired up yet; return None
    # in that case instead of raising IndexError on the empty list.
    return connected[0] if connected else None
|
|
716
1105
|
|
|
717
1106
|
def get_table_example(self, include_data: bool = False) -> TableExample | None:
|
|
1107
|
+
"""Generates a `TableExample` model summarizing the node's output.
|
|
1108
|
+
|
|
1109
|
+
This can optionally include a sample of the data.
|
|
1110
|
+
|
|
1111
|
+
Args:
|
|
1112
|
+
include_data: If True, includes a data sample in the result.
|
|
1113
|
+
|
|
1114
|
+
Returns:
|
|
1115
|
+
A `TableExample` object, or None if the node is not set up.
|
|
1116
|
+
"""
|
|
718
1117
|
self.print('Getting a table example')
|
|
719
1118
|
if self.is_setup and include_data and self.node_stats.has_completed_last_run:
|
|
1119
|
+
|
|
720
1120
|
if self.node_template.node_group == 'output':
|
|
721
1121
|
self.print('getting the table example')
|
|
722
1122
|
return self.main_input[0].get_table_example(include_data)
|
|
@@ -749,10 +1149,16 @@ class FlowNode:
|
|
|
749
1149
|
table_schema=schema, columns=columns,
|
|
750
1150
|
data=[])
|
|
751
1151
|
|
|
752
|
-
def calculate_settings_out_select(self):
|
|
753
|
-
pass
|
|
754
|
-
|
|
755
1152
|
def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
|
|
1153
|
+
"""Gathers all necessary data for representing the node in the UI.
|
|
1154
|
+
|
|
1155
|
+
Args:
|
|
1156
|
+
flow_id: The ID of the parent flow.
|
|
1157
|
+
include_example: If True, includes data samples.
|
|
1158
|
+
|
|
1159
|
+
Returns:
|
|
1160
|
+
A `NodeData` object.
|
|
1161
|
+
"""
|
|
756
1162
|
node = NodeData(flow_id=flow_id,
|
|
757
1163
|
node_id=self.node_id,
|
|
758
1164
|
has_run=self.node_stats.has_run_with_current_setup,
|
|
@@ -772,15 +1178,30 @@ class FlowNode:
|
|
|
772
1178
|
return node
|
|
773
1179
|
|
|
774
1180
|
def get_output_data(self) -> TableExample:
    """Return this node's table example with the data sample included.

    Returns:
        Whatever `get_table_example` produces when asked to include data.
    """
    table_example = self.get_table_example(True)
    return table_example
|
|
776
1187
|
|
|
777
1188
|
def get_node_input(self) -> schemas.NodeInput:
    """Build the `NodeInput` schema object describing this node.

    The object combines the position stored on `self.setting_input`
    (`pos_y`, `pos_x`), the node's ID (passed as `id`) and every attribute
    found in the node template's `__dict__`.

    Returns:
        A `NodeInput` object.
    """
    settings = self.setting_input
    placement = dict(pos_y=settings.pos_y, pos_x=settings.pos_x, id=self.node_id)
    return schemas.NodeInput(**placement, **self.node_template.__dict__)
|
|
782
1198
|
|
|
783
1199
|
def get_edge_input(self) -> List[schemas.NodeEdge]:
|
|
1200
|
+
"""Generates `NodeEdge` objects for all input connections to this node.
|
|
1201
|
+
|
|
1202
|
+
Returns:
|
|
1203
|
+
A list of `NodeEdge` objects.
|
|
1204
|
+
"""
|
|
784
1205
|
edges = []
|
|
785
1206
|
if self.node_inputs.main_inputs is not None:
|
|
786
1207
|
for i, main_input in enumerate(self.node_inputs.main_inputs):
|