Flowfile: flowfile-0.3.6-py3-none-any.whl → flowfile-0.3.7-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
- flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
- flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
- flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
- flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
- flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
- flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
- flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
- flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
- flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
- flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/METADATA +2 -2
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/RECORD +96 -94
- flowfile_core/__init__.py +1 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +1 -0
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/flowfile/code_generator/code_generator.py +71 -0
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +597 -309
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_graph.py +619 -191
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +500 -89
- flowfile_core/flowfile/flow_node/models.py +125 -20
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +36 -5
- flowfile_core/main.py +32 -13
- flowfile_core/routes/cloud_connections.py +7 -11
- flowfile_core/routes/logs.py +2 -6
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +127 -51
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/input_schema.py +92 -64
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +144 -11
- flowfile_core/schemas/transform_schema.py +82 -17
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/__init__.py +0 -0
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +232 -110
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +150 -12
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- test_utils/s3/data_generator.py +1 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +6 -1
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/models.py +0 -193
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
|
|
2
1
|
from typing import List, Union, Callable, Any, Optional, Generator, Literal
|
|
3
2
|
from flowfile_core.configs import logger
|
|
4
3
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
@@ -6,6 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
|
|
|
6
5
|
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
7
6
|
from flowfile_core.schemas import input_schema, schemas
|
|
8
7
|
from flowfile_core.configs.flow_logger import NodeLogger
|
|
8
|
+
from flowfile_core.configs.settings import SINGLE_FILE_MODE
|
|
9
9
|
|
|
10
10
|
from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
|
|
11
11
|
from flowfile_core.flowfile.utils import get_hash
|
|
@@ -13,13 +13,19 @@ from flowfile_core.configs.node_store import nodes as node_interface
|
|
|
13
13
|
from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
|
|
14
14
|
from time import sleep
|
|
15
15
|
from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
|
|
16
|
-
ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
|
|
16
|
+
ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
|
|
17
|
+
ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
|
|
17
18
|
from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
|
|
18
19
|
NodeStepStats, NodeResults)
|
|
19
20
|
from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class FlowNode:
|
|
24
|
+
"""Represents a single node in a data flow graph.
|
|
25
|
+
|
|
26
|
+
This class manages the node's state, its data processing function,
|
|
27
|
+
and its connections to other nodes within the graph.
|
|
28
|
+
"""
|
|
23
29
|
parent_uuid: str
|
|
24
30
|
node_type: str
|
|
25
31
|
node_template: node_interface.NodeTemplate
|
|
@@ -35,12 +41,62 @@ class FlowNode:
|
|
|
35
41
|
_setting_input: Any = None
|
|
36
42
|
_hash: Optional[str] = None # cached hash of the node, used for caching results
|
|
37
43
|
_function: Callable = None # the function that needs to be executed when triggered
|
|
44
|
+
_name: str = None # name of the node, used for display
|
|
38
45
|
_schema_callback: Optional[SingleExecutionFuture] = None # Function that calculates the schema without executing
|
|
39
46
|
_state_needs_reset: bool = False
|
|
40
47
|
_fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
|
|
41
48
|
_cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
|
|
42
49
|
|
|
50
|
+
def __init__(self, node_id: Union[str, int], function: Callable,
|
|
51
|
+
parent_uuid: str,
|
|
52
|
+
setting_input: Any,
|
|
53
|
+
name: str,
|
|
54
|
+
node_type: str,
|
|
55
|
+
input_columns: List[str] = None,
|
|
56
|
+
output_schema: List[FlowfileColumn] = None,
|
|
57
|
+
drop_columns: List[str] = None,
|
|
58
|
+
renew_schema: bool = True,
|
|
59
|
+
pos_x: float = 0,
|
|
60
|
+
pos_y: float = 0,
|
|
61
|
+
schema_callback: Callable = None,
|
|
62
|
+
):
|
|
63
|
+
"""Initializes a FlowNode instance.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
node_id: Unique identifier for the node.
|
|
67
|
+
function: The core data processing function for the node.
|
|
68
|
+
parent_uuid: The UUID of the parent flow.
|
|
69
|
+
setting_input: The configuration/settings object for the node.
|
|
70
|
+
name: The name of the node.
|
|
71
|
+
node_type: The type identifier of the node (e.g., 'join', 'filter').
|
|
72
|
+
input_columns: List of column names expected as input.
|
|
73
|
+
output_schema: The schema of the columns to be added.
|
|
74
|
+
drop_columns: List of column names to be dropped.
|
|
75
|
+
renew_schema: Flag to indicate if the schema should be renewed.
|
|
76
|
+
pos_x: The x-coordinate on the canvas.
|
|
77
|
+
pos_y: The y-coordinate on the canvas.
|
|
78
|
+
schema_callback: A custom function to calculate the output schema.
|
|
79
|
+
"""
|
|
80
|
+
self._name = None
|
|
81
|
+
self.parent_uuid = parent_uuid
|
|
82
|
+
self.post_init()
|
|
83
|
+
self.active = True
|
|
84
|
+
self.node_information.id = node_id
|
|
85
|
+
self.node_type = node_type
|
|
86
|
+
self.node_settings.renew_schema = renew_schema
|
|
87
|
+
self.update_node(function=function,
|
|
88
|
+
input_columns=input_columns,
|
|
89
|
+
output_schema=output_schema,
|
|
90
|
+
drop_columns=drop_columns,
|
|
91
|
+
setting_input=setting_input,
|
|
92
|
+
name=name,
|
|
93
|
+
pos_x=pos_x,
|
|
94
|
+
pos_y=pos_y,
|
|
95
|
+
schema_callback=schema_callback,
|
|
96
|
+
)
|
|
97
|
+
|
|
43
98
|
def post_init(self):
|
|
99
|
+
"""Initializes or resets the node's attributes to their default states."""
|
|
44
100
|
self.node_inputs = NodeStepInputs()
|
|
45
101
|
self.node_stats = NodeStepStats()
|
|
46
102
|
self.node_settings = NodeStepSettings()
|
|
@@ -54,19 +110,32 @@ class FlowNode:
|
|
|
54
110
|
self._state_needs_reset = False
|
|
55
111
|
|
|
56
112
|
@property
|
|
57
|
-
def state_needs_reset(self):
|
|
113
|
+
def state_needs_reset(self) -> bool:
|
|
114
|
+
"""Checks if the node's state needs to be reset.
|
|
115
|
+
|
|
116
|
+
Returns:
|
|
117
|
+
True if a reset is required, False otherwise.
|
|
118
|
+
"""
|
|
58
119
|
return self._state_needs_reset
|
|
59
120
|
|
|
60
121
|
@state_needs_reset.setter
|
|
61
122
|
def state_needs_reset(self, v: bool):
|
|
123
|
+
"""Sets the flag indicating that the node's state needs to be reset.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
v: The boolean value to set.
|
|
127
|
+
"""
|
|
62
128
|
self._state_needs_reset = v
|
|
63
129
|
|
|
64
130
|
@staticmethod
|
|
65
131
|
def create_schema_callback_from_function(f: Callable) -> Callable[[], List[FlowfileColumn]]:
|
|
66
|
-
"""
|
|
67
|
-
|
|
68
|
-
:
|
|
69
|
-
|
|
132
|
+
"""Wraps a node's function to create a schema callback that extracts the schema.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
f: The node's core function that returns a FlowDataEngine instance.
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
A callable that, when executed, returns the output schema.
|
|
70
139
|
"""
|
|
71
140
|
def schema_callback() -> List[FlowfileColumn]:
|
|
72
141
|
try:
|
|
@@ -79,6 +148,13 @@ class FlowNode:
|
|
|
79
148
|
|
|
80
149
|
@property
|
|
81
150
|
def schema_callback(self) -> SingleExecutionFuture:
|
|
151
|
+
"""Gets the schema callback function, creating one if it doesn't exist.
|
|
152
|
+
|
|
153
|
+
The callback is used for predicting the output schema without full execution.
|
|
154
|
+
|
|
155
|
+
Returns:
|
|
156
|
+
A SingleExecutionFuture instance wrapping the schema function.
|
|
157
|
+
"""
|
|
82
158
|
if self._schema_callback is None:
|
|
83
159
|
if self.user_provided_schema_callback is not None:
|
|
84
160
|
self.schema_callback = self.user_provided_schema_callback
|
|
@@ -88,6 +164,11 @@ class FlowNode:
|
|
|
88
164
|
|
|
89
165
|
@schema_callback.setter
|
|
90
166
|
def schema_callback(self, f: Callable):
|
|
167
|
+
"""Sets the schema callback function for the node.
|
|
168
|
+
|
|
169
|
+
Args:
|
|
170
|
+
f: The function to be used for schema calculation.
|
|
171
|
+
"""
|
|
91
172
|
if f is None:
|
|
92
173
|
return
|
|
93
174
|
|
|
@@ -101,9 +182,24 @@ class FlowNode:
|
|
|
101
182
|
|
|
102
183
|
@property
|
|
103
184
|
def is_start(self) -> bool:
|
|
185
|
+
"""Determines if the node is a starting node in the flow.
|
|
186
|
+
|
|
187
|
+
A starting node requires no inputs.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
True if the node is a start node, False otherwise.
|
|
191
|
+
"""
|
|
104
192
|
return not self.has_input and self.node_template.input == 0
|
|
105
193
|
|
|
106
194
|
def get_input_type(self, node_id: int) -> List:
|
|
195
|
+
"""Gets the type of connection ('main', 'left', 'right') for a given input node ID.
|
|
196
|
+
|
|
197
|
+
Args:
|
|
198
|
+
node_id: The ID of the input node.
|
|
199
|
+
|
|
200
|
+
Returns:
|
|
201
|
+
A list of connection types for that node ID.
|
|
202
|
+
"""
|
|
107
203
|
relation_type = []
|
|
108
204
|
if node_id in [n.node_id for n in self.node_inputs.main_inputs]:
|
|
109
205
|
relation_type.append('main')
|
|
@@ -113,36 +209,6 @@ class FlowNode:
|
|
|
113
209
|
relation_type.append('right')
|
|
114
210
|
return list(set(relation_type))
|
|
115
211
|
|
|
116
|
-
def __init__(self, node_id: Union[str, int], function: Callable,
|
|
117
|
-
parent_uuid: str,
|
|
118
|
-
setting_input: Any,
|
|
119
|
-
name: str,
|
|
120
|
-
node_type: str,
|
|
121
|
-
input_columns: List[str] = None,
|
|
122
|
-
output_schema: List[FlowfileColumn] = None,
|
|
123
|
-
drop_columns: List[str] = None,
|
|
124
|
-
renew_schema: bool = True,
|
|
125
|
-
pos_x: float = 0,
|
|
126
|
-
pos_y: float = 0,
|
|
127
|
-
schema_callback: Callable = None,
|
|
128
|
-
):
|
|
129
|
-
self.parent_uuid = parent_uuid
|
|
130
|
-
self.post_init()
|
|
131
|
-
self.active = True
|
|
132
|
-
self.node_information.id = node_id
|
|
133
|
-
self.node_type = node_type
|
|
134
|
-
self.node_settings.renew_schema = renew_schema
|
|
135
|
-
self.update_node(function=function,
|
|
136
|
-
input_columns=input_columns,
|
|
137
|
-
output_schema=output_schema,
|
|
138
|
-
drop_columns=drop_columns,
|
|
139
|
-
setting_input=setting_input,
|
|
140
|
-
name=name,
|
|
141
|
-
pos_x=pos_x,
|
|
142
|
-
pos_y=pos_y,
|
|
143
|
-
schema_callback=schema_callback,
|
|
144
|
-
)
|
|
145
|
-
|
|
146
212
|
def update_node(self,
|
|
147
213
|
function: Callable,
|
|
148
214
|
input_columns: List[str] = None,
|
|
@@ -154,9 +220,24 @@ class FlowNode:
|
|
|
154
220
|
pos_y: float = 0,
|
|
155
221
|
schema_callback: Callable = None,
|
|
156
222
|
):
|
|
223
|
+
"""Updates the properties of the node.
|
|
224
|
+
|
|
225
|
+
This is called during initialization and when settings are changed.
|
|
226
|
+
|
|
227
|
+
Args:
|
|
228
|
+
function: The new core data processing function.
|
|
229
|
+
input_columns: The new list of input columns.
|
|
230
|
+
output_schema: The new schema of added columns.
|
|
231
|
+
drop_columns: The new list of dropped columns.
|
|
232
|
+
name: The new name for the node.
|
|
233
|
+
setting_input: The new settings object.
|
|
234
|
+
pos_x: The new x-coordinate.
|
|
235
|
+
pos_y: The new y-coordinate.
|
|
236
|
+
schema_callback: The new custom schema callback function.
|
|
237
|
+
"""
|
|
157
238
|
self.user_provided_schema_callback = schema_callback
|
|
158
|
-
self.node_information.y_position = pos_y
|
|
159
|
-
self.node_information.x_position = pos_x
|
|
239
|
+
self.node_information.y_position = int(pos_y)
|
|
240
|
+
self.node_information.x_position = int(pos_x)
|
|
160
241
|
self.node_information.setting_input = setting_input
|
|
161
242
|
self.name = self.node_type if name is None else name
|
|
162
243
|
self._function = function
|
|
@@ -178,20 +259,40 @@ class FlowNode:
|
|
|
178
259
|
self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
|
|
179
260
|
|
|
180
261
|
@property
|
|
181
|
-
def name(self):
|
|
262
|
+
def name(self) -> str:
|
|
263
|
+
"""Gets the name of the node.
|
|
264
|
+
|
|
265
|
+
Returns:
|
|
266
|
+
The node's name.
|
|
267
|
+
"""
|
|
182
268
|
return self._name
|
|
183
269
|
|
|
184
270
|
@name.setter
|
|
185
271
|
def name(self, name: str):
|
|
272
|
+
"""Sets the name of the node.
|
|
273
|
+
|
|
274
|
+
Args:
|
|
275
|
+
name: The new name.
|
|
276
|
+
"""
|
|
186
277
|
self._name = name
|
|
187
278
|
self.__name__ = name
|
|
188
279
|
|
|
189
280
|
@property
|
|
190
|
-
def setting_input(self):
|
|
281
|
+
def setting_input(self) -> Any:
|
|
282
|
+
"""Gets the node's specific configuration settings.
|
|
283
|
+
|
|
284
|
+
Returns:
|
|
285
|
+
The settings object.
|
|
286
|
+
"""
|
|
191
287
|
return self._setting_input
|
|
192
288
|
|
|
193
289
|
@setting_input.setter
|
|
194
290
|
def setting_input(self, setting_input: Any):
|
|
291
|
+
"""Sets the node's configuration and triggers a reset if necessary.
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
setting_input: The new settings object.
|
|
295
|
+
"""
|
|
195
296
|
is_manual_input = (self.node_type == 'manual_input' and
|
|
196
297
|
isinstance(setting_input, input_schema.NodeManualInput) and
|
|
197
298
|
isinstance(self._setting_input, input_schema.NodeManualInput)
|
|
@@ -209,24 +310,48 @@ class FlowNode:
|
|
|
209
310
|
self.reset()
|
|
210
311
|
|
|
211
312
|
@property
|
|
212
|
-
def node_id(self):
|
|
313
|
+
def node_id(self) -> Union[str, int]:
|
|
314
|
+
"""Gets the unique identifier of the node.
|
|
315
|
+
|
|
316
|
+
Returns:
|
|
317
|
+
The node's ID.
|
|
318
|
+
"""
|
|
213
319
|
return self.node_information.id
|
|
214
320
|
|
|
215
321
|
@property
|
|
216
|
-
def left_input(self):
|
|
322
|
+
def left_input(self) -> Optional["FlowNode"]:
|
|
323
|
+
"""Gets the node connected to the left input port.
|
|
324
|
+
|
|
325
|
+
Returns:
|
|
326
|
+
The left input FlowNode, or None.
|
|
327
|
+
"""
|
|
217
328
|
return self.node_inputs.left_input
|
|
218
329
|
|
|
219
330
|
@property
|
|
220
|
-
def right_input(self):
|
|
331
|
+
def right_input(self) -> Optional["FlowNode"]:
|
|
332
|
+
"""Gets the node connected to the right input port.
|
|
333
|
+
|
|
334
|
+
Returns:
|
|
335
|
+
The right input FlowNode, or None.
|
|
336
|
+
"""
|
|
221
337
|
return self.node_inputs.right_input
|
|
222
338
|
|
|
223
339
|
@property
|
|
224
340
|
def main_input(self) -> List["FlowNode"]:
|
|
341
|
+
"""Gets the list of nodes connected to the main input port(s).
|
|
342
|
+
|
|
343
|
+
Returns:
|
|
344
|
+
A list of main input FlowNodes.
|
|
345
|
+
"""
|
|
225
346
|
return self.node_inputs.main_inputs
|
|
226
347
|
|
|
227
348
|
@property
|
|
228
|
-
def is_correct(self):
|
|
229
|
-
|
|
349
|
+
def is_correct(self) -> bool:
|
|
350
|
+
"""Checks if the node's input connections satisfy its template requirements.
|
|
351
|
+
|
|
352
|
+
Returns:
|
|
353
|
+
True if connections are valid, False otherwise.
|
|
354
|
+
"""
|
|
230
355
|
if isinstance(self.setting_input, input_schema.NodePromise):
|
|
231
356
|
return False
|
|
232
357
|
return (self.node_template.input == len(self.node_inputs.get_all_inputs()) or
|
|
@@ -234,6 +359,10 @@ class FlowNode:
|
|
|
234
359
|
(self.node_template.multi and self.node_template.can_be_start))
|
|
235
360
|
|
|
236
361
|
def set_node_information(self):
|
|
362
|
+
"""Populates the `node_information` attribute with the current state.
|
|
363
|
+
|
|
364
|
+
This includes the node's connections, settings, and position.
|
|
365
|
+
"""
|
|
237
366
|
logger.info('setting node information')
|
|
238
367
|
node_information = self.node_information
|
|
239
368
|
node_information.left_input_id = self.node_inputs.left_input.node_id if self.left_input else None
|
|
@@ -248,43 +377,76 @@ class FlowNode:
|
|
|
248
377
|
node_information.type = self.node_type
|
|
249
378
|
|
|
250
379
|
def get_node_information(self) -> schemas.NodeInformation:
|
|
380
|
+
"""Updates and returns the node's information object.
|
|
381
|
+
|
|
382
|
+
Returns:
|
|
383
|
+
The `NodeInformation` object for this node.
|
|
384
|
+
"""
|
|
251
385
|
self.set_node_information()
|
|
252
386
|
return self.node_information
|
|
253
387
|
|
|
254
388
|
@property
|
|
255
|
-
def function(self):
|
|
389
|
+
def function(self) -> Callable:
|
|
390
|
+
"""Gets the core processing function of the node.
|
|
391
|
+
|
|
392
|
+
Returns:
|
|
393
|
+
The callable function.
|
|
394
|
+
"""
|
|
256
395
|
return self._function
|
|
257
396
|
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
397
|
+
@function.setter
|
|
398
|
+
def function(self, function: Callable):
|
|
399
|
+
"""Sets the core processing function of the node.
|
|
400
|
+
|
|
401
|
+
Args:
|
|
402
|
+
function: The new callable function.
|
|
403
|
+
"""
|
|
404
|
+
self._function = function
|
|
266
405
|
|
|
267
406
|
@property
|
|
268
407
|
def all_inputs(self) -> List["FlowNode"]:
|
|
408
|
+
"""Gets a list of all nodes connected to any input port.
|
|
409
|
+
|
|
410
|
+
Returns:
|
|
411
|
+
A list of all input FlowNodes.
|
|
412
|
+
"""
|
|
269
413
|
return self.node_inputs.get_all_inputs()
|
|
270
414
|
|
|
271
|
-
def calculate_hash(self, setting_input: Any):
|
|
415
|
+
def calculate_hash(self, setting_input: Any) -> str:
|
|
416
|
+
"""Calculates a hash based on settings and input node hashes.
|
|
417
|
+
|
|
418
|
+
Args:
|
|
419
|
+
setting_input: The node's settings object to be included in the hash.
|
|
420
|
+
|
|
421
|
+
Returns:
|
|
422
|
+
A string hash value.
|
|
423
|
+
"""
|
|
272
424
|
depends_on_hashes = [_node.hash for _node in self.all_inputs]
|
|
273
425
|
node_data_hash = get_hash(setting_input)
|
|
274
426
|
return get_hash(depends_on_hashes + [node_data_hash, self.parent_uuid])
|
|
275
427
|
|
|
276
428
|
@property
|
|
277
|
-
def hash(self):
|
|
429
|
+
def hash(self) -> str:
|
|
430
|
+
"""Gets the cached hash for the node, calculating it if it doesn't exist.
|
|
431
|
+
|
|
432
|
+
Returns:
|
|
433
|
+
The string hash value.
|
|
434
|
+
"""
|
|
278
435
|
if not self._hash:
|
|
279
436
|
self._hash = self.calculate_hash(self.setting_input)
|
|
280
437
|
return self._hash
|
|
281
438
|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
# self.reset()
|
|
439
|
+
def add_node_connection(self, from_node: "FlowNode",
|
|
440
|
+
insert_type: Literal['main', 'left', 'right'] = 'main') -> None:
|
|
441
|
+
"""Adds a connection from a source node to this node.
|
|
286
442
|
|
|
287
|
-
|
|
443
|
+
Args:
|
|
444
|
+
from_node: The node to connect from.
|
|
445
|
+
insert_type: The type of input to connect to ('main', 'left', 'right').
|
|
446
|
+
|
|
447
|
+
Raises:
|
|
448
|
+
Exception: If the insert_type is invalid.
|
|
449
|
+
"""
|
|
288
450
|
from_node.leads_to_nodes.append(self)
|
|
289
451
|
if insert_type == 'main':
|
|
290
452
|
if self.node_template.input <= 2 or self.node_inputs.main_inputs is None:
|
|
@@ -303,20 +465,39 @@ class FlowNode:
|
|
|
303
465
|
self.reset()
|
|
304
466
|
from_node.reset()
|
|
305
467
|
|
|
306
|
-
def evaluate_nodes(self, deep: bool = False):
|
|
468
|
+
def evaluate_nodes(self, deep: bool = False) -> None:
|
|
469
|
+
"""Triggers a state reset for all directly connected downstream nodes.
|
|
470
|
+
|
|
471
|
+
Args:
|
|
472
|
+
deep: If True, the reset propagates recursively through the entire downstream graph.
|
|
473
|
+
"""
|
|
307
474
|
for node in self.leads_to_nodes:
|
|
308
475
|
self.print(f'resetting node: {node.node_id}')
|
|
309
476
|
node.reset(deep)
|
|
310
477
|
|
|
311
|
-
def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn:
|
|
478
|
+
def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn | None:
|
|
479
|
+
"""Retrieves the schema for a specific column from the output schema.
|
|
480
|
+
|
|
481
|
+
Args:
|
|
482
|
+
col_name: The name of the column.
|
|
483
|
+
|
|
484
|
+
Returns:
|
|
485
|
+
The FlowfileColumn object for that column, or None if not found.
|
|
486
|
+
"""
|
|
312
487
|
for s in self.schema:
|
|
313
488
|
if s.column_name == col_name:
|
|
314
489
|
return s
|
|
315
490
|
|
|
316
|
-
def get_predicted_schema(self, force: bool = False):
|
|
317
|
-
"""
|
|
318
|
-
|
|
319
|
-
|
|
491
|
+
def get_predicted_schema(self, force: bool = False) -> List[FlowfileColumn] | None:
|
|
492
|
+
"""Predicts the output schema of the node without full execution.
|
|
493
|
+
|
|
494
|
+
It uses the schema_callback or infers from predicted data.
|
|
495
|
+
|
|
496
|
+
Args:
|
|
497
|
+
force: If True, forces recalculation even if a predicted schema exists.
|
|
498
|
+
|
|
499
|
+
Returns:
|
|
500
|
+
A list of FlowfileColumn objects representing the predicted schema.
|
|
320
501
|
"""
|
|
321
502
|
if self.node_schema.predicted_schema and not force:
|
|
322
503
|
return self.node_schema.predicted_schema
|
|
@@ -338,6 +519,11 @@ class FlowNode:
|
|
|
338
519
|
|
|
339
520
|
@property
|
|
340
521
|
def is_setup(self) -> bool:
|
|
522
|
+
"""Checks if the node has been properly configured and is ready for execution.
|
|
523
|
+
|
|
524
|
+
Returns:
|
|
525
|
+
True if the node is set up, False otherwise.
|
|
526
|
+
"""
|
|
341
527
|
if not self.node_information.is_setup:
|
|
342
528
|
if self.function.__name__ != 'placeholder':
|
|
343
529
|
self.node_information.is_setup = True
|
|
@@ -345,9 +531,24 @@ class FlowNode:
|
|
|
345
531
|
return self.node_information.is_setup
|
|
346
532
|
|
|
347
533
|
def print(self, v: Any):
|
|
534
|
+
"""Helper method to log messages with node context.
|
|
535
|
+
|
|
536
|
+
Args:
|
|
537
|
+
v: The message or value to log.
|
|
538
|
+
"""
|
|
348
539
|
logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
|
|
349
540
|
|
|
350
541
|
def get_resulting_data(self) -> FlowDataEngine | None:
|
|
542
|
+
"""Executes the node's function to produce the actual output data.
|
|
543
|
+
|
|
544
|
+
Handles both regular functions and external data sources.
|
|
545
|
+
|
|
546
|
+
Returns:
|
|
547
|
+
A FlowDataEngine instance containing the result, or None on error.
|
|
548
|
+
|
|
549
|
+
Raises:
|
|
550
|
+
Exception: Propagates exceptions from the node's function execution.
|
|
551
|
+
"""
|
|
351
552
|
if self.is_setup:
|
|
352
553
|
if self.results.resulting_data is None and self.results.errors is None:
|
|
353
554
|
self.print('getting resulting data')
|
|
@@ -375,6 +576,13 @@ class FlowNode:
|
|
|
375
576
|
return self.results.resulting_data
|
|
376
577
|
|
|
377
578
|
def _predicted_data_getter(self) -> FlowDataEngine | None:
|
|
579
|
+
"""Internal helper to get a predicted data result.
|
|
580
|
+
|
|
581
|
+
This calls the function with predicted data from input nodes.
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
A FlowDataEngine instance with predicted data, or an empty one on error.
|
|
585
|
+
"""
|
|
378
586
|
try:
|
|
379
587
|
fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
|
|
380
588
|
return fl
|
|
@@ -391,6 +599,13 @@ class FlowNode:
|
|
|
391
599
|
logger.warning(e)
|
|
392
600
|
|
|
393
601
|
def get_predicted_resulting_data(self) -> FlowDataEngine:
|
|
602
|
+
"""Creates a `FlowDataEngine` instance based on the predicted schema.
|
|
603
|
+
|
|
604
|
+
This avoids executing the node's full logic.
|
|
605
|
+
|
|
606
|
+
Returns:
|
|
607
|
+
A FlowDataEngine instance with a schema but no data.
|
|
608
|
+
"""
|
|
394
609
|
if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
|
|
395
610
|
self.print('Getting data based on the schema')
|
|
396
611
|
|
|
@@ -404,17 +619,28 @@ class FlowNode:
|
|
|
404
619
|
return fl
|
|
405
620
|
|
|
406
621
|
def add_lead_to_in_depend_source(self):
|
|
622
|
+
"""Ensures this node is registered in the `leads_to_nodes` list of its inputs."""
|
|
407
623
|
for input_node in self.all_inputs:
|
|
408
624
|
if self.node_id not in [n.node_id for n in input_node.leads_to_nodes]:
|
|
409
625
|
input_node.leads_to_nodes.append(self)
|
|
410
626
|
|
|
411
627
|
def get_all_dependent_nodes(self) -> Generator["FlowNode", None, None]:
|
|
628
|
+
"""Yields all downstream nodes recursively.
|
|
629
|
+
|
|
630
|
+
Returns:
|
|
631
|
+
A generator of all dependent FlowNode objects.
|
|
632
|
+
"""
|
|
412
633
|
for node in self.leads_to_nodes:
|
|
413
634
|
yield node
|
|
414
635
|
for n in node.get_all_dependent_nodes():
|
|
415
636
|
yield n
|
|
416
637
|
|
|
417
638
|
def get_all_dependent_node_ids(self) -> Generator[int, None, None]:
|
|
639
|
+
"""Yields the IDs of all downstream nodes recursively.
|
|
640
|
+
|
|
641
|
+
Returns:
|
|
642
|
+
A generator of all dependent node IDs.
|
|
643
|
+
"""
|
|
418
644
|
for node in self.leads_to_nodes:
|
|
419
645
|
yield node.node_id
|
|
420
646
|
for n in node.get_all_dependent_node_ids():
|
|
@@ -422,6 +648,13 @@ class FlowNode:
|
|
|
422
648
|
|
|
423
649
|
@property
|
|
424
650
|
def schema(self) -> List[FlowfileColumn]:
|
|
651
|
+
"""Gets the definitive output schema of the node.
|
|
652
|
+
|
|
653
|
+
If not already run, it falls back to the predicted schema.
|
|
654
|
+
|
|
655
|
+
Returns:
|
|
656
|
+
A list of FlowfileColumn objects.
|
|
657
|
+
"""
|
|
425
658
|
try:
|
|
426
659
|
if self.is_setup and self.results.errors is None:
|
|
427
660
|
if self.node_schema.result_schema is not None and len(self.node_schema.result_schema) > 0:
|
|
@@ -434,31 +667,42 @@ class FlowNode:
|
|
|
434
667
|
return self.node_schema.result_schema
|
|
435
668
|
else:
|
|
436
669
|
return []
|
|
437
|
-
except:
|
|
670
|
+
except Exception as e:
|
|
671
|
+
logger.error(e)
|
|
438
672
|
return []
|
|
439
673
|
|
|
440
|
-
def load_from_cache(self) -> FlowDataEngine:
|
|
441
|
-
if results_exists(self.hash):
|
|
442
|
-
try:
|
|
443
|
-
return FlowDataEngine(self._fetch_cached_df.get_result())
|
|
444
|
-
except Exception as e:
|
|
445
|
-
logger.error(e)
|
|
446
|
-
|
|
447
674
|
def remove_cache(self):
|
|
675
|
+
"""Removes cached results for this node.
|
|
676
|
+
|
|
677
|
+
Note: Currently not fully implemented.
|
|
678
|
+
"""
|
|
679
|
+
|
|
448
680
|
if results_exists(self.hash):
|
|
449
681
|
logger.warning('Not implemented')
|
|
450
682
|
|
|
451
683
|
def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
|
|
452
684
|
execution_location: schemas.ExecutionLocationsLiteral = "auto") -> bool:
|
|
453
|
-
if
|
|
685
|
+
"""Determines if the node needs to be executed.
|
|
686
|
+
|
|
687
|
+
The decision is based on its run state, caching settings, and execution mode.
|
|
688
|
+
|
|
689
|
+
Args:
|
|
690
|
+
performance_mode: True if the flow is in performance mode.
|
|
691
|
+
node_logger: The logger instance for this node.
|
|
692
|
+
execution_location: The target execution location.
|
|
693
|
+
|
|
694
|
+
Returns:
|
|
695
|
+
True if the node should be run, False otherwise.
|
|
696
|
+
"""
|
|
697
|
+
if execution_location == "local" or SINGLE_FILE_MODE:
|
|
454
698
|
return False
|
|
699
|
+
|
|
455
700
|
flow_logger = logger if node_logger is None else node_logger
|
|
456
701
|
cache_result_exists = results_exists(self.hash)
|
|
457
702
|
if not self.node_stats.has_run_with_current_setup:
|
|
458
703
|
flow_logger.info('Node has not run, needs to run')
|
|
459
704
|
return True
|
|
460
705
|
if self.node_settings.cache_results and cache_result_exists:
|
|
461
|
-
|
|
462
706
|
return False
|
|
463
707
|
elif self.node_settings.cache_results and not cache_result_exists:
|
|
464
708
|
return True
|
|
@@ -468,9 +712,34 @@ class FlowNode:
|
|
|
468
712
|
return True
|
|
469
713
|
|
|
470
714
|
def __call__(self, *args, **kwargs):
|
|
715
|
+
"""Makes the node instance callable, acting as an alias for execute_node."""
|
|
471
716
|
self.execute_node(*args, **kwargs)
|
|
472
717
|
|
|
718
|
+
def execute_full_local(self, performance_mode: bool = False) -> None:
|
|
719
|
+
"""Executes the node's logic locally, including example data generation.
|
|
720
|
+
|
|
721
|
+
Args:
|
|
722
|
+
performance_mode: If True, skips generating example data.
|
|
723
|
+
|
|
724
|
+
Raises:
|
|
725
|
+
Exception: Propagates exceptions from the execution.
|
|
726
|
+
"""
|
|
727
|
+
if self.results.resulting_data is None and not performance_mode:
|
|
728
|
+
self.results.resulting_data = self.get_resulting_data()
|
|
729
|
+
self.results.example_data_generator = lambda: self.get_resulting_data().get_sample(100).to_arrow()
|
|
730
|
+
self.node_schema.result_schema = self.results.resulting_data.schema
|
|
731
|
+
self.node_stats.has_completed_last_run = True
|
|
732
|
+
|
|
473
733
|
def execute_local(self, flow_id: int, performance_mode: bool = False):
|
|
734
|
+
"""Executes the node's logic locally.
|
|
735
|
+
|
|
736
|
+
Args:
|
|
737
|
+
flow_id: The ID of the parent flow.
|
|
738
|
+
performance_mode: If True, skips generating example data.
|
|
739
|
+
|
|
740
|
+
Raises:
|
|
741
|
+
Exception: Propagates exceptions from the execution.
|
|
742
|
+
"""
|
|
474
743
|
try:
|
|
475
744
|
resulting_data = self.get_resulting_data()
|
|
476
745
|
if not performance_mode:
|
|
@@ -495,7 +764,15 @@ class FlowNode:
|
|
|
495
764
|
step.node_settings.streamable = self.node_settings.streamable
|
|
496
765
|
|
|
497
766
|
def execute_remote(self, performance_mode: bool = False, node_logger: NodeLogger = None):
|
|
498
|
-
|
|
767
|
+
"""Executes the node's logic remotely or handles cached results.
|
|
768
|
+
|
|
769
|
+
Args:
|
|
770
|
+
performance_mode: If True, skips generating example data.
|
|
771
|
+
node_logger: The logger for this node execution.
|
|
772
|
+
|
|
773
|
+
Raises:
|
|
774
|
+
Exception: If the node_logger is not provided or if execution fails.
|
|
775
|
+
"""
|
|
499
776
|
if node_logger is None:
|
|
500
777
|
raise Exception('Node logger is not defined')
|
|
501
778
|
if self.node_settings.cache_results and results_exists(self.hash):
|
|
@@ -552,11 +829,15 @@ class FlowNode:
|
|
|
552
829
|
self._fetch_cached_df = None
|
|
553
830
|
|
|
554
831
|
def prepare_before_run(self):
|
|
832
|
+
"""Resets results and errors before a new execution."""
|
|
833
|
+
|
|
555
834
|
self.results.errors = None
|
|
556
835
|
self.results.resulting_data = None
|
|
557
836
|
self.results.example_data = None
|
|
558
837
|
|
|
559
838
|
def cancel(self):
|
|
839
|
+
"""Cancels an ongoing external process if one is running."""
|
|
840
|
+
|
|
560
841
|
if self._fetch_cached_df is not None:
|
|
561
842
|
self._fetch_cached_df.cancel()
|
|
562
843
|
self.node_stats.is_canceled = True
|
|
@@ -566,6 +847,18 @@ class FlowNode:
|
|
|
566
847
|
|
|
567
848
|
def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
|
|
568
849
|
performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
|
|
850
|
+
"""Orchestrates the execution, handling location, caching, and retries.
|
|
851
|
+
|
|
852
|
+
Args:
|
|
853
|
+
run_location: The location for execution ('local', 'remote').
|
|
854
|
+
reset_cache: If True, forces removal of any existing cache.
|
|
855
|
+
performance_mode: If True, optimizes for speed over diagnostics.
|
|
856
|
+
retry: If True, allows retrying execution on recoverable errors.
|
|
857
|
+
node_logger: The logger for this node execution.
|
|
858
|
+
|
|
859
|
+
Raises:
|
|
860
|
+
Exception: If the node_logger is not defined.
|
|
861
|
+
"""
|
|
569
862
|
if node_logger is None:
|
|
570
863
|
raise Exception('Flow logger is not defined')
|
|
571
864
|
# node_logger = flow_logger.get_node_logger(self.node_id)
|
|
@@ -575,7 +868,8 @@ class FlowNode:
|
|
|
575
868
|
self.node_stats.has_completed_last_run = False
|
|
576
869
|
if self.is_setup:
|
|
577
870
|
node_logger.info(f'Starting to run {self.__name__}')
|
|
578
|
-
if self.needs_run(performance_mode, node_logger, run_location)
|
|
871
|
+
if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
|
|
872
|
+
and not (run_location == 'local' or SINGLE_FILE_MODE)):
|
|
579
873
|
self.prepare_before_run()
|
|
580
874
|
try:
|
|
581
875
|
if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
|
|
@@ -605,13 +899,28 @@ class FlowNode:
|
|
|
605
899
|
else:
|
|
606
900
|
self.results.errors = str(e)
|
|
607
901
|
node_logger.error(f'Error with running the node: {e}')
|
|
608
|
-
|
|
902
|
+
elif ((run_location == 'local' or SINGLE_FILE_MODE) and (not self.node_stats.has_run_with_current_setup
|
|
903
|
+
or self.node_template.node_group == "output")):
|
|
904
|
+
try:
|
|
905
|
+
node_logger.info('Executing fully locally')
|
|
906
|
+
self.execute_full_local(performance_mode)
|
|
907
|
+
except Exception as e:
|
|
908
|
+
self.results.errors = str(e)
|
|
909
|
+
node_logger.error(f'Error with running the node: {e}')
|
|
910
|
+
self.node_stats.error = str(e)
|
|
911
|
+
self.node_stats.has_completed_last_run = False
|
|
912
|
+
self.node_stats.has_run_with_current_setup = True
|
|
609
913
|
else:
|
|
610
914
|
node_logger.info('Node has already run, not running the node')
|
|
611
915
|
else:
|
|
612
916
|
node_logger.warning(f'Node {self.__name__} is not setup, cannot run the node')
|
|
613
917
|
|
|
614
918
|
def store_example_data_generator(self, external_df_fetcher: ExternalDfFetcher | ExternalSampler):
|
|
919
|
+
"""Stores a generator function for fetching a sample of the result data.
|
|
920
|
+
|
|
921
|
+
Args:
|
|
922
|
+
external_df_fetcher: The process that generated the sample data.
|
|
923
|
+
"""
|
|
615
924
|
if external_df_fetcher.status is not None:
|
|
616
925
|
file_ref = external_df_fetcher.status.file_ref
|
|
617
926
|
self.results.example_data_path = file_ref
|
|
@@ -620,9 +929,21 @@ class FlowNode:
|
|
|
620
929
|
logger.error('Could not get the sample data, the external process is not ready')
|
|
621
930
|
|
|
622
931
|
def needs_reset(self) -> bool:
|
|
932
|
+
"""Checks if the node's hash has changed, indicating an outdated state.
|
|
933
|
+
|
|
934
|
+
Returns:
|
|
935
|
+
True if the calculated hash differs from the stored hash.
|
|
936
|
+
"""
|
|
623
937
|
return self._hash != self.calculate_hash(self.setting_input)
|
|
624
938
|
|
|
625
939
|
def reset(self, deep: bool = False):
|
|
940
|
+
"""Resets the node's execution state and schema information.
|
|
941
|
+
|
|
942
|
+
This also triggers a reset on all downstream nodes.
|
|
943
|
+
|
|
944
|
+
Args:
|
|
945
|
+
deep: If True, forces a reset even if the hash hasn't changed.
|
|
946
|
+
"""
|
|
626
947
|
needs_reset = self.needs_reset() or deep
|
|
627
948
|
if needs_reset:
|
|
628
949
|
logger.info(f'{self.node_id}: Node needs reset')
|
|
@@ -637,10 +958,19 @@ class FlowNode:
|
|
|
637
958
|
self.node_schema.predicted_schema = None
|
|
638
959
|
self._hash = None
|
|
639
960
|
self.node_information.is_setup = None
|
|
961
|
+
self.results.errors = None
|
|
640
962
|
self.evaluate_nodes()
|
|
641
963
|
_ = self.hash # Recalculate the hash after reset
|
|
642
964
|
|
|
643
965
|
def delete_lead_to_node(self, node_id: int) -> bool:
|
|
966
|
+
"""Removes a connection to a specific downstream node.
|
|
967
|
+
|
|
968
|
+
Args:
|
|
969
|
+
node_id: The ID of the downstream node to disconnect.
|
|
970
|
+
|
|
971
|
+
Returns:
|
|
972
|
+
True if the connection was found and removed, False otherwise.
|
|
973
|
+
"""
|
|
644
974
|
logger.info(f'Deleting lead to node: {node_id}')
|
|
645
975
|
for i, lead_to_node in enumerate(self.leads_to_nodes):
|
|
646
976
|
logger.info(f'Checking lead to node: {lead_to_node.node_id}')
|
|
@@ -652,7 +982,16 @@ class FlowNode:
|
|
|
652
982
|
|
|
653
983
|
def delete_input_node(self, node_id: int, connection_type: input_schema.InputConnectionClass = 'input-0',
|
|
654
984
|
complete: bool = False) -> bool:
|
|
655
|
-
|
|
985
|
+
"""Removes a connection from a specific input node.
|
|
986
|
+
|
|
987
|
+
Args:
|
|
988
|
+
node_id: The ID of the input node to disconnect.
|
|
989
|
+
connection_type: The specific input handle (e.g., 'input-0', 'input-1').
|
|
990
|
+
complete: If True, tries to delete from all input types.
|
|
991
|
+
|
|
992
|
+
Returns:
|
|
993
|
+
True if a connection was found and removed, False otherwise.
|
|
994
|
+
"""
|
|
656
995
|
deleted: bool = False
|
|
657
996
|
if connection_type == 'input-0':
|
|
658
997
|
for i, node in enumerate(self.node_inputs.main_inputs):
|
|
@@ -675,17 +1014,32 @@ class FlowNode:
|
|
|
675
1014
|
self.reset()
|
|
676
1015
|
return deleted
|
|
677
1016
|
|
|
678
|
-
def __repr__(self):
|
|
1017
|
+
def __repr__(self) -> str:
|
|
1018
|
+
"""Provides a string representation of the FlowNode instance.
|
|
1019
|
+
|
|
1020
|
+
Returns:
|
|
1021
|
+
A string showing the node's ID and type.
|
|
1022
|
+
"""
|
|
679
1023
|
return f"Node id: {self.node_id} ({self.node_type})"
|
|
680
1024
|
|
|
681
|
-
def _get_readable_schema(self):
|
|
1025
|
+
def _get_readable_schema(self) -> List[dict] | None:
|
|
1026
|
+
"""Helper to get a simplified, dictionary representation of the output schema.
|
|
1027
|
+
|
|
1028
|
+
Returns:
|
|
1029
|
+
A list of dictionaries, each with 'column_name' and 'data_type'.
|
|
1030
|
+
"""
|
|
682
1031
|
if self.is_setup:
|
|
683
1032
|
output = []
|
|
684
1033
|
for s in self.schema:
|
|
685
1034
|
output.append(dict(column_name=s.column_name, data_type=s.data_type))
|
|
686
1035
|
return output
|
|
687
1036
|
|
|
688
|
-
def get_repr(self):
|
|
1037
|
+
def get_repr(self) -> dict:
|
|
1038
|
+
"""Gets a detailed dictionary representation of the node's state.
|
|
1039
|
+
|
|
1040
|
+
Returns:
|
|
1041
|
+
A dictionary containing key information about the node.
|
|
1042
|
+
"""
|
|
689
1043
|
return dict(FlowNode=
|
|
690
1044
|
dict(node_id=self.node_id,
|
|
691
1045
|
step_name=self.__name__,
|
|
@@ -693,30 +1047,66 @@ class FlowNode:
|
|
|
693
1047
|
output_schema=self._get_readable_schema()))
|
|
694
1048
|
|
|
695
1049
|
@property
|
|
696
|
-
def number_of_leads_to_nodes(self) -> int:
|
|
1050
|
+
def number_of_leads_to_nodes(self) -> int | None:
|
|
1051
|
+
"""Counts the number of downstream node connections.
|
|
1052
|
+
|
|
1053
|
+
Returns:
|
|
1054
|
+
The number of nodes this node leads to.
|
|
1055
|
+
"""
|
|
697
1056
|
if self.is_setup:
|
|
698
1057
|
return len(self.leads_to_nodes)
|
|
699
1058
|
|
|
700
1059
|
@property
|
|
701
1060
|
def has_next_step(self) -> bool:
|
|
1061
|
+
"""Checks if this node has any downstream connections.
|
|
1062
|
+
|
|
1063
|
+
Returns:
|
|
1064
|
+
True if it has at least one downstream node.
|
|
1065
|
+
"""
|
|
702
1066
|
return len(self.leads_to_nodes) > 0
|
|
703
1067
|
|
|
704
1068
|
@property
|
|
705
1069
|
def has_input(self) -> bool:
|
|
1070
|
+
"""Checks if this node has any input connections.
|
|
1071
|
+
|
|
1072
|
+
Returns:
|
|
1073
|
+
True if it has at least one input node.
|
|
1074
|
+
"""
|
|
706
1075
|
return len(self.all_inputs) > 0
|
|
707
1076
|
|
|
708
1077
|
@property
|
|
709
1078
|
def singular_input(self) -> bool:
|
|
1079
|
+
"""Checks if the node template specifies exactly one input.
|
|
1080
|
+
|
|
1081
|
+
Returns:
|
|
1082
|
+
True if the node is a single-input type.
|
|
1083
|
+
"""
|
|
710
1084
|
return self.node_template.input == 1
|
|
711
1085
|
|
|
712
1086
|
@property
|
|
713
1087
|
def singular_main_input(self) -> "FlowNode":
|
|
1088
|
+
"""Gets the input node, assuming it is a single-input type.
|
|
1089
|
+
|
|
1090
|
+
Returns:
|
|
1091
|
+
The single input FlowNode, or None.
|
|
1092
|
+
"""
|
|
714
1093
|
if self.singular_input:
|
|
715
1094
|
return self.all_inputs[0]
|
|
716
1095
|
|
|
717
1096
|
def get_table_example(self, include_data: bool = False) -> TableExample | None:
|
|
1097
|
+
"""Generates a `TableExample` model summarizing the node's output.
|
|
1098
|
+
|
|
1099
|
+
This can optionally include a sample of the data.
|
|
1100
|
+
|
|
1101
|
+
Args:
|
|
1102
|
+
include_data: If True, includes a data sample in the result.
|
|
1103
|
+
|
|
1104
|
+
Returns:
|
|
1105
|
+
A `TableExample` object, or None if the node is not set up.
|
|
1106
|
+
"""
|
|
718
1107
|
self.print('Getting a table example')
|
|
719
1108
|
if self.is_setup and include_data and self.node_stats.has_completed_last_run:
|
|
1109
|
+
|
|
720
1110
|
if self.node_template.node_group == 'output':
|
|
721
1111
|
self.print('getting the table example')
|
|
722
1112
|
return self.main_input[0].get_table_example(include_data)
|
|
@@ -749,10 +1139,16 @@ class FlowNode:
|
|
|
749
1139
|
table_schema=schema, columns=columns,
|
|
750
1140
|
data=[])
|
|
751
1141
|
|
|
752
|
-
def calculate_settings_out_select(self):
|
|
753
|
-
pass
|
|
754
|
-
|
|
755
1142
|
def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
|
|
1143
|
+
"""Gathers all necessary data for representing the node in the UI.
|
|
1144
|
+
|
|
1145
|
+
Args:
|
|
1146
|
+
flow_id: The ID of the parent flow.
|
|
1147
|
+
include_example: If True, includes data samples.
|
|
1148
|
+
|
|
1149
|
+
Returns:
|
|
1150
|
+
A `NodeData` object.
|
|
1151
|
+
"""
|
|
756
1152
|
node = NodeData(flow_id=flow_id,
|
|
757
1153
|
node_id=self.node_id,
|
|
758
1154
|
has_run=self.node_stats.has_run_with_current_setup,
|
|
@@ -772,15 +1168,30 @@ class FlowNode:
|
|
|
772
1168
|
return node
|
|
773
1169
|
|
|
774
1170
|
def get_output_data(self) -> TableExample:
|
|
1171
|
+
"""Gets the full output data sample for this node.
|
|
1172
|
+
|
|
1173
|
+
Returns:
|
|
1174
|
+
A `TableExample` object with data.
|
|
1175
|
+
"""
|
|
775
1176
|
return self.get_table_example(True)
|
|
776
1177
|
|
|
777
1178
|
def get_node_input(self) -> schemas.NodeInput:
|
|
1179
|
+
"""Creates a `NodeInput` schema object for representing this node in the UI.
|
|
1180
|
+
|
|
1181
|
+
Returns:
|
|
1182
|
+
A `NodeInput` object.
|
|
1183
|
+
"""
|
|
778
1184
|
return schemas.NodeInput(pos_y=self.setting_input.pos_y,
|
|
779
1185
|
pos_x=self.setting_input.pos_x,
|
|
780
1186
|
id=self.node_id,
|
|
781
1187
|
**self.node_template.__dict__)
|
|
782
1188
|
|
|
783
1189
|
def get_edge_input(self) -> List[schemas.NodeEdge]:
|
|
1190
|
+
"""Generates `NodeEdge` objects for all input connections to this node.
|
|
1191
|
+
|
|
1192
|
+
Returns:
|
|
1193
|
+
A list of `NodeEdge` objects.
|
|
1194
|
+
"""
|
|
784
1195
|
edges = []
|
|
785
1196
|
if self.node_inputs.main_inputs is not None:
|
|
786
1197
|
for i, main_input in enumerate(self.node_inputs.main_inputs):
|