Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (145)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
  5. flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
  6. flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
  7. flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
  8. flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
  9. flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
  10. flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
  11. flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  12. flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
  14. flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
  15. flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
  16. flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
  17. flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
  18. flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
  19. flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
  20. flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
  21. flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
  22. flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
  23. flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
  24. flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
  25. flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
  26. flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
  27. flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
  28. flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
  29. flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
  30. flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
  31. flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
  32. flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
  33. flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
  34. flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
  35. flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
  36. flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
  37. flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
  38. flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
  39. flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
  40. flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
  41. flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
  42. flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
  43. flowfile/web/static/assets/api-6ef0dcef.js +80 -0
  44. flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
  45. flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
  46. flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
  47. flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
  48. flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
  49. flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
  50. flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
  51. flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
  52. flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
  53. flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
  54. flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
  55. flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
  56. flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
  57. flowfile/web/static/index.html +1 -1
  58. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
  59. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
  60. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
  61. flowfile_core/__init__.py +3 -0
  62. flowfile_core/auth/jwt.py +39 -0
  63. flowfile_core/configs/node_store/nodes.py +9 -6
  64. flowfile_core/configs/settings.py +6 -5
  65. flowfile_core/database/connection.py +63 -15
  66. flowfile_core/database/init_db.py +0 -1
  67. flowfile_core/database/models.py +49 -2
  68. flowfile_core/flowfile/code_generator/code_generator.py +472 -17
  69. flowfile_core/flowfile/connection_manager/models.py +1 -1
  70. flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
  71. flowfile_core/flowfile/extensions.py +1 -1
  72. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
  73. flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
  74. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
  75. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
  76. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
  77. flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
  78. flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
  79. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  80. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
  81. flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
  82. flowfile_core/flowfile/flow_graph.py +718 -253
  83. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  84. flowfile_core/flowfile/flow_node/flow_node.py +563 -117
  85. flowfile_core/flowfile/flow_node/models.py +154 -20
  86. flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
  87. flowfile_core/flowfile/handler.py +2 -33
  88. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  89. flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
  90. flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
  91. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  92. flowfile_core/flowfile/utils.py +35 -26
  93. flowfile_core/main.py +35 -15
  94. flowfile_core/routes/cloud_connections.py +77 -0
  95. flowfile_core/routes/logs.py +2 -7
  96. flowfile_core/routes/public.py +1 -0
  97. flowfile_core/routes/routes.py +130 -90
  98. flowfile_core/routes/secrets.py +72 -14
  99. flowfile_core/schemas/__init__.py +8 -0
  100. flowfile_core/schemas/cloud_storage_schemas.py +215 -0
  101. flowfile_core/schemas/input_schema.py +121 -71
  102. flowfile_core/schemas/output_model.py +19 -3
  103. flowfile_core/schemas/schemas.py +150 -12
  104. flowfile_core/schemas/transform_schema.py +175 -35
  105. flowfile_core/utils/utils.py +40 -1
  106. flowfile_core/utils/validate_setup.py +41 -0
  107. flowfile_frame/__init__.py +9 -1
  108. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  109. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  110. flowfile_frame/expr.py +28 -1
  111. flowfile_frame/expr.pyi +76 -61
  112. flowfile_frame/flow_frame.py +481 -208
  113. flowfile_frame/flow_frame.pyi +140 -91
  114. flowfile_frame/flow_frame_methods.py +160 -22
  115. flowfile_frame/group_frame.py +3 -0
  116. flowfile_frame/utils.py +25 -3
  117. flowfile_worker/external_sources/s3_source/main.py +216 -0
  118. flowfile_worker/external_sources/s3_source/models.py +142 -0
  119. flowfile_worker/funcs.py +51 -6
  120. flowfile_worker/models.py +22 -2
  121. flowfile_worker/routes.py +40 -38
  122. flowfile_worker/utils.py +1 -1
  123. test_utils/s3/commands.py +46 -0
  124. test_utils/s3/data_generator.py +292 -0
  125. test_utils/s3/demo_data_generator.py +186 -0
  126. test_utils/s3/fixtures.py +214 -0
  127. flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
  128. flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
  129. flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
  130. flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
  131. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
  132. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
  133. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
  134. flowfile_core/schemas/defaults.py +0 -9
  135. flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
  136. flowfile_core/schemas/models.py +0 -193
  137. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
  138. flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
  139. flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
  140. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  141. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  142. {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  143. {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
  144. {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
  145. {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
@@ -1,4 +1,3 @@
1
-
2
1
  from typing import List, Union, Callable, Any, Optional, Generator, Literal
3
2
  from flowfile_core.configs import logger
4
3
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
@@ -6,6 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
6
5
  from flowfile_core.utils.arrow_reader import get_read_top_n
7
6
  from flowfile_core.schemas import input_schema, schemas
8
7
  from flowfile_core.configs.flow_logger import NodeLogger
8
+ from flowfile_core.configs.settings import SINGLE_FILE_MODE
9
9
 
10
10
  from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
11
11
  from flowfile_core.flowfile.utils import get_hash
@@ -13,13 +13,19 @@ from flowfile_core.configs.node_store import nodes as node_interface
13
13
  from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
14
14
  from time import sleep
15
15
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
16
- ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result, ExternalDatabaseFetcher, ExternalDatabaseWriter)
16
+ ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
17
+ ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
17
18
  from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
18
19
  NodeStepStats, NodeResults)
19
20
  from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
20
21
 
21
22
 
22
23
  class FlowNode:
24
+ """Represents a single node in a data flow graph.
25
+
26
+ This class manages the node's state, its data processing function,
27
+ and its connections to other nodes within the graph.
28
+ """
23
29
  parent_uuid: str
24
30
  node_type: str
25
31
  node_template: node_interface.NodeTemplate
@@ -31,15 +37,66 @@ class FlowNode:
31
37
  results: NodeResults
32
38
  node_information: Optional[schemas.NodeInformation] = None
33
39
  leads_to_nodes: List["FlowNode"] = [] # list with target flows, after execution the step will trigger those step(s)
40
+ user_provided_schema_callback: Optional[Callable] = None # user provided callback function for schema calculation
34
41
  _setting_input: Any = None
35
42
  _hash: Optional[str] = None # host this for caching results
36
43
  _function: Callable = None # the function that needs to be executed when triggered
44
+ _name: str = None # name of the node, used for display
37
45
  _schema_callback: Optional[SingleExecutionFuture] = None # Function that calculates the schema without executing
38
46
  _state_needs_reset: bool = False
39
- _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
40
- _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
47
+ _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
48
+ _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
49
+
50
+ def __init__(self, node_id: Union[str, int], function: Callable,
51
+ parent_uuid: str,
52
+ setting_input: Any,
53
+ name: str,
54
+ node_type: str,
55
+ input_columns: List[str] = None,
56
+ output_schema: List[FlowfileColumn] = None,
57
+ drop_columns: List[str] = None,
58
+ renew_schema: bool = True,
59
+ pos_x: float = 0,
60
+ pos_y: float = 0,
61
+ schema_callback: Callable = None,
62
+ ):
63
+ """Initializes a FlowNode instance.
64
+
65
+ Args:
66
+ node_id: Unique identifier for the node.
67
+ function: The core data processing function for the node.
68
+ parent_uuid: The UUID of the parent flow.
69
+ setting_input: The configuration/settings object for the node.
70
+ name: The name of the node.
71
+ node_type: The type identifier of the node (e.g., 'join', 'filter').
72
+ input_columns: List of column names expected as input.
73
+ output_schema: The schema of the columns to be added.
74
+ drop_columns: List of column names to be dropped.
75
+ renew_schema: Flag to indicate if the schema should be renewed.
76
+ pos_x: The x-coordinate on the canvas.
77
+ pos_y: The y-coordinate on the canvas.
78
+ schema_callback: A custom function to calculate the output schema.
79
+ """
80
+ self._name = None
81
+ self.parent_uuid = parent_uuid
82
+ self.post_init()
83
+ self.active = True
84
+ self.node_information.id = node_id
85
+ self.node_type = node_type
86
+ self.node_settings.renew_schema = renew_schema
87
+ self.update_node(function=function,
88
+ input_columns=input_columns,
89
+ output_schema=output_schema,
90
+ drop_columns=drop_columns,
91
+ setting_input=setting_input,
92
+ name=name,
93
+ pos_x=pos_x,
94
+ pos_y=pos_y,
95
+ schema_callback=schema_callback,
96
+ )
41
97
 
42
98
  def post_init(self):
99
+ """Initializes or resets the node's attributes to their default states."""
43
100
  self.node_inputs = NodeStepInputs()
44
101
  self.node_stats = NodeStepStats()
45
102
  self.node_settings = NodeStepSettings()
@@ -53,19 +110,65 @@ class FlowNode:
53
110
  self._state_needs_reset = False
54
111
 
55
112
  @property
56
- def state_needs_reset(self):
113
+ def state_needs_reset(self) -> bool:
114
+ """Checks if the node's state needs to be reset.
115
+
116
+ Returns:
117
+ True if a reset is required, False otherwise.
118
+ """
57
119
  return self._state_needs_reset
58
120
 
59
121
  @state_needs_reset.setter
60
122
  def state_needs_reset(self, v: bool):
123
+ """Sets the flag indicating that the node's state needs to be reset.
124
+
125
+ Args:
126
+ v: The boolean value to set.
127
+ """
61
128
  self._state_needs_reset = v
62
129
 
130
+ @staticmethod
131
+ def create_schema_callback_from_function(f: Callable) -> Callable[[], List[FlowfileColumn]]:
132
+ """Wraps a node's function to create a schema callback that extracts the schema.
133
+
134
+ Args:
135
+ f: The node's core function that returns a FlowDataEngine instance.
136
+
137
+ Returns:
138
+ A callable that, when executed, returns the output schema.
139
+ """
140
+ def schema_callback() -> List[FlowfileColumn]:
141
+ try:
142
+ logger.info('Executing the schema callback function based on the node function')
143
+ return f().schema
144
+ except Exception as e:
145
+ logger.warning(f'Error with the schema callback: {e}')
146
+ return []
147
+ return schema_callback
148
+
63
149
  @property
64
- def schema_callback(self):
150
+ def schema_callback(self) -> SingleExecutionFuture:
151
+ """Gets the schema callback function, creating one if it doesn't exist.
152
+
153
+ The callback is used for predicting the output schema without full execution.
154
+
155
+ Returns:
156
+ A SingleExecutionFuture instance wrapping the schema function.
157
+ """
158
+ if self._schema_callback is None:
159
+ if self.user_provided_schema_callback is not None:
160
+ self.schema_callback = self.user_provided_schema_callback
161
+ elif self.is_start:
162
+ self.schema_callback = self.create_schema_callback_from_function(self._function)
65
163
  return self._schema_callback
66
164
 
67
165
  @schema_callback.setter
68
166
  def schema_callback(self, f: Callable):
167
+ """Sets the schema callback function for the node.
168
+
169
+ Args:
170
+ f: The function to be used for schema calculation.
171
+ """
69
172
  if f is None:
70
173
  return
71
174
 
@@ -76,13 +179,27 @@ class FlowNode:
76
179
  return []
77
180
 
78
181
  self._schema_callback = SingleExecutionFuture(f, error_callback)
79
- self._schema_callback.start()
80
182
 
81
183
  @property
82
184
  def is_start(self) -> bool:
185
+ """Determines if the node is a starting node in the flow.
186
+
187
+ A starting node requires no inputs.
188
+
189
+ Returns:
190
+ True if the node is a start node, False otherwise.
191
+ """
83
192
  return not self.has_input and self.node_template.input == 0
84
193
 
85
194
  def get_input_type(self, node_id: int) -> List:
195
+ """Gets the type of connection ('main', 'left', 'right') for a given input node ID.
196
+
197
+ Args:
198
+ node_id: The ID of the input node.
199
+
200
+ Returns:
201
+ A list of connection types for that node ID.
202
+ """
86
203
  relation_type = []
87
204
  if node_id in [n.node_id for n in self.node_inputs.main_inputs]:
88
205
  relation_type.append('main')
@@ -92,36 +209,6 @@ class FlowNode:
92
209
  relation_type.append('right')
93
210
  return list(set(relation_type))
94
211
 
95
- def __init__(self, node_id: Union[str, int], function: Callable,
96
- parent_uuid: str,
97
- setting_input: Any,
98
- name: str,
99
- node_type: str,
100
- input_columns: List[str] = None,
101
- output_schema: List[FlowfileColumn] = None,
102
- drop_columns: List[str] = None,
103
- renew_schema: bool = True,
104
- pos_x: float = 0,
105
- pos_y: float = 0,
106
- schema_callback: Callable = None,
107
- ):
108
- self.parent_uuid = parent_uuid
109
- self.post_init()
110
- self.active = True
111
- self.node_information.id = node_id
112
- self.node_type = node_type
113
- self.node_settings.renew_schema = renew_schema
114
- self.update_node(function=function,
115
- input_columns=input_columns,
116
- output_schema=output_schema,
117
- drop_columns=drop_columns,
118
- setting_input=setting_input,
119
- name=name,
120
- pos_x=pos_x,
121
- pos_y=pos_y,
122
- schema_callback=schema_callback,
123
- )
124
-
125
212
  def update_node(self,
126
213
  function: Callable,
127
214
  input_columns: List[str] = None,
@@ -133,13 +220,28 @@ class FlowNode:
133
220
  pos_y: float = 0,
134
221
  schema_callback: Callable = None,
135
222
  ):
136
-
137
- self.schema_callback = schema_callback
138
- self.node_information.y_position = pos_y
139
- self.node_information.x_position = pos_x
223
+ """Updates the properties of the node.
224
+
225
+ This is called during initialization and when settings are changed.
226
+
227
+ Args:
228
+ function: The new core data processing function.
229
+ input_columns: The new list of input columns.
230
+ output_schema: The new schema of added columns.
231
+ drop_columns: The new list of dropped columns.
232
+ name: The new name for the node.
233
+ setting_input: The new settings object.
234
+ pos_x: The new x-coordinate.
235
+ pos_y: The new y-coordinate.
236
+ schema_callback: The new custom schema callback function.
237
+ """
238
+ self.user_provided_schema_callback = schema_callback
239
+ self.node_information.y_position = int(pos_y)
240
+ self.node_information.x_position = int(pos_x)
140
241
  self.node_information.setting_input = setting_input
141
242
  self.name = self.node_type if name is None else name
142
243
  self._function = function
244
+
143
245
  self.node_schema.input_columns = [] if input_columns is None else input_columns
144
246
  self.node_schema.output_columns = [] if output_schema is None else output_schema
145
247
  self.node_schema.drop_columns = [] if drop_columns is None else drop_columns
@@ -147,7 +249,6 @@ class FlowNode:
147
249
  if hasattr(setting_input, 'cache_results'):
148
250
  self.node_settings.cache_results = setting_input.cache_results
149
251
 
150
- self.setting_input = setting_input
151
252
  self.results.errors = None
152
253
  self.add_lead_to_in_depend_source()
153
254
  _ = self.hash
@@ -155,51 +256,102 @@ class FlowNode:
155
256
  if self.node_template is None:
156
257
  raise Exception(f'Node template {self.node_type} not found')
157
258
  self.node_default = node_interface.node_defaults.get(self.node_type)
259
+ self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
158
260
 
159
261
  @property
160
- def name(self):
262
+ def name(self) -> str:
263
+ """Gets the name of the node.
264
+
265
+ Returns:
266
+ The node's name.
267
+ """
161
268
  return self._name
162
269
 
163
270
  @name.setter
164
271
  def name(self, name: str):
272
+ """Sets the name of the node.
273
+
274
+ Args:
275
+ name: The new name.
276
+ """
165
277
  self._name = name
166
278
  self.__name__ = name
167
279
 
168
280
  @property
169
- def setting_input(self):
281
+ def setting_input(self) -> Any:
282
+ """Gets the node's specific configuration settings.
283
+
284
+ Returns:
285
+ The settings object.
286
+ """
170
287
  return self._setting_input
171
288
 
172
289
  @setting_input.setter
173
290
  def setting_input(self, setting_input: Any):
291
+ """Sets the node's configuration and triggers a reset if necessary.
292
+
293
+ Args:
294
+ setting_input: The new settings object.
295
+ """
296
+ is_manual_input = (self.node_type == 'manual_input' and
297
+ isinstance(setting_input, input_schema.NodeManualInput) and
298
+ isinstance(self._setting_input, input_schema.NodeManualInput)
299
+ )
300
+ if is_manual_input:
301
+ _ = self.hash
174
302
  self._setting_input = setting_input
175
303
  self.set_node_information()
176
- if self.node_type == 'manual_input' and isinstance(self._setting_input, input_schema.NodeManualInput):
177
- if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run:
178
- self.function = self.function.__class__(setting_input.raw_data_format)
304
+ if is_manual_input:
305
+ if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run_with_current_setup:
306
+ self.function = FlowDataEngine(setting_input.raw_data_format)
179
307
  self.reset()
180
308
  self.get_predicted_schema()
181
309
  elif self._setting_input is not None:
182
310
  self.reset()
183
311
 
184
312
  @property
185
- def node_id(self):
313
+ def node_id(self) -> Union[str, int]:
314
+ """Gets the unique identifier of the node.
315
+
316
+ Returns:
317
+ The node's ID.
318
+ """
186
319
  return self.node_information.id
187
320
 
188
321
  @property
189
- def left_input(self):
322
+ def left_input(self) -> Optional["FlowNode"]:
323
+ """Gets the node connected to the left input port.
324
+
325
+ Returns:
326
+ The left input FlowNode, or None.
327
+ """
190
328
  return self.node_inputs.left_input
191
329
 
192
330
  @property
193
- def right_input(self):
331
+ def right_input(self) -> Optional["FlowNode"]:
332
+ """Gets the node connected to the right input port.
333
+
334
+ Returns:
335
+ The right input FlowNode, or None.
336
+ """
194
337
  return self.node_inputs.right_input
195
338
 
196
339
  @property
197
340
  def main_input(self) -> List["FlowNode"]:
341
+ """Gets the list of nodes connected to the main input port(s).
342
+
343
+ Returns:
344
+ A list of main input FlowNodes.
345
+ """
198
346
  return self.node_inputs.main_inputs
199
347
 
200
348
  @property
201
- def is_correct(self):
202
- # Check if inputs meet requirements
349
+ def is_correct(self) -> bool:
350
+ """Checks if the node's input connections satisfy its template requirements.
351
+
352
+ Returns:
353
+ True if connections are valid, False otherwise.
354
+ """
203
355
  if isinstance(self.setting_input, input_schema.NodePromise):
204
356
  return False
205
357
  return (self.node_template.input == len(self.node_inputs.get_all_inputs()) or
@@ -207,6 +359,10 @@ class FlowNode:
207
359
  (self.node_template.multi and self.node_template.can_be_start))
208
360
 
209
361
  def set_node_information(self):
362
+ """Populates the `node_information` attribute with the current state.
363
+
364
+ This includes the node's connections, settings, and position.
365
+ """
210
366
  logger.info('setting node information')
211
367
  node_information = self.node_information
212
368
  node_information.left_input_id = self.node_inputs.left_input.node_id if self.left_input else None
@@ -221,43 +377,76 @@ class FlowNode:
221
377
  node_information.type = self.node_type
222
378
 
223
379
  def get_node_information(self) -> schemas.NodeInformation:
380
+ """Updates and returns the node's information object.
381
+
382
+ Returns:
383
+ The `NodeInformation` object for this node.
384
+ """
224
385
  self.set_node_information()
225
386
  return self.node_information
226
387
 
227
388
  @property
228
- def function(self):
389
+ def function(self) -> Callable:
390
+ """Gets the core processing function of the node.
391
+
392
+ Returns:
393
+ The callable function.
394
+ """
229
395
  return self._function
230
396
 
231
- def reset_hash(self) -> bool:
232
- old_hash = self._hash
233
- self._hash = None
234
- if self.hash != old_hash:
235
- if self.node_settings.cache_results:
236
- self.remove_cache()
237
- return True
238
- return False
397
+ @function.setter
398
+ def function(self, function: Callable):
399
+ """Sets the core processing function of the node.
400
+
401
+ Args:
402
+ function: The new callable function.
403
+ """
404
+ self._function = function
239
405
 
240
406
  @property
241
407
  def all_inputs(self) -> List["FlowNode"]:
408
+ """Gets a list of all nodes connected to any input port.
409
+
410
+ Returns:
411
+ A list of all input FlowNodes.
412
+ """
242
413
  return self.node_inputs.get_all_inputs()
243
414
 
244
- def calculate_hash(self, setting_input: Any):
415
+ def calculate_hash(self, setting_input: Any) -> str:
416
+ """Calculates a hash based on settings and input node hashes.
417
+
418
+ Args:
419
+ setting_input: The node's settings object to be included in the hash.
420
+
421
+ Returns:
422
+ A string hash value.
423
+ """
245
424
  depends_on_hashes = [_node.hash for _node in self.all_inputs]
246
425
  node_data_hash = get_hash(setting_input)
247
426
  return get_hash(depends_on_hashes + [node_data_hash, self.parent_uuid])
248
427
 
249
428
  @property
250
- def hash(self):
429
+ def hash(self) -> str:
430
+ """Gets the cached hash for the node, calculating it if it doesn't exist.
431
+
432
+ Returns:
433
+ The string hash value.
434
+ """
251
435
  if not self._hash:
252
436
  self._hash = self.calculate_hash(self.setting_input)
253
437
  return self._hash
254
438
 
255
- @function.setter
256
- def function(self, function: Callable):
257
- self._function = function
258
- # self.reset()
439
+ def add_node_connection(self, from_node: "FlowNode",
440
+ insert_type: Literal['main', 'left', 'right'] = 'main') -> None:
441
+ """Adds a connection from a source node to this node.
442
+
443
+ Args:
444
+ from_node: The node to connect from.
445
+ insert_type: The type of input to connect to ('main', 'left', 'right').
259
446
 
260
- def add_node_connection(self, from_node: "FlowNode", insert_type: Literal['main', 'left', 'right'] = 'main'):
447
+ Raises:
448
+ Exception: If the insert_type is invalid.
449
+ """
261
450
  from_node.leads_to_nodes.append(self)
262
451
  if insert_type == 'main':
263
452
  if self.node_template.input <= 2 or self.node_inputs.main_inputs is None:
@@ -276,22 +465,41 @@ class FlowNode:
276
465
  self.reset()
277
466
  from_node.reset()
278
467
 
279
- def evaluate_nodes(self, deep: bool = False):
468
+ def evaluate_nodes(self, deep: bool = False) -> None:
469
+ """Triggers a state reset for all directly connected downstream nodes.
470
+
471
+ Args:
472
+ deep: If True, the reset propagates recursively through the entire downstream graph.
473
+ """
280
474
  for node in self.leads_to_nodes:
281
475
  self.print(f'resetting node: {node.node_id}')
282
476
  node.reset(deep)
283
477
 
284
- def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn:
478
+ def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn | None:
479
+ """Retrieves the schema for a specific column from the output schema.
480
+
481
+ Args:
482
+ col_name: The name of the column.
483
+
484
+ Returns:
485
+ The FlowfileColumn object for that column, or None if not found.
486
+ """
285
487
  for s in self.schema:
286
488
  if s.column_name == col_name:
287
489
  return s
288
490
 
289
- def get_predicted_schema(self, force: bool = False):
290
- """
291
- Method to get a predicted schema based on the columns that are dropped and added
292
- :return:
491
+ def get_predicted_schema(self, force: bool = False) -> List[FlowfileColumn] | None:
492
+ """Predicts the output schema of the node without full execution.
493
+
494
+ It uses the schema_callback or infers from predicted data.
495
+
496
+ Args:
497
+ force: If True, forces recalculation even if a predicted schema exists.
498
+
499
+ Returns:
500
+ A list of FlowfileColumn objects representing the predicted schema.
293
501
  """
294
- if self.node_schema.predicted_schema is not None and not force:
502
+ if self.node_schema.predicted_schema and not force:
295
503
  return self.node_schema.predicted_schema
296
504
  if self.schema_callback is not None and (self.node_schema.predicted_schema is None or force):
297
505
  self.print('Getting the data from a schema callback')
@@ -299,7 +507,7 @@ class FlowNode:
299
507
  # Force the schema callback to reset, so that it will be executed again
300
508
  self.schema_callback.reset()
301
509
  schema = self.schema_callback()
302
- if schema is not None:
510
+ if schema is not None and len(schema) > 0:
303
511
  self.print('Calculating the schema based on the schema callback')
304
512
  self.node_schema.predicted_schema = schema
305
513
  return self.node_schema.predicted_schema
@@ -311,6 +519,11 @@ class FlowNode:
311
519
 
312
520
  @property
313
521
  def is_setup(self) -> bool:
522
+ """Checks if the node has been properly configured and is ready for execution.
523
+
524
+ Returns:
525
+ True if the node is set up, False otherwise.
526
+ """
314
527
  if not self.node_information.is_setup:
315
528
  if self.function.__name__ != 'placeholder':
316
529
  self.node_information.is_setup = True
@@ -318,16 +531,31 @@ class FlowNode:
318
531
  return self.node_information.is_setup
319
532
 
320
533
  def print(self, v: Any):
534
+ """Helper method to log messages with node context.
535
+
536
+ Args:
537
+ v: The message or value to log.
538
+ """
321
539
  logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
322
540
 
323
- def get_resulting_data(self) -> FlowDataEngine:
541
+ def get_resulting_data(self) -> FlowDataEngine | None:
542
+ """Executes the node's function to produce the actual output data.
543
+
544
+ Handles both regular functions and external data sources.
545
+
546
+ Returns:
547
+ A FlowDataEngine instance containing the result, or None on error.
548
+
549
+ Raises:
550
+ Exception: Propagates exceptions from the node's function execution.
551
+ """
324
552
  if self.is_setup:
325
553
  if self.results.resulting_data is None and self.results.errors is None:
326
554
  self.print('getting resulting data')
327
555
  try:
328
556
  if isinstance(self.function, FlowDataEngine):
329
557
  fl: FlowDataEngine = self.function
330
- elif self.node_type in ('external_source', 'airbyte_reader'):
558
+ elif self.node_type == 'external_source':
331
559
  fl: FlowDataEngine = self.function()
332
560
  fl.collect_external()
333
561
  self.node_settings.streamable = False
@@ -342,11 +570,19 @@ class FlowNode:
342
570
  except Exception as e:
343
571
  self.results.resulting_data = FlowDataEngine()
344
572
  self.results.errors = str(e)
345
- self.node_stats.has_run = False
573
+ self.node_stats.has_run_with_current_setup = False
574
+ self.node_stats.has_completed_last_run = False
346
575
  raise e
347
576
  return self.results.resulting_data
348
577
 
349
- def _predicted_data_getter(self) -> FlowDataEngine|None:
578
+ def _predicted_data_getter(self) -> FlowDataEngine | None:
579
+ """Internal helper to get a predicted data result.
580
+
581
+ This calls the function with predicted data from input nodes.
582
+
583
+ Returns:
584
+ A FlowDataEngine instance with predicted data, or an empty one on error.
585
+ """
350
586
  try:
351
587
  fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
352
588
  return fl
@@ -363,8 +599,16 @@ class FlowNode:
363
599
  logger.warning(e)
364
600
 
365
601
  def get_predicted_resulting_data(self) -> FlowDataEngine:
602
+ """Creates a `FlowDataEngine` instance based on the predicted schema.
603
+
604
+ This avoids executing the node's full logic.
605
+
606
+ Returns:
607
+ A FlowDataEngine instance with a schema but no data.
608
+ """
366
609
  if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
367
610
  self.print('Getting data based on the schema')
611
+
368
612
  _s = self.schema_callback() if self.node_schema.result_schema is None else self.node_schema.result_schema
369
613
  return FlowDataEngine.create_from_schema(_s)
370
614
  else:
@@ -375,17 +619,28 @@ class FlowNode:
375
619
  return fl
376
620
 
377
621
  def add_lead_to_in_depend_source(self):
622
+ """Ensures this node is registered in the `leads_to_nodes` list of its inputs."""
378
623
  for input_node in self.all_inputs:
379
624
  if self.node_id not in [n.node_id for n in input_node.leads_to_nodes]:
380
625
  input_node.leads_to_nodes.append(self)
381
626
 
382
627
  def get_all_dependent_nodes(self) -> Generator["FlowNode", None, None]:
628
+ """Yields all downstream nodes recursively.
629
+
630
+ Returns:
631
+ A generator of all dependent FlowNode objects.
632
+ """
383
633
  for node in self.leads_to_nodes:
384
634
  yield node
385
635
  for n in node.get_all_dependent_nodes():
386
636
  yield n
387
637
 
388
638
  def get_all_dependent_node_ids(self) -> Generator[int, None, None]:
639
+ """Yields the IDs of all downstream nodes recursively.
640
+
641
+ Returns:
642
+ A generator of all dependent node IDs.
643
+ """
389
644
  for node in self.leads_to_nodes:
390
645
  yield node.node_id
391
646
  for n in node.get_all_dependent_node_ids():
@@ -393,6 +648,13 @@ class FlowNode:
393
648
 
394
649
  @property
395
650
  def schema(self) -> List[FlowfileColumn]:
651
+ """Gets the definitive output schema of the node.
652
+
653
+ If not already run, it falls back to the predicted schema.
654
+
655
+ Returns:
656
+ A list of FlowfileColumn objects.
657
+ """
396
658
  try:
397
659
  if self.is_setup and self.results.errors is None:
398
660
  if self.node_schema.result_schema is not None and len(self.node_schema.result_schema) > 0:
@@ -405,31 +667,42 @@ class FlowNode:
405
667
  return self.node_schema.result_schema
406
668
  else:
407
669
  return []
408
- except:
670
+ except Exception as e:
671
+ logger.error(e)
409
672
  return []
410
673
 
411
- def load_from_cache(self) -> FlowDataEngine:
412
- if results_exists(self.hash):
413
- try:
414
- return FlowDataEngine(self._fetch_cached_df.get_result())
415
- except Exception as e:
416
- logger.error(e)
417
-
418
674
  def remove_cache(self):
675
+ """Removes cached results for this node.
676
+
677
+ Note: Currently not fully implemented.
678
+ """
679
+
419
680
  if results_exists(self.hash):
420
681
  logger.warning('Not implemented')
421
682
 
422
683
  def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
423
684
  execution_location: schemas.ExecutionLocationsLiteral = "auto") -> bool:
424
- if execution_location == "local":
685
+ """Determines if the node needs to be executed.
686
+
687
+ The decision is based on its run state, caching settings, and execution mode.
688
+
689
+ Args:
690
+ performance_mode: True if the flow is in performance mode.
691
+ node_logger: The logger instance for this node.
692
+ execution_location: The target execution location.
693
+
694
+ Returns:
695
+ True if the node should be run, False otherwise.
696
+ """
697
+ if execution_location == "local" or SINGLE_FILE_MODE:
425
698
  return False
699
+
426
700
  flow_logger = logger if node_logger is None else node_logger
427
701
  cache_result_exists = results_exists(self.hash)
428
- if not self.node_stats.has_run:
702
+ if not self.node_stats.has_run_with_current_setup:
429
703
  flow_logger.info('Node has not run, needs to run')
430
704
  return True
431
705
  if self.node_settings.cache_results and cache_result_exists:
432
-
433
706
  return False
434
707
  elif self.node_settings.cache_results and not cache_result_exists:
435
708
  return True
@@ -439,9 +712,34 @@ class FlowNode:
439
712
  return True
440
713
 
441
714
  def __call__(self, *args, **kwargs):
715
+ """Makes the node instance callable, acting as an alias for execute_node."""
442
716
  self.execute_node(*args, **kwargs)
443
717
 
718
+ def execute_full_local(self, performance_mode: bool = False) -> None:
719
+ """Executes the node's logic locally, including example data generation.
720
+
721
+ Args:
722
+ performance_mode: If True, skips generating example data.
723
+
724
+ Raises:
725
+ Exception: Propagates exceptions from the execution.
726
+ """
727
+ if self.results.resulting_data is None and not performance_mode:
728
+ self.results.resulting_data = self.get_resulting_data()
729
+ self.results.example_data_generator = lambda: self.get_resulting_data().get_sample(100).to_arrow()
730
+ self.node_schema.result_schema = self.results.resulting_data.schema
731
+ self.node_stats.has_completed_last_run = True
732
+
444
733
  def execute_local(self, flow_id: int, performance_mode: bool = False):
734
+ """Executes the node's logic locally.
735
+
736
+ Args:
737
+ flow_id: The ID of the parent flow.
738
+ performance_mode: If True, skips generating example data.
739
+
740
+ Raises:
741
+ Exception: Propagates exceptions from the execution.
742
+ """
445
743
  try:
446
744
  resulting_data = self.get_resulting_data()
447
745
  if not performance_mode:
@@ -449,23 +747,32 @@ class FlowNode:
449
747
  wait_on_completion=True, node_id=self.node_id, flow_id=flow_id)
450
748
  self.store_example_data_generator(external_sampler)
451
749
  if self.results.errors is None and not self.node_stats.is_canceled:
452
- self.node_stats.has_run = True
750
+ self.node_stats.has_run_with_current_setup = True
453
751
  self.node_schema.result_schema = resulting_data.schema
454
752
 
455
753
  except Exception as e:
456
754
  logger.warning(f"Error with step {self.__name__}")
457
755
  logger.error(str(e))
458
756
  self.results.errors = str(e)
459
- self.node_stats.has_run = False
757
+ self.node_stats.has_run_with_current_setup = False
758
+ self.node_stats.has_completed_last_run = False
460
759
  raise e
461
760
 
462
- if self.node_stats.has_run:
761
+ if self.node_stats.has_run_with_current_setup:
463
762
  for step in self.leads_to_nodes:
464
763
  if not self.node_settings.streamable:
465
764
  step.node_settings.streamable = self.node_settings.streamable
466
765
 
467
766
  def execute_remote(self, performance_mode: bool = False, node_logger: NodeLogger = None):
468
- # flow_logger = logger if flow_logger is None else flow_logger
767
+ """Executes the node's logic remotely or handles cached results.
768
+
769
+ Args:
770
+ performance_mode: If True, skips generating example data.
771
+ node_logger: The logger for this node execution.
772
+
773
+ Raises:
774
+ Exception: If the node_logger is not provided or if execution fails.
775
+ """
469
776
  if node_logger is None:
470
777
  raise Exception('Node logger is not defined')
471
778
  if self.node_settings.cache_results and results_exists(self.hash):
@@ -477,7 +784,7 @@ class FlowNode:
477
784
  node_logger.warning('Failed to read the cache, rerunning the code')
478
785
  if self.node_type == 'output':
479
786
  self.results.resulting_data = self.get_resulting_data()
480
- self.node_stats.has_run = True
787
+ self.node_stats.has_run_with_current_setup = True
481
788
  return
482
789
  try:
483
790
  self.get_resulting_data()
@@ -498,7 +805,7 @@ class FlowNode:
498
805
  )
499
806
  if not performance_mode:
500
807
  self.store_example_data_generator(external_df_fetcher)
501
- self.node_stats.has_run = True
808
+ self.node_stats.has_run_with_current_setup = True
502
809
 
503
810
  except Exception as e:
504
811
  node_logger.error('Error with external process')
@@ -522,11 +829,15 @@ class FlowNode:
522
829
  self._fetch_cached_df = None
523
830
 
524
831
  def prepare_before_run(self):
832
+ """Resets results and errors before a new execution."""
833
+
525
834
  self.results.errors = None
526
835
  self.results.resulting_data = None
527
836
  self.results.example_data = None
528
837
 
529
838
  def cancel(self):
839
+ """Cancels an ongoing external process if one is running."""
840
+
530
841
  if self._fetch_cached_df is not None:
531
842
  self._fetch_cached_df.cancel()
532
843
  self.node_stats.is_canceled = True
@@ -536,15 +847,29 @@ class FlowNode:
536
847
 
537
848
  def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
538
849
  performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
850
+ """Orchestrates the execution, handling location, caching, and retries.
851
+
852
+ Args:
853
+ run_location: The location for execution ('local', 'remote').
854
+ reset_cache: If True, forces removal of any existing cache.
855
+ performance_mode: If True, optimizes for speed over diagnostics.
856
+ retry: If True, allows retrying execution on recoverable errors.
857
+ node_logger: The logger for this node execution.
858
+
859
+ Raises:
860
+ Exception: If the node_logger is not defined.
861
+ """
539
862
  if node_logger is None:
540
863
  raise Exception('Flow logger is not defined')
541
864
  # node_logger = flow_logger.get_node_logger(self.node_id)
542
865
  if reset_cache:
543
866
  self.remove_cache()
544
- self.node_stats.has_run = False
867
+ self.node_stats.has_run_with_current_setup = False
868
+ self.node_stats.has_completed_last_run = False
545
869
  if self.is_setup:
546
870
  node_logger.info(f'Starting to run {self.__name__}')
547
- if self.needs_run(performance_mode, node_logger, run_location):
871
+ if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
872
+ and not (run_location == 'local' or SINGLE_FILE_MODE)):
548
873
  self.prepare_before_run()
549
874
  try:
550
875
  if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
@@ -572,16 +897,30 @@ class FlowNode:
572
897
  performance_mode=performance_mode, retry=False,
573
898
  node_logger=node_logger)
574
899
  else:
575
- self.node_stats.has_run = False
576
900
  self.results.errors = str(e)
577
901
  node_logger.error(f'Error with running the node: {e}')
578
-
902
+ elif ((run_location == 'local' or SINGLE_FILE_MODE) and (not self.node_stats.has_run_with_current_setup
903
+ or self.node_template.node_group == "output")):
904
+ try:
905
+ node_logger.info('Executing fully locally')
906
+ self.execute_full_local(performance_mode)
907
+ except Exception as e:
908
+ self.results.errors = str(e)
909
+ node_logger.error(f'Error with running the node: {e}')
910
+ self.node_stats.error = str(e)
911
+ self.node_stats.has_completed_last_run = False
912
+ self.node_stats.has_run_with_current_setup = True
579
913
  else:
580
914
  node_logger.info('Node has already run, not running the node')
581
915
  else:
582
916
  node_logger.warning(f'Node {self.__name__} is not setup, cannot run the node')
583
917
 
584
918
  def store_example_data_generator(self, external_df_fetcher: ExternalDfFetcher | ExternalSampler):
919
+ """Stores a generator function for fetching a sample of the result data.
920
+
921
+ Args:
922
+ external_df_fetcher: The process that generated the sample data.
923
+ """
585
924
  if external_df_fetcher.status is not None:
586
925
  file_ref = external_df_fetcher.status.file_ref
587
926
  self.results.example_data_path = file_ref
@@ -590,23 +929,48 @@ class FlowNode:
590
929
  logger.error('Could not get the sample data, the external process is not ready')
591
930
 
592
931
  def needs_reset(self) -> bool:
932
+ """Checks if the node's hash has changed, indicating an outdated state.
933
+
934
+ Returns:
935
+ True if the calculated hash differs from the stored hash.
936
+ """
593
937
  return self._hash != self.calculate_hash(self.setting_input)
594
938
 
595
939
  def reset(self, deep: bool = False):
940
+ """Resets the node's execution state and schema information.
941
+
942
+ This also triggers a reset on all downstream nodes.
943
+
944
+ Args:
945
+ deep: If True, forces a reset even if the hash hasn't changed.
946
+ """
596
947
  needs_reset = self.needs_reset() or deep
597
948
  if needs_reset:
598
949
  logger.info(f'{self.node_id}: Node needs reset')
599
- self.node_stats.has_run = False
950
+ self.node_stats.has_run_with_current_setup = False
600
951
  self.results.reset()
601
- if self.schema_callback:
602
- self.schema_callback.reset()
952
+ if self.is_correct:
953
+ self._schema_callback = None # Ensure the schema callback is reset
954
+ if self.schema_callback:
955
+ logger.info(f'{self.node_id}: Resetting the schema callback')
956
+ self.schema_callback.start()
603
957
  self.node_schema.result_schema = None
604
958
  self.node_schema.predicted_schema = None
605
959
  self._hash = None
606
960
  self.node_information.is_setup = None
961
+ self.results.errors = None
607
962
  self.evaluate_nodes()
963
+ _ = self.hash # Recalculate the hash after reset
608
964
 
609
965
  def delete_lead_to_node(self, node_id: int) -> bool:
966
+ """Removes a connection to a specific downstream node.
967
+
968
+ Args:
969
+ node_id: The ID of the downstream node to disconnect.
970
+
971
+ Returns:
972
+ True if the connection was found and removed, False otherwise.
973
+ """
610
974
  logger.info(f'Deleting lead to node: {node_id}')
611
975
  for i, lead_to_node in enumerate(self.leads_to_nodes):
612
976
  logger.info(f'Checking lead to node: {lead_to_node.node_id}')
@@ -618,7 +982,16 @@ class FlowNode:
618
982
 
619
983
  def delete_input_node(self, node_id: int, connection_type: input_schema.InputConnectionClass = 'input-0',
620
984
  complete: bool = False) -> bool:
621
- # connection type must be in right, left or main
985
+ """Removes a connection from a specific input node.
986
+
987
+ Args:
988
+ node_id: The ID of the input node to disconnect.
989
+ connection_type: The specific input handle (e.g., 'input-0', 'input-1').
990
+ complete: If True, tries to delete from all input types.
991
+
992
+ Returns:
993
+ True if a connection was found and removed, False otherwise.
994
+ """
622
995
  deleted: bool = False
623
996
  if connection_type == 'input-0':
624
997
  for i, node in enumerate(self.node_inputs.main_inputs):
@@ -641,17 +1014,32 @@ class FlowNode:
641
1014
  self.reset()
642
1015
  return deleted
643
1016
 
644
- def __repr__(self):
1017
+ def __repr__(self) -> str:
1018
+ """Provides a string representation of the FlowNode instance.
1019
+
1020
+ Returns:
1021
+ A string showing the node's ID and type.
1022
+ """
645
1023
  return f"Node id: {self.node_id} ({self.node_type})"
646
1024
 
647
- def _get_readable_schema(self):
1025
+ def _get_readable_schema(self) -> List[dict] | None:
1026
+ """Helper to get a simplified, dictionary representation of the output schema.
1027
+
1028
+ Returns:
1029
+ A list of dictionaries, each with 'column_name' and 'data_type'.
1030
+ """
648
1031
  if self.is_setup:
649
1032
  output = []
650
1033
  for s in self.schema:
651
1034
  output.append(dict(column_name=s.column_name, data_type=s.data_type))
652
1035
  return output
653
1036
 
654
- def get_repr(self):
1037
+ def get_repr(self) -> dict:
1038
+ """Gets a detailed dictionary representation of the node's state.
1039
+
1040
+ Returns:
1041
+ A dictionary containing key information about the node.
1042
+ """
655
1043
  return dict(FlowNode=
656
1044
  dict(node_id=self.node_id,
657
1045
  step_name=self.__name__,
@@ -659,33 +1047,70 @@ class FlowNode:
659
1047
  output_schema=self._get_readable_schema()))
660
1048
 
661
1049
  @property
662
- def number_of_leads_to_nodes(self) -> int:
1050
+ def number_of_leads_to_nodes(self) -> int | None:
1051
+ """Counts the number of downstream node connections.
1052
+
1053
+ Returns:
1054
+ The number of nodes this node leads to.
1055
+ """
663
1056
  if self.is_setup:
664
1057
  return len(self.leads_to_nodes)
665
1058
 
666
1059
  @property
667
1060
  def has_next_step(self) -> bool:
1061
+ """Checks if this node has any downstream connections.
1062
+
1063
+ Returns:
1064
+ True if it has at least one downstream node.
1065
+ """
668
1066
  return len(self.leads_to_nodes) > 0
669
1067
 
670
1068
  @property
671
1069
  def has_input(self) -> bool:
1070
+ """Checks if this node has any input connections.
1071
+
1072
+ Returns:
1073
+ True if it has at least one input node.
1074
+ """
672
1075
  return len(self.all_inputs) > 0
673
1076
 
674
1077
  @property
675
1078
  def singular_input(self) -> bool:
1079
+ """Checks if the node template specifies exactly one input.
1080
+
1081
+ Returns:
1082
+ True if the node is a single-input type.
1083
+ """
676
1084
  return self.node_template.input == 1
677
1085
 
678
1086
  @property
679
1087
  def singular_main_input(self) -> "FlowNode":
1088
+ """Gets the input node, assuming it is a single-input type.
1089
+
1090
+ Returns:
1091
+ The single input FlowNode, or None.
1092
+ """
680
1093
  if self.singular_input:
681
1094
  return self.all_inputs[0]
682
1095
 
683
1096
  def get_table_example(self, include_data: bool = False) -> TableExample | None:
1097
+ """Generates a `TableExample` model summarizing the node's output.
1098
+
1099
+ This can optionally include a sample of the data.
1100
+
1101
+ Args:
1102
+ include_data: If True, includes a data sample in the result.
1103
+
1104
+ Returns:
1105
+ A `TableExample` object, or None if the node is not set up.
1106
+ """
684
1107
  self.print('Getting a table example')
685
- if self.node_type == 'output':
686
- self.print('getting the table example')
687
- return self.main_input[0].get_table_example(include_data)
688
- if self.is_setup and include_data:
1108
+ if self.is_setup and include_data and self.node_stats.has_completed_last_run:
1109
+
1110
+ if self.node_template.node_group == 'output':
1111
+ self.print('getting the table example')
1112
+ return self.main_input[0].get_table_example(include_data)
1113
+
689
1114
  logger.info('getting the table example since the node has run')
690
1115
  example_data_getter = self.results.example_data_generator
691
1116
  if example_data_getter is not None:
@@ -714,13 +1139,19 @@ class FlowNode:
714
1139
  table_schema=schema, columns=columns,
715
1140
  data=[])
716
1141
 
717
- def calculate_settings_out_select(self):
718
- pass
719
-
720
1142
  def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
1143
+ """Gathers all necessary data for representing the node in the UI.
1144
+
1145
+ Args:
1146
+ flow_id: The ID of the parent flow.
1147
+ include_example: If True, includes data samples.
1148
+
1149
+ Returns:
1150
+ A `NodeData` object.
1151
+ """
721
1152
  node = NodeData(flow_id=flow_id,
722
1153
  node_id=self.node_id,
723
- has_run=self.node_stats.has_run,
1154
+ has_run=self.node_stats.has_run_with_current_setup,
724
1155
  setting_input=self.setting_input,
725
1156
  flow_type=self.node_type)
726
1157
  if self.main_input:
@@ -737,15 +1168,30 @@ class FlowNode:
737
1168
  return node
738
1169
 
739
1170
  def get_output_data(self) -> TableExample:
1171
+ """Gets the full output data sample for this node.
1172
+
1173
+ Returns:
1174
+ A `TableExample` object with data.
1175
+ """
740
1176
  return self.get_table_example(True)
741
1177
 
742
1178
  def get_node_input(self) -> schemas.NodeInput:
1179
+ """Creates a `NodeInput` schema object for representing this node in the UI.
1180
+
1181
+ Returns:
1182
+ A `NodeInput` object.
1183
+ """
743
1184
  return schemas.NodeInput(pos_y=self.setting_input.pos_y,
744
1185
  pos_x=self.setting_input.pos_x,
745
1186
  id=self.node_id,
746
1187
  **self.node_template.__dict__)
747
1188
 
748
1189
  def get_edge_input(self) -> List[schemas.NodeEdge]:
1190
+ """Generates `NodeEdge` objects for all input connections to this node.
1191
+
1192
+ Returns:
1193
+ A list of `NodeEdge` objects.
1194
+ """
749
1195
  edges = []
750
1196
  if self.node_inputs.main_inputs is not None:
751
1197
  for i, main_input in enumerate(self.node_inputs.main_inputs):