Flowfile: flowfile-0.3.6-py3-none-any.whl → flowfile-0.3.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (98)
  1. flowfile/__init__.py +27 -6
  2. flowfile/api.py +1 -0
  3. flowfile/web/__init__.py +2 -2
  4. flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
  5. flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
  6. flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
  7. flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
  8. flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
  9. flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
  10. flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
  11. flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
  12. flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
  13. flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
  14. flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
  15. flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
  16. flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
  17. flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
  18. flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
  19. flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
  20. flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
  21. flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
  22. flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
  23. flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
  24. flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
  25. flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
  26. flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
  27. flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
  28. flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
  29. flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
  30. flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
  31. flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
  32. flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
  33. flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
  34. flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
  35. flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
  36. flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
  37. flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
  38. flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
  39. flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
  40. flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
  41. flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
  42. flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
  43. flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
  44. flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
  45. flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
  46. flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
  47. flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
  48. flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
  49. flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
  50. flowfile/web/static/index.html +1 -1
  51. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/METADATA +2 -2
  52. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/RECORD +96 -94
  53. flowfile_core/__init__.py +1 -0
  54. flowfile_core/auth/jwt.py +39 -0
  55. flowfile_core/configs/node_store/nodes.py +1 -0
  56. flowfile_core/configs/settings.py +6 -5
  57. flowfile_core/flowfile/code_generator/code_generator.py +71 -0
  58. flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -1
  59. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +597 -309
  60. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
  61. flowfile_core/flowfile/flow_graph.py +619 -191
  62. flowfile_core/flowfile/flow_graph_utils.py +2 -2
  63. flowfile_core/flowfile/flow_node/flow_node.py +500 -89
  64. flowfile_core/flowfile/flow_node/models.py +125 -20
  65. flowfile_core/flowfile/handler.py +2 -33
  66. flowfile_core/flowfile/manage/open_flowfile.py +1 -2
  67. flowfile_core/flowfile/util/calculate_layout.py +0 -2
  68. flowfile_core/flowfile/utils.py +36 -5
  69. flowfile_core/main.py +32 -13
  70. flowfile_core/routes/cloud_connections.py +7 -11
  71. flowfile_core/routes/logs.py +2 -6
  72. flowfile_core/routes/public.py +1 -0
  73. flowfile_core/routes/routes.py +127 -51
  74. flowfile_core/routes/secrets.py +72 -14
  75. flowfile_core/schemas/__init__.py +8 -0
  76. flowfile_core/schemas/input_schema.py +92 -64
  77. flowfile_core/schemas/output_model.py +19 -3
  78. flowfile_core/schemas/schemas.py +144 -11
  79. flowfile_core/schemas/transform_schema.py +82 -17
  80. flowfile_frame/__init__.py +9 -1
  81. flowfile_frame/cloud_storage/__init__.py +0 -0
  82. flowfile_frame/cloud_storage/frame_helpers.py +39 -0
  83. flowfile_frame/cloud_storage/secret_manager.py +73 -0
  84. flowfile_frame/expr.py +28 -1
  85. flowfile_frame/expr.pyi +76 -61
  86. flowfile_frame/flow_frame.py +232 -110
  87. flowfile_frame/flow_frame.pyi +140 -91
  88. flowfile_frame/flow_frame_methods.py +150 -12
  89. flowfile_frame/group_frame.py +3 -0
  90. flowfile_frame/utils.py +25 -3
  91. test_utils/s3/data_generator.py +1 -0
  92. test_utils/s3/demo_data_generator.py +186 -0
  93. test_utils/s3/fixtures.py +6 -1
  94. flowfile_core/schemas/defaults.py +0 -9
  95. flowfile_core/schemas/models.py +0 -193
  96. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
  97. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
  98. {flowfile-0.3.6.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +0 -0
@@ -1,4 +1,3 @@
1
-
2
1
  from typing import List, Union, Callable, Any, Optional, Generator, Literal
3
2
  from flowfile_core.configs import logger
4
3
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
@@ -6,6 +5,7 @@ from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEng
6
5
  from flowfile_core.utils.arrow_reader import get_read_top_n
7
6
  from flowfile_core.schemas import input_schema, schemas
8
7
  from flowfile_core.configs.flow_logger import NodeLogger
8
+ from flowfile_core.configs.settings import SINGLE_FILE_MODE
9
9
 
10
10
  from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
11
11
  from flowfile_core.flowfile.utils import get_hash
@@ -13,13 +13,19 @@ from flowfile_core.configs.node_store import nodes as node_interface
13
13
  from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
14
14
  from time import sleep
15
15
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
16
- ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result, ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
16
+ ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
17
+ ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
17
18
  from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
18
19
  NodeStepStats, NodeResults)
19
20
  from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
20
21
 
21
22
 
22
23
  class FlowNode:
24
+ """Represents a single node in a data flow graph.
25
+
26
+ This class manages the node's state, its data processing function,
27
+ and its connections to other nodes within the graph.
28
+ """
23
29
  parent_uuid: str
24
30
  node_type: str
25
31
  node_template: node_interface.NodeTemplate
@@ -35,12 +41,62 @@ class FlowNode:
35
41
  _setting_input: Any = None
36
42
  _hash: Optional[str] = None # host this for caching results
37
43
  _function: Callable = None # the function that needs to be executed when triggered
44
+ _name: str = None # name of the node, used for display
38
45
  _schema_callback: Optional[SingleExecutionFuture] = None # Function that calculates the schema without executing
39
46
  _state_needs_reset: bool = False
40
47
  _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
41
48
  _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter | ExternalCloudWriter] = None
42
49
 
50
+ def __init__(self, node_id: Union[str, int], function: Callable,
51
+ parent_uuid: str,
52
+ setting_input: Any,
53
+ name: str,
54
+ node_type: str,
55
+ input_columns: List[str] = None,
56
+ output_schema: List[FlowfileColumn] = None,
57
+ drop_columns: List[str] = None,
58
+ renew_schema: bool = True,
59
+ pos_x: float = 0,
60
+ pos_y: float = 0,
61
+ schema_callback: Callable = None,
62
+ ):
63
+ """Initializes a FlowNode instance.
64
+
65
+ Args:
66
+ node_id: Unique identifier for the node.
67
+ function: The core data processing function for the node.
68
+ parent_uuid: The UUID of the parent flow.
69
+ setting_input: The configuration/settings object for the node.
70
+ name: The name of the node.
71
+ node_type: The type identifier of the node (e.g., 'join', 'filter').
72
+ input_columns: List of column names expected as input.
73
+ output_schema: The schema of the columns to be added.
74
+ drop_columns: List of column names to be dropped.
75
+ renew_schema: Flag to indicate if the schema should be renewed.
76
+ pos_x: The x-coordinate on the canvas.
77
+ pos_y: The y-coordinate on the canvas.
78
+ schema_callback: A custom function to calculate the output schema.
79
+ """
80
+ self._name = None
81
+ self.parent_uuid = parent_uuid
82
+ self.post_init()
83
+ self.active = True
84
+ self.node_information.id = node_id
85
+ self.node_type = node_type
86
+ self.node_settings.renew_schema = renew_schema
87
+ self.update_node(function=function,
88
+ input_columns=input_columns,
89
+ output_schema=output_schema,
90
+ drop_columns=drop_columns,
91
+ setting_input=setting_input,
92
+ name=name,
93
+ pos_x=pos_x,
94
+ pos_y=pos_y,
95
+ schema_callback=schema_callback,
96
+ )
97
+
43
98
  def post_init(self):
99
+ """Initializes or resets the node's attributes to their default states."""
44
100
  self.node_inputs = NodeStepInputs()
45
101
  self.node_stats = NodeStepStats()
46
102
  self.node_settings = NodeStepSettings()
@@ -54,19 +110,32 @@ class FlowNode:
54
110
  self._state_needs_reset = False
55
111
 
56
112
  @property
57
- def state_needs_reset(self):
113
+ def state_needs_reset(self) -> bool:
114
+ """Checks if the node's state needs to be reset.
115
+
116
+ Returns:
117
+ True if a reset is required, False otherwise.
118
+ """
58
119
  return self._state_needs_reset
59
120
 
60
121
  @state_needs_reset.setter
61
122
  def state_needs_reset(self, v: bool):
123
+ """Sets the flag indicating that the node's state needs to be reset.
124
+
125
+ Args:
126
+ v: The boolean value to set.
127
+ """
62
128
  self._state_needs_reset = v
63
129
 
64
130
  @staticmethod
65
131
  def create_schema_callback_from_function(f: Callable) -> Callable[[], List[FlowfileColumn]]:
66
- """
67
- Create a schema callback from a function.
68
- :param f: Function that returns the schema
69
- :return: Callable that returns the schema
132
+ """Wraps a node's function to create a schema callback that extracts the schema.
133
+
134
+ Args:
135
+ f: The node's core function that returns a FlowDataEngine instance.
136
+
137
+ Returns:
138
+ A callable that, when executed, returns the output schema.
70
139
  """
71
140
  def schema_callback() -> List[FlowfileColumn]:
72
141
  try:
@@ -79,6 +148,13 @@ class FlowNode:
79
148
 
80
149
  @property
81
150
  def schema_callback(self) -> SingleExecutionFuture:
151
+ """Gets the schema callback function, creating one if it doesn't exist.
152
+
153
+ The callback is used for predicting the output schema without full execution.
154
+
155
+ Returns:
156
+ A SingleExecutionFuture instance wrapping the schema function.
157
+ """
82
158
  if self._schema_callback is None:
83
159
  if self.user_provided_schema_callback is not None:
84
160
  self.schema_callback = self.user_provided_schema_callback
@@ -88,6 +164,11 @@ class FlowNode:
88
164
 
89
165
  @schema_callback.setter
90
166
  def schema_callback(self, f: Callable):
167
+ """Sets the schema callback function for the node.
168
+
169
+ Args:
170
+ f: The function to be used for schema calculation.
171
+ """
91
172
  if f is None:
92
173
  return
93
174
 
@@ -101,9 +182,24 @@ class FlowNode:
101
182
 
102
183
  @property
103
184
  def is_start(self) -> bool:
185
+ """Determines if the node is a starting node in the flow.
186
+
187
+ A starting node requires no inputs.
188
+
189
+ Returns:
190
+ True if the node is a start node, False otherwise.
191
+ """
104
192
  return not self.has_input and self.node_template.input == 0
105
193
 
106
194
  def get_input_type(self, node_id: int) -> List:
195
+ """Gets the type of connection ('main', 'left', 'right') for a given input node ID.
196
+
197
+ Args:
198
+ node_id: The ID of the input node.
199
+
200
+ Returns:
201
+ A list of connection types for that node ID.
202
+ """
107
203
  relation_type = []
108
204
  if node_id in [n.node_id for n in self.node_inputs.main_inputs]:
109
205
  relation_type.append('main')
@@ -113,36 +209,6 @@ class FlowNode:
113
209
  relation_type.append('right')
114
210
  return list(set(relation_type))
115
211
 
116
- def __init__(self, node_id: Union[str, int], function: Callable,
117
- parent_uuid: str,
118
- setting_input: Any,
119
- name: str,
120
- node_type: str,
121
- input_columns: List[str] = None,
122
- output_schema: List[FlowfileColumn] = None,
123
- drop_columns: List[str] = None,
124
- renew_schema: bool = True,
125
- pos_x: float = 0,
126
- pos_y: float = 0,
127
- schema_callback: Callable = None,
128
- ):
129
- self.parent_uuid = parent_uuid
130
- self.post_init()
131
- self.active = True
132
- self.node_information.id = node_id
133
- self.node_type = node_type
134
- self.node_settings.renew_schema = renew_schema
135
- self.update_node(function=function,
136
- input_columns=input_columns,
137
- output_schema=output_schema,
138
- drop_columns=drop_columns,
139
- setting_input=setting_input,
140
- name=name,
141
- pos_x=pos_x,
142
- pos_y=pos_y,
143
- schema_callback=schema_callback,
144
- )
145
-
146
212
  def update_node(self,
147
213
  function: Callable,
148
214
  input_columns: List[str] = None,
@@ -154,9 +220,24 @@ class FlowNode:
154
220
  pos_y: float = 0,
155
221
  schema_callback: Callable = None,
156
222
  ):
223
+ """Updates the properties of the node.
224
+
225
+ This is called during initialization and when settings are changed.
226
+
227
+ Args:
228
+ function: The new core data processing function.
229
+ input_columns: The new list of input columns.
230
+ output_schema: The new schema of added columns.
231
+ drop_columns: The new list of dropped columns.
232
+ name: The new name for the node.
233
+ setting_input: The new settings object.
234
+ pos_x: The new x-coordinate.
235
+ pos_y: The new y-coordinate.
236
+ schema_callback: The new custom schema callback function.
237
+ """
157
238
  self.user_provided_schema_callback = schema_callback
158
- self.node_information.y_position = pos_y
159
- self.node_information.x_position = pos_x
239
+ self.node_information.y_position = int(pos_y)
240
+ self.node_information.x_position = int(pos_x)
160
241
  self.node_information.setting_input = setting_input
161
242
  self.name = self.node_type if name is None else name
162
243
  self._function = function
@@ -178,20 +259,40 @@ class FlowNode:
178
259
  self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
179
260
 
180
261
  @property
181
- def name(self):
262
+ def name(self) -> str:
263
+ """Gets the name of the node.
264
+
265
+ Returns:
266
+ The node's name.
267
+ """
182
268
  return self._name
183
269
 
184
270
  @name.setter
185
271
  def name(self, name: str):
272
+ """Sets the name of the node.
273
+
274
+ Args:
275
+ name: The new name.
276
+ """
186
277
  self._name = name
187
278
  self.__name__ = name
188
279
 
189
280
  @property
190
- def setting_input(self):
281
+ def setting_input(self) -> Any:
282
+ """Gets the node's specific configuration settings.
283
+
284
+ Returns:
285
+ The settings object.
286
+ """
191
287
  return self._setting_input
192
288
 
193
289
  @setting_input.setter
194
290
  def setting_input(self, setting_input: Any):
291
+ """Sets the node's configuration and triggers a reset if necessary.
292
+
293
+ Args:
294
+ setting_input: The new settings object.
295
+ """
195
296
  is_manual_input = (self.node_type == 'manual_input' and
196
297
  isinstance(setting_input, input_schema.NodeManualInput) and
197
298
  isinstance(self._setting_input, input_schema.NodeManualInput)
@@ -209,24 +310,48 @@ class FlowNode:
209
310
  self.reset()
210
311
 
211
312
  @property
212
- def node_id(self):
313
+ def node_id(self) -> Union[str, int]:
314
+ """Gets the unique identifier of the node.
315
+
316
+ Returns:
317
+ The node's ID.
318
+ """
213
319
  return self.node_information.id
214
320
 
215
321
  @property
216
- def left_input(self):
322
+ def left_input(self) -> Optional["FlowNode"]:
323
+ """Gets the node connected to the left input port.
324
+
325
+ Returns:
326
+ The left input FlowNode, or None.
327
+ """
217
328
  return self.node_inputs.left_input
218
329
 
219
330
  @property
220
- def right_input(self):
331
+ def right_input(self) -> Optional["FlowNode"]:
332
+ """Gets the node connected to the right input port.
333
+
334
+ Returns:
335
+ The right input FlowNode, or None.
336
+ """
221
337
  return self.node_inputs.right_input
222
338
 
223
339
  @property
224
340
  def main_input(self) -> List["FlowNode"]:
341
+ """Gets the list of nodes connected to the main input port(s).
342
+
343
+ Returns:
344
+ A list of main input FlowNodes.
345
+ """
225
346
  return self.node_inputs.main_inputs
226
347
 
227
348
  @property
228
- def is_correct(self):
229
- # Check if inputs meet requirements
349
+ def is_correct(self) -> bool:
350
+ """Checks if the node's input connections satisfy its template requirements.
351
+
352
+ Returns:
353
+ True if connections are valid, False otherwise.
354
+ """
230
355
  if isinstance(self.setting_input, input_schema.NodePromise):
231
356
  return False
232
357
  return (self.node_template.input == len(self.node_inputs.get_all_inputs()) or
@@ -234,6 +359,10 @@ class FlowNode:
234
359
  (self.node_template.multi and self.node_template.can_be_start))
235
360
 
236
361
  def set_node_information(self):
362
+ """Populates the `node_information` attribute with the current state.
363
+
364
+ This includes the node's connections, settings, and position.
365
+ """
237
366
  logger.info('setting node information')
238
367
  node_information = self.node_information
239
368
  node_information.left_input_id = self.node_inputs.left_input.node_id if self.left_input else None
@@ -248,43 +377,76 @@ class FlowNode:
248
377
  node_information.type = self.node_type
249
378
 
250
379
  def get_node_information(self) -> schemas.NodeInformation:
380
+ """Updates and returns the node's information object.
381
+
382
+ Returns:
383
+ The `NodeInformation` object for this node.
384
+ """
251
385
  self.set_node_information()
252
386
  return self.node_information
253
387
 
254
388
  @property
255
- def function(self):
389
+ def function(self) -> Callable:
390
+ """Gets the core processing function of the node.
391
+
392
+ Returns:
393
+ The callable function.
394
+ """
256
395
  return self._function
257
396
 
258
- def reset_hash(self) -> bool:
259
- old_hash = self._hash
260
- self._hash = None
261
- if self.hash != old_hash:
262
- if self.node_settings.cache_results:
263
- self.remove_cache()
264
- return True
265
- return False
397
+ @function.setter
398
+ def function(self, function: Callable):
399
+ """Sets the core processing function of the node.
400
+
401
+ Args:
402
+ function: The new callable function.
403
+ """
404
+ self._function = function
266
405
 
267
406
  @property
268
407
  def all_inputs(self) -> List["FlowNode"]:
408
+ """Gets a list of all nodes connected to any input port.
409
+
410
+ Returns:
411
+ A list of all input FlowNodes.
412
+ """
269
413
  return self.node_inputs.get_all_inputs()
270
414
 
271
- def calculate_hash(self, setting_input: Any):
415
+ def calculate_hash(self, setting_input: Any) -> str:
416
+ """Calculates a hash based on settings and input node hashes.
417
+
418
+ Args:
419
+ setting_input: The node's settings object to be included in the hash.
420
+
421
+ Returns:
422
+ A string hash value.
423
+ """
272
424
  depends_on_hashes = [_node.hash for _node in self.all_inputs]
273
425
  node_data_hash = get_hash(setting_input)
274
426
  return get_hash(depends_on_hashes + [node_data_hash, self.parent_uuid])
275
427
 
276
428
  @property
277
- def hash(self):
429
+ def hash(self) -> str:
430
+ """Gets the cached hash for the node, calculating it if it doesn't exist.
431
+
432
+ Returns:
433
+ The string hash value.
434
+ """
278
435
  if not self._hash:
279
436
  self._hash = self.calculate_hash(self.setting_input)
280
437
  return self._hash
281
438
 
282
- @function.setter
283
- def function(self, function: Callable):
284
- self._function = function
285
- # self.reset()
439
+ def add_node_connection(self, from_node: "FlowNode",
440
+ insert_type: Literal['main', 'left', 'right'] = 'main') -> None:
441
+ """Adds a connection from a source node to this node.
286
442
 
287
- def add_node_connection(self, from_node: "FlowNode", insert_type: Literal['main', 'left', 'right'] = 'main'):
443
+ Args:
444
+ from_node: The node to connect from.
445
+ insert_type: The type of input to connect to ('main', 'left', 'right').
446
+
447
+ Raises:
448
+ Exception: If the insert_type is invalid.
449
+ """
288
450
  from_node.leads_to_nodes.append(self)
289
451
  if insert_type == 'main':
290
452
  if self.node_template.input <= 2 or self.node_inputs.main_inputs is None:
@@ -303,20 +465,39 @@ class FlowNode:
303
465
  self.reset()
304
466
  from_node.reset()
305
467
 
306
- def evaluate_nodes(self, deep: bool = False):
468
+ def evaluate_nodes(self, deep: bool = False) -> None:
469
+ """Triggers a state reset for all directly connected downstream nodes.
470
+
471
+ Args:
472
+ deep: If True, the reset propagates recursively through the entire downstream graph.
473
+ """
307
474
  for node in self.leads_to_nodes:
308
475
  self.print(f'resetting node: {node.node_id}')
309
476
  node.reset(deep)
310
477
 
311
- def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn:
478
+ def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn | None:
479
+ """Retrieves the schema for a specific column from the output schema.
480
+
481
+ Args:
482
+ col_name: The name of the column.
483
+
484
+ Returns:
485
+ The FlowfileColumn object for that column, or None if not found.
486
+ """
312
487
  for s in self.schema:
313
488
  if s.column_name == col_name:
314
489
  return s
315
490
 
316
- def get_predicted_schema(self, force: bool = False):
317
- """
318
- Method to get a predicted schema based on the columns that are dropped and added
319
- :return:
491
+ def get_predicted_schema(self, force: bool = False) -> List[FlowfileColumn] | None:
492
+ """Predicts the output schema of the node without full execution.
493
+
494
+ It uses the schema_callback or infers from predicted data.
495
+
496
+ Args:
497
+ force: If True, forces recalculation even if a predicted schema exists.
498
+
499
+ Returns:
500
+ A list of FlowfileColumn objects representing the predicted schema.
320
501
  """
321
502
  if self.node_schema.predicted_schema and not force:
322
503
  return self.node_schema.predicted_schema
@@ -338,6 +519,11 @@ class FlowNode:
338
519
 
339
520
  @property
340
521
  def is_setup(self) -> bool:
522
+ """Checks if the node has been properly configured and is ready for execution.
523
+
524
+ Returns:
525
+ True if the node is set up, False otherwise.
526
+ """
341
527
  if not self.node_information.is_setup:
342
528
  if self.function.__name__ != 'placeholder':
343
529
  self.node_information.is_setup = True
@@ -345,9 +531,24 @@ class FlowNode:
345
531
  return self.node_information.is_setup
346
532
 
347
533
  def print(self, v: Any):
534
+ """Helper method to log messages with node context.
535
+
536
+ Args:
537
+ v: The message or value to log.
538
+ """
348
539
  logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
349
540
 
350
541
  def get_resulting_data(self) -> FlowDataEngine | None:
542
+ """Executes the node's function to produce the actual output data.
543
+
544
+ Handles both regular functions and external data sources.
545
+
546
+ Returns:
547
+ A FlowDataEngine instance containing the result, or None on error.
548
+
549
+ Raises:
550
+ Exception: Propagates exceptions from the node's function execution.
551
+ """
351
552
  if self.is_setup:
352
553
  if self.results.resulting_data is None and self.results.errors is None:
353
554
  self.print('getting resulting data')
@@ -375,6 +576,13 @@ class FlowNode:
375
576
  return self.results.resulting_data
376
577
 
377
578
  def _predicted_data_getter(self) -> FlowDataEngine | None:
579
+ """Internal helper to get a predicted data result.
580
+
581
+ This calls the function with predicted data from input nodes.
582
+
583
+ Returns:
584
+ A FlowDataEngine instance with predicted data, or an empty one on error.
585
+ """
378
586
  try:
379
587
  fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
380
588
  return fl
@@ -391,6 +599,13 @@ class FlowNode:
391
599
  logger.warning(e)
392
600
 
393
601
  def get_predicted_resulting_data(self) -> FlowDataEngine:
602
+ """Creates a `FlowDataEngine` instance based on the predicted schema.
603
+
604
+ This avoids executing the node's full logic.
605
+
606
+ Returns:
607
+ A FlowDataEngine instance with a schema but no data.
608
+ """
394
609
  if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
395
610
  self.print('Getting data based on the schema')
396
611
 
@@ -404,17 +619,28 @@ class FlowNode:
404
619
  return fl
405
620
 
406
621
  def add_lead_to_in_depend_source(self):
622
+ """Ensures this node is registered in the `leads_to_nodes` list of its inputs."""
407
623
  for input_node in self.all_inputs:
408
624
  if self.node_id not in [n.node_id for n in input_node.leads_to_nodes]:
409
625
  input_node.leads_to_nodes.append(self)
410
626
 
411
627
  def get_all_dependent_nodes(self) -> Generator["FlowNode", None, None]:
628
+ """Yields all downstream nodes recursively.
629
+
630
+ Returns:
631
+ A generator of all dependent FlowNode objects.
632
+ """
412
633
  for node in self.leads_to_nodes:
413
634
  yield node
414
635
  for n in node.get_all_dependent_nodes():
415
636
  yield n
416
637
 
417
638
  def get_all_dependent_node_ids(self) -> Generator[int, None, None]:
639
+ """Yields the IDs of all downstream nodes recursively.
640
+
641
+ Returns:
642
+ A generator of all dependent node IDs.
643
+ """
418
644
  for node in self.leads_to_nodes:
419
645
  yield node.node_id
420
646
  for n in node.get_all_dependent_node_ids():
@@ -422,6 +648,13 @@ class FlowNode:
422
648
 
423
649
  @property
424
650
  def schema(self) -> List[FlowfileColumn]:
651
+ """Gets the definitive output schema of the node.
652
+
653
+ If not already run, it falls back to the predicted schema.
654
+
655
+ Returns:
656
+ A list of FlowfileColumn objects.
657
+ """
425
658
  try:
426
659
  if self.is_setup and self.results.errors is None:
427
660
  if self.node_schema.result_schema is not None and len(self.node_schema.result_schema) > 0:
@@ -434,31 +667,42 @@ class FlowNode:
434
667
  return self.node_schema.result_schema
435
668
  else:
436
669
  return []
437
- except:
670
+ except Exception as e:
671
+ logger.error(e)
438
672
  return []
439
673
 
440
- def load_from_cache(self) -> FlowDataEngine:
441
- if results_exists(self.hash):
442
- try:
443
- return FlowDataEngine(self._fetch_cached_df.get_result())
444
- except Exception as e:
445
- logger.error(e)
446
-
447
674
  def remove_cache(self):
675
+ """Removes cached results for this node.
676
+
677
+ Note: Currently not fully implemented.
678
+ """
679
+
448
680
  if results_exists(self.hash):
449
681
  logger.warning('Not implemented')
450
682
 
451
683
  def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
452
684
  execution_location: schemas.ExecutionLocationsLiteral = "auto") -> bool:
453
- if execution_location == "local":
685
+ """Determines if the node needs to be executed.
686
+
687
+ The decision is based on its run state, caching settings, and execution mode.
688
+
689
+ Args:
690
+ performance_mode: True if the flow is in performance mode.
691
+ node_logger: The logger instance for this node.
692
+ execution_location: The target execution location.
693
+
694
+ Returns:
695
+ True if the node should be run, False otherwise.
696
+ """
697
+ if execution_location == "local" or SINGLE_FILE_MODE:
454
698
  return False
699
+
455
700
  flow_logger = logger if node_logger is None else node_logger
456
701
  cache_result_exists = results_exists(self.hash)
457
702
  if not self.node_stats.has_run_with_current_setup:
458
703
  flow_logger.info('Node has not run, needs to run')
459
704
  return True
460
705
  if self.node_settings.cache_results and cache_result_exists:
461
-
462
706
  return False
463
707
  elif self.node_settings.cache_results and not cache_result_exists:
464
708
  return True
@@ -468,9 +712,34 @@ class FlowNode:
468
712
  return True
469
713
 
470
714
  def __call__(self, *args, **kwargs):
715
+ """Makes the node instance callable, acting as an alias for execute_node."""
471
716
  self.execute_node(*args, **kwargs)
472
717
 
718
+ def execute_full_local(self, performance_mode: bool = False) -> None:
719
+ """Executes the node's logic locally, including example data generation.
720
+
721
+ Args:
722
+ performance_mode: If True, skips generating example data.
723
+
724
+ Raises:
725
+ Exception: Propagates exceptions from the execution.
726
+ """
727
+ if self.results.resulting_data is None and not performance_mode:
728
+ self.results.resulting_data = self.get_resulting_data()
729
+ self.results.example_data_generator = lambda: self.get_resulting_data().get_sample(100).to_arrow()
730
+ self.node_schema.result_schema = self.results.resulting_data.schema
731
+ self.node_stats.has_completed_last_run = True
732
+
473
733
  def execute_local(self, flow_id: int, performance_mode: bool = False):
734
+ """Executes the node's logic locally.
735
+
736
+ Args:
737
+ flow_id: The ID of the parent flow.
738
+ performance_mode: If True, skips generating example data.
739
+
740
+ Raises:
741
+ Exception: Propagates exceptions from the execution.
742
+ """
474
743
  try:
475
744
  resulting_data = self.get_resulting_data()
476
745
  if not performance_mode:
@@ -495,7 +764,15 @@ class FlowNode:
495
764
  step.node_settings.streamable = self.node_settings.streamable
496
765
 
497
766
  def execute_remote(self, performance_mode: bool = False, node_logger: NodeLogger = None):
498
- # flow_logger = logger if flow_logger is None else flow_logger
767
+ """Executes the node's logic remotely or handles cached results.
768
+
769
+ Args:
770
+ performance_mode: If True, skips generating example data.
771
+ node_logger: The logger for this node execution.
772
+
773
+ Raises:
774
+ Exception: If the node_logger is not provided or if execution fails.
775
+ """
499
776
  if node_logger is None:
500
777
  raise Exception('Node logger is not defined')
501
778
  if self.node_settings.cache_results and results_exists(self.hash):
@@ -552,11 +829,15 @@ class FlowNode:
552
829
  self._fetch_cached_df = None
553
830
 
554
831
  def prepare_before_run(self):
832
+ """Resets results and errors before a new execution."""
833
+
555
834
  self.results.errors = None
556
835
  self.results.resulting_data = None
557
836
  self.results.example_data = None
558
837
 
559
838
  def cancel(self):
839
+ """Cancels an ongoing external process if one is running."""
840
+
560
841
  if self._fetch_cached_df is not None:
561
842
  self._fetch_cached_df.cancel()
562
843
  self.node_stats.is_canceled = True
@@ -566,6 +847,18 @@ class FlowNode:
566
847
 
567
848
  def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
568
849
  performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
850
+ """Orchestrates the execution, handling location, caching, and retries.
851
+
852
+ Args:
853
+ run_location: The location for execution ('local', 'remote').
854
+ reset_cache: If True, forces removal of any existing cache.
855
+ performance_mode: If True, optimizes for speed over diagnostics.
856
+ retry: If True, allows retrying execution on recoverable errors.
857
+ node_logger: The logger for this node execution.
858
+
859
+ Raises:
860
+ Exception: If the node_logger is not defined.
861
+ """
569
862
  if node_logger is None:
570
863
  raise Exception('Flow logger is not defined')
571
864
  # node_logger = flow_logger.get_node_logger(self.node_id)
@@ -575,7 +868,8 @@ class FlowNode:
575
868
  self.node_stats.has_completed_last_run = False
576
869
  if self.is_setup:
577
870
  node_logger.info(f'Starting to run {self.__name__}')
578
- if self.needs_run(performance_mode, node_logger, run_location):
871
+ if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
872
+ and not (run_location == 'local' or SINGLE_FILE_MODE)):
579
873
  self.prepare_before_run()
580
874
  try:
581
875
  if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
@@ -605,13 +899,28 @@ class FlowNode:
605
899
  else:
606
900
  self.results.errors = str(e)
607
901
  node_logger.error(f'Error with running the node: {e}')
608
-
902
+ elif ((run_location == 'local' or SINGLE_FILE_MODE) and (not self.node_stats.has_run_with_current_setup
903
+ or self.node_template.node_group == "output")):
904
+ try:
905
+ node_logger.info('Executing fully locally')
906
+ self.execute_full_local(performance_mode)
907
+ except Exception as e:
908
+ self.results.errors = str(e)
909
+ node_logger.error(f'Error with running the node: {e}')
910
+ self.node_stats.error = str(e)
911
+ self.node_stats.has_completed_last_run = False
912
+ self.node_stats.has_run_with_current_setup = True
609
913
  else:
610
914
  node_logger.info('Node has already run, not running the node')
611
915
  else:
612
916
  node_logger.warning(f'Node {self.__name__} is not setup, cannot run the node')
613
917
 
614
918
  def store_example_data_generator(self, external_df_fetcher: ExternalDfFetcher | ExternalSampler):
919
+ """Stores a generator function for fetching a sample of the result data.
920
+
921
+ Args:
922
+ external_df_fetcher: The process that generated the sample data.
923
+ """
615
924
  if external_df_fetcher.status is not None:
616
925
  file_ref = external_df_fetcher.status.file_ref
617
926
  self.results.example_data_path = file_ref
@@ -620,9 +929,21 @@ class FlowNode:
620
929
  logger.error('Could not get the sample data, the external process is not ready')
621
930
 
622
931
  def needs_reset(self) -> bool:
932
+ """Checks if the node's hash has changed, indicating an outdated state.
933
+
934
+ Returns:
935
+ True if the calculated hash differs from the stored hash.
936
+ """
623
937
  return self._hash != self.calculate_hash(self.setting_input)
624
938
 
625
939
  def reset(self, deep: bool = False):
940
+ """Resets the node's execution state and schema information.
941
+
942
+ This also triggers a reset on all downstream nodes.
943
+
944
+ Args:
945
+ deep: If True, forces a reset even if the hash hasn't changed.
946
+ """
626
947
  needs_reset = self.needs_reset() or deep
627
948
  if needs_reset:
628
949
  logger.info(f'{self.node_id}: Node needs reset')
@@ -637,10 +958,19 @@ class FlowNode:
637
958
  self.node_schema.predicted_schema = None
638
959
  self._hash = None
639
960
  self.node_information.is_setup = None
961
+ self.results.errors = None
640
962
  self.evaluate_nodes()
641
963
  _ = self.hash # Recalculate the hash after reset
642
964
 
643
965
  def delete_lead_to_node(self, node_id: int) -> bool:
966
+ """Removes a connection to a specific downstream node.
967
+
968
+ Args:
969
+ node_id: The ID of the downstream node to disconnect.
970
+
971
+ Returns:
972
+ True if the connection was found and removed, False otherwise.
973
+ """
644
974
  logger.info(f'Deleting lead to node: {node_id}')
645
975
  for i, lead_to_node in enumerate(self.leads_to_nodes):
646
976
  logger.info(f'Checking lead to node: {lead_to_node.node_id}')
@@ -652,7 +982,16 @@ class FlowNode:
652
982
 
653
983
  def delete_input_node(self, node_id: int, connection_type: input_schema.InputConnectionClass = 'input-0',
654
984
  complete: bool = False) -> bool:
655
- # connection type must be in right, left or main
985
+ """Removes a connection from a specific input node.
986
+
987
+ Args:
988
+ node_id: The ID of the input node to disconnect.
989
+ connection_type: The specific input handle (e.g., 'input-0', 'input-1').
990
+ complete: If True, tries to delete from all input types.
991
+
992
+ Returns:
993
+ True if a connection was found and removed, False otherwise.
994
+ """
656
995
  deleted: bool = False
657
996
  if connection_type == 'input-0':
658
997
  for i, node in enumerate(self.node_inputs.main_inputs):
@@ -675,17 +1014,32 @@ class FlowNode:
675
1014
  self.reset()
676
1015
  return deleted
677
1016
 
678
- def __repr__(self):
1017
+ def __repr__(self) -> str:
1018
+ """Provides a string representation of the FlowNode instance.
1019
+
1020
+ Returns:
1021
+ A string showing the node's ID and type.
1022
+ """
679
1023
  return f"Node id: {self.node_id} ({self.node_type})"
680
1024
 
681
- def _get_readable_schema(self):
1025
+ def _get_readable_schema(self) -> List[dict] | None:
1026
+ """Helper to get a simplified, dictionary representation of the output schema.
1027
+
1028
+ Returns:
1029
+ A list of dictionaries, each with 'column_name' and 'data_type'.
1030
+ """
682
1031
  if self.is_setup:
683
1032
  output = []
684
1033
  for s in self.schema:
685
1034
  output.append(dict(column_name=s.column_name, data_type=s.data_type))
686
1035
  return output
687
1036
 
688
- def get_repr(self):
1037
+ def get_repr(self) -> dict:
1038
+ """Gets a detailed dictionary representation of the node's state.
1039
+
1040
+ Returns:
1041
+ A dictionary containing key information about the node.
1042
+ """
689
1043
  return dict(FlowNode=
690
1044
  dict(node_id=self.node_id,
691
1045
  step_name=self.__name__,
@@ -693,30 +1047,66 @@ class FlowNode:
693
1047
  output_schema=self._get_readable_schema()))
694
1048
 
695
1049
  @property
696
- def number_of_leads_to_nodes(self) -> int:
1050
+ def number_of_leads_to_nodes(self) -> int | None:
1051
+ """Counts the number of downstream node connections.
1052
+
1053
+ Returns:
1054
+ The number of nodes this node leads to.
1055
+ """
697
1056
  if self.is_setup:
698
1057
  return len(self.leads_to_nodes)
699
1058
 
700
1059
  @property
701
1060
  def has_next_step(self) -> bool:
1061
+ """Checks if this node has any downstream connections.
1062
+
1063
+ Returns:
1064
+ True if it has at least one downstream node.
1065
+ """
702
1066
  return len(self.leads_to_nodes) > 0
703
1067
 
704
1068
  @property
705
1069
  def has_input(self) -> bool:
1070
+ """Checks if this node has any input connections.
1071
+
1072
+ Returns:
1073
+ True if it has at least one input node.
1074
+ """
706
1075
  return len(self.all_inputs) > 0
707
1076
 
708
1077
  @property
709
1078
  def singular_input(self) -> bool:
1079
+ """Checks if the node template specifies exactly one input.
1080
+
1081
+ Returns:
1082
+ True if the node is a single-input type.
1083
+ """
710
1084
  return self.node_template.input == 1
711
1085
 
712
1086
  @property
713
1087
  def singular_main_input(self) -> "FlowNode":
1088
+ """Gets the input node, assuming it is a single-input type.
1089
+
1090
+ Returns:
1091
+ The single input FlowNode, or None.
1092
+ """
714
1093
  if self.singular_input:
715
1094
  return self.all_inputs[0]
716
1095
 
717
1096
  def get_table_example(self, include_data: bool = False) -> TableExample | None:
1097
+ """Generates a `TableExample` model summarizing the node's output.
1098
+
1099
+ This can optionally include a sample of the data.
1100
+
1101
+ Args:
1102
+ include_data: If True, includes a data sample in the result.
1103
+
1104
+ Returns:
1105
+ A `TableExample` object, or None if the node is not set up.
1106
+ """
718
1107
  self.print('Getting a table example')
719
1108
  if self.is_setup and include_data and self.node_stats.has_completed_last_run:
1109
+
720
1110
  if self.node_template.node_group == 'output':
721
1111
  self.print('getting the table example')
722
1112
  return self.main_input[0].get_table_example(include_data)
@@ -749,10 +1139,16 @@ class FlowNode:
749
1139
  table_schema=schema, columns=columns,
750
1140
  data=[])
751
1141
 
752
- def calculate_settings_out_select(self):
753
- pass
754
-
755
1142
  def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
1143
+ """Gathers all necessary data for representing the node in the UI.
1144
+
1145
+ Args:
1146
+ flow_id: The ID of the parent flow.
1147
+ include_example: If True, includes data samples.
1148
+
1149
+ Returns:
1150
+ A `NodeData` object.
1151
+ """
756
1152
  node = NodeData(flow_id=flow_id,
757
1153
  node_id=self.node_id,
758
1154
  has_run=self.node_stats.has_run_with_current_setup,
@@ -772,15 +1168,30 @@ class FlowNode:
772
1168
  return node
773
1169
 
774
1170
  def get_output_data(self) -> TableExample:
1171
+ """Gets the full output data sample for this node.
1172
+
1173
+ Returns:
1174
+ A `TableExample` object with data.
1175
+ """
775
1176
  return self.get_table_example(True)
776
1177
 
777
1178
  def get_node_input(self) -> schemas.NodeInput:
1179
+ """Creates a `NodeInput` schema object for representing this node in the UI.
1180
+
1181
+ Returns:
1182
+ A `NodeInput` object.
1183
+ """
778
1184
  return schemas.NodeInput(pos_y=self.setting_input.pos_y,
779
1185
  pos_x=self.setting_input.pos_x,
780
1186
  id=self.node_id,
781
1187
  **self.node_template.__dict__)
782
1188
 
783
1189
  def get_edge_input(self) -> List[schemas.NodeEdge]:
1190
+ """Generates `NodeEdge` objects for all input connections to this node.
1191
+
1192
+ Returns:
1193
+ A list of `NodeEdge` objects.
1194
+ """
784
1195
  edges = []
785
1196
  if self.node_inputs.main_inputs is not None:
786
1197
  for i, main_input in enumerate(self.node_inputs.main_inputs):