Flowfile 0.3.9__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (169)
  1. flowfile/__init__.py +6 -1
  2. flowfile/api.py +0 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-109ecc3c.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-19cdd67a.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-48e0ae20.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  8. flowfile/web/static/assets/ColumnSelector-ecaf7c44.js +83 -0
  9. flowfile/web/static/assets/ContextMenu-2b348c4c.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-a779eed7.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-eca26a03.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-a88f8142.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-cb863dff.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-819d3267.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-84ee2834.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-060dd412.js} +14 -114
  22. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-7fc7750f.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-82c95991.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-e1a6ddc7.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-8aca894a.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  30. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-e33686d9.js} +18 -85
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-abda150d.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-4ecad1d7.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-656d07f3.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-b84ec849.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-346f4135.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-61b98268.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-2a7c8312.js +63 -0
  43. flowfile/web/static/assets/NumericInput-e36602c2.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-211a1990.js +35 -0
  45. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-eb041599.js} +13 -243
  47. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  48. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-f5c774f4.js} +14 -138
  49. flowfile/web/static/assets/PivotValidation-26546cbc.js +61 -0
  50. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  52. flowfile/web/static/assets/PivotValidation-e150a24b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-da3a7abf.js} +13 -80
  54. flowfile/web/static/assets/Read-0c768769.js +243 -0
  55. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  56. flowfile/web/static/assets/RecordCount-84736276.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-60055e6d.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-8a486004.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-2d662611.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-ef586cab.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-2e4a6965.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-310b61c0.js} +2 -40
  65. flowfile/web/static/assets/SettingsSection-5634f439.js +45 -0
  66. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  67. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  68. flowfile/web/static/assets/SettingsSection-7c68b19f.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-7298811a.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-43807bad.js +62 -0
  71. flowfile/web/static/assets/SliderInput-53105476.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-4fdebe74.js} +12 -97
  75. flowfile/web/static/assets/TextInput-28366b7e.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-9cad14ba.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-73ffa692.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-598add30.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-f620cd32.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-66239e83.js} +2 -2
  82. flowfile/web/static/assets/Union-26b10614.js +77 -0
  83. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-33b9edbb.js} +22 -91
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-ef69d0e2.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-8658388e.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-4d7861f4.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-023d1733.js → api-2d1394bd.js} +1 -1
  93. flowfile/web/static/assets/{api-cb00cce6.js → api-c908fffe.js} +1 -1
  94. flowfile/web/static/assets/{designer-6c322d8e.js → designer-1667687d.js} +2201 -705
  95. flowfile/web/static/assets/{designer-2197d782.css → designer-665e9408.css} +836 -201
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-5eed779e.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-41ebe3c2.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-0670d32d.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-38410ebf.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5ec791df.js} +210 -31
  102. flowfile/web/static/assets/outputCsv-059583b6.js +86 -0
  103. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  104. flowfile/web/static/assets/outputExcel-76b1e02c.js +56 -0
  105. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  106. flowfile/web/static/assets/outputParquet-440fd4c7.js +31 -0
  107. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  108. flowfile/web/static/assets/readCsv-9813903a.js +178 -0
  109. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  110. flowfile/web/static/assets/readExcel-7f40d237.js +203 -0
  111. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  112. flowfile/web/static/assets/readParquet-22d56002.js +26 -0
  113. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  114. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-b3cb072e.js} +1 -1
  115. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-7ad95bca.js} +7 -7
  116. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  117. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  118. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-b1dfaa46.js} +59 -33
  119. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-22bac17c.js} +1 -1
  120. flowfile/web/static/index.html +2 -2
  121. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/METADATA +1 -1
  122. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/RECORD +160 -102
  123. flowfile_core/configs/flow_logger.py +5 -13
  124. flowfile_core/configs/node_store/__init__.py +30 -0
  125. flowfile_core/configs/node_store/nodes.py +383 -99
  126. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  127. flowfile_core/configs/settings.py +2 -1
  128. flowfile_core/database/connection.py +5 -21
  129. flowfile_core/fileExplorer/funcs.py +239 -121
  130. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  131. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  132. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  133. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +28 -8
  134. flowfile_core/flowfile/flow_graph.py +117 -34
  135. flowfile_core/flowfile/flow_node/flow_node.py +45 -13
  136. flowfile_core/flowfile/handler.py +22 -3
  137. flowfile_core/flowfile/manage/open_flowfile.py +9 -1
  138. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  139. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  140. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  141. flowfile_core/flowfile/node_designer/data_types.py +146 -0
  142. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  143. flowfile_core/flowfile/schema_callbacks.py +8 -4
  144. flowfile_core/flowfile/setting_generator/settings.py +0 -1
  145. flowfile_core/main.py +5 -1
  146. flowfile_core/routes/routes.py +73 -28
  147. flowfile_core/routes/user_defined_components.py +55 -0
  148. flowfile_core/schemas/input_schema.py +7 -1
  149. flowfile_core/schemas/output_model.py +5 -2
  150. flowfile_core/schemas/schemas.py +8 -3
  151. flowfile_core/schemas/transform_schema.py +1 -0
  152. flowfile_core/utils/validate_setup.py +3 -1
  153. flowfile_worker/__init__.py +6 -35
  154. flowfile_worker/main.py +5 -2
  155. flowfile_worker/routes.py +47 -5
  156. shared/__init__.py +15 -0
  157. shared/storage_config.py +258 -0
  158. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  159. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  160. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  161. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  162. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  163. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  164. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  165. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  166. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  167. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/LICENSE +0 -0
  168. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/WHEEL +0 -0
  169. {flowfile-0.3.9.dist-info → flowfile-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -39,7 +39,7 @@ def trigger_df_operation(flow_id: int, node_id: int | str, lf: pl.LazyFrame, fil
39
39
  'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
40
40
  v = requests.post(url=f'{WORKER_URL}/submit_query/', json=_json)
41
41
  if not v.ok:
42
- raise Exception(f'Could not cache the data, {v.text}')
42
+ raise Exception(f'trigger_df_operation: Could not cache the data, {v.text}')
43
43
  return Status(**v.json())
44
44
 
45
45
 
@@ -49,7 +49,7 @@ def trigger_sample_operation(lf: pl.LazyFrame, file_ref: str, flow_id: int, node
49
49
  'sample_size': sample_size, 'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id}
50
50
  v = requests.post(url=f'{WORKER_URL}/store_sample/', json=_json)
51
51
  if not v.ok:
52
- raise Exception(f'Could not cache the data, {v.text}')
52
+ raise Exception(f'trigger_sample_operation: Could not cache the data, {v.text}')
53
53
  return Status(**v.json())
54
54
 
55
55
 
@@ -67,9 +67,10 @@ def trigger_fuzzy_match_operation(left_df: pl.LazyFrame, right_df: pl.LazyFrame,
67
67
  flowfile_flow_id=flow_id,
68
68
  flowfile_node_id=node_id
69
69
  )
70
+ print("fuzzy join input", fuzzy_join_input)
70
71
  v = requests.post(f'{WORKER_URL}/add_fuzzy_join', data=fuzzy_join_input.model_dump_json())
71
72
  if not v.ok:
72
- raise Exception(f'Could not cache the data, {v.text}')
73
+ raise Exception(f'trigger_fuzzy_match_operation: Could not cache the data, {v.text}')
73
74
  return Status(**v.json())
74
75
 
75
76
 
@@ -78,7 +79,7 @@ def trigger_create_operation(flow_id: int, node_id: int | str, received_table: R
78
79
  f = requests.post(url=f'{WORKER_URL}/create_table/{file_type}', data=received_table.model_dump_json(),
79
80
  params={'flowfile_flow_id': flow_id, 'flowfile_node_id': node_id})
80
81
  if not f.ok:
81
- raise Exception(f'Could not cache the data, {f.text}')
82
+ raise Exception(f'trigger_create_operation: Could not cache the data, {f.text}')
82
83
  return Status(**f.json())
83
84
 
84
85
 
@@ -86,7 +87,7 @@ def trigger_database_read_collector(database_external_read_settings: DatabaseExt
86
87
  f = requests.post(url=f'{WORKER_URL}/store_database_read_result',
87
88
  data=database_external_read_settings.model_dump_json())
88
89
  if not f.ok:
89
- raise Exception(f'Could not cache the data, {f.text}')
90
+ raise Exception(f'trigger_database_read_collector: Could not cache the data, {f.text}')
90
91
  return Status(**f.json())
91
92
 
92
93
 
@@ -94,7 +95,7 @@ def trigger_database_write(database_external_write_settings: DatabaseExternalWri
94
95
  f = requests.post(url=f'{WORKER_URL}/store_database_write_result',
95
96
  data=database_external_write_settings.model_dump_json())
96
97
  if not f.ok:
97
- raise Exception(f'Could not cache the data, {f.text}')
98
+ raise Exception(f'trigger_database_write: Could not cache the data, {f.text}')
98
99
  return Status(**f.json())
99
100
 
100
101
 
@@ -102,7 +103,7 @@ def trigger_cloud_storage_write(database_external_write_settings: CloudStorageWr
102
103
  f = requests.post(url=f'{WORKER_URL}/write_data_to_cloud',
103
104
  data=database_external_write_settings.model_dump_json())
104
105
  if not f.ok:
105
- raise Exception(f'Could not cache the data, {f.text}')
106
+ raise Exception(f'trigger_cloud_storage_write: Could not cache the data, {f.text}')
106
107
  return Status(**f.json())
107
108
 
108
109
 
@@ -111,7 +112,7 @@ def get_results(file_ref: str) -> Status | None:
111
112
  if f.status_code == 200:
112
113
  return Status(**f.json())
113
114
  else:
114
- raise Exception(f'Could not fetch the data, {f.text}')
115
+ raise Exception(f'get_results: Could not fetch the data, {f.text}')
115
116
 
116
117
 
117
118
  def results_exists(file_ref: str):
@@ -128,6 +129,25 @@ def results_exists(file_ref: str):
128
129
  return False
129
130
 
130
131
 
132
def clear_task_from_worker(file_ref: str) -> bool:
    """Ask the worker service to clear a task via an HTTP DELETE request.

    The request targets ``{WORKER_URL}/clear_task/{file_ref}``. Whatever
    cleanup the worker performs for that task (presumably removing its cached
    files -- confirm against the worker route) happens on the worker side and
    is not visible from this function.

    Args:
        file_ref (str): The unique identifier of the task to clear.

    Returns:
        bool: True when the worker answers with status code 200; False for any
        other status code or when the request itself fails.
    """
    try:
        response = requests.delete(f'{WORKER_URL}/clear_task/{file_ref}')
    except requests.RequestException as e:
        # Best effort: a worker that cannot be reached is logged, not raised.
        logger.error(f"Failed to remove results: {str(e)}")
        return False
    return response.status_code == 200
149
+
150
+
131
151
  def get_df_result(encoded_df: str) -> pl.LazyFrame:
132
152
  r = decodebytes(encoded_df.encode())
133
153
  return pl.LazyFrame.deserialize(io.BytesIO(r))
@@ -1,18 +1,22 @@
1
1
  import datetime
2
2
  import pickle
3
+
4
+ import os
5
+
3
6
  import polars as pl
7
+
4
8
  import fastexcel
5
9
  from fastapi.exceptions import HTTPException
6
10
  from time import time
7
11
  from functools import partial
8
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple
12
+ from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
9
13
  from uuid import uuid1
10
14
  from copy import deepcopy
11
15
  from pyarrow.parquet import ParquetFile
12
16
  from flowfile_core.configs import logger
13
17
  from flowfile_core.configs.flow_logger import FlowLogger
14
18
  from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
15
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import cast_str_to_polars_type, FlowfileColumn
19
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
16
20
 
17
21
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
18
22
  from flowfile_core.utils.arrow_reader import get_read_top_n
@@ -47,6 +51,7 @@ from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source impor
47
51
  from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
48
52
  get_local_cloud_connection)
49
53
  from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
54
+ from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
50
55
 
51
56
 
52
57
  def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
@@ -175,11 +180,9 @@ class FlowGraph:
175
180
  schema: Optional[List[FlowfileColumn]] = None
176
181
  has_over_row_function: bool = False
177
182
  _flow_starts: List[Union[int, str]] = None
178
- node_results: List[NodeResult] = None
179
183
  latest_run_info: Optional[RunInformation] = None
180
184
  start_datetime: datetime = None
181
185
  end_datetime: datetime = None
182
- nodes_completed: int = 0
183
186
  _flow_settings: schemas.FlowSettings = None
184
187
  flow_logger: FlowLogger
185
188
 
@@ -206,11 +209,9 @@ class FlowGraph:
206
209
 
207
210
  self._flow_settings = flow_settings
208
211
  self.uuid = str(uuid1())
209
- self.nodes_completed = 0
210
212
  self.start_datetime = None
211
213
  self.end_datetime = None
212
214
  self.latest_run_info = None
213
- self.node_results = []
214
215
  self._flow_id = flow_settings.flow_id
215
216
  self.flow_logger = FlowLogger(flow_settings.flow_id)
216
217
  self._flow_starts: List[FlowNode] = []
@@ -436,6 +437,24 @@ class FlowGraph:
436
437
  node = self._node_db.get(node_id)
437
438
  if node is not None:
438
439
  return node
440
+
441
def add_user_defined_node(self, *,
                          custom_node: CustomNodeBase,
                          user_defined_node_settings: input_schema.UserDefinedNode
                          ):
    """Register a user-defined (custom) node as a step in the graph.

    Args:
        custom_node: The custom node whose ``process`` method does the work
            and whose ``item`` is used as the node type.
        user_defined_node_settings: Settings for the node; supplies the node
            id and the ids of the nodes it depends on.
    """
    frame_types = (pl.LazyFrame, pl.DataFrame)

    def _func(*fdes: FlowDataEngine) -> FlowDataEngine | None:
        # Hand the raw frames of every input to the custom node.
        frames = (engine.data_frame for engine in fdes)
        result = custom_node.process(*frames)
        # Only polars frames are wrapped; any other output yields no result.
        return FlowDataEngine(result) if isinstance(result, frame_types) else None

    settings = user_defined_node_settings
    self.add_node_step(
        node_id=settings.node_id,
        function=_func,
        setting_input=settings,
        input_node_ids=settings.depending_on_ids,
        node_type=custom_node.item,
    )
439
458
 
440
459
  def add_pivot(self, pivot_settings: input_schema.NodePivot):
441
460
  """Adds a pivot node to the graph.
@@ -814,11 +833,11 @@ class FlowGraph:
814
833
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
815
834
  node = self.get_node(node_id=fuzzy_settings.node_id)
816
835
  if self.execution_location == "local":
817
- return main.fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input,
836
+ return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
818
837
  other=right,
819
838
  node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
820
839
 
821
- f = main.start_fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input, other=right, file_ref=node.hash,
840
+ f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
822
841
  flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
823
842
  logger.info("Started the fuzzy match action")
824
843
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
@@ -1599,6 +1618,66 @@ class FlowGraph:
1599
1618
  self.reset()
1600
1619
  self.flow_settings.execution_location = execution_location
1601
1620
 
1621
def validate_if_node_can_be_fetched(self, node_id: int) -> None:
    """Check that a single node can be fetched (executed on its own).

    Args:
        node_id: Identifier of the node to validate.

    Raises:
        Exception: If no node with ``node_id`` is registered in the graph, or
            if the computed execution plan lists the node among the nodes to
            skip.
    """
    flow_node = self._node_db.get(node_id)
    # Compare against None explicitly so only a missing entry counts as
    # "not found", independent of the node object's truthiness.
    if flow_node is None:
        raise Exception("Node not found")
    # Only the skip list is needed here; the execution order is discarded.
    skip_nodes, _ = compute_execution_plan(
        nodes=self.nodes, flow_starts=self._flow_starts + self.get_implicit_starter_nodes()
    )
    if any(skip_node.node_id == flow_node.node_id for skip_node in skip_nodes):
        raise Exception("Node can not be executed because it does not have its inputs")
1630
+
1631
def create_initial_run_information(self, number_of_nodes: int,
                                   run_type: Literal["fetch_one", "full_run"]):
    """Build the run record used at the start of an execution.

    Args:
        number_of_nodes: How many nodes the run is expected to execute.
        run_type: ``"fetch_one"`` for a single-node fetch, ``"full_run"`` for
            a run of the whole graph.

    Returns:
        A RunInformation for this flow, stamped with the current time as
        start time, with no end time, no success verdict and an empty list
        of node step results.
    """
    initial_state = dict(
        flow_id=self.flow_id,
        start_time=datetime.datetime.now(),
        end_time=None,
        success=None,
        number_of_nodes=number_of_nodes,
        node_step_result=[],
        run_type=run_type,
    )
    return RunInformation(**initial_state)
1638
+
1639
def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
    """Executes a specific node in the graph by its ID.

    The node is executed with ``reset_cache=True``, ``performance_mode=False``
    and ``optimize_for_downstream=False``, and the outcome is recorded in a
    fresh ``latest_run_info`` of run type ``"fetch_one"``.

    Args:
        node_id: Identifier of the node to execute.

    Returns:
        The run information from ``get_run_info()`` when execution finished
        without raising; None when executing the node raised (the error is
        written to the node's logger and recorded on the node result).

    Raises:
        Exception: If the flow is already running, or if no node with
            ``node_id`` exists in the graph.
    """
    if self.flow_settings.is_running:
        raise Exception("Flow is already running")
    flow_node = self.get_node(node_id)
    if flow_node is None:
        # Fail before the running flag is raised; otherwise the attribute
        # access below would blow up with the flow stuck in "running" state.
        raise Exception(f"Node {node_id} not found")

    self.flow_settings.is_running = True
    try:
        # Setup lives inside the try so the finally clause always clears the
        # running flag, even when preparing the run fails.
        self.flow_settings.is_canceled = False
        self.flow_logger.clear_log_file()
        self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
        node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
        node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
        logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
        try:
            self.latest_run_info.node_step_result.append(node_result)
            flow_node.execute_node(run_location=self.flow_settings.execution_location,
                                   performance_mode=False,
                                   node_logger=node_logger,
                                   optimize_for_downstream=False,
                                   reset_cache=True)
            node_result.error = str(flow_node.results.errors)
            # NOTE(review): the previous code set success to None when the
            # flow was canceled, but that value was overwritten right away by
            # the assignment below, so a canceled fetch is reported like any
            # other. Confirm whether a cancel should leave success unset.
            node_result.success = flow_node.results.errors is None
            node_result.end_timestamp = time()
            node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
            node_result.is_running = False
            self.latest_run_info.nodes_completed += 1
            self.latest_run_info.end_time = datetime.datetime.now()
            # Clear the flag before asking for the run info so the summary
            # is built for a flow that is no longer marked as running.
            self.flow_settings.is_running = False
            return self.get_run_info()
        except Exception as e:
            node_result.error = 'Node did not run'
            node_result.success = False
            node_result.end_timestamp = time()
            node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
            node_result.is_running = False
            node_logger.error(f'Error in node {flow_node.node_id}: {e}')
            return None
    finally:
        self.flow_settings.is_running = False
1680
+
1602
1681
  def run_graph(self) -> RunInformation | None:
1603
1682
  """Executes the entire data flow graph from start to finish.
1604
1683
 
@@ -1614,20 +1693,23 @@ class FlowGraph:
1614
1693
  if self.flow_settings.is_running:
1615
1694
  raise Exception('Flow is already running')
1616
1695
  try:
1696
+
1617
1697
  self.flow_settings.is_running = True
1618
1698
  self.flow_settings.is_canceled = False
1619
1699
  self.flow_logger.clear_log_file()
1620
- self.nodes_completed = 0
1621
- self.node_results = []
1622
- self.start_datetime = datetime.datetime.now()
1623
- self.end_datetime = None
1624
- self.latest_run_info = None
1625
1700
  self.flow_logger.info('Starting to run flowfile flow...')
1626
- skip_nodes, execution_order = compute_execution_plan(nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
1701
+
1702
+ skip_nodes, execution_order = compute_execution_plan(
1703
+ nodes=self.nodes,
1704
+ flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1705
+ )
1706
+
1707
+ self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1627
1708
 
1628
1709
  skip_node_message(self.flow_logger, skip_nodes)
1629
1710
  execution_order_message(self.flow_logger, execution_order)
1630
1711
  performance_mode = self.flow_settings.execution_mode == 'Performance'
1712
+
1631
1713
  for node in execution_order:
1632
1714
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1633
1715
  if self.flow_settings.is_canceled:
@@ -1637,7 +1719,7 @@ class FlowGraph:
1637
1719
  node_logger.info(f'Skipping node {node.node_id}')
1638
1720
  continue
1639
1721
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1640
- self.node_results.append(node_result)
1722
+ self.latest_run_info.node_step_result.append(node_result)
1641
1723
  logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1642
1724
  node.execute_node(run_location=self.flow_settings.execution_location,
1643
1725
  performance_mode=performance_mode,
@@ -1663,7 +1745,7 @@ class FlowGraph:
1663
1745
  if not node_result.success:
1664
1746
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1665
1747
  node_logger.info(f'Completed node with success: {node_result.success}')
1666
- self.nodes_completed += 1
1748
+ self.latest_run_info.nodes_completed += 1
1667
1749
  self.flow_logger.info('Flow completed!')
1668
1750
  self.end_datetime = datetime.datetime.now()
1669
1751
  self.flow_settings.is_running = False
@@ -1675,28 +1757,23 @@ class FlowGraph:
1675
1757
  finally:
1676
1758
  self.flow_settings.is_running = False
1677
1759
 
1678
- def get_run_info(self) -> RunInformation:
1760
def get_run_info(self) -> RunInformation | None:
    """Return the record of the most recent execution, if there is one.

    Returns:
        None when no run information has been recorded. Otherwise the
        ``latest_run_info`` object; when the flow is not running and the
        record has no ``success`` verdict yet, ``success`` is first set to
        whether every recorded node step succeeded.
    """
    still_running = self.flow_settings.is_running
    run_info = self.latest_run_info
    if run_info is None:
        return None
    # The verdict is only derived once the flow has stopped, and only when
    # it has not been decided already.
    if not still_running and run_info.success is None:
        run_info.success = all(step.success for step in run_info.node_step_result)
    return run_info
1700
1777
 
1701
1778
  @property
1702
1779
  def node_connections(self) -> List[Tuple[int, int]]:
@@ -1767,8 +1844,14 @@ class FlowGraph:
1767
1844
  Args:
1768
1845
  flow_path: The path where the flow file will be saved.
1769
1846
  """
1770
- with open(flow_path, 'wb') as f:
1771
- pickle.dump(self.get_node_storage(), f)
1847
+ logger.info("Saving flow to %s", flow_path)
1848
+ os.makedirs(os.path.dirname(flow_path), exist_ok=True)
1849
+ try:
1850
+ with open(flow_path, 'wb') as f:
1851
+ pickle.dump(self.get_node_storage(), f)
1852
+ except Exception as e:
1853
+ logger.error(f"Error saving flow: {e}")
1854
+
1772
1855
  self.flow_settings.path = flow_path
1773
1856
 
1774
1857
  def get_frontend_data(self) -> dict:
@@ -8,11 +8,11 @@ from flowfile_core.configs.flow_logger import NodeLogger
8
8
 
9
9
  from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
10
10
  from flowfile_core.flowfile.utils import get_hash
11
- from flowfile_core.configs.node_store import nodes as node_interface
11
+ from flowfile_core.configs import node_store
12
12
  from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
13
13
  from time import sleep
14
14
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
15
- ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result,
15
+ ExternalDfFetcher, ExternalSampler, clear_task_from_worker, results_exists, get_external_df_result,
16
16
  ExternalDatabaseFetcher, ExternalDatabaseWriter, ExternalCloudWriter)
17
17
  from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
18
18
  NodeStepStats, NodeResults)
@@ -27,7 +27,7 @@ class FlowNode:
27
27
  """
28
28
  parent_uuid: str
29
29
  node_type: str
30
- node_template: node_interface.NodeTemplate
30
+ node_template: node_store.NodeTemplate
31
31
  node_default: schemas.NodeDefault
32
32
  node_schema: NodeSchemaInformation
33
33
  node_inputs: NodeStepInputs
@@ -251,10 +251,10 @@ class FlowNode:
251
251
  self.results.errors = None
252
252
  self.add_lead_to_in_depend_source()
253
253
  _ = self.hash
254
- self.node_template = node_interface.node_dict.get(self.node_type)
254
+ self.node_template = node_store.node_dict.get(self.node_type)
255
255
  if self.node_template is None:
256
256
  raise Exception(f'Node template {self.node_type} not found')
257
- self.node_default = node_interface.node_defaults.get(self.node_type)
257
+ self.node_default = node_store.node_defaults.get(self.node_type)
258
258
  self.setting_input = setting_input # wait until the end so that the hash is calculated correctly
259
259
 
260
260
  @property
@@ -678,9 +678,10 @@ class FlowNode:
678
678
 
679
679
  if results_exists(self.hash):
680
680
  logger.warning('Not implemented')
681
+ clear_task_from_worker(self.hash)
681
682
 
682
683
  def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
683
- execution_location: schemas.ExecutionLocationsLiteral = "worker") -> bool:
684
+ execution_location: schemas.ExecutionLocationsLiteral = "remote") -> bool:
684
685
  """Determines if the node needs to be executed.
685
686
 
686
687
  The decision is based on its run state, caching settings, and execution mode.
@@ -723,6 +724,8 @@ class FlowNode:
723
724
  Raises:
724
725
  Exception: Propagates exceptions from the execution.
725
726
  """
727
+ self.clear_table_example()
728
+
726
729
  def example_data_generator():
727
730
  example_data = None
728
731
 
@@ -735,6 +738,7 @@ class FlowNode:
735
738
  resulting_data = self.get_resulting_data()
736
739
 
737
740
  if not performance_mode:
741
+ self.node_stats.has_run_with_current_setup = True
738
742
  self.results.example_data_generator = example_data_generator()
739
743
  self.node_schema.result_schema = self.results.resulting_data.schema
740
744
  self.node_stats.has_completed_last_run = True
@@ -854,8 +858,12 @@ class FlowNode:
854
858
  logger.warning('No external process to cancel')
855
859
  self.node_stats.is_canceled = True
856
860
 
857
- def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
858
- performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
861
+ def execute_node(self, run_location: schemas.ExecutionLocationsLiteral,
862
+ reset_cache: bool = False,
863
+ performance_mode: bool = False,
864
+ retry: bool = True,
865
+ node_logger: NodeLogger = None,
866
+ optimize_for_downstream: bool = True):
859
867
  """Orchestrates the execution, handling location, caching, and retries.
860
868
 
861
869
  Args:
@@ -864,25 +872,33 @@ class FlowNode:
864
872
  performance_mode: If True, optimizes for speed over diagnostics.
865
873
  retry: If True, allows retrying execution on recoverable errors.
866
874
  node_logger: The logger for this node execution.
875
+ optimize_for_downstream: If True, operations that shuffle the order of rows are fully cached and provided as
876
+ input to downstream steps.
867
877
 
868
878
  Raises:
869
879
  Exception: If the node_logger is not defined.
870
880
  """
871
881
  if node_logger is None:
872
882
  raise Exception('Flow logger is not defined')
873
- # node_logger = flow_logger.get_node_logger(self.node_id)
883
+ # TODO: Simplify how the execution route is picked; there are many duplicate checks.
884
+
874
885
  if reset_cache:
875
886
  self.remove_cache()
876
887
  self.node_stats.has_run_with_current_setup = False
877
888
  self.node_stats.has_completed_last_run = False
889
+
878
890
  if self.is_setup:
879
891
  node_logger.info(f'Starting to run {self.__name__}')
880
892
  if (self.needs_run(performance_mode, node_logger, run_location) or self.node_template.node_group == "output"
881
893
  and not (run_location == 'local')):
894
+ self.clear_table_example()
882
895
  self.prepare_before_run()
896
+ self.reset()
883
897
  try:
884
- if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
885
- and not run_location == 'local')) or self.node_settings.cache_results:
898
+ if (((run_location == 'remote' or
899
+ (self.node_default.transform_type == 'wide' and optimize_for_downstream) and
900
+ not run_location == 'local'))
901
+ or self.node_settings.cache_results):
886
902
  node_logger.info('Running the node remotely')
887
903
  if self.node_settings.cache_results:
888
904
  performance_mode = False
@@ -924,7 +940,7 @@ class FlowNode:
924
940
  node_logger.error(f'Error with running the node: {e}')
925
941
  self.node_stats.error = str(e)
926
942
  self.node_stats.has_completed_last_run = False
927
- self.node_stats.has_run_with_current_setup = True
943
+
928
944
  else:
929
945
  node_logger.info('Node has already run, not running the node')
930
946
  else:
@@ -1108,6 +1124,17 @@ class FlowNode:
1108
1124
  if self.singular_input:
1109
1125
  return self.all_inputs[0]
1110
1126
 
1127
+ def clear_table_example(self) -> None:
1128
+ """
1129
+ Clear the table example stored in the results (example data, its generator and its path).
1130
+ Returns:
1131
+ None
1132
+ """
1133
+
1134
+ self.results.example_data = None
1135
+ self.results.example_data_generator = None
1136
+ self.results.example_data_path = None
1137
+
1111
1138
  def get_table_example(self, include_data: bool = False) -> TableExample | None:
1112
1139
  """Generates a `TableExample` model summarizing the node's output.
1113
1140
 
@@ -1136,10 +1163,15 @@ class FlowNode:
1136
1163
  data = []
1137
1164
  schema = [FileColumn.model_validate(c.get_column_repr()) for c in self.schema]
1138
1165
  fl = self.get_resulting_data()
1166
+ has_example_data = self.results.example_data_generator is not None
1167
+
1139
1168
  return TableExample(node_id=self.node_id,
1140
1169
  name=str(self.node_id), number_of_records=999,
1141
1170
  number_of_columns=fl.number_of_fields,
1142
- table_schema=schema, columns=fl.columns, data=data)
1171
+ table_schema=schema, columns=fl.columns, data=data,
1172
+ has_example_data=has_example_data,
1173
+ has_run_with_current_setup=self.node_stats.has_run_with_current_setup
1174
+ )
1143
1175
  else:
1144
1176
  logger.warning('getting the table example but the node has not run')
1145
1177
  try:
@@ -3,11 +3,25 @@ from dataclasses import dataclass
3
3
  from typing import Dict, List
4
4
  import os
5
5
  from pathlib import Path
6
+ from datetime import datetime
6
7
 
7
8
  from flowfile_core.flowfile.manage.open_flowfile import open_flow
8
9
  from flowfile_core.flowfile.flow_graph import FlowGraph
9
10
  from flowfile_core.schemas.schemas import FlowSettings
10
11
  from flowfile_core.flowfile.utils import create_unique_id
12
+ from shared.storage_config import storage
13
+
14
+
15
+ def get_flow_save_location(flow_name: str) -> Path:
16
+ """Gets the initial save location for flow files"""
17
+ if ".flowfile" not in flow_name:
18
+ flow_name += ".flowfile"
19
+ return storage.temp_directory_for_flows / flow_name
20
+
21
+
22
+ def create_flow_name() -> str:
23
+ """Creates a unique flow name"""
24
+ return datetime.now().strftime("%Y%m%d_%H_%M_%S")+"_flow.flowfile"
11
25
 
12
26
 
13
27
  @dataclass
@@ -57,7 +71,7 @@ class FlowfileHandler:
57
71
  else:
58
72
  raise Exception('Flow not found')
59
73
 
60
- def add_flow(self, name: str, flow_path: str) -> int:
74
+ def add_flow(self, name: str = None, flow_path: str = None) -> int:
61
75
  """
62
76
  Creates a new flow with a reference to the flow path
63
77
  Args:
@@ -69,8 +83,13 @@ class FlowfileHandler:
69
83
 
70
84
  """
71
85
  next_id = create_unique_id()
72
- flow_info = FlowSettings(name=name, flow_id=next_id, save_location='', path=flow_path)
73
- _ = self.register_flow(flow_info)
86
+ if not name:
87
+ name = create_flow_name()
88
+ if not flow_path:
89
+ flow_path = get_flow_save_location(name)
90
+ flow_info = FlowSettings(name=name, flow_id=next_id, save_location=str(flow_path), path=str(flow_path))
91
+ flow = self.register_flow(flow_info)
92
+ flow.save_flow(flow.flow_settings.path)
74
93
  return next_id
75
94
 
76
95
  def get_flow_info(self, flow_id: int) -> FlowSettings:
@@ -4,6 +4,7 @@ from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_comp
4
4
  import pickle
5
5
  from flowfile_core.flowfile.flow_graph import FlowGraph
6
6
  from pathlib import Path
7
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
7
8
 
8
9
 
9
10
  def determine_insertion_order(node_storage: schemas.FlowInformation):
@@ -81,7 +82,14 @@ def open_flow(flow_path: Path) -> FlowGraph:
81
82
  new_flow.add_node_promise(node_promise)
82
83
  for node_id in ingestion_order:
83
84
  node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
84
- getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)
85
+ if hasattr(node_info.setting_input, "is_user_defined") and node_info.setting_input.is_user_defined:
86
+ if node_info.type not in CUSTOM_NODE_STORE:
87
+ continue
88
+ user_defined_node_class = CUSTOM_NODE_STORE[node_info.type]
89
+ new_flow.add_user_defined_node(custom_node=user_defined_node_class.from_settings(node_info.setting_input.settings),
90
+ user_defined_node_settings=node_info.setting_input)
91
+ else:
92
+ getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)
85
93
  from_node = new_flow.get_node(node_id)
86
94
  for output_node_id in node_info.outputs:
87
95
  to_node = new_flow.get_node(output_node_id)
@@ -0,0 +1,47 @@
1
+ # flowfile_core/flowfile/node_designer/__init__.py
2
+
3
+ """
4
+ Tools for creating custom Flowfile nodes.
5
+
6
+ This package provides all the necessary components for developers to build their own
7
+ custom nodes, define their UI, and implement their data processing logic.
8
+ """
9
+
10
+ # Import the core base class for creating a new node
11
+ from .custom_node import CustomNodeBase, NodeSettings
12
+
13
+ # Import all UI components so they can be used directly
14
+ from .ui_components import (
15
+ Section,
16
+ TextInput,
17
+ NumericInput,
18
+ ToggleSwitch,
19
+ SingleSelect,
20
+ MultiSelect,
21
+ ColumnSelector,
22
+ IncomingColumns, # Important marker class for dynamic dropdowns
23
+ )
24
+
25
+ # Import the main `Types` object for filtering in ColumnSelector
26
+ from .data_types import Types
27
+
28
+
29
+ # Define the public API of this package
30
+ __all__ = [
31
+ # Core Node Class
32
+ "CustomNodeBase",
33
+
34
+ # UI Components & Layout
35
+ "Section",
36
+ "TextInput",
37
+ "NumericInput",
38
+ "ToggleSwitch",
39
+ "SingleSelect",
40
+ "MultiSelect",
41
+ "NodeSettings",
42
+ "ColumnSelector",
43
+ "IncomingColumns",
44
+
45
+ # Data Type Filtering
46
+ "Types",
47
+ ]