Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,20 +1,27 @@
1
1
  import datetime
2
- import pickle
2
+
3
+ import os
4
+ import yaml
5
+ import json
6
+
3
7
  import polars as pl
8
+ from pathlib import Path
9
+
4
10
  import fastexcel
5
11
  from fastapi.exceptions import HTTPException
6
12
  from time import time
7
13
  from functools import partial
8
- from typing import List, Dict, Union, Callable, Any, Optional, Tuple
14
+ from typing import List, Dict, Union, Callable, Any, Optional, Tuple, Literal
9
15
  from uuid import uuid1
10
16
  from copy import deepcopy
11
17
  from pyarrow.parquet import ParquetFile
12
18
  from flowfile_core.configs import logger
13
19
  from flowfile_core.configs.flow_logger import FlowLogger
14
20
  from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
15
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import cast_str_to_polars_type, FlowfileColumn
21
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, cast_str_to_polars_type
16
22
 
17
23
  from flowfile_core.flowfile.flow_data_engine.cloud_storage_reader import CloudStorageReader
24
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
18
25
  from flowfile_core.utils.arrow_reader import get_read_top_n
19
26
  from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
20
27
  from flowfile_core.flowfile.flow_data_engine.read_excel_tables import (get_open_xlsx_datatypes,
@@ -47,6 +54,23 @@ from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source impor
47
54
  from flowfile_core.flowfile.database_connection_manager.db_connections import (get_local_database_connection,
48
55
  get_local_cloud_connection)
49
56
  from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
57
+ from flowfile_core.flowfile.node_designer.custom_node import CustomNodeBase
58
+ from importlib.metadata import version, PackageNotFoundError
59
+
60
+ try:
61
+ __version__ = version("Flowfile")
62
+ except PackageNotFoundError:
63
+ __version__ = "0.0.0-dev"
64
+
65
+
66
+ def represent_list_json(dumper, data):
67
+ """Use inline style for short simple lists, block style for complex ones."""
68
+ if len(data) <= 10 and all(isinstance(item, (int, str, float, bool, type(None))) for item in data):
69
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=True)
70
+ return dumper.represent_sequence('tag:yaml.org,2002:seq', data, flow_style=False)
71
+
72
+
73
+ yaml.add_representer(list, represent_list_json)
50
74
 
51
75
 
52
76
  def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
@@ -147,7 +171,7 @@ def get_cloud_connection_settings(connection_name: str,
147
171
  HTTPException: If the connection settings cannot be found.
148
172
  """
149
173
  cloud_connection_settings = get_local_cloud_connection(connection_name, user_id)
150
- if cloud_connection_settings is None and auth_mode in ("env_vars", "auto"):
174
+ if cloud_connection_settings is None and auth_mode in ("env_vars", transform_schema.AUTO_DATA_TYPE):
151
175
  # If the auth mode is aws-cli, we do not need connection settings
152
176
  cloud_connection_settings = FullCloudStorageConnection(storage_type="s3", auth_method="env_vars")
153
177
  elif cloud_connection_settings is None and auth_mode == "aws-cli":
@@ -175,11 +199,9 @@ class FlowGraph:
175
199
  schema: Optional[List[FlowfileColumn]] = None
176
200
  has_over_row_function: bool = False
177
201
  _flow_starts: List[Union[int, str]] = None
178
- node_results: List[NodeResult] = None
179
202
  latest_run_info: Optional[RunInformation] = None
180
203
  start_datetime: datetime = None
181
204
  end_datetime: datetime = None
182
- nodes_completed: int = 0
183
205
  _flow_settings: schemas.FlowSettings = None
184
206
  flow_logger: FlowLogger
185
207
 
@@ -206,11 +228,9 @@ class FlowGraph:
206
228
 
207
229
  self._flow_settings = flow_settings
208
230
  self.uuid = str(uuid1())
209
- self.nodes_completed = 0
210
231
  self.start_datetime = None
211
232
  self.end_datetime = None
212
233
  self.latest_run_info = None
213
- self.node_results = []
214
234
  self._flow_id = flow_settings.flow_id
215
235
  self.flow_logger = FlowLogger(flow_settings.flow_id)
216
236
  self._flow_starts: List[FlowNode] = []
@@ -222,7 +242,7 @@ class FlowGraph:
222
242
  self._node_ids = []
223
243
  self._node_db = {}
224
244
  self.cache_results = cache_results
225
- self.__name__ = name if name else id(self)
245
+ self.__name__ = name if name else "flow_" + str(id(self))
226
246
  self.depends_on = {}
227
247
  if path_ref is not None:
228
248
  self.add_datasource(input_schema.NodeDatasource(file_path=path_ref))
@@ -436,6 +456,24 @@ class FlowGraph:
436
456
  node = self._node_db.get(node_id)
437
457
  if node is not None:
438
458
  return node
459
+
460
+ def add_user_defined_node(self, *,
461
+ custom_node: CustomNodeBase,
462
+ user_defined_node_settings: input_schema.UserDefinedNode
463
+ ):
464
+
465
+ def _func(*fdes: FlowDataEngine) -> FlowDataEngine | None:
466
+ output = custom_node.process(*(fde.data_frame for fde in fdes))
467
+ if isinstance(output, pl.LazyFrame | pl.DataFrame):
468
+ return FlowDataEngine(output)
469
+ return None
470
+
471
+ self.add_node_step(node_id=user_defined_node_settings.node_id,
472
+ function=_func,
473
+ setting_input=user_defined_node_settings,
474
+ input_node_ids=user_defined_node_settings.depending_on_ids,
475
+ node_type=custom_node.item,
476
+ )
439
477
 
440
478
  def add_pivot(self, pivot_settings: input_schema.NodePivot):
441
479
  """Adds a pivot node to the graph.
@@ -714,11 +752,11 @@ class FlowGraph:
714
752
  """
715
753
 
716
754
  error = ""
717
- if function_settings.function.field.data_type not in (None, "Auto"):
755
+ if function_settings.function.field.data_type not in (None, transform_schema.AUTO_DATA_TYPE):
718
756
  output_type = cast_str_to_polars_type(function_settings.function.field.data_type)
719
757
  else:
720
758
  output_type = None
721
- if output_type not in (None, "Auto"):
759
+ if output_type not in (None, transform_schema.AUTO_DATA_TYPE):
722
760
  new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
723
761
  data_type=str(output_type))]
724
762
  else:
@@ -736,6 +774,7 @@ class FlowGraph:
736
774
  setting_input=function_settings,
737
775
  input_node_ids=[function_settings.depending_on_id]
738
776
  )
777
+ # TODO: Add validation here
739
778
  if error != "":
740
779
  node = self.get_node(function_settings.node_id)
741
780
  node.results.errors = error
@@ -752,13 +791,11 @@ class FlowGraph:
752
791
  Returns:
753
792
  The `FlowGraph` instance for method chaining.
754
793
  """
755
-
756
794
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
757
795
  for left_select in cross_join_settings.cross_join_input.left_select.renames:
758
796
  left_select.is_available = True if left_select.old_name in main.schema else False
759
797
  for right_select in cross_join_settings.cross_join_input.right_select.renames:
760
798
  right_select.is_available = True if right_select.old_name in right.schema else False
761
-
762
799
  return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
763
800
  auto_generate_selection=cross_join_settings.auto_generate_selection,
764
801
  verify_integrity=False,
@@ -781,13 +818,11 @@ class FlowGraph:
781
818
  Returns:
782
819
  The `FlowGraph` instance for method chaining.
783
820
  """
784
-
785
821
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
786
822
  for left_select in join_settings.join_input.left_select.renames:
787
823
  left_select.is_available = True if left_select.old_name in main.schema else False
788
824
  for right_select in join_settings.join_input.right_select.renames:
789
825
  right_select.is_available = True if right_select.old_name in right.schema else False
790
-
791
826
  return main.join(join_input=join_settings.join_input,
792
827
  auto_generate_selection=join_settings.auto_generate_selection,
793
828
  verify_integrity=False,
@@ -814,18 +849,18 @@ class FlowGraph:
814
849
  def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
815
850
  node = self.get_node(node_id=fuzzy_settings.node_id)
816
851
  if self.execution_location == "local":
817
- return main.fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input,
852
+ return main.fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input),
818
853
  other=right,
819
854
  node_logger=self.flow_logger.get_node_logger(fuzzy_settings.node_id))
820
855
 
821
- f = main.start_fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input, other=right, file_ref=node.hash,
856
+ f = main.start_fuzzy_join(fuzzy_match_input=deepcopy(fuzzy_settings.join_input), other=right, file_ref=node.hash,
822
857
  flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
823
858
  logger.info("Started the fuzzy match action")
824
859
  node._fetch_cached_df = f # Add to the node so it can be cancelled and fetch later if needed
825
860
  return FlowDataEngine(f.get_result())
826
861
 
827
862
  def schema_callback():
828
- fm_input_copy = deepcopy(fuzzy_settings.join_input) # Deepcopy create an unique object per func
863
+ fm_input_copy = FuzzyMatchInputManager(fuzzy_settings.join_input) # Deepcopy create an unique object per func
829
864
  node = self.get_node(node_id=fuzzy_settings.node_id)
830
865
  return calculate_fuzzy_match_schema(fm_input_copy,
831
866
  left_schema=node.node_inputs.main_inputs[0].schema,
@@ -1112,7 +1147,6 @@ class FlowGraph:
1112
1147
  """
1113
1148
 
1114
1149
  def _func(df: FlowDataEngine):
1115
- output_file.output_settings.populate_abs_file_path()
1116
1150
  execute_remote = self.execution_location != 'local'
1117
1151
  df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
1118
1152
  execute_remote=execute_remote)
@@ -1432,10 +1466,10 @@ class FlowGraph:
1432
1466
  Args:
1433
1467
  input_file: The settings for the read operation.
1434
1468
  """
1435
-
1436
- if input_file.received_file.file_type in ('xlsx', 'excel') and input_file.received_file.sheet_name == '':
1469
+ if (input_file.received_file.file_type in ('xlsx', 'excel') and
1470
+ input_file.received_file.table_settings.sheet_name == ''):
1437
1471
  sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
1438
- input_file.received_file.sheet_name = sheet_name
1472
+ input_file.received_file.table_settings.sheet_name = sheet_name
1439
1473
 
1440
1474
  received_file = input_file.received_file
1441
1475
  input_file.received_file.set_absolute_filepath()
@@ -1444,7 +1478,7 @@ class FlowGraph:
1444
1478
  input_file.received_file.set_absolute_filepath()
1445
1479
  if input_file.received_file.file_type == 'parquet':
1446
1480
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1447
- elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.encoding:
1481
+ elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.table_settings.encoding:
1448
1482
  input_data = FlowDataEngine.create_from_path(input_file.received_file)
1449
1483
  else:
1450
1484
  input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
@@ -1481,12 +1515,12 @@ class FlowGraph:
1481
1515
  # If the file is an Excel file, we need to use the openpyxl engine to read the schema
1482
1516
  schema_callback = get_xlsx_schema_callback(engine='openpyxl',
1483
1517
  file_path=received_file.file_path,
1484
- sheet_name=received_file.sheet_name,
1485
- start_row=received_file.start_row,
1486
- end_row=received_file.end_row,
1487
- start_column=received_file.start_column,
1488
- end_column=received_file.end_column,
1489
- has_headers=received_file.has_headers)
1518
+ sheet_name=received_file.table_settings.sheet_name,
1519
+ start_row=received_file.table_settings.start_row,
1520
+ end_row=received_file.table_settings.end_row,
1521
+ start_column=received_file.table_settings.start_column,
1522
+ end_column=received_file.table_settings.end_column,
1523
+ has_headers=received_file.table_settings.has_headers)
1490
1524
  else:
1491
1525
  schema_callback = None
1492
1526
  else:
@@ -1599,6 +1633,73 @@ class FlowGraph:
1599
1633
  self.reset()
1600
1634
  self.flow_settings.execution_location = execution_location
1601
1635
 
1636
+ def validate_if_node_can_be_fetched(self, node_id: int) -> None:
1637
+ flow_node = self._node_db.get(node_id)
1638
+ if not flow_node:
1639
+ raise Exception("Node not found found")
1640
+ skip_nodes, execution_order = compute_execution_plan(
1641
+ nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1642
+ )
1643
+ if flow_node.node_id in [skip_node.node_id for skip_node in skip_nodes]:
1644
+ raise Exception("Node can not be executed because it does not have it's inputs")
1645
+
1646
+ def create_initial_run_information(self, number_of_nodes: int,
1647
+ run_type: Literal["fetch_one", "full_run"]):
1648
+ return RunInformation(
1649
+ flow_id=self.flow_id, start_time=datetime.datetime.now(), end_time=None,
1650
+ success=None, number_of_nodes=number_of_nodes, node_step_result=[],
1651
+ run_type=run_type
1652
+ )
1653
+
1654
+ def create_empty_run_information(self) -> RunInformation:
1655
+ return RunInformation(
1656
+ flow_id=self.flow_id, start_time=None, end_time=None,
1657
+ success=None, number_of_nodes=0, node_step_result=[],
1658
+ run_type="init"
1659
+ )
1660
+
1661
+ def trigger_fetch_node(self, node_id: int) -> RunInformation | None:
1662
+ """Executes a specific node in the graph by its ID."""
1663
+ if self.flow_settings.is_running:
1664
+ raise Exception("Flow is already running")
1665
+ flow_node = self.get_node(node_id)
1666
+ self.flow_settings.is_running = True
1667
+ self.flow_settings.is_canceled = False
1668
+ self.flow_logger.clear_log_file()
1669
+ self.latest_run_info = self.create_initial_run_information(1, "fetch_one")
1670
+ node_logger = self.flow_logger.get_node_logger(flow_node.node_id)
1671
+ node_result = NodeResult(node_id=flow_node.node_id, node_name=flow_node.name)
1672
+ logger.info(f'Starting to run: node {flow_node.node_id}, start time: {node_result.start_timestamp}')
1673
+ try:
1674
+ self.latest_run_info.node_step_result.append(node_result)
1675
+ flow_node.execute_node(run_location=self.flow_settings.execution_location,
1676
+ performance_mode=False,
1677
+ node_logger=node_logger,
1678
+ optimize_for_downstream=False,
1679
+ reset_cache=True)
1680
+ node_result.error = str(flow_node.results.errors)
1681
+ if self.flow_settings.is_canceled:
1682
+ node_result.success = None
1683
+ node_result.success = None
1684
+ node_result.is_running = False
1685
+ node_result.success = flow_node.results.errors is None
1686
+ node_result.end_timestamp = time()
1687
+ node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1688
+ node_result.is_running = False
1689
+ self.latest_run_info.nodes_completed += 1
1690
+ self.latest_run_info.end_time = datetime.datetime.now()
1691
+ self.flow_settings.is_running = False
1692
+ return self.get_run_info()
1693
+ except Exception as e:
1694
+ node_result.error = 'Node did not run'
1695
+ node_result.success = False
1696
+ node_result.end_timestamp = time()
1697
+ node_result.run_time = int(node_result.end_timestamp - node_result.start_timestamp)
1698
+ node_result.is_running = False
1699
+ node_logger.error(f'Error in node {flow_node.node_id}: {e}')
1700
+ finally:
1701
+ self.flow_settings.is_running = False
1702
+
1602
1703
  def run_graph(self) -> RunInformation | None:
1603
1704
  """Executes the entire data flow graph from start to finish.
1604
1705
 
@@ -1614,20 +1715,23 @@ class FlowGraph:
1614
1715
  if self.flow_settings.is_running:
1615
1716
  raise Exception('Flow is already running')
1616
1717
  try:
1718
+
1617
1719
  self.flow_settings.is_running = True
1618
1720
  self.flow_settings.is_canceled = False
1619
1721
  self.flow_logger.clear_log_file()
1620
- self.nodes_completed = 0
1621
- self.node_results = []
1622
- self.start_datetime = datetime.datetime.now()
1623
- self.end_datetime = None
1624
- self.latest_run_info = None
1625
1722
  self.flow_logger.info('Starting to run flowfile flow...')
1626
- skip_nodes, execution_order = compute_execution_plan(nodes=self.nodes, flow_starts=self._flow_starts+self.get_implicit_starter_nodes())
1723
+
1724
+ skip_nodes, execution_order = compute_execution_plan(
1725
+ nodes=self.nodes,
1726
+ flow_starts=self._flow_starts+self.get_implicit_starter_nodes()
1727
+ )
1728
+
1729
+ self.latest_run_info = self.create_initial_run_information(len(execution_order), "full_run")
1627
1730
 
1628
1731
  skip_node_message(self.flow_logger, skip_nodes)
1629
1732
  execution_order_message(self.flow_logger, execution_order)
1630
1733
  performance_mode = self.flow_settings.execution_mode == 'Performance'
1734
+
1631
1735
  for node in execution_order:
1632
1736
  node_logger = self.flow_logger.get_node_logger(node.node_id)
1633
1737
  if self.flow_settings.is_canceled:
@@ -1637,7 +1741,7 @@ class FlowGraph:
1637
1741
  node_logger.info(f'Skipping node {node.node_id}')
1638
1742
  continue
1639
1743
  node_result = NodeResult(node_id=node.node_id, node_name=node.name)
1640
- self.node_results.append(node_result)
1744
+ self.latest_run_info.node_step_result.append(node_result)
1641
1745
  logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
1642
1746
  node.execute_node(run_location=self.flow_settings.execution_location,
1643
1747
  performance_mode=performance_mode,
@@ -1663,7 +1767,8 @@ class FlowGraph:
1663
1767
  if not node_result.success:
1664
1768
  skip_nodes.extend(list(node.get_all_dependent_nodes()))
1665
1769
  node_logger.info(f'Completed node with success: {node_result.success}')
1666
- self.nodes_completed += 1
1770
+ self.latest_run_info.nodes_completed += 1
1771
+ self.latest_run_info.end_time = datetime.datetime.now()
1667
1772
  self.flow_logger.info('Flow completed!')
1668
1773
  self.end_datetime = datetime.datetime.now()
1669
1774
  self.flow_settings.is_running = False
@@ -1681,22 +1786,17 @@ class FlowGraph:
1681
1786
  Returns:
1682
1787
  A RunInformation object with details about the last run.
1683
1788
  """
1789
+ is_running = self.flow_settings.is_running
1684
1790
  if self.latest_run_info is None:
1685
- node_results = self.node_results
1686
- success = all(nr.success for nr in node_results)
1687
- self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
1688
- success=success,
1689
- node_step_result=node_results, flow_id=self.flow_id,
1690
- nodes_completed=self.nodes_completed,
1691
- number_of_nodes=len(self.nodes))
1692
- elif self.latest_run_info.nodes_completed != self.nodes_completed:
1693
- node_results = self.node_results
1694
- self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
1695
- success=all(nr.success for nr in node_results),
1696
- node_step_result=node_results, flow_id=self.flow_id,
1697
- nodes_completed=self.nodes_completed,
1698
- number_of_nodes=len(self.nodes))
1699
- return self.latest_run_info
1791
+ return self.create_empty_run_information()
1792
+
1793
+ elif not is_running and self.latest_run_info.success is not None:
1794
+ return self.latest_run_info
1795
+
1796
+ run_info = self.latest_run_info
1797
+ if not is_running:
1798
+ run_info.success = all(nr.success for nr in run_info.node_step_result)
1799
+ return run_info
1700
1800
 
1701
1801
  @property
1702
1802
  def node_connections(self) -> List[Tuple[int, int]]:
@@ -1729,6 +1829,42 @@ class FlowGraph:
1729
1829
  node = self._node_db[node_id]
1730
1830
  return node.get_node_data(flow_id=self.flow_id, include_example=include_example)
1731
1831
 
1832
+ def get_flowfile_data(self) -> schemas.FlowfileData:
1833
+ start_node_ids = {v.node_id for v in self._flow_starts}
1834
+
1835
+ nodes = []
1836
+ for node in self.nodes:
1837
+ node_info = node.get_node_information()
1838
+ flowfile_node = schemas.FlowfileNode(
1839
+ id=node_info.id,
1840
+ type=node_info.type,
1841
+ is_start_node=node.node_id in start_node_ids,
1842
+ description=node_info.description,
1843
+ x_position=int(node_info.x_position),
1844
+ y_position=int(node_info.y_position),
1845
+ left_input_id=node_info.left_input_id,
1846
+ right_input_id=node_info.right_input_id,
1847
+ input_ids=node_info.input_ids,
1848
+ outputs=node_info.outputs,
1849
+ setting_input=node_info.setting_input,
1850
+ )
1851
+ nodes.append(flowfile_node)
1852
+
1853
+ settings = schemas.FlowfileSettings(
1854
+ description=self.flow_settings.description,
1855
+ execution_mode=self.flow_settings.execution_mode,
1856
+ execution_location=self.flow_settings.execution_location,
1857
+ auto_save=self.flow_settings.auto_save,
1858
+ show_detailed_progress=self.flow_settings.show_detailed_progress,
1859
+ )
1860
+ return schemas.FlowfileData(
1861
+ flowfile_version=__version__,
1862
+ flowfile_id=self.flow_id,
1863
+ flowfile_name=self.__name__,
1864
+ flowfile_settings=settings,
1865
+ nodes=nodes,
1866
+ )
1867
+
1732
1868
  def get_node_storage(self) -> schemas.FlowInformation:
1733
1869
  """Serializes the entire graph's state into a storable format.
1734
1870
 
@@ -1761,14 +1897,64 @@ class FlowGraph:
1761
1897
  for node in self.nodes:
1762
1898
  node.remove_cache()
1763
1899
 
1900
+ def _handle_flow_renaming(self, new_name: str, new_path: Path):
1901
+ """
1902
+ Handle the rename of a flow when it is being saved.
1903
+ """
1904
+ if self.flow_settings and self.flow_settings.path and Path(self.flow_settings.path).absolute() != new_path.absolute():
1905
+ self.__name__ = new_name
1906
+ self.flow_settings.save_location = str(new_path.absolute())
1907
+ self.flow_settings.name = new_name
1908
+ if self.flow_settings and not self.flow_settings.save_location:
1909
+ self.flow_settings.save_location = str(new_path.absolute())
1910
+ self.__name__ = new_name
1911
+ self.flow_settings.name = new_name
1912
+
1764
1913
  def save_flow(self, flow_path: str):
1765
1914
  """Saves the current state of the flow graph to a file.
1766
1915
 
1916
+ Supports multiple formats based on file extension:
1917
+ - .yaml / .yml: New YAML format
1918
+ - .json: JSON format
1919
+
1767
1920
  Args:
1768
1921
  flow_path: The path where the flow file will be saved.
1769
1922
  """
1770
- with open(flow_path, 'wb') as f:
1771
- pickle.dump(self.get_node_storage(), f)
1923
+ logger.info("Saving flow to %s", flow_path)
1924
+ path = Path(flow_path)
1925
+ os.makedirs(path.parent, exist_ok=True)
1926
+ suffix = path.suffix.lower()
1927
+ new_flow_name = path.name.replace(suffix, "")
1928
+ self._handle_flow_renaming(new_flow_name, path)
1929
+ self.flow_settings.modified_on = datetime.datetime.now().timestamp()
1930
+ try:
1931
+ if suffix == '.flowfile':
1932
+ raise DeprecationWarning(
1933
+ f"The .flowfile format is deprecated. Please use .yaml or .json formats.\n\n"
1934
+ "Or stay on v0.4.1 if you still need .flowfile support.\n\n"
1935
+ )
1936
+ elif suffix in ('.yaml', '.yml'):
1937
+ flowfile_data = self.get_flowfile_data()
1938
+ data = flowfile_data.model_dump(mode='json')
1939
+ with open(flow_path, 'w', encoding='utf-8') as f:
1940
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1941
+ elif suffix == '.json':
1942
+ flowfile_data = self.get_flowfile_data()
1943
+ data = flowfile_data.model_dump(mode='json')
1944
+ with open(flow_path, 'w', encoding='utf-8') as f:
1945
+ json.dump(data, f, indent=2, ensure_ascii=False)
1946
+
1947
+ else:
1948
+ flowfile_data = self.get_flowfile_data()
1949
+ logger.warning(f"Unknown file extension {suffix}. Defaulting to YAML format.")
1950
+ data = flowfile_data.model_dump(mode='json')
1951
+ with open(flow_path, 'w', encoding='utf-8') as f:
1952
+ yaml.dump(data, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
1953
+
1954
+ except Exception as e:
1955
+ logger.error(f"Error saving flow: {e}")
1956
+ raise
1957
+
1772
1958
  self.flow_settings.path = flow_path
1773
1959
 
1774
1960
  def get_frontend_data(self) -> dict: