Flowfile 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of Flowfile might be problematic. Click here for more details.

Files changed (161)
  1. flowfile/__init__.py +4 -3
  2. flowfile/api.py +1 -1
  3. flowfile/web/static/assets/{CloudConnectionManager-c20a740f.js → CloudConnectionManager-d7c2c028.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-960b400a.js → CloudStorageReader-d467329f.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-e3decbdd.js → CloudStorageWriter-071b8b00.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
  8. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  9. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  10. flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
  11. flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
  12. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  13. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  14. flowfile/web/static/assets/{CrossJoin-d67e2405.js → CrossJoin-cf68ec7a.js} +14 -84
  15. flowfile/web/static/assets/{DatabaseConnectionSettings-a81e0f7e.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
  16. flowfile/web/static/assets/{DatabaseManager-9ea35e84.js → DatabaseManager-349e33a8.js} +2 -2
  17. flowfile/web/static/assets/{DatabaseReader-9578bfa5.js → DatabaseReader-8075bd28.js} +14 -114
  18. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  19. flowfile/web/static/assets/{DatabaseWriter-19531098.js → DatabaseWriter-3e2dda89.js} +13 -74
  20. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  21. flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
  22. flowfile/web/static/assets/{ExternalSource-2297ef96.js → ExternalSource-609a265c.js} +8 -79
  23. flowfile/web/static/assets/{Filter-f211c03a.js → Filter-97cff793.js} +12 -85
  24. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  25. flowfile/web/static/assets/{Formula-4207ea31.js → Formula-09de0ec9.js} +18 -85
  26. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  27. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  28. flowfile/web/static/assets/{FuzzyMatch-bf120df0.js → FuzzyMatch-bdf70248.js} +16 -87
  29. flowfile/web/static/assets/{GraphSolver-5bb7497a.js → GraphSolver-0b5a0e05.js} +13 -159
  30. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  31. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  32. flowfile/web/static/assets/{GroupBy-92c81b65.js → GroupBy-eaddadde.js} +12 -75
  33. flowfile/web/static/assets/{Join-4e49a274.js → Join-3313371b.js} +15 -85
  34. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  35. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  36. flowfile/web/static/assets/{ManualInput-90998ae8.js → ManualInput-e8bfc0be.js} +11 -82
  37. flowfile/web/static/assets/{Output-81e3e917.js → Output-7303bb09.js} +13 -243
  38. flowfile/web/static/assets/Output-ddc9079f.css +37 -0
  39. flowfile/web/static/assets/{Pivot-a3419842.js → Pivot-3b1c54ef.js} +14 -138
  40. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  41. flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
  42. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  43. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  44. flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
  45. flowfile/web/static/assets/{PolarsCode-72710deb.js → PolarsCode-aa12e25d.js} +13 -80
  46. flowfile/web/static/assets/Read-6b17491f.css +62 -0
  47. flowfile/web/static/assets/Read-a2bfc618.js +243 -0
  48. flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
  49. flowfile/web/static/assets/{RecordId-10baf191.js → RecordId-48ee1a3b.js} +8 -80
  50. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  51. flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
  52. flowfile/web/static/assets/{Sample-3ed9a0ae.js → Sample-f06cb97a.js} +8 -77
  53. flowfile/web/static/assets/{SecretManager-0d49c0e8.js → SecretManager-37f34886.js} +2 -2
  54. flowfile/web/static/assets/{Select-8a02a0b3.js → Select-b60e6c47.js} +11 -85
  55. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  56. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  57. flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
  58. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  59. flowfile/web/static/assets/{SettingsSection-4c0f45f5.js → SettingsSection-75b6cf4f.js} +2 -40
  60. flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
  61. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  62. flowfile/web/static/assets/{Sort-f55c9f9d.js → Sort-51b1ee4d.js} +12 -97
  63. flowfile/web/static/assets/{TextToRows-5dbc2145.js → TextToRows-26835f8f.js} +14 -83
  64. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  65. flowfile/web/static/assets/{UnavailableFields-a1768e52.js → UnavailableFields-88a4cd0c.js} +2 -2
  66. flowfile/web/static/assets/Union-4d0088eb.js +77 -0
  67. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  68. flowfile/web/static/assets/{Unique-46b250da.js → Unique-7d554a62.js} +22 -91
  69. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  70. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  71. flowfile/web/static/assets/{Unpivot-25ac84cc.js → Unpivot-4668595c.js} +12 -166
  72. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  73. flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
  74. flowfile/web/static/assets/{ExploreData-40476474.js → VueGraphicWalker-5324d566.js} +4 -264
  75. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  76. flowfile/web/static/assets/{api-6ef0dcef.js → api-271ed117.js} +1 -1
  77. flowfile/web/static/assets/{api-a0abbdc7.js → api-31e4fea6.js} +1 -1
  78. flowfile/web/static/assets/{designer-186f2e71.css → designer-091bdc3f.css} +819 -184
  79. flowfile/web/static/assets/{designer-13eabd83.js → designer-bf3d9487.js} +2214 -680
  80. flowfile/web/static/assets/{documentation-b87e7f6f.js → documentation-4d0a1cea.js} +1 -1
  81. flowfile/web/static/assets/{dropDown-13564764.js → dropDown-025888df.js} +1 -1
  82. flowfile/web/static/assets/{fullEditor-fd2cd6f9.js → fullEditor-1df991ec.js} +2 -2
  83. flowfile/web/static/assets/{genericNodeSettings-71e11604.js → genericNodeSettings-d3b2b2ac.js} +3 -3
  84. flowfile/web/static/assets/{index-f6c15e76.js → index-d0518598.js} +210 -31
  85. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  86. flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
  87. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  88. flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
  89. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  90. flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
  91. flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
  92. flowfile/web/static/assets/readCsv-e8359522.js +178 -0
  93. flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
  94. flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
  95. flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
  96. flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
  97. flowfile/web/static/assets/{secretApi-dd636aa2.js → secretApi-ce823eee.js} +1 -1
  98. flowfile/web/static/assets/{selectDynamic-af36165e.js → selectDynamic-5476546e.js} +7 -7
  99. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  100. flowfile/web/static/assets/{vue-codemirror.esm-2847001e.js → vue-codemirror.esm-9ed00d50.js} +29 -33
  101. flowfile/web/static/assets/{vue-content-loader.es-0371da73.js → vue-content-loader.es-7bca2d9b.js} +1 -1
  102. flowfile/web/static/index.html +1 -1
  103. {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/METADATA +2 -1
  104. {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/RECORD +147 -117
  105. flowfile_core/configs/flow_logger.py +5 -13
  106. flowfile_core/configs/node_store/nodes.py +303 -44
  107. flowfile_core/configs/settings.py +6 -3
  108. flowfile_core/database/connection.py +5 -21
  109. flowfile_core/fileExplorer/funcs.py +239 -121
  110. flowfile_core/flowfile/code_generator/code_generator.py +36 -0
  111. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +60 -80
  112. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +61 -0
  113. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +44 -3
  114. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +3 -3
  115. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +33 -10
  116. flowfile_core/flowfile/flow_graph.py +223 -118
  117. flowfile_core/flowfile/flow_node/flow_node.py +56 -19
  118. flowfile_core/flowfile/flow_node/models.py +0 -2
  119. flowfile_core/flowfile/flow_node/schema_callback.py +138 -43
  120. flowfile_core/flowfile/graph_tree/graph_tree.py +250 -0
  121. flowfile_core/flowfile/graph_tree/models.py +15 -0
  122. flowfile_core/flowfile/handler.py +22 -3
  123. flowfile_core/flowfile/manage/compatibility_enhancements.py +1 -1
  124. flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py} +72 -16
  125. flowfile_core/flowfile/setting_generator/settings.py +2 -2
  126. flowfile_core/flowfile/util/execution_orderer.py +9 -0
  127. flowfile_core/flowfile/util/node_skipper.py +8 -0
  128. flowfile_core/main.py +4 -1
  129. flowfile_core/routes/routes.py +59 -10
  130. flowfile_core/schemas/input_schema.py +0 -1
  131. flowfile_core/schemas/output_model.py +5 -2
  132. flowfile_core/schemas/schemas.py +48 -3
  133. flowfile_core/schemas/transform_schema.py +28 -38
  134. flowfile_frame/__init__.py +1 -4
  135. flowfile_frame/flow_frame.py +33 -4
  136. flowfile_frame/flow_frame.pyi +2 -0
  137. flowfile_worker/__init__.py +6 -35
  138. flowfile_worker/funcs.py +7 -3
  139. flowfile_worker/main.py +5 -2
  140. flowfile_worker/models.py +3 -1
  141. flowfile_worker/routes.py +47 -5
  142. shared/__init__.py +15 -0
  143. shared/storage_config.py +243 -0
  144. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  145. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  146. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  147. flowfile/web/static/assets/Read-c4059daf.js +0 -701
  148. flowfile/web/static/assets/RecordCount-c2b5e095.js +0 -122
  149. flowfile/web/static/assets/Union-f2aefdc9.js +0 -146
  150. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  151. flowfile/web/static/assets/nodeTitle-988d9efe.js +0 -227
  152. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  153. flowfile_worker/polars_fuzzy_match/matcher.py +0 -435
  154. flowfile_worker/polars_fuzzy_match/models.py +0 -36
  155. flowfile_worker/polars_fuzzy_match/pre_process.py +0 -213
  156. flowfile_worker/polars_fuzzy_match/process.py +0 -86
  157. flowfile_worker/polars_fuzzy_match/utils.py +0 -50
  158. {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
  159. {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
  160. {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
  161. {flowfile_worker/polars_fuzzy_match → flowfile_core/flowfile/graph_tree}/__init__.py +0 -0
@@ -108,14 +108,12 @@ class NodeStepSettings:
108
108
  streamable: If True, the node can process data in a streaming fashion.
109
109
  setup_errors: If True, indicates a non-blocking error occurred during setup.
110
110
  breaking_setup_errors: If True, indicates an error occurred that prevents execution.
111
- execute_location: The preferred location for execution ('auto', 'local', 'remote').
112
111
  """
113
112
  cache_results: bool = False
114
113
  renew_schema: bool = True
115
114
  streamable: bool = True
116
115
  setup_errors: bool = False
117
116
  breaking_setup_errors: bool = False
118
- execute_location: schemas.ExecutionLocationsLiteral = 'auto'
119
117
 
120
118
 
121
119
  class NodeStepInputs:
@@ -1,71 +1,166 @@
1
-
2
1
  from typing import Callable, Any, Optional, Generic, TypeVar
3
2
  from concurrent.futures import ThreadPoolExecutor, Future
3
+ import threading
4
4
  from flowfile_core.configs import logger
5
5
 
6
-
7
6
  T = TypeVar('T')
8
7
 
9
8
 
10
9
  class SingleExecutionFuture(Generic[T]):
11
- """Single execution of a function in a separate thread with caching of the result."""
12
- executor: ThreadPoolExecutor
13
- future: Optional[Future[T]]
10
+ """Thread-safe single execution of a function with result caching.
11
+
12
+ Ensures a function is executed at most once even when called from multiple threads.
13
+ Subsequent calls return the cached result.
14
+ """
15
+
14
16
  func: Callable[[], T]
15
17
  on_error: Optional[Callable[[Exception], Any]]
16
- result_value: Optional[T]
17
- has_run_at_least_once: bool = False # Indicates if the function has been run at least once
18
+ _lock: threading.RLock
19
+ _executor: Optional[ThreadPoolExecutor]
20
+ _future: Optional[Future[T]]
21
+ _result_value: Optional[T]
22
+ _exception: Optional[Exception]
23
+ _has_completed: bool
24
+ _has_started: bool
18
25
 
19
26
  def __init__(
20
- self,
21
- func: Callable[[], T],
22
- on_error: Optional[Callable[[Exception], Any]] = None
27
+ self,
28
+ func: Callable[[], T],
29
+ on_error: Optional[Callable[[Exception], Any]] = None
23
30
  ) -> None:
24
31
  """Initialize with function and optional error handler."""
25
- self.executor = ThreadPoolExecutor(max_workers=1)
26
- self.future = None
27
32
  self.func = func
28
33
  self.on_error = on_error
29
- self.result_value = None
30
- self.has_run_at_least_once = False
34
+
35
+ # Thread safety
36
+ self._lock = threading.RLock() # RLock allows re-entrant locking
37
+
38
+ # Execution state
39
+ self._executor = None
40
+ self._future = None
41
+ self._result_value = None
42
+ self._exception = None
43
+ self._has_completed = False
44
+ self._has_started = False
45
+
46
+ def _ensure_executor(self) -> ThreadPoolExecutor:
47
+ """Ensure executor exists, creating if necessary."""
48
+ if self._executor is None or self._executor._shutdown:
49
+ self._executor = ThreadPoolExecutor(max_workers=1)
50
+ return self._executor
31
51
 
32
52
  def start(self) -> None:
33
53
  """Start the function execution if not already started."""
34
- if not self.future:
35
- logger.info("single executor function started")
36
- self.future = self.executor.submit(self.func)
54
+ with self._lock:
55
+ if self._has_started:
56
+ logger.info("Function already started or completed")
57
+ return
58
+
59
+ logger.info("Starting single executor function")
60
+ executor: ThreadPoolExecutor = self._ensure_executor()
61
+ self._future = executor.submit(self._func_wrapper)
62
+ self._has_started = True
63
+
64
+ def _func_wrapper(self) -> T:
65
+ """Wrapper to capture the result or exception."""
66
+ try:
67
+ result: T = self.func()
68
+ with self._lock:
69
+ self._result_value = result
70
+ self._has_completed = True
71
+ return result
72
+ except Exception as e:
73
+ with self._lock:
74
+ self._exception = e
75
+ self._has_completed = True
76
+ raise
37
77
 
38
78
  def cleanup(self) -> None:
39
- """Clean up resources by clearing the future and shutting down the executor."""
40
- self.has_run_at_least_once = True
41
- self.executor.shutdown(wait=False)
79
+ """Clean up resources by shutting down the executor."""
80
+ with self._lock:
81
+ if self._executor and not self._executor._shutdown:
82
+ self._executor.shutdown(wait=False)
42
83
 
43
84
  def __call__(self) -> Optional[T]:
44
85
  """Execute function if not running and return its result."""
45
- if self.result_value:
46
- return self.result_value
47
- if not self.future:
48
- self.start()
49
- else:
50
- logger.info("Function already running or did complete")
51
- try:
52
- self.result_value = self.future.result()
53
- logger.info("Done with the function")
54
- return self.result_value
55
- except Exception as e:
56
- if self.on_error:
57
- return self.on_error(e)
58
- else:
59
- raise e
60
- finally:
61
- self.cleanup()
86
+ with self._lock:
87
+ # If already completed, return cached result or raise cached exception
88
+ if self._has_completed:
89
+ if self._exception:
90
+ if self.on_error:
91
+ return self.on_error(self._exception)
92
+ else:
93
+ raise self._exception
94
+ return self._result_value
95
+
96
+ # Start if not already started
97
+ if not self._has_started:
98
+ self.start()
99
+
100
+ # Wait for completion outside the lock to avoid blocking other threads
101
+ if self._future:
102
+ try:
103
+ result: T = self._future.result()
104
+ logger.info("Function completed successfully")
105
+ return result
106
+ except Exception as e:
107
+ logger.error(f"Function raised exception: {e}")
108
+ if self.on_error:
109
+ return self.on_error(e)
110
+ else:
111
+ raise
112
+
113
+ return None
114
+
115
+ def reset(self) -> None:
116
+ """Reset the execution state, allowing the function to be run again."""
117
+ with self._lock:
118
+ logger.info("Resetting single execution future")
119
+
120
+ # Cancel any pending execution
121
+ if self._future and not self._future.done():
122
+ self._future.cancel()
62
123
 
63
- def reset(self):
64
- """Reset the future and result value."""
65
- logger.info("Resetting the future and result value")
66
- self.result_value = None
67
- self.future = None
124
+ # Clean up old executor
125
+ if self._executor and not self._executor._shutdown:
126
+ self._executor.shutdown(wait=False)
127
+
128
+ # Reset state
129
+ self._executor = None
130
+ self._future = None
131
+ self._result_value = None
132
+ self._exception = None
133
+ self._has_completed = False
134
+ self._has_started = False
135
+
136
+ def is_running(self) -> bool:
137
+ """Check if the function is currently executing."""
138
+ with self._lock:
139
+ return bool(
140
+ self._has_started and
141
+ not self._has_completed and
142
+ self._future is not None and
143
+ not self._future.done()
144
+ )
145
+
146
+ def is_completed(self) -> bool:
147
+ """Check if the function has completed execution."""
148
+ with self._lock:
149
+ return self._has_completed
150
+
151
+ def get_result(self) -> Optional[T]:
152
+ """Get the cached result without triggering execution."""
153
+ with self._lock:
154
+ if self._exception:
155
+ if self.on_error:
156
+ return self.on_error(self._exception)
157
+ else:
158
+ raise self._exception
159
+ return self._result_value
68
160
 
69
161
  def __del__(self) -> None:
70
162
  """Ensure executor is shut down on deletion."""
71
- self.cleanup()
163
+ try:
164
+ self.cleanup()
165
+ except Exception:
166
+ pass
@@ -0,0 +1,250 @@
1
+ from pydantic import BaseModel
2
+
3
+ from flowfile_core.flowfile.flow_node.flow_node import FlowNode
4
+
5
+ from flowfile_core.flowfile.graph_tree.models import BranchInfo, InputInfo
6
+
7
+
8
+ def calculate_depth(node_id: int, node_info: dict[int, BranchInfo], visited: set = None) -> int:
9
+ """Calculates the depth of each node."""
10
+
11
+ if visited is None:
12
+ visited = set()
13
+ if node_id in visited:
14
+ return node_info[node_id].depth
15
+ visited.add(node_id)
16
+
17
+ max_input_depth = -1
18
+ inputs = node_info[node_id].inputs
19
+
20
+ for main_id in inputs.main:
21
+ max_input_depth = max(max_input_depth, calculate_depth(main_id, node_info, visited))
22
+ if inputs.left:
23
+ max_input_depth = max(max_input_depth, calculate_depth(inputs.left, node_info, visited))
24
+ if inputs.right:
25
+ max_input_depth = max(max_input_depth, calculate_depth(inputs.right, node_info, visited))
26
+
27
+ node_info[node_id].depth = max_input_depth + 1
28
+ return node_info[node_id].depth
29
+
30
+
31
+ # Trace paths from each root
32
+ def trace_path(node_id: int, node_info: dict[int, BranchInfo], merge_points: dict[int, list[int]],
33
+ current_path: list[int] | None = None):
34
+ """Define the trace of each node path"""
35
+ if current_path is None:
36
+ current_path = []
37
+
38
+ current_path = current_path + [node_id]
39
+ outputs = node_info[node_id].outputs
40
+
41
+ if not outputs:
42
+ # End of path
43
+ return [current_path]
44
+
45
+ # If this node has multiple outputs or connects to a merge point, branch
46
+ all_paths = []
47
+ for output_id in outputs:
48
+ if output_id in merge_points and len(merge_points[output_id]) > 1:
49
+ # This is a merge point, end this path here
50
+ all_paths.append(current_path + [output_id])
51
+ else:
52
+ # Continue the path
53
+ all_paths.extend(trace_path(output_id, node_info, merge_points, current_path))
54
+ return all_paths
55
+
56
+
57
+ def build_node_info(nodes: list[FlowNode]) -> dict[int, BranchInfo]:
58
+ """Builds node information used to construct the graph tree."""
59
+
60
+ node_info = {}
61
+ for node in nodes:
62
+ node_id = node.node_id
63
+
64
+ # Get node label
65
+ operation = node.node_type.replace("_", " ").title() if node.node_type else "Unknown"
66
+ label = f"{operation} (id={node_id})"
67
+ if hasattr(node, 'setting_input') and hasattr(node.setting_input, 'description'):
68
+ if node.setting_input.description:
69
+ desc = node.setting_input.description
70
+ if len(desc) > 20: # Truncate long descriptions
71
+ desc = desc[:17] + "..."
72
+ label = f"{operation} ({node_id}): {desc}"
73
+
74
+ # Get inputs and outputs
75
+ inputs = InputInfo(
76
+ main=[n.node_id for n in (node.node_inputs.main_inputs or [])],
77
+ left=node.node_inputs.left_input.node_id if node.node_inputs.left_input else None,
78
+ right=node.node_inputs.right_input.node_id if node.node_inputs.right_input else None
79
+ )
80
+ outputs = [n.node_id for n in node.leads_to_nodes]
81
+
82
+ node_info[node_id] = BranchInfo(
83
+ label=label,
84
+ short_label=f"{operation} ({node_id})",
85
+ inputs=inputs,
86
+ outputs=outputs,
87
+ depth=0
88
+ )
89
+
90
+ return node_info
91
+
92
+
93
+ def group_nodes_by_depth(node_info: dict[int, BranchInfo]) -> tuple[dict[int, list[int]], int]:
94
+ """Groups each node by depth"""
95
+ depth_groups = {}
96
+ max_depth = 0
97
+ for node_id, info in node_info.items():
98
+ depth = info.depth
99
+ max_depth = max(max_depth, depth)
100
+ if depth not in depth_groups:
101
+ depth_groups[depth] = []
102
+ depth_groups[depth].append(node_id)
103
+
104
+ return depth_groups, max_depth
105
+
106
+
107
+ def define_node_connections(node_info: dict[int, BranchInfo]) -> dict[int, list[int]]:
108
+ """Defines node connections to merge"""
109
+ merge_points = {} # target_id -> list of source_ids
110
+ for node_id, info in node_info.items():
111
+ for output_id in info.outputs:
112
+ if output_id not in merge_points:
113
+ merge_points[output_id] = []
114
+ merge_points[output_id].append(node_id)
115
+
116
+ return merge_points
117
+
118
+
119
+ def build_flow_paths(node_info: dict[int, BranchInfo], flow_starts: list[FlowNode],
120
+ merge_points: dict[int, list[int]]):
121
+ """Build the flow paths to be drawn"""
122
+
123
+
124
+ # Find all root nodes (no inputs)
125
+ root_nodes = [nid for nid, info in node_info.items()
126
+ if not info.inputs.main and not info.inputs.left and not info.inputs.right]
127
+
128
+ if not root_nodes and flow_starts:
129
+ root_nodes = [n.node_id for n in flow_starts]
130
+ paths = [] # List of paths through the graph
131
+
132
+ # Get all paths
133
+ for root_id in root_nodes:
134
+ paths.extend(trace_path(root_id, node_info, merge_points))
135
+
136
+ return paths
137
+
138
+
139
+ def group_paths(paths:list, merge_points:dict):
140
+ """Groups each node path."""
141
+ paths_by_merge = {}
142
+ standalone_paths = []
143
+
144
+ for path in paths:
145
+ if len(path) > 1 and path[-1] in merge_points and len(merge_points[path[-1]]) > 1:
146
+ merge_id = path[-1]
147
+ if merge_id not in paths_by_merge:
148
+ paths_by_merge[merge_id] = []
149
+ paths_by_merge[merge_id].append(path)
150
+ else:
151
+ standalone_paths.append(path)
152
+ return paths_by_merge, standalone_paths
153
+
154
+
155
+ def draw_merged_paths(node_info: dict[int, BranchInfo],
156
+ merge_points: dict[int, list[int]],
157
+ paths_by_merge: dict[int, list[list[int]]],
158
+ merge_drawn: set,
159
+ drawn_nodes: set,
160
+ lines: list[str]):
161
+ """Draws paths for each node that merges."""
162
+ for merge_id, merge_paths in paths_by_merge.items():
163
+ if merge_id in merge_drawn:
164
+ continue
165
+ merge_info = node_info[merge_id]
166
+ sources = merge_points[merge_id]
167
+
168
+ # Draw each source path leading to the merge
169
+ for i, source_id in enumerate(sources):
170
+ # Find the path containing this source
171
+ source_path = None
172
+ for path in merge_paths:
173
+ if source_id in path:
174
+
175
+ source_path = path[:path.index(source_id) + 1]
176
+ break
177
+
178
+ if source_path:
179
+ # Build the line for this path
180
+ line_parts = []
181
+ for j, nid in enumerate(source_path):
182
+ if j == 0:
183
+ line_parts.append(node_info[nid].label)
184
+ else:
185
+ line_parts.append(f" ──> {node_info[nid].short_label}")
186
+
187
+ # Add the merge arrow
188
+ if i == 0:
189
+ # First source
190
+ line = "".join(line_parts) + " ─────┐"
191
+ lines.append(line)
192
+ elif i == len(sources) - 1:
193
+ # Last source
194
+ line = "".join(line_parts) + " ─────┴──> " + merge_info.label
195
+ lines.append(line)
196
+
197
+ # Continue with the rest of the path after merge
198
+ remaining = node_info[merge_id].outputs
199
+ while remaining:
200
+ next_id = remaining[0]
201
+ lines[-1] += f" ──> {node_info[next_id].label}"
202
+ remaining = node_info[next_id].outputs
203
+ drawn_nodes.add(next_id)
204
+ else:
205
+ # Middle sources
206
+ line = "".join(line_parts) + " ─────┤"
207
+ lines.append(line)
208
+
209
+ for nid in source_path:
210
+ drawn_nodes.add(nid)
211
+
212
+ drawn_nodes.add(merge_id)
213
+ merge_drawn.add(merge_id)
214
+ lines.append("") # Add spacing between merge groups
215
+ return paths_by_merge
216
+
217
+
218
+ def draw_standalone_paths(drawn_nodes: set[int], standalone_paths: list[list[int]], lines: list[str],
219
+ node_info: dict[int, BranchInfo]):
220
+ """ Draws paths that do not merge."""
221
+ # Draw standalone paths
222
+ for path in standalone_paths:
223
+ if all(nid in drawn_nodes for nid in path):
224
+ continue
225
+
226
+ line_parts = []
227
+ for i, node_id in enumerate(path):
228
+ if node_id not in drawn_nodes:
229
+ if i == 0:
230
+ line_parts.append(node_info[node_id].label)
231
+ else:
232
+ line_parts.append(f" ──> {node_info[node_id].short_label}")
233
+ drawn_nodes.add(node_id)
234
+
235
+ if line_parts:
236
+ lines.append("".join(line_parts))
237
+
238
+
239
+ def add_un_drawn_nodes(drawn_nodes: set[int], node_info: dict[int, BranchInfo], lines: list[str]):
240
+ """Adds isolated nodes if exists."""
241
+ # Add any remaining undrawn nodes
242
+
243
+ for node_id in node_info:
244
+ if node_id not in drawn_nodes:
245
+ lines.append(node_info[node_id].label + " (isolated)")
246
+
247
+ lines.append("")
248
+ lines.append("=" * 80)
249
+ lines.append("Execution Order")
250
+ lines.append("=" * 80)
@@ -0,0 +1,15 @@
1
+ from pydantic import BaseModel
2
+
3
+
4
+ class InputInfo(BaseModel):
5
+ main: list[int]
6
+ right: int | None = None
7
+ left: int | None = None
8
+
9
+
10
+ class BranchInfo(BaseModel):
11
+ label: str
12
+ short_label: str
13
+ inputs: InputInfo
14
+ outputs: list[int]
15
+ depth: int
@@ -3,11 +3,25 @@ from dataclasses import dataclass
3
3
  from typing import Dict, List
4
4
  import os
5
5
  from pathlib import Path
6
+ from datetime import datetime
6
7
 
7
8
  from flowfile_core.flowfile.manage.open_flowfile import open_flow
8
9
  from flowfile_core.flowfile.flow_graph import FlowGraph
9
10
  from flowfile_core.schemas.schemas import FlowSettings
10
11
  from flowfile_core.flowfile.utils import create_unique_id
12
+ from shared.storage_config import storage
13
+
14
+
15
+ def get_flow_save_location(flow_name: str) -> Path:
16
+ """Gets the initial save location for flow files"""
17
+ if ".flowfile" not in flow_name:
18
+ flow_name += ".flowfile"
19
+ return storage.temp_directory_for_flows / flow_name
20
+
21
+
22
+ def create_flow_name() -> str:
23
+ """Creates a unique flow name"""
24
+ return datetime.now().strftime("%Y%m%d_%H_%M_%S")+"_flow.flowfile"
11
25
 
12
26
 
13
27
  @dataclass
@@ -57,7 +71,7 @@ class FlowfileHandler:
57
71
  else:
58
72
  raise Exception('Flow not found')
59
73
 
60
- def add_flow(self, name: str, flow_path: str) -> int:
74
+ def add_flow(self, name: str = None, flow_path: str = None) -> int:
61
75
  """
62
76
  Creates a new flow with a reference to the flow path
63
77
  Args:
@@ -69,8 +83,13 @@ class FlowfileHandler:
69
83
 
70
84
  """
71
85
  next_id = create_unique_id()
72
- flow_info = FlowSettings(name=name, flow_id=next_id, save_location='', path=flow_path)
73
- _ = self.register_flow(flow_info)
86
+ if not name:
87
+ name = create_flow_name()
88
+ if not flow_path:
89
+ flow_path = get_flow_save_location(name)
90
+ flow_info = FlowSettings(name=name, flow_id=next_id, save_location=str(flow_path), path=str(flow_path))
91
+ flow = self.register_flow(flow_info)
92
+ flow.save_flow(flow.flow_settings.path)
74
93
  return next_id
75
94
 
76
95
  def get_flow_info(self, flow_id: int) -> FlowSettings:
@@ -48,7 +48,7 @@ def ensure_compatibility(flow_storage_obj: schemas.FlowInformation, flow_path: s
48
48
  setattr(flow_storage_obj, 'flow_settings', flow_settings)
49
49
  flow_storage_obj = schemas.FlowInformation.model_validate(flow_storage_obj)
50
50
  elif not hasattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location'):
51
- setattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location', 'auto')
51
+ setattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location', "remote")
52
52
  elif not hasattr(flow_storage_obj.flow_settings, 'is_running'):
53
53
  setattr(flow_storage_obj.flow_settings, 'is_running', False)
54
54
  setattr(flow_storage_obj.flow_settings, 'is_canceled', False)
@@ -1,39 +1,90 @@
1
1
 
2
2
  from typing import List
3
- from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
4
- from flowfile_core.schemas import transform_schema
5
- from flowfile_core.schemas import input_schema
3
+
6
4
  from polars import datatypes
7
5
  import polars as pl
6
+
7
+ from pl_fuzzy_frame_match.output_column_name_utils import set_name_in_fuzzy_mappings
8
+ from pl_fuzzy_frame_match.pre_process import rename_fuzzy_right_mapping
9
+
8
10
  from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
9
11
  from flowfile_core.configs.flow_logger import main_logger
12
+ from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
13
+ from flowfile_core.schemas import transform_schema
14
+ from flowfile_core.schemas import input_schema
10
15
 
11
16
 
12
- def calculate_uniqueness(a: float, b: float) -> float:
13
- return ((pow(a + 0.5, 2) + pow(b + 0.5, 2)) / 2 - pow(0.5, 2)) + 0.5 * abs(a - b)
17
def _ensure_all_columns_have_select(left_cols: List[str],
                                    right_cols: List[str],
                                    fuzzy_match_input: transform_schema.FuzzyMatchInput):
    """
    Add a plain SelectInput for every incoming column missing from the fuzzy
    match input's left/right select statements (modifies the input in place).

    Args:
        left_cols (List[str]): Column names of the left FlowDataEngine.
        right_cols (List[str]): Column names of the right FlowDataEngine.
        fuzzy_match_input (FuzzyMatchInput): Configuration holding both selects.

    Returns:
        None
    """
    # Left and right sides are handled identically, so process them in one loop.
    for cols, select in ((left_cols, fuzzy_match_input.left_select),
                         (right_cols, fuzzy_match_input.right_select)):
        already_selected = {rename.old_name for rename in select.renames}
        missing = [transform_schema.SelectInput(col) for col in cols if col not in already_selected]
        select.renames.extend(missing)
39
+
40
+
41
def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputs) -> None:
    """
    Reorder ``join_inputs.renames`` in place so it follows ``col_order``.

    Renames whose ``new_name`` does not appear in ``col_order`` are dropped.

    Returns:
        None
    """
    by_new_name = {rename.new_name: rename for rename in join_inputs.renames}
    join_inputs.renames = [by_new_name[name] for name in col_order if name in by_new_name]
14
52
 
15
53
 
16
54
def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
                                 left_schema: List[FlowfileColumn],
                                 right_schema: List[FlowfileColumn]):
    """
    Predict the output schema of a fuzzy match without executing it.

    Args:
        fm_input: Fuzzy match configuration (left/right selects and join
            mapping); mutated in place (selects completed/reordered,
            join-key columns forced to keep, auto-renamed).
        left_schema: Columns of the left input.
        right_schema: Columns of the right input.

    Returns:
        List of FlowfileColumn for the kept left columns, kept right
        columns, and one Float64 fuzzy-score column per join mapping.
    """
    # Make sure every incoming column has a select entry, then align the
    # selects with the incoming column order on each side.
    _ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
                                    right_cols=[col.column_name for col in right_schema],
                                    fuzzy_match_input=fm_input)
    _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
                                          join_inputs=fm_input.left_select)
    _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
                                          join_inputs=fm_input.right_select)
    # Join-key columns must survive the select regardless of the user's keep flag.
    for column in fm_input.left_select.renames:
        if column.join_key:
            column.keep = True
    for column in fm_input.right_select.renames:
        if column.join_key:
            column.keep = True
    left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
    fm_input.auto_rename()
    # Build the old->new name map AFTER auto_rename so the join mapping is
    # rewritten against the final right-hand column names.
    right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
    new_join_mapping = rename_fuzzy_right_mapping(fm_input.join_mapping, right_renames)
    output_schema = []
    for column in fm_input.left_select.renames:
        column_schema = left_schema_dict.get(column.old_name)
        if column_schema and (column.keep or column.join_key):
            output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
                                                           example_values=column_schema.example_values))
    for column in fm_input.right_select.renames:
        column_schema = right_schema_dict.get(column.old_name)
        if column_schema and (column.keep or column.join_key):
            output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
                                                           example_values=column_schema.example_values))
    # One Float64 similarity-score column per fuzzy mapping, named by the helper.
    set_name_in_fuzzy_mappings(new_join_mapping)
    output_schema.extend([FlowfileColumn.from_input(fuzzy_mapping.output_column_name, 'Float64')
                          for fuzzy_mapping in new_join_mapping])
    return output_schema
38
89
 
39
90
 
@@ -71,7 +122,8 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
71
122
  val_column_schema = get_schema_of_column(node_input_schema, pivot_input.value_col)
72
123
  if output_fields is not None and len(output_fields) > 0:
73
124
  return index_columns_schema+[FlowfileColumn(PlType(Plcolumn_name=output_field.name,
74
- pl_datatype=output_field.data_type)) for output_field in output_fields]
125
+ pl_datatype=output_field.data_type)) for output_field in
126
+ output_fields]
75
127
 
76
128
  else:
77
129
  max_unique_vals = 200
@@ -84,7 +136,11 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
84
136
  f' Max unique values: {max_unique_vals}')
85
137
  pl_output_fields = []
86
138
  for val in unique_vals:
87
- for agg in pivot_input.aggregations:
88
- output_type = get_output_data_type_pivot(val_column_schema, agg)
89
- pl_output_fields.append(PlType(column_name=f'{val}_{agg}', pl_datatype=output_type))
139
+ if len(pivot_input.aggregations) == 1:
140
+ output_type = get_output_data_type_pivot(val_column_schema, pivot_input.aggregations[0])
141
+ pl_output_fields.append(PlType(column_name=str(val), pl_datatype=output_type))
142
+ else:
143
+ for agg in pivot_input.aggregations:
144
+ output_type = get_output_data_type_pivot(val_column_schema, agg)
145
+ pl_output_fields.append(PlType(column_name=f'{val}_{agg}', pl_datatype=output_type))
90
146
  return index_columns_schema + [FlowfileColumn(pl_output_field) for pl_output_field in pl_output_fields]