Flowfile 0.3.8-py3-none-any.whl → 0.3.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile has been flagged as potentially problematic.
- flowfile/__init__.py +4 -3
- flowfile/api.py +1 -1
- flowfile/web/static/assets/{CloudConnectionManager-c20a740f.js → CloudConnectionManager-d7c2c028.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-960b400a.js → CloudStorageReader-d467329f.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-e3decbdd.js → CloudStorageWriter-071b8b00.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
- flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-d67e2405.js → CrossJoin-cf68ec7a.js} +14 -84
- flowfile/web/static/assets/{DatabaseConnectionSettings-a81e0f7e.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-9ea35e84.js → DatabaseManager-349e33a8.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-9578bfa5.js → DatabaseReader-8075bd28.js} +14 -114
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseWriter-19531098.js → DatabaseWriter-3e2dda89.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
- flowfile/web/static/assets/{ExternalSource-2297ef96.js → ExternalSource-609a265c.js} +8 -79
- flowfile/web/static/assets/{Filter-f211c03a.js → Filter-97cff793.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-4207ea31.js → Formula-09de0ec9.js} +18 -85
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-bf120df0.js → FuzzyMatch-bdf70248.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-5bb7497a.js → GraphSolver-0b5a0e05.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{GroupBy-92c81b65.js → GroupBy-eaddadde.js} +12 -75
- flowfile/web/static/assets/{Join-4e49a274.js → Join-3313371b.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-90998ae8.js → ManualInput-e8bfc0be.js} +11 -82
- flowfile/web/static/assets/{Output-81e3e917.js → Output-7303bb09.js} +13 -243
- flowfile/web/static/assets/Output-ddc9079f.css +37 -0
- flowfile/web/static/assets/{Pivot-a3419842.js → Pivot-3b1c54ef.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
- flowfile/web/static/assets/{PolarsCode-72710deb.js → PolarsCode-aa12e25d.js} +13 -80
- flowfile/web/static/assets/Read-6b17491f.css +62 -0
- flowfile/web/static/assets/Read-a2bfc618.js +243 -0
- flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
- flowfile/web/static/assets/{RecordId-10baf191.js → RecordId-48ee1a3b.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
- flowfile/web/static/assets/{Sample-3ed9a0ae.js → Sample-f06cb97a.js} +8 -77
- flowfile/web/static/assets/{SecretManager-0d49c0e8.js → SecretManager-37f34886.js} +2 -2
- flowfile/web/static/assets/{Select-8a02a0b3.js → Select-b60e6c47.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/{SettingsSection-4c0f45f5.js → SettingsSection-75b6cf4f.js} +2 -40
- flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-f55c9f9d.js → Sort-51b1ee4d.js} +12 -97
- flowfile/web/static/assets/{TextToRows-5dbc2145.js → TextToRows-26835f8f.js} +14 -83
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{UnavailableFields-a1768e52.js → UnavailableFields-88a4cd0c.js} +2 -2
- flowfile/web/static/assets/Union-4d0088eb.js +77 -0
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/{Unique-46b250da.js → Unique-7d554a62.js} +22 -91
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-25ac84cc.js → Unpivot-4668595c.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
- flowfile/web/static/assets/{ExploreData-40476474.js → VueGraphicWalker-5324d566.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-6ef0dcef.js → api-271ed117.js} +1 -1
- flowfile/web/static/assets/{api-a0abbdc7.js → api-31e4fea6.js} +1 -1
- flowfile/web/static/assets/{designer-186f2e71.css → designer-091bdc3f.css} +819 -184
- flowfile/web/static/assets/{designer-13eabd83.js → designer-bf3d9487.js} +2214 -680
- flowfile/web/static/assets/{documentation-b87e7f6f.js → documentation-4d0a1cea.js} +1 -1
- flowfile/web/static/assets/{dropDown-13564764.js → dropDown-025888df.js} +1 -1
- flowfile/web/static/assets/{fullEditor-fd2cd6f9.js → fullEditor-1df991ec.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-71e11604.js → genericNodeSettings-d3b2b2ac.js} +3 -3
- flowfile/web/static/assets/{index-f6c15e76.js → index-d0518598.js} +210 -31
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
- flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
- flowfile/web/static/assets/readCsv-e8359522.js +178 -0
- flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
- flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
- flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
- flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
- flowfile/web/static/assets/{secretApi-dd636aa2.js → secretApi-ce823eee.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-af36165e.js → selectDynamic-5476546e.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/{vue-codemirror.esm-2847001e.js → vue-codemirror.esm-9ed00d50.js} +29 -33
- flowfile/web/static/assets/{vue-content-loader.es-0371da73.js → vue-content-loader.es-7bca2d9b.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/METADATA +2 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/RECORD +147 -117
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/nodes.py +303 -44
- flowfile_core/configs/settings.py +6 -3
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/code_generator/code_generator.py +36 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +60 -80
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +61 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +44 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +3 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +33 -10
- flowfile_core/flowfile/flow_graph.py +223 -118
- flowfile_core/flowfile/flow_node/flow_node.py +56 -19
- flowfile_core/flowfile/flow_node/models.py +0 -2
- flowfile_core/flowfile/flow_node/schema_callback.py +138 -43
- flowfile_core/flowfile/graph_tree/graph_tree.py +250 -0
- flowfile_core/flowfile/graph_tree/models.py +15 -0
- flowfile_core/flowfile/handler.py +22 -3
- flowfile_core/flowfile/manage/compatibility_enhancements.py +1 -1
- flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py} +72 -16
- flowfile_core/flowfile/setting_generator/settings.py +2 -2
- flowfile_core/flowfile/util/execution_orderer.py +9 -0
- flowfile_core/flowfile/util/node_skipper.py +8 -0
- flowfile_core/main.py +4 -1
- flowfile_core/routes/routes.py +59 -10
- flowfile_core/schemas/input_schema.py +0 -1
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +48 -3
- flowfile_core/schemas/transform_schema.py +28 -38
- flowfile_frame/__init__.py +1 -4
- flowfile_frame/flow_frame.py +33 -4
- flowfile_frame/flow_frame.pyi +2 -0
- flowfile_worker/__init__.py +6 -35
- flowfile_worker/funcs.py +7 -3
- flowfile_worker/main.py +5 -2
- flowfile_worker/models.py +3 -1
- flowfile_worker/routes.py +47 -5
- shared/__init__.py +15 -0
- shared/storage_config.py +243 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c4059daf.js +0 -701
- flowfile/web/static/assets/RecordCount-c2b5e095.js +0 -122
- flowfile/web/static/assets/Union-f2aefdc9.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-988d9efe.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_worker/polars_fuzzy_match/matcher.py +0 -435
- flowfile_worker/polars_fuzzy_match/models.py +0 -36
- flowfile_worker/polars_fuzzy_match/pre_process.py +0 -213
- flowfile_worker/polars_fuzzy_match/process.py +0 -86
- flowfile_worker/polars_fuzzy_match/utils.py +0 -50
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
- {flowfile_worker/polars_fuzzy_match → flowfile_core/flowfile/graph_tree}/__init__.py +0 -0
flowfile_core/flowfile/flow_node/models.py
CHANGED
@@ -108,14 +108,12 @@ class NodeStepSettings:
         streamable: If True, the node can process data in a streaming fashion.
         setup_errors: If True, indicates a non-blocking error occurred during setup.
         breaking_setup_errors: If True, indicates an error occurred that prevents execution.
-        execute_location: The preferred location for execution ('auto', 'local', 'remote').
     """
     cache_results: bool = False
     renew_schema: bool = True
     streamable: bool = True
     setup_errors: bool = False
     breaking_setup_errors: bool = False
-    execute_location: schemas.ExecutionLocationsLiteral = 'auto'


 class NodeStepInputs:
flowfile_core/flowfile/flow_node/schema_callback.py
CHANGED
@@ -1,71 +1,166 @@
-
 from typing import Callable, Any, Optional, Generic, TypeVar
 from concurrent.futures import ThreadPoolExecutor, Future
+import threading
 from flowfile_core.configs import logger
 
-
 T = TypeVar('T')
 
 
 class SingleExecutionFuture(Generic[T]):
-    """
-
-
+    """Thread-safe single execution of a function with result caching.
+
+    Ensures a function is executed at most once even when called from multiple threads.
+    Subsequent calls return the cached result.
+    """
+
     func: Callable[[], T]
     on_error: Optional[Callable[[Exception], Any]]
-
-
+    _lock: threading.RLock
+    _executor: Optional[ThreadPoolExecutor]
+    _future: Optional[Future[T]]
+    _result_value: Optional[T]
+    _exception: Optional[Exception]
+    _has_completed: bool
+    _has_started: bool
 
     def __init__(
-
-
-
+            self,
+            func: Callable[[], T],
+            on_error: Optional[Callable[[Exception], Any]] = None
     ) -> None:
         """Initialize with function and optional error handler."""
-        self.executor = ThreadPoolExecutor(max_workers=1)
-        self.future = None
         self.func = func
         self.on_error = on_error
-
-
+
+        # Thread safety
+        self._lock = threading.RLock()  # RLock allows re-entrant locking
+
+        # Execution state
+        self._executor = None
+        self._future = None
+        self._result_value = None
+        self._exception = None
+        self._has_completed = False
+        self._has_started = False
+
+    def _ensure_executor(self) -> ThreadPoolExecutor:
+        """Ensure executor exists, creating if necessary."""
+        if self._executor is None or self._executor._shutdown:
+            self._executor = ThreadPoolExecutor(max_workers=1)
+        return self._executor
 
     def start(self) -> None:
         """Start the function execution if not already started."""
-
-
-
+        with self._lock:
+            if self._has_started:
+                logger.info("Function already started or completed")
+                return
+
+            logger.info("Starting single executor function")
+            executor: ThreadPoolExecutor = self._ensure_executor()
+            self._future = executor.submit(self._func_wrapper)
+            self._has_started = True
+
+    def _func_wrapper(self) -> T:
+        """Wrapper to capture the result or exception."""
+        try:
+            result: T = self.func()
+            with self._lock:
+                self._result_value = result
+                self._has_completed = True
+            return result
+        except Exception as e:
+            with self._lock:
+                self._exception = e
+                self._has_completed = True
+            raise
 
     def cleanup(self) -> None:
-        """Clean up resources by
-        self.
-
+        """Clean up resources by shutting down the executor."""
+        with self._lock:
+            if self._executor and not self._executor._shutdown:
+                self._executor.shutdown(wait=False)
 
     def __call__(self) -> Optional[T]:
         """Execute function if not running and return its result."""
-
-        return
-
-
-
-
-
-
-
-
-
-
-        if self.
-
-
-
-
-
+        with self._lock:
+            # If already completed, return cached result or raise cached exception
+            if self._has_completed:
+                if self._exception:
+                    if self.on_error:
+                        return self.on_error(self._exception)
+                    else:
+                        raise self._exception
+                return self._result_value
+
+            # Start if not already started
+            if not self._has_started:
+                self.start()
+
+        # Wait for completion outside the lock to avoid blocking other threads
+        if self._future:
+            try:
+                result: T = self._future.result()
+                logger.info("Function completed successfully")
+                return result
+            except Exception as e:
+                logger.error(f"Function raised exception: {e}")
+                if self.on_error:
+                    return self.on_error(e)
+                else:
+                    raise
+
+        return None
+
+    def reset(self) -> None:
+        """Reset the execution state, allowing the function to be run again."""
+        with self._lock:
+            logger.info("Resetting single execution future")
+
+            # Cancel any pending execution
+            if self._future and not self._future.done():
+                self._future.cancel()
 
-
-
-
-
-
+            # Clean up old executor
+            if self._executor and not self._executor._shutdown:
+                self._executor.shutdown(wait=False)
+
+            # Reset state
+            self._executor = None
+            self._future = None
+            self._result_value = None
+            self._exception = None
+            self._has_completed = False
+            self._has_started = False
+
+    def is_running(self) -> bool:
+        """Check if the function is currently executing."""
+        with self._lock:
+            return bool(
+                self._has_started and
+                not self._has_completed and
+                self._future is not None and
+                not self._future.done()
+            )
+
+    def is_completed(self) -> bool:
+        """Check if the function has completed execution."""
+        with self._lock:
+            return self._has_completed
+
+    def get_result(self) -> Optional[T]:
+        """Get the cached result without triggering execution."""
+        with self._lock:
+            if self._exception:
+                if self.on_error:
+                    return self.on_error(self._exception)
+                else:
+                    raise self._exception
+            return self._result_value
 
     def __del__(self) -> None:
         """Ensure executor is shut down on deletion."""
-
+        try:
+            self.cleanup()
+        except Exception:
+            pass
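Taken together, the rewritten schema_callback.py replaces the old fire-and-forget future with a lock-guarded, execute-at-most-once wrapper. A minimal usage sketch based only on the API visible in the hunk above (the import path follows the file location in this wheel; the callable and error handler are illustrative):

    from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture

    calls = []

    def expensive_schema_calculation() -> list:
        calls.append(1)                  # count how often the body really runs
        return ["id", "name"]

    future = SingleExecutionFuture(expensive_schema_calculation,
                                   on_error=lambda exc: [])  # fallback on failure

    print(future())       # first call executes the function: ['id', 'name']
    print(future())       # second call returns the cached result
    assert len(calls) == 1

    future.reset()        # clears state so the function may run again
    future.cleanup()      # shuts the executor down without waiting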
flowfile_core/flowfile/graph_tree/graph_tree.py
ADDED
@@ -0,0 +1,250 @@
+from pydantic import BaseModel
+
+from flowfile_core.flowfile.flow_node.flow_node import FlowNode
+
+from flowfile_core.flowfile.graph_tree.models import BranchInfo, InputInfo
+
+
+def calculate_depth(node_id: int, node_info: dict[int, BranchInfo], visited: set = None) -> int:
+    """Calculates the depth of each node."""
+
+    if visited is None:
+        visited = set()
+    if node_id in visited:
+        return node_info[node_id].depth
+    visited.add(node_id)
+
+    max_input_depth = -1
+    inputs = node_info[node_id].inputs
+
+    for main_id in inputs.main:
+        max_input_depth = max(max_input_depth, calculate_depth(main_id, node_info, visited))
+    if inputs.left:
+        max_input_depth = max(max_input_depth, calculate_depth(inputs.left, node_info, visited))
+    if inputs.right:
+        max_input_depth = max(max_input_depth, calculate_depth(inputs.right, node_info, visited))
+
+    node_info[node_id].depth = max_input_depth + 1
+    return node_info[node_id].depth
+
+
+# Trace paths from each root
+def trace_path(node_id: int, node_info: dict[int, BranchInfo], merge_points: dict[int, list[int]],
+               current_path: list[int] | None = None):
+    """Define the trace of each node path"""
+    if current_path is None:
+        current_path = []
+
+    current_path = current_path + [node_id]
+    outputs = node_info[node_id].outputs
+
+    if not outputs:
+        # End of path
+        return [current_path]
+
+    # If this node has multiple outputs or connects to a merge point, branch
+    all_paths = []
+    for output_id in outputs:
+        if output_id in merge_points and len(merge_points[output_id]) > 1:
+            # This is a merge point, end this path here
+            all_paths.append(current_path + [output_id])
+        else:
+            # Continue the path
+            all_paths.extend(trace_path(output_id, node_info, merge_points, current_path))
+    return all_paths
+
+
+def build_node_info(nodes: list[FlowNode]) -> dict[int, BranchInfo]:
+    """Builds node information used to construct the graph tree."""
+
+    node_info = {}
+    for node in nodes:
+        node_id = node.node_id
+
+        # Get node label
+        operation = node.node_type.replace("_", " ").title() if node.node_type else "Unknown"
+        label = f"{operation} (id={node_id})"
+        if hasattr(node, 'setting_input') and hasattr(node.setting_input, 'description'):
+            if node.setting_input.description:
+                desc = node.setting_input.description
+                if len(desc) > 20:  # Truncate long descriptions
+                    desc = desc[:17] + "..."
+                label = f"{operation} ({node_id}): {desc}"
+
+        # Get inputs and outputs
+        inputs = InputInfo(
+            main=[n.node_id for n in (node.node_inputs.main_inputs or [])],
+            left=node.node_inputs.left_input.node_id if node.node_inputs.left_input else None,
+            right=node.node_inputs.right_input.node_id if node.node_inputs.right_input else None
+        )
+        outputs = [n.node_id for n in node.leads_to_nodes]
+
+        node_info[node_id] = BranchInfo(
+            label=label,
+            short_label=f"{operation} ({node_id})",
+            inputs=inputs,
+            outputs=outputs,
+            depth=0
+        )
+
+    return node_info
+
+
+def group_nodes_by_depth(node_info: dict[int, BranchInfo]) -> tuple[dict[int, list[int]], int]:
+    """Groups each node by depth"""
+    depth_groups = {}
+    max_depth = 0
+    for node_id, info in node_info.items():
+        depth = info.depth
+        max_depth = max(max_depth, depth)
+        if depth not in depth_groups:
+            depth_groups[depth] = []
+        depth_groups[depth].append(node_id)
+
+    return depth_groups, max_depth
+
+
+def define_node_connections(node_info: dict[int, BranchInfo]) -> dict[int, list[int]]:
+    """Defines node connections to merge"""
+    merge_points = {}  # target_id -> list of source_ids
+    for node_id, info in node_info.items():
+        for output_id in info.outputs:
+            if output_id not in merge_points:
+                merge_points[output_id] = []
+            merge_points[output_id].append(node_id)
+
+    return merge_points
+
+
+def build_flow_paths(node_info: dict[int, BranchInfo], flow_starts: list[FlowNode],
+                     merge_points: dict[int, list[int]]):
+    """Build the flow paths to be drawn"""
+
+
+    # Find all root nodes (no inputs)
+    root_nodes = [nid for nid, info in node_info.items()
+                  if not info.inputs.main and not info.inputs.left and not info.inputs.right]
+
+    if not root_nodes and flow_starts:
+        root_nodes = [n.node_id for n in flow_starts]
+    paths = []  # List of paths through the graph
+
+    # Get all paths
+    for root_id in root_nodes:
+        paths.extend(trace_path(root_id, node_info, merge_points))
+
+    return paths
+
+
+def group_paths(paths: list, merge_points: dict):
+    """Groups each node path."""
+    paths_by_merge = {}
+    standalone_paths = []
+
+    for path in paths:
+        if len(path) > 1 and path[-1] in merge_points and len(merge_points[path[-1]]) > 1:
+            merge_id = path[-1]
+            if merge_id not in paths_by_merge:
+                paths_by_merge[merge_id] = []
+            paths_by_merge[merge_id].append(path)
+        else:
+            standalone_paths.append(path)
+    return paths_by_merge, standalone_paths
+
+
+def draw_merged_paths(node_info: dict[int, BranchInfo],
+                      merge_points: dict[int, list[int]],
+                      paths_by_merge: dict[int, list[list[int]]],
+                      merge_drawn: set,
+                      drawn_nodes: set,
+                      lines: list[str]):
+    """Draws paths for each node that merges."""
+    for merge_id, merge_paths in paths_by_merge.items():
+        if merge_id in merge_drawn:
+            continue
+        merge_info = node_info[merge_id]
+        sources = merge_points[merge_id]
+
+        # Draw each source path leading to the merge
+        for i, source_id in enumerate(sources):
+            # Find the path containing this source
+            source_path = None
+            for path in merge_paths:
+                if source_id in path:
+
+                    source_path = path[:path.index(source_id) + 1]
+                    break
+
+            if source_path:
+                # Build the line for this path
+                line_parts = []
+                for j, nid in enumerate(source_path):
+                    if j == 0:
+                        line_parts.append(node_info[nid].label)
+                    else:
+                        line_parts.append(f" ──> {node_info[nid].short_label}")
+
+                # Add the merge arrow
+                if i == 0:
+                    # First source
+                    line = "".join(line_parts) + " ─────┐"
+                    lines.append(line)
+                elif i == len(sources) - 1:
+                    # Last source
+                    line = "".join(line_parts) + " ─────┴──> " + merge_info.label
+                    lines.append(line)
+
+                    # Continue with the rest of the path after merge
+                    remaining = node_info[merge_id].outputs
+                    while remaining:
+                        next_id = remaining[0]
+                        lines[-1] += f" ──> {node_info[next_id].label}"
+                        remaining = node_info[next_id].outputs
+                        drawn_nodes.add(next_id)
+                else:
+                    # Middle sources
+                    line = "".join(line_parts) + " ─────┤"
+                    lines.append(line)
+
+                for nid in source_path:
+                    drawn_nodes.add(nid)
+
+        drawn_nodes.add(merge_id)
+        merge_drawn.add(merge_id)
+        lines.append("")  # Add spacing between merge groups
+    return paths_by_merge
+
+
+def draw_standalone_paths(drawn_nodes: set[int], standalone_paths: list[list[int]], lines: list[str],
+                          node_info: dict[int, BranchInfo]):
+    """ Draws paths that do not merge."""
+    # Draw standalone paths
+    for path in standalone_paths:
+        if all(nid in drawn_nodes for nid in path):
+            continue
+
+        line_parts = []
+        for i, node_id in enumerate(path):
+            if node_id not in drawn_nodes:
+                if i == 0:
+                    line_parts.append(node_info[node_id].label)
+                else:
+                    line_parts.append(f" ──> {node_info[node_id].short_label}")
+                drawn_nodes.add(node_id)
+
+        if line_parts:
+            lines.append("".join(line_parts))
+
+
+def add_un_drawn_nodes(drawn_nodes: set[int], node_info: dict[int, BranchInfo], lines: list[str]):
+    """Adds isolated nodes if exists."""
+    # Add any remaining undrawn nodes
+
+    for node_id in node_info:
+        if node_id not in drawn_nodes:
+            lines.append(node_info[node_id].label + " (isolated)")
+
+    lines.append("")
+    lines.append("=" * 80)
+    lines.append("Execution Order")
+    lines.append("=" * 80)
flowfile_core/flowfile/graph_tree/models.py
ADDED
@@ -0,0 +1,15 @@
+from pydantic import BaseModel
+
+
+class InputInfo(BaseModel):
+    main: list[int]
+    right: int | None = None
+    left: int | None = None
+
+
+class BranchInfo(BaseModel):
+    label: str
+    short_label: str
+    inputs: InputInfo
+    outputs: list[int]
+    depth: int
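The two new graph_tree modules work together: build_node_info flattens FlowNodes into BranchInfo records, calculate_depth walks inputs recursively, and trace_path follows outputs until it reaches a merge point. A small sketch on a hand-built graph where nodes 1 and 2 merge into node 3, using the models exactly as declared above (the node labels are illustrative):

    from flowfile_core.flowfile.graph_tree.graph_tree import (
        calculate_depth, define_node_connections, trace_path)
    from flowfile_core.flowfile.graph_tree.models import BranchInfo, InputInfo

    node_info = {
        1: BranchInfo(label="Read (id=1)", short_label="Read (1)",
                      inputs=InputInfo(main=[]), outputs=[3], depth=0),
        2: BranchInfo(label="Read (id=2)", short_label="Read (2)",
                      inputs=InputInfo(main=[]), outputs=[3], depth=0),
        3: BranchInfo(label="Join (id=3)", short_label="Join (3)",
                      inputs=InputInfo(main=[1, 2]), outputs=[], depth=0),
    }

    for node_id in node_info:
        calculate_depth(node_id, node_info)
    print(node_info[3].depth)                      # 1: one level below its inputs

    merge_points = define_node_connections(node_info)
    print(merge_points)                            # {3: [1, 2]}: nodes 1 and 2 merge into 3
    print(trace_path(1, node_info, merge_points))  # [[1, 3]]: the path stops at the merge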
flowfile_core/flowfile/handler.py
CHANGED
@@ -3,11 +3,25 @@ from dataclasses import dataclass
 from typing import Dict, List
 import os
 from pathlib import Path
+from datetime import datetime
 
 from flowfile_core.flowfile.manage.open_flowfile import open_flow
 from flowfile_core.flowfile.flow_graph import FlowGraph
 from flowfile_core.schemas.schemas import FlowSettings
 from flowfile_core.flowfile.utils import create_unique_id
+from shared.storage_config import storage
+
+
+def get_flow_save_location(flow_name: str) -> Path:
+    """Gets the initial save location for flow files"""
+    if ".flowfile" not in flow_name:
+        flow_name += ".flowfile"
+    return storage.temp_directory_for_flows / flow_name
+
+
+def create_flow_name() -> str:
+    """Creates a unique flow name"""
+    return datetime.now().strftime("%Y%m%d_%H_%M_%S")+"_flow.flowfile"
 
 
 @dataclass
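A quick sketch of what the two new helpers produce (the exact directory comes from shared.storage_config, so the path shown is illustrative):

    name = create_flow_name()
    print(name)                               # e.g. '20240101_12_00_00_flow.flowfile'

    print(get_flow_save_location("my_flow"))  # <temp_directory_for_flows>/my_flow.flowfile
    print(get_flow_save_location(name))       # suffix not added twice: '.flowfile' already present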
@@ -57,7 +71,7 @@ class FlowfileHandler:
         else:
             raise Exception('Flow not found')
 
-    def add_flow(self, name: str, flow_path: str) -> int:
+    def add_flow(self, name: str = None, flow_path: str = None) -> int:
         """
         Creates a new flow with a reference to the flow path
         Args:
@@ -69,8 +83,13 @@ class FlowfileHandler:
 
         """
         next_id = create_unique_id()
-
-
+        if not name:
+            name = create_flow_name()
+        if not flow_path:
+            flow_path = get_flow_save_location(name)
+        flow_info = FlowSettings(name=name, flow_id=next_id, save_location=str(flow_path), path=str(flow_path))
+        flow = self.register_flow(flow_info)
+        flow.save_flow(flow.flow_settings.path)
         return next_id
 
     def get_flow_info(self, flow_id: int) -> FlowSettings:
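With both arguments now optional, a flow can be registered without supplying a name or path; the helpers generate both and the empty flow is saved immediately. A hedged sketch (FlowfileHandler construction is assumed to take no arguments here):

    handler = FlowfileHandler()
    flow_id = handler.add_flow()     # auto-named, saved under the temp flow directory
    info = handler.get_flow_info(flow_id)
    print(info.name, info.save_location)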
flowfile_core/flowfile/manage/compatibility_enhancements.py
CHANGED
@@ -48,7 +48,7 @@ def ensure_compatibility(flow_storage_obj: schemas.FlowInformation, flow_path: s
         setattr(flow_storage_obj, 'flow_settings', flow_settings)
         flow_storage_obj = schemas.FlowInformation.model_validate(flow_storage_obj)
     elif not hasattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location'):
-        setattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location',
+        setattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location', "remote")
     elif not hasattr(flow_storage_obj.flow_settings, 'is_running'):
         setattr(flow_storage_obj.flow_settings, 'is_running', False)
         setattr(flow_storage_obj.flow_settings, 'is_canceled', False)
flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py}
RENAMED
@@ -1,39 +1,90 @@
 
 from typing import List
-
-from flowfile_core.schemas import transform_schema
-from flowfile_core.schemas import input_schema
+
 from polars import datatypes
 import polars as pl
+
+from pl_fuzzy_frame_match.output_column_name_utils import set_name_in_fuzzy_mappings
+from pl_fuzzy_frame_match.pre_process import rename_fuzzy_right_mapping
+
 from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import fetch_unique_values
 from flowfile_core.configs.flow_logger import main_logger
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
+from flowfile_core.schemas import transform_schema
+from flowfile_core.schemas import input_schema
 
 
-def
-
+def _ensure_all_columns_have_select(left_cols: List[str],
+                                    right_cols: List[str],
+                                    fuzzy_match_input: transform_schema.FuzzyMatchInput):
+    """
+    Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
+    statements.
+    Args:
+        left_cols (List[str]): List of column names in the left FlowDataEngine.
+        right_cols (List[str]): List of column names in the right FlowDataEngine.
+        fuzzy_match_input (FuzzyMatchInput): Fuzzy match input configuration containing select statements.
+
+    Returns:
+        None
+    """
+    right_cols_in_select = {c.old_name for c in fuzzy_match_input.right_select.renames}
+    left_cols_in_select = {c.old_name for c in fuzzy_match_input.left_select.renames}
+
+    fuzzy_match_input.left_select.renames.extend(
+        [transform_schema.SelectInput(col) for col in left_cols if col not in left_cols_in_select])
+    fuzzy_match_input.right_select.renames.extend(
+        [transform_schema.SelectInput(col) for col in right_cols if col not in right_cols_in_select]
+    )
+
+
+def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputs) -> None:
+    """
+    Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
+    This function modifies the join_inputs object in-place.
+
+    Returns:
+        None
+    """
+    select_map = {select.new_name: select for select in join_inputs.renames}
+    ordered_renames = [select_map[col] for col in col_order if col in select_map]
+    join_inputs.renames = ordered_renames
 
 
 def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
                                  left_schema: List[FlowfileColumn],
                                  right_schema: List[FlowfileColumn]):
-
+    _ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
+                                    right_cols=[col.column_name for col in right_schema],
+                                    fuzzy_match_input=fm_input)
+    _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
+                                          join_inputs=fm_input.left_select)
+    _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
+                                          join_inputs=fm_input.right_select)
+    for column in fm_input.left_select.renames:
+        if column.join_key:
+            column.keep = True
+    for column in fm_input.right_select.renames:
+        if column.join_key:
+            column.keep = True
     left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
     fm_input.auto_rename()
-
+    right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
+    new_join_mapping = rename_fuzzy_right_mapping(fm_input.join_mapping, right_renames)
     output_schema = []
     for column in fm_input.left_select.renames:
         column_schema = left_schema_dict.get(column.old_name)
-        if column_schema and column.keep:
+        if column_schema and (column.keep or column.join_key):
             output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
                                                            example_values=column_schema.example_values))
     for column in fm_input.right_select.renames:
         column_schema = right_schema_dict.get(column.old_name)
-        if column_schema and column.keep:
+        if column_schema and (column.keep or column.join_key):
             output_schema.append(FlowfileColumn.from_input(column.new_name, column_schema.data_type,
                                                            example_values=column_schema.example_values))
-
-
-
+    set_name_in_fuzzy_mappings(new_join_mapping)
+    output_schema.extend([FlowfileColumn.from_input(fuzzy_mapping.output_column_name, 'Float64')
+                          for fuzzy_mapping in new_join_mapping])
     return output_schema
 
 
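The behavioural change in this hunk is the (column.keep or column.join_key) condition: join-key columns now always make it into the calculated fuzzy-match schema, even when deselected, and the new helpers pad and reorder the select lists first. A sketch of the reordering helper, assuming SelectInput's new_name defaults to its positional old_name and that JoinInputs accepts its renames as a keyword argument:

    from flowfile_core.schemas import transform_schema

    # two selects declared out of order relative to the incoming columns
    join_inputs = transform_schema.JoinInputs(
        renames=[transform_schema.SelectInput("b"), transform_schema.SelectInput("a")])
    _order_join_inputs_based_on_col_order(col_order=["a", "b"], join_inputs=join_inputs)
    print([s.new_name for s in join_inputs.renames])  # ['a', 'b'], matching incoming order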
@@ -71,7 +122,8 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
     val_column_schema = get_schema_of_column(node_input_schema, pivot_input.value_col)
     if output_fields is not None and len(output_fields) > 0:
         return index_columns_schema+[FlowfileColumn(PlType(column_name=output_field.name,
-                                                           pl_datatype=output_field.data_type)) for output_field in
+                                                           pl_datatype=output_field.data_type)) for output_field in
+                                     output_fields]
 
     else:
         max_unique_vals = 200
@@ -84,7 +136,11 @@ def pre_calculate_pivot_schema(node_input_schema: List[FlowfileColumn],
                        f' Max unique values: {max_unique_vals}')
     pl_output_fields = []
     for val in unique_vals:
-
-        output_type = get_output_data_type_pivot(val_column_schema,
-        pl_output_fields.append(PlType(column_name=
+        if len(pivot_input.aggregations) == 1:
+            output_type = get_output_data_type_pivot(val_column_schema, pivot_input.aggregations[0])
+            pl_output_fields.append(PlType(column_name=str(val), pl_datatype=output_type))
+        else:
+            for agg in pivot_input.aggregations:
+                output_type = get_output_data_type_pivot(val_column_schema, agg)
+                pl_output_fields.append(PlType(column_name=f'{val}_{agg}', pl_datatype=output_type))
     return index_columns_schema + [FlowfileColumn(pl_output_field) for pl_output_field in pl_output_fields]
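The rewritten loop above also changes the pivot column naming: with a single aggregation the pivoted value keeps its bare name, while multiple aggregations get a per-aggregation suffix. A self-contained sketch of just that naming rule (the values are illustrative):

    unique_vals = ["NL", "US"]

    def expected_pivot_columns(aggregations: list) -> list:
        # mirrors the branch in pre_calculate_pivot_schema above
        if len(aggregations) == 1:
            return [str(val) for val in unique_vals]
        return [f"{val}_{agg}" for val in unique_vals for agg in aggregations]

    print(expected_pivot_columns(["sum"]))           # ['NL', 'US']
    print(expected_pivot_columns(["sum", "mean"]))   # ['NL_sum', 'NL_mean', 'US_sum', 'US_mean']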