Flowfile 0.2.2 (flowfile-0.2.2-py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of Flowfile has been flagged as potentially problematic.

Files changed (171)
  1. build_backends/__init__.py +0 -0
  2. build_backends/main.py +313 -0
  3. build_backends/main_prd.py +202 -0
  4. flowfile/__init__.py +71 -0
  5. flowfile/__main__.py +24 -0
  6. flowfile-0.2.2.dist-info/LICENSE +21 -0
  7. flowfile-0.2.2.dist-info/METADATA +225 -0
  8. flowfile-0.2.2.dist-info/RECORD +171 -0
  9. flowfile-0.2.2.dist-info/WHEEL +4 -0
  10. flowfile-0.2.2.dist-info/entry_points.txt +9 -0
  11. flowfile_core/__init__.py +13 -0
  12. flowfile_core/auth/__init__.py +0 -0
  13. flowfile_core/auth/jwt.py +140 -0
  14. flowfile_core/auth/models.py +40 -0
  15. flowfile_core/auth/secrets.py +178 -0
  16. flowfile_core/configs/__init__.py +35 -0
  17. flowfile_core/configs/flow_logger.py +433 -0
  18. flowfile_core/configs/node_store/__init__.py +0 -0
  19. flowfile_core/configs/node_store/nodes.py +98 -0
  20. flowfile_core/configs/settings.py +120 -0
  21. flowfile_core/database/__init__.py +0 -0
  22. flowfile_core/database/connection.py +51 -0
  23. flowfile_core/database/init_db.py +45 -0
  24. flowfile_core/database/models.py +41 -0
  25. flowfile_core/fileExplorer/__init__.py +0 -0
  26. flowfile_core/fileExplorer/funcs.py +259 -0
  27. flowfile_core/fileExplorer/utils.py +53 -0
  28. flowfile_core/flowfile/FlowfileFlow.py +1403 -0
  29. flowfile_core/flowfile/__init__.py +0 -0
  30. flowfile_core/flowfile/_extensions/__init__.py +0 -0
  31. flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
  32. flowfile_core/flowfile/analytics/__init__.py +0 -0
  33. flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
  34. flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
  35. flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
  36. flowfile_core/flowfile/analytics/utils.py +9 -0
  37. flowfile_core/flowfile/connection_manager/__init__.py +3 -0
  38. flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
  39. flowfile_core/flowfile/connection_manager/models.py +10 -0
  40. flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
  41. flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
  42. flowfile_core/flowfile/database_connection_manager/models.py +15 -0
  43. flowfile_core/flowfile/extensions.py +36 -0
  44. flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
  45. flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
  46. flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
  47. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
  48. flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
  49. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
  50. flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
  51. flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
  52. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
  53. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
  54. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
  55. flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
  56. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
  57. flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
  58. flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
  59. flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
  60. flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
  61. flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
  62. flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
  63. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
  64. flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
  65. flowfile_core/flowfile/flow_data_engine/types.py +0 -0
  66. flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
  67. flowfile_core/flowfile/flow_node/__init__.py +0 -0
  68. flowfile_core/flowfile/flow_node/flow_node.py +771 -0
  69. flowfile_core/flowfile/flow_node/models.py +111 -0
  70. flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
  71. flowfile_core/flowfile/handler.py +123 -0
  72. flowfile_core/flowfile/manage/__init__.py +0 -0
  73. flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
  74. flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
  75. flowfile_core/flowfile/manage/open_flowfile.py +136 -0
  76. flowfile_core/flowfile/setting_generator/__init__.py +2 -0
  77. flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
  78. flowfile_core/flowfile/setting_generator/settings.py +176 -0
  79. flowfile_core/flowfile/sources/__init__.py +0 -0
  80. flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
  81. flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
  82. flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
  83. flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
  84. flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
  85. flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
  86. flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
  87. flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
  88. flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
  89. flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
  90. flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
  91. flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
  92. flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
  93. flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
  94. flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
  95. flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
  96. flowfile_core/flowfile/util/__init__.py +0 -0
  97. flowfile_core/flowfile/util/calculate_layout.py +137 -0
  98. flowfile_core/flowfile/util/execution_orderer.py +141 -0
  99. flowfile_core/flowfile/utils.py +106 -0
  100. flowfile_core/main.py +138 -0
  101. flowfile_core/routes/__init__.py +0 -0
  102. flowfile_core/routes/auth.py +34 -0
  103. flowfile_core/routes/logs.py +163 -0
  104. flowfile_core/routes/public.py +10 -0
  105. flowfile_core/routes/routes.py +601 -0
  106. flowfile_core/routes/secrets.py +85 -0
  107. flowfile_core/run_lock.py +11 -0
  108. flowfile_core/schemas/__init__.py +0 -0
  109. flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
  110. flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
  111. flowfile_core/schemas/defaults.py +9 -0
  112. flowfile_core/schemas/external_sources/__init__.py +0 -0
  113. flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
  114. flowfile_core/schemas/input_schema.py +477 -0
  115. flowfile_core/schemas/models.py +193 -0
  116. flowfile_core/schemas/output_model.py +115 -0
  117. flowfile_core/schemas/schemas.py +106 -0
  118. flowfile_core/schemas/transform_schema.py +569 -0
  119. flowfile_core/secrets/__init__.py +0 -0
  120. flowfile_core/secrets/secrets.py +64 -0
  121. flowfile_core/utils/__init__.py +0 -0
  122. flowfile_core/utils/arrow_reader.py +247 -0
  123. flowfile_core/utils/excel_file_manager.py +18 -0
  124. flowfile_core/utils/fileManager.py +45 -0
  125. flowfile_core/utils/fl_executor.py +38 -0
  126. flowfile_core/utils/utils.py +8 -0
  127. flowfile_frame/__init__.py +56 -0
  128. flowfile_frame/__main__.py +12 -0
  129. flowfile_frame/adapters.py +17 -0
  130. flowfile_frame/expr.py +1163 -0
  131. flowfile_frame/flow_frame.py +2093 -0
  132. flowfile_frame/group_frame.py +199 -0
  133. flowfile_frame/join.py +75 -0
  134. flowfile_frame/selectors.py +242 -0
  135. flowfile_frame/utils.py +184 -0
  136. flowfile_worker/__init__.py +55 -0
  137. flowfile_worker/configs.py +95 -0
  138. flowfile_worker/create/__init__.py +37 -0
  139. flowfile_worker/create/funcs.py +146 -0
  140. flowfile_worker/create/models.py +86 -0
  141. flowfile_worker/create/pl_types.py +35 -0
  142. flowfile_worker/create/read_excel_tables.py +110 -0
  143. flowfile_worker/create/utils.py +84 -0
  144. flowfile_worker/external_sources/__init__.py +0 -0
  145. flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
  146. flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
  147. flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
  148. flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
  149. flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
  150. flowfile_worker/external_sources/sql_source/__init__.py +0 -0
  151. flowfile_worker/external_sources/sql_source/main.py +56 -0
  152. flowfile_worker/external_sources/sql_source/models.py +72 -0
  153. flowfile_worker/flow_logger.py +58 -0
  154. flowfile_worker/funcs.py +327 -0
  155. flowfile_worker/main.py +108 -0
  156. flowfile_worker/models.py +95 -0
  157. flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
  158. flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
  159. flowfile_worker/polars_fuzzy_match/models.py +36 -0
  160. flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
  161. flowfile_worker/polars_fuzzy_match/process.py +86 -0
  162. flowfile_worker/polars_fuzzy_match/utils.py +50 -0
  163. flowfile_worker/process_manager.py +36 -0
  164. flowfile_worker/routes.py +440 -0
  165. flowfile_worker/secrets.py +148 -0
  166. flowfile_worker/spawner.py +187 -0
  167. flowfile_worker/utils.py +25 -0
  168. test_utils/__init__.py +3 -0
  169. test_utils/postgres/__init__.py +1 -0
  170. test_utils/postgres/commands.py +109 -0
  171. test_utils/postgres/fixtures.py +417 -0
flowfile_core/flowfile/FlowfileFlow.py
@@ -0,0 +1,1403 @@
+import datetime
+import pickle
+import polars as pl
+import fastexcel
+from fastapi.exceptions import HTTPException
+from time import time
+from functools import partial
+from typing import List, Dict, Union, Callable, Any, Optional, Tuple
+from uuid import uuid1
+from copy import deepcopy
+from pyarrow.parquet import ParquetFile
+from flowfile_core.configs import logger
+from flowfile_core.configs.flow_logger import FlowLogger
+from flowfile_core.flowfile.sources.external_sources.factory import data_source_factory
+from flowfile_core.flowfile.sources.external_sources.airbyte_sources.settings import airbyte_settings_from_config
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import type_to_polars_str, FlowfileColumn
+from flowfile_core.flowfile.flow_data_engine.fuzzy_matching.settings_validator import (calculate_fuzzy_match_schema,
+                                                                                        pre_calculate_pivot_schema)
+from flowfile_core.utils.arrow_reader import get_read_top_n
+from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine, execute_polars_code
+from flowfile_core.flowfile.flow_data_engine.read_excel_tables import get_open_xlsx_datatypes, \
+    get_calamine_xlsx_data_types
+from flowfile_core.flowfile.sources import external_sources
+from flowfile_core.schemas import input_schema, schemas, transform_schema
+from flowfile_core.schemas.output_model import TableExample, NodeData, NodeResult, RunInformation
+from flowfile_core.flowfile.utils import snake_case_to_camel_case
+from flowfile_core.flowfile.analytics.utils import create_graphic_walker_node_from_node_promise
+from flowfile_core.flowfile.flow_node.flow_node import FlowNode
+from flowfile_core.flowfile.util.execution_orderer import determine_execution_order
+from flowfile_core.flowfile.flow_data_engine.polars_code_parser import polars_code_parser
+from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (ExternalAirbyteFetcher,
+                                                                                                  ExternalDatabaseFetcher,
+                                                                                                  ExternalDatabaseWriter,
+                                                                                                  ExternalDfFetcher)
+from flowfile_core.secrets.secrets import get_encrypted_secret, decrypt_secret
+from flowfile_core.flowfile.sources.external_sources.sql_source import utils as sql_utils, models as sql_models
+from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import SqlSource, BaseSqlSource
+from flowfile_core.flowfile.database_connection_manager.db_connections import get_local_database_connection
+from flowfile_core.flowfile.util.calculate_layout import calculate_layered_layout
+
+
+def get_xlsx_schema(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
+                    end_row: int, end_column: int, has_headers: bool):
+    try:
+        logger.info('Starting to calculate the schema')
+        if engine == 'openpyxl':
+            max_col = end_column if end_column > 0 else None
+            return get_open_xlsx_datatypes(file_path=file_path,
+                                           sheet_name=sheet_name,
+                                           min_row=start_row + 1,
+                                           min_col=start_column + 1,
+                                           max_row=100,
+                                           max_col=max_col, has_headers=has_headers)
+        elif engine == 'calamine':
+            return get_calamine_xlsx_data_types(file_path=file_path,
+                                                sheet_name=sheet_name,
+                                                start_row=start_row,
+                                                end_row=end_row)
+        logger.info('done calculating the schema')
+    except Exception as e:
+        logger.error(e)
+        return []
+
+
+def skip_node_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
+    if len(nodes) > 0:
+        msg = "\n".join(str(node) for node in nodes)
+        flow_logger.warning(f'skipping nodes:\n{msg}')
+
+
+def execution_order_message(flow_logger: FlowLogger, nodes: List[FlowNode]) -> None:
+    msg = "\n".join(str(node) for node in nodes)
+    flow_logger.info(f'execution order:\n{msg}')
+
+
+def get_xlsx_schema_callback(engine: str, file_path: str, sheet_name: str, start_row: int, start_column: int,
+                             end_row: int, end_column: int, has_headers: bool):
+    return partial(get_xlsx_schema, engine=engine, file_path=file_path, sheet_name=sheet_name, start_row=start_row,
+                   start_column=start_column, end_row=end_row, end_column=end_column, has_headers=has_headers)
+
+
+class FlowGraph:
+    """
+    FlowGraph is a class that enables Extract, Transform and Load (ETL) operations
+    on data. It allows you to create a Directed Acyclic Graph (DAG) where each
+    node represents a step in the ETL pipeline.
+
+    The class offers methods to add transformations and data sources, as well as
+    methods to run the transformations and generate results.
+
+    Attributes:
+        _input_cols (set): A set that stores the input columns for the transformations.
+        _output_cols (set): A set that stores the output columns from the transformations.
+    """
+    uuid: str
+    depends_on: Dict[int, Union[ParquetFile, FlowDataEngine, "FlowGraph", pl.DataFrame,]]
+    _flow_id: int
+    _input_data: Union[ParquetFile, FlowDataEngine, "FlowGraph"]
+    _input_cols: List[str]
+    _output_cols: List[str]
+    _node_db: Dict[Union[str, int], FlowNode]
+    _node_ids: List[Union[str, int]]
+    _results: Optional[FlowDataEngine] = None
+    cache_results: bool = False
+    schema: Optional[List[FlowfileColumn]] = None
+    has_over_row_function: bool = False
+    _flow_starts: List[Union[int, str]] = None
+    node_results: List[NodeResult] = None
+    latest_run_info: Optional[RunInformation] = None
+    start_datetime: datetime = None
+    end_datetime: datetime = None
+    nodes_completed: int = 0
+    flow_settings: schemas.FlowSettings = None
+    flow_logger: FlowLogger
+
+    def __init__(self, flow_id: int,
+                 flow_settings: schemas.FlowSettings,
+                 name: str = None, input_cols: List[str] = None,
+                 output_cols: List[str] = None,
+                 path_ref: str = None,
+                 input_flow: Union[ParquetFile, FlowDataEngine, "FlowGraph"] = None,
+                 cache_results: bool = False):
+        self.flow_settings = flow_settings
+        self.uuid = str(uuid1())
+        self.nodes_completed = 0
+        self.start_datetime = None
+        self.end_datetime = None
+        self.latest_run_info = None
+        self.node_results = []
+        self._flow_id = flow_id
+        self.flow_logger = FlowLogger(flow_id)
+        self._flow_starts: List[FlowNode] = []
+        self._results = None
+        self.schema = None
+        self.has_over_row_function = False
+        self._input_cols = [] if input_cols is None else input_cols
+        self._output_cols = [] if output_cols is None else output_cols
+        self._node_ids = []
+        self._node_db = {}
+        self.cache_results = cache_results
+        self.__name__ = name if name else id(self)
+        self.depends_on = {}
+        if path_ref is not None:
+            self.add_datasource(input_schema.NodeDatasource(file_path=path_ref))
+        elif input_flow is not None:
+            self.add_datasource(input_file=input_flow)
+
+    def add_node_promise(self, node_promise: input_schema.NodePromise):
+
+        def placeholder(n: FlowNode = None):
+            if n is None:
+                return FlowDataEngine()
+            return n
+
+        self.add_node_step(node_id=node_promise.node_id, node_type=node_promise.node_type, function=placeholder,
+                           setting_input=node_promise)
+
+    def apply_layout(self, y_spacing: int = 150, x_spacing: int = 200, initial_y: int = 100):
+        """
+        Calculates and applies a layered layout to all nodes in the graph.
+        Updates the pos_x and pos_y attributes of the node setting inputs.
+        """
+        self.flow_logger.info("Applying layered layout...")
+        start_time = time()
+        try:
+            # Calculate new positions for all nodes
+            new_positions = calculate_layered_layout(
+                self, y_spacing=y_spacing, x_spacing=x_spacing, initial_y=initial_y
+            )
+
+            if not new_positions:
+                self.flow_logger.warning("Layout calculation returned no positions.")
+                return
+
+            # Apply the new positions to the setting_input of each node
+            updated_count = 0
+            for node_id, (pos_x, pos_y) in new_positions.items():
+                node = self.get_node(node_id)
+                if node and hasattr(node, 'setting_input'):
+                    setting = node.setting_input
+                    if hasattr(setting, 'pos_x') and hasattr(setting, 'pos_y'):
+                        setting.pos_x = pos_x
+                        setting.pos_y = pos_y
+                        updated_count += 1
+                    else:
+                        self.flow_logger.warning(f"Node {node_id} setting_input ({type(setting)}) lacks pos_x/pos_y attributes.")
+                elif node:
+                    self.flow_logger.warning(f"Node {node_id} lacks setting_input attribute.")
+                # else: Node not found, already warned by calculate_layered_layout
+
+            end_time = time()
+            self.flow_logger.info(f"Layout applied to {updated_count}/{len(self.nodes)} nodes in {end_time - start_time:.2f} seconds.")
+
+        except Exception as e:
+            self.flow_logger.error(f"Error applying layout: {e}")
+            raise  # Optional: re-raise the exception
+
+    def add_initial_node_analysis(self, node_promise: input_schema.NodePromise):
+        node_analysis = create_graphic_walker_node_from_node_promise(node_promise)
+        self.add_explore_data(node_analysis)
+
+    def add_explore_data(self, node_analysis: input_schema.NodeExploreData):
+        sample_size: int = 10000
+
+        def analysis_preparation(flowfile_table: FlowDataEngine):
+
+            if flowfile_table.number_of_records<0:
+
+                number_of_records = ExternalDfFetcher(
+                    lf=flowfile_table.data_frame,
+                    operation_type="calculate_number_of_records",
+                    flow_id=self.flow_id,
+                    node_id=node.node_id,
+                ).result
+            else:
+                number_of_records = flowfile_table.number_of_records
+            if number_of_records > sample_size:
+                flowfile_table = flowfile_table.get_sample(sample_size, random=True)
+
+            external_sampler = ExternalDfFetcher(
+                lf=flowfile_table.data_frame,
+                file_ref=node.hash,
+                wait_on_completion=True,
+                node_id=node.node_id,
+                flow_id=self.flow_id,
+            )
+            node.results.analysis_data_generator = get_read_top_n(external_sampler.status.file_ref, 10000)
+            return flowfile_table
+
+        def schema_callback():
+            node = self.get_node(node_analysis.node_id)
+            if len(node.all_inputs) == 1:
+                input_node = node.all_inputs[0]
+                return input_node.schema
+            else:
+                return [FlowfileColumn.from_input('col_1', 'na')]
+
+        self.add_node_step(node_id=node_analysis.node_id, node_type='explore_data',
+                           function=analysis_preparation,
+                           setting_input=node_analysis, schema_callback=schema_callback)
+        node = self.get_node(node_analysis.node_id)
+
+    @property
+    def flow_id(self) -> int:
+        return self._flow_id
+
+    @flow_id.setter
+    def flow_id(self, new_id: int):
+        self._flow_id = new_id
+        for node in self.nodes:
+            if hasattr(node.setting_input, 'flow_id'):
+                node.setting_input.flow_id = new_id
+        self.flow_settings.flow_id = new_id
+
+    def __repr__(self):
+        """
+        Official string representation of the FlowGraph class.
+        """
+        settings_str = " -" + '\n -'.join(f"{k}: {v}" for k, v in self.flow_settings)
+        return f"FlowGraph(\nNodes: {self._node_db}\n\nSettings:\n{settings_str}"
+
+    def get_nodes_overview(self):
+        output = []
+        for v in self._node_db.values():
+            output.append(v.get_repr())
+        return output
+
+    def remove_from_output_cols(self, columns: List[str]):
+        cols = set(columns)
+        self._output_cols = [c for c in self._output_cols if c not in cols]
+
+    def get_node(self, node_id: Union[int, str] = None) -> FlowNode:
+        if node_id is None:
+            node_id = self._node_ids[-1]
+        node = self._node_db.get(node_id)
+        if node is not None:
+            return node
+
+    def add_pivot(self, pivot_settings: input_schema.NodePivot):
+        def _func(fl: FlowDataEngine):
+            return fl.do_pivot(pivot_settings.pivot_input, self.flow_logger.get_node_logger(pivot_settings.node_id))
+
+        self.add_node_step(node_id=pivot_settings.node_id,
+                           function=_func,
+                           node_type='pivot',
+                           setting_input=pivot_settings,
+                           input_node_ids=[pivot_settings.depending_on_id])
+
+        node = self.get_node(pivot_settings.node_id)
+
+        def schema_callback():
+            input_data = node.singular_main_input.get_resulting_data()  # get from the previous step the data
+            input_data.lazy = True  # ensure the dataset is lazy
+            input_lf = input_data.data_frame  # get the lazy frame
+            return pre_calculate_pivot_schema(input_data.schema, pivot_settings.pivot_input, input_lf=input_lf)
+        node.schema_callback = schema_callback
+
+    def add_unpivot(self, unpivot_settings: input_schema.NodeUnpivot):
+
+        def _func(fl: FlowDataEngine) -> FlowDataEngine:
+            return fl.unpivot(unpivot_settings.unpivot_input)
+
+        self.add_node_step(node_id=unpivot_settings.node_id,
+                           function=_func,
+                           node_type='unpivot',
+                           setting_input=unpivot_settings,
+                           input_node_ids=[unpivot_settings.depending_on_id])
+
+    def add_union(self, union_settings: input_schema.NodeUnion):
+        def _func(*flowfile_tables: FlowDataEngine):
+            dfs: List[pl.LazyFrame] | List[pl.DataFrame] = [flt.data_frame for flt in flowfile_tables]
+            return FlowDataEngine(pl.concat(dfs, how='diagonal_relaxed'))
+
+        self.add_node_step(node_id=union_settings.node_id,
+                           function=_func,
+                           node_type=f'union',
+                           setting_input=union_settings,
+                           input_node_ids=union_settings.depending_on_ids)
+
+    def add_group_by(self, group_by_settings: input_schema.NodeGroupBy):
+
+        def _func(fl: FlowDataEngine) -> FlowDataEngine:
+            return fl.do_group_by(group_by_settings.groupby_input, False)
+
+        self.add_node_step(node_id=group_by_settings.node_id,
+                           function=_func,
+                           node_type=f'group_by',
+                           setting_input=group_by_settings,
+                           input_node_ids=[group_by_settings.depending_on_id])
+
+        node = self.get_node(group_by_settings.node_id)
+
+        def schema_callback():
+            output_columns = [(c.old_name, c.new_name, c.output_type) for c in group_by_settings.groupby_input.agg_cols]
+            depends_on = node.node_inputs.main_inputs[0]
+            input_schema_dict: Dict[str, str] = {s.name: s.data_type for s in depends_on.schema}
+            output_schema = []
+            for old_name, new_name, data_type in output_columns:
+                data_type = input_schema_dict[old_name] if data_type is None else data_type
+                output_schema.append(FlowfileColumn.from_input(data_type=data_type, column_name=new_name))
+            return output_schema
+
+        node.schema_callback = schema_callback
+
+    def add_or_update_column_func(self, col_name: str, pl_dtype: pl.DataType, depends_on: FlowNode):
+        col_output = FlowfileColumn.from_input(column_name=col_name, data_type=str(pl_dtype))
+        schema = depends_on.schema
+        col_exist = depends_on.get_flow_file_column_schema(col_name)
+        if col_exist is None:
+            new_schema = schema + [col_output]
+        else:
+            new_schema = []
+            for s in self.schema:
+                if s.name == col_name:
+                    new_schema.append(col_output)
+                else:
+                    new_schema.append(s)
+        return new_schema
+
+    def add_filter(self, filter_settings: input_schema.NodeFilter):
+        is_advanced = filter_settings.filter_input.filter_type == 'advanced'
+        if is_advanced:
+            predicate = filter_settings.filter_input.advanced_filter
+        else:
+            _basic_filter = filter_settings.filter_input.basic_filter
+            filter_settings.filter_input.advanced_filter = (f'[{_basic_filter.field}]{_basic_filter.filter_type}"'
+                                                            f'{_basic_filter.filter_value}"')
+
+        def _func(fl: FlowDataEngine):
+            is_advanced = filter_settings.filter_input.filter_type == 'advanced'
+            if is_advanced:
+                return fl.do_filter(predicate)
+            else:
+                basic_filter = filter_settings.filter_input.basic_filter
+                if basic_filter.filter_value.isnumeric():
+                    field_data_type = fl.get_schema_column(basic_filter.field).generic_datatype()
+                    if field_data_type == 'str':
+                        _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
+                    else:
+                        _f = f'[{basic_filter.field}]{basic_filter.filter_type}{basic_filter.filter_value}'
+                else:
+                    _f = f'[{basic_filter.field}]{basic_filter.filter_type}"{basic_filter.filter_value}"'
+                filter_settings.filter_input.advanced_filter = _f
+                return fl.do_filter(_f)
+
+        self.add_node_step(filter_settings.node_id, _func,
+                           node_type='filter',
+                           renew_schema=False,
+                           setting_input=filter_settings,
+                           input_node_ids=[filter_settings.depending_on_id]
+                           )
+
+    def add_record_count(self, node_number_of_records: input_schema.NodeRecordCount):
+        def _func(fl: FlowDataEngine) -> FlowDataEngine:
+            return fl.get_record_count()
+
+        self.add_node_step(node_id=node_number_of_records.node_id,
+                           function=_func,
+                           node_type='record_count',
+                           setting_input=node_number_of_records,
+                           input_node_ids=[node_number_of_records.depending_on_id])
+
+    def add_polars_code(self, node_polars_code: input_schema.NodePolarsCode):
+        def _func(*flowfile_tables: FlowDataEngine) -> FlowDataEngine:
+            return execute_polars_code(*flowfile_tables, code=node_polars_code.polars_code_input.polars_code)
+
+        self.add_node_step(node_id=node_polars_code.node_id,
+                           function=_func,
+                           node_type='polars_code',
+                           setting_input=node_polars_code,
+                           input_node_ids=node_polars_code.depending_on_ids)
+
+        try:
+            polars_code_parser.validate_code(node_polars_code.polars_code_input.polars_code)
+        except Exception as e:
+            node = self.get_node(node_id=node_polars_code.node_id)
+            node.results.errors = str(e)
+
+    def add_unique(self, unique_settings: input_schema.NodeUnique):
+
+        def _func(fl: FlowDataEngine) -> FlowDataEngine:
+            return fl.make_unique(unique_settings.unique_input)
+
+        self.add_node_step(node_id=unique_settings.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='unique',
+                           setting_input=unique_settings,
+                           input_node_ids=[unique_settings.node_id])
+
+    def add_graph_solver(self, graph_solver_settings: input_schema.NodeGraphSolver):
+        def _func(fl: FlowDataEngine) -> FlowDataEngine:
+            return fl.solve_graph(graph_solver_settings.graph_solver_input)
+
+        self.add_node_step(node_id=graph_solver_settings.node_id,
+                           function=_func,
+                           node_type='graph_solver',
+                           setting_input=graph_solver_settings)
+
+    def add_formula(self, function_settings: input_schema.NodeFormula):
+        error = ""
+        if function_settings.function.field.data_type is not None:
+            output_type = type_to_polars_str(function_settings.function.field.data_type)
+        else:
+            output_type = None
+        if output_type is not None:
+            new_col = [FlowfileColumn.from_input(column_name=function_settings.function.field.name,
+                                                 data_type=str(output_type))]
+        else:
+            new_col = [FlowfileColumn.from_input(function_settings.function.field.name, 'String')]
+
+        def _func(fl: FlowDataEngine):
+            return fl.apply_sql_formula(func=function_settings.function.function,
+                                        col_name=function_settings.function.field.name,
+                                        output_data_type=output_type)
+
+        self.add_node_step(function_settings.node_id, _func,
+                           output_schema=new_col,
+                           node_type='formula',
+                           renew_schema=False,
+                           setting_input=function_settings,
+                           input_node_ids=[function_settings.depending_on_id]
+                           )
+        if error != "":
+            node = self.get_node(function_settings.node_id)
+            node.results.errors = error
+            return False, error
+        else:
+            return True, ""
+
+    def add_cross_join(self, cross_join_settings: input_schema.NodeCrossJoin) -> "FlowGraph":
+
+        def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
+            for left_select in cross_join_settings.cross_join_input.left_select.renames:
+                left_select.is_available = True if left_select.old_name in main.schema else False
+            for right_select in cross_join_settings.cross_join_input.right_select.renames:
+                right_select.is_available = True if right_select.old_name in right.schema else False
+
+            return main.do_cross_join(cross_join_input=cross_join_settings.cross_join_input,
+                                      auto_generate_selection=cross_join_settings.auto_generate_selection,
+                                      verify_integrity=False,
+                                      other=right)
+
+        self.add_node_step(node_id=cross_join_settings.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='cross_join',
+                           setting_input=cross_join_settings)
+        return self
+
+    def add_join(self, join_settings: input_schema.NodeJoin) -> "FlowGraph":
+        def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
+            for left_select in join_settings.join_input.left_select.renames:
+                left_select.is_available = True if left_select.old_name in main.schema else False
+            for right_select in join_settings.join_input.right_select.renames:
+                right_select.is_available = True if right_select.old_name in right.schema else False
+
+            return main.join(join_input=join_settings.join_input,
+                             auto_generate_selection=join_settings.auto_generate_selection,
+                             verify_integrity=False,
+                             other=right)
+
+        self.add_node_step(node_id=join_settings.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='join',
+                           setting_input=join_settings,
+                           input_node_ids=join_settings.depending_on_ids)
+        return self
+
+    def add_fuzzy_match(self, fuzzy_settings: input_schema.NodeFuzzyMatch) -> "FlowGraph":
+        def _func(main: FlowDataEngine, right: FlowDataEngine) -> FlowDataEngine:
+            f = main.start_fuzzy_join(fuzzy_match_input=fuzzy_settings.join_input, other=right, file_ref=node.hash,
+                                      flow_id=self.flow_id, node_id=fuzzy_settings.node_id)
+            logger.info("Started the fuzzy match action")
+            node._fetch_cached_df = f
+            return FlowDataEngine(f.get_result())
+
+        self.add_node_step(node_id=fuzzy_settings.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='fuzzy_match',
+                           setting_input=fuzzy_settings)
+        node = self.get_node(node_id=fuzzy_settings.node_id)
+
+        def schema_callback():
+            return calculate_fuzzy_match_schema(fuzzy_settings.join_input,
+                                                left_schema=node.node_inputs.main_inputs[0].schema,
+                                                right_schema=node.node_inputs.right_input.schema
+                                                )
+
+        node.schema_callback = schema_callback
+        return self
+
+    def add_text_to_rows(self, node_text_to_rows: input_schema.NodeTextToRows) -> "FlowGraph":
+        def _func(table: FlowDataEngine) -> FlowDataEngine:
+            return table.split(node_text_to_rows.text_to_rows_input)
+
+        self.add_node_step(node_id=node_text_to_rows.node_id,
+                           function=_func,
+                           node_type='text_to_rows',
+                           setting_input=node_text_to_rows,
+                           input_node_ids=[node_text_to_rows.depending_on_id])
+        return self
+
+    def add_sort(self, sort_settings: input_schema.NodeSort) -> "FlowGraph":
+        def _func(table: FlowDataEngine) -> FlowDataEngine:
+            return table.do_sort(sort_settings.sort_input)
+
+        self.add_node_step(node_id=sort_settings.node_id,
+                           function=_func,
+                           node_type='sort',
+                           setting_input=sort_settings,
+                           input_node_ids=[sort_settings.depending_on_id])
+        return self
+
+    def add_sample(self, sample_settings: input_schema.NodeSample) -> "FlowGraph":
+        def _func(table: FlowDataEngine) -> FlowDataEngine:
+            return table.get_sample(sample_settings.sample_size)
+
+        self.add_node_step(node_id=sample_settings.node_id,
+                           function=_func,
+                           node_type='sample',
+                           setting_input=sample_settings,
+                           input_node_ids=[sample_settings.depending_on_id]
+                           )
+        return self
+
+    def add_record_id(self, record_id_settings: input_schema.NodeRecordId) -> "FlowGraph":
+
+        def _func(table: FlowDataEngine) -> FlowDataEngine:
+            return table.add_record_id(record_id_settings.record_id_input)
+
+        self.add_node_step(node_id=record_id_settings.node_id,
+                           function=_func,
+                           node_type='record_id',
+                           setting_input=record_id_settings,
+                           input_node_ids=[record_id_settings.depending_on_id]
+                           )
+        return self
+
+    def add_select(self, select_settings: input_schema.NodeSelect) -> "FlowGraph":
+        select_cols = select_settings.select_input
+        drop_cols = tuple(s.old_name for s in select_settings.select_input)
+
+        def _func(table: FlowDataEngine) -> FlowDataEngine:
+            input_cols = set(f.name for f in table.schema)
+            ids_to_remove = []
+            for i, select_col in enumerate(select_cols):
+                if select_col.old_name not in input_cols:
+                    select_col.is_available = False
+                    if not select_col.keep:
+                        ids_to_remove.append(i)
+                else:
+                    select_col.is_available = True
+            ids_to_remove.reverse()
+            for i in ids_to_remove:
+                v = select_cols.pop(i)
+                del v
+            return table.do_select(select_inputs=transform_schema.SelectInputs(select_cols),
+                                   keep_missing=select_settings.keep_missing)
+
+        self.add_node_step(node_id=select_settings.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='select',
+                           drop_columns=list(drop_cols),
+                           setting_input=select_settings,
+                           input_node_ids=[select_settings.depending_on_id])
+        return self
+
+    @property
+    def graph_has_functions(self) -> bool:
+        return len(self._node_ids) > 0
+
+    def delete_node(self, node_id: Union[int, str]):
+        logger.info(f"Starting deletion of node with ID: {node_id}")
+
+        node = self._node_db.get(node_id)
+        if node:
+            logger.info(f"Found node: {node_id}, processing deletion")
+
+            lead_to_steps: List[FlowNode] = node.leads_to_nodes
+            logger.debug(f"Node {node_id} leads to {len(lead_to_steps)} other nodes")
+
+            if len(lead_to_steps) > 0:
+                for lead_to_step in lead_to_steps:
+                    logger.debug(f"Deleting input node {node_id} from dependent node {lead_to_step}")
+                    lead_to_step.delete_input_node(node_id, complete=True)
+
+            if not node.is_start:
+                depends_on: List[FlowNode] = node.node_inputs.get_all_inputs()
+                logger.debug(f"Node {node_id} depends on {len(depends_on)} other nodes")
+
+                for depend_on in depends_on:
+                    logger.debug(f"Removing lead_to reference {node_id} from node {depend_on}")
+                    depend_on.delete_lead_to_node(node_id)
+
+            self._node_db.pop(node_id)
+            logger.debug(f"Successfully removed node {node_id} from node_db")
+            del node
+            logger.info("Node object deleted")
+        else:
+            logger.error(f"Failed to find node with id {node_id}")
+            raise Exception(f"Node with id {node_id} does not exist")
+
+    @property
+    def graph_has_input_data(self) -> bool:
+        return self._input_data is not None
+
+    def add_node_step(self,
+                      node_id: Union[int, str],
+                      function: Callable,
+                      input_columns: List[str] = None,
+                      output_schema: List[FlowfileColumn] = None,
+                      node_type: str = None,
+                      drop_columns: List[str] = None,
+                      renew_schema: bool = True,
+                      setting_input: Any = None,
+                      cache_results: bool = None,
+                      schema_callback: Callable = None,
+                      input_node_ids: List[int] = None):
+        existing_node = self.get_node(node_id)
+        if existing_node is not None:
+            if existing_node.node_type != node_type:
+                self.delete_node(existing_node.node_id)
+                existing_node = None
+        if existing_node:
+            input_nodes = existing_node.all_inputs
+        elif input_node_ids is not None:
+            input_nodes = [self.get_node(node_id) for node_id in input_node_ids]
+        else:
+            input_nodes = None
+        if cache_results is None:
+            if hasattr(setting_input, 'cache_results'):
+                cache_results = getattr(setting_input, 'cache_results')
+            cache_results = False if cache_results is None else cache_results
+        if isinstance(input_columns, str):
+            input_columns = [input_columns]
+
+        if input_nodes is not None or function.__name__ in ('placeholder', 'analysis_preparation'):
+
+            if not existing_node:
+                node = FlowNode(node_id=node_id,
+                                function=function,
+                                output_schema=output_schema,
+                                input_columns=input_columns,
+                                drop_columns=drop_columns,
+                                renew_schema=renew_schema,
+                                setting_input=setting_input,
+                                node_type=node_type,
+                                name=function.__name__,
+                                schema_callback=schema_callback,
+                                parent_uuid=self.uuid)
+            else:
+                existing_node.update_node(function=function,
+                                          output_schema=output_schema,
+                                          input_columns=input_columns,
+                                          drop_columns=drop_columns,
+                                          setting_input=setting_input,
+                                          schema_callback=schema_callback)
+                node = existing_node
+        elif node_type == 'input_data':
+            node = None
+        else:
+            raise Exception("No data initialized")
+        self._node_db[node_id] = node
+        self._node_ids.append(node_id)
+
+    def add_include_cols(self, include_columns: List[str]):
+        for column in include_columns:
+            if column not in self._input_cols:
+                self._input_cols.append(column)
+            if column not in self._output_cols:
+                self._output_cols.append(column)
+        return self
+
+    def add_output(self, output_file: input_schema.NodeOutput):
+        def _func(df: FlowDataEngine):
+            execute_remote = self.execution_location != 'local'
+            df.output(output_fs=output_file.output_settings, flow_id=self.flow_id, node_id=output_file.node_id,
+                      execute_remote=execute_remote)
+            return df
+
+        def schema_callback():
+            input_node: FlowNode = self.get_node(output_file.node_id).node_inputs.main_inputs[0]
+
+            return input_node.schema
+
+        self.add_node_step(node_id=output_file.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type='output',
+                           setting_input=output_file,
+                           schema_callback=schema_callback,
+                           input_node_ids=[output_file.depending_on_id])
+
+    def add_database_writer(self, node_database_writer: input_schema.NodeDatabaseWriter):
+        logger.info("Adding database reader")
+        node_type = 'database_writer'
+        database_settings: input_schema.DatabaseWriteSettings = node_database_writer.database_write_settings
+        database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
+        if database_settings.connection_mode == 'inline':
+            database_connection: input_schema.DatabaseConnection = database_settings.database_connection
+            encrypted_password = get_encrypted_secret(current_user_id=node_database_writer.user_id,
+                                                      secret_name=database_connection.password_ref)
+            if encrypted_password is None:
+                raise HTTPException(status_code=400, detail="Password not found")
+        else:
+            database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
+                                                                        node_database_writer.user_id)
+            encrypted_password = database_reference_settings.password.get_secret_value()
+
+        def _func(df: FlowDataEngine):
+            df.lazy = True
+            database_external_write_settings = (
+                sql_models.DatabaseExternalWriteSettings.create_from_from_node_database_writer(
+                    node_database_writer=node_database_writer,
+                    password=encrypted_password,
+                    table_name=(database_settings.schema_name+'.'+database_settings.table_name
+                                if database_settings.schema_name else database_settings.table_name),
+                    database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
+                                                 else None),
+                    lf=df.data_frame
+                )
+            )
+            external_database_writer = ExternalDatabaseWriter(database_external_write_settings, wait_on_completion=False)
+            node._fetch_cached_df = external_database_writer
+            external_database_writer.get_result()
+            return df
+
+        def schema_callback():
+            input_node: FlowNode = self.get_node(node_database_writer.node_id).node_inputs.main_inputs[0]
+            return input_node.schema
+
+        self.add_node_step(
+            node_id=node_database_writer.node_id,
+            function=_func,
+            input_columns=[],
+            node_type=node_type,
+            setting_input=node_database_writer,
+            schema_callback=schema_callback,
+        )
+        node = self.get_node(node_database_writer.node_id)
+
+    def add_database_reader(self, node_database_reader: input_schema.NodeDatabaseReader):
+        logger.info("Adding database reader")
+        node_type = 'database_reader'
+        database_settings: input_schema.DatabaseSettings = node_database_reader.database_settings
+        database_connection: Optional[input_schema.DatabaseConnection | input_schema.FullDatabaseConnection]
+        if database_settings.connection_mode == 'inline':
+            database_connection: input_schema.DatabaseConnection = database_settings.database_connection
+            encrypted_password = get_encrypted_secret(current_user_id=node_database_reader.user_id,
+                                                      secret_name=database_connection.password_ref)
+            if encrypted_password is None:
+                raise HTTPException(status_code=400, detail="Password not found")
+        else:
+            database_reference_settings = get_local_database_connection(database_settings.database_connection_name,
+                                                                        node_database_reader.user_id)
+            database_connection = database_reference_settings
+            encrypted_password = database_reference_settings.password.get_secret_value()
+
+        def _func():
+            sql_source = BaseSqlSource(query=None if database_settings.query_mode == 'table' else database_settings.query,
+                                       table_name=database_settings.table_name,
+                                       schema_name=database_settings.schema_name,
+                                       fields=node_database_reader.fields,
+                                       )
+            database_external_read_settings = (
+                sql_models.DatabaseExternalReadSettings.create_from_from_node_database_reader(
+                    node_database_reader=node_database_reader,
+                    password=encrypted_password,
+                    query=sql_source.query,
+                    database_reference_settings=(database_reference_settings if database_settings.connection_mode == 'reference'
+                                                 else None),
+                )
+            )
+
+            external_database_fetcher = ExternalDatabaseFetcher(database_external_read_settings, wait_on_completion=False)
+            node._fetch_cached_df = external_database_fetcher
+            fl = FlowDataEngine(external_database_fetcher.get_result())
+            node_database_reader.fields = [c.get_minimal_field_info() for c in fl.schema]
+            return fl
+
+        def schema_callback():
+            sql_source = SqlSource(connection_string=
+                                   sql_utils.construct_sql_uri(database_type=database_connection.database_type,
+                                                               host=database_connection.host,
+                                                               port=database_connection.port,
+                                                               database=database_connection.database,
+                                                               username=database_connection.username,
+                                                               password=decrypt_secret(encrypted_password)),
+                                   query=None if database_settings.query_mode == 'table' else database_settings.query,
+                                   table_name=database_settings.table_name,
+                                   schema_name=database_settings.schema_name,
+                                   fields=node_database_reader.fields,
+                                   )
+            return sql_source.get_schema()
+
+        node = self.get_node(node_database_reader.node_id)
+        if node:
+            node.node_type = node_type
+            node.name = node_type
+            node.function = _func
+            node.setting_input = node_database_reader
+            node.node_settings.cache_results = node_database_reader.cache_results
+            if node_database_reader.node_id not in set(start_node.node_id for start_node in self._flow_starts):
+                self._flow_starts.append(node)
+            node.schema_callback = schema_callback
+        else:
+            node = FlowNode(node_database_reader.node_id, function=_func,
+                            setting_input=node_database_reader,
+                            name=node_type, node_type=node_type, parent_uuid=self.uuid,
+                            schema_callback=schema_callback)
+            self._node_db[node_database_reader.node_id] = node
+            self._flow_starts.append(node)
+            self._node_ids.append(node_database_reader.node_id)
+
+    def add_airbyte_reader(self, external_source_input: input_schema.NodeAirbyteReader):
+        logger.info('Adding airbyte reader')
+        node_type = 'airbyte_reader'
+        source_settings: input_schema.AirbyteReader = external_source_input.source_settings
+        airbyte_settings = airbyte_settings_from_config(source_settings, flow_id=self.flow_id,
+                                                        node_id=external_source_input.node_id)
+
+        logger.info("Airbyte settings created")
+        airbyte_settings.fields = source_settings.fields
+        external_source = data_source_factory(source_type='airbyte', airbyte_settings=airbyte_settings)
+
+        def _func():
+            logger.info('Calling external source')
+            external_fetcher = ExternalAirbyteFetcher(airbyte_settings, wait_on_completion=False)
+            node._fetch_cached_df = external_fetcher
+            fl = FlowDataEngine(external_fetcher.get_result())
+            external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
+            return fl
+
+        def schema_callback():
+            return [FlowfileColumn.from_input(f.name, f.data_type) for f in external_source.schema]
+
+        node = self.get_node(external_source_input.node_id)
+        if node:
+            node.node_type = node_type
+            node.name = node_type
+            node.function = _func
+            node.setting_input = external_source_input
+            node.node_settings.cache_results = external_source_input.cache_results
+            if external_source_input.node_id not in set(start_node.node_id for start_node in self._flow_starts):
+                self._flow_starts.append(node)
+            node.schema_callback = schema_callback
+        else:
+            node = FlowNode(external_source_input.node_id, function=_func,
+                            setting_input=external_source_input,
+                            name=node_type, node_type=node_type, parent_uuid=self.uuid,
+                            schema_callback=schema_callback)
+            self._node_db[external_source_input.node_id] = node
+            self._flow_starts.append(node)
+            self._node_ids.append(external_source_input.node_id)
+        if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
+            logger.info('Using provided schema in the node')
+
+    def add_google_sheet(self, external_source_input: input_schema.NodeExternalSource):
+        logger.info('Adding google sheet reader')
+        self.add_external_source(external_source_input)
+
+    def add_sql_source(self, external_source_input: input_schema.NodeExternalSource):
+        logger.info('Adding sql source')
+        self.add_external_source(external_source_input)
+
+    def add_external_source(self,
+                            external_source_input: input_schema.NodeExternalSource | input_schema.NodeAirbyteReader):
+
+        custom_source_type = external_source_input.identifier != 'airbyte'
+        if custom_source_type:
+            node_type = 'external_source'
+            external_source_script = getattr(external_sources.custom_external_sources, external_source_input.identifier)
+            source_settings = (getattr(input_schema, snake_case_to_camel_case(external_source_input.identifier)).
+                               model_validate(external_source_input.source_settings))
+            if hasattr(external_source_script, 'initial_getter'):
+                initial_getter = getattr(external_source_script, 'initial_getter')(source_settings)
+            else:
+                initial_getter = None
+            data_getter = external_source_script.getter(source_settings)
+            external_source = data_source_factory(source_type='custom',
+                                                  data_getter=data_getter,
+                                                  initial_data_getter=initial_getter,
+                                                  orientation=external_source_input.source_settings.orientation,
+                                                  schema=None)
+        else:
+            node_type = 'airbyte_reader'
+            source_settings: input_schema.AirbyteReader = external_source_input.source_settings
+            airbyte_settings = airbyte_settings_from_config(source_settings, flow_id=self.flow_id,
+                                                            node_id=external_source_input.node_id)
+            airbyte_settings.fields = source_settings.fields
+            external_source = data_source_factory(source_type='airbyte', airbyte_settings=airbyte_settings)
+
+        def _func():
+            logger.info('Calling external source')
+            fl = FlowDataEngine.create_from_external_source(external_source=external_source)
+            external_source_input.source_settings.fields = [c.get_minimal_field_info() for c in fl.schema]
+            return fl
+
+        node = self.get_node(external_source_input.node_id)
+        if node:
+            node.node_type = node_type
+            node.name = node_type
+            node.function = _func
+            node.setting_input = external_source_input
+            node.node_settings.cache_results = external_source_input.cache_results
+            if external_source_input.node_id not in set(start_node.node_id for start_node in self._flow_starts):
+                self._flow_starts.append(node)
+        else:
+            node = FlowNode(external_source_input.node_id, function=_func,
+                            setting_input=external_source_input,
+                            name=node_type, node_type=node_type, parent_uuid=self.uuid)
+            self._node_db[external_source_input.node_id] = node
+            self._flow_starts.append(node)
+            self._node_ids.append(external_source_input.node_id)
+        if external_source_input.source_settings.fields and len(external_source_input.source_settings.fields) > 0:
+            logger.info('Using provided schema in the node')
+
+            def schema_callback():
+                return [FlowfileColumn.from_input(f.name, f.data_type) for f in
+                        external_source_input.source_settings.fields]
+
+            node.schema_callback = schema_callback
+        else:
+            logger.warning('Removing schema')
+            node._schema_callback = None
+        self.add_node_step(node_id=external_source_input.node_id,
+                           function=_func,
+                           input_columns=[],
+                           node_type=node_type,
+                           setting_input=external_source_input)
+
+    def add_read(self, input_file: input_schema.NodeRead):
+        if input_file.received_file.file_type in ('xlsx', 'excel') and input_file.received_file.sheet_name == '':
+            sheet_name = fastexcel.read_excel(input_file.received_file.path).sheet_names[0]
+            input_file.received_file.sheet_name = sheet_name
+
+        received_file = input_file.received_file
+        input_file.received_file.set_absolute_filepath()
+
+        def _func():
+            if input_file.received_file.file_type == 'parquet':
+                input_data = FlowDataEngine.create_from_path(input_file.received_file)
+            elif input_file.received_file.file_type == 'csv' and 'utf' in input_file.received_file.encoding:
+                input_data = FlowDataEngine.create_from_path(input_file.received_file)
+            else:
+                input_data = FlowDataEngine.create_from_path_worker(input_file.received_file,
+                                                                    node_id=input_file.node_id,
+                                                                    flow_id=self.flow_id)
+            input_data.name = input_file.received_file.name
+            return input_data
+
+        node = self.get_node(input_file.node_id)
+        schema_callback = None
+        if node:
+            start_hash = node.hash
+            node.node_type = 'read'
+            node.name = 'read'
+            node.function = _func
+            node.setting_input = input_file
+            if input_file.node_id not in set(start_node.node_id for start_node in self._flow_starts):
+                self._flow_starts.append(node)
+
+            if start_hash != node.hash:
+                logger.info('Hash changed, updating schema')
+                if len(received_file.fields) > 0:
+                    # If the file has fields defined, we can use them to create the schema
+                    def schema_callback():
+                        return [FlowfileColumn.from_input(f.name, f.data_type) for f in received_file.fields]
+
+                elif input_file.received_file.file_type in ('csv', 'json', 'parquet'):
+                    # everything that can be scanned by polars
+                    def schema_callback():
+                        input_data = FlowDataEngine.create_from_path(input_file.received_file)
+                        return input_data.schema
+
+                elif input_file.received_file.file_type in ('xlsx', 'excel'):
+                    # If the file is an Excel file, we need to use the openpyxl engine to read the schema
+                    schema_callback = get_xlsx_schema_callback(engine='openpyxl',
+                                                               file_path=received_file.file_path,
+                                                               sheet_name=received_file.sheet_name,
+                                                               start_row=received_file.start_row,
+                                                               end_row=received_file.end_row,
+                                                               start_column=received_file.start_column,
+                                                               end_column=received_file.end_column,
+                                                               has_headers=received_file.has_headers)
+                else:
+                    schema_callback = None
+        else:
+            node = FlowNode(input_file.node_id, function=_func,
+                            setting_input=input_file,
+                            name='read', node_type='read', parent_uuid=self.uuid)
+            self._node_db[input_file.node_id] = node
+            self._flow_starts.append(node)
+            self._node_ids.append(input_file.node_id)
+
+        if schema_callback is not None:
+            node.schema_callback = schema_callback
+        return self
+
+    def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput):
+
+        if isinstance(input_file, input_schema.NodeManualInput):
+            input_data = FlowDataEngine(input_file.raw_data)
+            ref = 'manual_input'
+
+        else:
+            input_data = FlowDataEngine(path_ref=input_file.file_ref)
+            ref = 'datasource'
+        node = self.get_node(input_file.node_id)
+        if node:
+            node.node_type = ref
+            node.name = ref
+            node.function = input_data
+            node.setting_input = input_file
+
+            if not input_file.node_id in set(start_node.node_id for start_node in self._flow_starts):
+                self._flow_starts.append(node)
+        else:
+            node = FlowNode(input_file.node_id, function=input_data,
+                            setting_input=input_file,
+                            name=ref, node_type=ref, parent_uuid=self.uuid)
+            self._node_db[input_file.node_id] = node
+            self._flow_starts.append(node)
+            self._node_ids.append(input_file.node_id)
+        return self
+
+    def add_manual_input(self, input_file: input_schema.NodeManualInput):
+        self.add_datasource(input_file)
+
+    @property
+    def nodes(self) -> List[FlowNode]:
+        return list(self._node_db.values())
+
+    def check_for_missed_cols(self, expected_cols: List):
+        not_filled_cols = set(expected_cols) - set(self._output_cols)
+        cols_available = list(not_filled_cols & set([c.name for c in self._input_data.schema]))
+        self._output_cols += cols_available
+
+    @property
+    def input_data_columns(self) -> List[str]:
+        if self._input_cols:
+            return list(set([col for col in self._input_cols if
+                             col in [table_col.name for table_col in self._input_data.schema]]))
+
+ @property
1090
+ def execution_mode(self) -> str:
1091
+ return self.flow_settings.execution_mode
1092
+
1093
+ def get_implicit_starter_nodes(self) -> List[FlowNode]:
1094
+ """Ensures that nodes that can be a start (e.g. polars code), will be a starting node"""
1095
+ starting_node_ids = [node.node_id for node in self._flow_starts]
1096
+ implicit_starting_nodes = []
1097
+ for node in self.nodes:
1098
+ if node.node_template.can_be_start and not node.has_input and node.node_id not in starting_node_ids:
1099
+ implicit_starting_nodes.append(node)
1100
+ return implicit_starting_nodes
1101
+
1102
+ @execution_mode.setter
1103
+ def execution_mode(self, mode: str):
1104
+ self.flow_settings.execution_mode = mode
1105
+
1106
+ @property
1107
+ def execution_location(self) -> schemas.ExecutionLocationsLiteral:
1108
+ return self.flow_settings.execution_location
1109
+
1110
+ @execution_location.setter
1111
+ def execution_location(self, execution_location: schemas.ExecutionLocationsLiteral):
1112
+ self.flow_settings.execution_location = execution_location
1113
+
+     def run_graph(self):
+         if self.flow_settings.is_running:
+             raise Exception('Flow is already running')
+         try:
+             self.flow_settings.is_running = True
+             self.flow_settings.is_canceled = False
+             self.flow_logger.clear_log_file()
+             self.nodes_completed = 0
+             self.node_results = []
+             self.start_datetime = datetime.datetime.now()
+             self.end_datetime = None
+             self.latest_run_info = None
+             self.flow_logger.info('Starting to run flowfile flow...')
+             skip_nodes = [node for node in self.nodes if not node.is_correct]
+             skip_nodes.extend([lead_to_node for node in skip_nodes for lead_to_node in node.leads_to_nodes])
+             execution_order = determine_execution_order(all_nodes=[node for node in self.nodes if
+                                                                    node not in skip_nodes],
+                                                         flow_starts=self._flow_starts + self.get_implicit_starter_nodes())
+
+             skip_node_message(self.flow_logger, skip_nodes)
+             execution_order_message(self.flow_logger, execution_order)
+             performance_mode = self.flow_settings.execution_mode == 'Performance'
+             for node in execution_order:
+                 node_logger = self.flow_logger.get_node_logger(node.node_id)
+                 if self.flow_settings.is_canceled:
+                     self.flow_logger.info('Flow canceled')
+                     break
+                 if node in skip_nodes:
+                     node_logger.info(f'Skipping node {node.node_id}')
+                     continue
+                 node_result = NodeResult(node_id=node.node_id, node_name=node.name)
+                 self.node_results.append(node_result)
+                 logger.info(f'Starting to run: node {node.node_id}, start time: {node_result.start_timestamp}')
+                 node.execute_node(run_location=self.flow_settings.execution_location,
+                                   performance_mode=performance_mode,
+                                   node_logger=node_logger)
+                 try:
+                     node_result.error = str(node.results.errors)
+                     if self.flow_settings.is_canceled:
+                         node_result.success = None
+                         node_result.is_running = False
+                         continue
+                     node_result.success = node.results.errors is None
+                     node_result.end_timestamp = time()
+                     node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                     node_result.is_running = False
+                 except Exception as e:
+                     node_result.error = 'Node did not run'
+                     node_result.success = False
+                     node_result.end_timestamp = time()
+                     node_result.run_time = node_result.end_timestamp - node_result.start_timestamp
+                     node_result.is_running = False
+                     node_logger.error(f'Error in node {node.node_id}: {e}')
+                 if not node_result.success:
+                     skip_nodes.extend(list(node.get_all_dependent_nodes()))
+                 node_logger.info(f'Completed node with success: {node_result.success}')
+                 self.nodes_completed += 1
+             self.flow_logger.info('Flow completed!')
+             self.end_datetime = datetime.datetime.now()
+             self.flow_settings.is_running = False
+             if self.flow_settings.is_canceled:
+                 self.flow_logger.info('Flow canceled')
+             return self.get_run_info()
+         except Exception as e:
+             raise e
+         finally:
+             self.flow_settings.is_running = False
+
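A short usage sketch of the run loop above (illustrative only; it assumes a fully configured `graph`, and reads only the fields populated by `run_graph` and `get_run_info`):

# Illustrative sketch only.
run_info = graph.run_graph()          # returns the RunInformation built by get_run_info()
print(run_info.success, run_info.nodes_completed, run_info.number_of_nodes)
for step in run_info.node_step_result:
    print(step.node_id, step.success, step.run_time, step.error)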
+     def get_run_info(self) -> RunInformation:
+         if self.latest_run_info is None:
+             node_results = self.node_results
+             success = all(nr.success for nr in node_results)
+             self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
+                                                   success=success,
+                                                   node_step_result=node_results, flow_id=self.flow_id,
+                                                   nodes_completed=self.nodes_completed,
+                                                   number_of_nodes=len(self.nodes))
+         elif self.latest_run_info.nodes_completed != self.nodes_completed:
+             node_results = self.node_results
+             self.latest_run_info = RunInformation(start_time=self.start_datetime, end_time=self.end_datetime,
+                                                   success=all(nr.success for nr in node_results),
+                                                   node_step_result=node_results, flow_id=self.flow_id,
+                                                   nodes_completed=self.nodes_completed,
+                                                   number_of_nodes=len(self.nodes))
+         return self.latest_run_info
+
+     @property
+     def node_connections(self) -> List[Tuple[int, int]]:
+         connections = set()
+         for node in self.nodes:
+             outgoing_connections = [(node.node_id, ltn.node_id) for ltn in node.leads_to_nodes]
+             incoming_connections = [(don.node_id, node.node_id) for don in node.all_inputs]
+             node_connections = [c for c in outgoing_connections + incoming_connections if (c[0] is not None
+                                                                                            and c[1] is not None)]
+             for node_connection in node_connections:
+                 if node_connection not in connections:
+                     connections.add(node_connection)
+         return list(connections)
+
+     def get_schema(self) -> List[FlowfileColumn]:
+         if self.schema is None:
+             if len(self._node_ids) > 0:
+                 self.schema = self._node_db[self._node_ids[0]].schema
+         return self.schema
+
+     def get_example_data(self, node_id: int) -> TableExample | None:
+         node = self._node_db[node_id]
+         return node.get_table_example(include_data=True)
+
+     def get_node_data(self, node_id: int, include_example: bool = True) -> NodeData:
+         node = self._node_db[node_id]
+         return node.get_node_data(flow_id=self.flow_id, include_example=include_example)
+
+     def get_node_storage(self) -> schemas.FlowInformation:
+
+         node_information = {node.node_id: node.get_node_information() for
+                             node in self.nodes if node.is_setup and node.is_correct}
+
+         return schemas.FlowInformation(flow_id=self.flow_id,
+                                        flow_name=self.__name__,
+                                        storage_location=self.flow_settings.path,
+                                        flow_settings=self.flow_settings,
+                                        data=node_information,
+                                        node_starts=[v.node_id for v in self._flow_starts],
+                                        node_connections=self.node_connections
+                                        )
+
+     def cancel(self):
+         if not self.flow_settings.is_running:
+             return
+         self.flow_settings.is_canceled = True
+         for node in self.nodes:
+             node.cancel()
+
+     def close_flow(self):
+         for node in self.nodes:
+             node.remove_cache()
+
+     def save_flow(self, flow_path: str):
+         with open(flow_path, 'wb') as f:
+             pickle.dump(self.get_node_storage(), f)
+         self.flow_settings.path = flow_path
+
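Since `save_flow` simply pickles the `FlowInformation` returned by `get_node_storage`, a saved flow can be inspected with the standard library. A hedged sketch (the path is invented, and the package may expose a dedicated load routine elsewhere):

# Illustrative sketch only.
import pickle

graph.save_flow('/tmp/example_flow.pkl')          # invented path

with open('/tmp/example_flow.pkl', 'rb') as f:
    stored = pickle.load(f)                       # a schemas.FlowInformation instance
print(stored.flow_id, stored.node_starts, stored.node_connections)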
+     def get_frontend_data(self):
+         result = {
+             'Home': {
+                 "data": {}
+             }
+         }
+         flow_info: schemas.FlowInformation = self.get_node_storage()
+
+         for node_id, node_info in flow_info.data.items():
+             if node_info.is_setup:
+                 try:
+                     pos_x = node_info.data.pos_x
+                     pos_y = node_info.data.pos_y
+                     # Basic node structure
+                     result["Home"]["data"][str(node_id)] = {
+                         "id": node_info.id,
+                         "name": node_info.type,
+                         "data": {},  # Additional data can go here
+                         "class": node_info.type,
+                         "html": node_info.type,
+                         "typenode": "vue",
+                         "inputs": {},
+                         "outputs": {},
+                         "pos_x": pos_x,
+                         "pos_y": pos_y
+                     }
+                 except Exception as e:
+                     logger.error(e)
+                 # Add outputs to the node based on `outputs` in your backend data
+                 if node_info.outputs:
+                     outputs = {o: 0 for o in node_info.outputs}
+                     for o in node_info.outputs:
+                         outputs[o] += 1
+                     connections = []
+                     for output_node_id, n_connections in outputs.items():
+                         leading_to_node = self.get_node(output_node_id)
+                         input_types = leading_to_node.get_input_type(node_info.id)
+                         for input_type in input_types:
+                             if input_type == 'main':
+                                 input_frontend_id = 'input_1'
+                             elif input_type == 'right':
+                                 input_frontend_id = 'input_2'
+                             elif input_type == 'left':
+                                 input_frontend_id = 'input_3'
+                             else:
+                                 input_frontend_id = 'input_1'
+                             connection = {"node": str(output_node_id), "input": input_frontend_id}
+                             connections.append(connection)
+
+                     result["Home"]["data"][str(node_id)]["outputs"]["output_1"] = {
+                         "connections": connections}
+                 else:
+                     result["Home"]["data"][str(node_id)]["outputs"] = {"output_1": {"connections": []}}
+
+                 # Add input to the node based on `depending_on_id` in your backend data
+                 if node_info.left_input_id is not None or node_info.right_input_id is not None or node_info.input_ids is not None:
+                     main_inputs = node_info.main_input_ids
+                     result["Home"]["data"][str(node_id)]["inputs"]["input_1"] = {
+                         "connections": [{"node": str(main_node_id), "input": "output_1"} for main_node_id in main_inputs]
+                     }
+                     if node_info.right_input_id is not None:
+                         result["Home"]["data"][str(node_id)]["inputs"]["input_2"] = {
+                             "connections": [{"node": str(node_info.right_input_id), "input": "output_1"}]
+                         }
+                     if node_info.left_input_id is not None:
+                         result["Home"]["data"][str(node_id)]["inputs"]["input_3"] = {
+                             "connections": [{"node": str(node_info.left_input_id), "input": "output_1"}]
+                         }
+         return result
+
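For orientation, the nested dictionary built by `get_frontend_data` looks roughly like this for a single node (all values invented):

# Illustrative shape only -- not real output.
{
    'Home': {
        'data': {
            '2': {
                'id': 2, 'name': 'filter', 'data': {}, 'class': 'filter',
                'html': 'filter', 'typenode': 'vue',
                'inputs': {'input_1': {'connections': [{'node': '1', 'input': 'output_1'}]}},
                'outputs': {'output_1': {'connections': [{'node': '3', 'input': 'input_1'}]}},
                'pos_x': 120, 'pos_y': 80,
            }
        }
    }
}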
+     def get_vue_flow_input(self) -> schemas.VueFlowInput:
+         edges: List[schemas.NodeEdge] = []
+         nodes: List[schemas.NodeInput] = []
+         for node in self.nodes:
+             nodes.append(node.get_node_input())
+             edges.extend(node.get_edge_input())
+         return schemas.VueFlowInput(node_edges=edges, node_inputs=nodes)
+
+     def reset(self):
+         for node in self.nodes:
+             node.reset(True)
+
+     def copy_node(self, new_node_settings: input_schema.NodePromise, existing_setting_input: Any, node_type: str) -> None:
+         """Copy an existing node with potentially new settings."""
+         self.add_node_promise(new_node_settings)
+
+         if isinstance(existing_setting_input, input_schema.NodePromise):
+             return
+
+         combined_settings = combine_existing_settings_and_new_settings(
+             existing_setting_input, new_node_settings
+         )
+         getattr(self, f"add_{node_type}")(combined_settings)
+
+
+ def combine_existing_settings_and_new_settings(setting_input: Any, new_settings: input_schema.NodePromise) -> Any:
+     """Combine existing settings with new settings from a NodePromise."""
+     copied_setting_input = deepcopy(setting_input)
+
+     # Update only attributes that exist on new_settings
+     fields_to_update = (
+         "node_id",
+         "pos_x",
+         "pos_y",
+         "description",
+         "flow_id"
+     )
+
+     for field in fields_to_update:
+         if hasattr(new_settings, field) and getattr(new_settings, field) is not None:
+             setattr(copied_setting_input, field, getattr(new_settings, field))
+
+     return copied_setting_input
+
+
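A small self-contained sketch of what the helper above does: only `node_id`, `pos_x`, `pos_y`, `description` and `flow_id` are taken from the promise (and only when they are not None); everything else on the copied settings is preserved. The two classes below are stand-ins for illustration, not real schema models:

# Illustrative sketch only.
class _FakeSettings:                      # stand-in for an existing settings model
    node_id, pos_x, pos_y, flow_id = 1, 0, 0, 1
    description = 'original'
    keep_me = 'unchanged'

class _FakePromise:                       # stand-in for input_schema.NodePromise
    node_id, pos_x, pos_y, flow_id = 7, 100, 200, 1
    description = None                    # None values are skipped by the helper

combined = combine_existing_settings_and_new_settings(_FakeSettings(), _FakePromise())
assert combined.node_id == 7 and combined.pos_x == 100
assert combined.description == 'original' and combined.keep_me == 'unchanged'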
+ def add_connection(flow: FlowGraph, node_connection: input_schema.NodeConnection):
+     logger.info('adding a connection')
+     from_node = flow.get_node(node_connection.output_connection.node_id)
+     to_node = flow.get_node(node_connection.input_connection.node_id)
+     logger.info(f'from_node={from_node}, to_node={to_node}')
+     if not (from_node and to_node):
+         raise HTTPException(404, 'Node not available')
+     else:
+         to_node.add_node_connection(from_node, node_connection.input_connection.get_node_input_connection_type())
+
+
+ def delete_connection(graph: FlowGraph, node_connection: input_schema.NodeConnection):
+     """Delete the connection between two nodes."""
+     from_node = graph.get_node(node_connection.output_connection.node_id)
+     to_node = graph.get_node(node_connection.input_connection.node_id)
+     connection_valid = to_node.node_inputs.validate_if_input_connection_exists(
+         node_input_id=from_node.node_id,
+         connection_name=node_connection.input_connection.get_node_input_connection_type(),
+     )
+     if not connection_valid:
+         raise HTTPException(422, "Connection does not exist on the input node")
+     if from_node is not None:
+         from_node.delete_lead_to_node(node_connection.input_connection.node_id)
+
+     if to_node is not None:
+         to_node.delete_input_node(
+             node_connection.output_connection.node_id,
+             connection_type=node_connection.input_connection.connection_class,
+         )
+
+
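Finally, a hedged sketch of how these module-level helpers are driven (illustrative only: `graph` is a FlowGraph and `node_connection` is assumed to be an already-validated `input_schema.NodeConnection`, e.g. parsed from an API request body):

# Illustrative sketch only.
try:
    add_connection(graph, node_connection)      # wires the output node to the input node
except HTTPException:
    pass                                        # one of the two node ids is not in the graph

delete_connection(graph, node_connection)       # removes the same edge again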