Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
@@ -0,0 +1,771 @@
|
|
|
1
|
+
|
|
2
|
+
from typing import List, Union, Callable, Any, Optional, Generator, Literal
|
|
3
|
+
from flowfile_core.configs import logger
|
|
4
|
+
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
5
|
+
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
6
|
+
from flowfile_core.utils.arrow_reader import get_read_top_n
|
|
7
|
+
from flowfile_core.schemas import input_schema, schemas
|
|
8
|
+
from flowfile_core.configs.flow_logger import NodeLogger
|
|
9
|
+
|
|
10
|
+
from flowfile_core.schemas.output_model import TableExample, FileColumn, NodeData
|
|
11
|
+
from flowfile_core.flowfile.utils import get_hash
|
|
12
|
+
from flowfile_core.configs.node_store import nodes as node_interface
|
|
13
|
+
from flowfile_core.flowfile.setting_generator import setting_generator, setting_updator
|
|
14
|
+
from time import sleep
|
|
15
|
+
from flowfile_core.flowfile.flow_data_engine.subprocess_operations import (
|
|
16
|
+
ExternalDfFetcher, ExternalSampler, results_exists, get_external_df_result, ExternalDatabaseFetcher, ExternalDatabaseWriter)
|
|
17
|
+
from flowfile_core.flowfile.flow_node.models import (NodeStepSettings, NodeStepInputs, NodeSchemaInformation,
|
|
18
|
+
NodeStepStats, NodeResults)
|
|
19
|
+
from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class FlowNode:
    """A single step in a flow graph.

    Holds the node's settings, schema information, upstream inputs and the
    downstream nodes it triggers, plus the function executed when the node
    runs. Instances are initialised via ``__init__`` -> ``post_init`` ->
    ``update_node``.
    """

    parent_uuid: str  # uuid of the flow that owns this node
    node_type: str  # key into node_interface.node_dict / node_defaults
    node_template: node_interface.NodeTemplate
    node_default: schemas.NodeDefault
    node_schema: NodeSchemaInformation
    node_inputs: NodeStepInputs
    node_stats: NodeStepStats
    node_settings: NodeStepSettings
    results: NodeResults
    node_information: Optional[schemas.NodeInformation] = None
    # List with target flows; after execution this step will trigger those step(s).
    # NOTE(review): mutable class-level default — shared across instances until
    # post_init() rebinds it per instance; do not mutate before post_init runs.
    leads_to_nodes: List["FlowNode"] = []
    _setting_input: Any = None
    _hash: Optional[str] = None  # cached node hash, used for caching results
    _function: Callable = None  # the function that is executed when triggered
    _schema_callback: Optional[SingleExecutionFuture] = None  # computes the schema without executing
    _state_needs_reset: bool = False
    _fetch_cached_df: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
    _cache_progress: Optional[ExternalDfFetcher | ExternalDatabaseFetcher | ExternalDatabaseWriter] = None
|
|
41
|
+
|
|
42
|
+
    def post_init(self):
        """Give the instance fresh per-instance state.

        Rebinds every mutable container so instances never share the
        class-level defaults declared above.
        """
        self.node_inputs = NodeStepInputs()
        self.node_stats = NodeStepStats()
        self.node_settings = NodeStepSettings()
        self.node_schema = NodeSchemaInformation()
        self.results = NodeResults()
        self.node_information = schemas.NodeInformation()
        self.leads_to_nodes = []
        self._setting_input = None
        self._cache_progress = None
        self._schema_callback = None
        self._state_needs_reset = False
|
|
54
|
+
|
|
55
|
+
    @property
    def state_needs_reset(self) -> bool:
        """Whether the node's state must be reset before its next use."""
        return self._state_needs_reset

    @state_needs_reset.setter
    def state_needs_reset(self, v: bool):
        self._state_needs_reset = v
|
|
62
|
+
|
|
63
|
+
    @property
    def schema_callback(self):
        """Single-shot future that computes the node's schema without executing it."""
        return self._schema_callback

    @schema_callback.setter
    def schema_callback(self, f: Callable):
        # Keep the existing callback when no new one is supplied.
        if f is None:
            return

        def error_callback(e: Exception) -> List:
            # A failing schema callback marks the node as having setup errors
            # and yields an empty schema instead of raising.
            logger.warning(e)

            self.node_settings.setup_errors = True
            return []

        # Wrap in a single-execution future and start computing immediately.
        self._schema_callback = SingleExecutionFuture(f, error_callback)
        self._schema_callback.start()
|
|
80
|
+
|
|
81
|
+
    @property
    def is_start(self) -> bool:
        """True when the node has no connected inputs and its template expects none."""
        return not self.has_input and self.node_template.input == 0
|
|
84
|
+
|
|
85
|
+
def get_input_type(self, node_id: int) -> List:
|
|
86
|
+
relation_type = []
|
|
87
|
+
if node_id in [n.node_id for n in self.node_inputs.main_inputs]:
|
|
88
|
+
relation_type.append('main')
|
|
89
|
+
if self.node_inputs.left_input is not None and node_id == self.node_inputs.left_input.node_id:
|
|
90
|
+
relation_type.append('left')
|
|
91
|
+
if self.node_inputs.right_input is not None and node_id == self.node_inputs.right_input.node_id:
|
|
92
|
+
relation_type.append('right')
|
|
93
|
+
return list(set(relation_type))
|
|
94
|
+
|
|
95
|
+
    def __init__(self, node_id: Union[str, int], function: Callable,
                 parent_uuid: str,
                 setting_input: Any,
                 name: str,
                 node_type: str,
                 input_columns: List[str] = None,
                 output_schema: List[FlowfileColumn] = None,
                 drop_columns: List[str] = None,
                 renew_schema: bool = True,
                 pos_x: float = 0,
                 pos_y: float = 0,
                 schema_callback: Callable = None,
                 ):
        """Create a node and immediately apply its configuration.

        :param node_id: identifier of the node within the flow.
        :param function: callable executed when the node runs.
        :param parent_uuid: uuid of the owning flow (part of the node hash).
        :param setting_input: raw settings object driving the node.
        :param name: display name (falls back to node_type when None in update_node).
        :param node_type: template key, e.g. 'manual_input', 'output'.
        :param input_columns: columns this node consumes.
        :param output_schema: predicted output columns.
        :param drop_columns: columns removed by this node.
        :param renew_schema: whether the schema should be recalculated.
        :param pos_x/pos_y: canvas coordinates for the UI.
        :param schema_callback: optional callable that predicts the schema.
        """
        self.parent_uuid = parent_uuid
        # post_init must run first: it creates the per-instance containers
        # that update_node writes into.
        self.post_init()
        self.active = True
        self.node_information.id = node_id
        self.node_type = node_type
        self.node_settings.renew_schema = renew_schema
        self.update_node(function=function,
                         input_columns=input_columns,
                         output_schema=output_schema,
                         drop_columns=drop_columns,
                         setting_input=setting_input,
                         name=name,
                         pos_x=pos_x,
                         pos_y=pos_y,
                         schema_callback=schema_callback,
                         )
|
|
124
|
+
|
|
125
|
+
    def update_node(self,
                    function: Callable,
                    input_columns: List[str] = None,
                    output_schema: List[FlowfileColumn] = None,
                    drop_columns: List[str] = None,
                    name: str = None,
                    setting_input: Any = None,
                    pos_x: float = 0,
                    pos_y: float = 0,
                    schema_callback: Callable = None,
                    ):
        """Re-configure the node in place.

        Statement order matters here: the setting_input assignment triggers
        set_node_information / reset via its property setter, and the hash is
        forced to compute before the template lookup.

        :raises Exception: when node_type has no registered template.
        """

        self.schema_callback = schema_callback
        self.node_information.y_position = pos_y
        self.node_information.x_position = pos_x
        self.node_information.setting_input = setting_input
        self.name = self.node_type if name is None else name
        self._function = function
        self.node_schema.input_columns = [] if input_columns is None else input_columns
        self.node_schema.output_columns = [] if output_schema is None else output_schema
        self.node_schema.drop_columns = [] if drop_columns is None else drop_columns
        self.node_settings.renew_schema = True
        # Settings may carry their own caching preference.
        if hasattr(setting_input, 'cache_results'):
            self.node_settings.cache_results = setting_input.cache_results
        self.setting_input = setting_input  # property setter: may reset / re-predict
        self.results.errors = None
        self.add_lead_to_in_depend_source()
        _ = self.hash  # force hash computation so it is cached early
        self.node_template = node_interface.node_dict.get(self.node_type)
        if self.node_template is None:
            raise Exception(f'Node template {self.node_type} not found')
        self.node_default = node_interface.node_defaults.get(self.node_type)
|
|
157
|
+
|
|
158
|
+
@property
|
|
159
|
+
def name(self):
|
|
160
|
+
return self._name
|
|
161
|
+
|
|
162
|
+
@name.setter
|
|
163
|
+
def name(self, name: str):
|
|
164
|
+
self._name = name
|
|
165
|
+
self.__name__ = name
|
|
166
|
+
|
|
167
|
+
    @property
    def setting_input(self):
        """The raw settings object driving this node."""
        return self._setting_input

    @setting_input.setter
    def setting_input(self, setting_input: Any):
        """Assign new settings and propagate side effects (reset, re-predict)."""
        self._setting_input = setting_input
        self.set_node_information()
        # Manual-input nodes embed their data inside the settings, so a changed
        # setting may mean new data: rebuild the data function and re-predict.
        if self.node_type == 'manual_input' and isinstance(self._setting_input, input_schema.NodeManualInput):
            if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run:
                # Recreate the data holder from the new raw data.
                self.function = self.function.__class__(setting_input.raw_data)
                self.reset()
                self.get_predicted_schema()
        elif self._setting_input is not None:
            self.reset()
|
|
182
|
+
|
|
183
|
+
    @property
    def node_id(self):
        """Identifier of this node within the flow."""
        return self.node_information.id
|
|
186
|
+
|
|
187
|
+
    @property
    def left_input(self):
        """Upstream node connected on the 'left' relation (e.g. for joins), or None."""
        return self.node_inputs.left_input
|
|
190
|
+
|
|
191
|
+
    @property
    def right_input(self):
        """Upstream node connected on the 'right' relation (e.g. for joins), or None."""
        return self.node_inputs.right_input
|
|
194
|
+
|
|
195
|
+
    @property
    def main_input(self) -> List["FlowNode"]:
        """Upstream nodes connected on the 'main' relation (may be None when unset)."""
        return self.node_inputs.main_inputs
|
|
198
|
+
|
|
199
|
+
@property
|
|
200
|
+
def is_correct(self):
|
|
201
|
+
# Check if inputs meet requirements
|
|
202
|
+
if isinstance(self.setting_input, input_schema.NodePromise):
|
|
203
|
+
return False
|
|
204
|
+
return (self.node_template.input == len(self.node_inputs.get_all_inputs()) or
|
|
205
|
+
(self.node_template.multi and len(self.node_inputs.get_all_inputs()) > 0) or
|
|
206
|
+
(self.node_template.multi and self.node_template.can_be_start))
|
|
207
|
+
|
|
208
|
+
    def set_node_information(self):
        """Synchronise self.node_information with the current inputs/settings.

        Positions are taken from the setting_input, so settings must carry
        pos_x/pos_y attributes when this runs.
        """
        logger.info('setting node information')
        node_information = self.node_information
        node_information.left_input_id = self.node_inputs.left_input.node_id if self.left_input else None
        node_information.right_input_id = self.node_inputs.right_input.node_id if self.right_input else None
        # main_inputs is None until the first 'main' connection is made.
        node_information.input_ids = [mi.node_id for mi in
                                      self.node_inputs.main_inputs] if self.node_inputs.main_inputs is not None else None
        node_information.setting_input = self.setting_input
        node_information.outputs = [n.node_id for n in self.leads_to_nodes]
        node_information.is_setup = self.is_setup
        node_information.x_position = self.setting_input.pos_x
        node_information.y_position = self.setting_input.pos_y
        node_information.type = self.node_type
|
|
221
|
+
|
|
222
|
+
    def get_node_information(self) -> schemas.NodeInformation:
        """Refresh and return the node's information record."""
        self.set_node_information()
        return self.node_information
|
|
225
|
+
|
|
226
|
+
    @property
    def function(self):
        """The callable (or data holder) executed when the node is triggered."""
        return self._function
|
|
229
|
+
|
|
230
|
+
    def reset_hash(self) -> bool:
        """Recompute the node hash; return True when it changed.

        A changed hash invalidates any cached results kept for the old hash.
        """
        old_hash = self._hash
        self._hash = None  # forces `self.hash` to recompute below
        if self.hash != old_hash:
            if self.node_settings.cache_results:
                self.remove_cache()
            return True
        return False
|
|
238
|
+
|
|
239
|
+
    @property
    def all_inputs(self) -> List["FlowNode"]:
        """Every connected upstream node (main, left and right combined)."""
        return self.node_inputs.get_all_inputs()
|
|
242
|
+
|
|
243
|
+
    def calculate_hash(self, setting_input: Any):
        """Hash this node's settings together with all upstream hashes.

        Including upstream hashes and the parent flow uuid means any upstream
        change (or moving to another flow) produces a different hash.
        """
        depends_on_hashes = [_node.hash for _node in self.all_inputs]
        node_data_hash = get_hash(setting_input)
        return get_hash(depends_on_hashes + [node_data_hash, self.parent_uuid])
|
|
247
|
+
|
|
248
|
+
    @property
    def hash(self):
        """Lazily-computed hash of this node (see calculate_hash)."""
        if not self._hash:
            self._hash = self.calculate_hash(self.setting_input)
        return self._hash
|
|
253
|
+
|
|
254
|
+
    @function.setter
    def function(self, function: Callable):
        """Swap the executed callable; deliberately does NOT reset the node."""
        self._function = function
        # self.reset()
|
|
258
|
+
|
|
259
|
+
    def add_node_connection(self, from_node: "FlowNode", insert_type: Literal['main', 'left', 'right'] = 'main'):
        """Connect *from_node* as an input of this node and reset both nodes.

        :param from_node: the upstream node.
        :param insert_type: which input slot to attach to.
        :raises Exception: when insert_type is not one of the known slots.
        """
        from_node.leads_to_nodes.append(self)
        if insert_type == 'main':
            # Templates expecting few inputs keep a single main input; only
            # multi-input templates accumulate — TODO confirm the `<= 2` rule.
            if self.node_template.input <= 2 or self.node_inputs.main_inputs is None:
                self.node_inputs.main_inputs = [from_node]
            else:
                self.node_inputs.main_inputs.append(from_node)
        elif insert_type == 'right':
            self.node_inputs.right_input = from_node
        elif insert_type == 'left':
            self.node_inputs.left_input = from_node
        else:
            raise Exception('Cannot find the connection')
        if self.setting_input.is_setup:
            if hasattr(self.setting_input, 'depending_on_id') and insert_type == 'main':
                self.setting_input.depending_on_id = from_node.node_id
        # Both ends of the new edge must recompute their state.
        self.reset()
        from_node.reset()
|
|
277
|
+
|
|
278
|
+
    def evaluate_nodes(self, deep: bool = False):
        """Reset every directly-downstream node.

        :param deep: forwarded to each node's reset — presumably cascades
            further downstream; confirm against FlowNode.reset.
        """
        for node in self.leads_to_nodes:
            self.print(f'resetting node: {node.node_id}')
            node.reset(deep)
|
|
282
|
+
|
|
283
|
+
def get_flow_file_column_schema(self, col_name: str) -> FlowfileColumn:
|
|
284
|
+
for s in self.schema:
|
|
285
|
+
if s.column_name == col_name:
|
|
286
|
+
return s
|
|
287
|
+
|
|
288
|
+
def get_predicted_schema(self, force: bool = False):
|
|
289
|
+
"""
|
|
290
|
+
Method to get a predicted schema based on the columns that are dropped and added
|
|
291
|
+
:return:
|
|
292
|
+
"""
|
|
293
|
+
if self.node_schema.predicted_schema is not None and not force:
|
|
294
|
+
return self.node_schema.predicted_schema
|
|
295
|
+
if self.schema_callback is not None and (self.node_schema.predicted_schema is None or force):
|
|
296
|
+
self.print('Getting the data from a schema callback')
|
|
297
|
+
if force:
|
|
298
|
+
# Force the schema callback to reset, so that it will be executed again
|
|
299
|
+
self.schema_callback.reset()
|
|
300
|
+
schema = self.schema_callback()
|
|
301
|
+
if schema is not None:
|
|
302
|
+
self.print('Calculating the schema based on the schema callback')
|
|
303
|
+
self.node_schema.predicted_schema = schema
|
|
304
|
+
return self.node_schema.predicted_schema
|
|
305
|
+
predicted_data = self._predicted_data_getter()
|
|
306
|
+
if predicted_data is not None and predicted_data.schema is not None:
|
|
307
|
+
self.print('Calculating the schema based on the predicted resulting data')
|
|
308
|
+
self.node_schema.predicted_schema = self._predicted_data_getter().schema
|
|
309
|
+
return self.node_schema.predicted_schema
|
|
310
|
+
|
|
311
|
+
    @property
    def is_setup(self) -> bool:
        """Whether the node has real settings (not a placeholder).

        NOTE: reading this property has side effects — it promotes the node
        (and its setting_input) to 'set up' once a non-placeholder function
        is attached.
        """
        if not self.node_information.is_setup:
            if self.function.__name__ != 'placeholder':
                self.node_information.is_setup = True
                self.setting_input.is_setup = True
        return self.node_information.is_setup
|
|
318
|
+
|
|
319
|
+
    def print(self, v: Any):
        """Log *v* prefixed with the node type and id (INFO level)."""
        logger.info(f'{self.node_type}, node_id: {self.node_id}: {v}')
|
|
321
|
+
|
|
322
|
+
    def get_resulting_data(self) -> FlowDataEngine:
        """Compute (once) and return the node's resulting data.

        The result is memoized in self.results.resulting_data; failures are
        recorded in self.results.errors before re-raising.

        :raises Exception: whatever the node function raised.
        """
        if self.is_setup:
            # Only compute when there is no memoized result and no prior error.
            if self.results.resulting_data is None and self.results.errors is None:
                self.print('getting resulting data')
                try:
                    if isinstance(self.function, FlowDataEngine):
                        # Data-holder nodes (e.g. manual input) are the data.
                        fl: FlowDataEngine = self.function
                    elif self.node_type in ('external_source', 'airbyte_reader'):
                        fl: FlowDataEngine = self.function()
                        fl.collect_external()
                        # External data cannot be streamed lazily.
                        self.node_settings.streamable = False
                    else:
                        try:
                            # Feed the results of all upstream nodes into this node's function.
                            fl = self._function(*[v.get_resulting_data() for v in self.all_inputs])
                        except Exception as e:
                            raise e
                    fl.set_streamable(self.node_settings.streamable)
                    self.results.resulting_data = fl
                    self.node_schema.result_schema = fl.schema
                except Exception as e:
                    # Record the failure so downstream calls see the error state.
                    self.results.resulting_data = FlowDataEngine()
                    self.results.errors = str(e)
                    self.node_stats.has_run = False
                    raise e
            return self.results.resulting_data
|
|
347
|
+
|
|
348
|
+
def _predicted_data_getter(self) -> FlowDataEngine|None:
|
|
349
|
+
try:
|
|
350
|
+
fl = self._function(*[v.get_predicted_resulting_data() for v in self.all_inputs])
|
|
351
|
+
return fl
|
|
352
|
+
except ValueError as e:
|
|
353
|
+
if str(e) == "generator already executing":
|
|
354
|
+
logger.info('Generator already executing, waiting for the result')
|
|
355
|
+
sleep(1)
|
|
356
|
+
return self._predicted_data_getter()
|
|
357
|
+
fl = FlowDataEngine()
|
|
358
|
+
return fl
|
|
359
|
+
|
|
360
|
+
except Exception as e:
|
|
361
|
+
logger.warning('there was an issue with the function, returning an empty Flowfile')
|
|
362
|
+
logger.warning(e)
|
|
363
|
+
|
|
364
|
+
    def get_predicted_resulting_data(self) -> FlowDataEngine:
        """Return a FlowDataEngine with the predicted schema (no real execution).

        Precedence note: the condition parses as
        (needs_run AND schema_callback available) OR result_schema available.
        """
        if self.needs_run(False) and self.schema_callback is not None or self.node_schema.result_schema is not None:
            self.print('Getting data based on the schema')
            # Prefer the actual result schema when present; else ask the callback.
            _s = self.schema_callback() if self.node_schema.result_schema is None else self.node_schema.result_schema
            return FlowDataEngine.create_from_schema(_s)
        else:
            if isinstance(self.function, FlowDataEngine):
                # Data-holder nodes already are the data.
                fl = self.function
            else:
                fl = FlowDataEngine.create_from_schema(self.get_predicted_schema())
            return fl
|
|
375
|
+
|
|
376
|
+
def add_lead_to_in_depend_source(self):
|
|
377
|
+
for input_node in self.all_inputs:
|
|
378
|
+
if self.node_id not in [n.node_id for n in input_node.leads_to_nodes]:
|
|
379
|
+
input_node.leads_to_nodes.append(self)
|
|
380
|
+
|
|
381
|
+
def get_all_dependent_nodes(self) -> Generator["FlowNode", None, None]:
|
|
382
|
+
for node in self.leads_to_nodes:
|
|
383
|
+
yield node
|
|
384
|
+
for n in node.get_all_dependent_nodes():
|
|
385
|
+
yield n
|
|
386
|
+
|
|
387
|
+
def get_all_dependent_node_ids(self) -> Generator[int, None, None]:
|
|
388
|
+
for node in self.leads_to_nodes:
|
|
389
|
+
yield node.node_id
|
|
390
|
+
for n in node.get_all_dependent_node_ids():
|
|
391
|
+
yield n
|
|
392
|
+
|
|
393
|
+
@property
|
|
394
|
+
def schema(self) -> List[FlowfileColumn]:
|
|
395
|
+
try:
|
|
396
|
+
if self.is_setup and self.results.errors is None:
|
|
397
|
+
if self.node_schema.result_schema is not None and len(self.node_schema.result_schema) > 0:
|
|
398
|
+
return self.node_schema.result_schema
|
|
399
|
+
elif self.node_type == 'output':
|
|
400
|
+
if len(self.node_inputs.main_inputs) > 0:
|
|
401
|
+
self.node_schema.result_schema = self.node_inputs.main_inputs[0].schema
|
|
402
|
+
else:
|
|
403
|
+
self.node_schema.result_schema = self.get_predicted_schema()
|
|
404
|
+
return self.node_schema.result_schema
|
|
405
|
+
else:
|
|
406
|
+
return []
|
|
407
|
+
except:
|
|
408
|
+
return []
|
|
409
|
+
|
|
410
|
+
    def load_from_cache(self) -> FlowDataEngine:
        """Load this node's cached result, if any.

        Returns None (implicitly) when no cache exists for the node hash or
        when reading the cache fails — callers must handle the None case.
        """
        if results_exists(self.hash):
            try:
                return FlowDataEngine(self._fetch_cached_df.get_result())
            except Exception as e:
                # Best effort: a broken cache is logged, not raised.
                logger.error(e)
|
|
416
|
+
|
|
417
|
+
    def remove_cache(self):
        """Remove the cached result for this node's hash (currently a stub)."""
        if results_exists(self.hash):
            # TODO: actual cache removal is not implemented yet.
            logger.warning('Not implemented')
|
|
420
|
+
|
|
421
|
+
    def needs_run(self, performance_mode: bool, node_logger: NodeLogger = None,
                  execution_location: schemas.ExecutionLocationsLiteral = "auto") -> bool:
        """Decide whether the node must be (re)executed.

        :param performance_mode: when True, existing cache is not reused
            unless caching is explicitly enabled.
        :param node_logger: optional per-node logger; falls back to module logger.
        :param execution_location: 'local' short-circuits to False.
        """
        if execution_location == "local":
            return False
        flow_logger = logger if node_logger is None else node_logger
        cache_result_exists = results_exists(self.hash)
        if not self.node_stats.has_run:
            flow_logger.info('Node has not run, needs to run')
            return True
        # Already ran: decide based on caching preference and cache presence.
        if self.node_settings.cache_results and cache_result_exists:

            return False
        elif self.node_settings.cache_results and not cache_result_exists:
            return True
        elif not performance_mode and cache_result_exists:
            return False
        else:
            return True
|
|
439
|
+
|
|
440
|
+
    def __call__(self, *args, **kwargs):
        """Calling the node delegates to execute_node (return value discarded)."""
        self.execute_node(*args, **kwargs)
|
|
442
|
+
|
|
443
|
+
    def execute_local(self, flow_id: int, performance_mode: bool = False):
        """Execute the node in-process and record the outcome.

        :param flow_id: id of the flow (used when sampling example data).
        :param performance_mode: when True, skip generating example data.
        :raises Exception: re-raises any failure after recording it in results.
        """
        try:
            resulting_data = self.get_resulting_data()
            if not performance_mode:
                # Sample example data for the UI via an external process.
                external_sampler = ExternalSampler(lf=resulting_data.data_frame, file_ref=self.hash,
                                                   wait_on_completion=True, node_id=self.node_id, flow_id=flow_id)
                self.store_example_data_generator(external_sampler)
            if self.results.errors is None and not self.node_stats.is_canceled:
                self.node_stats.has_run = True
                self.node_schema.result_schema = resulting_data.schema

        except Exception as e:
            logger.warning(f"Error with step {self.__name__}")
            logger.error(str(e))
            self.results.errors = str(e)
            self.node_stats.has_run = False
            raise e

        if self.node_stats.has_run:
            # Non-streamable output forces downstream nodes to be non-streamable too.
            for step in self.leads_to_nodes:
                if not self.node_settings.streamable:
                    step.node_settings.streamable = self.node_settings.streamable
|
|
465
|
+
|
|
466
|
+
    def execute_remote(self, performance_mode: bool = False, node_logger: NodeLogger = None):
        """Run the node via the external worker process.

        Order of attempts: reuse a cached result when caching is enabled,
        short-circuit for output nodes, validate that the lazy frame can be
        built, then (outside performance mode) fetch the materialized result
        from the external process.

        Raises when node_logger is missing or when the external process fails
        without a usable fallback.
        """
        if node_logger is None:
            raise Exception('Node logger is not defined')
        if self.node_settings.cache_results and results_exists(self.hash):
            try:
                self.results.resulting_data = get_external_df_result(self.hash)
                self._cache_progress = None
                return
            except Exception as e:
                # Cache unreadable: fall through and recompute.
                node_logger.warning('Failed to read the cache, rerunning the code')
        if self.node_type == 'output':
            # Output nodes only need the lazy result; no external fetch.
            self.results.resulting_data = self.get_resulting_data()
            self.node_stats.has_run = True
            return
        try:
            self.get_resulting_data()
        except Exception as e:
            self.results.errors = 'Error with creating the lazy frame, most likely due to invalid graph'
            raise e
        if not performance_mode:
            external_df_fetcher = ExternalDfFetcher(lf=self.get_resulting_data().data_frame,
                                                    file_ref=self.hash, wait_on_completion=False,
                                                    flow_id=node_logger.flow_id,
                                                    node_id=self.node_id)
            # Keep a handle so cancel() can abort the external fetch.
            self._fetch_cached_df = external_df_fetcher
            try:
                lf = external_df_fetcher.get_result()
                self.results.resulting_data = FlowDataEngine(
                    lf, number_of_records=ExternalDfFetcher(lf=lf, operation_type='calculate_number_of_records',
                                                            flow_id=node_logger.flow_id, node_id=self.node_id).result
                )
                # NOTE(review): this check is redundant inside the enclosing
                # `if not performance_mode` branch — confirm intended nesting.
                if not performance_mode:
                    self.store_example_data_generator(external_df_fetcher)
                self.node_stats.has_run = True
            except Exception as e:
                node_logger.error('Error with external process')
                # error_code == -1: the worker died (presumably OOM-killed);
                # fall back to the local lazy result without example data.
                if external_df_fetcher.error_code == -1:
                    try:
                        self.results.resulting_data = self.get_resulting_data()
                        self.results.warnings = ('Error with external process (unknown error), '
                                                'likely the process was killed by the server because of memory constraints, '
                                                'continue with the process. '
                                                'We cannot display example data...')
                    except Exception as e:
                        self.results.errors = str(e)
                        raise e
                elif external_df_fetcher.error_description is None:
                    self.results.errors = str(e)
                    raise e
                else:
                    self.results.errors = external_df_fetcher.error_description
                    raise Exception(external_df_fetcher.error_description)
            finally:
                self._fetch_cached_df = None
|
|
522
|
+
|
|
523
|
+
def prepare_before_run(self):
|
|
524
|
+
self.results.errors = None
|
|
525
|
+
self.results.resulting_data = None
|
|
526
|
+
self.results.example_data = None
|
|
527
|
+
|
|
528
|
+
def cancel(self):
|
|
529
|
+
if self._fetch_cached_df is not None:
|
|
530
|
+
self._fetch_cached_df.cancel()
|
|
531
|
+
self.node_stats.is_canceled = True
|
|
532
|
+
else:
|
|
533
|
+
logger.warning('No external process to cancel')
|
|
534
|
+
self.node_stats.is_canceled = True
|
|
535
|
+
|
|
536
|
+
    def execute_node(self, run_location: schemas.ExecutionLocationsLiteral, reset_cache: bool = False,
                     performance_mode: bool = False, retry: bool = True, node_logger: NodeLogger = None):
        """Top-level node execution entry point.

        Chooses between remote and local execution, optionally clears the
        cache first, and retries once (re-running upstream inputs) when a
        missing intermediate file is detected.

        Raises Exception when no node_logger is supplied.
        """
        if node_logger is None:
            raise Exception('Flow logger is not defined')
        if reset_cache:
            self.remove_cache()
            self.node_stats.has_run = False
        if self.is_setup:
            node_logger.info(f'Starting to run {self.__name__}')
            if self.needs_run(performance_mode, node_logger, run_location):
                self.prepare_before_run()
                try:
                    # Remote execution for explicit remote runs, for 'wide'
                    # transforms (unless forced local), and whenever results
                    # must be cached.
                    if ((run_location == 'remote' or (self.node_default.transform_type == 'wide')
                         and not run_location == 'local')) or self.node_settings.cache_results:
                        node_logger.info('Running the node remotely')
                        if self.node_settings.cache_results:
                            # Caching needs the full result, so performance
                            # mode is disabled.
                            performance_mode = False
                        self.execute_remote(performance_mode=(performance_mode if not self.node_settings.cache_results
                                                              else False),
                                            node_logger=node_logger
                                            )
                    else:
                        node_logger.info('Running the node locally')
                        self.execute_local(performance_mode=performance_mode, flow_id=node_logger.flow_id)
                except Exception as e:
                    # A vanished intermediate file usually means an upstream
                    # cache is gone: rebuild all inputs once, then retry this
                    # node with retry=False to avoid infinite recursion.
                    if 'No such file or directory (os error' in str(e) and retry:
                        logger.warning('Error with the input node, starting to rerun the input node...')
                        all_inputs: List[FlowNode] = self.node_inputs.get_all_inputs()
                        for node_input in all_inputs:
                            node_input.execute_node(run_location=run_location,
                                                    performance_mode=performance_mode, retry=True,
                                                    reset_cache=True,
                                                    node_logger=node_logger)
                        self.execute_node(run_location=run_location,
                                          performance_mode=performance_mode, retry=False,
                                          node_logger=node_logger)
                    else:
                        # Record the failure; the error is not re-raised here.
                        self.node_stats.has_run = False
                        self.results.errors = str(e)
                        node_logger.error(f'Error with running the node: {e}')
            else:
                node_logger.info('Node has already run, not running the node')
        else:
            node_logger.warning(f'Node {self.__name__} is not setup, cannot run the node')
|
|
582
|
+
|
|
583
|
+
def store_example_data_generator(self, external_df_fetcher: ExternalDfFetcher | ExternalSampler):
|
|
584
|
+
if external_df_fetcher.status is not None:
|
|
585
|
+
file_ref = external_df_fetcher.status.file_ref
|
|
586
|
+
self.results.example_data_path = file_ref
|
|
587
|
+
self.results.example_data_generator = get_read_top_n(file_path=file_ref, n=100)
|
|
588
|
+
else:
|
|
589
|
+
logger.error('Could not get the sample data, the external process is not ready')
|
|
590
|
+
|
|
591
|
+
def needs_reset(self) -> bool:
|
|
592
|
+
return self._hash != self.calculate_hash(self.setting_input)
|
|
593
|
+
|
|
594
|
+
def reset(self, deep: bool = False):
|
|
595
|
+
needs_reset = self.needs_reset() or deep
|
|
596
|
+
if needs_reset:
|
|
597
|
+
logger.info(f'{self.node_id}: Node needs reset')
|
|
598
|
+
self.node_stats.has_run = False
|
|
599
|
+
self.results.reset()
|
|
600
|
+
if self.schema_callback:
|
|
601
|
+
self.schema_callback.reset()
|
|
602
|
+
self.node_schema.result_schema = None
|
|
603
|
+
self.node_schema.predicted_schema = None
|
|
604
|
+
self._hash = None
|
|
605
|
+
self.node_information.is_setup = None
|
|
606
|
+
self.evaluate_nodes()
|
|
607
|
+
|
|
608
|
+
def delete_lead_to_node(self, node_id: int) -> bool:
|
|
609
|
+
logger.info(f'Deleting lead to node: {node_id}')
|
|
610
|
+
for i, lead_to_node in enumerate(self.leads_to_nodes):
|
|
611
|
+
logger.info(f'Checking lead to node: {lead_to_node.node_id}')
|
|
612
|
+
if lead_to_node.node_id == node_id:
|
|
613
|
+
logger.info(f'Found the node to delete: {node_id}')
|
|
614
|
+
self.leads_to_nodes.pop(i)
|
|
615
|
+
return True
|
|
616
|
+
return False
|
|
617
|
+
|
|
618
|
+
def delete_input_node(self, node_id: int, connection_type: input_schema.InputConnectionClass = 'input-0',
|
|
619
|
+
complete: bool = False) -> bool:
|
|
620
|
+
# connection type must be in right, left or main
|
|
621
|
+
deleted: bool = False
|
|
622
|
+
if connection_type == 'input-0':
|
|
623
|
+
for i, node in enumerate(self.node_inputs.main_inputs):
|
|
624
|
+
if node.node_id == node_id:
|
|
625
|
+
self.node_inputs.main_inputs.pop(i)
|
|
626
|
+
deleted = True
|
|
627
|
+
if not complete:
|
|
628
|
+
continue
|
|
629
|
+
elif connection_type == 'input-1' or complete:
|
|
630
|
+
if self.node_inputs.right_input is not None and self.node_inputs.right_input.node_id == node_id:
|
|
631
|
+
self.node_inputs.right_input = None
|
|
632
|
+
deleted = True
|
|
633
|
+
elif connection_type == 'input-2' or complete:
|
|
634
|
+
if self.node_inputs.left_input is not None and self.node_inputs.right_input.node_id == node_id:
|
|
635
|
+
self.node_inputs.left_input = None
|
|
636
|
+
deleted = True
|
|
637
|
+
else:
|
|
638
|
+
logger.warning('Could not find the connection to delete...')
|
|
639
|
+
if deleted:
|
|
640
|
+
self.reset()
|
|
641
|
+
return deleted
|
|
642
|
+
|
|
643
|
+
def __repr__(self):
|
|
644
|
+
return f"Node id: {self.node_id} ({self.node_type})"
|
|
645
|
+
|
|
646
|
+
def _get_readable_schema(self):
|
|
647
|
+
if self.is_setup:
|
|
648
|
+
output = []
|
|
649
|
+
for s in self.schema:
|
|
650
|
+
output.append(dict(column_name=s.column_name, data_type=s.data_type))
|
|
651
|
+
return output
|
|
652
|
+
|
|
653
|
+
def get_repr(self):
|
|
654
|
+
return dict(FlowNode=
|
|
655
|
+
dict(node_id=self.node_id,
|
|
656
|
+
step_name=self.__name__,
|
|
657
|
+
output_columns=self.node_schema.output_columns,
|
|
658
|
+
output_schema=self._get_readable_schema()))
|
|
659
|
+
|
|
660
|
+
@property
|
|
661
|
+
def number_of_leads_to_nodes(self) -> int:
|
|
662
|
+
if self.is_setup:
|
|
663
|
+
return len(self.leads_to_nodes)
|
|
664
|
+
|
|
665
|
+
@property
|
|
666
|
+
def has_next_step(self) -> bool:
|
|
667
|
+
return len(self.leads_to_nodes) > 0
|
|
668
|
+
|
|
669
|
+
@property
|
|
670
|
+
def has_input(self) -> bool:
|
|
671
|
+
return len(self.all_inputs) > 0
|
|
672
|
+
|
|
673
|
+
@property
|
|
674
|
+
def singular_input(self) -> bool:
|
|
675
|
+
return self.node_template.input == 1
|
|
676
|
+
|
|
677
|
+
@property
|
|
678
|
+
def singular_main_input(self) -> "FlowNode":
|
|
679
|
+
if self.singular_input:
|
|
680
|
+
return self.all_inputs[0]
|
|
681
|
+
|
|
682
|
+
    def get_table_example(self, include_data: bool = False) -> TableExample | None:
        """Build a TableExample describing this node's output.

        Output nodes delegate to their main input. For set-up nodes with
        include_data=True the stored example rows are included; otherwise a
        schema-only (empty-data) example is returned.
        """
        self.print('Getting a table example')
        if self.node_type == 'output':
            # Output nodes have no table of their own; show the input's.
            self.print('getting the table example')
            return self.main_input[0].get_table_example(include_data)
        if self.is_setup and include_data:
            logger.info('getting the table example since the node has run')
            example_data_getter = self.results.example_data_generator
            if example_data_getter is not None:
                data = example_data_getter().to_pylist()
                if data is None:
                    data = []
            else:
                data = []
            schema = [FileColumn.model_validate(c.get_column_repr()) for c in self.schema]
            fl = self.get_resulting_data()
            # NOTE(review): 999 looks like a placeholder record count — confirm.
            return TableExample(node_id=self.node_id,
                                name=str(self.node_id), number_of_records=999,
                                number_of_columns=fl.number_of_fields,
                                table_schema=schema, columns=fl.columns, data=data)
        else:
            logger.warning('getting the table example but the node has not run')
            # Schema prediction may fail on a not-yet-run node; degrade to
            # an empty schema instead of propagating.
            try:
                schema = [FileColumn.model_validate(c.get_column_repr()) for c in self.schema]
            except Exception as e:
                logger.warning(e)
                schema = []
            columns = [s.name for s in schema]
            return TableExample(node_id=self.node_id,
                                name=str(self.node_id), number_of_records=0,
                                number_of_columns=len(columns),
                                table_schema=schema, columns=columns,
                                data=[])
|
|
715
|
+
|
|
716
|
+
    def calculate_settings_out_select(self):
        """No-op; presumably a hook overridden or filled in elsewhere — TODO confirm."""
        pass
|
|
718
|
+
|
|
719
|
+
def get_node_data(self, flow_id: int, include_example: bool = False) -> NodeData:
|
|
720
|
+
node = NodeData(flow_id=flow_id,
|
|
721
|
+
node_id=self.node_id,
|
|
722
|
+
has_run=self.node_stats.has_run,
|
|
723
|
+
setting_input=self.setting_input,
|
|
724
|
+
flow_type=self.node_type)
|
|
725
|
+
if self.main_input:
|
|
726
|
+
node.main_input = self.main_input[0].get_table_example()
|
|
727
|
+
if self.left_input:
|
|
728
|
+
node.left_input = self.left_input.get_table_example()
|
|
729
|
+
if self.right_input:
|
|
730
|
+
node.right_input = self.right_input.get_table_example()
|
|
731
|
+
if self.is_setup:
|
|
732
|
+
node.main_output = self.get_table_example(include_example)
|
|
733
|
+
node = setting_generator.get_setting_generator(self.node_type)(node)
|
|
734
|
+
|
|
735
|
+
node = setting_updator.get_setting_updator(self.node_type)(node)
|
|
736
|
+
return node
|
|
737
|
+
|
|
738
|
+
def get_output_data(self) -> TableExample:
|
|
739
|
+
return self.get_table_example(True)
|
|
740
|
+
|
|
741
|
+
def get_node_input(self) -> schemas.NodeInput:
|
|
742
|
+
return schemas.NodeInput(pos_y=self.setting_input.pos_y,
|
|
743
|
+
pos_x=self.setting_input.pos_x,
|
|
744
|
+
id=self.node_id,
|
|
745
|
+
**self.node_template.__dict__)
|
|
746
|
+
|
|
747
|
+
def get_edge_input(self) -> List[schemas.NodeEdge]:
|
|
748
|
+
edges = []
|
|
749
|
+
if self.node_inputs.main_inputs is not None:
|
|
750
|
+
for i, main_input in enumerate(self.node_inputs.main_inputs):
|
|
751
|
+
edges.append(schemas.NodeEdge(id=f'{main_input.node_id}-{self.node_id}-{i}',
|
|
752
|
+
source=main_input.node_id,
|
|
753
|
+
target=self.node_id,
|
|
754
|
+
sourceHandle='output-0',
|
|
755
|
+
targetHandle='input-0',
|
|
756
|
+
))
|
|
757
|
+
if self.node_inputs.left_input is not None:
|
|
758
|
+
edges.append(schemas.NodeEdge(id=f'{self.node_inputs.left_input.node_id}-{self.node_id}-right',
|
|
759
|
+
source=self.node_inputs.left_input.node_id,
|
|
760
|
+
target=self.node_id,
|
|
761
|
+
sourceHandle='output-0',
|
|
762
|
+
targetHandle='input-2',
|
|
763
|
+
))
|
|
764
|
+
if self.node_inputs.right_input is not None:
|
|
765
|
+
edges.append(schemas.NodeEdge(id=f'{self.node_inputs.right_input.node_id}-{self.node_id}-left',
|
|
766
|
+
source=self.node_inputs.right_input.node_id,
|
|
767
|
+
target=self.node_id,
|
|
768
|
+
sourceHandle='output-0',
|
|
769
|
+
targetHandle='input-1',
|
|
770
|
+
))
|
|
771
|
+
return edges
|