Flowfile 0.2.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
@@ -0,0 +1,111 @@
+
+import pyarrow as pa
+from typing import List, Union, Callable, Optional, Literal
+
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
+from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
+from flowfile_core.schemas import schemas
+from dataclasses import dataclass
+
+
+@dataclass
+class NodeStepPromise:
+    node_id: Union[str, int]
+    name: str
+    is_start: bool
+    leads_to_id: Optional[List[Union[str, int]]] = None
+    left_input: Optional[Union[str, int]] = None
+    right_input: Optional[Union[str, int]] = None
+    depends_on: Optional[List[Union[str, int]]] = None
+
+
+class NodeStepStats:
+    error: str = None
+    has_run: bool = False
+    active: bool = True
+    is_canceled: bool = False
+
+
+class NodeStepSettings:
+    cache_results: bool = False
+    renew_schema: bool = True
+    streamable: bool = True
+    setup_errors: bool = False
+    breaking_setup_errors: bool = False
+    execute_location: schemas.ExecutionLocationsLiteral = 'auto'
+
+
+class NodeStepInputs:
+    left_input: "FlowNode" = None
+    right_input: "FlowNode" = None
+    main_inputs: List["FlowNode"] = None
+
+    @property
+    def input_ids(self) -> List[int]:
+        if self.main_inputs is not None:
+            return [node_input.node_information.id for node_input in self.get_all_inputs()]
+
+    def get_all_inputs(self) -> List["FlowNode"]:
+        main_inputs = self.main_inputs or []
+        return [v for v in main_inputs + [self.left_input, self.right_input] if v is not None]
+
+    def __repr__(self) -> str:
+        left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
+        right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
+        main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
+        return f"{self.__class__.__name__}({left_repr}, {right_repr}, {main_inputs_repr})"
+
+    def validate_if_input_connection_exists(self, node_input_id: int,
+                                            connection_name: Literal['main', 'left', 'right']) -> bool:
+        if connection_name == 'main':
+            return any((node_input.node_information.id == node_input_id for node_input in self.main_inputs))
+        if connection_name == 'left':
+            return self.left_input.node_information.id == node_input_id
+        if connection_name == 'right':
+            return self.right_input.node_information.id == node_input_id
+
+
+class NodeSchemaInformation:
+    result_schema: Optional[List[FlowfileColumn]] = []  # resulting schema of the function
+    predicted_schema: Optional[List[FlowfileColumn]] = []  # predicted resulting schema of the function
+    input_columns: List[str] = []  # columns that are needed for the function
+    drop_columns: List[str] = []  # columns that will not be available after the function
+    output_columns: List[FlowfileColumn] = []  # columns that will be added with the function
+
+
+class NodeResults:
+    _resulting_data: Optional[FlowDataEngine] = None  # after successful execution this will contain the Flowfile
+    example_data: Optional[
+        FlowDataEngine] = None  # after success this will contain a sample of the data (to provide frontend data)
+    example_data_path: Optional[str] = None  # Path to the arrow table file
+    example_data_generator: Optional[Callable[[], pa.Table]] = None
+    run_time: int = -1
+    errors: Optional[str] = None
+    warnings: Optional[str] = None
+    analysis_data_generator: Optional[Callable[[], pa.Table]] = None
+
+    def __init__(self):
+        self._resulting_data = None
+        self.example_data = None
+        self.run_time = -1
+        self.errors = None
+        self.warnings = None
+        self.example_data_generator = None
+        self.analysis_data_generator = None
+
+    def get_example_data(self) -> pa.Table | None:
+        if self.example_data_generator:
+            return self.example_data_generator()
+
+    @property
+    def resulting_data(self) -> FlowDataEngine:
+        return self._resulting_data
+
+    @resulting_data.setter
+    def resulting_data(self, d: FlowDataEngine):
+        self._resulting_data = d
+
+    def reset(self):
+        self._resulting_data = None
+        self.run_time = -1
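As an aside, NodeStepPromise above is a plain dataclass record. A minimal usage sketch follows; it is not part of the wheel, and the import path (flowfile_core.flowfile.flow_node.models) is inferred from the file listing above, so treat it as an assumption.

```python
# Illustrative sketch only, not part of the release. Assumes the wheel is
# installed and the classes above live in flowfile_core.flowfile.flow_node.models
# (path inferred from the RECORD listing).
from flowfile_core.flowfile.flow_node.models import NodeStepPromise

# A promise describes a node before its settings are fully known.
promise = NodeStepPromise(node_id=1, name='read_csv', is_start=True, leads_to_id=[2])
print(promise)  # dataclass repr with all declared fields
```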
@@ -0,0 +1,70 @@
+
+from typing import Callable, Any, Optional, Generic, TypeVar
+from concurrent.futures import ThreadPoolExecutor, Future
+from flowfile_core.configs import logger
+
+
+T = TypeVar('T')
+
+
+class SingleExecutionFuture(Generic[T]):
+    """Single execution of a function in a separate thread with caching of the result."""
+    executor: ThreadPoolExecutor
+    future: Optional[Future[T]]
+    func: Callable[[], T]
+    on_error: Optional[Callable[[Exception], Any]]
+    result_value: Optional[T]
+
+    def __init__(
+            self,
+            func: Callable[[], T],
+            on_error: Optional[Callable[[Exception], Any]] = None
+    ) -> None:
+        """Initialize with function and optional error handler."""
+        self.executor = ThreadPoolExecutor(max_workers=1)
+        self.future = None
+        self.func = func
+        self.on_error = on_error
+        self.result_value = None
+
+    def start(self) -> None:
+        """Start the function execution if not already started."""
+        if not self.future:
+            logger.info("single executor function started")
+            self.future = self.executor.submit(self.func)
+
+    def cleanup(self) -> None:
+        """Clean up resources by clearing the future and shutting down the executor."""
+        # if self.future:
+        #     self.future = None
+        self.executor.shutdown(wait=False)
+
+    def __call__(self) -> Optional[T]:
+        """Execute function if not running and return its result."""
+        if self.result_value:
+            return self.result_value
+        if not self.future:
+            self.start()
+        else:
+            logger.info("Function already running or did complete")
+        try:
+            self.result_value = self.future.result()
+            logger.info("Done with the function")
+            return self.result_value
+        except Exception as e:
+            if self.on_error:
+                return self.on_error(e)
+            else:
+                raise e
+        finally:
+            self.cleanup()
+
+    def reset(self):
+        """Reset the future and result value."""
+        logger.info("Resetting the future and result value")
+        self.result_value = None
+        self.future = None
+
+    def __del__(self) -> None:
+        """Ensure executor is shut down on deletion."""
+        self.cleanup()
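A hedged usage sketch of the class above: the callable is submitted once to a single-worker executor, and later calls return the cached value instead of re-running it. The module path is inferred from the file listing (flow_node/schema_callback.py) and is an assumption.

```python
# Illustrative sketch only; assumes the wheel is installed and the class above
# is importable from flowfile_core.flowfile.flow_node.schema_callback
# (path inferred from the file listing).
from flowfile_core.flowfile.flow_node.schema_callback import SingleExecutionFuture

def slow_schema_probe() -> list:
    # stand-in for an expensive schema prediction
    return ['id', 'name', 'amount']

probe = SingleExecutionFuture(slow_schema_probe, on_error=lambda exc: None)
probe.start()   # submits the callable to its single worker thread
print(probe())  # blocks for the first result
print(probe())  # returns the cached result_value without re-running
```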
@@ -0,0 +1,123 @@
+import time
+import random
+import uuid
+import socket
+import hashlib
+
+from dataclasses import dataclass
+from typing import Dict, List
+import os
+from pathlib import Path
+
+from flowfile_core.flowfile.manage.open_flowfile import open_flow
+from flowfile_core.flowfile.FlowfileFlow import FlowGraph
+from flowfile_core.schemas.schemas import FlowSettings
+from flowfile_core.configs import logger
+
+
+def create_unique_id() -> int:
+    """
+    Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
+    Returns:
+        int: unique id within 32 bits (4 bytes)
+    """
+    # Get various entropy sources
+    time_ms = int(time.time() * 1000)
+    pid = os.getpid()
+    random_bytes = random.getrandbits(32)
+    mac_addr = uuid.getnode()  # MAC address as integer
+    hostname = socket.gethostname()
+
+    # Combine all sources into a string
+    seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
+
+    # Create a hash of all entropy sources
+    hash_obj = hashlib.md5(seed.encode())
+    hash_int = int(hash_obj.hexdigest(), 16)
+
+    # Ensure the result fits within 32 bits (4 bytes)
+    unique_id = hash_int & 0xFFFFFFFF
+
+    return unique_id
+
+
+@dataclass
+class FlowfileHandler:
+    _flows: Dict[int, FlowGraph]
+
+    def __init__(self):
+        self._flows = {}
+
+    @property
+    def flowfile_flows(self) -> List[FlowGraph]:
+        return list(self._flows.values())
+
+    def __add__(self, other: FlowGraph) -> int:
+        self._flows[other.flow_id] = other
+        return other.flow_id
+
+    def import_flow(self, flow_path: Path | str) -> int:
+        if isinstance(flow_path, str):
+            flow_path = Path(flow_path)
+        imported_flow = open_flow(flow_path)
+        self._flows[imported_flow.flow_id] = imported_flow
+        imported_flow.flow_settings = self.get_flow_info(imported_flow.flow_id)
+        imported_flow.flow_settings.is_running = False
+        return imported_flow.flow_id
+
+    def register_flow(self, flow_settings: FlowSettings):
+        if flow_settings.flow_id in self._flows:
+            self.delete_flow(flow_settings.flow_id)
+            raise 'flow already registered'
+        else:
+            name = flow_settings.name if flow_settings.name else flow_settings.flow_id
+            self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_id=flow_settings.flow_id, flow_settings=flow_settings)
+        return self.get_flow(flow_settings.flow_id)
+
+    def get_flow(self, flow_id: int) -> FlowGraph | None:
+        return self._flows.get(flow_id, None)
+
+    def delete_flow(self, flow_id: int):
+        flow = self._flows.pop(flow_id)
+        del flow
+
+    def save_flow(self, flow_id: int, flow_path: str):
+        flow = self.get_flow(flow_id)
+        if flow:
+            flow.save_flow(flow_path)
+        else:
+            raise Exception('Flow not found')
+
+    def add_flow(self, name: str, flow_path: str) -> int:
+        """
+        Creates a new flow with a reference to the flow path
+        Args:
+            name (str): The name of the flow
+            flow_path (str): The path to the flow file
+
+        Returns:
+            int: The flow id
+
+        """
+        next_id = create_unique_id()
+        flow_info = FlowSettings(name=name, flow_id=next_id, save_location='', path=flow_path)
+        _ = self.register_flow(flow_info)
+        return next_id
+
+    def get_flow_info(self, flow_id: int) -> FlowSettings:
+        flow = self.get_flow(flow_id)
+        if not flow:
+            raise Exception(f'Flow {flow_id} not found')
+        flow_exists = os.path.exists(flow.flow_settings.path)
+        last_modified_ts = os.path.getmtime(flow.flow_settings.path) if flow_exists else -1
+        flow.flow_settings.modified_on = last_modified_ts
+        return flow.flow_settings
+
+    def get_node(self, flow_id: int, node_id: int):
+        flow = self.get_flow(flow_id)
+        if not flow:
+            raise Exception(f'Flow {flow_id} not found')
+        node = flow.get_node(node_id)
+        if not node:
+            raise Exception(f'Node {node_id} not found in flow {flow_id}')
+        return node
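create_unique_id above mixes several entropy sources, hashes them with MD5, and masks the digest down to 32 bits. A standalone sketch of that arithmetic, independent of the package:

```python
# Standalone sketch of the id scheme used by create_unique_id above:
# combine entropy sources into a string, hash with MD5, keep the low 32 bits.
import hashlib
import os
import random
import socket
import time
import uuid

seed = (f"{int(time.time() * 1000)}-{os.getpid()}-{random.getrandbits(32)}-"
        f"{uuid.getnode()}-{socket.gethostname()}-{uuid.uuid4()}")
unique_id = int(hashlib.md5(seed.encode()).hexdigest(), 16) & 0xFFFFFFFF
assert 0 <= unique_id < 2 ** 32  # always fits in 4 bytes
print(unique_id)
```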
File without changes
@@ -0,0 +1,70 @@
+from flowfile_core.schemas import schemas, input_schema
+
+
+def ensure_compatibility_node_read(node_read: input_schema.NodeRead):
+    if hasattr(node_read, 'received_file'):
+        if not hasattr(node_read.received_file, 'fields'):
+            print('setting fields')
+            setattr(node_read.received_file, 'fields', [])
+
+
+def ensure_compatibility_node_output(node_output: input_schema.NodeOutput):
+    if hasattr(node_output, 'output_settings'):
+        if not hasattr(node_output.output_settings, 'abs_file_path'):
+            new_output_settings = input_schema.OutputSettings.model_validate(node_output.output_settings.model_dump())
+            setattr(node_output, 'output_settings', new_output_settings)
+
+
+def ensure_compatibility_node_select(node_select: input_schema.NodeSelect):
+    if hasattr(node_select, 'select_input'):
+        if any(not hasattr(select_input, 'position') for select_input in node_select.select_input):
+            for _index, select_input in enumerate(node_select.select_input):
+                setattr(select_input, 'position', _index)
+    if not hasattr(node_select, 'sorted_by'):
+        setattr(node_select, 'sorted_by', 'none')
+
+
+def ensure_compatibility_node_joins(node_settings: input_schema.NodeFuzzyMatch | input_schema.NodeJoin):
+    if any(not hasattr(r, 'position') for r in node_settings.join_input.right_select.renames):
+        for _index, select_input in enumerate(node_settings.join_input.right_select.renames +
+                                              node_settings.join_input.left_select.renames):
+            setattr(select_input, 'position', _index)
+
+
+def ensure_description(node: input_schema.NodeBase):
+    if not hasattr(node, 'description'):
+        setattr(node, 'description', '')
+
+
+def ensure_compatibility_node_polars(node_polars: input_schema.NodePolarsCode):
+    if hasattr(node_polars, 'depending_on_id'):
+        setattr(node_polars, 'depending_on_ids', [getattr(node_polars, 'depending_on_id')])
+
+
+def ensure_compatibility(flow_storage_obj: schemas.FlowInformation, flow_path: str):
+    if not hasattr(flow_storage_obj, 'flow_settings'):
+        flow_settings = schemas.FlowSettings(flow_id=flow_storage_obj.flow_id, path=flow_path,
+                                             name=flow_storage_obj.flow_name)
+        setattr(flow_storage_obj, 'flow_settings', flow_settings)
+        flow_storage_obj = schemas.FlowInformation.model_validate(flow_storage_obj)
+    elif not hasattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location'):
+        setattr(getattr(flow_storage_obj, 'flow_settings'), 'execution_location', 'auto')
+    elif not hasattr(flow_storage_obj.flow_settings, 'is_running'):
+        setattr(flow_storage_obj.flow_settings, 'is_running', False)
+        setattr(flow_storage_obj.flow_settings, 'is_canceled', False)
+    if not hasattr(flow_storage_obj.flow_settings, 'show_detailed_progress'):
+        setattr(flow_storage_obj.flow_settings, 'show_detailed_progress', True)
+    for _id, node_information in flow_storage_obj.data.items():
+        if not hasattr(node_information, 'setting_input'):
+            continue
+        if node_information.setting_input.__class__.__name__ == 'NodeRead':
+            ensure_compatibility_node_read(node_information.setting_input)
+        elif node_information.setting_input.__class__.__name__ == 'NodeSelect':
+            ensure_compatibility_node_select(node_information.setting_input)
+        elif node_information.setting_input.__class__.__name__ == 'NodeOutput':
+            ensure_compatibility_node_output(node_information.setting_input)
+        elif node_information.setting_input.__class__.__name__ in ('NodeJoin', 'NodeFuzzyMatch'):
+            ensure_compatibility_node_joins(node_information.setting_input)
+        elif node_information.setting_input.__class__.__name__ == 'NodePolarsCode':
+            ensure_compatibility_node_polars(node_information.setting_input)
+        ensure_description(node_information.setting_input)
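All of the helpers above follow one pattern: objects unpickled from an older release may be missing newer attributes, so each missing attribute is back-filled with a default via hasattr/setattr. A minimal standalone sketch of that pattern, with a hypothetical stand-in class:

```python
# Standalone sketch of the back-fill pattern used by the helpers above;
# OldNodeSelect is a hypothetical stand-in for an object pickled by an older
# Flowfile version, not a class from the package.
class OldNodeSelect:
    pass

node = OldNodeSelect()
if not hasattr(node, 'sorted_by'):
    setattr(node, 'sorted_by', 'none')  # same default the diff applies
print(node.sorted_by)  # 'none'
```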
File without changes
@@ -0,0 +1,136 @@
+from flowfile_core.schemas import schemas, input_schema
+from typing import List, Tuple
+from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_compatibility
+import pickle
+from flowfile_core.flowfile.FlowfileFlow import FlowGraph
+from pathlib import Path
+
+
+def determine_insertion_order(node_storage: schemas.FlowInformation):
+    ingest_order: List[int] = []
+    ingest_order_set: set[int] = set()
+    all_nodes = set(node_storage.data.keys())
+
+    def assure_output_id(input_node: schemas.NodeInformation, output_node: schemas.NodeInformation):
+        # assure the output id is in the list with outputs of the input node this is a quick fix
+        if output_node.id not in input_node.outputs:
+            input_node.outputs.append(output_node.id)
+
+    def determine_order(node_id: int):
+        current_node = node_storage.data.get(node_id)
+        if current_node is None:
+            return
+        output_ids = current_node.outputs
+        main_input_ids = current_node.input_ids if current_node.input_ids else []
+        input_ids = [n for n in [current_node.left_input_id,
+                                 current_node.right_input_id] + main_input_ids if (n is not None
+                                                                                   and n not in
+                                                                                   ingest_order_set)]
+        if len(input_ids) > 0:
+            for input_id in input_ids:
+                new_node = node_storage.data.get(input_id)
+                if new_node is None:
+                    ingest_order.append(current_node.id)
+                    ingest_order_set.add(current_node.id)
+                    continue
+                assure_output_id(new_node, current_node)
+                if new_node.id not in ingest_order_set:
+                    determine_order(input_id)
+        elif current_node.id not in ingest_order_set:
+            ingest_order.append(current_node.id)
+            ingest_order_set.add(current_node.id)
+
+        for output_id in output_ids:
+            if output_id not in ingest_order_set:
+                determine_order(output_id)
+
+    if len(node_storage.node_starts) > 0:
+        determine_order(node_storage.node_starts[0])
+        # add the random not connected nodes
+    else:
+        for node_id in all_nodes:
+            determine_order(node_id)
+    ingest_order += list(all_nodes - ingest_order_set)
+    return ingest_order
+
+
+def open_flow(flow_path: Path) -> FlowGraph:
+    """
+    Open a flowfile from a given path
+    Args:
+        flow_path (Path): The absolute or relative path to the flowfile
+
+    Returns:
+        FlowGraph: The flowfile object
+    """
+    with open(str(flow_path), 'rb') as f:
+        flow_storage_obj: schemas.FlowInformation = pickle.load(f)
+    flow_storage_obj.flow_settings.path = str(flow_path)
+    flow_storage_obj.flow_settings.name = str(flow_path.stem)
+    flow_storage_obj.flow_name = str(flow_path.stem)
+    ensure_compatibility(flow_storage_obj, str(flow_path))
+    ingestion_order = determine_insertion_order(flow_storage_obj)
+    new_flow = FlowGraph(flow_id=flow_storage_obj.flow_id, name=flow_storage_obj.flow_name,
+                         flow_settings=flow_storage_obj.flow_settings)
+    for node_id in ingestion_order:
+        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
+        node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
+                                                pos_x=node_info.x_position, pos_y=node_info.y_position,
+                                                node_type=node_info.type)
+        if hasattr(node_info.setting_input, 'cache_results'):
+            node_promise.cache_results = node_info.setting_input.cache_results
+        new_flow.add_node_promise(node_promise)
+    for node_id in ingestion_order:
+        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
+        getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)
+        from_node = new_flow.get_node(node_id)
+        for output_node_id in node_info.outputs:
+            to_node = new_flow.get_node(output_node_id)
+            if to_node is not None:
+                output_node_obj = flow_storage_obj.data[output_node_id]
+                is_left_input = (output_node_obj.left_input_id == node_id) and (to_node.left_input.node_id != node_id
+                                                                                if to_node.left_input is not None
+                                                                                else True)
+                is_right_input = (output_node_obj.right_input_id == node_id) and (to_node.right_input.node_id != node_id
+                                                                                  if to_node.right_input is not None
+                                                                                  else True)
+                is_main_input = node_id in output_node_obj.input_ids
+                if is_left_input:
+                    insert_type = 'left'
+                elif is_right_input:
+                    insert_type = 'right'
+                elif is_main_input:
+                    insert_type = 'main'
+                else:
+                    continue
+                to_node.add_node_connection(from_node, insert_type)
+            else:
+                from_node.delete_lead_to_node(output_node_id)
+                if not (from_node.node_id, output_node_id) in flow_storage_obj.node_connections:
+                    continue
+                flow_storage_obj.node_connections.pop(
+                    flow_storage_obj.node_connections.index((from_node.node_id, output_node_id)))
+
+    for missing_connection in set(flow_storage_obj.node_connections) - set(new_flow.node_connections):
+        to_node = new_flow.get_node(missing_connection[1])
+        if not to_node.has_input:
+            test_if_circular_connection(missing_connection, new_flow)
+            from_node = new_flow.get_node(missing_connection[0])
+            if from_node:
+                to_node.add_node_connection(from_node)
+    return new_flow
+
+
+def test_if_circular_connection(connection: Tuple[int, int], flow: FlowGraph):
+    to_node = flow.get_node(connection[1])
+    leads_to_nodes_queue = [n for n in to_node.leads_to_nodes]
+    circular_connection: bool = False
+    while len(leads_to_nodes_queue) > 0:
+        leads_to_node = leads_to_nodes_queue.pop(0)
+        if leads_to_node.node_id == connection[0]:
+            circular_connection = True
+            break
+        for leads_to_node_leads_to in leads_to_node.leads_to_nodes:
+            leads_to_nodes_queue.append(leads_to_node_leads_to)
+    return circular_connection
+
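test_if_circular_connection above walks the "leads to" edges breadth-first from the connection's target and reports whether the source node becomes reachable again. A standalone sketch of the same idea over a plain adjacency dict; the representation and the visited set are illustrative additions, not the package's API:

```python
# Standalone sketch of the breadth-first reachability check performed by
# test_if_circular_connection above; the adjacency-dict form and the `seen`
# guard are illustrative, not part of the package.
from collections import deque
from typing import Dict, List, Tuple

def would_be_circular(edges: Dict[int, List[int]], connection: Tuple[int, int]) -> bool:
    source, target = connection
    queue = deque(edges.get(target, []))
    seen = set()
    while queue:
        node = queue.popleft()
        if node == source:
            return True  # adding source -> target would close a cycle
        if node in seen:
            continue
        seen.add(node)
        queue.extend(edges.get(node, []))
    return False

print(would_be_circular({1: [2], 2: [3], 3: [1]}, (3, 1)))  # True
print(would_be_circular({1: [2], 2: [3]}, (1, 2)))          # False
```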
@@ -0,0 +1,41 @@
+from flowfile_core.configs import logger
+from typing import Callable
+
+
+class SettingGenerator:
+    setting_generator_set = set()
+
+    def __init__(self):
+        self.setting_generator_set = set()
+
+    def add_setting_generator_func(self, f: Callable):
+        self.setting_generator_set.update([f.__name__])
+        setattr(self, f.__name__, f)
+
+    def get_setting_generator(self, node_type: str) -> Callable:
+        logger.info('getting setting generator for ' + node_type)
+
+        if node_type in self.setting_generator_set:
+            logger.info('setting generator found')
+            return getattr(self, node_type)
+        else:
+            return lambda x: x
+
+
+class SettingUpdator:
+    setting_updator_set = set()
+
+    def __init__(self):
+        self.setting_updator_set = set()
+
+    def add_setting_updator_func(self, f: Callable):
+        self.setting_updator_set.update([f.__name__])
+        setattr(self, f.__name__, f)
+
+    def get_setting_updator(self, node_type: str) -> Callable:
+        logger.info('getting setting updator for ' + node_type)
+        if node_type in self.setting_updator_set:
+            logger.info('setting updator found')
+            return getattr(self, node_type)
+        else:
+            return lambda x: x