Flowfile 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- build_backends/__init__.py +0 -0
- build_backends/main.py +313 -0
- build_backends/main_prd.py +202 -0
- flowfile/__init__.py +71 -0
- flowfile/__main__.py +24 -0
- flowfile-0.2.2.dist-info/LICENSE +21 -0
- flowfile-0.2.2.dist-info/METADATA +225 -0
- flowfile-0.2.2.dist-info/RECORD +171 -0
- flowfile-0.2.2.dist-info/WHEEL +4 -0
- flowfile-0.2.2.dist-info/entry_points.txt +9 -0
- flowfile_core/__init__.py +13 -0
- flowfile_core/auth/__init__.py +0 -0
- flowfile_core/auth/jwt.py +140 -0
- flowfile_core/auth/models.py +40 -0
- flowfile_core/auth/secrets.py +178 -0
- flowfile_core/configs/__init__.py +35 -0
- flowfile_core/configs/flow_logger.py +433 -0
- flowfile_core/configs/node_store/__init__.py +0 -0
- flowfile_core/configs/node_store/nodes.py +98 -0
- flowfile_core/configs/settings.py +120 -0
- flowfile_core/database/__init__.py +0 -0
- flowfile_core/database/connection.py +51 -0
- flowfile_core/database/init_db.py +45 -0
- flowfile_core/database/models.py +41 -0
- flowfile_core/fileExplorer/__init__.py +0 -0
- flowfile_core/fileExplorer/funcs.py +259 -0
- flowfile_core/fileExplorer/utils.py +53 -0
- flowfile_core/flowfile/FlowfileFlow.py +1403 -0
- flowfile_core/flowfile/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/__init__.py +0 -0
- flowfile_core/flowfile/_extensions/real_time_interface.py +51 -0
- flowfile_core/flowfile/analytics/__init__.py +0 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +123 -0
- flowfile_core/flowfile/analytics/graphic_walker.py +60 -0
- flowfile_core/flowfile/analytics/schemas/__init__.py +0 -0
- flowfile_core/flowfile/analytics/utils.py +9 -0
- flowfile_core/flowfile/connection_manager/__init__.py +3 -0
- flowfile_core/flowfile/connection_manager/_connection_manager.py +48 -0
- flowfile_core/flowfile/connection_manager/models.py +10 -0
- flowfile_core/flowfile/database_connection_manager/__init__.py +0 -0
- flowfile_core/flowfile/database_connection_manager/db_connections.py +139 -0
- flowfile_core/flowfile/database_connection_manager/models.py +15 -0
- flowfile_core/flowfile/extensions.py +36 -0
- flowfile_core/flowfile/flow_data_engine/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +146 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1521 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +144 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/polars_type.py +24 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/__init__.py +0 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +38 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +90 -0
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +54 -0
- flowfile_core/flowfile/flow_data_engine/pivot_table.py +20 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +249 -0
- flowfile_core/flowfile/flow_data_engine/read_excel_tables.py +143 -0
- flowfile_core/flowfile/flow_data_engine/sample_data.py +120 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/__init__.py +1 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +36 -0
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +503 -0
- flowfile_core/flowfile/flow_data_engine/threaded_processes.py +27 -0
- flowfile_core/flowfile/flow_data_engine/types.py +0 -0
- flowfile_core/flowfile/flow_data_engine/utils.py +212 -0
- flowfile_core/flowfile/flow_node/__init__.py +0 -0
- flowfile_core/flowfile/flow_node/flow_node.py +771 -0
- flowfile_core/flowfile/flow_node/models.py +111 -0
- flowfile_core/flowfile/flow_node/schema_callback.py +70 -0
- flowfile_core/flowfile/handler.py +123 -0
- flowfile_core/flowfile/manage/__init__.py +0 -0
- flowfile_core/flowfile/manage/compatibility_enhancements.py +70 -0
- flowfile_core/flowfile/manage/manage_flowfile.py +0 -0
- flowfile_core/flowfile/manage/open_flowfile.py +136 -0
- flowfile_core/flowfile/setting_generator/__init__.py +2 -0
- flowfile_core/flowfile/setting_generator/setting_generator.py +41 -0
- flowfile_core/flowfile/setting_generator/settings.py +176 -0
- flowfile_core/flowfile/sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/__init__.py +3 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +159 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +172 -0
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +173 -0
- flowfile_core/flowfile/sources/external_sources/base_class.py +39 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +2 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/exchange_rate.py +0 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/external_source.py +100 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +74 -0
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/sample_users.py +29 -0
- flowfile_core/flowfile/sources/external_sources/factory.py +22 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/__init__.py +0 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/models.py +90 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +328 -0
- flowfile_core/flowfile/sources/external_sources/sql_source/utils.py +379 -0
- flowfile_core/flowfile/util/__init__.py +0 -0
- flowfile_core/flowfile/util/calculate_layout.py +137 -0
- flowfile_core/flowfile/util/execution_orderer.py +141 -0
- flowfile_core/flowfile/utils.py +106 -0
- flowfile_core/main.py +138 -0
- flowfile_core/routes/__init__.py +0 -0
- flowfile_core/routes/auth.py +34 -0
- flowfile_core/routes/logs.py +163 -0
- flowfile_core/routes/public.py +10 -0
- flowfile_core/routes/routes.py +601 -0
- flowfile_core/routes/secrets.py +85 -0
- flowfile_core/run_lock.py +11 -0
- flowfile_core/schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/__init__.py +0 -0
- flowfile_core/schemas/analysis_schemas/graphic_walker_schemas.py +118 -0
- flowfile_core/schemas/defaults.py +9 -0
- flowfile_core/schemas/external_sources/__init__.py +0 -0
- flowfile_core/schemas/external_sources/airbyte_schemas.py +20 -0
- flowfile_core/schemas/input_schema.py +477 -0
- flowfile_core/schemas/models.py +193 -0
- flowfile_core/schemas/output_model.py +115 -0
- flowfile_core/schemas/schemas.py +106 -0
- flowfile_core/schemas/transform_schema.py +569 -0
- flowfile_core/secrets/__init__.py +0 -0
- flowfile_core/secrets/secrets.py +64 -0
- flowfile_core/utils/__init__.py +0 -0
- flowfile_core/utils/arrow_reader.py +247 -0
- flowfile_core/utils/excel_file_manager.py +18 -0
- flowfile_core/utils/fileManager.py +45 -0
- flowfile_core/utils/fl_executor.py +38 -0
- flowfile_core/utils/utils.py +8 -0
- flowfile_frame/__init__.py +56 -0
- flowfile_frame/__main__.py +12 -0
- flowfile_frame/adapters.py +17 -0
- flowfile_frame/expr.py +1163 -0
- flowfile_frame/flow_frame.py +2093 -0
- flowfile_frame/group_frame.py +199 -0
- flowfile_frame/join.py +75 -0
- flowfile_frame/selectors.py +242 -0
- flowfile_frame/utils.py +184 -0
- flowfile_worker/__init__.py +55 -0
- flowfile_worker/configs.py +95 -0
- flowfile_worker/create/__init__.py +37 -0
- flowfile_worker/create/funcs.py +146 -0
- flowfile_worker/create/models.py +86 -0
- flowfile_worker/create/pl_types.py +35 -0
- flowfile_worker/create/read_excel_tables.py +110 -0
- flowfile_worker/create/utils.py +84 -0
- flowfile_worker/external_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/__init__.py +0 -0
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +161 -0
- flowfile_worker/external_sources/airbyte_sources/main.py +89 -0
- flowfile_worker/external_sources/airbyte_sources/models.py +133 -0
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- flowfile_worker/external_sources/sql_source/__init__.py +0 -0
- flowfile_worker/external_sources/sql_source/main.py +56 -0
- flowfile_worker/external_sources/sql_source/models.py +72 -0
- flowfile_worker/flow_logger.py +58 -0
- flowfile_worker/funcs.py +327 -0
- flowfile_worker/main.py +108 -0
- flowfile_worker/models.py +95 -0
- flowfile_worker/polars_fuzzy_match/__init__.py +0 -0
- flowfile_worker/polars_fuzzy_match/matcher.py +435 -0
- flowfile_worker/polars_fuzzy_match/models.py +36 -0
- flowfile_worker/polars_fuzzy_match/pre_process.py +213 -0
- flowfile_worker/polars_fuzzy_match/process.py +86 -0
- flowfile_worker/polars_fuzzy_match/utils.py +50 -0
- flowfile_worker/process_manager.py +36 -0
- flowfile_worker/routes.py +440 -0
- flowfile_worker/secrets.py +148 -0
- flowfile_worker/spawner.py +187 -0
- flowfile_worker/utils.py +25 -0
- test_utils/__init__.py +3 -0
- test_utils/postgres/__init__.py +1 -0
- test_utils/postgres/commands.py +109 -0
- test_utils/postgres/fixtures.py +417 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import math
|
|
3
|
+
from collections import defaultdict, deque
|
|
4
|
+
from typing import List, Dict, Set, Tuple, TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
if TYPE_CHECKING:
|
|
7
|
+
# Make sure this import path is correct for your project structure
|
|
8
|
+
from flowfile_core.flowfile.FlowfileFlow import FlowGraph
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def calculate_layered_layout(
        graph: 'FlowGraph',
        x_spacing: int = 250,
        y_spacing: int = 100,
        initial_y: int = 50
) -> Dict[int, Tuple[int, int]]:
    """
    Calculates node positions using a simplified layered approach for a
    LEFT-TO-RIGHT flow. Stages determine horizontal position (X), and
    nodes within a stage are spread vertically (Y).

    Args:
        graph: The FlowGraph instance.
        x_spacing: Horizontal distance between stage centers (X spacing).
        y_spacing: Vertical distance between node centers within a stage (Y spacing).
        initial_y: Reference Y position for vertically centering stages.

    Returns:
        A dictionary mapping node_id to calculated (pos_x, pos_y).
    """
    # NOTE(review): reads the private graph._node_db — confirm a public
    # emptiness check exists before changing.
    if not graph._node_db:
        return {}

    nodes = graph.nodes
    node_ids = {node.node_id for node in nodes}
    adj: Dict[int, List[int]] = defaultdict(list)
    rev_adj: Dict[int, List[int]] = defaultdict(list)
    in_degree: Dict[int, int] = defaultdict(int)

    # --- Graph Building Stage ---
    try:
        connections = graph.node_connections
        for u, v in connections:
            # Ignore edges touching unknown nodes; dedupe repeated edges.
            if u in node_ids and v in node_ids:
                if v not in adj[u]:
                    adj[u].append(v)
                if u not in rev_adj[v]:
                    rev_adj[v].append(u)
        for node_id in node_ids:
            in_degree[node_id] = len(rev_adj.get(node_id, []))
    except Exception as e:
        # Fallback graph building: derive edges from each node's outgoing links.
        logging.warning("Layout: node_connections unavailable (%s); "
                        "falling back to leads_to_nodes.", e)
        adj.clear()
        rev_adj.clear()
        in_degree.clear()
        for node in nodes:
            if node.node_id not in in_degree:
                in_degree[node.node_id] = 0
        for node in nodes:
            for child_node in node.leads_to_nodes:
                if child_node.node_id in node_ids:
                    if child_node.node_id not in adj[node.node_id]:
                        adj[node.node_id].append(child_node.node_id)
                        # Count each unique edge exactly once so in-degrees
                        # stay consistent with the deduplicated adjacency list.
                        in_degree[child_node.node_id] += 1

    # --- Staging (Kahn-style BFS): each pass over the queue is one stage ---
    stages: Dict[int, List[int]] = defaultdict(list)
    node_stage: Dict[int, int] = {}
    initial_sources = sorted([node_id for node_id in node_ids if in_degree.get(node_id, 0) == 0])
    queue = deque(initial_sources)
    current_stage = 0
    processed_nodes_count = 0
    # Work on a copy so the original in-degree map is left untouched.
    current_in_degree = defaultdict(int, in_degree)

    while queue:
        processing_order = sorted(queue)
        queue.clear()
        nodes_in_current_stage = []
        for u in processing_order:
            nodes_in_current_stage.append(u)
            node_stage[u] = current_stage
            processed_nodes_count += 1
            for v in sorted(adj.get(u, [])):
                current_in_degree[v] -= 1
                if current_in_degree[v] == 0:
                    queue.append(v)
                elif current_in_degree[v] < 0:
                    # Edge bookkeeping went wrong (duplicate decrement).
                    logging.warning("Layout: node %s reached negative in-degree.", v)
        if nodes_in_current_stage:
            stages[current_stage] = nodes_in_current_stage
            current_stage += 1

    # Handle unprocessed nodes (cycles/disconnected): park them in a final stage.
    if processed_nodes_count != len(node_ids):
        logging.warning("Layout: cycles or disconnected nodes? Processed %s/%s.",
                        processed_nodes_count, len(node_ids))
        unprocessed_nodes = sorted(node_ids - set(node_stage.keys()))
        for node_id in unprocessed_nodes:
            node_stage[node_id] = current_stage
            stages[current_stage].append(node_id)
        if unprocessed_nodes:
            stages[current_stage].sort()
            current_stage += 1

    # --- Coordinate Assignment Stage ---
    positions: Dict[int, Tuple[int, int]] = {}
    max_stage_height = 0

    # First pass: sort each stage and find the tallest one so every stage
    # can be vertically centered against it.
    for stage_index, node_ids_in_stage in stages.items():
        if not node_ids_in_stage:
            continue
        sorted_nodes = sorted(node_ids_in_stage)
        stages[stage_index] = sorted_nodes
        stage_height = (len(sorted_nodes) - 1) * y_spacing
        max_stage_height = max(max_stage_height, stage_height)

    center_offset_y = max_stage_height / 2

    # Second pass: assign coordinates, one column (stage) at a time.
    for stage_index, node_ids_in_stage in stages.items():
        if not node_ids_in_stage:
            continue

        pos_x = stage_index * x_spacing

        stage_height = (len(node_ids_in_stage) - 1) * y_spacing
        current_center_y = initial_y + center_offset_y
        start_y = current_center_y - (stage_height / 2)

        for i, node_id in enumerate(node_ids_in_stage):
            pos_y = start_y + i * y_spacing
            positions[node_id] = (int(pos_x), int(pos_y))

    return positions
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
from typing import List, Dict, Set
|
|
2
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
3
|
+
from flowfile_core.configs import logger
|
|
4
|
+
from collections import deque, defaultdict
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def determine_execution_order(all_nodes: List[FlowNode], flow_starts: List[FlowNode] = None) -> List[FlowNode]:
    """
    Determine the order in which flow nodes must run, via topological sort.

    Args:
        all_nodes (List[FlowNode]): Every node (step) in the flow.
        flow_starts (List[FlowNode], optional): Explicit starting nodes. When
            omitted, all zero-in-degree nodes are used as starting points.

    Returns:
        List[FlowNode]: Nodes in a valid execution order.

    Raises:
        Exception: When a cycle prevents a complete ordering.
    """
    node_map = build_node_map(all_nodes)
    in_degree, adjacency_list = compute_in_degrees_and_adjacency_list(all_nodes, node_map)
    queue, visited_nodes = initialize_queue(flow_starts, all_nodes, in_degree)

    ordered = perform_topological_sort(queue, node_map, in_degree, adjacency_list, visited_nodes)
    # A shorter-than-expected result means some nodes were never reachable
    # with zero in-degree, i.e. the graph contains a cycle.
    if len(ordered) != len(node_map):
        raise Exception("Cycle detected in the graph. Execution order cannot be determined.")

    logger.info(f"execution order: \n {[node for node in ordered if node.is_correct]}")
    return ordered
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def build_node_map(all_nodes: List[FlowNode]) -> Dict[str, FlowNode]:
    """
    Build a lookup table from node ID to node object.

    Args:
        all_nodes (List[FlowNode]): Every node (step) in the flow.

    Returns:
        Dict[str, FlowNode]: Mapping of node IDs to their FlowNode objects.
    """
    mapping: Dict[str, FlowNode] = {}
    for node in all_nodes:
        mapping[node.node_id] = node
    return mapping
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def compute_in_degrees_and_adjacency_list(all_nodes: List[FlowNode],
                                          node_map: Dict[str, FlowNode]) -> (Dict[str, int], Dict[str, List[str]]):
    """
    Derive in-degree counts and an adjacency list from node links.

    Args:
        all_nodes (List[FlowNode]): Every node (step) in the flow.
        node_map (Dict[str, FlowNode]): Node-ID → node lookup; any node found
            only as a successor is added to it as a side effect.

    Returns:
        (Dict[str, int], Dict[str, List[str]]): A tuple of
            - in_degree: node ID → number of incoming edges, and
            - adjacency_list: node ID → IDs of its direct successors.
    """
    in_degree = defaultdict(int)
    adjacency_list = defaultdict(list)

    for node in all_nodes:
        for successor in node.leads_to_nodes:
            adjacency_list[node.node_id].append(successor.node_id)
            in_degree[successor.node_id] += 1
            # Register successors that were not part of all_nodes.
            node_map.setdefault(successor.node_id, successor)

    return in_degree, adjacency_list
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def initialize_queue(flow_starts: List[FlowNode], all_nodes: List[FlowNode], in_degree: Dict[str, int]) -> (
        deque, Set[str]):
    """
    Seed the topological-sort queue with the flow's starting nodes.

    Args:
        flow_starts (List[FlowNode]): Explicit starting nodes, or None/empty to
            auto-detect all zero-in-degree nodes.
        all_nodes (List[FlowNode]): Every node (step) in the flow.
        in_degree (Dict[str, int]): Node ID → incoming-edge count.

    Returns:
        (deque, Set[str]): A tuple of
            - queue: node IDs ready to execute (zero in-degree), and
            - visited_nodes: the same IDs, tracked as already enqueued.
    """
    queue: deque = deque()
    visited_nodes: Set[str] = set()

    if flow_starts:
        # Only accept explicit starts that truly have no prerequisites.
        for start in flow_starts:
            if in_degree[start.node_id] == 0:
                queue.append(start.node_id)
                visited_nodes.add(start.node_id)
            else:
                logger.warning(f"Flow start node {start.node_id} has non-zero in-degree.")
    else:
        # Auto-detect: every node without incoming edges is a valid start.
        queue.extend(node.node_id for node in all_nodes if in_degree[node.node_id] == 0)
        visited_nodes.update(queue)

    return queue, visited_nodes
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def perform_topological_sort(queue: deque, node_map: Dict[str, FlowNode], in_degree: Dict[str, int],
                             adjacency_list: Dict[str, List[str]], visited_nodes: Set[str]) -> List[FlowNode]:
    """
    Run Kahn's algorithm over the prepared graph structures.

    Args:
        queue (deque): Node IDs with zero in-degree, ready to process.
        node_map (Dict[str, FlowNode]): Node-ID → node lookup.
        in_degree (Dict[str, int]): Node ID → incoming-edge count; mutated in place.
        adjacency_list (Dict[str, List[str]]): Node ID → direct successor IDs.
        visited_nodes (Set[str]): IDs already enqueued; mutated in place.

    Returns:
        List[FlowNode]: Nodes in a valid execution order (may be shorter than
        node_map when the graph contains a cycle).
    """
    ordered: List[FlowNode] = []
    logger.info('Starting topological sort to determine execution order')

    while queue:
        current_node_id = queue.popleft()
        current = node_map.get(current_node_id)
        if current is None:
            # Defensive: an edge referenced an ID that was never registered.
            logger.warning(f"Node with ID {current_node_id} not found in the node map.")
            continue
        ordered.append(current)

        # Releasing this node may unblock its successors.
        for successor_id in adjacency_list.get(current_node_id, []):
            in_degree[successor_id] -= 1
            if in_degree[successor_id] == 0 and successor_id not in visited_nodes:
                queue.append(successor_id)
                visited_nodes.add(successor_id)

    return ordered
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
import os
|
|
3
|
+
import shutil
|
|
4
|
+
import hashlib
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
import json
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def generate_sha256_hash(data: bytes):
    """Return the hex-encoded SHA-256 digest of *data*."""
    return hashlib.sha256(data).hexdigest()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def create_directory_if_not_exists(directory: str):
    """Create *directory* (including missing parents) if it does not exist.

    Uses os.makedirs(exist_ok=True) instead of the previous
    os.path.exists + os.mkdir pair, which raced between the check and the
    create and failed outright when a parent directory was missing.
    """
    os.makedirs(directory, exist_ok=True)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def snake_case_to_camel_case(text: str) -> str:
    """Convert a snake_case string to CamelCase (PascalCase).

    Each underscore-separated piece is capitalized (first letter upper,
    rest lower) and the pieces are concatenated.
    """
    pieces = text.split('_')
    return ''.join(map(str.capitalize, pieces))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def json_default(val):
    """Fallback serializer for json.dumps.

    datetimes become ISO-8601 strings with microsecond precision; objects
    with a __dict__ serialize as that dict; anything else is rejected.
    """
    if isinstance(val, datetime):
        return val.isoformat(timespec='microseconds')
    if hasattr(val, '__dict__'):
        return val.__dict__
    raise Exception('Value is not serializable')
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def json_dumps(thing) -> str:
    """Serialize *thing* to a compact, deterministic JSON string.

    Keys are sorted and separators carry no whitespace so equal inputs
    always produce byte-identical output (suitable for hashing);
    json_default handles datetimes and plain objects.
    """
    return json.dumps(thing,
                      default=json_default,
                      ensure_ascii=False,
                      sort_keys=True,
                      indent=None,
                      separators=(',', ':'))
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def get_hash(val):
    """Return a stable SHA-256 hex digest for *val*.

    Objects that opt in via a truthy overridden_hash() are reduced to their
    built-in hash(); other objects with a __dict__ are hashed over their
    attributes, excluding the layout-only 'pos_x'/'pos_y' fields so moving
    a node on the canvas does not change its hash.
    """
    if hasattr(val, 'overridden_hash') and val.overridden_hash():
        payload = hash(val)
    elif hasattr(val, '__dict__'):
        payload = {attr: value for attr, value in val.__dict__.items()
                   if attr not in {'pos_x', 'pos_y'}}
    else:
        # Plain values (and objects exposing only .json) are hashed as-is.
        payload = val
    return generate_sha256_hash(json_dumps(payload).encode('utf-8'))
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def cleanup(start_location: str = 'temp_storage'):
    """Delete everything underneath *start_location*, deepest paths first.

    WARNING: destructive — removes every file and directory below the given
    root. Depth ordering (by path-separator count) ensures children are
    removed before their parents. The root itself is excluded by the
    ``action[0] > 0`` filter only when *start_location* contains no
    separators (e.g. the default relative name).
    """
    def get_all_files_and_folders(_start_location) -> List[str]:
        # Breadth-first walk: collect every path (files and directories)
        # reachable from the start location.
        inspect_items = [_start_location]
        output = []
        while len(inspect_items) > 0:
            attributes = []
            for inspect_item in inspect_items:
                output.append(inspect_item)
                if os.path.isdir(inspect_item):
                    dir_attributes = [os.path.join(inspect_item, _item) for _item in os.listdir(inspect_item)]
                    if len(dir_attributes) > 0:
                        attributes += dir_attributes
            # Next level of the tree becomes the new frontier.
            inspect_items = attributes
        return output

    output = get_all_files_and_folders(start_location)

    # Get depth of each path (separator count) and bucket by that depth.
    actions = [(_path.count(os.sep), _path) for _path in output]
    actions = [action for action in actions if action[0] > 0]
    files_to_delete = {action[0]: [] for action in actions}
    directories_to_delete = {action[0]: [] for action in actions}
    for action in actions:
        if os.path.isfile(action[1]):
            files_to_delete[action[0]].append(action[1])
        else:
            directories_to_delete[action[0]].append(action[1])

    # Sort buckets deepest-first so children are deleted before parents.
    files_to_delete = list(files_to_delete.items())
    directories_to_delete = list(directories_to_delete.items())
    files_to_delete.sort(key=lambda x: x[0], reverse=True)
    directories_to_delete.sort(key=lambda x: x[0], reverse=True)
    # Files first, then directory trees (rmtree handles any leftovers).
    for action in files_to_delete:
        for _f in action[1]:
            os.remove(_f)
    for action in directories_to_delete:
        for _f in action[1]:
            shutil.rmtree(_f)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def batch_generator(input_list: List, batch_size: int = 10000):
    """Yield successive chunks of *input_list*, each at most *batch_size* long.

    Preserves the original contract: an empty input still yields a single
    empty batch.
    """
    if not input_list:
        yield input_list
        return
    for start in range(0, len(input_list), batch_size):
        yield input_list[start:start + batch_size]
|
|
106
|
+
|
flowfile_core/main.py
ADDED
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import signal
|
|
4
|
+
from contextlib import asynccontextmanager
|
|
5
|
+
|
|
6
|
+
import uvicorn
|
|
7
|
+
from fastapi import FastAPI
|
|
8
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
9
|
+
|
|
10
|
+
from flowfile_core import ServerRun
|
|
11
|
+
from flowfile_core.configs.settings import SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL
|
|
12
|
+
|
|
13
|
+
from flowfile_core.routes.auth import router as auth_router
|
|
14
|
+
from flowfile_core.routes.secrets import router as secrets_router
|
|
15
|
+
from flowfile_core.routes.routes import router
|
|
16
|
+
from flowfile_core.routes.public import router as public_router
|
|
17
|
+
from flowfile_core.routes.logs import router as logs_router
|
|
18
|
+
|
|
19
|
+
from flowfile_core.configs.flow_logger import clear_all_flow_logs
|
|
20
|
+
|
|
21
|
+
# Run in Electron (desktop) mode; flowfile_core.routes.auth reads this env
# var to auto-authenticate the local user.
os.environ["FLOWFILE_MODE"] = "electron"

# Shared shutdown state for the signal handler and the /shutdown route.
should_exit = False  # NOTE(review): not read anywhere in this module — confirm before removing
server_instance = None  # set by run() to the active uvicorn.Server, cleared on exit
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@asynccontextmanager
async def shutdown_handler(app: FastAPI):
    """Handle graceful shutdown of the application."""
    # FastAPI lifespan context: code before `yield` runs at startup; the
    # `finally` block runs at shutdown, even on abnormal termination.
    print('Starting core application...')
    try:
        yield
    finally:
        print('Shutting down core application...')
        print("Cleaning up core service resources...")
        # Presumably clears accumulated per-flow log files (see
        # configs.flow_logger) — verify against that module.
        clear_all_flow_logs()
        await asyncio.sleep(0.1)  # Give a moment for cleanup
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Initialize FastAPI with metadata; shutdown_handler is wired in as the
# lifespan so cleanup runs on exit.
app = FastAPI(
    title='Flowfile Backend',
    version='0.1',
    description='Backend for the Flowfile application',
    lifespan=shutdown_handler
)

# Configure CORS.
# NOTE(review): localhost-only origins match the Electron/dev use case;
# confirm before serving behind a real hostname.
origins = [
    "http://localhost",
    "http://localhost:5173",
    "http://localhost:3000",
    "http://localhost:8080",
    "http://localhost:8081",
    "http://localhost:4173",
    "http://localhost:4174",
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Route registration: public endpoints, the main API, log streaming, and
# the prefixed auth/secrets routers.
app.include_router(public_router)
app.include_router(router)
app.include_router(logs_router, tags=["logs"])
app.include_router(auth_router, prefix="/auth", tags=["auth"])
app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@app.post("/shutdown")
async def shutdown():
    """Endpoint to handle graceful shutdown.

    Sets the ServerRun exit flag and schedules trigger_shutdown() as a
    background task so the HTTP response is sent before the server exits.
    """
    ServerRun.exit = True
    print(f"ServerRun.exit = {ServerRun.exit}")
    if server_instance:
        # Fire-and-forget: do NOT await the task. Awaiting it (as the
        # previous code did) blocked this handler for trigger_shutdown's
        # one-second delay, so the response was only sent after the server
        # had already begun exiting — defeating the delay's purpose.
        asyncio.create_task(trigger_shutdown())
    return {"message": "Shutting down"}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
async def trigger_shutdown():
    """Trigger the actual shutdown after responding to the client"""
    # Give time for the response to be sent
    await asyncio.sleep(1)
    if not server_instance:
        return
    server_instance.should_exit = True
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def signal_handler(signum, frame):
    """Handle shutdown signals"""
    print(f"Received signal {signum}")
    if not server_instance:
        return
    # Ask uvicorn to exit its serve loop gracefully.
    server_instance.should_exit = True
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def run(host: str = None, port: int = None):
    """Run the FastAPI app with graceful shutdown.

    Args:
        host: Bind address; defaults to settings.SERVER_HOST when None.
        port: Bind port; defaults to settings.SERVER_PORT when None.
    """
    global server_instance

    # Use values from settings if not explicitly provided
    if host is None:
        host = SERVER_HOST
    if port is None:
        port = SERVER_PORT

    print(f"Starting server on {host}:{port}")
    print(f"Worker configured at {WORKER_URL} (host: {WORKER_HOST}, port: {WORKER_PORT})")

    # Setup signal handlers: SIGTERM/SIGINT flip server.should_exit so
    # uvicorn can drain in-flight requests instead of dying abruptly.
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    config = uvicorn.Config(
        app,
        host=host,
        port=port,
        loop="asyncio"
    )
    server = uvicorn.Server(config)
    server_instance = server  # Store server instance globally

    print('Starting core server...')
    # NOTE(review): this prints before server.run() actually begins serving,
    # so "started" here is aspirational — confirm whether it should move.
    print('Core server started')

    try:
        # Blocks until should_exit is set (signal or /shutdown endpoint).
        server.run()
    except KeyboardInterrupt:
        print("Received interrupt signal, shutting down...")
    finally:
        server_instance = None
        print("Core server shutdown complete")


if __name__ == "__main__":
    run()
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# app_routes/auth.py
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
|
|
5
|
+
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
|
6
|
+
from sqlalchemy.orm import Session
|
|
7
|
+
|
|
8
|
+
from flowfile_core.auth.jwt import get_current_active_user, create_access_token
|
|
9
|
+
from flowfile_core.auth.models import Token, User
|
|
10
|
+
from flowfile_core.database.connection import get_db
|
|
11
|
+
|
|
12
|
+
router = APIRouter()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@router.post("/token", response_model=Token)
async def login_for_access_token(request: Request, db: Session = Depends(get_db)):
    """Issue a JWT access token.

    In Electron (desktop) mode the app is single-user and local, so a token
    for "local_user" is issued without credentials. Any other mode must
    authenticate against the database, which is not implemented yet.

    Raises:
        HTTPException: 401 when not running in Electron mode.
    """
    # In Electron mode, auto-authenticate without requiring form data.
    # SECURITY: removed the leftover `or 1 == 1` debug clause that made this
    # condition unconditionally true, bypassing the mode check and leaving
    # the non-Electron branch unreachable.
    if os.environ.get("FLOWFILE_MODE") == "electron":
        access_token = create_access_token(data={"sub": "local_user"})
        return {"access_token": access_token, "token_type": "bearer"}
    else:
        # In Docker mode, authenticate against database
        # Would typically process form data here
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Docker mode authentication not implemented yet",
            headers={"WWW-Authenticate": "Bearer"},
        )
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Get current user endpoint
@router.get("/users/me", response_model=User)
async def read_users_me(current_user=Depends(get_current_active_user)):
    """Return the authenticated user resolved by the JWT dependency."""
    return current_user
|