Flowfile 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +27 -6
- flowfile/api.py +5 -2
- flowfile/web/__init__.py +4 -2
- flowfile/web/static/assets/{CloudConnectionManager-d004942f.js → CloudConnectionManager-c20a740f.js} +3 -4
- flowfile/web/static/assets/{CloudStorageReader-eccf9fc2.js → CloudStorageReader-960b400a.js} +7 -7
- flowfile/web/static/assets/{CloudStorageWriter-b1ba6bba.js → CloudStorageWriter-e3decbdd.js} +7 -7
- flowfile/web/static/assets/{CrossJoin-68981877.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-0b06649c.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-8349a426.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-905344f8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-9f5b8638.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-131a6d53.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-e3549dcc.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-6e0730ae.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-02f033e6.js → Formula-4207ea31.js} +8 -8
- flowfile/web/static/assets/{FuzzyMatch-54c14036.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-08a3f499.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-2ae38139.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-493b9772.js → Join-4e49a274.js} +9 -9
- flowfile/web/static/assets/{ManualInput-4373d163.js → ManualInput-90998ae8.js} +5 -5
- flowfile/web/static/assets/{Output-b534f3c7.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-2968ff65.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-65136536.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-c56339ed.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-1c641a5e.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-df308b8f.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-293e8a64.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-03911655.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-3058a13d.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-fbf4fb39.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-a29bbaf7.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-c7d7760e.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-118f1d20.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-f0589571.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-7329a207.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-30b0be15.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/{api-fb67319c.js → api-6ef0dcef.js} +1 -1
- flowfile/web/static/assets/{api-602fb95c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/{designer-94a6bf4d.js → designer-13eabd83.js} +4 -4
- flowfile/web/static/assets/{documentation-a224831e.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-c2d2aa97.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-921ac5fd.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-7013cc94.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-3a75211d.js → index-f6c15e76.js} +46 -22
- flowfile/web/static/assets/{nodeTitle-a63d4680.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-763aec6e.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-08464729.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-f15a5f87.js → vue-codemirror.esm-2847001e.js} +1 -1
- flowfile/web/static/assets/{vue-content-loader.es-93bd09d7.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/METADATA +2 -2
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/RECORD +100 -98
- flowfile_core/__init__.py +1 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +1 -0
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/configs/utils.py +5 -0
- flowfile_core/database/connection.py +1 -3
- flowfile_core/flowfile/code_generator/code_generator.py +71 -0
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +1 -2
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +598 -310
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_graph.py +620 -192
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +510 -89
- flowfile_core/flowfile/flow_node/models.py +125 -20
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +36 -5
- flowfile_core/main.py +32 -13
- flowfile_core/routes/cloud_connections.py +7 -11
- flowfile_core/routes/logs.py +2 -6
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +127 -51
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/input_schema.py +92 -64
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +144 -11
- flowfile_core/schemas/transform_schema.py +82 -17
- flowfile_core/utils/arrow_reader.py +8 -3
- flowfile_core/utils/validate_setup.py +0 -2
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/__init__.py +0 -0
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +42 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +233 -111
- flowfile_frame/flow_frame.pyi +137 -91
- flowfile_frame/flow_frame_methods.py +150 -12
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- test_utils/s3/data_generator.py +1 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +6 -1
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/models.py +0 -193
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/LICENSE +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/WHEEL +0 -0
- {flowfile-0.3.6.dist-info → flowfile-0.3.8.dist-info}/entry_points.txt +0 -0
|
@@ -1,15 +1,32 @@
|
|
|
1
1
|
|
|
2
2
|
import pyarrow as pa
|
|
3
3
|
from typing import List, Union, Callable, Optional, Literal
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
# Forward declaration for type hints to avoid circular imports
|
|
7
|
+
if False:
|
|
8
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
4
9
|
|
|
5
10
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
6
11
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
7
12
|
from flowfile_core.schemas import schemas
|
|
8
|
-
from dataclasses import dataclass
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
@dataclass
|
|
12
16
|
class NodeStepPromise:
|
|
17
|
+
"""
|
|
18
|
+
A lightweight, temporary representation of a node during the initial
|
|
19
|
+
graph construction phase, before full `FlowNode` objects are created.
|
|
20
|
+
|
|
21
|
+
Attributes:
|
|
22
|
+
node_id: The unique identifier for the node.
|
|
23
|
+
name: The display name of the node.
|
|
24
|
+
is_start: A boolean indicating if this is a starting node with no inputs.
|
|
25
|
+
leads_to_id: A list of node IDs that this node connects to.
|
|
26
|
+
left_input: The ID of the node connected to the left input port.
|
|
27
|
+
right_input: The ID of the node connected to the right input port.
|
|
28
|
+
depends_on: A list of node IDs that this node depends on for main inputs.
|
|
29
|
+
"""
|
|
13
30
|
node_id: Union[str, int]
|
|
14
31
|
name: str
|
|
15
32
|
is_start: bool
|
|
@@ -20,6 +37,9 @@ class NodeStepPromise:
|
|
|
20
37
|
|
|
21
38
|
|
|
22
39
|
class NodeStepStats:
|
|
40
|
+
"""
|
|
41
|
+
Tracks the execution status and statistics of a `FlowNode`.
|
|
42
|
+
"""
|
|
23
43
|
error: str = None
|
|
24
44
|
_has_run_with_current_setup: bool = False
|
|
25
45
|
has_completed_last_run: bool = False
|
|
@@ -31,23 +51,46 @@ class NodeStepStats:
|
|
|
31
51
|
has_completed_last_run: bool = False,
|
|
32
52
|
active: bool = True,
|
|
33
53
|
is_canceled: bool = False):
|
|
54
|
+
"""
|
|
55
|
+
Initializes the node's statistics.
|
|
56
|
+
|
|
57
|
+
:param error: Any error message from the last run.
|
|
58
|
+
:param has_run_with_current_setup: Flag indicating if the node has run successfully with its current configuration.
|
|
59
|
+
:param has_completed_last_run: Flag indicating if the last triggered run finished (successfully or not).
|
|
60
|
+
:param active: Flag indicating if the node is active in the flow.
|
|
61
|
+
:param is_canceled: Flag indicating if the last run was canceled.
|
|
62
|
+
"""
|
|
34
63
|
self.error = error
|
|
35
64
|
self._has_run_with_current_setup = has_run_with_current_setup
|
|
36
65
|
self.has_completed_last_run = has_completed_last_run
|
|
37
66
|
self.active = active
|
|
38
67
|
self.is_canceled = is_canceled
|
|
39
68
|
|
|
40
|
-
def __repr__(self):
|
|
69
|
+
def __repr__(self) -> str:
|
|
70
|
+
"""
|
|
71
|
+
Provides a string representation of the node's stats.
|
|
72
|
+
:return: A string detailing the current stats.
|
|
73
|
+
"""
|
|
41
74
|
return (f"NodeStepStats(error={self.error}, has_run_with_current_setup={self.has_run_with_current_setup}, "
|
|
42
75
|
f"has_completed_last_run={self.has_completed_last_run}, "
|
|
43
76
|
f"active={self.active}, is_canceled={self.is_canceled})")
|
|
44
77
|
|
|
45
78
|
@property
|
|
46
79
|
def has_run_with_current_setup(self) -> bool:
|
|
80
|
+
"""
|
|
81
|
+
Checks if the node has run successfully with its current settings and inputs.
|
|
82
|
+
This is the primary flag for caching.
|
|
83
|
+
:return: True if the node is considered up-to-date, False otherwise.
|
|
84
|
+
"""
|
|
47
85
|
return self._has_run_with_current_setup
|
|
48
86
|
|
|
49
87
|
@has_run_with_current_setup.setter
|
|
50
88
|
def has_run_with_current_setup(self, value: bool):
|
|
89
|
+
"""
|
|
90
|
+
Sets the run status of the node.
|
|
91
|
+
If set to True, it implies the last run was completed successfully.
|
|
92
|
+
:param value: The new boolean status.
|
|
93
|
+
"""
|
|
51
94
|
if value:
|
|
52
95
|
self._has_run_with_current_setup = True
|
|
53
96
|
self.has_completed_last_run = True
|
|
@@ -56,6 +99,17 @@ class NodeStepStats:
|
|
|
56
99
|
|
|
57
100
|
|
|
58
101
|
class NodeStepSettings:
|
|
102
|
+
"""
|
|
103
|
+
Holds the configuration settings that control a node's execution behavior.
|
|
104
|
+
|
|
105
|
+
Attributes:
|
|
106
|
+
cache_results: If True, the node will cache its results to avoid re-computation.
|
|
107
|
+
renew_schema: If True, the schema will be re-evaluated on changes.
|
|
108
|
+
streamable: If True, the node can process data in a streaming fashion.
|
|
109
|
+
setup_errors: If True, indicates a non-blocking error occurred during setup.
|
|
110
|
+
breaking_setup_errors: If True, indicates an error occurred that prevents execution.
|
|
111
|
+
execute_location: The preferred location for execution ('auto', 'local', 'remote').
|
|
112
|
+
"""
|
|
59
113
|
cache_results: bool = False
|
|
60
114
|
renew_schema: bool = True
|
|
61
115
|
streamable: bool = True
|
|
@@ -65,20 +119,40 @@ class NodeStepSettings:
|
|
|
65
119
|
|
|
66
120
|
|
|
67
121
|
class NodeStepInputs:
|
|
122
|
+
"""
|
|
123
|
+
Manages the input connections for a `FlowNode`.
|
|
124
|
+
|
|
125
|
+
Attributes:
|
|
126
|
+
left_input: The `FlowNode` connected to the left input port.
|
|
127
|
+
right_input: The `FlowNode` connected to the right input port.
|
|
128
|
+
main_inputs: A list of `FlowNode` objects connected to the main input port(s).
|
|
129
|
+
"""
|
|
68
130
|
left_input: "FlowNode" = None
|
|
69
131
|
right_input: "FlowNode" = None
|
|
70
132
|
main_inputs: List["FlowNode"] = None
|
|
71
133
|
|
|
72
134
|
@property
|
|
73
|
-
def input_ids(self) -> List[int]:
|
|
135
|
+
def input_ids(self) -> List[int] | None:
|
|
136
|
+
"""
|
|
137
|
+
Gets the IDs of all connected input nodes.
|
|
138
|
+
:return: A list of integer node IDs.
|
|
139
|
+
"""
|
|
74
140
|
if self.main_inputs is not None:
|
|
75
141
|
return [node_input.node_information.id for node_input in self.get_all_inputs()]
|
|
76
142
|
|
|
77
143
|
def get_all_inputs(self) -> List["FlowNode"]:
|
|
144
|
+
"""
|
|
145
|
+
Retrieves a single list containing all input nodes (main, left, and right).
|
|
146
|
+
:return: A list of all connected `FlowNode` objects.
|
|
147
|
+
"""
|
|
78
148
|
main_inputs = self.main_inputs or []
|
|
79
149
|
return [v for v in main_inputs + [self.left_input, self.right_input] if v is not None]
|
|
80
150
|
|
|
81
151
|
def __repr__(self) -> str:
|
|
152
|
+
"""
|
|
153
|
+
Provides a string representation of the node's inputs.
|
|
154
|
+
:return: A string detailing the connected inputs.
|
|
155
|
+
"""
|
|
82
156
|
left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
|
|
83
157
|
right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
|
|
84
158
|
main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
|
|
@@ -86,27 +160,46 @@ class NodeStepInputs:
|
|
|
86
160
|
|
|
87
161
|
def validate_if_input_connection_exists(self, node_input_id: int,
|
|
88
162
|
connection_name: Literal['main', 'left', 'right']) -> bool:
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
163
|
+
"""
|
|
164
|
+
Checks if a connection from a specific node ID exists on a given port.
|
|
165
|
+
|
|
166
|
+
:param node_input_id: The ID of the source node to check for.
|
|
167
|
+
:param connection_name: The name of the input port ('main', 'left', 'right').
|
|
168
|
+
:return: True if the connection exists, False otherwise.
|
|
169
|
+
"""
|
|
170
|
+
if connection_name == 'main' and self.main_inputs:
|
|
171
|
+
return any(node_input.node_information.id == node_input_id for node_input in self.main_inputs)
|
|
172
|
+
if connection_name == 'left' and self.left_input:
|
|
92
173
|
return self.left_input.node_information.id == node_input_id
|
|
93
174
|
if connection_name == 'right':
|
|
94
175
|
return self.right_input.node_information.id == node_input_id
|
|
95
176
|
|
|
96
177
|
|
|
97
178
|
class NodeSchemaInformation:
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
179
|
+
"""
|
|
180
|
+
Stores all schema-related information for a `FlowNode`.
|
|
181
|
+
|
|
182
|
+
Attributes:
|
|
183
|
+
result_schema: The actual output schema after a successful execution.
|
|
184
|
+
predicted_schema: The predicted output schema, calculated without full execution.
|
|
185
|
+
input_columns: A list of column names the node requires from its inputs.
|
|
186
|
+
drop_columns: A list of column names that will be dropped by the node.
|
|
187
|
+
output_columns: A list of `FlowfileColumn` objects that will be added by the node.
|
|
188
|
+
"""
|
|
189
|
+
result_schema: Optional[List[FlowfileColumn]] = None
|
|
190
|
+
predicted_schema: Optional[List[FlowfileColumn]] = None
|
|
191
|
+
input_columns: List[str] = []
|
|
192
|
+
drop_columns: List[str] = []
|
|
193
|
+
output_columns: List[FlowfileColumn] = []
|
|
103
194
|
|
|
104
195
|
|
|
105
196
|
class NodeResults:
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
197
|
+
"""
|
|
198
|
+
Stores the outputs of a `FlowNode`'s execution, including data, errors, and metadata.
|
|
199
|
+
"""
|
|
200
|
+
_resulting_data: Optional[FlowDataEngine] = None
|
|
201
|
+
example_data: Optional[FlowDataEngine] = None
|
|
202
|
+
example_data_path: Optional[str] = None
|
|
110
203
|
example_data_generator: Optional[Callable[[], pa.Table]] = None
|
|
111
204
|
run_time: int = -1
|
|
112
205
|
errors: Optional[str] = None
|
|
@@ -122,19 +215,31 @@ class NodeResults:
|
|
|
122
215
|
self.example_data_generator = None
|
|
123
216
|
self.analysis_data_generator = None
|
|
124
217
|
|
|
125
|
-
def get_example_data(self) -> pa.Table
|
|
218
|
+
def get_example_data(self) -> Optional[pa.Table]:
|
|
219
|
+
"""
|
|
220
|
+
Executes the generator to fetch a sample of the resulting data.
|
|
221
|
+
:return: A PyArrow Table containing a sample of the data, or None.
|
|
222
|
+
"""
|
|
126
223
|
if self.example_data_generator:
|
|
127
224
|
return self.example_data_generator()
|
|
128
225
|
|
|
129
226
|
@property
|
|
130
|
-
def resulting_data(self) -> FlowDataEngine:
|
|
227
|
+
def resulting_data(self) -> Optional[FlowDataEngine]:
|
|
228
|
+
"""
|
|
229
|
+
Gets the full resulting data from the node's execution.
|
|
230
|
+
:return: A `FlowDataEngine` instance containing the result, or None.
|
|
231
|
+
"""
|
|
131
232
|
return self._resulting_data
|
|
132
233
|
|
|
133
234
|
@resulting_data.setter
|
|
134
|
-
def resulting_data(self, d: FlowDataEngine):
|
|
235
|
+
def resulting_data(self, d: Optional[FlowDataEngine]):
|
|
236
|
+
"""
|
|
237
|
+
Sets the resulting data.
|
|
238
|
+
:param d: The `FlowDataEngine` instance to store.
|
|
239
|
+
"""
|
|
135
240
|
self._resulting_data = d
|
|
136
241
|
|
|
137
242
|
def reset(self):
|
|
243
|
+
"""Resets all result attributes to their default, empty state."""
|
|
138
244
|
self._resulting_data = None
|
|
139
|
-
self.run_time = -1
|
|
140
|
-
|
|
245
|
+
self.run_time = -1
|
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
import time
|
|
2
|
-
import random
|
|
3
|
-
import uuid
|
|
4
|
-
import socket
|
|
5
|
-
import hashlib
|
|
6
1
|
|
|
7
2
|
from dataclasses import dataclass
|
|
8
3
|
from typing import Dict, List
|
|
@@ -12,33 +7,7 @@ from pathlib import Path
|
|
|
12
7
|
from flowfile_core.flowfile.manage.open_flowfile import open_flow
|
|
13
8
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
14
9
|
from flowfile_core.schemas.schemas import FlowSettings
|
|
15
|
-
from flowfile_core.
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def create_unique_id() -> int:
|
|
19
|
-
"""
|
|
20
|
-
Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
|
|
21
|
-
Returns:
|
|
22
|
-
int: unique id within 32 bits (4 bytes)
|
|
23
|
-
"""
|
|
24
|
-
# Get various entropy sources
|
|
25
|
-
time_ms = int(time.time() * 1000)
|
|
26
|
-
pid = os.getpid()
|
|
27
|
-
random_bytes = random.getrandbits(32)
|
|
28
|
-
mac_addr = uuid.getnode() # MAC address as integer
|
|
29
|
-
hostname = socket.gethostname()
|
|
30
|
-
|
|
31
|
-
# Combine all sources into a string
|
|
32
|
-
seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
|
|
33
|
-
|
|
34
|
-
# Create a hash of all entropy sources
|
|
35
|
-
hash_obj = hashlib.md5(seed.encode())
|
|
36
|
-
hash_int = int(hash_obj.hexdigest(), 16)
|
|
37
|
-
|
|
38
|
-
# Ensure the result fits within 32 bits (4 bytes)
|
|
39
|
-
unique_id = hash_int & 0xFFFFFFFF
|
|
40
|
-
|
|
41
|
-
return unique_id
|
|
10
|
+
from flowfile_core.flowfile.utils import create_unique_id
|
|
42
11
|
|
|
43
12
|
|
|
44
13
|
@dataclass
|
|
@@ -71,7 +40,7 @@ class FlowfileHandler:
|
|
|
71
40
|
raise 'flow already registered'
|
|
72
41
|
else:
|
|
73
42
|
name = flow_settings.name if flow_settings.name else flow_settings.flow_id
|
|
74
|
-
self._flows[flow_settings.flow_id] = FlowGraph(name=name,
|
|
43
|
+
self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_settings=flow_settings)
|
|
75
44
|
return self.get_flow(flow_settings.flow_id)
|
|
76
45
|
|
|
77
46
|
def get_flow(self, flow_id: int) -> FlowGraph | None:
|
|
@@ -70,8 +70,7 @@ def open_flow(flow_path: Path) -> FlowGraph:
|
|
|
70
70
|
flow_storage_obj.flow_name = str(flow_path.stem)
|
|
71
71
|
ensure_compatibility(flow_storage_obj, str(flow_path))
|
|
72
72
|
ingestion_order = determine_insertion_order(flow_storage_obj)
|
|
73
|
-
new_flow = FlowGraph(
|
|
74
|
-
flow_settings=flow_storage_obj.flow_settings)
|
|
73
|
+
new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)
|
|
75
74
|
for node_id in ingestion_order:
|
|
76
75
|
node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
|
|
77
76
|
node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
|
|
@@ -4,7 +4,6 @@ from collections import defaultdict, deque
|
|
|
4
4
|
from typing import List, Dict, Set, Tuple, TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
|
-
# Make sure this import path is correct for your project structure
|
|
8
7
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
9
8
|
|
|
10
9
|
|
|
@@ -64,7 +63,6 @@ def calculate_layered_layout(
|
|
|
64
63
|
if child_node.node_id in node_ids:
|
|
65
64
|
if child_node.node_id not in adj[node.node_id]:
|
|
66
65
|
adj[node.node_id].append(child_node.node_id)
|
|
67
|
-
# Assuming primary method works or in_degree is handled elsewhere
|
|
68
66
|
in_degree[child_node.node_id] += 1
|
|
69
67
|
|
|
70
68
|
stages: Dict[int, List[int]] = defaultdict(list)
|
flowfile_core/flowfile/utils.py
CHANGED
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import hashlib
|
|
3
2
|
import json
|
|
4
3
|
import shutil
|
|
5
4
|
|
|
6
|
-
|
|
5
|
+
import datetime
|
|
7
6
|
from typing import List
|
|
8
7
|
from decimal import Decimal
|
|
8
|
+
import time
|
|
9
|
+
import random
|
|
10
|
+
import uuid
|
|
11
|
+
import socket
|
|
12
|
+
import hashlib
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
def generate_sha256_hash(data: bytes):
|
|
@@ -26,11 +30,11 @@ def snake_case_to_camel_case(text: str) -> str:
|
|
|
26
30
|
|
|
27
31
|
|
|
28
32
|
def json_default(val):
|
|
29
|
-
if isinstance(val, datetime):
|
|
33
|
+
if isinstance(val, datetime.datetime):
|
|
30
34
|
return val.isoformat(timespec='microseconds')
|
|
31
|
-
elif isinstance(val, date):
|
|
35
|
+
elif isinstance(val, datetime.date):
|
|
32
36
|
return val.isoformat()
|
|
33
|
-
elif isinstance(val, time):
|
|
37
|
+
elif isinstance(val, datetime.time):
|
|
34
38
|
return val.isoformat()
|
|
35
39
|
elif hasattr(val, '__dict__'):
|
|
36
40
|
return val.__dict__
|
|
@@ -113,3 +117,30 @@ def batch_generator(input_list: List, batch_size: int = 10000):
|
|
|
113
117
|
yield input_list
|
|
114
118
|
input_list = []
|
|
115
119
|
run = False
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def create_unique_id() -> int:
|
|
123
|
+
"""
|
|
124
|
+
Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
|
|
125
|
+
Returns:
|
|
126
|
+
int: unique id within 32 bits (4 bytes)
|
|
127
|
+
"""
|
|
128
|
+
# Get various entropy sources
|
|
129
|
+
time_ms = int(time.time() * 1000)
|
|
130
|
+
pid = os.getpid()
|
|
131
|
+
random_bytes = random.getrandbits(32)
|
|
132
|
+
mac_addr = uuid.getnode() # MAC address as integer
|
|
133
|
+
hostname = socket.gethostname()
|
|
134
|
+
|
|
135
|
+
# Combine all sources into a string
|
|
136
|
+
seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
|
|
137
|
+
|
|
138
|
+
# Create a hash of all entropy sources
|
|
139
|
+
|
|
140
|
+
hash_obj = hashlib.sha256(seed.encode())
|
|
141
|
+
hash_int = int(hash_obj.hexdigest(), 16)
|
|
142
|
+
|
|
143
|
+
# Ensure the result fits within 32 bits (4 bytes)
|
|
144
|
+
unique_id = hash_int & 0xFFFFFFFF
|
|
145
|
+
|
|
146
|
+
return unique_id
|
flowfile_core/main.py
CHANGED
|
@@ -27,7 +27,11 @@ server_instance = None
|
|
|
27
27
|
|
|
28
28
|
@asynccontextmanager
|
|
29
29
|
async def shutdown_handler(app: FastAPI):
|
|
30
|
-
"""
|
|
30
|
+
"""Handles the graceful startup and shutdown of the FastAPI application.
|
|
31
|
+
|
|
32
|
+
This context manager ensures that resources, such as log files, are cleaned
|
|
33
|
+
up properly when the application is terminated.
|
|
34
|
+
"""
|
|
31
35
|
print('Starting core application...')
|
|
32
36
|
try:
|
|
33
37
|
yield
|
|
@@ -77,31 +81,46 @@ app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=[
|
|
|
77
81
|
|
|
78
82
|
@app.post("/shutdown")
|
|
79
83
|
async def shutdown():
|
|
80
|
-
"""
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
84
|
+
"""An API endpoint to gracefully shut down the server.
|
|
85
|
+
|
|
86
|
+
This endpoint sets a flag that the Uvicorn server checks, allowing it
|
|
87
|
+
to terminate cleanly. A background task is used to trigger the shutdown
|
|
88
|
+
after the HTTP response has been sent.
|
|
89
|
+
"""
|
|
90
|
+
# Use a background task to trigger the shutdown after the response is sent
|
|
91
|
+
background_tasks = ServerRun()
|
|
92
|
+
background_tasks.add_task(trigger_shutdown)
|
|
93
|
+
return {"message": "Server is shutting down"}
|
|
87
94
|
|
|
88
95
|
|
|
89
96
|
async def trigger_shutdown():
|
|
90
|
-
"""
|
|
91
|
-
|
|
97
|
+
"""(Internal) Triggers the actual server shutdown.
|
|
98
|
+
|
|
99
|
+
Waits for a moment to allow the `/shutdown` response to be sent before
|
|
100
|
+
telling the Uvicorn server instance to exit.
|
|
101
|
+
"""
|
|
102
|
+
await asyncio.sleep(1)
|
|
92
103
|
if server_instance:
|
|
93
104
|
server_instance.should_exit = True
|
|
94
105
|
|
|
95
106
|
|
|
96
107
|
def signal_handler(signum, frame):
|
|
97
|
-
"""
|
|
108
|
+
"""Handles OS signals like SIGINT (Ctrl+C) and SIGTERM for graceful shutdown."""
|
|
98
109
|
print(f"Received signal {signum}")
|
|
99
110
|
if server_instance:
|
|
100
111
|
server_instance.should_exit = True
|
|
101
112
|
|
|
102
113
|
|
|
103
114
|
def run(host: str = None, port: int = None):
|
|
104
|
-
"""
|
|
115
|
+
"""Runs the FastAPI application using Uvicorn.
|
|
116
|
+
|
|
117
|
+
This function configures and starts the Uvicorn server, setting up
|
|
118
|
+
signal handlers to ensure a graceful shutdown.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
host: The host to bind the server to. Defaults to `SERVER_HOST` from settings.
|
|
122
|
+
port: The port to bind the server to. Defaults to `SERVER_PORT` from settings.
|
|
123
|
+
"""
|
|
105
124
|
global server_instance
|
|
106
125
|
|
|
107
126
|
# Use values from settings if not explicitly provided
|
|
@@ -134,7 +153,7 @@ def run(host: str = None, port: int = None):
|
|
|
134
153
|
print("Received interrupt signal, shutting down...")
|
|
135
154
|
finally:
|
|
136
155
|
server_instance = None
|
|
137
|
-
print("
|
|
156
|
+
print("Server has shut down.")
|
|
138
157
|
|
|
139
158
|
|
|
140
159
|
if __name__ == "__main__":
|
|
@@ -27,13 +27,11 @@ def create_cloud_storage_connection(input_connection: FullCloudStorageConnection
|
|
|
27
27
|
"""
|
|
28
28
|
Create a new cloud storage connection.
|
|
29
29
|
Parameters
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
db: Session obtained from Depends(get_db)
|
|
30
|
+
input_connection: FullCloudStorageConnection schema containing connection details
|
|
31
|
+
current_user: User obtained from Depends(get_current_active_user)
|
|
32
|
+
db: Session obtained from Depends(get_db)
|
|
34
33
|
Returns
|
|
35
|
-
|
|
36
|
-
Dict with a success message
|
|
34
|
+
Dict with a success message
|
|
37
35
|
"""
|
|
38
36
|
logger.info(f'Create cloud connection {input_connection.connection_name}')
|
|
39
37
|
try:
|
|
@@ -70,12 +68,10 @@ def get_cloud_connections(
|
|
|
70
68
|
"""
|
|
71
69
|
Get all cloud storage connections for the current user.
|
|
72
70
|
Parameters
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
current_user: User obtained from Depends(get_current_active_user)
|
|
71
|
+
db: Session obtained from Depends(get_db)
|
|
72
|
+
current_user: User obtained from Depends(get_current_active_user)
|
|
76
73
|
|
|
77
74
|
Returns
|
|
78
|
-
|
|
79
|
-
List[FullCloudStorageConnectionInterface]
|
|
75
|
+
List[FullCloudStorageConnectionInterface]
|
|
80
76
|
"""
|
|
81
77
|
return get_all_cloud_connections_interface(db, current_user.id)
|
flowfile_core/routes/logs.py
CHANGED
|
@@ -33,9 +33,7 @@ async def format_sse_message(data: str) -> str:
|
|
|
33
33
|
|
|
34
34
|
@router.post("/logs/{flow_id}", tags=['flow_logging'])
|
|
35
35
|
async def add_log(flow_id: int, log_message: str):
|
|
36
|
-
"""
|
|
37
|
-
Adds a log message to the log file for a given flow_id.
|
|
38
|
-
"""
|
|
36
|
+
"""Adds a log message to the log file for a given flow_id."""
|
|
39
37
|
flow = flow_file_handler.get_flow(flow_id)
|
|
40
38
|
if not flow:
|
|
41
39
|
raise HTTPException(status_code=404, detail="Flow not found")
|
|
@@ -45,9 +43,7 @@ async def add_log(flow_id: int, log_message: str):
|
|
|
45
43
|
|
|
46
44
|
@router.post("/raw_logs", tags=['flow_logging'])
|
|
47
45
|
async def add_raw_log(raw_log_input: schemas.RawLogInput):
|
|
48
|
-
"""
|
|
49
|
-
Adds a log message to the log file for a given flow_id.
|
|
50
|
-
"""
|
|
46
|
+
"""Adds a log message to the log file for a given flow_id."""
|
|
51
47
|
logger.info('Adding raw logs')
|
|
52
48
|
flow = flow_file_handler.get_flow(raw_log_input.flowfile_flow_id)
|
|
53
49
|
if not flow:
|