Flowfile 0.3.5__py3-none-any.whl → 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of Flowfile might be problematic. Click here for more details.
- flowfile/__init__.py +27 -6
- flowfile/api.py +1 -0
- flowfile/web/__init__.py +2 -2
- flowfile/web/static/assets/CloudConnectionManager-2dfdce2f.css +86 -0
- flowfile/web/static/assets/CloudConnectionManager-c20a740f.js +783 -0
- flowfile/web/static/assets/CloudStorageReader-29d14fcc.css +143 -0
- flowfile/web/static/assets/CloudStorageReader-960b400a.js +437 -0
- flowfile/web/static/assets/CloudStorageWriter-49c9a4b2.css +138 -0
- flowfile/web/static/assets/CloudStorageWriter-e3decbdd.js +430 -0
- flowfile/web/static/assets/{CrossJoin-dfcf7351.js → CrossJoin-d67e2405.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-b2afb1d7.js → DatabaseConnectionSettings-a81e0f7e.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-824a49b2.js → DatabaseManager-9ea35e84.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-a48124d8.js → DatabaseReader-9578bfa5.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-b47cbae2.js → DatabaseWriter-19531098.js} +9 -9
- flowfile/web/static/assets/{ExploreData-fdfc45a4.js → ExploreData-40476474.js} +47141 -43697
- flowfile/web/static/assets/{ExternalSource-861b0e71.js → ExternalSource-2297ef96.js} +6 -6
- flowfile/web/static/assets/{Filter-f87bb897.js → Filter-f211c03a.js} +8 -8
- flowfile/web/static/assets/{Formula-b8cefc31.css → Formula-29f19d21.css} +10 -0
- flowfile/web/static/assets/{Formula-1e2ed720.js → Formula-4207ea31.js} +75 -9
- flowfile/web/static/assets/{FuzzyMatch-b6cc4fdd.js → FuzzyMatch-bf120df0.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-6a371f4c.js → GraphSolver-5bb7497a.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f7b7f472.js → GroupBy-92c81b65.js} +6 -6
- flowfile/web/static/assets/{Join-eec38203.js → Join-4e49a274.js} +23 -15
- flowfile/web/static/assets/{Join-41c0f331.css → Join-f45eff22.css} +20 -20
- flowfile/web/static/assets/{ManualInput-9aaa46fb.js → ManualInput-90998ae8.js} +106 -34
- flowfile/web/static/assets/{ManualInput-ac7b9972.css → ManualInput-a71b52c6.css} +29 -17
- flowfile/web/static/assets/{Output-3b2ca045.js → Output-81e3e917.js} +4 -4
- flowfile/web/static/assets/{Pivot-a4f5d88f.js → Pivot-a3419842.js} +6 -6
- flowfile/web/static/assets/{PolarsCode-49ce444f.js → PolarsCode-72710deb.js} +6 -6
- flowfile/web/static/assets/{Read-07acdc9a.js → Read-c4059daf.js} +6 -6
- flowfile/web/static/assets/{RecordCount-6a21da56.js → RecordCount-c2b5e095.js} +5 -5
- flowfile/web/static/assets/{RecordId-949bdc17.js → RecordId-10baf191.js} +6 -6
- flowfile/web/static/assets/{Sample-7afca6e1.js → Sample-3ed9a0ae.js} +5 -5
- flowfile/web/static/assets/{SecretManager-b41c029d.js → SecretManager-0d49c0e8.js} +2 -2
- flowfile/web/static/assets/{Select-32b28406.js → Select-8a02a0b3.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-a0f15a05.js → SettingsSection-4c0f45f5.js} +1 -1
- flowfile/web/static/assets/{Sort-fc6ba0e2.js → Sort-f55c9f9d.js} +6 -6
- flowfile/web/static/assets/{TextToRows-23127596.js → TextToRows-5dbc2145.js} +8 -8
- flowfile/web/static/assets/{UnavailableFields-c42880a3.js → UnavailableFields-a1768e52.js} +2 -2
- flowfile/web/static/assets/{Union-39eecc6c.js → Union-f2aefdc9.js} +5 -5
- flowfile/web/static/assets/{Unique-a0e8fe61.js → Unique-46b250da.js} +8 -8
- flowfile/web/static/assets/{Unpivot-1e2d43f0.js → Unpivot-25ac84cc.js} +5 -5
- flowfile/web/static/assets/api-6ef0dcef.js +80 -0
- flowfile/web/static/assets/{api-44ca9e9c.js → api-a0abbdc7.js} +1 -1
- flowfile/web/static/assets/cloud_storage_reader-aa1415d6.png +0 -0
- flowfile/web/static/assets/{designer-267d44f1.js → designer-13eabd83.js} +36 -34
- flowfile/web/static/assets/{documentation-6c0810a2.js → documentation-b87e7f6f.js} +1 -1
- flowfile/web/static/assets/{dropDown-52790b15.js → dropDown-13564764.js} +1 -1
- flowfile/web/static/assets/{fullEditor-e272b506.js → fullEditor-fd2cd6f9.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-4bdcf98e.js → genericNodeSettings-71e11604.js} +3 -3
- flowfile/web/static/assets/{index-e235a8bc.js → index-f6c15e76.js} +59 -22
- flowfile/web/static/assets/{nodeTitle-fc3fc4b7.js → nodeTitle-988d9efe.js} +3 -3
- flowfile/web/static/assets/{secretApi-cdc2a3fd.js → secretApi-dd636aa2.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-96aa82cd.js → selectDynamic-af36165e.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-25e75a08.js → vue-codemirror.esm-2847001e.js} +2 -1
- flowfile/web/static/assets/{vue-content-loader.es-6c4b1c24.js → vue-content-loader.es-0371da73.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/METADATA +9 -4
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/RECORD +131 -124
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/entry_points.txt +2 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/auth/jwt.py +39 -0
- flowfile_core/configs/node_store/nodes.py +9 -6
- flowfile_core/configs/settings.py +6 -5
- flowfile_core/database/connection.py +63 -15
- flowfile_core/database/init_db.py +0 -1
- flowfile_core/database/models.py +49 -2
- flowfile_core/flowfile/code_generator/code_generator.py +472 -17
- flowfile_core/flowfile/connection_manager/models.py +1 -1
- flowfile_core/flowfile/database_connection_manager/db_connections.py +216 -2
- flowfile_core/flowfile/extensions.py +1 -1
- flowfile_core/flowfile/flow_data_engine/cloud_storage_reader.py +259 -0
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +19 -8
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +1062 -311
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +12 -2
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/settings_validator.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/__init__.py +2 -1
- flowfile_core/flowfile/flow_data_engine/join/utils.py +25 -0
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +3 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +29 -22
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -40
- flowfile_core/flowfile/flow_graph.py +718 -253
- flowfile_core/flowfile/flow_graph_utils.py +2 -2
- flowfile_core/flowfile/flow_node/flow_node.py +563 -117
- flowfile_core/flowfile/flow_node/models.py +154 -20
- flowfile_core/flowfile/flow_node/schema_callback.py +3 -2
- flowfile_core/flowfile/handler.py +2 -33
- flowfile_core/flowfile/manage/open_flowfile.py +1 -2
- flowfile_core/flowfile/sources/external_sources/__init__.py +0 -2
- flowfile_core/flowfile/sources/external_sources/factory.py +4 -7
- flowfile_core/flowfile/util/calculate_layout.py +0 -2
- flowfile_core/flowfile/utils.py +35 -26
- flowfile_core/main.py +35 -15
- flowfile_core/routes/cloud_connections.py +77 -0
- flowfile_core/routes/logs.py +2 -7
- flowfile_core/routes/public.py +1 -0
- flowfile_core/routes/routes.py +130 -90
- flowfile_core/routes/secrets.py +72 -14
- flowfile_core/schemas/__init__.py +8 -0
- flowfile_core/schemas/cloud_storage_schemas.py +215 -0
- flowfile_core/schemas/input_schema.py +121 -71
- flowfile_core/schemas/output_model.py +19 -3
- flowfile_core/schemas/schemas.py +150 -12
- flowfile_core/schemas/transform_schema.py +175 -35
- flowfile_core/utils/utils.py +40 -1
- flowfile_core/utils/validate_setup.py +41 -0
- flowfile_frame/__init__.py +9 -1
- flowfile_frame/cloud_storage/frame_helpers.py +39 -0
- flowfile_frame/cloud_storage/secret_manager.py +73 -0
- flowfile_frame/expr.py +28 -1
- flowfile_frame/expr.pyi +76 -61
- flowfile_frame/flow_frame.py +481 -208
- flowfile_frame/flow_frame.pyi +140 -91
- flowfile_frame/flow_frame_methods.py +160 -22
- flowfile_frame/group_frame.py +3 -0
- flowfile_frame/utils.py +25 -3
- flowfile_worker/external_sources/s3_source/main.py +216 -0
- flowfile_worker/external_sources/s3_source/models.py +142 -0
- flowfile_worker/funcs.py +51 -6
- flowfile_worker/models.py +22 -2
- flowfile_worker/routes.py +40 -38
- flowfile_worker/utils.py +1 -1
- test_utils/s3/commands.py +46 -0
- test_utils/s3/data_generator.py +292 -0
- test_utils/s3/demo_data_generator.py +186 -0
- test_utils/s3/fixtures.py +214 -0
- flowfile/web/static/assets/AirbyteReader-1ac35765.css +0 -314
- flowfile/web/static/assets/AirbyteReader-e08044e5.js +0 -922
- flowfile/web/static/assets/dropDownGeneric-60f56a8a.js +0 -72
- flowfile/web/static/assets/dropDownGeneric-895680d6.css +0 -10
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/airbyte.py +0 -159
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +0 -172
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/settings.py +0 -173
- flowfile_core/schemas/defaults.py +0 -9
- flowfile_core/schemas/external_sources/airbyte_schemas.py +0 -20
- flowfile_core/schemas/models.py +0 -193
- flowfile_worker/external_sources/airbyte_sources/cache_manager.py +0 -161
- flowfile_worker/external_sources/airbyte_sources/main.py +0 -89
- flowfile_worker/external_sources/airbyte_sources/models.py +0 -133
- flowfile_worker/external_sources/airbyte_sources/settings.py +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/LICENSE +0 -0
- {flowfile-0.3.5.dist-info → flowfile-0.3.7.dist-info}/WHEEL +0 -0
- {flowfile_core/flowfile/sources/external_sources/airbyte_sources → flowfile_frame/cloud_storage}/__init__.py +0 -0
- {flowfile_core/schemas/external_sources → flowfile_worker/external_sources/s3_source}/__init__.py +0 -0
- {flowfile_worker/external_sources/airbyte_sources → test_utils/s3}/__init__.py +0 -0
|
@@ -1,15 +1,32 @@
|
|
|
1
1
|
|
|
2
2
|
import pyarrow as pa
|
|
3
3
|
from typing import List, Union, Callable, Optional, Literal
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
# Forward declaration for type hints to avoid circular imports
|
|
7
|
+
if False:
|
|
8
|
+
from flowfile_core.flowfile.flow_node.flow_node import FlowNode
|
|
4
9
|
|
|
5
10
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
|
|
6
11
|
from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
|
|
7
12
|
from flowfile_core.schemas import schemas
|
|
8
|
-
from dataclasses import dataclass
|
|
9
13
|
|
|
10
14
|
|
|
11
15
|
@dataclass
|
|
12
16
|
class NodeStepPromise:
|
|
17
|
+
"""
|
|
18
|
+
A lightweight, temporary representation of a node during the initial
|
|
19
|
+
graph construction phase, before full `FlowNode` objects are created.
|
|
20
|
+
|
|
21
|
+
Attributes:
|
|
22
|
+
node_id: The unique identifier for the node.
|
|
23
|
+
name: The display name of the node.
|
|
24
|
+
is_start: A boolean indicating if this is a starting node with no inputs.
|
|
25
|
+
leads_to_id: A list of node IDs that this node connects to.
|
|
26
|
+
left_input: The ID of the node connected to the left input port.
|
|
27
|
+
right_input: The ID of the node connected to the right input port.
|
|
28
|
+
depends_on: A list of node IDs that this node depends on for main inputs.
|
|
29
|
+
"""
|
|
13
30
|
node_id: Union[str, int]
|
|
14
31
|
name: str
|
|
15
32
|
is_start: bool
|
|
@@ -20,13 +37,79 @@ class NodeStepPromise:
|
|
|
20
37
|
|
|
21
38
|
|
|
22
39
|
class NodeStepStats:
|
|
40
|
+
"""
|
|
41
|
+
Tracks the execution status and statistics of a `FlowNode`.
|
|
42
|
+
"""
|
|
23
43
|
error: str = None
|
|
24
|
-
|
|
44
|
+
_has_run_with_current_setup: bool = False
|
|
45
|
+
has_completed_last_run: bool = False
|
|
25
46
|
active: bool = True
|
|
26
47
|
is_canceled: bool = False
|
|
27
48
|
|
|
49
|
+
def __init__(self, error: str = None,
|
|
50
|
+
has_run_with_current_setup: bool = False,
|
|
51
|
+
has_completed_last_run: bool = False,
|
|
52
|
+
active: bool = True,
|
|
53
|
+
is_canceled: bool = False):
|
|
54
|
+
"""
|
|
55
|
+
Initializes the node's statistics.
|
|
56
|
+
|
|
57
|
+
:param error: Any error message from the last run.
|
|
58
|
+
:param has_run_with_current_setup: Flag indicating if the node has run successfully with its current configuration.
|
|
59
|
+
:param has_completed_last_run: Flag indicating if the last triggered run finished (successfully or not).
|
|
60
|
+
:param active: Flag indicating if the node is active in the flow.
|
|
61
|
+
:param is_canceled: Flag indicating if the last run was canceled.
|
|
62
|
+
"""
|
|
63
|
+
self.error = error
|
|
64
|
+
self._has_run_with_current_setup = has_run_with_current_setup
|
|
65
|
+
self.has_completed_last_run = has_completed_last_run
|
|
66
|
+
self.active = active
|
|
67
|
+
self.is_canceled = is_canceled
|
|
68
|
+
|
|
69
|
+
def __repr__(self) -> str:
|
|
70
|
+
"""
|
|
71
|
+
Provides a string representation of the node's stats.
|
|
72
|
+
:return: A string detailing the current stats.
|
|
73
|
+
"""
|
|
74
|
+
return (f"NodeStepStats(error={self.error}, has_run_with_current_setup={self.has_run_with_current_setup}, "
|
|
75
|
+
f"has_completed_last_run={self.has_completed_last_run}, "
|
|
76
|
+
f"active={self.active}, is_canceled={self.is_canceled})")
|
|
77
|
+
|
|
78
|
+
@property
|
|
79
|
+
def has_run_with_current_setup(self) -> bool:
|
|
80
|
+
"""
|
|
81
|
+
Checks if the node has run successfully with its current settings and inputs.
|
|
82
|
+
This is the primary flag for caching.
|
|
83
|
+
:return: True if the node is considered up-to-date, False otherwise.
|
|
84
|
+
"""
|
|
85
|
+
return self._has_run_with_current_setup
|
|
86
|
+
|
|
87
|
+
@has_run_with_current_setup.setter
|
|
88
|
+
def has_run_with_current_setup(self, value: bool):
|
|
89
|
+
"""
|
|
90
|
+
Sets the run status of the node.
|
|
91
|
+
If set to True, it implies the last run was completed successfully.
|
|
92
|
+
:param value: The new boolean status.
|
|
93
|
+
"""
|
|
94
|
+
if value:
|
|
95
|
+
self._has_run_with_current_setup = True
|
|
96
|
+
self.has_completed_last_run = True
|
|
97
|
+
else:
|
|
98
|
+
self._has_run_with_current_setup = False
|
|
99
|
+
|
|
28
100
|
|
|
29
101
|
class NodeStepSettings:
|
|
102
|
+
"""
|
|
103
|
+
Holds the configuration settings that control a node's execution behavior.
|
|
104
|
+
|
|
105
|
+
Attributes:
|
|
106
|
+
cache_results: If True, the node will cache its results to avoid re-computation.
|
|
107
|
+
renew_schema: If True, the schema will be re-evaluated on changes.
|
|
108
|
+
streamable: If True, the node can process data in a streaming fashion.
|
|
109
|
+
setup_errors: If True, indicates a non-blocking error occurred during setup.
|
|
110
|
+
breaking_setup_errors: If True, indicates an error occurred that prevents execution.
|
|
111
|
+
execute_location: The preferred location for execution ('auto', 'local', 'remote').
|
|
112
|
+
"""
|
|
30
113
|
cache_results: bool = False
|
|
31
114
|
renew_schema: bool = True
|
|
32
115
|
streamable: bool = True
|
|
@@ -36,20 +119,40 @@ class NodeStepSettings:
|
|
|
36
119
|
|
|
37
120
|
|
|
38
121
|
class NodeStepInputs:
|
|
122
|
+
"""
|
|
123
|
+
Manages the input connections for a `FlowNode`.
|
|
124
|
+
|
|
125
|
+
Attributes:
|
|
126
|
+
left_input: The `FlowNode` connected to the left input port.
|
|
127
|
+
right_input: The `FlowNode` connected to the right input port.
|
|
128
|
+
main_inputs: A list of `FlowNode` objects connected to the main input port(s).
|
|
129
|
+
"""
|
|
39
130
|
left_input: "FlowNode" = None
|
|
40
131
|
right_input: "FlowNode" = None
|
|
41
132
|
main_inputs: List["FlowNode"] = None
|
|
42
133
|
|
|
43
134
|
@property
|
|
44
|
-
def input_ids(self) -> List[int]:
|
|
135
|
+
def input_ids(self) -> List[int] | None:
|
|
136
|
+
"""
|
|
137
|
+
Gets the IDs of all connected input nodes.
|
|
138
|
+
:return: A list of integer node IDs.
|
|
139
|
+
"""
|
|
45
140
|
if self.main_inputs is not None:
|
|
46
141
|
return [node_input.node_information.id for node_input in self.get_all_inputs()]
|
|
47
142
|
|
|
48
143
|
def get_all_inputs(self) -> List["FlowNode"]:
|
|
144
|
+
"""
|
|
145
|
+
Retrieves a single list containing all input nodes (main, left, and right).
|
|
146
|
+
:return: A list of all connected `FlowNode` objects.
|
|
147
|
+
"""
|
|
49
148
|
main_inputs = self.main_inputs or []
|
|
50
149
|
return [v for v in main_inputs + [self.left_input, self.right_input] if v is not None]
|
|
51
150
|
|
|
52
151
|
def __repr__(self) -> str:
|
|
152
|
+
"""
|
|
153
|
+
Provides a string representation of the node's inputs.
|
|
154
|
+
:return: A string detailing the connected inputs.
|
|
155
|
+
"""
|
|
53
156
|
left_repr = f"Left Input: {self.left_input}" if self.left_input else "Left Input: None"
|
|
54
157
|
right_repr = f"Right Input: {self.right_input}" if self.right_input else "Right Input: None"
|
|
55
158
|
main_inputs_repr = f"Main Inputs: {self.main_inputs}" if self.main_inputs else "Main Inputs: None"
|
|
@@ -57,27 +160,46 @@ class NodeStepInputs:
|
|
|
57
160
|
|
|
58
161
|
def validate_if_input_connection_exists(self, node_input_id: int,
|
|
59
162
|
connection_name: Literal['main', 'left', 'right']) -> bool:
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
163
|
+
"""
|
|
164
|
+
Checks if a connection from a specific node ID exists on a given port.
|
|
165
|
+
|
|
166
|
+
:param node_input_id: The ID of the source node to check for.
|
|
167
|
+
:param connection_name: The name of the input port ('main', 'left', 'right').
|
|
168
|
+
:return: True if the connection exists, False otherwise.
|
|
169
|
+
"""
|
|
170
|
+
if connection_name == 'main' and self.main_inputs:
|
|
171
|
+
return any(node_input.node_information.id == node_input_id for node_input in self.main_inputs)
|
|
172
|
+
if connection_name == 'left' and self.left_input:
|
|
63
173
|
return self.left_input.node_information.id == node_input_id
|
|
64
174
|
if connection_name == 'right':
|
|
65
175
|
return self.right_input.node_information.id == node_input_id
|
|
66
176
|
|
|
67
177
|
|
|
68
178
|
class NodeSchemaInformation:
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
179
|
+
"""
|
|
180
|
+
Stores all schema-related information for a `FlowNode`.
|
|
181
|
+
|
|
182
|
+
Attributes:
|
|
183
|
+
result_schema: The actual output schema after a successful execution.
|
|
184
|
+
predicted_schema: The predicted output schema, calculated without full execution.
|
|
185
|
+
input_columns: A list of column names the node requires from its inputs.
|
|
186
|
+
drop_columns: A list of column names that will be dropped by the node.
|
|
187
|
+
output_columns: A list of `FlowfileColumn` objects that will be added by the node.
|
|
188
|
+
"""
|
|
189
|
+
result_schema: Optional[List[FlowfileColumn]] = None
|
|
190
|
+
predicted_schema: Optional[List[FlowfileColumn]] = None
|
|
191
|
+
input_columns: List[str] = []
|
|
192
|
+
drop_columns: List[str] = []
|
|
193
|
+
output_columns: List[FlowfileColumn] = []
|
|
74
194
|
|
|
75
195
|
|
|
76
196
|
class NodeResults:
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
197
|
+
"""
|
|
198
|
+
Stores the outputs of a `FlowNode`'s execution, including data, errors, and metadata.
|
|
199
|
+
"""
|
|
200
|
+
_resulting_data: Optional[FlowDataEngine] = None
|
|
201
|
+
example_data: Optional[FlowDataEngine] = None
|
|
202
|
+
example_data_path: Optional[str] = None
|
|
81
203
|
example_data_generator: Optional[Callable[[], pa.Table]] = None
|
|
82
204
|
run_time: int = -1
|
|
83
205
|
errors: Optional[str] = None
|
|
@@ -93,19 +215,31 @@ class NodeResults:
|
|
|
93
215
|
self.example_data_generator = None
|
|
94
216
|
self.analysis_data_generator = None
|
|
95
217
|
|
|
96
|
-
def get_example_data(self) -> pa.Table
|
|
218
|
+
def get_example_data(self) -> Optional[pa.Table]:
|
|
219
|
+
"""
|
|
220
|
+
Executes the generator to fetch a sample of the resulting data.
|
|
221
|
+
:return: A PyArrow Table containing a sample of the data, or None.
|
|
222
|
+
"""
|
|
97
223
|
if self.example_data_generator:
|
|
98
224
|
return self.example_data_generator()
|
|
99
225
|
|
|
100
226
|
@property
|
|
101
|
-
def resulting_data(self) -> FlowDataEngine:
|
|
227
|
+
def resulting_data(self) -> Optional[FlowDataEngine]:
|
|
228
|
+
"""
|
|
229
|
+
Gets the full resulting data from the node's execution.
|
|
230
|
+
:return: A `FlowDataEngine` instance containing the result, or None.
|
|
231
|
+
"""
|
|
102
232
|
return self._resulting_data
|
|
103
233
|
|
|
104
234
|
@resulting_data.setter
|
|
105
|
-
def resulting_data(self, d: FlowDataEngine):
|
|
235
|
+
def resulting_data(self, d: Optional[FlowDataEngine]):
|
|
236
|
+
"""
|
|
237
|
+
Sets the resulting data.
|
|
238
|
+
:param d: The `FlowDataEngine` instance to store.
|
|
239
|
+
"""
|
|
106
240
|
self._resulting_data = d
|
|
107
241
|
|
|
108
242
|
def reset(self):
|
|
243
|
+
"""Resets all result attributes to their default, empty state."""
|
|
109
244
|
self._resulting_data = None
|
|
110
|
-
self.run_time = -1
|
|
111
|
-
|
|
245
|
+
self.run_time = -1
|
|
@@ -14,6 +14,7 @@ class SingleExecutionFuture(Generic[T]):
|
|
|
14
14
|
func: Callable[[], T]
|
|
15
15
|
on_error: Optional[Callable[[Exception], Any]]
|
|
16
16
|
result_value: Optional[T]
|
|
17
|
+
has_run_at_least_once: bool = False # Indicates if the function has been run at least once
|
|
17
18
|
|
|
18
19
|
def __init__(
|
|
19
20
|
self,
|
|
@@ -26,6 +27,7 @@ class SingleExecutionFuture(Generic[T]):
|
|
|
26
27
|
self.func = func
|
|
27
28
|
self.on_error = on_error
|
|
28
29
|
self.result_value = None
|
|
30
|
+
self.has_run_at_least_once = False
|
|
29
31
|
|
|
30
32
|
def start(self) -> None:
|
|
31
33
|
"""Start the function execution if not already started."""
|
|
@@ -35,8 +37,7 @@ class SingleExecutionFuture(Generic[T]):
|
|
|
35
37
|
|
|
36
38
|
def cleanup(self) -> None:
|
|
37
39
|
"""Clean up resources by clearing the future and shutting down the executor."""
|
|
38
|
-
|
|
39
|
-
# self.future = None
|
|
40
|
+
self.has_run_at_least_once = True
|
|
40
41
|
self.executor.shutdown(wait=False)
|
|
41
42
|
|
|
42
43
|
def __call__(self) -> Optional[T]:
|
|
@@ -1,8 +1,3 @@
|
|
|
1
|
-
import time
|
|
2
|
-
import random
|
|
3
|
-
import uuid
|
|
4
|
-
import socket
|
|
5
|
-
import hashlib
|
|
6
1
|
|
|
7
2
|
from dataclasses import dataclass
|
|
8
3
|
from typing import Dict, List
|
|
@@ -12,33 +7,7 @@ from pathlib import Path
|
|
|
12
7
|
from flowfile_core.flowfile.manage.open_flowfile import open_flow
|
|
13
8
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
14
9
|
from flowfile_core.schemas.schemas import FlowSettings
|
|
15
|
-
from flowfile_core.
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
def create_unique_id() -> int:
|
|
19
|
-
"""
|
|
20
|
-
Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
|
|
21
|
-
Returns:
|
|
22
|
-
int: unique id within 32 bits (4 bytes)
|
|
23
|
-
"""
|
|
24
|
-
# Get various entropy sources
|
|
25
|
-
time_ms = int(time.time() * 1000)
|
|
26
|
-
pid = os.getpid()
|
|
27
|
-
random_bytes = random.getrandbits(32)
|
|
28
|
-
mac_addr = uuid.getnode() # MAC address as integer
|
|
29
|
-
hostname = socket.gethostname()
|
|
30
|
-
|
|
31
|
-
# Combine all sources into a string
|
|
32
|
-
seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
|
|
33
|
-
|
|
34
|
-
# Create a hash of all entropy sources
|
|
35
|
-
hash_obj = hashlib.md5(seed.encode())
|
|
36
|
-
hash_int = int(hash_obj.hexdigest(), 16)
|
|
37
|
-
|
|
38
|
-
# Ensure the result fits within 32 bits (4 bytes)
|
|
39
|
-
unique_id = hash_int & 0xFFFFFFFF
|
|
40
|
-
|
|
41
|
-
return unique_id
|
|
10
|
+
from flowfile_core.flowfile.utils import create_unique_id
|
|
42
11
|
|
|
43
12
|
|
|
44
13
|
@dataclass
|
|
@@ -71,7 +40,7 @@ class FlowfileHandler:
|
|
|
71
40
|
raise 'flow already registered'
|
|
72
41
|
else:
|
|
73
42
|
name = flow_settings.name if flow_settings.name else flow_settings.flow_id
|
|
74
|
-
self._flows[flow_settings.flow_id] = FlowGraph(name=name,
|
|
43
|
+
self._flows[flow_settings.flow_id] = FlowGraph(name=name, flow_settings=flow_settings)
|
|
75
44
|
return self.get_flow(flow_settings.flow_id)
|
|
76
45
|
|
|
77
46
|
def get_flow(self, flow_id: int) -> FlowGraph | None:
|
|
@@ -70,8 +70,7 @@ def open_flow(flow_path: Path) -> FlowGraph:
|
|
|
70
70
|
flow_storage_obj.flow_name = str(flow_path.stem)
|
|
71
71
|
ensure_compatibility(flow_storage_obj, str(flow_path))
|
|
72
72
|
ingestion_order = determine_insertion_order(flow_storage_obj)
|
|
73
|
-
new_flow = FlowGraph(
|
|
74
|
-
flow_settings=flow_storage_obj.flow_settings)
|
|
73
|
+
new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)
|
|
75
74
|
for node_id in ingestion_order:
|
|
76
75
|
node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
|
|
77
76
|
node_promise = input_schema.NodePromise(flow_id=new_flow.flow_id, node_id=node_info.id,
|
|
@@ -1,3 +1 @@
|
|
|
1
1
|
from flowfile_core.flowfile.sources.external_sources import custom_external_sources
|
|
2
|
-
# from flowfile.sources.external_sources.custom_external_sources.external_source import check_for_key_vault_existence
|
|
3
|
-
# from flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
|
|
@@ -1,22 +1,19 @@
|
|
|
1
1
|
from flowfile_core.flowfile.sources.external_sources.custom_external_sources.external_source import CustomExternalSource
|
|
2
|
-
from flowfile_core.flowfile.sources.external_sources.airbyte_sources.airbyte import AirbyteSource
|
|
3
2
|
|
|
4
3
|
|
|
5
|
-
def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource
|
|
4
|
+
def data_source_factory(source_type: str, **kwargs) -> CustomExternalSource:
|
|
6
5
|
"""
|
|
7
|
-
Factory function to generate either CustomExternalSource
|
|
6
|
+
Factory function to generate either CustomExternalSource .
|
|
8
7
|
|
|
9
8
|
Args:
|
|
10
|
-
source_type (str): The type of source to create ("custom"
|
|
9
|
+
source_type (str): The type of source to create ("custom").
|
|
11
10
|
**kwargs: The keyword arguments required for the specific source type.
|
|
12
11
|
|
|
13
12
|
Returns:
|
|
14
|
-
Union[CustomExternalSource
|
|
13
|
+
Union[CustomExternalSource]: An instance of the selected data source type.
|
|
15
14
|
"""
|
|
16
15
|
if source_type == "custom":
|
|
17
16
|
return CustomExternalSource(**kwargs)
|
|
18
|
-
elif source_type == "airbyte":
|
|
19
|
-
return AirbyteSource(**kwargs)
|
|
20
17
|
else:
|
|
21
18
|
raise ValueError(f"Unknown source type: {source_type}")
|
|
22
19
|
|
|
@@ -4,7 +4,6 @@ from collections import defaultdict, deque
|
|
|
4
4
|
from typing import List, Dict, Set, Tuple, TYPE_CHECKING
|
|
5
5
|
|
|
6
6
|
if TYPE_CHECKING:
|
|
7
|
-
# Make sure this import path is correct for your project structure
|
|
8
7
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
9
8
|
|
|
10
9
|
|
|
@@ -64,7 +63,6 @@ def calculate_layered_layout(
|
|
|
64
63
|
if child_node.node_id in node_ids:
|
|
65
64
|
if child_node.node_id not in adj[node.node_id]:
|
|
66
65
|
adj[node.node_id].append(child_node.node_id)
|
|
67
|
-
# Assuming primary method works or in_degree is handled elsewhere
|
|
68
66
|
in_degree[child_node.node_id] += 1
|
|
69
67
|
|
|
70
68
|
stages: Dict[int, List[int]] = defaultdict(list)
|
flowfile_core/flowfile/utils.py
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import hashlib
|
|
3
2
|
import json
|
|
4
|
-
import polars as pl
|
|
5
3
|
import shutil
|
|
6
4
|
|
|
7
|
-
|
|
5
|
+
import datetime
|
|
8
6
|
from typing import List
|
|
9
7
|
from decimal import Decimal
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
import time
|
|
9
|
+
import random
|
|
10
|
+
import uuid
|
|
11
|
+
import socket
|
|
12
|
+
import hashlib
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def generate_sha256_hash(data: bytes):
|
|
@@ -30,11 +30,11 @@ def snake_case_to_camel_case(text: str) -> str:
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
def json_default(val):
|
|
33
|
-
if isinstance(val, datetime):
|
|
33
|
+
if isinstance(val, datetime.datetime):
|
|
34
34
|
return val.isoformat(timespec='microseconds')
|
|
35
|
-
elif isinstance(val, date):
|
|
35
|
+
elif isinstance(val, datetime.date):
|
|
36
36
|
return val.isoformat()
|
|
37
|
-
elif isinstance(val, time):
|
|
37
|
+
elif isinstance(val, datetime.time):
|
|
38
38
|
return val.isoformat()
|
|
39
39
|
elif hasattr(val, '__dict__'):
|
|
40
40
|
return val.__dict__
|
|
@@ -61,7 +61,7 @@ def get_hash(val):
|
|
|
61
61
|
if hasattr(val, 'overridden_hash') and val.overridden_hash():
|
|
62
62
|
val = hash(val)
|
|
63
63
|
elif hasattr(val, '__dict__'):
|
|
64
|
-
val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y'}}
|
|
64
|
+
val = {k: v for k, v in val.__dict__.items() if k not in {'pos_x', 'pos_y', 'description'}}
|
|
65
65
|
elif hasattr(val, 'json'):
|
|
66
66
|
pass
|
|
67
67
|
return generate_sha256_hash(json_dumps(val).encode('utf-8'))
|
|
@@ -119,19 +119,28 @@ def batch_generator(input_list: List, batch_size: int = 10000):
|
|
|
119
119
|
run = False
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
def
|
|
123
|
-
"""
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
122
|
+
def create_unique_id() -> int:
|
|
123
|
+
"""
|
|
124
|
+
Create a unique id for the flowfile with increased randomness while maintaining 32-bit compatibility
|
|
125
|
+
Returns:
|
|
126
|
+
int: unique id within 32 bits (4 bytes)
|
|
127
|
+
"""
|
|
128
|
+
# Get various entropy sources
|
|
129
|
+
time_ms = int(time.time() * 1000)
|
|
130
|
+
pid = os.getpid()
|
|
131
|
+
random_bytes = random.getrandbits(32)
|
|
132
|
+
mac_addr = uuid.getnode() # MAC address as integer
|
|
133
|
+
hostname = socket.gethostname()
|
|
134
|
+
|
|
135
|
+
# Combine all sources into a string
|
|
136
|
+
seed = f"{time_ms}-{pid}-{random_bytes}-{mac_addr}-{hostname}-{uuid.uuid4()}"
|
|
137
|
+
|
|
138
|
+
# Create a hash of all entropy sources
|
|
139
|
+
|
|
140
|
+
hash_obj = hashlib.sha256(seed.encode())
|
|
141
|
+
hash_int = int(hash_obj.hexdigest(), 16)
|
|
142
|
+
|
|
143
|
+
# Ensure the result fits within 32 bits (4 bytes)
|
|
144
|
+
unique_id = hash_int & 0xFFFFFFFF
|
|
145
|
+
|
|
146
|
+
return unique_id
|
flowfile_core/main.py
CHANGED
|
@@ -8,14 +8,14 @@ from fastapi import FastAPI
|
|
|
8
8
|
from fastapi.middleware.cors import CORSMiddleware
|
|
9
9
|
|
|
10
10
|
from flowfile_core import ServerRun
|
|
11
|
-
from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,
|
|
12
|
-
OFFLOAD_TO_WORKER)
|
|
11
|
+
from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,)
|
|
13
12
|
|
|
14
13
|
from flowfile_core.routes.auth import router as auth_router
|
|
15
14
|
from flowfile_core.routes.secrets import router as secrets_router
|
|
16
15
|
from flowfile_core.routes.routes import router
|
|
17
16
|
from flowfile_core.routes.public import router as public_router
|
|
18
17
|
from flowfile_core.routes.logs import router as logs_router
|
|
18
|
+
from flowfile_core.routes.cloud_connections import router as cloud_connections_router
|
|
19
19
|
|
|
20
20
|
from flowfile_core.configs.flow_logger import clear_all_flow_logs
|
|
21
21
|
|
|
@@ -27,7 +27,11 @@ server_instance = None
|
|
|
27
27
|
|
|
28
28
|
@asynccontextmanager
|
|
29
29
|
async def shutdown_handler(app: FastAPI):
|
|
30
|
-
"""
|
|
30
|
+
"""Handles the graceful startup and shutdown of the FastAPI application.
|
|
31
|
+
|
|
32
|
+
This context manager ensures that resources, such as log files, are cleaned
|
|
33
|
+
up properly when the application is terminated.
|
|
34
|
+
"""
|
|
31
35
|
print('Starting core application...')
|
|
32
36
|
try:
|
|
33
37
|
yield
|
|
@@ -72,35 +76,51 @@ app.include_router(router)
|
|
|
72
76
|
app.include_router(logs_router, tags=["logs"])
|
|
73
77
|
app.include_router(auth_router, prefix="/auth", tags=["auth"])
|
|
74
78
|
app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
|
|
79
|
+
app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=["cloud_connections"])
|
|
75
80
|
|
|
76
81
|
|
|
77
82
|
# Strong references to in-flight shutdown tasks. The event loop only keeps weak
# references to tasks, so an otherwise unreferenced task may be collected early.
_pending_shutdown_tasks = set()


@app.post("/shutdown")
async def shutdown():
    """An API endpoint to gracefully shut down the server.

    Schedules `trigger_shutdown` on the running event loop and returns right
    away. `trigger_shutdown` waits briefly before flagging the server to exit,
    which leaves time for this response to be sent.

    Returns:
        A dict with a confirmation message.
    """
    import asyncio  # local import keeps this block self-contained

    # The earlier code created a task container, added `trigger_shutdown` to
    # it and then dropped it, so nothing visible here ever ran the coroutine.
    # NOTE(review): that container came from `ServerRun`, which is defined
    # outside this file; confirm it had no scheduling side effects of its own.
    task = asyncio.create_task(trigger_shutdown())
    _pending_shutdown_tasks.add(task)
    task.add_done_callback(_pending_shutdown_tasks.discard)
    return {"message": "Server is shutting down"}
|
|
86
94
|
|
|
87
95
|
|
|
88
96
|
async def trigger_shutdown():
    """(Internal) Flag the running server to exit after a short pause.

    Sleeps for one second so the `/shutdown` response can go out first, then
    sets `should_exit` on the module-level `server_instance`. Does nothing
    further when no server instance is set.
    """
    await asyncio.sleep(1)
    if not server_instance:
        return
    server_instance.should_exit = True
|
|
93
105
|
|
|
94
106
|
|
|
95
107
|
def signal_handler(signum, frame):
    """React to an OS signal (e.g. SIGINT or SIGTERM) by requesting shutdown.

    Prints the received signal number and, when a server instance is set,
    marks it with `should_exit = True`. The `frame` argument is unused.
    """
    print(f"Received signal {signum}")
    if not server_instance:
        return
    server_instance.should_exit = True
|
|
100
112
|
|
|
101
113
|
|
|
102
114
|
def run(host: str = None, port: int = None):
|
|
103
|
-
"""
|
|
115
|
+
"""Runs the FastAPI application using Uvicorn.
|
|
116
|
+
|
|
117
|
+
This function configures and starts the Uvicorn server, setting up
|
|
118
|
+
signal handlers to ensure a graceful shutdown.
|
|
119
|
+
|
|
120
|
+
Args:
|
|
121
|
+
host: The host to bind the server to. Defaults to `SERVER_HOST` from settings.
|
|
122
|
+
port: The port to bind the server to. Defaults to `SERVER_PORT` from settings.
|
|
123
|
+
"""
|
|
104
124
|
global server_instance
|
|
105
125
|
|
|
106
126
|
# Use values from settings if not explicitly provided
|
|
@@ -133,7 +153,7 @@ def run(host: str = None, port: int = None):
|
|
|
133
153
|
print("Received interrupt signal, shutting down...")
|
|
134
154
|
finally:
|
|
135
155
|
server_instance = None
|
|
136
|
-
print("
|
|
156
|
+
print("Server has shut down.")
|
|
137
157
|
|
|
138
158
|
|
|
139
159
|
if __name__ == "__main__":
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from typing import List
|
|
2
|
+
|
|
3
|
+
from fastapi import HTTPException, Depends, APIRouter
|
|
4
|
+
from sqlalchemy.orm import Session
|
|
5
|
+
|
|
6
|
+
# Core modules
|
|
7
|
+
from flowfile_core.auth.jwt import get_current_active_user
|
|
8
|
+
from flowfile_core.configs import logger
|
|
9
|
+
from flowfile_core.database.connection import get_db
|
|
10
|
+
from flowfile_core.flowfile.database_connection_manager.db_connections import (store_cloud_connection,
|
|
11
|
+
get_cloud_connection_schema,
|
|
12
|
+
get_all_cloud_connections_interface,
|
|
13
|
+
delete_cloud_connection)
|
|
14
|
+
# Schema and models
|
|
15
|
+
from flowfile_core.schemas.cloud_storage_schemas import FullCloudStorageConnection, FullCloudStorageConnectionInterface
|
|
16
|
+
|
|
17
|
+
# External dependencies
|
|
18
|
+
# File handling
|
|
19
|
+
# Router that collects the cloud storage connection endpoints defined below.
router = APIRouter()
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@router.post("/cloud_connection", tags=['cloud_connections'])
def create_cloud_storage_connection(input_connection: FullCloudStorageConnection,
                                    current_user=Depends(get_current_active_user),
                                    db: Session = Depends(get_db)
                                    ):
    """
    Create a new cloud storage connection.
    Parameters
        input_connection: FullCloudStorageConnection schema containing connection details
        current_user: User obtained from Depends(get_current_active_user)
        db: Session obtained from Depends(get_db)
    Returns
        Dict with a success message
    Raises
        HTTPException: 422 with 'Connection name already exists' when storing
            raises ValueError; 422 with the error text for any other failure.
    """
    logger.info(f'Create cloud connection {input_connection.connection_name}')
    try:
        store_cloud_connection(db, input_connection, current_user.id)
    except ValueError as err:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(422, 'Connection name already exists') from err
    except Exception as err:
        logger.error(err)
        raise HTTPException(422, str(err)) from err
    return {"message": "Cloud connection created successfully"}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@router.delete('/cloud_connection', tags=['cloud_connections'])
def delete_cloud_connection_with_connection_name(connection_name: str,
                                                 current_user=Depends(get_current_active_user),
                                                 db: Session = Depends(get_db)
                                                 ):
    """
    Delete a cloud connection.
    Parameters
        connection_name: Name of the connection to delete
        current_user: User obtained from Depends(get_current_active_user)
        db: Session obtained from Depends(get_db)
    Returns
        Dict with a success message
    Raises
        HTTPException: 404 when no connection with that name is found for the user
    """
    logger.info(f'Deleting cloud connection {connection_name}')
    # Look the connection up first so a missing name yields a 404.
    cloud_storage_connection = get_cloud_connection_schema(db, connection_name, current_user.id)
    if cloud_storage_connection is None:
        # Message previously read 'Cloud connection connection not found'.
        raise HTTPException(404, 'Cloud connection not found')
    delete_cloud_connection(db, connection_name, current_user.id)
    return {"message": "Cloud connection deleted successfully"}
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
# Tag matches the sibling routes ('cloud_connections'); the singular form
# produced a separate group in the generated API docs.
@router.get('/cloud_connections', tags=['cloud_connections'],
            response_model=List[FullCloudStorageConnectionInterface])
def get_cloud_connections(
        db: Session = Depends(get_db),
        current_user=Depends(get_current_active_user)) -> List[FullCloudStorageConnectionInterface]:
    """
    Get all cloud storage connections for the current user.
    Parameters
        db: Session obtained from Depends(get_db)
        current_user: User obtained from Depends(get_current_active_user)

    Returns
        List[FullCloudStorageConnectionInterface]
    """
    return get_all_cloud_connections_interface(db, current_user.id)
|