Flowfile 0.3.1.2__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff shows the contents of two publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of Flowfile has been flagged as potentially problematic.
- flowfile/__init__.py +2 -1
- flowfile/api.py +5 -3
- flowfile/web/__init__.py +3 -0
- flowfile/web/static/assets/{AirbyteReader-cb0c1d4a.js → AirbyteReader-2b1cf2d8.js} +10 -9
- flowfile/web/static/assets/{CrossJoin-a514fa59.js → CrossJoin-cc3ab73c.js} +8 -8
- flowfile/web/static/assets/{DatabaseConnectionSettings-f2cecf33.js → DatabaseConnectionSettings-307c4652.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-83ee3c98.js → DatabaseManager-69faa6e1.js} +10 -6
- flowfile/web/static/assets/{DatabaseReader-dc0c6881.js → DatabaseReader-e4134cd0.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-5afe9f8d.js → DatabaseWriter-d32d75b1.js} +9 -9
- flowfile/web/static/assets/{ExploreData-c7ee19cf.js → ExploreData-5eb48389.js} +18639 -18629
- flowfile/web/static/assets/{ExternalSource-17b23a01.js → ExternalSource-29489051.js} +8 -21
- flowfile/web/static/assets/{Filter-90856b4f.js → Filter-031332bb.js} +9 -9
- flowfile/web/static/assets/{Formula-38b71e9e.js → Formula-3b900540.js} +15 -15
- flowfile/web/static/assets/{Formula-d60a74f4.css → Formula-b8cefc31.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-d0f1fe81.js → FuzzyMatch-dee31153.js} +9 -9
- flowfile/web/static/assets/{GraphSolver-0c86bbc6.js → GraphSolver-ca74eb47.js} +5 -5
- flowfile/web/static/assets/{GroupBy-f2772e9f.js → GroupBy-081b6591.js} +8 -7
- flowfile/web/static/assets/{Join-bc3e1cf7.js → Join-b467376f.js} +11 -10
- flowfile/web/static/assets/{ManualInput-03aa0245.js → ManualInput-ffffb80a.js} +11 -8
- flowfile/web/static/assets/{Output-5b35eee8.js → Output-9a87d4ba.js} +4 -4
- flowfile/web/static/assets/{Pivot-7164087c.js → Pivot-ee3e6093.js} +8 -7
- flowfile/web/static/assets/{PolarsCode-3abf6507.js → PolarsCode-03921254.js} +13 -11
- flowfile/web/static/assets/{PopOver-b37ff9be.js → PopOver-3bdf8951.js} +1 -1
- flowfile/web/static/assets/{Read-65966a3e.js → Read-67fee3a0.js} +6 -6
- flowfile/web/static/assets/{RecordCount-c66c6d6d.js → RecordCount-a2acd02d.js} +7 -6
- flowfile/web/static/assets/{RecordId-826dc095.js → RecordId-0c8bcd77.js} +10 -8
- flowfile/web/static/assets/{Sample-4ed555c8.js → Sample-60594a3a.js} +7 -6
- flowfile/web/static/assets/{SecretManager-eac1e97d.js → SecretManager-bbcec2ac.js} +2 -2
- flowfile/web/static/assets/{Select-085f05cc.js → Select-9540e6ca.js} +8 -8
- flowfile/web/static/assets/{SettingsSection-1f5e79c1.js → SettingsSection-48f28104.js} +1 -1
- flowfile/web/static/assets/{Sort-3e6cb414.js → Sort-6dbe3633.js} +6 -6
- flowfile/web/static/assets/{TextToRows-606349bc.js → TextToRows-27aab4a8.js} +18 -13
- flowfile/web/static/assets/{UnavailableFields-b41976ed.js → UnavailableFields-8143044b.js} +2 -2
- flowfile/web/static/assets/{Union-fca91665.js → Union-52460248.js} +7 -6
- flowfile/web/static/assets/{Unique-a59f830e.js → Unique-f6962644.js} +8 -8
- flowfile/web/static/assets/{Unpivot-c3815565.js → Unpivot-1ff1e938.js} +5 -5
- flowfile/web/static/assets/{api-22b338bd.js → api-3b345d92.js} +1 -1
- flowfile/web/static/assets/{designer-e5bbe26f.js → designer-4736134f.js} +72 -42
- flowfile/web/static/assets/{documentation-08045cf2.js → documentation-b9545eba.js} +1 -1
- flowfile/web/static/assets/{dropDown-5e7e9a5a.js → dropDown-d5a4014c.js} +1 -1
- flowfile/web/static/assets/{dropDownGeneric-50a91b99.js → dropDownGeneric-1f4e32ec.js} +2 -2
- flowfile/web/static/assets/{fullEditor-705c6ccb.js → fullEditor-f4791c23.js} +3 -3
- flowfile/web/static/assets/{genericNodeSettings-65587f20.js → genericNodeSettings-1d456350.js} +3 -3
- flowfile/web/static/assets/{index-552863fd.js → index-f25c9283.js} +2608 -1570
- flowfile/web/static/assets/{nodeTitle-cf9bae3c.js → nodeTitle-cad6fd9d.js} +3 -3
- flowfile/web/static/assets/{secretApi-3ad510e1.js → secretApi-01f07e2c.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-bd644891.js → selectDynamic-f46a4e3f.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-dd17b478.js → vue-codemirror.esm-eb98fc8b.js} +15 -14
- flowfile/web/static/assets/{vue-content-loader.es-6b36f05e.js → vue-content-loader.es-860c0380.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/METADATA +1 -3
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/RECORD +97 -88
- flowfile_core/configs/__init__.py +15 -4
- flowfile_core/configs/node_store/nodes.py +2 -4
- flowfile_core/configs/settings.py +5 -3
- flowfile_core/configs/utils.py +18 -0
- flowfile_core/flowfile/FlowfileFlow.py +84 -29
- flowfile_core/flowfile/database_connection_manager/db_connections.py +1 -1
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +55 -18
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +42 -9
- flowfile_core/flowfile/flow_data_engine/flow_file_column/utils.py +42 -3
- flowfile_core/flowfile/flow_data_engine/polars_code_parser.py +34 -2
- flowfile_core/flowfile/flow_data_engine/sample_data.py +25 -7
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +4 -3
- flowfile_core/flowfile/flow_data_engine/utils.py +1 -0
- flowfile_core/flowfile/flow_graph_utils.py +320 -0
- flowfile_core/flowfile/flow_node/flow_node.py +2 -1
- flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py +2 -2
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/__init__.py +0 -1
- flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py +1 -1
- flowfile_core/flowfile/utils.py +34 -3
- flowfile_core/main.py +2 -3
- flowfile_core/routes/secrets.py +1 -1
- flowfile_core/schemas/input_schema.py +12 -14
- flowfile_core/schemas/transform_schema.py +25 -47
- flowfile_frame/__init__.py +11 -4
- flowfile_frame/adding_expr.py +280 -0
- flowfile_frame/config.py +9 -0
- flowfile_frame/expr.py +301 -83
- flowfile_frame/expr.pyi +2174 -0
- flowfile_frame/expr_name.py +258 -0
- flowfile_frame/flow_frame.py +616 -627
- flowfile_frame/flow_frame.pyi +336 -0
- flowfile_frame/flow_frame_methods.py +617 -0
- flowfile_frame/group_frame.py +89 -42
- flowfile_frame/join.py +1 -2
- flowfile_frame/lazy.py +704 -0
- flowfile_frame/lazy_methods.py +201 -0
- flowfile_frame/list_name_space.py +324 -0
- flowfile_frame/selectors.py +3 -0
- flowfile_frame/series.py +70 -0
- flowfile_frame/utils.py +80 -4
- flowfile/web/static/assets/GoogleSheet-854294a4.js +0 -2616
- flowfile/web/static/assets/GoogleSheet-92084da7.css +0 -233
- flowfile_core/flowfile/sources/external_sources/custom_external_sources/google_sheet.py +0 -74
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/LICENSE +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/WHEEL +0 -0
- {flowfile-0.3.1.2.dist-info → flowfile-0.3.3.dist-info}/entry_points.txt +0 -0
- /flowfile_core/{secrets → secret_manager}/__init__.py +0 -0
- /flowfile_core/{secrets/secrets.py → secret_manager/secret_manager.py} +0 -0
flowfile_core/flowfile/flow_data_engine/sample_data.py
CHANGED

@@ -1,14 +1,27 @@
 from faker import Faker
 from functools import partial
+from math import ceil
 from random import randint
 import polars as pl
 from typing import List, Dict, Any, Generator


-def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
+def create_fake_data(n_records: int = 1000, optimized: bool = True) -> pl.DataFrame:
+    """
+
+    Args:
+        n_records (): Number of records to return
+        optimized (): Indicator if creation should be optimized, will result in more identical rows when True
+
+    Returns:
+        pl.DataFrame
+    """
     fake = Faker()
-    selector = partial(randint,0)
-
+    selector = partial(randint, 0)
+
+    max_n_records = min(10_000, n_records) if optimized else n_records
+
+    min_range = partial(min, max_n_records)
     # Pre-generation of static data
     cities = [fake.city() for _ in range(min_range(7000))]
     companies = [fake.company() for _ in range(min_range(100_000))]
@@ -19,7 +32,7 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
     first_names = [fake.first_name() for _ in range(min_range(100_000))]
     last_names = [fake.last_name() for _ in range(min_range(50_000))]
     domain_names = [fake.domain_name() for _ in range(10)]
-    sales_data = [fake.random_int(0, 1000) for _ in range(
+    sales_data = [fake.random_int(0, 1000) for _ in range(max_n_records)]

     def generate_name():
         return f"{first_names[selector(min_range(100_000))-1]} {last_names[selector(min_range(50_000))-1]}"
@@ -32,9 +45,8 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:

     def generate_phone_number():
         return fake.phone_number()
-
     data = []
-    for i in range(
+    for i in range(max_n_records):
         name = generate_name()
         data.append(dict(
             ID=randint(1, 1000000),
@@ -47,8 +59,14 @@ def create_fake_data(n_records: int = 1000) -> pl.DataFrame:
             Work=companies[selector(min_range(100_000))-1],
             Zipcode=zipcodes[selector(min_range(200_000))-1],
             Country=countries[selector(min_range(50))-1],
-            sales_data=sales_data[selector(
+            sales_data=sales_data[selector(max_n_records)-1]
         ))
+    if max_n_records < n_records:
+        n_duplicates: int = ceil(n_records / max_n_records)
+        output = []
+        for _ in range(n_duplicates):
+            output.extend(data)
+        data = output[:n_records]

     return pl.DataFrame(data)
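The new `optimized` path caps Faker generation at 10,000 unique rows and then tiles them up to `n_records`. A standalone sketch of that tiling step (illustrative only, not Flowfile code; `tile_rows` is a hypothetical name):

```python
from math import ceil

def tile_rows(unique_rows: list, n_records: int) -> list:
    """Repeat a capped list of rows until it reaches n_records, then trim."""
    if len(unique_rows) >= n_records:
        return unique_rows[:n_records]
    n_duplicates = ceil(n_records / len(unique_rows))
    output = []
    for _ in range(n_duplicates):
        output.extend(unique_rows)
    return output[:n_records]

# 10 unique rows tiled to 25 records, mirroring the max_n_records < n_records branch above
assert len(tile_rows(list(range(10)), 25)) == 25
```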
flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py
CHANGED

@@ -190,7 +190,7 @@ class BaseFetcher:
             logger.info('Already running the fetching')
             return

-        sleep_time =
+        sleep_time = .5
         self.running = True
         while not self.stop_event.is_set():
             try:
@@ -205,7 +205,8 @@ class BaseFetcher:
                     break
                 elif status.status == 'Unknown Error':
                     self._handle_error(-1,
-                                       'There was an unknown error with the process,
+                                       'There was an unknown error with the process, '
+                                       'and the process got killed by the server')
                     break
                 else:
                     self._handle_error(2, r.text)
@@ -284,7 +285,7 @@ class ExternalDfFetcher(BaseFetcher):

     def __init__(self, flow_id: int, node_id: int | str, lf: pl.LazyFrame | pl.DataFrame, file_ref: str = None,
                  wait_on_completion: bool = True,
-                 operation_type: OperationType = 'store'):
+                 operation_type: OperationType = 'store', offload_to_worker: bool = True):
         super().__init__(file_ref=file_ref)
         lf = lf.lazy() if isinstance(lf, pl.DataFrame) else lf
         r = trigger_df_operation(lf=lf, file_ref=self.file_ref, operation_type=operation_type,
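A hedged usage sketch of the widened `ExternalDfFetcher.__init__`; the import path follows the file list above, the keyword names come from the hunk, and everything else (a running worker service, the IDs) is assumed:

```python
import polars as pl
from flowfile_core.flowfile.flow_data_engine.subprocess_operations.subprocess_operations import (
    ExternalDfFetcher,
)

df = pl.DataFrame({"a": [1, 2, 3]})  # a DataFrame is lazified internally per the hunk
fetcher = ExternalDfFetcher(
    flow_id=1,
    node_id=1,
    lf=df,
    wait_on_completion=True,
    operation_type='store',
    offload_to_worker=True,  # parameter added in this release; defaults to True
)
```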
flowfile_core/flowfile/flow_graph_utils.py
ADDED

@@ -0,0 +1,320 @@
+from typing import Dict, Tuple, Optional, List, Set, Callable
+from copy import deepcopy
+from flowfile_core.schemas import input_schema, schemas
+from flowfile_core.flowfile.FlowfileFlow import FlowGraph, add_connection
+
+
+def combine_flow_graphs_with_mapping(
+        *flow_graphs: FlowGraph,
+        target_flow_id: Optional[int] = None) -> Tuple[FlowGraph, Dict[Tuple[int, int], int]]:
+    # Validate input parameters
+    _validate_input(flow_graphs)
+
+    # Generate a unique flow ID if not provided
+    if target_flow_id is None:
+        target_flow_id = _generate_unique_flow_id(flow_graphs)
+
+    flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
+    combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+    _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
+    _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
+    return combined_graph, node_id_mapping
+
+
+def combine_flow_graphs(*flow_graphs: FlowGraph, target_flow_id: Optional[int] = None) -> FlowGraph:
+    """
+    Combine multiple flow graphs into a single graph, ensuring node IDs don't overlap.
+
+    Args:
+        *flow_graphs: Multiple FlowGraph instances to combine
+        target_flow_id: Optional ID for the new combined graph. If None, a new ID will be generated.
+
+    Returns:
+        A new FlowGraph containing all nodes and edges from the input graphs with remapped IDs
+
+    Raises:
+        ValueError: If no flow graphs are provided
+    """
+    # Validate input parameters
+    _validate_input(flow_graphs)
+
+    # Generate a unique flow ID if not provided
+    if target_flow_id is None:
+        target_flow_id = _generate_unique_flow_id(flow_graphs)
+
+    flow_settings = _create_flow_settings(flow_graphs[0], target_flow_id)
+    combined_graph = FlowGraph(flow_id=target_flow_id, flow_settings=flow_settings)
+    node_id_mapping = _create_node_id_mapping(flow_graphs)
+    _add_nodes_to_combined_graph(flow_graphs, combined_graph, node_id_mapping, target_flow_id)
+    _add_connections_to_combined_graph(flow_graphs, combined_graph, node_id_mapping)
+
+    return combined_graph
+
+
+def _validate_input(flow_graphs: Tuple[FlowGraph, ...]) -> None:
+    """
+    Validate input parameters.
+
+    Args:
+        flow_graphs: Flow graphs to validate
+
+    Raises:
+        ValueError: If validation fails
+    """
+    if not flow_graphs:
+        raise ValueError("At least one FlowGraph must be provided")
+
+    # Check for duplicate flow IDs
+    flow_ids = [fg.flow_id for fg in flow_graphs]
+    if len(flow_ids) != len(set(flow_ids)):
+        raise ValueError("Cannot combine flows with duplicate flow IDs")
+
+
+def _generate_unique_flow_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
+    """
+    Generate a unique flow ID based on the input flow graphs.
+
+    Args:
+        flow_graphs: Flow graphs to generate ID from
+
+    Returns:
+        int: A new unique flow ID
+    """
+    return abs(hash(tuple(fg.flow_id for fg in flow_graphs))) % 1000000
+
+
+def _create_flow_settings(base_flow_graph: FlowGraph, target_flow_id: int) -> schemas.FlowSettings:
+    """
+    Create flow settings for the combined graph based on an existing graph.
+
+    Args:
+        base_flow_graph: Flow graph to base settings on
+        target_flow_id: The new flow ID
+
+    Returns:
+        schemas.FlowSettings: Flow settings for the combined graph
+    """
+    flow_settings = deepcopy(base_flow_graph.flow_settings)
+    flow_settings.flow_id = target_flow_id
+    flow_settings.name = f"Combined Flow {target_flow_id}"
+    return flow_settings
+
+
+def _create_node_id_mapping(flow_graphs: Tuple[FlowGraph, ...]) -> Dict[Tuple[int, int], int]:
+    """
+    Create a mapping from (flow_id, original_node_id) to new unique node IDs.
+
+    Args:
+        flow_graphs: Flow graphs to process
+
+    Returns:
+        Dict: Mapping from (flow_id, node_id) to new node ID
+    """
+    node_id_mapping = {}
+    next_node_id = _get_next_available_node_id(flow_graphs)
+
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            node_id_mapping[(fg.flow_id, node.node_id)] = next_node_id
+            next_node_id += 1
+
+    return node_id_mapping
+
+
+def _get_next_available_node_id(flow_graphs: Tuple[FlowGraph, ...]) -> int:
+    """
+    Find the next available node ID.
+
+    Args:
+        flow_graphs: Flow graphs to examine
+
+    Returns:
+        int: Next available node ID
+    """
+    max_id = 0
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            max_id = max(max_id, node.node_id)
+    return max_id + 1
+
+
+def _add_nodes_to_combined_graph(
+        flow_graphs: Tuple[FlowGraph, ...],
+        combined_graph: FlowGraph,
+        node_id_mapping: Dict[Tuple[int, int], int],
+        target_flow_id: int
+) -> None:
+    """
+    Add all nodes from source graphs to the combined graph.
+
+    Args:
+        flow_graphs: Source flow graphs
+        combined_graph: Target combined graph
+        node_id_mapping: Mapping of node IDs
+        target_flow_id: Target flow ID
+    """
+    processed_nodes = set()
+
+    for fg in flow_graphs:
+        for node in fg.nodes:
+            # Skip if already processed
+            if (fg.flow_id, node.node_id) in processed_nodes:
+                continue
+
+            # Generate new node ID
+            new_node_id = node_id_mapping[(fg.flow_id, node.node_id)]
+
+            # Create and update setting input
+            setting_input = _create_updated_setting_input(
+                node.setting_input,
+                new_node_id,
+                target_flow_id,
+                fg.flow_id,
+                node_id_mapping
+            )
+
+            # Add node to combined graph
+            _add_node_to_graph(combined_graph, new_node_id, target_flow_id, node.node_type, setting_input)
+
+            processed_nodes.add((fg.flow_id, node.node_id))
+
+
+def _create_updated_setting_input(
+        original_setting_input: any,
+        new_node_id: int,
+        target_flow_id: int,
+        source_flow_id: int,
+        node_id_mapping: Dict[Tuple[int, int], int]
+) -> any:
+    """
+    Create an updated setting input with new node and flow IDs.
+
+    Args:
+        original_setting_input: Original setting input
+        new_node_id: New node ID
+        target_flow_id: Target flow ID
+        source_flow_id: Source flow ID
+        node_id_mapping: Mapping of node IDs
+
+    Returns:
+        Updated setting input
+    """
+    setting_input = deepcopy(original_setting_input)
+
+    # Update node ID
+    if hasattr(setting_input, 'node_id'):
+        setting_input.node_id = new_node_id
+
+    # Update flow ID
+    if hasattr(setting_input, 'flow_id'):
+        setting_input.flow_id = target_flow_id
+
+    # Update depending_on_id if present
+    if hasattr(setting_input, 'depending_on_id') and setting_input.depending_on_id != -1:
+        orig_depending_id = setting_input.depending_on_id
+        setting_input.depending_on_id = node_id_mapping.get((source_flow_id, orig_depending_id), -1)
+
+    # Update depending_on_ids list if present
+    if hasattr(setting_input, 'depending_on_ids'):
+        setting_input.depending_on_ids = [
+            node_id_mapping.get((source_flow_id, dep_id), -1)
+            for dep_id in setting_input.depending_on_ids
+            if dep_id != -1
+        ]
+
+    return setting_input
+
+
+def _add_node_to_graph(
+        graph: FlowGraph,
+        node_id: int,
+        flow_id: int,
+        node_type: str,
+        setting_input: any
+) -> None:
+    """
+    Add a node to the graph.
+
+    Args:
+        graph: Target graph
+        node_id: Node ID
+        flow_id: Flow ID
+        node_type: Node type
+        setting_input: Setting input
+    """
+    # Add node promise to graph
+    node_promise = input_schema.NodePromise(
+        node_id=node_id,
+        flow_id=flow_id,
+        node_type=node_type,
+        is_setup=True,
+        pos_x=getattr(setting_input, 'pos_x', 0),
+        pos_y=getattr(setting_input, 'pos_y', 0),
+        description=getattr(setting_input, 'description', '')
+    )
+    graph.add_node_promise(node_promise)
+
+    # Get node type-specific add method
+    add_method_name = f"add_{node_type}"
+    if hasattr(graph, add_method_name):
+        add_method = getattr(graph, add_method_name)
+        add_method(setting_input)
+
+
+def _add_connections_to_combined_graph(
+        flow_graphs: Tuple[FlowGraph, ...],
+        combined_graph: FlowGraph,
+        node_id_mapping: Dict[Tuple[int, int], int]
+) -> None:
+    """
+    Add all connections from source graphs to the combined graph.
+
+    Args:
+        flow_graphs: Source flow graphs
+        combined_graph: Target combined graph
+        node_id_mapping: Mapping of node IDs
+    """
+    for fg in flow_graphs:
+        for connection in fg.node_connections:
+            source_id, target_id = connection
+            new_source_id = node_id_mapping.get((fg.flow_id, source_id))
+            new_target_id = node_id_mapping.get((fg.flow_id, target_id))
+
+            if new_source_id is not None and new_target_id is not None:
+                input_type = _determine_connection_input_type(fg, source_id, target_id)
+
+                # Create connection in combined graph
+                node_connection = input_schema.NodeConnection.create_from_simple_input(
+                    from_id=new_source_id,
+                    to_id=new_target_id,
+                    input_type=input_type
+                )
+                add_connection(combined_graph, node_connection)
+
+
+def _determine_connection_input_type(
+        flow_graph: FlowGraph,
+        source_id: int,
+        target_id: int
+) -> str:
+    """
+    Determine the input type for a connection.
+
+    Args:
+        flow_graph: Source flow graph
+        source_id: Source node ID
+        target_id: Target node ID
+
+    Returns:
+        str: Input type (main, left, right)
+    """
+    from_node = flow_graph.get_node(source_id)
+    to_node = flow_graph.get_node(target_id)

+    if from_node and to_node:
+        input_types = to_node.get_input_type(from_node.node_id)
+        if input_types:
+            return input_types[0]
+
+    return "main"
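The public entry points here are `combine_flow_graphs` and `combine_flow_graphs_with_mapping`. A pure-Python illustration of the ID-remapping scheme they use (not Flowfile code): new node IDs start one past the largest node ID seen in any source graph, keyed by `(flow_id, node_id)`.

```python
graphs = {1: [1, 2, 3], 2: [1, 2]}  # flow_id -> node IDs, stand-ins for FlowGraph.nodes
next_id = max(n for nodes in graphs.values() for n in nodes) + 1  # 4

mapping = {}
for flow_id, nodes in graphs.items():
    for node_id in nodes:
        mapping[(flow_id, node_id)] = next_id
        next_id += 1

print(mapping)  # {(1, 1): 4, (1, 2): 5, (1, 3): 6, (2, 1): 7, (2, 2): 8}
```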
flowfile_core/flowfile/flow_node/flow_node.py
CHANGED

@@ -146,6 +146,7 @@ class FlowNode:
         self.node_settings.renew_schema = True
         if hasattr(setting_input, 'cache_results'):
             self.node_settings.cache_results = setting_input.cache_results
+
         self.setting_input = setting_input
         self.results.errors = None
         self.add_lead_to_in_depend_source()
@@ -174,7 +175,7 @@ class FlowNode:
         self.set_node_information()
         if self.node_type == 'manual_input' and isinstance(self._setting_input, input_schema.NodeManualInput):
             if self.hash != self.calculate_hash(setting_input) or not self.node_stats.has_run:
-                self.function = self.function.__class__(setting_input.
+                self.function = self.function.__class__(setting_input.raw_data_format)
                 self.reset()
                 self.get_predicted_schema()
         elif self._setting_input is not None:
flowfile_core/flowfile/sources/external_sources/airbyte_sources/models.py
CHANGED

@@ -1,7 +1,7 @@
 from typing import Any, Dict, List, Optional, TYPE_CHECKING, Union
 from pydantic import BaseModel, field_validator, ConfigDict
 import polars as pl
-from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import
+from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
 from flowfile_core.schemas.input_schema import MinimalFieldInfo
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.polars_type import PlType
@@ -56,7 +56,7 @@ class JsonSchema(BaseModel):
             dtype = 'string'
         else:
             dtype = type_mapping.get(self.type[0] if isinstance(self.type, list) else self.type, 'string')
-        return
+        return cast_str_to_polars_type(dtype)


 class AirbyteProperty(BaseModel):
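The fix makes `JsonSchema` return an actual Polars type instead of falling through to `None`. A rough, illustrative stand-in for the string-to-dtype cast it now calls (the real `cast_str_to_polars_type` lives in `flow_file_column/utils.py`; this mapping is an assumption):

```python
import polars as pl

# Illustrative subset of a JSON-schema-type -> Polars dtype cast
_dtypes = {'string': pl.Utf8, 'integer': pl.Int64, 'number': pl.Float64, 'boolean': pl.Boolean}

def cast_str_to_polars_type_demo(dtype: str):
    return _dtypes.get(dtype, pl.Utf8)

assert cast_str_to_polars_type_demo('integer') == pl.Int64
```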
flowfile_core/flowfile/sources/external_sources/sql_source/sql_source.py
CHANGED

@@ -4,7 +4,7 @@ from flowfile_core.configs import logger
 from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn
 from flowfile_core.schemas.input_schema import MinimalFieldInfo, DatabaseSettings
 from sqlalchemy import Engine, inspect, create_engine, text
-from flowfile_core.
+from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret

 from flowfile_core.flowfile.sources.external_sources.base_class import ExternalDataSource
 from flowfile_core.flowfile.sources.external_sources.sql_source.utils import get_polars_type, construct_sql_uri
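This hunk and the `routes/secrets.py` hunk below both reflect the module rename recorded at the end of the file list (`flowfile_core/secrets/secrets.py` → `flowfile_core/secret_manager/secret_manager.py`, moved with zero content changes). Any external code importing from the old path presumably needs the same one-line update:

```python
# Before (0.3.1.2), assuming the same names since the file moved unchanged:
# from flowfile_core.secrets.secrets import get_encrypted_secret, decrypt_secret

# After (0.3.3):
from flowfile_core.secret_manager.secret_manager import get_encrypted_secret, decrypt_secret
```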
flowfile_core/flowfile/utils.py
CHANGED

@@ -1,9 +1,15 @@
-from typing import List
 import os
-import shutil
 import hashlib
-from datetime import datetime
 import json
+import polars as pl
+import shutil
+
+from datetime import datetime, date, time
+from typing import List
+from decimal import Decimal
+
+from flowfile_core.flowfile.flow_data_engine.utils import standardize_col_dtype
+from flowfile_core.schemas import input_schema


 def generate_sha256_hash(data: bytes):
@@ -26,8 +32,16 @@ def snake_case_to_camel_case(text: str) -> str:
 def json_default(val):
     if isinstance(val, datetime):
         return val.isoformat(timespec='microseconds')
+    elif isinstance(val, date):
+        return val.isoformat()
+    elif isinstance(val, time):
+        return val.isoformat()
     elif hasattr(val, '__dict__'):
         return val.__dict__
+    elif isinstance(val, Decimal):
+        if val.as_integer_ratio()[1] == 1:
+            return int(val)
+        return float(val)
     else:
         raise Exception('Value is not serializable')

@@ -104,3 +118,20 @@ def batch_generator(input_list: List, batch_size: int = 10000):
         input_list = []
         run = False

+
+def _handle_raw_data(node_manual_input: input_schema.NodeManualInput):
+    """Ensure compatibility with the new typed raw data and the old dict form data type"""
+    if (not (hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
+            and (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
+        values = [standardize_col_dtype([vv for vv in c]) for c in zip(*(r.values()
+                                                                         for r in node_manual_input.raw_data))]
+        data_types = (pl.DataType.from_python(type(next((v for v in column_values), None))) for column_values in values)
+        _columns = [input_schema.MinimalFieldInfo(name=c, data_type=str(next(data_types))) for c in
+                    node_manual_input.raw_data[0].keys()]
+
+        node_manual_input.raw_data_format = input_schema.RawData(columns=_columns, data=values)
+    elif ((hasattr(node_manual_input, "raw_data_format") and node_manual_input.raw_data_format)
+          and not (hasattr(node_manual_input, 'raw_data') and node_manual_input.raw_data)):
+        node_manual_input.raw_data = [{c.name: node_manual_input.raw_data_format.data[ci][ri] for ci, c in
+                                       enumerate(node_manual_input.raw_data_format.columns)}
+                                      for ri in range(len(node_manual_input.raw_data_format.data[0]))]
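A quick check of the broadened `json_default` above, which now serializes `date`/`time` via `isoformat()` and turns whole Decimals into ints and fractional ones into floats (import path taken from this file; assumes the package is installed):

```python
import json
from datetime import date, datetime, time
from decimal import Decimal

from flowfile_core.flowfile.utils import json_default

payload = {
    'ts': datetime(2024, 1, 2, 3, 4, 5),   # -> isoformat with microseconds
    'day': date(2024, 1, 2),               # -> '2024-01-02'
    'at': time(3, 4, 5),                   # -> '03:04:05'
    'count': Decimal('42'),                # whole Decimal -> 42 (int)
    'price': Decimal('19.99'),             # fractional Decimal -> 19.99 (float)
}
print(json.dumps(payload, default=json_default))
```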
flowfile_core/main.py
CHANGED

@@ -8,7 +8,8 @@ from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware

 from flowfile_core import ServerRun
-from flowfile_core.configs.settings import SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL
+from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,
+                                            OFFLOAD_TO_WORKER)

 from flowfile_core.routes.auth import router as auth_router
 from flowfile_core.routes.secrets import router as secrets_router
@@ -107,7 +108,6 @@ def run(host: str = None, port: int = None):
         host = SERVER_HOST
     if port is None:
         port = SERVER_PORT
-
     print(f"Starting server on {host}:{port}")
     print(f"Worker configured at {WORKER_URL} (host: {WORKER_HOST}, port: {WORKER_PORT})")

@@ -120,7 +120,6 @@ def run(host: str = None, port: int = None):
         host=host,
         port=port,
         loop="asyncio",
-        log_level="warning",
     )
     server = uvicorn.Server(config)
     server_instance = server  # Store server instance globally
flowfile_core/routes/secrets.py
CHANGED

@@ -10,7 +10,7 @@ from flowfile_core.auth.jwt import get_current_active_user
 from flowfile_core.auth.models import Secret, SecretInput
 from flowfile_core.database import models as db_models
 from flowfile_core.database.connection import get_db
-from flowfile_core.
+from flowfile_core.secret_manager.secret_manager import encrypt_secret, store_secret, delete_secret as delete_secret_action

 router = APIRouter(dependencies=[Depends(get_current_active_user)])

flowfile_core/schemas/input_schema.py
CHANGED

@@ -1,4 +1,4 @@
-from typing import List, Optional, Literal
+from typing import List, Optional, Literal, Iterator
 from flowfile_core.schemas import transform_schema
 from pathlib import Path
 import os
@@ -15,6 +15,7 @@ InputConnectionClass = Literal['input-0', 'input-1', 'input-2', 'input-3', 'inpu
 InputType = Literal["main", "left", "right"]

+
 class NewDirectory(BaseModel):
     source_path: str
     dir_name: str
@@ -60,7 +61,7 @@ class ReceivedTableBase(BaseModel):
         return self.path

     def set_absolute_filepath(self):
-        base_path = Path(self.path)
+        base_path = Path(self.path).expanduser()
         # Check if the path is relative, resolve it with the current working directory
         if not base_path.is_absolute():
             base_path = Path.cwd() / base_path
@@ -96,7 +97,7 @@ class ReceivedJsonTable(ReceivedCsvTable):
     pass


-class ReceivedParquetTable(
+class ReceivedParquetTable(ReceivedTableBase):
     file_type: str = 'parquet'


@@ -246,8 +247,14 @@ class NodeDatasource(NodeBase):
     file_ref: str = None


+class RawData(BaseModel):
+    columns: List[MinimalFieldInfo] = None
+    data: List[List]  # List of list where each inner list is a column of data. This ensures more efficient storage
+
+
 class NodeManualInput(NodeBase):
-    raw_data: List = None
+    raw_data: Optional[List] = None
+    raw_data_format: Optional[RawData] = None


 class NodeRead(NodeBase):
@@ -341,15 +348,6 @@ class SampleUsers(ExternalSource):
     size: int = 100


-class GoogleSheet(ExternalSource):
-    GOOGLE_SHEET: bool
-    class_name: str = "google_sheet"
-    access_token: SecretStr = None
-    sheet_id: str
-    worksheet_name: str
-    sheet_name: str
-
-
 class AirbyteReader(AirbyteConfig):
     class_name: Optional[str] = "airbyte_reader"
     fields: Optional[List[MinimalFieldInfo]] = None
@@ -362,7 +360,7 @@ class AccessToken(BaseModel):

 class NodeExternalSource(NodeBase):
     identifier: str
-    source_settings: SampleUsers
+    source_settings: SampleUsers


 class NodeAirbyteReader(NodeExternalSource):