Flowfile 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of Flowfile might be problematic.
- flowfile/__init__.py +4 -3
- flowfile/api.py +1 -1
- flowfile/web/static/assets/{CloudConnectionManager-c20a740f.js → CloudConnectionManager-d7c2c028.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-960b400a.js → CloudStorageReader-d467329f.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-e3decbdd.js → CloudStorageWriter-071b8b00.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ContextMenu-2dea5e27.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-785554c4.js +41 -0
- flowfile/web/static/assets/ContextMenu-a51e19ea.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-d67e2405.js → CrossJoin-cf68ec7a.js} +14 -84
- flowfile/web/static/assets/{DatabaseConnectionSettings-a81e0f7e.js → DatabaseConnectionSettings-435c5dd8.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-9ea35e84.js → DatabaseManager-349e33a8.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-9578bfa5.js → DatabaseReader-8075bd28.js} +14 -114
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseWriter-19531098.js → DatabaseWriter-3e2dda89.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-76ec698c.js +192 -0
- flowfile/web/static/assets/{ExternalSource-2297ef96.js → ExternalSource-609a265c.js} +8 -79
- flowfile/web/static/assets/{Filter-f211c03a.js → Filter-97cff793.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-4207ea31.js → Formula-09de0ec9.js} +18 -85
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-bf120df0.js → FuzzyMatch-bdf70248.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-5bb7497a.js → GraphSolver-0b5a0e05.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{GroupBy-92c81b65.js → GroupBy-eaddadde.js} +12 -75
- flowfile/web/static/assets/{Join-4e49a274.js → Join-3313371b.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-90998ae8.js → ManualInput-e8bfc0be.js} +11 -82
- flowfile/web/static/assets/{Output-81e3e917.js → Output-7303bb09.js} +13 -243
- flowfile/web/static/assets/Output-ddc9079f.css +37 -0
- flowfile/web/static/assets/{Pivot-a3419842.js → Pivot-3b1c54ef.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-3bb36c8f.js +61 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-eaa819c0.js +61 -0
- flowfile/web/static/assets/{PolarsCode-72710deb.js → PolarsCode-aa12e25d.js} +13 -80
- flowfile/web/static/assets/Read-6b17491f.css +62 -0
- flowfile/web/static/assets/Read-a2bfc618.js +243 -0
- flowfile/web/static/assets/RecordCount-aa0dc082.js +53 -0
- flowfile/web/static/assets/{RecordId-10baf191.js → RecordId-48ee1a3b.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-e149dbf2.js +38 -0
- flowfile/web/static/assets/{Sample-3ed9a0ae.js → Sample-f06cb97a.js} +8 -77
- flowfile/web/static/assets/{SecretManager-0d49c0e8.js → SecretManager-37f34886.js} +2 -2
- flowfile/web/static/assets/{Select-8a02a0b3.js → Select-b60e6c47.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-70e5a7b1.js +53 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/{SettingsSection-4c0f45f5.js → SettingsSection-75b6cf4f.js} +2 -40
- flowfile/web/static/assets/SettingsSection-e57a672e.js +45 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-f55c9f9d.js → Sort-51b1ee4d.js} +12 -97
- flowfile/web/static/assets/{TextToRows-5dbc2145.js → TextToRows-26835f8f.js} +14 -83
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{UnavailableFields-a1768e52.js → UnavailableFields-88a4cd0c.js} +2 -2
- flowfile/web/static/assets/Union-4d0088eb.js +77 -0
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/{Unique-46b250da.js → Unique-7d554a62.js} +22 -91
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-25ac84cc.js → Unpivot-4668595c.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-d4f0e0e8.js +51 -0
- flowfile/web/static/assets/{ExploreData-40476474.js → VueGraphicWalker-5324d566.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-6ef0dcef.js → api-271ed117.js} +1 -1
- flowfile/web/static/assets/{api-a0abbdc7.js → api-31e4fea6.js} +1 -1
- flowfile/web/static/assets/{designer-186f2e71.css → designer-091bdc3f.css} +819 -184
- flowfile/web/static/assets/{designer-13eabd83.js → designer-bf3d9487.js} +2214 -680
- flowfile/web/static/assets/{documentation-b87e7f6f.js → documentation-4d0a1cea.js} +1 -1
- flowfile/web/static/assets/{dropDown-13564764.js → dropDown-025888df.js} +1 -1
- flowfile/web/static/assets/{fullEditor-fd2cd6f9.js → fullEditor-1df991ec.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-71e11604.js → genericNodeSettings-d3b2b2ac.js} +3 -3
- flowfile/web/static/assets/{index-f6c15e76.js → index-d0518598.js} +210 -31
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputCsv-d8457527.js +86 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputExcel-be89153e.js +56 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/outputParquet-fabb445a.js +31 -0
- flowfile/web/static/assets/readCsv-bca3ed53.css +52 -0
- flowfile/web/static/assets/readCsv-e8359522.js +178 -0
- flowfile/web/static/assets/readExcel-dabaf51b.js +203 -0
- flowfile/web/static/assets/readExcel-e1b381ea.css +64 -0
- flowfile/web/static/assets/readParquet-cee068e2.css +19 -0
- flowfile/web/static/assets/readParquet-e0771ef2.js +26 -0
- flowfile/web/static/assets/{secretApi-dd636aa2.js → secretApi-ce823eee.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-af36165e.js → selectDynamic-5476546e.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/{vue-codemirror.esm-2847001e.js → vue-codemirror.esm-9ed00d50.js} +29 -33
- flowfile/web/static/assets/{vue-content-loader.es-0371da73.js → vue-content-loader.es-7bca2d9b.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/METADATA +2 -1
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/RECORD +147 -117
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/nodes.py +303 -44
- flowfile_core/configs/settings.py +6 -3
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/code_generator/code_generator.py +36 -0
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +60 -80
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +61 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +44 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/models.py +3 -3
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +33 -10
- flowfile_core/flowfile/flow_graph.py +223 -118
- flowfile_core/flowfile/flow_node/flow_node.py +56 -19
- flowfile_core/flowfile/flow_node/models.py +0 -2
- flowfile_core/flowfile/flow_node/schema_callback.py +138 -43
- flowfile_core/flowfile/graph_tree/graph_tree.py +250 -0
- flowfile_core/flowfile/graph_tree/models.py +15 -0
- flowfile_core/flowfile/handler.py +22 -3
- flowfile_core/flowfile/manage/compatibility_enhancements.py +1 -1
- flowfile_core/flowfile/{flow_data_engine/fuzzy_matching/settings_validator.py → schema_callbacks.py} +72 -16
- flowfile_core/flowfile/setting_generator/settings.py +2 -2
- flowfile_core/flowfile/util/execution_orderer.py +9 -0
- flowfile_core/flowfile/util/node_skipper.py +8 -0
- flowfile_core/main.py +4 -1
- flowfile_core/routes/routes.py +59 -10
- flowfile_core/schemas/input_schema.py +0 -1
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +48 -3
- flowfile_core/schemas/transform_schema.py +28 -38
- flowfile_frame/__init__.py +1 -4
- flowfile_frame/flow_frame.py +33 -4
- flowfile_frame/flow_frame.pyi +2 -0
- flowfile_worker/__init__.py +6 -35
- flowfile_worker/funcs.py +7 -3
- flowfile_worker/main.py +5 -2
- flowfile_worker/models.py +3 -1
- flowfile_worker/routes.py +47 -5
- shared/__init__.py +15 -0
- shared/storage_config.py +243 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c4059daf.js +0 -701
- flowfile/web/static/assets/RecordCount-c2b5e095.js +0 -122
- flowfile/web/static/assets/Union-f2aefdc9.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-988d9efe.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_worker/polars_fuzzy_match/matcher.py +0 -435
- flowfile_worker/polars_fuzzy_match/models.py +0 -36
- flowfile_worker/polars_fuzzy_match/pre_process.py +0 -213
- flowfile_worker/polars_fuzzy_match/process.py +0 -86
- flowfile_worker/polars_fuzzy_match/utils.py +0 -50
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/LICENSE +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/WHEEL +0 -0
- {flowfile-0.3.8.dist-info → flowfile-0.3.10.dist-info}/entry_points.txt +0 -0
- {flowfile_worker/polars_fuzzy_match → flowfile_core/flowfile/graph_tree}/__init__.py +0 -0
@@ -4,6 +4,7 @@ from typing import Callable, Iterable
 from functools import wraps
 from flowfile_core.schemas.output_model import NodeData
 from flowfile_core.flowfile.setting_generator.setting_generator import SettingGenerator, SettingUpdator
+from pl_fuzzy_frame_match.models import FuzzyMapping
 
 setting_generator = SettingGenerator()
 setting_updator = SettingUpdator()
@@ -55,7 +56,6 @@ def cross_join(node_data: "NodeData") -> NodeData:
     ji = transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
                                          right_select=node_data.right_input.columns)
     ji.auto_rename()
-    print(ji)
     node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
                                                          node_id=node_data.node_id,
                                                          cross_join_input=ji)
@@ -135,7 +135,7 @@ def cross_join(node_data: NodeData):
 
 
 def check_if_fuzzy_match_is_valid(left_columns: Iterable[str], right_columns: Iterable[str],
-                                  fuzzy_map:
+                                  fuzzy_map: FuzzyMapping) -> bool:
     if fuzzy_map.left_col not in left_columns:
         return False
     if fuzzy_map.right_col not in right_columns:
@@ -2,6 +2,15 @@ from typing import List, Dict, Set
 from flowfile_core.flowfile.flow_node.flow_node import FlowNode
 from flowfile_core.configs import logger
 from collections import deque, defaultdict
+from flowfile_core.flowfile.util.node_skipper import determine_nodes_to_skip
+
+def compute_execution_plan(nodes: List[FlowNode], flow_starts: List[FlowNode] = None):
+    """ Computes the execution order after finding the nodes to skip on the execution step."""
+    skip_nodes = determine_nodes_to_skip(nodes=nodes)
+    computed_execution_order = determine_execution_order(all_nodes=[node for node in nodes if node not in skip_nodes],
+                                                         flow_starts=flow_starts)
+    return skip_nodes, computed_execution_order
+
 
 
 def determine_execution_order(all_nodes: List[FlowNode], flow_starts: List[FlowNode] = None) -> List[FlowNode]:
@@ -0,0 +1,8 @@
+from typing import List
+from flowfile_core.flowfile.flow_node.flow_node import FlowNode
+
+def determine_nodes_to_skip(nodes : List[FlowNode]) -> List[FlowNode]:
+    """ Finds nodes to skip on the execution step. """
+    skip_nodes = [node for node in nodes if not node.is_correct]
+    skip_nodes.extend([lead_to_node for node in skip_nodes for lead_to_node in node.leads_to_nodes])
+    return skip_nodes
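The two helpers reconstructed above are small enough to exercise on their own: determine_nodes_to_skip collects every node whose is_correct flag is false plus the nodes those lead into, and compute_execution_plan orders whatever remains. A minimal sketch (not part of the diff) using a hypothetical stand-in class that only models the is_correct and leads_to_nodes attributes the helper reads:

    from flowfile_core.flowfile.util.node_skipper import determine_nodes_to_skip

    class DummyNode:
        """Hypothetical stand-in for FlowNode, for illustration only."""
        def __init__(self, name, is_correct=True):
            self.name = name
            self.is_correct = is_correct
            self.leads_to_nodes = []

    read, select, write = DummyNode("read"), DummyNode("select", is_correct=False), DummyNode("write")
    read.leads_to_nodes = [select]
    select.leads_to_nodes = [write]

    # The invalid 'select' node and its direct successor 'write' are skipped,
    # so only 'read' would be handed to determine_execution_order.
    print([n.name for n in determine_nodes_to_skip([read, select, write])])  # ['select', 'write']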
flowfile_core/main.py
CHANGED
@@ -7,6 +7,8 @@ import uvicorn
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
+from shared.storage_config import storage
+
 from flowfile_core import ServerRun
 from flowfile_core.configs.settings import (SERVER_HOST, SERVER_PORT, WORKER_HOST, WORKER_PORT, WORKER_URL,)
 
@@ -18,13 +20,13 @@ from flowfile_core.routes.logs import router as logs_router
 from flowfile_core.routes.cloud_connections import router as cloud_connections_router
 
 from flowfile_core.configs.flow_logger import clear_all_flow_logs
+storage.cleanup_directories()
 
 os.environ["FLOWFILE_MODE"] = "electron"
 
 should_exit = False
 server_instance = None
 
-
 @asynccontextmanager
 async def shutdown_handler(app: FastAPI):
     """Handles the graceful startup and shutdown of the FastAPI application.
@@ -79,6 +81,7 @@ app.include_router(secrets_router, prefix="/secrets", tags=["secrets"])
 app.include_router(cloud_connections_router, prefix="/cloud_connections", tags=["cloud_connections"])
 
 
+
 @app.post("/shutdown")
 async def shutdown():
     """An API endpoint to gracefully shut down the server.
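Both services now get their working directories from the new shared/storage_config.py module (243 added lines, not shown in this diff) instead of ad-hoc temp-dir handling; the same storage singleton is also used by routes.py and flowfile_worker below, replacing the SharedTempDirectory helper removed from flowfile_worker/__init__.py. A rough sketch of the interface the call sites rely on; the attribute and method names come from the calls in this diff, everything else (base path, cleanup behaviour) is assumed:

    # Sketch only: the real shared/storage_config.py is not shown in this diff.
    import shutil
    from pathlib import Path

    class StorageConfig:
        def __init__(self, base: Path = Path.home() / ".flowfile"):  # base location is an assumption
            self.user_data_directory = base / "user_data"            # used by routes.py as the file-explorer sandbox
            self.cache_directory = base / "cache"                    # used by flowfile_worker as CACHE_DIR
            self.user_data_directory.mkdir(parents=True, exist_ok=True)
            self.cache_directory.mkdir(parents=True, exist_ok=True)

        def cleanup_directories(self) -> None:
            """Empty and recreate the cache directory (behaviour inferred from the removed SharedTempDirectory)."""
            shutil.rmtree(self.cache_directory, ignore_errors=True)
            self.cache_directory.mkdir(parents=True, exist_ok=True)

    storage = StorageConfig()  # module-level singleton, imported as `from shared.storage_config import storage`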
flowfile_core/routes/routes.py
CHANGED
@@ -26,7 +26,7 @@ from flowfile_core.configs.node_store import nodes
 from flowfile_core.configs.settings import IS_RUNNING_IN_DOCKER
 # File handling
 from flowfile_core.fileExplorer.funcs import (
-
+    SecureFileExplorer,
     FileInfo,
     get_files_from_directory
 )
@@ -39,9 +39,11 @@ from flowfile_core.flowfile.extensions import get_instant_func_results
 from flowfile_core.flowfile.sources.external_sources.sql_source.sql_source import create_sql_source_from_db_settings
 from flowfile_core.run_lock import get_flow_run_lock
 # Schema and models
+
+from shared.storage_config import storage
 from flowfile_core.schemas import input_schema, schemas, output_model
 from flowfile_core.utils import excel_file_manager
-from flowfile_core.utils.fileManager import create_dir
+from flowfile_core.utils.fileManager import create_dir
 from flowfile_core.utils.utils import camel_case_to_snake_case
 from flowfile_core import flow_file_handler
 from flowfile_core.flowfile.database_connection_manager.db_connections import (store_database_connection,
@@ -54,7 +56,10 @@ from flowfile_core.database.connection import get_db
 router = APIRouter(dependencies=[Depends(get_current_active_user)])
 
 # Initialize services
-file_explorer =
+file_explorer = SecureFileExplorer(
+    start_path=storage.user_data_directory,
+    sandbox_root=storage.user_data_directory
+)
 
 
 def get_node_model(setting_name_ref: str):
@@ -148,7 +153,7 @@ async def get_directory_contents(directory: str, file_types: List[str] = None,
     Returns:
         A list of `FileInfo` objects representing the directory's contents.
     """
-    directory_explorer =
+    directory_explorer = SecureFileExplorer(directory, storage.user_data_directory)
     try:
         return directory_explorer.list_contents(show_hidden=include_hidden, file_types=file_types)
     except Exception as e:
@@ -198,6 +203,24 @@ async def get_active_flow_file_sessions() -> List[schemas.FlowSettings]:
     return [flf.flow_settings for flf in flow_file_handler.flowfile_flows]
 
 
+@router.post("/node/trigger_fetch_data", tags=['editor'])
+async def trigger_fetch_node_data(flow_id: int, node_id: int, background_tasks: BackgroundTasks):
+    """Fetches and refreshes the data for a specific node."""
+    flow = flow_file_handler.get_flow(flow_id)
+    lock = get_flow_run_lock(flow_id)
+    async with lock:
+        if flow.flow_settings.is_running:
+            raise HTTPException(422, 'Flow is already running')
+        try:
+            flow.validate_if_node_can_be_fetched(node_id)
+        except Exception as e:
+            raise HTTPException(422, str(e))
+        background_tasks.add_task(flow.trigger_fetch_node, node_id)
+        return JSONResponse(content={"message": "Data started",
+                                     "flow_id": flow_id,
+                                     "node_id": node_id}, status_code=status.HTTP_200_OK)
+
+
 @router.post('/flow/run/', tags=['editor'])
 async def run_flow(flow_id: int, background_tasks: BackgroundTasks) -> JSONResponse:
     """Executes a flow in a background task.
@@ -228,6 +251,16 @@ def cancel_flow(flow_id: int):
     flow.cancel()
 
 
+@router.post("/flow/apply_standard_layout/", tags=["editor"])
+def apply_standard_layout(flow_id: int):
+    flow = flow_file_handler.get_flow(flow_id)
+    if not flow:
+        raise HTTPException(status_code=404, detail="Flow not found")
+    if flow.flow_settings.is_running:
+        raise HTTPException(422, "Flow is running")
+    flow.apply_layout()
+
+
 @router.get('/flow/run_status/', tags=['editor'],
             response_model=output_model.RunInformation)
 def get_run_status(flow_id: int, response: Response):
@@ -238,10 +271,12 @@ def get_run_status(flow_id: int, response: Response):
     flow = flow_file_handler.get_flow(flow_id)
     if not flow:
         raise HTTPException(status_code=404, detail="Flow not found")
+    if flow.latest_run_info is None:
+        raise HTTPException(status_code=404, detail="No run information available")
     if flow.flow_settings.is_running:
         response.status_code = status.HTTP_202_ACCEPTED
-
-
+    else:
+        response.status_code = status.HTTP_200_OK
     return flow.get_run_info()
 
 
@@ -439,11 +474,24 @@ def get_generated_code(flow_id: int) -> str:
 
 
 @router.post('/editor/create_flow/', tags=['editor'])
-def create_flow(flow_path: str):
+def create_flow(flow_path: str = None, name: str = None):
     """Creates a new, empty flow file at the specified path and registers a session for it."""
-    flow_path
-
-
+    if flow_path is not None and name is None:
+        name = Path(flow_path).stem
+    elif flow_path is not None and name is not None:
+        if name not in flow_path and flow_path.endswith(".flowfile"):
+            raise HTTPException(422, 'The name must be part of the flow path when a full path is provided')
+        elif name in flow_path and not flow_path.endswith(".flowfile"):
+            flow_path = str(Path(flow_path) / (name + ".flowfile"))
+        elif name not in flow_path and name.endswith(".flowfile"):
+            flow_path = str(Path(flow_path) / name)
+        elif name not in flow_path and not name.endswith(".flowfile"):
+            flow_path = str(Path(flow_path) / (name + ".flowfile"))
+    if flow_path is not None:
+        flow_path_ref = Path(flow_path)
+        if not flow_path_ref.parent.exists():
+            raise HTTPException(422, 'The directory does not exist')
+    return flow_file_handler.add_flow(name=name, flow_path=flow_path)
 
 
 @router.post('/editor/close_flow/', tags=['editor'])
@@ -471,6 +519,7 @@ def add_generic_settings(input_data: Dict[str, Any], node_type: str, current_use
     add_func = getattr(flow, 'add_' + node_type)
     parsed_input = None
     setting_name_ref = 'node' + node_type.replace('_', '')
+
     if add_func is None:
         raise HTTPException(404, 'could not find the function')
     try:
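The new /node/trigger_fetch_data endpoint starts the fetch as a background task and returns immediately, so callers poll /flow/run_status/ afterwards. A small client-side sketch (not part of the diff) using requests; the base URL, router prefix, and auth token are assumptions, while the status-code behaviour follows the handlers above:

    import requests

    BASE = "http://localhost:8000"                   # assumed host/port; router assumed mounted without a prefix
    HEADERS = {"Authorization": "Bearer <token>"}    # the router requires an authenticated user

    resp = requests.post(f"{BASE}/node/trigger_fetch_data",
                         params={"flow_id": 1, "node_id": 4},
                         headers=HEADERS)
    resp.raise_for_status()                          # 422 if the flow is already running or the node cannot be fetched

    status = requests.get(f"{BASE}/flow/run_status/", params={"flow_id": 1}, headers=HEADERS)
    # 202 while still running, 200 once finished, 404 when no run information exists yet
    print(status.status_code, status.json())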
@@ -196,7 +196,6 @@ class NodeBase(BaseModel):
     user_id: Optional[int] = None
     is_flow_output: Optional[bool] = False
 
-
 class NodeSingleInput(NodeBase):
     """A base model for any node that takes a single data input."""
     depending_on_id: Optional[int] = -1
@@ -1,4 +1,4 @@
-from typing import List, Dict, Optional, Any
+from typing import List, Dict, Optional, Any, Literal
 from pydantic import BaseModel, Field
 from datetime import datetime
 import time
@@ -21,10 +21,11 @@ class RunInformation(BaseModel):
     flow_id: int
     start_time: Optional[datetime] = Field(default_factory=datetime.now)
     end_time: Optional[datetime] = None
-    success: bool
+    success: Optional[bool] = None
     nodes_completed: int = 0
     number_of_nodes: int = 0
     node_step_result: List[NodeResult]
+    run_type: Literal["fetch_one", "full_run"]
 
 
 class BaseItem(BaseModel):
@@ -61,6 +62,8 @@ class TableExample(BaseModel):
     table_schema: List[FileColumn]
     columns: List[str]
     data: Optional[List[Dict]] = {}
+    has_example_data: bool = False
+    has_run_with_current_setup: bool = False
 
 
 class NodeData(BaseModel):
flowfile_core/schemas/schemas.py
CHANGED
@@ -1,8 +1,35 @@
 from typing import Optional, List, Dict, Tuple, Any, Literal, Annotated
 from pydantic import BaseModel, field_validator, ConfigDict, Field, StringConstraints
 from flowfile_core.flowfile.utils import create_unique_id
+from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
 ExecutionModeLiteral = Literal['Development', 'Performance']
-ExecutionLocationsLiteral = Literal['
+ExecutionLocationsLiteral = Literal['local', 'remote']
+
+
+def get_global_execution_location() -> ExecutionLocationsLiteral:
+    """
+    Calculates the default execution location based on the global settings
+    Returns
+    -------
+    ExecutionLocationsLiteral where the current
+    """
+    if OFFLOAD_TO_WORKER:
+        return "remote"
+    return "local"
+
+
+def is_valid_execution_location_in_current_global_settings(execution_location: ExecutionLocationsLiteral) -> bool:
+    return not (get_global_execution_location() == "local" and execution_location == "remote")
+
+
+def get_prio_execution_location(local_execution_location: ExecutionLocationsLiteral,
+                                global_execution_location: ExecutionLocationsLiteral) -> ExecutionLocationsLiteral:
+    if local_execution_location == global_execution_location:
+        return local_execution_location
+    elif global_execution_location == "local" and local_execution_location == "remote":
+        return "local"
+    else:
+        return local_execution_location
 
 
 class FlowGraphConfig(BaseModel):
@@ -16,7 +43,7 @@ class FlowGraphConfig(BaseModel):
         name (str): The name of the flow.
         path (str): The file path associated with the flow.
         execution_mode (ExecutionModeLiteral): The mode of execution ('Development' or 'Performance').
-        execution_location (ExecutionLocationsLiteral): The location for execution ('
+        execution_location (ExecutionLocationsLiteral): The location for execution ('local', 'remote').
     """
     flow_id: int = Field(default_factory=create_unique_id, description="Unique identifier for the flow.")
     description: Optional[str] = None
@@ -24,7 +51,23 @@ class FlowGraphConfig(BaseModel):
     name: str = ''
     path: str = ''
     execution_mode: ExecutionModeLiteral = 'Performance'
-    execution_location: ExecutionLocationsLiteral =
+    execution_location: ExecutionLocationsLiteral = Field(default_factory=get_global_execution_location)
+
+    @field_validator('execution_location', mode='before')
+    def validate_and_set_execution_location(cls, v: Optional[ExecutionLocationsLiteral]) -> ExecutionLocationsLiteral:
+        """
+        Validates and sets the execution location.
+        1. **If `None` is provided**: It defaults to the location determined by global settings.
+        2. **If a value is provided**: It checks if the value is compatible with the global
+           settings. If not (e.g., requesting 'remote' when only 'local' is possible),
+           it corrects the value to a compatible one.
+        """
+        if v is None:
+            return get_global_execution_location()
+        if v == "auto":
+            return get_global_execution_location()
+
+        return get_prio_execution_location(v, get_global_execution_location())
 
 
 class FlowSettings(FlowGraphConfig):
@@ -95,6 +138,8 @@ class NodeTemplate(BaseModel):
     node_group: str
     prod_ready: bool = True
     can_be_start: bool = False
+    drawer_title: str = "Node title"
+    drawer_intro: str = "Drawer into"
 
 
 class NodeInformation(BaseModel):
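With the execution_location default and validator in place, a flow can never request an execution location that the global OFFLOAD_TO_WORKER setting does not allow. A small sketch (not part of the diff) of the resulting behaviour, assuming a configuration where the worker is disabled so the global location resolves to 'local':

    from flowfile_core.schemas.schemas import FlowGraphConfig, get_global_execution_location

    # Assumption for this sketch: OFFLOAD_TO_WORKER is falsy, so the global location is 'local'.
    assert get_global_execution_location() == "local"

    cfg = FlowGraphConfig(name="demo", execution_location=None)
    print(cfg.execution_location)   # 'local': None falls back to the global location

    cfg = FlowGraphConfig(name="demo", execution_location="remote")
    print(cfg.execution_location)   # 'local': get_prio_execution_location downgrades 'remote' when only 'local' is allowed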
@@ -6,6 +6,9 @@ from copy import deepcopy
 
 from typing import NamedTuple
 
+from pl_fuzzy_frame_match.models import FuzzyMapping
+
+FuzzyMap = FuzzyMapping  # For backwards compatibility
 
 def get_func_type_mapping(func: str):
     """Infers the output data type of common aggregation functions."""
@@ -158,6 +161,19 @@ class SelectInputs:
         """Gets a list of original column names to select from the source DataFrame."""
         return [v.old_name for v in self.renames if v.keep or (v.join_key and include_join_key)]
 
+    def has_drop_cols(self) -> bool:
+        """Checks if any column is marked to be dropped from the selection."""
+        return any(not v.keep for v in self.renames)
+
+    @property
+    def drop_columns(self) -> List[SelectInput]:
+        """Returns a list of column names that are marked to be dropped from the selection."""
+        return [v for v in self.renames if not v.keep and v.is_available]
+
+    @property
+    def non_jk_drop_columns(self) -> List[SelectInput]:
+        return [v for v in self.renames if not v.keep and v.is_available and not v.join_key]
+
     def __add__(self, other: "SelectInput"):
         """Allows adding a SelectInput using the '+' operator."""
         self.renames.append(other)
@@ -225,32 +241,6 @@ class JoinMap:
     right_col: str
 
 
-@dataclass
-class FuzzyMap(JoinMap):
-    """Extends `JoinMap` with settings for fuzzy string matching, such as the algorithm and similarity threshold."""
-    threshold_score: Optional[float] = 80.0
-    fuzzy_type: Optional[FuzzyTypeLiteral] = 'levenshtein'
-    perc_unique: Optional[float] = 0.0
-    output_column_name: Optional[str] = None
-    valid: Optional[bool] = True
-
-    def __init__(self, left_col: str, right_col: str = None, threshold_score: float = 80.0,
-                 fuzzy_type: FuzzyTypeLiteral = 'levenshtein', perc_unique: float = 0, output_column_name: str = None,
-                 _output_col_name: str = None, valid: bool = True, output_col_name: str = None):
-        if right_col is None:
-            right_col = left_col
-        self.valid = valid
-        self.left_col = left_col
-        self.right_col = right_col
-        self.threshold_score = threshold_score
-        self.fuzzy_type = fuzzy_type
-        self.perc_unique = perc_unique
-        self.output_column_name = output_column_name if output_column_name is not None else _output_col_name
-        self.output_column_name = self.output_column_name if self.output_column_name is not None else output_col_name
-        if self.output_column_name is None:
-            self.output_column_name = f'fuzzy_score_{self.left_col}_{self.right_col}'
-
-
 class JoinSelectMixin:
     """A mixin providing common methods for join-like operations that involve left and right inputs."""
     left_select: JoinInputs = None
@@ -430,32 +420,32 @@ class JoinInput(JoinSelectMixin):
 @dataclass
 class FuzzyMatchInput(JoinInput):
     """Extends `JoinInput` with settings specific to fuzzy matching, such as the matching algorithm and threshold."""
-    join_mapping: List[
+    join_mapping: List[FuzzyMapping]
     aggregate_output: bool = False
 
     @staticmethod
-    def parse_fuzz_mapping(fuzz_mapping: List[
+    def parse_fuzz_mapping(fuzz_mapping: List[FuzzyMapping] | Tuple[str, str] | str) -> List[FuzzyMapping]:
         if isinstance(fuzz_mapping, (tuple, list)):
             assert len(fuzz_mapping) > 0
             if all(isinstance(fm, dict) for fm in fuzz_mapping):
-                fuzz_mapping = [
+                fuzz_mapping = [FuzzyMapping(**fm) for fm in fuzz_mapping]
 
-            if not isinstance(fuzz_mapping[0],
+            if not isinstance(fuzz_mapping[0], FuzzyMapping):
                 assert len(fuzz_mapping) <= 2
                 if len(fuzz_mapping) == 2:
                     assert isinstance(fuzz_mapping[0], str) and isinstance(fuzz_mapping[1], str)
-                    fuzz_mapping = [
+                    fuzz_mapping = [FuzzyMapping(*fuzz_mapping)]
                 elif isinstance(fuzz_mapping[0], str):
-                    fuzz_mapping = [
+                    fuzz_mapping = [FuzzyMapping(fuzz_mapping[0], fuzz_mapping[0])]
         elif isinstance(fuzz_mapping, str):
-            fuzz_mapping = [
-        elif isinstance(fuzz_mapping,
+            fuzz_mapping = [FuzzyMapping(fuzz_mapping, fuzz_mapping)]
+        elif isinstance(fuzz_mapping, FuzzyMapping):
             fuzz_mapping = [fuzz_mapping]
         else:
             raise Exception('No valid join mapping as input')
         return fuzz_mapping
 
-    def __init__(self, join_mapping: List[
+    def __init__(self, join_mapping: List[FuzzyMapping] | Tuple[str, str] | str, left_select: List[SelectInput] | List[str],
                  right_select: List[SelectInput] | List[str], aggregate_output: bool = False, how: JoinStrategy = 'inner'):
         self.join_mapping = self.parse_fuzz_mapping(join_mapping)
         self.left_select = self.parse_select(left_select)
@@ -463,9 +453,9 @@ class FuzzyMatchInput(JoinInput):
         self.how = how
         for jm in self.join_mapping:
 
-            if jm.right_col not in self.right_select.
+            if jm.right_col not in {v.old_name for v in self.right_select.renames}:
                 self.right_select.append(SelectInput(jm.right_col, keep=False, join_key=True))
-            if jm.left_col not in self.left_select.
+            if jm.left_col not in {v.old_name for v in self.left_select.renames}:
                 self.left_select.append(SelectInput(jm.left_col, keep=False, join_key=True))
         [setattr(v, "join_key", v.old_name in self._left_join_keys) for v in self.left_select.renames]
         [setattr(v, "join_key", v.old_name in self._right_join_keys) for v in self.right_select.renames]
@@ -476,7 +466,7 @@ class FuzzyMatchInput(JoinInput):
         return self.left_select.new_cols & self.right_select.new_cols
 
     @property
-    def fuzzy_maps(self) -> List[
+    def fuzzy_maps(self) -> List[FuzzyMapping]:
         """Returns the final fuzzy mappings after applying all column renames."""
         new_mappings = []
         left_rename_table, right_rename_table = self.left_select.rename_table, self.right_select.rename_table
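FuzzyMatchInput keeps its forgiving constructor but now normalises every input form to the library's FuzzyMapping model. A short sketch (not part of the diff) of the forms parse_fuzz_mapping accepts, following the branches above; the left_col/right_col keyword names on FuzzyMapping are assumed from the attributes used elsewhere in this diff:

    from pl_fuzzy_frame_match.models import FuzzyMapping
    from flowfile_core.schemas.transform_schema import FuzzyMatchInput

    left_cols, right_cols = ["name", "city"], ["name", "city"]

    # Each of these ends up as a List[FuzzyMapping] on .join_mapping:
    FuzzyMatchInput(join_mapping="name", left_select=left_cols, right_select=right_cols)            # single column name
    FuzzyMatchInput(join_mapping=("name", "name"), left_select=left_cols, right_select=right_cols)  # (left, right) pair
    FuzzyMatchInput(join_mapping=[{"left_col": "name", "right_col": "name"}],                       # list of dicts
                    left_select=left_cols, right_select=right_cols)
    FuzzyMatchInput(join_mapping=[FuzzyMapping(left_col="name", right_col="name")],                 # ready-made mappings
                    left_select=left_cols, right_select=right_cols)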
flowfile_frame/__init__.py
CHANGED
@@ -1,12 +1,9 @@
 # flowframe/__init__.py
 """A Polars-like API for building ETL graphs."""
 
-from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
-
-OFFLOAD_TO_WORKER.value = False
-
 # Core classes
 from flowfile_frame.flow_frame import FlowFrame  # noqa: F401
+from pl_fuzzy_frame_match.models import FuzzyMapping  # noqa: F401
 
 from flowfile_frame.utils import create_flow_graph  # noqa: F401
 
flowfile_frame/flow_frame.py
CHANGED
@@ -5,11 +5,13 @@ from typing import Any, Iterable, List, Literal, Optional, Tuple, Union, Dict, C
 import re
 
 import polars as pl
-from polars._typing import (CsvEncoding)
 from flowfile_frame.lazy_methods import add_lazyframe_methods
 
-from polars._typing import (FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
+from polars._typing import (CsvEncoding, FrameInitTypes, SchemaDefinition, SchemaDict, Orientation)
 from collections.abc import Iterator
+
+from pl_fuzzy_frame_match import FuzzyMapping, fuzzy_match_dfs
+
 from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
 from flowfile_core.flowfile.flow_graph_utils import combine_flow_graphs_with_mapping
 from flowfile_core.flowfile.flow_data_engine.flow_data_engine import FlowDataEngine
@@ -20,8 +22,7 @@ from flowfile_frame.expr import Expr, Column, lit, col
 from flowfile_frame.selectors import Selector
 from flowfile_frame.group_frame import GroupByFrame
 from flowfile_frame.utils import (_parse_inputs_as_iterable, create_flow_graph, stringify_values,
-                                  ensure_inputs_as_iterable, generate_node_id,
-                                  set_node_id, data as node_id_data)
+                                  ensure_inputs_as_iterable, generate_node_id, data as node_id_data)
 from flowfile_frame.join import _normalize_columns_to_list, _create_join_mappings
 from flowfile_frame.utils import _check_if_convertible_to_code
 from flowfile_frame.config import logger
@@ -2109,6 +2110,34 @@ class FlowFrame:
 
         return self._create_child_frame(new_node_id)
 
+    def fuzzy_match(self,
+                    other: "FlowFrame",
+                    fuzzy_mappings: List[FuzzyMapping],
+                    description: str = None,
+                    ) -> "FlowFrame":
+        self._ensure_same_graph(other)
+
+        # Step 3: Generate new node ID
+        new_node_id = generate_node_id()
+        node_fuzzy_match = input_schema.NodeFuzzyMatch(flow_id=self.flow_graph.flow_id,
+                                                       node_id=new_node_id,
+                                                       join_input=
+                                                       transform_schema.FuzzyMatchInput(join_mapping=fuzzy_mappings,
+                                                                                        left_select=self.columns,
+                                                                                        right_select=other.columns),
+                                                       description=description or "Fuzzy match between two FlowFrames",
+                                                       depending_on_ids=[self.node_id, other.node_id],
+                                                       )
+        self.flow_graph.add_fuzzy_match(node_fuzzy_match)
+        self._add_connection(self.node_id, new_node_id, "main")
+        other._add_connection(other.node_id, new_node_id, "right")
+        return FlowFrame(
+            data=self.flow_graph.get_node(new_node_id).get_resulting_data().data_frame,
+            flow_graph=self.flow_graph,
+            node_id=new_node_id,
+            parent_node_id=self.node_id,
+        )
+
     def text_to_rows(
         self,
         column: str | Column,
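FlowFrame.fuzzy_match wires a fuzzy-match node between two frames that live on the same flow graph (_ensure_same_graph enforces this) and returns the resulting frame. A usage sketch (not part of the diff); constructing frames from plain dicts, the collect() call, and the threshold_score/fuzzy_type keyword names on FuzzyMapping (carried over from the removed FuzzyMap class) are assumptions:

    import flowfile_frame as ff
    from flowfile_frame import FuzzyMapping   # re-exported in flowfile_frame/__init__.py above

    left = ff.FlowFrame({"name": ["Jon Smith", "Ann Lee"], "dept": ["sales", "ops"]})
    right = ff.FlowFrame({"name": ["John Smyth", "Anne Lee"], "salary": [50_000, 60_000]},
                         flow_graph=left.flow_graph)   # both frames must share one graph

    matched = left.fuzzy_match(
        right,
        fuzzy_mappings=[FuzzyMapping(left_col="name", right_col="name",
                                     threshold_score=80.0, fuzzy_type="levenshtein")],
        description="fuzzy join on customer name",
    )
    print(matched.collect())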
flowfile_frame/flow_frame.pyi
CHANGED
@@ -208,6 +208,8 @@ class FlowFrame:
     # Get the first row of the DataFrame.
     def first(self, description: Optional[str] = None) -> 'FlowFrame': ...
 
+    def fuzzy_match(self, other: FlowFrame, fuzzy_mappings: typing.List[flowfile_core.schemas.transform_schema.FuzzyMap], description: str = None) -> 'FlowFrame': ...
+
     # Take every nth row in the LazyFrame and return as a new LazyFrame.
     def gather_every(self, n: int, offset: int = 0, description: Optional[str] = None) -> 'FlowFrame': ...
 
flowfile_worker/__init__.py
CHANGED
@@ -1,15 +1,15 @@
 from typing import Dict
-import tempfile
 import threading
 import multiprocessing
-import
-import shutil
-multiprocessing.set_start_method('spawn', force=True)
+from shared.storage_config import storage
 
+multiprocessing.set_start_method('spawn', force=True)
 
 from multiprocessing import get_context
 from flowfile_worker.models import Status
+
 mp_context = get_context("spawn")
+
 status_dict: Dict[str, Status] = dict()
 process_dict = dict()
 
@@ -17,39 +17,10 @@ status_dict_lock = threading.Lock()
 process_dict_lock = threading.Lock()
 
 
-class SharedTempDirectory:
-    """A class that mimics tempfile.TemporaryDirectory but uses a fixed directory"""
-    def __init__(self, dir_path):
-        self._path = dir_path
-        os.makedirs(self._path, exist_ok=True)
-
-    @property
-    def name(self):
-        return self._path
-
-    def cleanup(self):
-        """Remove all contents of the temp directory"""
-        try:
-            shutil.rmtree(self._path)
-            os.makedirs(self._path, exist_ok=True)
-            print(f"Cleaned up temporary directory: {self._path}")
-        except Exception as e:
-            print(f"Error during cleanup: {e}")
-
-    def __enter__(self):
-        return self.name
-
-    def __exit__(self, exc, value, tb):
-        self.cleanup()
-
-
 CACHE_EXPIRATION_TIME = 24 * 60 * 60
 
 
-
-
-CACHE_DIR = SharedTempDirectory(TEMP_DIR)
-else:
-    CACHE_DIR = tempfile.TemporaryDirectory()
+CACHE_DIR = storage.cache_directory
+
 
 PROCESS_MEMORY_USAGE: Dict[str, float] = dict()
flowfile_worker/funcs.py
CHANGED
@@ -2,8 +2,9 @@ import polars as pl
 import io
 from typing import List, Dict, Callable
 from multiprocessing import Array, Value, Queue
-
-from
+
+from pl_fuzzy_frame_match import fuzzy_match_dfs, FuzzyMapping
+
 from flowfile_worker.flow_logger import get_worker_logger
 from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
 from flowfile_worker.external_sources.sql_source.main import write_df_to_database
@@ -33,7 +34,10 @@ def fuzzy_join_task(left_serializable_object: bytes, right_serializable_object:
     flowfile_logger.info("Starting fuzzy join operation")
     left_df = pl.LazyFrame.deserialize(io.BytesIO(left_serializable_object))
    right_df = pl.LazyFrame.deserialize(io.BytesIO(right_serializable_object))
-    fuzzy_match_result = fuzzy_match_dfs(left_df,
+    fuzzy_match_result = fuzzy_match_dfs(left_df=left_df,
+                                         right_df=right_df,
+                                         fuzzy_maps=fuzzy_maps,
+                                         logger=flowfile_logger)
     flowfile_logger.info("Fuzzy join operation completed successfully")
     fuzzy_match_result.write_ipc(file_path)
     with progress.get_lock():
flowfile_worker/main.py
CHANGED
@@ -4,8 +4,11 @@ import signal
 
 from contextlib import asynccontextmanager
 from fastapi import FastAPI
+
+from shared.storage_config import storage
+
 from flowfile_worker.routes import router
-from flowfile_worker import mp_context
+from flowfile_worker import mp_context
 from flowfile_worker.configs import logger, FLOWFILE_CORE_URI, SERVICE_HOST, SERVICE_PORT
 
 
@@ -30,7 +33,7 @@ async def shutdown_handler(app: FastAPI):
             logger.error(f"Error cleaning up process: {e}")
 
     try:
-
+        storage.cleanup_directories()
     except Exception as e:
         print(f"Error cleaning up cache directory: {e}")
 
flowfile_worker/models.py
CHANGED
@@ -1,7 +1,9 @@
 from pydantic import BaseModel
 from typing import Optional, Literal, Any
 from base64 import decodebytes
-
+
+from pl_fuzzy_frame_match import FuzzyMapping
+
 from flowfile_worker.external_sources.sql_source.models import DatabaseWriteSettings
 from flowfile_worker.external_sources.s3_source.models import CloudStorageWriteSettings
 