Flowfile 0.5.3__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +16 -0
- flowfile/__main__.py +94 -1
- flowfile/web/static/assets/{AdminView-49392a9a.js → AdminView-c2c7942b.js} +1 -1
- flowfile/web/static/assets/{CloudConnectionView-f13f202b.js → CloudConnectionView-7a3042c6.js} +4 -4
- flowfile/web/static/assets/{CloudConnectionView-36bcd6df.css → CloudConnectionView-cf85f943.css} +17 -17
- flowfile/web/static/assets/{CloudStorageReader-0023d4a5.js → CloudStorageReader-709c4037.js} +8 -8
- flowfile/web/static/assets/{CloudStorageWriter-8e781e11.js → CloudStorageWriter-604c51a8.js} +8 -8
- flowfile/web/static/assets/ColumnActionInput-c44b7aee.css +159 -0
- flowfile/web/static/assets/ColumnActionInput-d63d6746.js +330 -0
- flowfile/web/static/assets/{ColumnSelector-8ad68ea9.js → ColumnSelector-0c8cd1cd.js} +1 -1
- flowfile/web/static/assets/ContextMenu-366bf1b4.js +9 -0
- flowfile/web/static/assets/ContextMenu-85cf5b44.js +9 -0
- flowfile/web/static/assets/ContextMenu-9d28ae6d.js +9 -0
- flowfile/web/static/assets/ContextMenu.vue_vue_type_script_setup_true_lang-774c517c.js +59 -0
- flowfile/web/static/assets/{CrossJoin-03df6938.js → CrossJoin-38e5b99a.js} +9 -9
- flowfile/web/static/assets/{CustomNode-8479239b.js → CustomNode-76e8f3f5.js} +27 -20
- flowfile/web/static/assets/CustomNode-edb9b939.css +42 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-869e3efd.js → DatabaseConnectionSettings-38155669.js} +4 -4
- flowfile/web/static/assets/{DatabaseConnectionSettings-e91df89a.css → DatabaseConnectionSettings-c20a1e16.css} +22 -20
- flowfile/web/static/assets/{DatabaseReader-c58b9552.js → DatabaseReader-2e549c8f.js} +13 -13
- flowfile/web/static/assets/{DatabaseReader-36898a00.css → DatabaseReader-5bf8c75b.css} +39 -44
- flowfile/web/static/assets/{DatabaseView-d26a9140.js → DatabaseView-dc877c29.js} +2 -2
- flowfile/web/static/assets/{DatabaseWriter-217a99f1.css → DatabaseWriter-bdcf2c8b.css} +27 -25
- flowfile/web/static/assets/{DatabaseWriter-4d05ddc7.js → DatabaseWriter-ffb91864.js} +12 -12
- flowfile/web/static/assets/{DesignerView-a6d0ee84.css → DesignerView-71d4e9a1.css} +429 -376
- flowfile/web/static/assets/{DesignerView-e6f5c0e8.js → DesignerView-a4466dab.js} +338 -183
- flowfile/web/static/assets/{DocumentationView-2e78ef1b.js → DocumentationView-979afc84.js} +3 -3
- flowfile/web/static/assets/{DocumentationView-fd46c656.css → DocumentationView-9ea6e871.css} +9 -9
- flowfile/web/static/assets/{ExploreData-7b54caca.js → ExploreData-e4b92aaf.js} +7 -7
- flowfile/web/static/assets/{ExternalSource-47ab05a3.css → ExternalSource-7ac7373f.css} +17 -17
- flowfile/web/static/assets/{ExternalSource-3fa399b2.js → ExternalSource-d08e7227.js} +9 -9
- flowfile/web/static/assets/{Filter-8cbbdbf3.js → Filter-7add806d.js} +9 -9
- flowfile/web/static/assets/{Formula-aac42b1e.js → Formula-36ab24d2.js} +9 -9
- flowfile/web/static/assets/{FuzzyMatch-cd9bbfca.js → FuzzyMatch-cc01bb04.js} +10 -10
- flowfile/web/static/assets/{GraphSolver-c24dec17.css → GraphSolver-4b4d7db9.css} +4 -4
- flowfile/web/static/assets/{GraphSolver-c7e6780e.js → GraphSolver-4fb98f3b.js} +11 -11
- flowfile/web/static/assets/GroupBy-5792782d.css +9 -0
- flowfile/web/static/assets/{GroupBy-93c5d22b.js → GroupBy-b3c8f429.js} +9 -9
- flowfile/web/static/assets/{Join-a19b2de2.js → Join-096b7b26.js} +10 -10
- flowfile/web/static/assets/{LoginView-0df4ed0a.js → LoginView-c33a246a.js} +1 -1
- flowfile/web/static/assets/{ManualInput-3702e677.css → ManualInput-39111f19.css} +48 -48
- flowfile/web/static/assets/{ManualInput-8d3374b2.js → ManualInput-7307e9b1.js} +55 -13
- flowfile/web/static/assets/{MultiSelect-ad1b6243.js → MultiSelect-14822c48.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-e278950d.js → MultiSelect.vue_vue_type_script_setup_true_lang-90c4d340.js} +1 -1
- flowfile/web/static/assets/{NodeDesigner-40b647c9.js → NodeDesigner-5036c392.js} +171 -69
- flowfile/web/static/assets/{NodeDesigner-5f53be3f.css → NodeDesigner-94cd4dd3.css} +190 -190
- flowfile/web/static/assets/{NumericInput-7100234c.js → NumericInput-15cf3b72.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-5130219f.js → NumericInput.vue_vue_type_script_setup_true_lang-91e679d7.js} +1 -1
- flowfile/web/static/assets/{Output-f5efd2aa.js → Output-1f8ed42c.js} +13 -12
- flowfile/web/static/assets/{Output-35e97000.css → Output-692dd25d.css} +10 -10
- flowfile/web/static/assets/{Pivot-d981d23c.js → Pivot-0e153f4e.js} +10 -10
- flowfile/web/static/assets/{PivotValidation-63de1f73.js → PivotValidation-5a4f7c79.js} +1 -1
- flowfile/web/static/assets/{PivotValidation-39386e95.js → PivotValidation-81ec2a33.js} +1 -1
- flowfile/web/static/assets/{PolarsCode-f9d69217.js → PolarsCode-a39f15ac.js} +7 -7
- flowfile/web/static/assets/PopOver-ddcfe4f6.js +138 -0
- flowfile/web/static/assets/{Read-aec2e377.js → Read-39b63932.js} +15 -14
- flowfile/web/static/assets/{Read-36e7bd51.css → Read-90f366bc.css} +13 -13
- flowfile/web/static/assets/{RecordCount-78ed6845.js → RecordCount-e9048ccd.js} +6 -6
- flowfile/web/static/assets/{RecordId-2156e890.js → RecordId-ad02521d.js} +9 -9
- flowfile/web/static/assets/{SQLQueryComponent-48c72f5b.js → SQLQueryComponent-2eeecf0b.js} +3 -3
- flowfile/web/static/assets/SQLQueryComponent-edb90b98.css +29 -0
- flowfile/web/static/assets/{Sample-1352ca74.js → Sample-9a68c23d.js} +6 -6
- flowfile/web/static/assets/{SecretSelector-22b5ff89.js → SecretSelector-2429f35a.js} +2 -2
- flowfile/web/static/assets/{SecretsView-17df66ee.js → SecretsView-c6afc915.js} +2 -2
- flowfile/web/static/assets/{Select-0aee4c54.js → Select-fcd002b6.js} +9 -9
- flowfile/web/static/assets/{SettingsSection-cd341bb6.js → SettingsSection-5ce15962.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-0784e157.js → SettingsSection-c6b1362c.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-f2002a6d.js → SettingsSection-cebb91d5.js} +1 -1
- flowfile/web/static/assets/SetupView-2d12e01f.js +160 -0
- flowfile/web/static/assets/SetupView-ec26f76a.css +230 -0
- flowfile/web/static/assets/{SingleSelect-460cc0ea.js → SingleSelect-b67de4eb.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-30741bb2.js → SingleSelect.vue_vue_type_script_setup_true_lang-eedb70eb.js} +1 -1
- flowfile/web/static/assets/{SliderInput-5d926864.js → SliderInput-fd8134ac.js} +1 -1
- flowfile/web/static/assets/Sort-4abb7fae.css +9 -0
- flowfile/web/static/assets/{Sort-3cdc971b.js → Sort-c005a573.js} +9 -9
- flowfile/web/static/assets/{TextInput-a2d0bfbd.js → TextInput-1bb31dab.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-abad1ca2.js → TextInput.vue_vue_type_script_setup_true_lang-a51fe730.js} +1 -1
- flowfile/web/static/assets/{TextToRows-918945f7.js → TextToRows-4f363753.js} +9 -9
- flowfile/web/static/assets/{ToggleSwitch-f0ef5196.js → ToggleSwitch-ca0f2e5e.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-5605c793.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-49aa41d8.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-54d2f518.css → UnavailableFields-394a1f78.css} +13 -13
- flowfile/web/static/assets/{UnavailableFields-bdad6144.js → UnavailableFields-f6147968.js} +4 -4
- flowfile/web/static/assets/{Union-e8ab8c86.js → Union-c65f17b7.js} +6 -6
- flowfile/web/static/assets/Unique-2b705521.css +3 -0
- flowfile/web/static/assets/{Unique-8cd4f976.js → Unique-a1d96fb2.js} +12 -12
- flowfile/web/static/assets/{Unpivot-710a2948.css → Unpivot-b6ad6427.css} +6 -6
- flowfile/web/static/assets/{Unpivot-8da14095.js → Unpivot-c2657ff3.js} +11 -11
- flowfile/web/static/assets/{UnpivotValidation-6f7d89ff.js → UnpivotValidation-28e29a3b.js} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-3fb312e1.js → VueGraphicWalker-2fc3ddd4.js} +1 -1
- flowfile/web/static/assets/{api-24483f0d.js → api-df48ec50.js} +1 -1
- flowfile/web/static/assets/{api-8b81fa73.js → api-ee542cf7.js} +1 -1
- flowfile/web/static/assets/{dropDown-3d8dc5fa.css → dropDown-1d6acbd9.css} +26 -26
- flowfile/web/static/assets/{dropDown-ac0fda9d.js → dropDown-7576a76a.js} +3 -3
- flowfile/web/static/assets/{fullEditor-5497a84a.js → fullEditor-7583bef5.js} +3 -3
- flowfile/web/static/assets/{fullEditor-a0be62b3.css → fullEditor-fe9f7e18.css} +3 -3
- flowfile/web/static/assets/{genericNodeSettings-99014e1d.js → genericNodeSettings-0155288b.js} +2 -3
- flowfile/web/static/assets/{index-3ba44389.js → index-057d770d.js} +2 -2
- flowfile/web/static/assets/{index-07dda503.js → index-aeec439d.js} +1 -1
- flowfile/web/static/assets/{index-fb6493ae.js → index-ca6799de.js} +2293 -196
- flowfile/web/static/assets/{index-e6289dd0.css → index-d60c9dd4.css} +560 -10
- flowfile/web/static/assets/nodeInput-d478b9ac.js +2 -0
- flowfile/web/static/assets/{outputCsv-8f8ba42d.js → outputCsv-c492b15e.js} +3 -3
- flowfile/web/static/assets/outputCsv-cc84e09f.css +2499 -0
- flowfile/web/static/assets/{outputExcel-393f4fef.js → outputExcel-13bfa10f.js} +1 -1
- flowfile/web/static/assets/{outputParquet-07c81f65.js → outputParquet-9be1523a.js} +1 -1
- flowfile/web/static/assets/{readCsv-07f6d9ad.js → readCsv-5a49a8c9.js} +1 -1
- flowfile/web/static/assets/{readExcel-ed69bc8f.js → readExcel-27c30ad8.js} +3 -3
- flowfile/web/static/assets/{readParquet-e3ed4528.js → readParquet-446bde68.js} +1 -1
- flowfile/web/static/assets/{secrets.api-002e7d7e.js → secrets.api-34431884.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-80b92899.js → selectDynamic-5754a2b1.js} +2 -3
- flowfile/web/static/assets/{vue-codemirror.esm-0965f39f.js → vue-codemirror.esm-8f46fb36.js} +1 -1
- flowfile/web/static/assets/{vue-content-loader.es-c506ad97.js → vue-content-loader.es-808fe33a.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.5.3.dist-info → flowfile-0.5.6.dist-info}/METADATA +2 -2
- {flowfile-0.5.3.dist-info → flowfile-0.5.6.dist-info}/RECORD +139 -134
- flowfile_core/auth/secrets.py +56 -13
- flowfile_core/fileExplorer/funcs.py +26 -4
- flowfile_core/flowfile/code_generator/__init__.py +11 -0
- flowfile_core/flowfile/code_generator/code_generator.py +347 -2
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +13 -1
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +12 -0
- flowfile_core/flowfile/flow_graph.py +2 -0
- flowfile_core/flowfile/flow_node/flow_node.py +52 -28
- flowfile_core/flowfile/node_designer/__init__.py +4 -0
- flowfile_core/flowfile/node_designer/ui_components.py +144 -1
- flowfile_core/main.py +2 -4
- flowfile_core/routes/public.py +43 -1
- flowfile_core/schemas/cloud_storage_schemas.py +39 -15
- flowfile_core/secret_manager/secret_manager.py +107 -6
- flowfile_frame/__init__.py +11 -0
- flowfile_frame/database/__init__.py +36 -0
- flowfile_frame/database/connection_manager.py +205 -0
- flowfile_frame/database/frame_helpers.py +249 -0
- flowfile_worker/configs.py +31 -15
- flowfile_worker/secrets.py +105 -15
- flowfile_worker/spawner.py +10 -6
- flowfile/web/static/assets/ContextMenu-26d4dd27.css +0 -26
- flowfile/web/static/assets/ContextMenu-31ee57f0.js +0 -41
- flowfile/web/static/assets/ContextMenu-69a74055.js +0 -41
- flowfile/web/static/assets/ContextMenu-8e2051c6.js +0 -41
- flowfile/web/static/assets/ContextMenu-8ec1729e.css +0 -26
- flowfile/web/static/assets/ContextMenu-9b310c60.css +0 -26
- flowfile/web/static/assets/CustomNode-59e99a86.css +0 -32
- flowfile/web/static/assets/GroupBy-be7ac0bf.css +0 -51
- flowfile/web/static/assets/PopOver-b22f049e.js +0 -939
- flowfile/web/static/assets/SQLQueryComponent-1c2f26b4.css +0 -27
- flowfile/web/static/assets/Sort-8a871341.css +0 -51
- flowfile/web/static/assets/Unique-9fb2f567.css +0 -51
- flowfile/web/static/assets/nodeInput-0eb13f1a.js +0 -2
- flowfile/web/static/assets/outputCsv-b9a072af.css +0 -2499
- {flowfile-0.5.3.dist-info → flowfile-0.5.6.dist-info}/WHEEL +0 -0
- {flowfile-0.5.3.dist-info → flowfile-0.5.6.dist-info}/entry_points.txt +0 -0
- {flowfile-0.5.3.dist-info → flowfile-0.5.6.dist-info}/licenses/LICENSE +0 -0
|
@@ -406,15 +406,17 @@ def validate_file_path(user_path: str, allowed_base: Path) -> Optional[Path]:
|
|
|
406
406
|
def validate_path_under_cwd(user_path: str) -> str:
|
|
407
407
|
"""Validate that a user-provided path resolves to within allowed directories.
|
|
408
408
|
|
|
409
|
+
In Electron mode (desktop app), users can access any file on their local system.
|
|
410
|
+
In Docker/package mode, paths are restricted to:
|
|
411
|
+
- Current working directory (for development/testing)
|
|
412
|
+
- Flowfile storage directory (~/.flowfile)
|
|
413
|
+
- User data directory (home directory in local mode, /data/user in Docker)
|
|
414
|
+
|
|
409
415
|
Uses the exact pattern from CodeQL documentation for py/path-injection:
|
|
410
416
|
- os.path.normpath for path normalization
|
|
411
417
|
- os.path.join to combine base with user input
|
|
412
418
|
- startswith check to ensure path stays within base
|
|
413
419
|
|
|
414
|
-
Allowed directories:
|
|
415
|
-
- Current working directory (for development/testing)
|
|
416
|
-
- Flowfile storage directory (~/.flowfile)
|
|
417
|
-
|
|
418
420
|
Args:
|
|
419
421
|
user_path: The user-provided path string
|
|
420
422
|
|
|
@@ -424,6 +426,19 @@ def validate_path_under_cwd(user_path: str) -> str:
|
|
|
424
426
|
Raises:
|
|
425
427
|
HTTPException: 403 if path escapes the allowed directories
|
|
426
428
|
"""
|
|
429
|
+
from flowfile_core.configs.settings import is_electron_mode
|
|
430
|
+
|
|
431
|
+
# In Electron mode, allow access to any local file path
|
|
432
|
+
# This is safe because Electron runs locally on the user's machine
|
|
433
|
+
if is_electron_mode():
|
|
434
|
+
# Normalize and resolve the path
|
|
435
|
+
normalized_path = os.path.normpath(os.path.expanduser(user_path))
|
|
436
|
+
# Block path traversal patterns even in Electron mode
|
|
437
|
+
if '..' in user_path:
|
|
438
|
+
raise HTTPException(403, 'Access denied: path traversal not allowed')
|
|
439
|
+
return normalized_path
|
|
440
|
+
|
|
441
|
+
# In Docker/package mode, enforce strict sandboxing
|
|
427
442
|
# Try current working directory first
|
|
428
443
|
base_path = os.path.normpath(os.getcwd())
|
|
429
444
|
fullpath = os.path.normpath(os.path.join(base_path, user_path))
|
|
@@ -436,6 +451,13 @@ def validate_path_under_cwd(user_path: str) -> str:
|
|
|
436
451
|
if fullpath.startswith(base_path):
|
|
437
452
|
return fullpath
|
|
438
453
|
|
|
454
|
+
# Try user data directory (consistent with SecureFileExplorer sandbox)
|
|
455
|
+
# In local mode this is the home directory, in Docker it's /data/user
|
|
456
|
+
base_path = os.path.normpath(str(storage.user_data_directory))
|
|
457
|
+
fullpath = os.path.normpath(os.path.join(base_path, user_path))
|
|
458
|
+
if fullpath.startswith(base_path):
|
|
459
|
+
return fullpath
|
|
460
|
+
|
|
439
461
|
raise HTTPException(403, 'Access denied')
|
|
440
462
|
|
|
441
463
|
|
|
@@ -1,7 +1,10 @@
|
|
|
1
|
+
import inspect
|
|
2
|
+
import typing
|
|
1
3
|
import polars as pl
|
|
2
4
|
from pl_fuzzy_frame_match.models import FuzzyMapping
|
|
3
5
|
|
|
4
6
|
from flowfile_core.configs import logger
|
|
7
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
5
8
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, convert_pl_type_to_string
|
|
6
9
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.utils import cast_str_to_polars_type
|
|
7
10
|
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
@@ -10,6 +13,18 @@ from flowfile_core.flowfile.util.execution_orderer import determine_execution_or
|
|
|
10
13
|
from flowfile_core.schemas import input_schema, transform_schema
|
|
11
14
|
|
|
12
15
|
|
|
16
|
+
class UnsupportedNodeError(Exception):
    """Raised when code generation encounters a node type that cannot be converted to standalone code.

    Attributes:
        node_type: Type name of the offending node.
        node_id: Identifier of the offending node.
        reason: Human-readable explanation of why no code can be generated.
    """

    def __init__(self, node_type: str, node_id: int, reason: str):
        self.node_type = node_type
        self.node_id = node_id
        self.reason = reason
        super().__init__(
            f"Cannot generate code for node '{node_type}' (node_id={node_id}): {reason}"
        )

    def __reduce__(self):
        # The default exception reduce replays ``self.args`` (the single formatted
        # message) into ``__init__``, which requires three arguments. Without this
        # override the error cannot be pickled or copied.
        return (type(self), (self.node_type, self.node_id, self.reason))
|
|
26
|
+
|
|
27
|
+
|
|
13
28
|
class FlowGraphToPolarsConverter:
|
|
14
29
|
"""
|
|
15
30
|
Converts a FlowGraph into executable Polars code.
|
|
@@ -24,6 +39,8 @@ class FlowGraphToPolarsConverter:
|
|
|
24
39
|
code_lines: list[str]
|
|
25
40
|
output_nodes: list[tuple[int, str]] = []
|
|
26
41
|
last_node_var: str | None = None
|
|
42
|
+
unsupported_nodes: list[tuple[int, str, str]] # List of (node_id, node_type, reason)
|
|
43
|
+
custom_node_classes: dict[str, str] # Maps custom node class name to source code
|
|
27
44
|
|
|
28
45
|
def __init__(self, flow_graph: FlowGraph):
|
|
29
46
|
self.flow_graph = flow_graph
|
|
@@ -32,6 +49,8 @@ class FlowGraphToPolarsConverter:
|
|
|
32
49
|
self.code_lines: list[str] = []
|
|
33
50
|
self.output_nodes = []
|
|
34
51
|
self.last_node_var = None
|
|
52
|
+
self.unsupported_nodes = []
|
|
53
|
+
self.custom_node_classes = {}
|
|
35
54
|
|
|
36
55
|
def convert(self) -> str:
|
|
37
56
|
"""
|
|
@@ -39,6 +58,10 @@ class FlowGraphToPolarsConverter:
|
|
|
39
58
|
|
|
40
59
|
Returns:
|
|
41
60
|
str: Complete Python code that can be executed standalone
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
UnsupportedNodeError: If the graph contains nodes that cannot be converted
|
|
64
|
+
to standalone code (e.g., database nodes, explore_data, external_source).
|
|
42
65
|
"""
|
|
43
66
|
# Get execution order
|
|
44
67
|
execution_order = determine_execution_order(
|
|
@@ -50,6 +73,20 @@ class FlowGraphToPolarsConverter:
|
|
|
50
73
|
for node in execution_order:
|
|
51
74
|
self._generate_node_code(node)
|
|
52
75
|
|
|
76
|
+
# Check for unsupported nodes and raise an error with all of them listed
|
|
77
|
+
if self.unsupported_nodes:
|
|
78
|
+
error_messages = []
|
|
79
|
+
for node_id, node_type, reason in self.unsupported_nodes:
|
|
80
|
+
error_messages.append(f" - Node {node_id} ({node_type}): {reason}")
|
|
81
|
+
raise UnsupportedNodeError(
|
|
82
|
+
node_type=self.unsupported_nodes[0][1],
|
|
83
|
+
node_id=self.unsupported_nodes[0][0],
|
|
84
|
+
reason=(
|
|
85
|
+
f"The flow contains {len(self.unsupported_nodes)} node(s) that cannot be converted to code:\n"
|
|
86
|
+
+ "\n".join(error_messages)
|
|
87
|
+
),
|
|
88
|
+
)
|
|
89
|
+
|
|
53
90
|
# Combine everything
|
|
54
91
|
return self._build_final_code()
|
|
55
92
|
|
|
@@ -73,13 +110,25 @@ class FlowGraphToPolarsConverter:
|
|
|
73
110
|
self.last_node_var = var_name
|
|
74
111
|
# Get input variable names
|
|
75
112
|
input_vars = self._get_input_vars(node)
|
|
113
|
+
|
|
114
|
+
# Check if this is a user-defined node
|
|
115
|
+
if isinstance(settings, input_schema.UserDefinedNode) or getattr(settings, "is_user_defined", False):
|
|
116
|
+
self._handle_user_defined(node, var_name, input_vars)
|
|
117
|
+
return
|
|
118
|
+
|
|
76
119
|
# Route to appropriate handler based on node type
|
|
77
120
|
handler = getattr(self, f"_handle_{node_type}", None)
|
|
78
121
|
if handler:
|
|
79
122
|
handler(settings, var_name, input_vars)
|
|
80
123
|
else:
|
|
81
|
-
|
|
82
|
-
|
|
124
|
+
# Unknown node type - add to unsupported list
|
|
125
|
+
self.unsupported_nodes.append((
|
|
126
|
+
node.node_id,
|
|
127
|
+
node_type,
|
|
128
|
+
f"No code generator implemented for node type '{node_type}'"
|
|
129
|
+
))
|
|
130
|
+
self._add_comment(f"# WARNING: Cannot generate code for node type '{node_type}' (node_id={node.node_id})")
|
|
131
|
+
self._add_comment(f"# This node type is not supported for code export")
|
|
83
132
|
|
|
84
133
|
def _get_input_vars(self, node: FlowNode) -> dict[str, str]:
|
|
85
134
|
"""Get input variable names for a node."""
|
|
@@ -1067,6 +1116,291 @@ class FlowGraphToPolarsConverter:
|
|
|
1067
1116
|
self._add_code(f"{var_name} = _polars_code_{var_name.replace('df_', '')}({args})")
|
|
1068
1117
|
self._add_code("")
|
|
1069
1118
|
|
|
1119
|
+
# Handlers for unsupported node types - these add nodes to the unsupported list
|
|
1120
|
+
|
|
1121
|
+
def _handle_explore_data(
    self, settings: input_schema.NodeExploreData, var_name: str, input_vars: dict[str, str]
) -> None:
    """Emit a pass-through for an explore_data node instead of failing.

    The node is not added to the unsupported list; the generated code only
    aliases the upstream variable so later nodes can keep referring to
    ``var_name``.

    Args:
        settings: Node settings; only ``node_id`` is read.
        var_name: Variable name assigned in the generated code.
        input_vars: Input variable names; the ``"main"`` entry is used,
            falling back to ``"df"`` when it is absent.
    """
    upstream = input_vars["main"] if "main" in input_vars else "df"
    header = f"# Node {settings.node_id}: Explore Data (skipped - interactive visualization only)"
    self._add_comment(header)
    # Alias the input, then close the section with a blank line.
    for generated in (f"{var_name} = {upstream} # Pass through unchanged", ""):
        self._add_code(generated)
|
|
1131
|
+
|
|
1132
|
+
def _handle_database_reader(
    self, settings: input_schema.NodeDatabaseReader, var_name: str, input_vars: dict[str, str]
) -> None:
    """Handle database_reader nodes by generating code to read from database using a named connection.

    Nodes that use an inline connection, or that have no connection name, are
    recorded in ``self.unsupported_nodes`` and only a comment is emitted.

    Args:
        settings: Node settings; ``settings.database_settings`` supplies the
            connection mode and name, query mode, query, table and schema.
        var_name: Variable name the generated read is assigned to.
        input_vars: Not used by this handler; accepted so every handler can be
            called with the same arguments.
    """
    import json  # local import: only needed to render escaped string literals

    def py_str(value) -> str:
        # Settings values are user-editable and end up inside Python source.
        # A JSON string literal (non-ASCII kept verbatim) is also a valid Python
        # literal, so quotes, backslashes and newlines cannot break out of it.
        return json.dumps(f"{value}", ensure_ascii=False)

    db_settings = settings.database_settings

    # Only reference mode is supported for code generation
    if db_settings.connection_mode != "reference":
        self.unsupported_nodes.append((
            settings.node_id,
            "database_reader",
            "Database Reader nodes with inline connections cannot be exported. "
            "Please use a named connection (reference mode) instead."
        ))
        self._add_comment(f"# Node {settings.node_id}: Database Reader - Inline connections not supported")
        return

    if not db_settings.database_connection_name:
        self.unsupported_nodes.append((
            settings.node_id,
            "database_reader",
            "Database Reader node is missing a connection name"
        ))
        self._add_comment(f"# Node {settings.node_id}: Database Reader - Missing connection name")
        return

    self.imports.add("import flowfile as ff")

    connection_name = db_settings.database_connection_name
    # Keep the comment on a single line even if the name contains line breaks.
    comment_name = " ".join(f"{connection_name}".splitlines())
    self._add_code(f"# Read from database using connection: {comment_name}")

    query = db_settings.query
    if db_settings.query_mode == "query" and query:
        self._add_code(f"{var_name} = ff.read_database(")
        self._add_code(f"    {py_str(connection_name)},")
        if '"""' in query or "\\" in query:
            # A triple-quoted block would be terminated early or have its
            # backslashes reinterpreted; fall back to one escaped literal.
            self._add_code(f"    query={py_str(query)},")
        else:
            # Query mode - use triple quotes to preserve query formatting
            self._add_code('    query="""')
            for line in query.split("\n"):
                self._add_code(f"        {line}")
            self._add_code('    """,')
        self._add_code(")")
    else:
        # Table mode
        self._add_code(f"{var_name} = ff.read_database(")
        self._add_code(f"    {py_str(connection_name)},")
        if db_settings.table_name:
            self._add_code(f"    table_name={py_str(db_settings.table_name)},")
        if db_settings.schema_name:
            self._add_code(f"    schema_name={py_str(db_settings.schema_name)},")
        self._add_code(")")

    self._add_code("")
|
|
1183
|
+
|
|
1184
|
+
def _handle_database_writer(
    self, settings: input_schema.NodeDatabaseWriter, var_name: str, input_vars: dict[str, str]
) -> None:
    """Handle database_writer nodes by generating code to write to database using a named connection.

    Nodes that use an inline connection, or that have no connection name, are
    recorded in ``self.unsupported_nodes`` and only a comment is emitted.
    After the write call, ``var_name`` is assigned the input variable so later
    nodes can keep referring to it.

    Args:
        settings: Node settings; ``settings.database_write_settings`` supplies
            the connection mode and name, table, schema and ``if_exists``.
        var_name: Variable name assigned after the write.
        input_vars: Input variable names; the ``"main"`` entry is written,
            falling back to ``"df"`` when it is absent.
    """
    import json  # local import: only needed to render escaped string literals

    def py_str(value) -> str:
        # Settings values are user-editable and end up inside Python source.
        # A JSON string literal (non-ASCII kept verbatim) is also a valid Python
        # literal, so quotes, backslashes and newlines cannot break out of it.
        return json.dumps(f"{value}", ensure_ascii=False)

    db_settings = settings.database_write_settings

    # Only reference mode is supported for code generation
    if db_settings.connection_mode != "reference":
        self.unsupported_nodes.append((
            settings.node_id,
            "database_writer",
            "Database Writer nodes with inline connections cannot be exported. "
            "Please use a named connection (reference mode) instead."
        ))
        self._add_comment(f"# Node {settings.node_id}: Database Writer - Inline connections not supported")
        return

    if not db_settings.database_connection_name:
        self.unsupported_nodes.append((
            settings.node_id,
            "database_writer",
            "Database Writer node is missing a connection name"
        ))
        self._add_comment(f"# Node {settings.node_id}: Database Writer - Missing connection name")
        return

    self.imports.add("import flowfile as ff")

    connection_name = db_settings.database_connection_name
    input_df = input_vars.get("main", "df")
    # Keep the comment on a single line even if the name contains line breaks.
    comment_name = " ".join(f"{connection_name}".splitlines())

    self._add_code(f"# Write to database using connection: {comment_name}")
    self._add_code("ff.write_database(")
    self._add_code(f"    {input_df}.collect(),")
    self._add_code(f"    {py_str(connection_name)},")
    self._add_code(f"    {py_str(db_settings.table_name)},")
    if db_settings.schema_name:
        self._add_code(f"    schema_name={py_str(db_settings.schema_name)},")
    if db_settings.if_exists:
        self._add_code(f"    if_exists={py_str(db_settings.if_exists)},")
    self._add_code(")")
    self._add_code(f"{var_name} = {input_df} # Pass through the input DataFrame")
    self._add_code("")
|
|
1226
|
+
|
|
1227
|
+
def _handle_external_source(
    self, settings: input_schema.NodeExternalSource, var_name: str, input_vars: dict[str, str]
) -> None:
    """Record an external_source node as unsupported and leave explanatory comments.

    Args:
        settings: Node settings; only ``node_id`` is read.
        var_name: Not used; no variable is assigned for this node.
        input_vars: Not used by this handler.
    """
    why = "External Source nodes use dynamic data sources that cannot be included in generated code"
    entry = (settings.node_id, "external_source", why)
    self.unsupported_nodes.append(entry)

    notes = (
        f"# Node {settings.node_id}: External Source - Not supported for code export",
        "# (External data sources require runtime configuration)",
    )
    for note in notes:
        self._add_comment(note)
|
|
1238
|
+
|
|
1239
|
+
def _check_process_method_signature(self, custom_node_class: type) -> tuple[bool, bool]:
    """
    Check the process method signature to determine if collect/lazy is needed.

    An annotation counts as lazy when its string form contains ``"LazyFrame"``.
    Resolved type hints are used when they can be obtained; when resolution
    fails, the raw annotations from the signature are still consulted so that
    annotations written as strings are not ignored.

    Args:
        custom_node_class: Class whose ``process`` attribute is inspected.

    Returns:
        Tuple of (needs_collect, needs_lazy):
        - needs_collect: True if inputs need to be collected to DataFrame before passing to process()
        - needs_lazy: True if output needs to be converted to LazyFrame after process()
        Both default to True when nothing can be determined.
    """
    needs_collect = True  # Default: assume needs DataFrame input
    needs_lazy = True  # Default: assume returns DataFrame

    process_method = getattr(custom_node_class, "process", None)
    if process_method is None:
        return needs_collect, needs_lazy

    # Resolving hints evaluates the annotations and can raise almost anything
    # (e.g. NameError for a forward reference), so it is isolated from the
    # signature inspection below and treated as best-effort.
    try:
        type_hints = typing.get_type_hints(process_method)
    except Exception as e:
        logger.debug(f"Could not resolve type hints for process method: {e}")
        type_hints = {}

    try:
        sig = inspect.signature(process_method)
    except (TypeError, ValueError) as e:
        logger.debug(f"Could not determine process method signature: {e}")
        sig = None

    # Check return type, preferring the resolved hint over the raw annotation
    return_type = type_hints.get("return")
    if return_type is None and sig is not None and sig.return_annotation is not inspect.Signature.empty:
        return_type = sig.return_annotation
    if return_type is not None and "LazyFrame" in str(return_type):
        needs_lazy = False

    # Check input parameter types (look for *inputs parameter or first param after self)
    if sig is not None:
        for param in list(sig.parameters.values())[1:]:  # Skip 'self'
            candidates = []
            if param.annotation is not inspect.Parameter.empty:
                candidates.append(param.annotation)
            if param.name in type_hints:
                candidates.append(type_hints[param.name])
            if any("LazyFrame" in str(candidate) for candidate in candidates):
                needs_collect = False
                break

    return needs_collect, needs_lazy
|
|
1286
|
+
|
|
1287
|
+
def _read_custom_node_source_file(self, custom_node_class: type) -> str | None:
    """
    Read the entire source file where a custom node class is defined.
    This includes all class definitions in that file (settings schemas, etc.).

    Args:
        custom_node_class: Class whose defining file is looked up with
            ``inspect.getfile``.

    Returns:
        The complete source code from the file, or None if not readable
        (no file for the class, I/O error, or content that is not valid UTF-8).
    """
    try:
        source_file = inspect.getfile(custom_node_class)
        # Decode explicitly as UTF-8 (the default encoding of Python source)
        # rather than relying on the platform's locale encoding.
        with open(source_file, encoding="utf-8") as f:
            return f.read()
    except (OSError, TypeError, UnicodeDecodeError):
        # UnicodeDecodeError is a ValueError, not an OSError, so it has to be
        # listed to honour the "None if not readable" contract.
        return None
|
|
1301
|
+
|
|
1302
|
+
@staticmethod
def _strip_top_level_imports(file_source: str) -> str:
    """Return ``file_source`` without its module-level import statements.

    Only statements that are direct children of the module are removed.
    Imports nested in functions, ``try`` blocks or conditionals are kept, so
    removing a line can never leave an empty suite behind. Comment lines that
    appear before any retained line, and leading blank lines, are dropped.

    Args:
        file_source: Full text of a Python source file.

    Returns:
        The filtered source. If the text cannot be parsed it is returned
        unchanged rather than guessing which lines are imports.
    """
    # Local import keeps this helper self-contained.
    import ast

    try:
        tree = ast.parse(file_source)
    except (SyntaxError, ValueError):
        return file_source

    import_lines: set[int] = set()
    shared_lines: set[int] = set()
    for stmt in tree.body:
        if isinstance(stmt, (ast.Import, ast.ImportFrom)):
            # lineno..end_lineno covers parenthesised and backslash-continued imports.
            import_lines.update(range(stmt.lineno, stmt.end_lineno + 1))
        else:
            shared_lines.update((stmt.lineno, stmt.end_lineno))
    # Never drop a line that also holds another statement (e.g. "import a; x = 1").
    import_lines -= shared_lines

    kept: list[str] = []
    for lineno, line in enumerate(file_source.split('\n'), start=1):
        if lineno in import_lines:
            continue
        # Skip comments at the very start (like "# Auto-generated custom node")
        if line.strip().startswith('#') and not kept:
            continue
        kept.append(line)

    # Remove leading empty lines
    while kept and not kept[0].strip():
        kept.pop(0)
    return '\n'.join(kept)

def _handle_user_defined(
    self, node: FlowNode, var_name: str, input_vars: dict[str, str]
) -> None:
    """Handle user-defined custom nodes by including their class definition and calling process().

    Looks the node type up in ``CUSTOM_NODE_STORE``, stores the source of the
    defining file once per class name in ``self.custom_node_classes``, and
    emits code that instantiates the class, applies its settings and assigns
    the result of ``process(...)`` to ``var_name``.

    Args:
        node: The flow node; ``node_type``, ``node_id`` and ``setting_input``
            are read from it.
        var_name: Name of the variable the generated code assigns the result to.
        input_vars: Mapping whose values are the variable names passed to
            ``process``. With more than one entry, only keys starting with
            ``"main"`` are passed, in sorted key order.

    Returns:
        None. If the class is not registered or its source cannot be
        retrieved, the node is recorded in ``self.unsupported_nodes`` and a
        comment is emitted instead of code.
    """
    node_type = node.node_type
    settings = node.setting_input

    # Get the custom node class from the registry
    custom_node_class = CUSTOM_NODE_STORE.get(node_type)
    if custom_node_class is None:
        self.unsupported_nodes.append((
            node.node_id,
            node_type,
            f"User-defined node type '{node_type}' not found in the custom node registry"
        ))
        self._add_comment(f"# Node {node.node_id}: User-defined node '{node_type}' - Not found in registry")
        return

    # Store the entire source file if we haven't already
    class_name = custom_node_class.__name__
    if class_name not in self.custom_node_classes:
        file_source = self._read_custom_node_source_file(custom_node_class)
        if file_source:
            # Module-level imports are removed here; the import this method
            # needs is registered through self.imports below.
            self.custom_node_classes[class_name] = self._strip_top_level_imports(file_source)
        else:
            # Fallback to just the class source
            try:
                self.custom_node_classes[class_name] = inspect.getsource(custom_node_class)
            except (OSError, TypeError) as e:
                self.unsupported_nodes.append((
                    node.node_id,
                    node_type,
                    f"Could not retrieve source code for user-defined node: {e}"
                ))
                self._add_comment(f"# Node {node.node_id}: User-defined node '{node_type}' - Source code unavailable")
                return

    # Add necessary imports
    self.imports.add("from flowfile_core.flowfile.node_designer import CustomNodeBase, Section, NodeSettings, SingleSelect, MultiSelect, IncomingColumns, ColumnSelector, NumericInput, TextInput, DropdownSelector, TextArea, Toggle")

    # Get settings values to initialize the node
    settings_dict = getattr(settings, "settings", {}) or {}

    # Check process method signature to determine if collect/lazy is needed
    needs_collect, needs_lazy = self._check_process_method_signature(custom_node_class)

    # Use the default of the class's node_name field when it has one,
    # otherwise fall back to the node type.
    name_field = custom_node_class.model_fields.get('node_name')
    display_name = name_field.default if name_field is not None else node_type

    # Generate the code to instantiate and run the custom node
    node_var = f"_custom_node_{node.node_id}"
    self._add_code(f"# User-defined node: {display_name}")
    self._add_code(f"{node_var} = {class_name}()")

    # If there are settings, apply them
    if settings_dict:
        self._add_code(f"{node_var}_settings = {repr(settings_dict)}")
        self._add_code(f"if {node_var}.settings_schema:")
        # The leading whitespace in the emitted line puts it inside the `if` above.
        self._add_code(f" {node_var}.settings_schema.populate_values({node_var}_settings)")

    # Prepare input arguments based on whether we need to collect
    collect_suffix = ".collect()" if needs_collect else ""
    if not input_vars:
        input_args = ""
    elif len(input_vars) == 1:
        input_args = f"{next(iter(input_vars.values()))}{collect_suffix}"
    else:
        # NOTE(review): keys are ordered lexicographically, so "main_10" would
        # sort before "main_2" — confirm the key format used by callers.
        input_args = ", ".join(
            f"{input_vars[key]}{collect_suffix}"
            for key in sorted(input_vars)
            if key.startswith("main")
        )

    # Call the process method, adding .lazy() only if needed
    process_call = f"{node_var}.process({input_args})"
    if needs_lazy:
        self._add_code(f"{var_name} = {process_call}.lazy()")
    else:
        self._add_code(f"{var_name} = {process_call}")
    self._add_code("")
|
|
1403
|
+
|
|
1070
1404
|
# Helper methods
|
|
1071
1405
|
|
|
1072
1406
|
def _add_code(self, line: str) -> None:
|
|
@@ -1286,6 +1620,17 @@ class FlowGraphToPolarsConverter:
|
|
|
1286
1620
|
lines.append("")
|
|
1287
1621
|
lines.append("")
|
|
1288
1622
|
|
|
1623
|
+
# Add custom node class definitions if any
|
|
1624
|
+
if self.custom_node_classes:
|
|
1625
|
+
lines.append("# Custom Node Class Definitions")
|
|
1626
|
+
lines.append("# These classes are user-defined nodes that were included in the flow")
|
|
1627
|
+
lines.append("")
|
|
1628
|
+
for class_name, source_code in self.custom_node_classes.items():
|
|
1629
|
+
for source_line in source_code.split("\n"):
|
|
1630
|
+
lines.append(source_line)
|
|
1631
|
+
lines.append("")
|
|
1632
|
+
lines.append("")
|
|
1633
|
+
|
|
1289
1634
|
# Add main function
|
|
1290
1635
|
lines.append("def run_etl_pipeline():")
|
|
1291
1636
|
lines.append(' """')
|
|
@@ -991,8 +991,20 @@ class FlowDataEngine:
|
|
|
991
991
|
|
|
992
992
|
df = self.data_frame.rename({c.old_name: c.new_name for c in group_columns})
|
|
993
993
|
group_by_columns = [n_c.new_name for n_c in group_columns]
|
|
994
|
+
|
|
995
|
+
# Handle case where there are no aggregations - just get unique combinations of group columns
|
|
996
|
+
if len(aggregations) == 0:
|
|
997
|
+
return FlowDataEngine(
|
|
998
|
+
df.select(group_by_columns).unique(),
|
|
999
|
+
calculate_schema_stats=calculate_schema_stats,
|
|
1000
|
+
)
|
|
1001
|
+
|
|
1002
|
+
grouped_df = df.group_by(*group_by_columns)
|
|
1003
|
+
agg_exprs = [ac.agg_func(ac.old_name).alias(ac.new_name) for ac in aggregations]
|
|
1004
|
+
result_df = grouped_df.agg(agg_exprs)
|
|
1005
|
+
|
|
994
1006
|
return FlowDataEngine(
|
|
995
|
-
|
|
1007
|
+
result_df,
|
|
996
1008
|
calculate_schema_stats=calculate_schema_stats,
|
|
997
1009
|
)
|
|
998
1010
|
|
|
@@ -137,6 +137,12 @@ def get_results(file_ref: str) -> Status | None:
|
|
|
137
137
|
|
|
138
138
|
|
|
139
139
|
def results_exists(file_ref: str):
|
|
140
|
+
from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
|
|
141
|
+
|
|
142
|
+
# Skip worker check if worker communication is disabled
|
|
143
|
+
if not OFFLOAD_TO_WORKER:
|
|
144
|
+
return False
|
|
145
|
+
|
|
140
146
|
try:
|
|
141
147
|
f = requests.get(f"{WORKER_URL}/status/{file_ref}")
|
|
142
148
|
if f.status_code == 200:
|
|
@@ -159,6 +165,12 @@ def clear_task_from_worker(file_ref: str) -> bool:
|
|
|
159
165
|
Returns:
|
|
160
166
|
bool: True if the task was successfully cleared, False otherwise.
|
|
161
167
|
"""
|
|
168
|
+
from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
|
|
169
|
+
|
|
170
|
+
# Skip worker call if worker communication is disabled
|
|
171
|
+
if not OFFLOAD_TO_WORKER:
|
|
172
|
+
return False
|
|
173
|
+
|
|
162
174
|
try:
|
|
163
175
|
f = requests.delete(f"{WORKER_URL}/clear_task/{file_ref}")
|
|
164
176
|
if f.status_code == 200:
|
|
@@ -1644,6 +1644,7 @@ class FlowGraph:
|
|
|
1644
1644
|
write_settings=node_cloud_storage_writer.cloud_storage_settings,
|
|
1645
1645
|
connection=full_cloud_storage_connection,
|
|
1646
1646
|
lf=df.data_frame,
|
|
1647
|
+
user_id=node_cloud_storage_writer.user_id,
|
|
1647
1648
|
flowfile_node_id=node_cloud_storage_writer.node_id,
|
|
1648
1649
|
flowfile_flow_id=self.flow_id,
|
|
1649
1650
|
)
|
|
@@ -1866,6 +1867,7 @@ class FlowGraph:
|
|
|
1866
1867
|
|
|
1867
1868
|
if schema_callback is not None:
|
|
1868
1869
|
node.schema_callback = schema_callback
|
|
1870
|
+
node.user_provided_schema_callback = schema_callback
|
|
1869
1871
|
return self
|
|
1870
1872
|
|
|
1871
1873
|
def add_datasource(self, input_file: input_schema.NodeDatasource | input_schema.NodeManualInput) -> "FlowGraph":
|