Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +3 -1
- flowfile/api.py +1 -2
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
- flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
- flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
- flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
- flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
- flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
- flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
- flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
- flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +129 -26
- flowfile_core/flowfile/flow_node/flow_node.py +3 -0
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +5 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +1 -1
- flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
- flowfile_core/flowfile/node_designer/custom_node.py +1 -1
- flowfile_core/flowfile/node_designer/ui_components.py +1 -1
- flowfile_core/flowfile/schema_callbacks.py +8 -5
- flowfile_core/flowfile/setting_generator/settings.py +15 -9
- flowfile_core/routes/routes.py +8 -10
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +222 -65
- flowfile_core/schemas/output_model.py +1 -1
- flowfile_core/schemas/schemas.py +145 -32
- flowfile_core/schemas/transform_schema.py +1083 -413
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +15 -18
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +3 -0
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/routes.py +5 -8
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
from flowfile_core.schemas import schemas, input_schema
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_compatibility, load_flowfile_pickle
|
|
4
|
+
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
7
|
+
from flowfile_core.configs.settings import IS_RUNNING_IN_DOCKER
|
|
8
|
+
import json
|
|
9
|
+
from shared.storage_config import storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
import yaml
|
|
14
|
+
except ImportError:
|
|
15
|
+
yaml = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _validate_flow_path(flow_path: Path) -> Path:
    """Resolve *flow_path* and reject paths that are not safe to open.

    Checks that the file has a supported extension and exists on disk.
    When running inside Docker, relative paths must additionally live
    under one of the managed storage directories.

    Args:
        flow_path: Candidate path to a flow definition file.

    Returns:
        The fully resolved path.

    Raises:
        ValueError: Unsupported extension, or an unsafe relative path.
        FileNotFoundError: The file does not exist.
    """
    candidate = flow_path.resolve()

    # Only the formats the loader understands are accepted.
    if candidate.suffix.lower() not in {'.yaml', '.yml', '.json', '.flowfile'}:
        raise ValueError(f"Unsupported file extension: {candidate.suffix}")

    if not candidate.is_file():
        raise FileNotFoundError(f"Flow file not found: {candidate}")

    # Outside Docker every location is trusted; inside Docker only the
    # managed storage directories are.
    inside_safe_dir = True
    if IS_RUNNING_IN_DOCKER:
        trusted_roots = (
            storage.flows_directory,
            storage.uploads_directory,
            storage.temp_directory_for_flows,
        )
        inside_safe_dir = any(candidate.is_relative_to(root) for root in trusted_roots)

    # Explicit absolute paths are allowed even outside the safe roots.
    if not (inside_safe_dir or flow_path.is_absolute()):
        raise ValueError(
            f"Relative paths must be within flows or uploads directory. "
            f"Use absolute path or place file in: {storage.flows_directory}"
        )

    return candidate
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _derive_connections_from_nodes(nodes: List[schemas.FlowfileNode]) -> List[Tuple[int, int]]:
|
|
56
|
+
"""Derive node connections from the outputs stored in each node."""
|
|
57
|
+
connections = []
|
|
58
|
+
for node in nodes:
|
|
59
|
+
if node.outputs:
|
|
60
|
+
for output_id in node.outputs:
|
|
61
|
+
connections.append((node.id, output_id))
|
|
62
|
+
return connections
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def determine_insertion_order(node_storage: schemas.FlowInformation) -> List[int]:
    """Compute the order in which stored nodes should be re-created in a FlowGraph.

    Walks the graph depth-first from the first start node (or from every node
    when there are no start nodes), trying to emit each node only after the
    nodes it depends on (left/right/main inputs). Nodes never reached by the
    walk are appended at the end in arbitrary set order.

    Side effect: via ``assure_output_id`` this repairs missing back-references,
    mutating the ``outputs`` lists of nodes in ``node_storage``.

    Args:
        node_storage: Stored flow whose ``data`` maps node ids to
            ``NodeInformation`` and whose ``node_starts`` lists entry nodes.

    Returns:
        Node ids in a dependency-respecting insertion order.
    """
    ingest_order: List[int] = []        # final ordering, appended as nodes are settled
    ingest_order_set: set[int] = set()  # fast membership mirror of ingest_order
    all_nodes = set(node_storage.data.keys())

    def assure_output_id(input_node: schemas.NodeInformation, output_node: schemas.NodeInformation):
        # assure the output id is in the list with outputs of the input node this is a quick fix
        if output_node.id not in input_node.outputs:
            input_node.outputs.append(output_node.id)

    def determine_order(node_id: int):
        # Recursive DFS: first settle this node's unvisited inputs, then
        # recurse into its outputs. NOTE(review): recursion depth grows with
        # the longest dependency chain — very deep flows could hit Python's
        # recursion limit; confirm acceptable for expected flow sizes.
        current_node = node_storage.data.get(node_id)
        if current_node is None:
            return
        output_ids = current_node.outputs
        main_input_ids = current_node.input_ids if current_node.input_ids else []
        # Inputs (left, right, main) that are set and not yet ordered.
        input_ids = [n for n in [current_node.left_input_id,
                                 current_node.right_input_id] + main_input_ids if (n is not None
                                                                                   and n not in
                                                                                   ingest_order_set)]
        if len(input_ids) > 0:
            for input_id in input_ids:
                new_node = node_storage.data.get(input_id)
                if new_node is None:
                    # Input references a node that no longer exists: settle the
                    # current node now rather than waiting on a dangling id.
                    # NOTE(review): if several inputs are dangling, this appends
                    # current_node.id once per dangling input without checking
                    # ingest_order_set — duplicates in ingest_order; confirm
                    # downstream callers tolerate (or deduplicate) this.
                    ingest_order.append(current_node.id)
                    ingest_order_set.add(current_node.id)
                    continue
                assure_output_id(new_node, current_node)
                if new_node.id not in ingest_order_set:
                    determine_order(input_id)
        elif current_node.id not in ingest_order_set:
            # All inputs already settled (or none): this node can be emitted.
            ingest_order.append(current_node.id)
            ingest_order_set.add(current_node.id)

        for output_id in output_ids:
            if output_id not in ingest_order_set:
                determine_order(output_id)

    if len(node_storage.node_starts) > 0:
        determine_order(node_storage.node_starts[0])
    # add the random not connected nodes
    else:
        for node_id in all_nodes:
            determine_order(node_id)
    # Anything the walk never reached is appended last (set order is arbitrary).
    ingest_order += list(all_nodes - ingest_order_set)
    return ingest_order
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _load_flowfile_yaml(flow_path: Path) -> schemas.FlowInformation:
    """Read a YAML flowfile and convert it into a ``FlowInformation``.

    Args:
        flow_path: Location of the ``.yaml``/``.yml`` flow definition.

    Returns:
        The parsed flow as a ``FlowInformation`` instance.

    Raises:
        ImportError: When the optional PyYAML dependency is not installed.
    """
    if yaml is None:
        raise ImportError("PyYAML is required for YAML files. Install with: pip install pyyaml")

    validated_path = _validate_flow_path(flow_path)
    with open(validated_path, 'r', encoding='utf-8') as handle:
        raw_document = yaml.safe_load(handle)

    # Validating through FlowfileData first lets pydantic resolve each
    # node's setting_input model from its node type.
    parsed = schemas.FlowfileData.model_validate(raw_document)
    return _flowfile_data_to_flow_information(parsed)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _load_flowfile_json(flow_path: Path) -> schemas.FlowInformation:
    """Read a JSON flowfile and convert it into a ``FlowInformation``.

    Args:
        flow_path: Location of the ``.json`` flow definition.

    Returns:
        The parsed flow as a ``FlowInformation`` instance.
    """
    validated_path = _validate_flow_path(flow_path)
    with open(validated_path, 'r', encoding='utf-8') as handle:
        raw_document = json.load(handle)

    # Validating through FlowfileData first lets pydantic resolve each
    # node's setting_input model from its node type.
    parsed = schemas.FlowfileData.model_validate(raw_document)
    return _flowfile_data_to_flow_information(parsed)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _flowfile_data_to_flow_information(flowfile_data: schemas.FlowfileData) -> schemas.FlowInformation:
    """Convert a validated ``FlowfileData`` document into ``FlowInformation``.

    For every node: resolves the pydantic settings model from the node type,
    re-injects fields that are excluded during serialization (flow/node ids,
    positions, description), reconstructs dependency ids, and validates the
    result. Start nodes and connections are collected along the way.

    Raises:
        ValueError: Unknown node type, or an output node missing 'file_type'.
    """
    # Local import to avoid a circular import at module load time.
    from flowfile_core.schemas.schemas import get_settings_class_for_node_type

    nodes_dict = {}
    node_starts = []
    for node in flowfile_data.nodes:
        setting_input = None
        if node.setting_input is not None:
            model_class = get_settings_class_for_node_type(node.type)

            if model_class is None:
                raise ValueError(f"Unknown node type: {node.type}")

            is_user_defined = model_class == input_schema.UserDefinedNode

            # Inject fields that were excluded during serialization
            # NOTE(review): when setting_input is already a dict this mutates
            # it in place rather than copying — confirm callers do not reuse
            # the original document afterwards.
            setting_data = node.setting_input if isinstance(node.setting_input, dict) else node.setting_input.model_dump()
            setting_data['flow_id'] = flowfile_data.flowfile_id
            setting_data['node_id'] = node.id
            setting_data['pos_x'] = float(node.x_position or 0)
            setting_data['pos_y'] = float(node.y_position or 0)
            setting_data['description'] = node.description or ''
            setting_data['is_setup'] = True

            if is_user_defined:
                # User-defined nodes always carry the full dependency list.
                setting_data['is_user_defined'] = True
                depending_ids = list(node.input_ids or [])
                if node.left_input_id:
                    depending_ids.append(node.left_input_id)
                if node.right_input_id:
                    depending_ids.append(node.right_input_id)
                setting_data['depending_on_ids'] = depending_ids
            else:
                # Built-in nodes only get the dependency fields their
                # settings model actually declares.
                if 'depending_on_id' in model_class.model_fields:
                    setting_data['depending_on_id'] = node.input_ids[0] if node.input_ids else -1
                if 'depending_on_ids' in model_class.model_fields:
                    depending_ids = list(node.input_ids or [])
                    if node.left_input_id:
                        depending_ids.append(node.left_input_id)
                    if node.right_input_id:
                        depending_ids.append(node.right_input_id)
                    setting_data['depending_on_ids'] = depending_ids

            # Output nodes require a file_type; older documents may lack the
            # nested table_settings, so synthesize a minimal one.
            if node.type == 'output' and 'output_settings' in setting_data:
                output_settings = setting_data['output_settings']
                file_type = output_settings.get('file_type', None)
                if file_type is None:
                    raise ValueError("Output node's output_settings must include 'file_type'")
                if 'table_settings' not in output_settings:
                    output_settings['table_settings'] = {"file_type": file_type}

            setting_input = model_class.model_validate(setting_data)

        node_info = schemas.NodeInformation(
            id=node.id,
            type=node.type,
            is_setup=setting_input is not None,
            description=node.description,
            x_position=node.x_position,
            y_position=node.y_position,
            left_input_id=node.left_input_id,
            right_input_id=node.right_input_id,
            input_ids=node.input_ids,
            outputs=node.outputs,
            setting_input=setting_input,
        )
        nodes_dict[node.id] = node_info
        if node.is_start_node:
            node_starts.append(node.id)

    # Edges are stored on each node's outputs list, not separately.
    connections = _derive_connections_from_nodes(flowfile_data.nodes)

    flow_settings = schemas.FlowSettings(
        flow_id=flowfile_data.flowfile_id,
        name=flowfile_data.flowfile_name,
        description=flowfile_data.flowfile_settings.description,
        execution_mode=flowfile_data.flowfile_settings.execution_mode,
        execution_location=flowfile_data.flowfile_settings.execution_location,
        auto_save=flowfile_data.flowfile_settings.auto_save,
        show_detailed_progress=flowfile_data.flowfile_settings.show_detailed_progress,
    )

    return schemas.FlowInformation(
        flow_id=flowfile_data.flowfile_id,
        flow_name=flowfile_data.flowfile_name,
        flow_settings=flow_settings,
        data=nodes_dict,
        node_starts=node_starts,
        node_connections=connections,
    )
|
|
245
|
+
|
|
246
|
+
def _load_flow_storage(flow_path: Path) -> schemas.FlowInformation:
    """Load flow storage from any supported format.

    Supports:
    - .flowfile (pickle) - legacy format
    - .yaml / .yml - new YAML format
    - .json - JSON format

    Args:
        flow_path: Path to the flowfile

    Returns:
        FlowInformation object
    """
    checked_path = _validate_flow_path(flow_path)
    extension = checked_path.suffix.lower()

    if extension in ('.yaml', '.yml'):
        return _load_flowfile_yaml(checked_path)

    if extension == '.json':
        return _load_flowfile_json(checked_path)

    if extension == '.flowfile':
        # Legacy pickle payload: unpickle, then upgrade it in place.
        try:
            legacy_storage = load_flowfile_pickle(str(checked_path))
            ensure_compatibility(legacy_storage, str(checked_path))
            return legacy_storage
        except Exception as e:
            raise ValueError(
                f"Failed to open legacy .flowfile: {e}\n\n"
                f"Try migrating: migrate_flowfile('{checked_path}')"
            ) from e

    raise ValueError(f"Unsupported file format: {extension}")
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def open_flow(flow_path: Path) -> FlowGraph:
    """
    Open a flowfile from a given path.

    Supports multiple formats:
    - .flowfile (pickle) - legacy format, auto-migrated
    - .yaml / .yml - new YAML format
    - .json - JSON format

    Args:
        flow_path (Path): The absolute or relative path to the flowfile

    Returns:
        FlowGraph: The flowfile object
    """
    # Load flow storage (handles format detection)
    flow_path = _validate_flow_path(flow_path)
    flow_storage_obj = _load_flow_storage(flow_path)
    # The on-disk location wins over whatever path/name was serialized.
    flow_storage_obj.flow_settings.path = str(flow_path)
    flow_storage_obj.flow_settings.name = str(flow_path.stem)
    flow_storage_obj.flow_name = str(flow_path.stem)

    # Determine node insertion order
    ingestion_order = determine_insertion_order(flow_storage_obj)

    # Create new FlowGraph
    new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)

    # First pass: add node promises
    # Placeholders for every node so connections can be wired before the
    # node settings are applied in the second pass.
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        node_promise = input_schema.NodePromise(
            flow_id=new_flow.flow_id,
            node_id=node_info.id,
            pos_x=node_info.x_position,
            pos_y=node_info.y_position,
            node_type=node_info.type
        )
        # Carry the cached-results flag over when the stored settings have one.
        if hasattr(node_info.setting_input, 'cache_results'):
            node_promise.cache_results = node_info.setting_input.cache_results
        new_flow.add_node_promise(node_promise)

    # Second pass: apply settings and wire connections.
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        if node_info.is_setup:
            if hasattr(node_info.setting_input, "is_user_defined") and node_info.setting_input.is_user_defined:
                # Custom nodes can only be restored when their class is
                # registered; otherwise the promise is left un-configured.
                if node_info.type not in CUSTOM_NODE_STORE:
                    continue
                user_defined_node_class = CUSTOM_NODE_STORE[node_info.type]
                new_flow.add_user_defined_node(
                    custom_node=user_defined_node_class.from_settings(node_info.setting_input.settings),
                    user_defined_node_settings=node_info.setting_input
                )
            else:
                # Built-in nodes dispatch to FlowGraph.add_<type>(settings).
                getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)

        # Setup connections
        from_node = new_flow.get_node(node_id)
        for output_node_id in (node_info.outputs or []):

            to_node = new_flow.get_node(output_node_id)
            if to_node is not None:
                output_node_obj = flow_storage_obj.data[output_node_id]
                # Treat this edge as a left/right input only if the target
                # recorded us as such AND that slot isn't already wired to us.
                is_left_input = (output_node_obj.left_input_id == node_id) and (
                    to_node.left_input.node_id != node_id if to_node.left_input is not None else True
                )
                is_right_input = (output_node_obj.right_input_id == node_id) and (
                    to_node.right_input.node_id != node_id if to_node.right_input is not None else True
                )
                is_main_input = node_id in (output_node_obj.input_ids or [])

                if is_left_input:
                    insert_type = 'left'
                elif is_right_input:
                    insert_type = 'right'
                elif is_main_input:
                    insert_type = 'main'
                else:
                    # Target never recorded us as an input: skip the edge.
                    continue
                to_node.add_node_connection(from_node, insert_type)
            else:
                # Dangling edge: the output node no longer exists, so drop
                # it from both the graph and the stored connection list.
                from_node.delete_lead_to_node(output_node_id)
                if not (from_node.node_id, output_node_id) in flow_storage_obj.node_connections:
                    continue
                flow_storage_obj.node_connections.pop(
                    flow_storage_obj.node_connections.index((from_node.node_id, output_node_id))
                )

    # Handle any missing connections
    for missing_connection in set(flow_storage_obj.node_connections) - set(new_flow.node_connections):
        to_node = new_flow.get_node(missing_connection[1])
        if not to_node.has_input:
            # NOTE(review): the result of this circularity check is discarded,
            # so a circular connection would still be added below — confirm
            # whether the intent was to skip the edge when it returns True.
            test_if_circular_connection(missing_connection, new_flow)
            from_node = new_flow.get_node(missing_connection[0])
            if from_node:
                to_node.add_node_connection(from_node)

    return new_flow
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def test_if_circular_connection(connection: "Tuple[int, int]", flow: "FlowGraph") -> bool:
    """Check whether adding *connection* to *flow* would create a cycle.

    Performs a breadth-first walk of the nodes reachable from the
    connection's target; if the connection's source is reachable, the new
    edge would close a cycle.

    Args:
        connection: ``(from_node_id, to_node_id)`` pair for the candidate edge.
        flow: Graph whose existing ``leads_to_nodes`` edges are traversed.

    Returns:
        True when the edge would be circular, False otherwise.
    """
    from collections import deque

    source_id, target_id = connection
    to_node = flow.get_node(target_id)

    # Visited-set guards against a pre-existing cycle downstream of the
    # target, which previously made this walk loop forever; deque gives
    # O(1) pops instead of list.pop(0)'s O(n).
    queue = deque(to_node.leads_to_nodes)
    visited: set = set()
    while queue:
        candidate = queue.popleft()
        if candidate.node_id == source_id:
            return True
        if candidate.node_id in visited:
            continue
        visited.add(candidate.node_id)
        queue.extend(candidate.leads_to_nodes)
    return False
|
|
@@ -5,11 +5,11 @@ This module should not be imported directly by users.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
from dataclasses import dataclass
|
|
8
|
-
from typing import Type, List, Dict, Set, Any
|
|
8
|
+
from typing import Type, List, Dict, Set, Any
|
|
9
9
|
import polars as pl
|
|
10
10
|
|
|
11
11
|
# Import public types
|
|
12
|
-
from flowfile_core.
|
|
12
|
+
from flowfile_core.types import TypeGroup, DataType
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
@dataclass(frozen=True)
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import polars as pl
|
|
4
4
|
from pydantic import BaseModel
|
|
5
|
-
from typing import Any, Dict, Optional, TypeVar
|
|
5
|
+
from typing import Any, Dict, Optional, TypeVar
|
|
6
6
|
from flowfile_core.flowfile.node_designer.ui_components import FlowfileInComponent, IncomingColumns, Section
|
|
7
7
|
from flowfile_core.schemas.schemas import NodeTemplate, NodeTypeLiteral, TransformTypeLiteral
|
|
8
8
|
|
|
@@ -6,7 +6,7 @@ from pydantic import Field, BaseModel, computed_field
|
|
|
6
6
|
|
|
7
7
|
from flowfile_core.flowfile.node_designer._type_registry import normalize_type_spec
|
|
8
8
|
# Public API import
|
|
9
|
-
from flowfile_core.
|
|
9
|
+
from flowfile_core.types import DataType, TypeSpec
|
|
10
10
|
|
|
11
11
|
InputType = Literal["text", "number", "secret", "array", "date", "boolean"]
|
|
12
12
|
|
|
@@ -12,11 +12,12 @@ from flowfile_core.configs.flow_logger import main_logger
|
|
|
12
12
|
from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
|
|
13
13
|
from flowfile_core.schemas import transform_schema
|
|
14
14
|
from flowfile_core.schemas import input_schema
|
|
15
|
+
from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
|
|
15
16
|
|
|
16
17
|
|
|
17
18
|
def _ensure_all_columns_have_select(left_cols: List[str],
|
|
18
19
|
right_cols: List[str],
|
|
19
|
-
fuzzy_match_input: transform_schema.
|
|
20
|
+
fuzzy_match_input: transform_schema.FuzzyMatchInputManager):
|
|
20
21
|
"""
|
|
21
22
|
Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
|
|
22
23
|
statements.
|
|
@@ -38,7 +39,7 @@ def _ensure_all_columns_have_select(left_cols: List[str],
|
|
|
38
39
|
)
|
|
39
40
|
|
|
40
41
|
|
|
41
|
-
def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.
|
|
42
|
+
def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputsManager) -> None:
|
|
42
43
|
"""
|
|
43
44
|
Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
|
|
44
45
|
This function modifies the join_inputs object in-place.
|
|
@@ -46,17 +47,18 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: tra
|
|
|
46
47
|
Returns:
|
|
47
48
|
None
|
|
48
49
|
"""
|
|
49
|
-
select_map = {select.
|
|
50
|
+
select_map = {select.old_name: select for select in join_inputs.renames}
|
|
50
51
|
ordered_renames = [select_map[col] for col in col_order if col in select_map]
|
|
51
|
-
join_inputs.renames = ordered_renames
|
|
52
|
+
join_inputs.select_inputs.renames = ordered_renames
|
|
52
53
|
|
|
53
54
|
|
|
54
|
-
def calculate_fuzzy_match_schema(fm_input: transform_schema.
|
|
55
|
+
def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInputManager,
|
|
55
56
|
left_schema: List[FlowfileColumn],
|
|
56
57
|
right_schema: List[FlowfileColumn]):
|
|
57
58
|
_ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
|
|
58
59
|
right_cols=[col.column_name for col in right_schema],
|
|
59
60
|
fuzzy_match_input=fm_input)
|
|
61
|
+
|
|
60
62
|
_order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
|
|
61
63
|
join_inputs=fm_input.left_select)
|
|
62
64
|
_order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
|
|
@@ -67,6 +69,7 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
|
|
|
67
69
|
for column in fm_input.right_select.renames:
|
|
68
70
|
if column.join_key:
|
|
69
71
|
column.keep = True
|
|
72
|
+
|
|
70
73
|
left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
|
|
71
74
|
fm_input.auto_rename()
|
|
72
75
|
right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
|
|
@@ -39,11 +39,14 @@ def join(node_data: "NodeData") -> NodeData:
|
|
|
39
39
|
join_key = overlapping_cols[0]
|
|
40
40
|
else:
|
|
41
41
|
join_key = ''
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
join_input_manager = transform_schema.JoinInputManager(
|
|
43
|
+
transform_schema.JoinInput(join_mapping=join_key,
|
|
44
|
+
left_select=node_data.main_input.columns,
|
|
45
|
+
right_select=node_data.right_input.columns
|
|
46
|
+
)
|
|
47
|
+
)
|
|
48
|
+
join_input_manager.auto_rename()
|
|
49
|
+
ji = join_input_manager.to_join_input()
|
|
47
50
|
node_data.setting_input = input_schema.NodeJoin(flow_id=node_data.flow_id,
|
|
48
51
|
node_id=node_data.node_id,
|
|
49
52
|
join_input=ji)
|
|
@@ -53,12 +56,15 @@ def join(node_data: "NodeData") -> NodeData:
|
|
|
53
56
|
@setting_generator_method
|
|
54
57
|
def cross_join(node_data: "NodeData") -> NodeData:
|
|
55
58
|
if node_data.right_input and node_data.main_input:
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
+
cj_input_manager = transform_schema.CrossJoinInputManager(
|
|
60
|
+
transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
|
|
61
|
+
right_select=node_data.right_input.columns)
|
|
62
|
+
)
|
|
63
|
+
cj_input_manager.auto_rename()
|
|
64
|
+
cj = cj_input_manager.to_cross_join_input()
|
|
59
65
|
node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
|
|
60
66
|
node_id=node_data.node_id,
|
|
61
|
-
cross_join_input=
|
|
67
|
+
cross_join_input=cj)
|
|
62
68
|
return node_data
|
|
63
69
|
|
|
64
70
|
|
flowfile_core/routes/routes.py
CHANGED
|
@@ -266,8 +266,6 @@ def get_run_status(flow_id: int, response: Response):
|
|
|
266
266
|
flow = flow_file_handler.get_flow(flow_id)
|
|
267
267
|
if not flow:
|
|
268
268
|
raise HTTPException(status_code=404, detail="Flow not found")
|
|
269
|
-
if flow.latest_run_info is None:
|
|
270
|
-
raise HTTPException(status_code=404, detail="No run information available")
|
|
271
269
|
if flow.flow_settings.is_running:
|
|
272
270
|
response.status_code = status.HTTP_202_ACCEPTED
|
|
273
271
|
else:
|
|
@@ -474,14 +472,14 @@ def create_flow(flow_path: str = None, name: str = None):
|
|
|
474
472
|
if flow_path is not None and name is None:
|
|
475
473
|
name = Path(flow_path).stem
|
|
476
474
|
elif flow_path is not None and name is not None:
|
|
477
|
-
if name not in flow_path and flow_path.endswith(".
|
|
475
|
+
if name not in flow_path and (flow_path.endswith(".yaml") or flow_path.endswith(".yml")):
|
|
478
476
|
raise HTTPException(422, 'The name must be part of the flow path when a full path is provided')
|
|
479
|
-
elif name in flow_path and not flow_path.endswith(".
|
|
480
|
-
flow_path = str(Path(flow_path) / (name + ".
|
|
481
|
-
elif name not in flow_path and name.endswith(".
|
|
477
|
+
elif name in flow_path and not (flow_path.endswith(".yaml") or flow_path.endswith(".yml")):
|
|
478
|
+
flow_path = str(Path(flow_path) / (name + ".yaml"))
|
|
479
|
+
elif name not in flow_path and (name.endswith(".yaml") or name.endswith(".yml")):
|
|
482
480
|
flow_path = str(Path(flow_path) / name)
|
|
483
|
-
elif name not in flow_path and not name.endswith(".
|
|
484
|
-
flow_path = str(Path(flow_path) / (name + ".
|
|
481
|
+
elif name not in flow_path and not (name.endswith(".yaml") or name.endswith(".yml")):
|
|
482
|
+
flow_path = str(Path(flow_path) / (name + ".yaml"))
|
|
485
483
|
if flow_path is not None:
|
|
486
484
|
flow_path_ref = Path(flow_path)
|
|
487
485
|
if not flow_path_ref.parent.exists():
|
|
@@ -600,7 +598,7 @@ async def get_downstream_node_ids(flow_id: int, node_id: int) -> List[int]:
|
|
|
600
598
|
|
|
601
599
|
@router.get('/import_flow/', tags=['editor'], response_model=int)
|
|
602
600
|
def import_saved_flow(flow_path: str) -> int:
|
|
603
|
-
"""Imports a flow from a saved `.
|
|
601
|
+
"""Imports a flow from a saved `.yaml` and registers it as a new session."""
|
|
604
602
|
flow_path = Path(flow_path)
|
|
605
603
|
if not flow_path.exists():
|
|
606
604
|
raise HTTPException(404, 'File not found')
|
|
@@ -609,7 +607,7 @@ def import_saved_flow(flow_path: str) -> int:
|
|
|
609
607
|
|
|
610
608
|
@router.get('/save_flow', tags=['editor'])
|
|
611
609
|
def save_flow(flow_id: int, flow_path: str = None):
|
|
612
|
-
"""Saves the current state of a flow to a `.
|
|
610
|
+
"""Saves the current state of a flow to a `.yaml`."""
|
|
613
611
|
flow = flow_file_handler.get_flow(flow_id)
|
|
614
612
|
flow.save_flow(flow_path=flow_path)
|
|
615
613
|
|
|
@@ -136,11 +136,9 @@ class CloudStorageReadSettings(CloudStorageSettings):
|
|
|
136
136
|
|
|
137
137
|
scan_mode: Literal["single_file", "directory"] = "single_file"
|
|
138
138
|
file_format: Literal["csv", "parquet", "json", "delta", "iceberg"] = "parquet"
|
|
139
|
-
# CSV specific options
|
|
140
139
|
csv_has_header: Optional[bool] = True
|
|
141
140
|
csv_delimiter: Optional[str] = ","
|
|
142
141
|
csv_encoding: Optional[str] = "utf8"
|
|
143
|
-
# Deltalake specific settings
|
|
144
142
|
delta_version: Optional[int] = None
|
|
145
143
|
|
|
146
144
|
|