Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -0,0 +1,394 @@
1
+ from flowfile_core.schemas import schemas, input_schema
2
+ from typing import List, Tuple
3
+ from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_compatibility, load_flowfile_pickle
4
+ from flowfile_core.flowfile.flow_graph import FlowGraph
5
+ from pathlib import Path
6
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
7
+ from flowfile_core.configs.settings import IS_RUNNING_IN_DOCKER
8
+ import json
9
+ from shared.storage_config import storage
10
+
11
+
12
+ try:
13
+ import yaml
14
+ except ImportError:
15
+ yaml = None
16
+
17
+
18
def _validate_flow_path(flow_path: Path) -> Path:
    """Resolve ``flow_path`` and check that it may be opened as a flow.

    The resolved path must carry one of the supported suffixes and point at an
    existing file. When running in Docker the resolved path must additionally
    sit below one of the storage directories, unless the caller passed an
    absolute path, which is accepted as an explicit choice.

    Args:
        flow_path: Path to the flow file as supplied by the caller.

    Returns:
        The resolved path.

    Raises:
        ValueError: If the suffix is unsupported, or a relative path resolves
            outside the storage directories while running in Docker.
        FileNotFoundError: If the resolved path is not an existing file.
    """
    target = flow_path.resolve()
    extension = target.suffix

    if extension.lower() not in ('.yaml', '.yml', '.json', '.flowfile'):
        raise ValueError(f"Unsupported file extension: {extension}")
    if not target.is_file():
        raise FileNotFoundError(f"Flow file not found: {target}")

    # Outside Docker every existing file with a supported suffix is accepted.
    if not IS_RUNNING_IN_DOCKER:
        return target

    allowed_roots = (
        storage.flows_directory,
        storage.uploads_directory,
        storage.temp_directory_for_flows,
    )
    for allowed_root in allowed_roots:
        if target.is_relative_to(allowed_root):
            return target

    # Not below a known directory: only an explicitly absolute input passes.
    if flow_path.is_absolute():
        return target
    raise ValueError(
        f"Relative paths must be within flows or uploads directory. "
        f"Use absolute path or place file in: {storage.flows_directory}"
    )
53
+
54
+
55
def _derive_connections_from_nodes(nodes: List[schemas.FlowfileNode]) -> List[Tuple[int, int]]:
    """Build ``(source_id, target_id)`` pairs from each node's ``outputs``.

    Nodes whose ``outputs`` is empty or ``None`` contribute nothing. Pairs are
    emitted in node order, then in the order of that node's outputs.
    """
    return [
        (source.id, target_id)
        for source in nodes
        if source.outputs
        for target_id in source.outputs
    ]
63
+
64
+
65
def determine_insertion_order(node_storage: schemas.FlowInformation):
    """Compute the order in which stored nodes should be inserted into a graph.

    Starting from the first entry of ``node_storage.node_starts`` (or from every
    node when there are no start nodes), the graph is walked so that a node is
    only emitted once all of its inputs that exist in ``node_storage.data``
    have been emitted. Nodes never reached by the walk are appended at the end.

    Side effect: while walking, each input node's ``outputs`` list is extended
    with the id of the node that depends on it when that link is missing.

    Args:
        node_storage: Flow information exposing ``data`` (node id -> node
            information) and ``node_starts`` (list of start node ids).

    Returns:
        List of node ids, each id appearing exactly once.
    """
    ingest_order: List[int] = []
    ingest_order_set: set[int] = set()
    all_nodes = set(node_storage.data.keys())

    def mark_ingested(node_id: int) -> None:
        # A node with several missing inputs reaches this point once per
        # missing input; the guard keeps the resulting order free of duplicates.
        if node_id not in ingest_order_set:
            ingest_order.append(node_id)
            ingest_order_set.add(node_id)

    def assure_output_id(input_node: schemas.NodeInformation, output_node: schemas.NodeInformation):
        # Make sure the input node lists the dependent node among its outputs,
        # so the walk over outputs can reach the dependent node again later.
        if input_node.outputs is None:
            input_node.outputs = []
        if output_node.id not in input_node.outputs:
            input_node.outputs.append(output_node.id)

    def determine_order(node_id: int):
        current_node = node_storage.data.get(node_id)
        if current_node is None:
            return
        main_input_ids = current_node.input_ids if current_node.input_ids else []
        # Inputs that still have to be emitted before the current node.
        input_ids = [
            n for n in [current_node.left_input_id, current_node.right_input_id] + main_input_ids
            if n is not None and n not in ingest_order_set
        ]
        if input_ids:
            for input_id in input_ids:
                new_node = node_storage.data.get(input_id)
                if new_node is None:
                    # The input is referenced but not stored: do not wait for it.
                    mark_ingested(current_node.id)
                    continue
                assure_output_id(new_node, current_node)
                if new_node.id not in ingest_order_set:
                    determine_order(input_id)
        else:
            mark_ingested(current_node.id)

        # ``outputs`` may be None for nodes without downstream connections.
        for output_id in current_node.outputs or []:
            if output_id not in ingest_order_set:
                determine_order(output_id)

    if len(node_storage.node_starts) > 0:
        determine_order(node_storage.node_starts[0])
    else:
        for node_id in all_nodes:
            determine_order(node_id)
    # Add the nodes that were not reached by the walk (not connected).
    ingest_order += list(all_nodes - ingest_order_set)
    return ingest_order
111
+
112
+
113
def _load_flowfile_yaml(flow_path: Path) -> schemas.FlowInformation:
    """
    Read a YAML flowfile and convert it to a FlowInformation object.

    The file content is parsed with ``yaml.safe_load``, validated as
    ``schemas.FlowfileData`` and then converted.

    Args:
        flow_path: Path to the YAML file

    Returns:
        FlowInformation object

    Raises:
        ImportError: If PyYAML could not be imported.
    """
    if yaml is None:
        raise ImportError("PyYAML is required for YAML files. Install with: pip install pyyaml")

    checked_path = _validate_flow_path(flow_path)
    with open(checked_path, 'r', encoding='utf-8') as handle:
        raw_content = yaml.safe_load(handle)

    # FlowfileData validates the overall document; node settings are resolved
    # per node type during the conversion step.
    parsed = schemas.FlowfileData.model_validate(raw_content)
    return _flowfile_data_to_flow_information(parsed)
132
+
133
+
134
def _load_flowfile_json(flow_path: Path) -> schemas.FlowInformation:
    """
    Read a JSON flowfile and convert it to a FlowInformation object.

    The file content is parsed with ``json.load``, validated as
    ``schemas.FlowfileData`` and then converted.

    Args:
        flow_path: Path to the JSON file

    Returns:
        FlowInformation object
    """
    checked_path = _validate_flow_path(flow_path)
    with open(checked_path, 'r', encoding='utf-8') as handle:
        raw_content = json.load(handle)

    # FlowfileData validates the overall document; node settings are resolved
    # per node type during the conversion step.
    parsed = schemas.FlowfileData.model_validate(raw_content)
    return _flowfile_data_to_flow_information(parsed)
153
+
154
+
155
def _flowfile_data_to_flow_information(flowfile_data: schemas.FlowfileData) -> schemas.FlowInformation:
    """Convert serialized ``FlowfileData`` into a ``FlowInformation`` object.

    For every node that has settings, the settings class is looked up by node
    type, the fields left out of the serialized form (flow id, node id,
    position, description, setup flag and upstream dependencies) are put back,
    and the result is validated against that class. Node connections are
    derived from the ``outputs`` of the serialized nodes.

    Args:
        flowfile_data: The validated flowfile document.

    Returns:
        FlowInformation holding the flow settings, nodes, start nodes and
        connections.

    Raises:
        ValueError: If a node type has no settings class, or an output node's
            ``output_settings`` lacks ``file_type``.
    """
    from flowfile_core.schemas.schemas import get_settings_class_for_node_type

    node_lookup = {}
    start_ids = []
    flow_id = flowfile_data.flowfile_id

    for stored in flowfile_data.nodes:
        parsed_settings = None
        if stored.setting_input is not None:
            settings_cls = get_settings_class_for_node_type(stored.type)
            if settings_cls is None:
                raise ValueError(f"Unknown node type: {stored.type}")

            raw_settings = stored.setting_input
            payload = raw_settings if isinstance(raw_settings, dict) else raw_settings.model_dump()

            # Put back the fields that are not part of the serialized settings.
            payload['flow_id'] = flow_id
            payload['node_id'] = stored.id
            payload['pos_x'] = float(stored.x_position or 0)
            payload['pos_y'] = float(stored.y_position or 0)
            payload['description'] = stored.description or ''
            payload['is_setup'] = True

            # Main inputs first, then the left and right input when set.
            upstream_ids = list(stored.input_ids or [])
            upstream_ids.extend(
                side_id for side_id in (stored.left_input_id, stored.right_input_id) if side_id
            )

            if settings_cls == input_schema.UserDefinedNode:
                payload['is_user_defined'] = True
                payload['depending_on_ids'] = upstream_ids
            else:
                declared_fields = settings_cls.model_fields
                if 'depending_on_id' in declared_fields:
                    payload['depending_on_id'] = stored.input_ids[0] if stored.input_ids else -1
                if 'depending_on_ids' in declared_fields:
                    payload['depending_on_ids'] = upstream_ids

            if stored.type == 'output' and 'output_settings' in payload:
                writer_settings = payload['output_settings']
                writer_file_type = writer_settings.get('file_type', None)
                if writer_file_type is None:
                    raise ValueError("Output node's output_settings must include 'file_type'")
                # Supply minimal table settings when the document has none.
                writer_settings.setdefault('table_settings', {"file_type": writer_file_type})

            parsed_settings = settings_cls.model_validate(payload)

        node_lookup[stored.id] = schemas.NodeInformation(
            id=stored.id,
            type=stored.type,
            is_setup=parsed_settings is not None,
            description=stored.description,
            x_position=stored.x_position,
            y_position=stored.y_position,
            left_input_id=stored.left_input_id,
            right_input_id=stored.right_input_id,
            input_ids=stored.input_ids,
            outputs=stored.outputs,
            setting_input=parsed_settings,
        )
        if stored.is_start_node:
            start_ids.append(stored.id)

    derived_connections = _derive_connections_from_nodes(flowfile_data.nodes)

    stored_settings = flowfile_data.flowfile_settings
    rebuilt_settings = schemas.FlowSettings(
        flow_id=flow_id,
        name=flowfile_data.flowfile_name,
        description=stored_settings.description,
        execution_mode=stored_settings.execution_mode,
        execution_location=stored_settings.execution_location,
        auto_save=stored_settings.auto_save,
        show_detailed_progress=stored_settings.show_detailed_progress,
    )

    return schemas.FlowInformation(
        flow_id=flow_id,
        flow_name=flowfile_data.flowfile_name,
        flow_settings=rebuilt_settings,
        data=node_lookup,
        node_starts=start_ids,
        node_connections=derived_connections,
    )
245
+
246
def _load_flow_storage(flow_path: Path) -> schemas.FlowInformation:
    """
    Load the stored flow from any supported file format.

    The loader is selected by the lower-cased file suffix:
    - .yaml / .yml - new YAML format
    - .json - JSON format
    - .flowfile (pickle) - legacy format, passed through
      ``ensure_compatibility`` after loading

    Args:
        flow_path: Path to the flowfile

    Returns:
        FlowInformation object

    Raises:
        ValueError: If the suffix is unsupported or the legacy file cannot be
            opened (the original exception is chained).
    """
    flow_path = _validate_flow_path(flow_path)
    extension = flow_path.suffix.lower()

    if extension in ('.yaml', '.yml'):
        return _load_flowfile_yaml(flow_path)
    if extension == '.json':
        return _load_flowfile_json(flow_path)
    if extension != '.flowfile':
        raise ValueError(f"Unsupported file format: {extension}")

    # Legacy pickle format.
    try:
        legacy_flow = load_flowfile_pickle(str(flow_path))
        ensure_compatibility(legacy_flow, str(flow_path))
        return legacy_flow
    except Exception as exc:
        raise ValueError(
            f"Failed to open legacy .flowfile: {exc}\n\n"
            f"Try migrating: migrate_flowfile('{flow_path}')"
        ) from exc
281
+
282
+
283
def open_flow(flow_path: Path) -> FlowGraph:
    """
    Open a flowfile from a given path and rebuild it as a FlowGraph.

    Supports multiple formats:
    - .flowfile (pickle) - legacy format, auto-migrated
    - .yaml / .yml - new YAML format
    - .json - JSON format

    The graph is rebuilt in three steps: a promise is registered for every
    node, then node settings are applied and the stored output connections are
    wired, and finally stored connections that are still missing from the
    graph are added when the target has no input yet and the connection would
    not close a cycle.

    Args:
        flow_path (Path): The absolute or relative path to the flowfile

    Returns:
        FlowGraph: The flowfile object
    """
    # Load flow storage (handles format detection)
    flow_path = _validate_flow_path(flow_path)
    flow_storage_obj = _load_flow_storage(flow_path)
    # The file location decides the flow's path and name, not the stored values.
    flow_storage_obj.flow_settings.path = str(flow_path)
    flow_storage_obj.flow_settings.name = str(flow_path.stem)
    flow_storage_obj.flow_name = str(flow_path.stem)

    # Determine node insertion order
    ingestion_order = determine_insertion_order(flow_storage_obj)

    # Create new FlowGraph
    new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)

    # First pass: add node promises
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        node_promise = input_schema.NodePromise(
            flow_id=new_flow.flow_id,
            node_id=node_info.id,
            pos_x=node_info.x_position,
            pos_y=node_info.y_position,
            node_type=node_info.type
        )
        if hasattr(node_info.setting_input, 'cache_results'):
            node_promise.cache_results = node_info.setting_input.cache_results
        new_flow.add_node_promise(node_promise)

    # Second pass: apply node settings and wire the stored connections
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        if node_info.is_setup:
            if getattr(node_info.setting_input, "is_user_defined", False):
                # A custom node whose class is not registered cannot be rebuilt;
                # the node is left as a promise and its connections are skipped.
                if node_info.type not in CUSTOM_NODE_STORE:
                    continue
                user_defined_node_class = CUSTOM_NODE_STORE[node_info.type]
                new_flow.add_user_defined_node(
                    custom_node=user_defined_node_class.from_settings(node_info.setting_input.settings),
                    user_defined_node_settings=node_info.setting_input
                )
            else:
                # Built-in nodes are added through the matching ``add_<type>`` method.
                getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)

        # Setup connections
        from_node = new_flow.get_node(node_id)
        for output_node_id in (node_info.outputs or []):
            to_node = new_flow.get_node(output_node_id)
            if to_node is not None:
                output_node_obj = flow_storage_obj.data[output_node_id]
                # A side input only counts when it is not already connected to this node.
                is_left_input = (output_node_obj.left_input_id == node_id) and (
                    to_node.left_input.node_id != node_id if to_node.left_input is not None else True
                )
                is_right_input = (output_node_obj.right_input_id == node_id) and (
                    to_node.right_input.node_id != node_id if to_node.right_input is not None else True
                )
                is_main_input = node_id in (output_node_obj.input_ids or [])

                if is_left_input:
                    insert_type = 'left'
                elif is_right_input:
                    insert_type = 'right'
                elif is_main_input:
                    insert_type = 'main'
                else:
                    continue
                to_node.add_node_connection(from_node, insert_type)
            else:
                # The target node does not exist in the graph: drop the dangling link.
                from_node.delete_lead_to_node(output_node_id)
                dangling_connection = (from_node.node_id, output_node_id)
                if dangling_connection in flow_storage_obj.node_connections:
                    flow_storage_obj.node_connections.remove(dangling_connection)

    # Handle any missing connections
    for missing_connection in set(flow_storage_obj.node_connections) - set(new_flow.node_connections):
        to_node = new_flow.get_node(missing_connection[1])
        if to_node is None or to_node.has_input:
            continue
        # Never add a connection that would close a cycle in the graph.
        if test_if_circular_connection(missing_connection, new_flow):
            continue
        from_node = new_flow.get_node(missing_connection[0])
        if from_node:
            to_node.add_node_connection(from_node)

    return new_flow
381
+
382
+
383
def test_if_circular_connection(connection: Tuple[int, int], flow: FlowGraph):
    """Report whether adding ``connection`` would close a cycle in ``flow``.

    The nodes reachable from the connection's target through ``leads_to_nodes``
    are searched breadth-first for the connection's source. Every node is
    expanded at most once, so the search also terminates when the reachable
    part of the graph already contains a cycle.

    Args:
        connection: ``(from_node_id, to_node_id)`` pair to check.
        flow: Graph that provides ``get_node`` for looking up nodes by id.

    Returns:
        True if the source node is reachable from the target node, else False
        (also False when the target node is not present in the graph).
    """
    from collections import deque

    to_node = flow.get_node(connection[1])
    if to_node is None:
        return False

    source_id = connection[0]
    pending = deque(to_node.leads_to_nodes)
    expanded_ids = set()
    while pending:
        candidate = pending.popleft()
        if candidate.node_id == source_id:
            return True
        if candidate.node_id in expanded_ids:
            continue
        expanded_ids.add(candidate.node_id)
        pending.extend(candidate.leads_to_nodes)
    return False
@@ -23,7 +23,7 @@ from .ui_components import (
23
23
  )
24
24
 
25
25
  # Import the main `Types` object for filtering in ColumnSelector
26
- from .data_types import Types
26
+ from flowfile_core.types import Types
27
27
 
28
28
 
29
29
  # Define the public API of this package
@@ -5,11 +5,11 @@ This module should not be imported directly by users.
5
5
  """
6
6
 
7
7
  from dataclasses import dataclass
8
- from typing import Type, List, Dict, Set, Any, Union
8
+ from typing import Type, List, Dict, Set, Any
9
9
  import polars as pl
10
10
 
11
11
  # Import public types
12
- from flowfile_core.flowfile.node_designer.data_types import TypeGroup, DataType
12
+ from flowfile_core.types import TypeGroup, DataType
13
13
 
14
14
 
15
15
  @dataclass(frozen=True)
@@ -2,7 +2,7 @@
2
2
 
3
3
  import polars as pl
4
4
  from pydantic import BaseModel
5
- from typing import Any, Dict, Optional, TypeVar, Callable
5
+ from typing import Any, Dict, Optional, TypeVar
6
6
  from flowfile_core.flowfile.node_designer.ui_components import FlowfileInComponent, IncomingColumns, Section
7
7
  from flowfile_core.schemas.schemas import NodeTemplate, NodeTypeLiteral, TransformTypeLiteral
8
8
 
@@ -6,7 +6,7 @@ from pydantic import Field, BaseModel, computed_field
6
6
 
7
7
  from flowfile_core.flowfile.node_designer._type_registry import normalize_type_spec
8
8
  # Public API import
9
- from flowfile_core.flowfile.node_designer.data_types import DataType, TypeSpec
9
+ from flowfile_core.types import DataType, TypeSpec
10
10
 
11
11
  InputType = Literal["text", "number", "secret", "array", "date", "boolean"]
12
12
 
@@ -12,11 +12,12 @@ from flowfile_core.configs.flow_logger import main_logger
12
12
  from flowfile_core.flowfile.flow_data_engine.flow_file_column.main import FlowfileColumn, PlType
13
13
  from flowfile_core.schemas import transform_schema
14
14
  from flowfile_core.schemas import input_schema
15
+ from flowfile_core.schemas.transform_schema import FuzzyMatchInputManager
15
16
 
16
17
 
17
18
  def _ensure_all_columns_have_select(left_cols: List[str],
18
19
  right_cols: List[str],
19
- fuzzy_match_input: transform_schema.FuzzyMatchInput):
20
+ fuzzy_match_input: transform_schema.FuzzyMatchInputManager):
20
21
  """
21
22
  Ensure that all columns in the left and right FlowDataEngines are included in the fuzzy match input's select
22
23
  statements.
@@ -38,7 +39,7 @@ def _ensure_all_columns_have_select(left_cols: List[str],
38
39
  )
39
40
 
40
41
 
41
- def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputs) -> None:
42
+ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: transform_schema.JoinInputsManager) -> None:
42
43
  """
43
44
  Ensure that the select columns in the fuzzy match input match the order of the incoming columns.
44
45
  This function modifies the join_inputs object in-place.
@@ -46,17 +47,18 @@ def _order_join_inputs_based_on_col_order(col_order: List[str], join_inputs: tra
46
47
  Returns:
47
48
  None
48
49
  """
49
- select_map = {select.new_name: select for select in join_inputs.renames}
50
+ select_map = {select.old_name: select for select in join_inputs.renames}
50
51
  ordered_renames = [select_map[col] for col in col_order if col in select_map]
51
- join_inputs.renames = ordered_renames
52
+ join_inputs.select_inputs.renames = ordered_renames
52
53
 
53
54
 
54
- def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
55
+ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInputManager,
55
56
  left_schema: List[FlowfileColumn],
56
57
  right_schema: List[FlowfileColumn]):
57
58
  _ensure_all_columns_have_select(left_cols=[col.column_name for col in left_schema],
58
59
  right_cols=[col.column_name for col in right_schema],
59
60
  fuzzy_match_input=fm_input)
61
+
60
62
  _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in left_schema],
61
63
  join_inputs=fm_input.left_select)
62
64
  _order_join_inputs_based_on_col_order(col_order=[col.column_name for col in right_schema],
@@ -67,6 +69,7 @@ def calculate_fuzzy_match_schema(fm_input: transform_schema.FuzzyMatchInput,
67
69
  for column in fm_input.right_select.renames:
68
70
  if column.join_key:
69
71
  column.keep = True
72
+
70
73
  left_schema_dict, right_schema_dict = ({ls.name: ls for ls in left_schema}, {rs.name: rs for rs in right_schema})
71
74
  fm_input.auto_rename()
72
75
  right_renames = {column.old_name: column.new_name for column in fm_input.right_select.renames}
@@ -39,11 +39,14 @@ def join(node_data: "NodeData") -> NodeData:
39
39
  join_key = overlapping_cols[0]
40
40
  else:
41
41
  join_key = ''
42
- ji = transform_schema.JoinInput(join_mapping=join_key,
43
- left_select=node_data.main_input.columns,
44
- right_select=node_data.right_input.columns
45
- )
46
- ji.auto_rename()
42
+ join_input_manager = transform_schema.JoinInputManager(
43
+ transform_schema.JoinInput(join_mapping=join_key,
44
+ left_select=node_data.main_input.columns,
45
+ right_select=node_data.right_input.columns
46
+ )
47
+ )
48
+ join_input_manager.auto_rename()
49
+ ji = join_input_manager.to_join_input()
47
50
  node_data.setting_input = input_schema.NodeJoin(flow_id=node_data.flow_id,
48
51
  node_id=node_data.node_id,
49
52
  join_input=ji)
@@ -53,12 +56,15 @@ def join(node_data: "NodeData") -> NodeData:
53
56
  @setting_generator_method
54
57
  def cross_join(node_data: "NodeData") -> NodeData:
55
58
  if node_data.right_input and node_data.main_input:
56
- ji = transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
57
- right_select=node_data.right_input.columns)
58
- ji.auto_rename()
59
+ cj_input_manager = transform_schema.CrossJoinInputManager(
60
+ transform_schema.CrossJoinInput(left_select=node_data.main_input.columns,
61
+ right_select=node_data.right_input.columns)
62
+ )
63
+ cj_input_manager.auto_rename()
64
+ cj = cj_input_manager.to_cross_join_input()
59
65
  node_data.setting_input = input_schema.NodeCrossJoin(flow_id=node_data.flow_id,
60
66
  node_id=node_data.node_id,
61
- cross_join_input=ji)
67
+ cross_join_input=cj)
62
68
  return node_data
63
69
 
64
70
 
@@ -266,8 +266,6 @@ def get_run_status(flow_id: int, response: Response):
266
266
  flow = flow_file_handler.get_flow(flow_id)
267
267
  if not flow:
268
268
  raise HTTPException(status_code=404, detail="Flow not found")
269
- if flow.latest_run_info is None:
270
- raise HTTPException(status_code=404, detail="No run information available")
271
269
  if flow.flow_settings.is_running:
272
270
  response.status_code = status.HTTP_202_ACCEPTED
273
271
  else:
@@ -474,14 +472,14 @@ def create_flow(flow_path: str = None, name: str = None):
474
472
  if flow_path is not None and name is None:
475
473
  name = Path(flow_path).stem
476
474
  elif flow_path is not None and name is not None:
477
- if name not in flow_path and flow_path.endswith(".flowfile"):
475
+ if name not in flow_path and (flow_path.endswith(".yaml") or flow_path.endswith(".yml")):
478
476
  raise HTTPException(422, 'The name must be part of the flow path when a full path is provided')
479
- elif name in flow_path and not flow_path.endswith(".flowfile"):
480
- flow_path = str(Path(flow_path) / (name + ".flowfile"))
481
- elif name not in flow_path and name.endswith(".flowfile"):
477
+ elif name in flow_path and not (flow_path.endswith(".yaml") or flow_path.endswith(".yml")):
478
+ flow_path = str(Path(flow_path) / (name + ".yaml"))
479
+ elif name not in flow_path and (name.endswith(".yaml") or name.endswith(".yml")):
482
480
  flow_path = str(Path(flow_path) / name)
483
- elif name not in flow_path and not name.endswith(".flowfile"):
484
- flow_path = str(Path(flow_path) / (name + ".flowfile"))
481
+ elif name not in flow_path and not (name.endswith(".yaml") or name.endswith(".yml")):
482
+ flow_path = str(Path(flow_path) / (name + ".yaml"))
485
483
  if flow_path is not None:
486
484
  flow_path_ref = Path(flow_path)
487
485
  if not flow_path_ref.parent.exists():
@@ -600,7 +598,7 @@ async def get_downstream_node_ids(flow_id: int, node_id: int) -> List[int]:
600
598
 
601
599
  @router.get('/import_flow/', tags=['editor'], response_model=int)
602
600
  def import_saved_flow(flow_path: str) -> int:
603
- """Imports a flow from a saved `.flowfile` and registers it as a new session."""
601
+ """Imports a flow from a saved `.yaml` and registers it as a new session."""
604
602
  flow_path = Path(flow_path)
605
603
  if not flow_path.exists():
606
604
  raise HTTPException(404, 'File not found')
@@ -609,7 +607,7 @@ def import_saved_flow(flow_path: str) -> int:
609
607
 
610
608
  @router.get('/save_flow', tags=['editor'])
611
609
  def save_flow(flow_id: int, flow_path: str = None):
612
- """Saves the current state of a flow to a `.flowfile`."""
610
+ """Saves the current state of a flow to a `.yaml`."""
613
611
  flow = flow_file_handler.get_flow(flow_id)
614
612
  flow.save_flow(flow_path=flow_path)
615
613
 
@@ -136,11 +136,9 @@ class CloudStorageReadSettings(CloudStorageSettings):
136
136
 
137
137
  scan_mode: Literal["single_file", "directory"] = "single_file"
138
138
  file_format: Literal["csv", "parquet", "json", "delta", "iceberg"] = "parquet"
139
- # CSV specific options
140
139
  csv_has_header: Optional[bool] = True
141
140
  csv_delimiter: Optional[str] = ","
142
141
  csv_encoding: Optional[str] = "utf8"
143
- # Deltalake specific settings
144
142
  delta_version: Optional[int] = None
145
143
 
146
144