Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201)
  1. flowfile/__init__.py +8 -1
  2. flowfile/api.py +1 -3
  3. flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
  5. flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
  6. flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
  7. flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
  8. flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
  9. flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
  10. flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
  11. flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
  12. flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
  13. flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
  14. flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
  15. flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
  16. flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
  17. flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
  18. flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
  19. flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
  20. flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
  21. flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
  22. flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
  23. flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
  24. flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
  25. flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
  26. flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
  27. flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
  28. flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
  29. flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
  30. flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
  31. flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
  32. flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
  33. flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
  34. flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
  35. flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
  36. flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
  37. flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
  38. flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
  39. flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
  40. flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
  41. flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
  42. flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
  43. flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
  44. flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
  45. flowfile/web/static/assets/Output-283fe388.css +37 -0
  46. flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
  47. flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
  48. flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
  49. flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
  50. flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
  51. flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
  52. flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
  53. flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
  54. flowfile/web/static/assets/Read-64a3f259.js +218 -0
  55. flowfile/web/static/assets/Read-e808b239.css +62 -0
  56. flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
  57. flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
  58. flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
  59. flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
  60. flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
  61. flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
  62. flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
  63. flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
  64. flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
  65. flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
  66. flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
  67. flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
  68. flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
  69. flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
  70. flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
  71. flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
  72. flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
  73. flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
  74. flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
  75. flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
  76. flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
  77. flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
  78. flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
  79. flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
  80. flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
  81. flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
  82. flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
  83. flowfile/web/static/assets/Union-bfe9b996.js +77 -0
  84. flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
  85. flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
  86. flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
  87. flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
  88. flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
  89. flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
  90. flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
  91. flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
  92. flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
  93. flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
  94. flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
  95. flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
  96. flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
  97. flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
  98. flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
  99. flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
  100. flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
  101. flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
  102. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  103. flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
  104. flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
  105. flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
  106. flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
  107. flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
  108. flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
  109. flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
  110. flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
  111. flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
  112. flowfile/web/static/assets/readExcel-806d2826.css +64 -0
  113. flowfile/web/static/assets/readParquet-48c81530.css +19 -0
  114. flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
  115. flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
  116. flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
  117. flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
  118. flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
  119. flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
  120. flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
  121. flowfile/web/static/index.html +2 -2
  122. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
  123. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
  124. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  125. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  126. flowfile_core/__init__.py +3 -0
  127. flowfile_core/configs/flow_logger.py +5 -13
  128. flowfile_core/configs/node_store/__init__.py +30 -0
  129. flowfile_core/configs/node_store/nodes.py +383 -99
  130. flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
  131. flowfile_core/configs/settings.py +2 -1
  132. flowfile_core/database/connection.py +5 -21
  133. flowfile_core/fileExplorer/funcs.py +239 -121
  134. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  135. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  136. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  137. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  138. flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
  139. flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
  140. flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
  141. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  142. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  143. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  144. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
  145. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  146. flowfile_core/flowfile/flow_graph.py +240 -54
  147. flowfile_core/flowfile/flow_node/flow_node.py +48 -13
  148. flowfile_core/flowfile/flow_node/models.py +2 -1
  149. flowfile_core/flowfile/handler.py +24 -5
  150. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  151. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  152. flowfile_core/flowfile/node_designer/__init__.py +47 -0
  153. flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
  154. flowfile_core/flowfile/node_designer/custom_node.py +371 -0
  155. flowfile_core/flowfile/node_designer/ui_components.py +277 -0
  156. flowfile_core/flowfile/schema_callbacks.py +17 -10
  157. flowfile_core/flowfile/setting_generator/settings.py +15 -10
  158. flowfile_core/main.py +5 -1
  159. flowfile_core/routes/routes.py +73 -30
  160. flowfile_core/routes/user_defined_components.py +55 -0
  161. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  162. flowfile_core/schemas/input_schema.py +228 -65
  163. flowfile_core/schemas/output_model.py +5 -2
  164. flowfile_core/schemas/schemas.py +153 -35
  165. flowfile_core/schemas/transform_schema.py +1083 -412
  166. flowfile_core/schemas/yaml_types.py +103 -0
  167. flowfile_core/types.py +156 -0
  168. flowfile_core/utils/validate_setup.py +3 -1
  169. flowfile_frame/__init__.py +3 -1
  170. flowfile_frame/flow_frame.py +31 -24
  171. flowfile_frame/flow_frame_methods.py +12 -9
  172. flowfile_worker/__init__.py +9 -35
  173. flowfile_worker/create/__init__.py +3 -21
  174. flowfile_worker/create/funcs.py +68 -56
  175. flowfile_worker/create/models.py +130 -62
  176. flowfile_worker/main.py +5 -2
  177. flowfile_worker/routes.py +52 -13
  178. shared/__init__.py +15 -0
  179. shared/storage_config.py +258 -0
  180. tools/migrate/README.md +56 -0
  181. tools/migrate/__init__.py +12 -0
  182. tools/migrate/__main__.py +131 -0
  183. tools/migrate/legacy_schemas.py +621 -0
  184. tools/migrate/migrate.py +598 -0
  185. tools/migrate/tests/__init__.py +0 -0
  186. tools/migrate/tests/conftest.py +23 -0
  187. tools/migrate/tests/test_migrate.py +627 -0
  188. tools/migrate/tests/test_migration_e2e.py +1010 -0
  189. tools/migrate/tests/test_node_migrations.py +813 -0
  190. flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
  191. flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
  192. flowfile/web/static/assets/Read-80dc1675.css +0 -197
  193. flowfile/web/static/assets/Read-c3b1929c.js +0 -701
  194. flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
  195. flowfile/web/static/assets/Union-89fd73dc.js +0 -146
  196. flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
  197. flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
  198. flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
  199. flowfile_core/flowfile/manage/open_flowfile.py +0 -135
  200. {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
  201. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -0,0 +1,1010 @@
1
+ """
2
+ End-to-end tests for migration tool.
3
+
4
+ These tests verify that migrated flows can actually be loaded and executed
5
+ by the flowfile system - not just that the YAML/JSON structure is correct.
6
+
7
+ Run with:
8
+ pytest tools/migrate/tests/test_migration_e2e.py -v
9
+ """
10
+
11
+ import pickle
12
+ import tempfile
13
+ from pathlib import Path
14
+ from typing import List, Dict, Literal
15
+
16
+ import pytest
17
+
18
+ from tools.migrate.legacy_schemas import (
19
+ # Flow schemas
20
+ FlowInformation, FlowSettings, NodeInformation,
21
+
22
+ # Node schemas
23
+ NodeSelect, NodeFilter, NodeFormula, NodeOutput,
24
+ NodeJoin, NodeGroupBy, NodeSort, NodeUnique,
25
+ NodePivot, NodeUnpivot, NodeRecordId, NodeTextToRows,
26
+ NodeGraphSolver, NodeUnion,
27
+
28
+ # Input/Output schemas
29
+ OutputSettings, OutputCsvTable,
30
+
31
+ # Transform schemas
32
+ SelectInput, SelectInputs, JoinInputs, JoinInput, JoinMap,
33
+ FilterInput, BasicFilter, FunctionInput, FieldInput,
34
+ GroupByInput, AggColl, SortByInput, UniqueInput,
35
+ PivotInput, UnpivotInput, RecordIdInput, TextToRowsInput,
36
+ GraphSolverInput, UnionInput,
37
+ )
38
+
39
+ from tools.migrate.migrate import migrate_flowfile
40
+
41
+ # Import actual flowfile system for verification
42
+ from flowfile_core.flowfile.handler import FlowfileHandler
43
+ from flowfile_core.flowfile.flow_graph import FlowGraph, add_connection
44
+ from flowfile_core.schemas import input_schema, transform_schema, schemas
45
+ from flowfile_core.schemas.output_model import RunInformation
46
+ from flowfile_core.flowfile.manage.io_flowfile import open_flow
47
+
48
+
49
+ # =============================================================================
50
+ # HELPERS
51
+ # =============================================================================
52
+
53
def create_graph(
    flow_id: int = 1,
    execution_mode: Literal['Development', 'Performance'] = 'Development',
) -> FlowGraph:
    """Register a test flow on a fresh handler and return its graph.

    Args:
        flow_id: Identifier the flow is registered under and looked up by.
        execution_mode: Passed straight through to ``schemas.FlowSettings``.

    Returns:
        Whatever ``FlowfileHandler.get_flow`` yields for ``flow_id``.
    """
    flow_handler = FlowfileHandler()
    settings = schemas.FlowSettings(
        flow_id=flow_id,
        name='test_flow',
        path='.',
        execution_mode=execution_mode,
    )
    flow_handler.register_flow(settings)
    return flow_handler.get_flow(flow_id)
63
+
64
+
65
def add_manual_input(graph: FlowGraph, data: List[Dict], node_id: int = 1):
    """Attach a ``manual_input`` node carrying ``data`` to ``graph``.

    A node promise of type ``'manual_input'`` is registered first; the node
    settings are then added with the rows converted through
    ``RawData.from_pylist``.

    Args:
        graph: Graph to extend (its ``flow_id`` is reused for the new node).
        data: Rows as a list of dicts.
        node_id: Id given to the new node.

    Returns:
        The same ``graph`` object that was passed in.
    """
    graph.add_node_promise(
        input_schema.NodePromise(
            flow_id=graph.flow_id,
            node_id=node_id,
            node_type='manual_input',
        )
    )
    graph.add_manual_input(
        input_schema.NodeManualInput(
            flow_id=graph.flow_id,
            node_id=node_id,
            raw_data_format=input_schema.RawData.from_pylist(data),
        )
    )
    return graph
80
+
81
+
82
def add_node_promise(graph: FlowGraph, node_type: str, node_id: int):
    """Register a ``NodePromise`` of ``node_type`` under ``node_id`` on ``graph``.

    The promise reuses ``graph.flow_id``; nothing is returned.
    """
    graph.add_node_promise(
        input_schema.NodePromise(
            flow_id=graph.flow_id,
            node_id=node_id,
            node_type=node_type,
        )
    )
90
+
91
+
92
def handle_run_info(run_info: RunInformation):
    """Raise ``ValueError`` unless ``run_info`` reports a successful run.

    Raises:
        ValueError: If ``run_info`` is ``None``, or if ``run_info.success`` is
            falsy. In the latter case the message lists the ``node_id`` and
            ``error`` of every entry in ``node_step_result`` that did not
            succeed.
    """
    if run_info is None:
        raise ValueError("Run info is None")
    if run_info.success:
        return

    # One line per failed step, appended after the 'errors:' header.
    failed_steps = [
        f'\n node_id:{step.node_id}, error: {step.error}'
        for step in run_info.node_step_result
        if not step.success
    ]
    errors = 'errors:' + ''.join(failed_steps)
    raise ValueError(f'Graph should run successfully:\n{errors}')
102
+
103
+
104
+ # =============================================================================
105
+ # FIXTURES
106
+ # =============================================================================
107
+
108
@pytest.fixture
def temp_dir():
    """Yield a ``Path`` to a temporary directory that is removed afterwards."""
    with tempfile.TemporaryDirectory() as scratch:
        scratch_path = Path(scratch)
        yield scratch_path
113
+
114
+
115
@pytest.fixture
def sample_data() -> List[Dict]:
    """Four small person records with name, age, city and sales columns."""
    columns = ('name', 'age', 'city', 'sales')
    rows = (
        ('Alice', 30, 'NYC', 100),
        ('Bob', 25, 'LA', 150),
        ('Charlie', 35, 'NYC', 200),
        ('Diana', 28, 'Chicago', 120),
    )
    return [dict(zip(columns, row)) for row in rows]
124
+
125
+
126
+ # =============================================================================
127
+ # BASELINE TEST - Verify flowfile system works without migration
128
+ # =============================================================================
129
+
130
class TestFlowfileBaseline:
    """Sanity checks that small graphs run on the current flowfile system.

    No migration is involved here, so a failure points at the engine or the
    test helpers rather than at the migration tool.
    """

    def test_manual_input_runs(self, sample_data):
        """A graph holding only a manual input node runs and exposes node 1."""
        graph = create_graph(flow_id=1)
        add_manual_input(graph, sample_data, node_id=1)

        handle_run_info(graph.run_graph())

        assert graph.get_node(1) is not None

    def test_select_node_runs(self, sample_data):
        """Manual input feeding a select node (one rename, one keep) runs."""
        graph = create_graph(flow_id=2)
        add_manual_input(graph, sample_data, node_id=1)

        add_node_promise(graph, 'select', node_id=2)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(1, 2))

        columns = [
            transform_schema.SelectInput(old_name='name', new_name='full_name', keep=True),
            transform_schema.SelectInput(old_name='age', keep=True),
        ]
        graph.add_select(
            input_schema.NodeSelect(
                flow_id=graph.flow_id,
                node_id=2,
                depending_on_id=1,
                select_input=columns,
            )
        )

        handle_run_info(graph.run_graph())

    def test_filter_node_runs(self, sample_data):
        """Manual input feeding an advanced filter (``[age] > 25``) runs."""
        graph = create_graph(flow_id=3)
        add_manual_input(graph, sample_data, node_id=1)

        add_node_promise(graph, 'filter', node_id=2)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(1, 2))

        age_filter = transform_schema.FilterInput(
            filter_type='advanced',
            advanced_filter='[age] > 25',
        )
        graph.add_filter(
            input_schema.NodeFilter(
                flow_id=graph.flow_id,
                node_id=2,
                depending_on_id=1,
                filter_input=age_filter,
            )
        )

        handle_run_info(graph.run_graph())
191
+
192
+
193
+ # =============================================================================
194
+ # YAML ROUND-TRIP TESTS - Save and reload flows (using NEW API)
195
+ # =============================================================================
196
+
197
class TestYamlRoundTrip:
    """Test that flows survive a YAML save/load cycle.

    Every test builds a graph with the current API, saves it to
    ``<temp_dir>/test.yaml``, reopens it with ``open_flow`` and runs the
    reloaded graph. The shared wiring and the save/reload/run sequence live in
    two private helpers so each test only spells out the node under test.
    """

    @staticmethod
    def _single_input_graph(flow_id, data, node_type):
        """Return a graph: manual input (node 1) -> ``node_type`` promise (node 2)."""
        graph = create_graph(flow_id=flow_id)
        add_manual_input(graph, data, node_id=1)
        add_node_promise(graph, node_type, node_id=2)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(1, 2))
        return graph

    @staticmethod
    def _save_reload_run(graph, temp_dir):
        """Save ``graph`` as YAML, reopen it, run it, and return the reloaded flow.

        Raises ``ValueError`` (via ``handle_run_info``) if the reloaded flow
        does not run successfully.
        """
        path = temp_dir / 'test.yaml'
        graph.save_flow(str(path))
        loaded_flow = open_flow(path)
        handle_run_info(loaded_flow.run_graph())
        return loaded_flow

    def test_select_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with select node."""
        graph = self._single_input_graph(100, sample_data, 'select')
        select_input = [
            transform_schema.SelectInput(old_name='name', new_name='person_name'),
            transform_schema.SelectInput(old_name='sales', keep=True),
        ]
        graph.add_select(input_schema.NodeSelect(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            select_input=select_input,
        ))

        loaded_flow = self._save_reload_run(graph, temp_dir)

        # Verify node preserved
        loaded_select = loaded_flow.get_node(2)
        assert loaded_select is not None
        assert loaded_select.setting_input.select_input[0].new_name == 'person_name'

    def test_filter_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with filter node."""
        graph = self._single_input_graph(101, sample_data, 'filter')
        filter_input = transform_schema.FilterInput(
            filter_type='basic',
            basic_filter=transform_schema.BasicFilter(
                field='age',
                filter_type='>',
                filter_value='25',
            ),
        )
        graph.add_filter(input_schema.NodeFilter(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            filter_input=filter_input,
        ))

        self._save_reload_run(graph, temp_dir)

    def test_groupby_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with groupby node."""
        graph = self._single_input_graph(102, sample_data, 'group_by')
        groupby_input = transform_schema.GroupByInput([
            transform_schema.AggColl('city', 'groupby'),
            transform_schema.AggColl('sales', 'sum', 'total_sales'),
            transform_schema.AggColl('age', 'mean', 'avg_age'),
        ])
        graph.add_group_by(input_schema.NodeGroupBy(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            groupby_input=groupby_input,
        ))

        loaded_flow = self._save_reload_run(graph, temp_dir)

        # Verify aggregations preserved
        loaded_gb = loaded_flow.get_node(2)
        agg_cols = loaded_gb.setting_input.groupby_input.agg_cols
        assert len(agg_cols) == 3
        assert agg_cols[1].new_name == 'total_sales'

    def test_join_roundtrip(self, temp_dir):
        """Save and reload a flow with join node (using NEW API with required selects)."""
        graph = create_graph(flow_id=103)

        # Left table (node 1) and right table (node 2)
        left_data = [
            {'id': 1, 'name': 'Alice'},
            {'id': 2, 'name': 'Bob'},
        ]
        right_data = [
            {'id': 1, 'dept': 'Sales'},
            {'id': 2, 'dept': 'Engineering'},
        ]
        add_manual_input(graph, left_data, node_id=1)
        add_manual_input(graph, right_data, node_id=2)

        # Join node - NEW API requires left_select and right_select
        add_node_promise(graph, 'join', node_id=3)
        left_conn = input_schema.NodeConnection.create_from_simple_input(1, 3)
        right_conn = input_schema.NodeConnection.create_from_simple_input(2, 3, input_type='right')
        add_connection(graph, left_conn)
        add_connection(graph, right_conn)

        join_input = transform_schema.JoinInput(
            join_mapping=[transform_schema.JoinMap(left_col='id', right_col='id')],
            how='inner',
            left_select=transform_schema.JoinInputs(renames=[]),  # Required in new API
            right_select=transform_schema.JoinInputs(renames=[]),  # Required in new API
        )
        graph.add_join(input_schema.NodeJoin(
            flow_id=graph.flow_id,
            node_id=3,
            depending_on_ids=[1, 2],
            join_input=join_input,
        ))

        self._save_reload_run(graph, temp_dir)

    def test_formula_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with formula node."""
        graph = self._single_input_graph(104, sample_data, 'formula')
        graph.add_formula(input_schema.NodeFormula(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            function=transform_schema.FunctionInput(
                field=transform_schema.FieldInput(name='double_sales'),
                function='[sales] * 2',
            ),
        ))

        self._save_reload_run(graph, temp_dir)

    def test_sort_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with sort node."""
        graph = self._single_input_graph(105, sample_data, 'sort')
        graph.add_sort(input_schema.NodeSort(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            sort_input=[
                transform_schema.SortByInput(column='age', how='desc'),
                transform_schema.SortByInput(column='name', how='asc'),
            ],
        ))

        self._save_reload_run(graph, temp_dir)

    def test_unique_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with unique node."""
        graph = self._single_input_graph(106, sample_data, 'unique')
        graph.add_unique(input_schema.NodeUnique(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            unique_input=transform_schema.UniqueInput(
                columns=['city'],
                strategy='first',
            ),
        ))

        self._save_reload_run(graph, temp_dir)

    def test_record_id_roundtrip(self, temp_dir, sample_data):
        """Save and reload a flow with record_id node."""
        graph = self._single_input_graph(107, sample_data, 'record_id')
        graph.add_record_id(input_schema.NodeRecordId(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            record_id_input=transform_schema.RecordIdInput(
                output_column_name='row_num',
                offset=1,
            ),
        ))

        self._save_reload_run(graph, temp_dir)
468
+
469
+
470
+ # =============================================================================
471
+ # LEGACY MIGRATION TESTS - Test OLD pickle format → migrate → run
472
+ # =============================================================================
473
+
474
+ class TestLegacyMigration:
475
+ """Test migration of OLD pickle format to new YAML format.
476
+
477
+ These tests:
478
+ 1. Create flows using LEGACY schemas (simulating old .flowfile pickles)
479
+ 2. Pickle them
480
+ 3. Run migrate_flowfile()
481
+ 4. Load with open_flow()
482
+ 5. Add runtime data and verify execution
483
+ """
484
+
485
def test_join_migration_with_none_selects(self, temp_dir):
    """Migrate old pickle with join node where left_select/right_select are None.

    Builds a legacy ``FlowInformation`` with two manual inputs feeding a join,
    pickles it, migrates it to YAML, reopens it with ``open_flow``, supplies
    the manual input rows and checks the flow runs.
    """
    # 1. Legacy join settings: both selects left as None, as the OLD schema allowed.
    legacy_join_input = JoinInput(
        join_mapping=[JoinMap(left_col='id', right_col='id')],
        how='inner',
        left_select=None,
        right_select=None,
    )
    legacy_settings = FlowSettings(
        flow_id=103,
        name='join_migration_test',
        path='.',
        execution_mode='Development',
    )
    legacy_nodes = {
        1: NodeInformation(id=1, type='manual_input', is_setup=True, setting_input=None),
        2: NodeInformation(id=2, type='manual_input', is_setup=True, setting_input=None),
        3: NodeInformation(
            id=3,
            type='join',
            is_setup=True,
            left_input_id=1,
            right_input_id=2,
            setting_input=NodeJoin(
                flow_id=103,
                node_id=3,
                depending_on_ids=[1, 2],
                join_input=legacy_join_input,
            ),
        ),
    }
    legacy_flow = FlowInformation(
        flow_id=103,
        flow_name='join_migration_test',
        flow_settings=legacy_settings,
        data=legacy_nodes,
        node_connections=[(1, 3), (2, 3)],
        node_starts=[1, 2],
    )

    # 2. Pickle it (simulating old .flowfile)
    pickle_path = temp_dir / 'old_join.flowfile'
    with open(pickle_path, 'wb') as handle:
        pickle.dump(legacy_flow, handle)

    # 3. Migrate to YAML, then 4. load with the current system
    yaml_path = temp_dir / 'migrated.yaml'
    migrate_flowfile(pickle_path, yaml_path, 'yaml')
    loaded_flow = open_flow(yaml_path)

    # 5. Add manual input data (not stored in pickle, added at runtime)
    rows_by_node = {
        1: [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}],
        2: [{'id': 1, 'dept': 'Sales'}, {'id': 2, 'dept': 'Engineering'}],
    }
    for node_id, rows in rows_by_node.items():
        loaded_flow.add_manual_input(input_schema.NodeManualInput(
            flow_id=103,
            node_id=node_id,
            raw_data_format=input_schema.RawData.from_pylist(rows),
        ))

    # 6. Run and verify
    handle_run_info(loaded_flow.run_graph())
563
+
564
+ def test_filter_migration(self, temp_dir):
565
+ """Migrate old pickle with filter node."""
566
+ legacy_filter = FilterInput(
567
+ filter_type='advanced',
568
+ advanced_filter='[age] > 25',
569
+ basic_filter=None,
570
+ )
571
+
572
+ legacy_flow = FlowInformation(
573
+ flow_id=201,
574
+ flow_name='filter_migration_test',
575
+ flow_settings=FlowSettings(
576
+ flow_id=201,
577
+ name='filter_migration_test',
578
+ path='.',
579
+ execution_mode='Development',
580
+ ),
581
+ data={
582
+ 1: NodeInformation(
583
+ id=1,
584
+ type='manual_input',
585
+ is_setup=True,
586
+ setting_input=None,
587
+ ),
588
+ 2: NodeInformation(
589
+ id=2,
590
+ type='filter',
591
+ is_setup=True,
592
+ setting_input=NodeFilter(
593
+ flow_id=201,
594
+ node_id=2,
595
+ depending_on_id=1,
596
+ filter_input=legacy_filter,
597
+ ),
598
+ ),
599
+ },
600
+ node_connections=[(1, 2)],
601
+ node_starts=[1],
602
+ )
603
+
604
+ # Pickle
605
+ pickle_path = temp_dir / 'old_filter.flowfile'
606
+ with open(pickle_path, 'wb') as f:
607
+ pickle.dump(legacy_flow, f)
608
+
609
+ # Migrate
610
+ yaml_path = temp_dir / 'migrated_filter.yaml'
611
+ migrate_flowfile(pickle_path, yaml_path, 'yaml')
612
+
613
+ # Load and add data
614
+ loaded_flow = open_flow(yaml_path)
615
+ data = [
616
+ {'name': 'Alice', 'age': 30},
617
+ {'name': 'Bob', 'age': 20},
618
+ {'name': 'Charlie', 'age': 35},
619
+ ]
620
+ loaded_flow.add_manual_input(input_schema.NodeManualInput(
621
+ flow_id=201, node_id=1,
622
+ raw_data_format=input_schema.RawData.from_pylist(data)
623
+ ))
624
+
625
+ # Run and verify
626
+ run_info = loaded_flow.run_graph()
627
+ handle_run_info(run_info)
628
+
629
+ def test_groupby_migration(self, temp_dir):
630
+ """Migrate old pickle with groupby node."""
631
+ legacy_groupby = GroupByInput(
632
+ agg_cols=[
633
+ AggColl(old_name='city', agg='groupby'),
634
+ AggColl(old_name='sales', agg='sum', new_name='total_sales'),
635
+ ]
636
+ )
637
+
638
+ legacy_flow = FlowInformation(
639
+ flow_id=202,
640
+ flow_name='groupby_migration_test',
641
+ flow_settings=FlowSettings(
642
+ flow_id=202,
643
+ name='groupby_migration_test',
644
+ path='.',
645
+ execution_mode='Development',
646
+ ),
647
+ data={
648
+ 1: NodeInformation(
649
+ id=1,
650
+ type='manual_input',
651
+ is_setup=True,
652
+ setting_input=None,
653
+ ),
654
+ 2: NodeInformation(
655
+ id=2,
656
+ type='group_by',
657
+ is_setup=True,
658
+ setting_input=NodeGroupBy(
659
+ flow_id=202,
660
+ node_id=2,
661
+ depending_on_id=1,
662
+ groupby_input=legacy_groupby,
663
+ ),
664
+ ),
665
+ },
666
+ node_connections=[(1, 2)],
667
+ node_starts=[1],
668
+ )
669
+
670
+ # Pickle
671
+ pickle_path = temp_dir / 'old_groupby.flowfile'
672
+ with open(pickle_path, 'wb') as f:
673
+ pickle.dump(legacy_flow, f)
674
+
675
+ # Migrate
676
+ yaml_path = temp_dir / 'migrated_groupby.yaml'
677
+ migrate_flowfile(pickle_path, yaml_path, 'yaml')
678
+
679
+ # Load and add data
680
+ loaded_flow = open_flow(yaml_path)
681
+ data = [
682
+ {'city': 'NYC', 'sales': 100},
683
+ {'city': 'NYC', 'sales': 150},
684
+ {'city': 'LA', 'sales': 200},
685
+ ]
686
+ loaded_flow.add_manual_input(input_schema.NodeManualInput(
687
+ flow_id=202, node_id=1,
688
+ raw_data_format=input_schema.RawData.from_pylist(data)
689
+ ))
690
+
691
+ # Run and verify
692
+ run_info = loaded_flow.run_graph()
693
+ handle_run_info(run_info)
694
+
695
+ def test_select_migration(self, temp_dir):
696
+ """Migrate old pickle with select node."""
697
+ legacy_select_input = [
698
+ SelectInput(old_name='name', new_name='person_name', keep=True),
699
+ SelectInput(old_name='age', keep=True),
700
+ ]
701
+
702
+ legacy_flow = FlowInformation(
703
+ flow_id=203,
704
+ flow_name='select_migration_test',
705
+ flow_settings=FlowSettings(
706
+ flow_id=203,
707
+ name='select_migration_test',
708
+ path='.',
709
+ execution_mode='Development',
710
+ ),
711
+ data={
712
+ 1: NodeInformation(
713
+ id=1,
714
+ type='manual_input',
715
+ is_setup=True,
716
+ setting_input=None,
717
+ ),
718
+ 2: NodeInformation(
719
+ id=2,
720
+ type='select',
721
+ is_setup=True,
722
+ setting_input=NodeSelect(
723
+ flow_id=203,
724
+ node_id=2,
725
+ depending_on_id=1,
726
+ select_input=legacy_select_input,
727
+ ),
728
+ ),
729
+ },
730
+ node_connections=[(1, 2)],
731
+ node_starts=[1],
732
+ )
733
+
734
+ # Pickle
735
+ pickle_path = temp_dir / 'old_select.flowfile'
736
+ with open(pickle_path, 'wb') as f:
737
+ pickle.dump(legacy_flow, f)
738
+
739
+ # Migrate
740
+ yaml_path = temp_dir / 'migrated_select.yaml'
741
+ migrate_flowfile(pickle_path, yaml_path, 'yaml')
742
+
743
+ # Load and add data
744
+ loaded_flow = open_flow(yaml_path)
745
+ data = [
746
+ {'name': 'Alice', 'age': 30, 'city': 'NYC'},
747
+ {'name': 'Bob', 'age': 25, 'city': 'LA'},
748
+ ]
749
+ loaded_flow.add_manual_input(input_schema.NodeManualInput(
750
+ flow_id=203, node_id=1,
751
+ raw_data_format=input_schema.RawData.from_pylist(data)
752
+ ))
753
+
754
+ # Run and verify
755
+ run_info = loaded_flow.run_graph()
756
+ handle_run_info(run_info)
757
+
758
+ def test_formula_migration(self, temp_dir):
759
+ """Migrate old pickle with formula node."""
760
+ legacy_function = FunctionInput(
761
+ field=FieldInput(name='double_sales'),
762
+ function='[sales] * 2'
763
+ )
764
+
765
+ legacy_flow = FlowInformation(
766
+ flow_id=204,
767
+ flow_name='formula_migration_test',
768
+ flow_settings=FlowSettings(
769
+ flow_id=204,
770
+ name='formula_migration_test',
771
+ path='.',
772
+ execution_mode='Development',
773
+ ),
774
+ data={
775
+ 1: NodeInformation(
776
+ id=1,
777
+ type='manual_input',
778
+ is_setup=True,
779
+ setting_input=None,
780
+ ),
781
+ 2: NodeInformation(
782
+ id=2,
783
+ type='formula',
784
+ is_setup=True,
785
+ setting_input=NodeFormula(
786
+ flow_id=204,
787
+ node_id=2,
788
+ depending_on_id=1,
789
+ function=legacy_function,
790
+ ),
791
+ ),
792
+ },
793
+ node_connections=[(1, 2)],
794
+ node_starts=[1],
795
+ )
796
+
797
+ # Pickle
798
+ pickle_path = temp_dir / 'old_formula.flowfile'
799
+ with open(pickle_path, 'wb') as f:
800
+ pickle.dump(legacy_flow, f)
801
+
802
+ # Migrate
803
+ yaml_path = temp_dir / 'migrated_formula.yaml'
804
+ migrate_flowfile(pickle_path, yaml_path, 'yaml')
805
+
806
+ # Load and add data
807
+ loaded_flow = open_flow(yaml_path)
808
+ data = [
809
+ {'name': 'Alice', 'sales': 100},
810
+ {'name': 'Bob', 'sales': 150},
811
+ ]
812
+ loaded_flow.add_manual_input(input_schema.NodeManualInput(
813
+ flow_id=204, node_id=1,
814
+ raw_data_format=input_schema.RawData.from_pylist(data)
815
+ ))
816
+
817
+ # Run and verify
818
+ run_info = loaded_flow.run_graph()
819
+ handle_run_info(run_info)
820
+
821
+ def test_sort_migration(self, temp_dir):
822
+ """Migrate old pickle with sort node."""
823
+ legacy_sort = [
824
+ SortByInput(column='age', how='desc'),
825
+ SortByInput(column='name', how='asc'),
826
+ ]
827
+
828
+ legacy_flow = FlowInformation(
829
+ flow_id=205,
830
+ flow_name='sort_migration_test',
831
+ flow_settings=FlowSettings(
832
+ flow_id=205,
833
+ name='sort_migration_test',
834
+ path='.',
835
+ execution_mode='Development',
836
+ ),
837
+ data={
838
+ 1: NodeInformation(
839
+ id=1,
840
+ type='manual_input',
841
+ is_setup=True,
842
+ setting_input=None,
843
+ ),
844
+ 2: NodeInformation(
845
+ id=2,
846
+ type='sort',
847
+ is_setup=True,
848
+ setting_input=NodeSort(
849
+ flow_id=205,
850
+ node_id=2,
851
+ depending_on_id=1,
852
+ sort_input=legacy_sort,
853
+ ),
854
+ ),
855
+ },
856
+ node_connections=[(1, 2)],
857
+ node_starts=[1],
858
+ )
859
+
860
+ # Pickle
861
+ pickle_path = temp_dir / 'old_sort.flowfile'
862
+ with open(pickle_path, 'wb') as f:
863
+ pickle.dump(legacy_flow, f)
864
+
865
+ # Migrate
866
+ yaml_path = temp_dir / 'migrated_sort.yaml'
867
+ migrate_flowfile(pickle_path, yaml_path, 'yaml')
868
+
869
+ # Load and add data
870
+ loaded_flow = open_flow(yaml_path)
871
+ data = [
872
+ {'name': 'Alice', 'age': 30},
873
+ {'name': 'Bob', 'age': 25},
874
+ {'name': 'Charlie', 'age': 35},
875
+ ]
876
+ loaded_flow.add_manual_input(input_schema.NodeManualInput(
877
+ flow_id=205, node_id=1,
878
+ raw_data_format=input_schema.RawData.from_pylist(data)
879
+ ))
880
+
881
+ # Run and verify
882
+ run_info = loaded_flow.run_graph()
883
+ handle_run_info(run_info)
884
+
885
+
886
+ # =============================================================================
887
+ # COMPLEX PIPELINE TESTS
888
+ # =============================================================================
889
+
890
class TestComplexPipelines:
    """Pipelines that chain several node types together."""

    def test_etl_pipeline(self, temp_dir):
        """Build input -> filter -> formula -> groupby, run it, then run it again after a save/reload."""
        graph = create_graph(flow_id=200)

        rows = [
            {'region': 'East', 'product': 'A', 'sales': 100, 'active': True},
            {'region': 'East', 'product': 'B', 'sales': 150, 'active': True},
            {'region': 'West', 'product': 'A', 'sales': 200, 'active': False},
            {'region': 'West', 'product': 'B', 'sales': 120, 'active': True},
            {'region': 'East', 'product': 'A', 'sales': 80, 'active': True},
        ]
        add_manual_input(graph, rows, node_id=1)

        # Node 2 - filter on the `active` column.
        add_node_promise(graph, 'filter', node_id=2)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(1, 2))
        active_only = transform_schema.FilterInput(
            filter_type='advanced',
            advanced_filter='[active] == true'
        )
        filter_settings = input_schema.NodeFilter(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            filter_input=active_only,
        )
        graph.add_filter(filter_settings)

        # Node 3 - formula producing `adjusted_sales` as sales * 1.1.
        add_node_promise(graph, 'formula', node_id=3)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(2, 3))
        uplift = transform_schema.FunctionInput(
            field=transform_schema.FieldInput(name='adjusted_sales'),
            function='[sales] * 1.1'
        )
        formula_settings = input_schema.NodeFormula(
            flow_id=graph.flow_id,
            node_id=3,
            depending_on_id=2,
            function=uplift,
        )
        graph.add_formula(formula_settings)

        # Node 4 - group by region and sum `adjusted_sales`.
        add_node_promise(graph, 'group_by', node_id=4)
        add_connection(graph, input_schema.NodeConnection.create_from_simple_input(3, 4))
        per_region = transform_schema.GroupByInput([
            transform_schema.AggColl('region', 'groupby'),
            transform_schema.AggColl('adjusted_sales', 'sum', 'total_adjusted'),
        ])
        groupby_settings = input_schema.NodeGroupBy(
            flow_id=graph.flow_id,
            node_id=4,
            depending_on_id=3,
            groupby_input=per_region,
        )
        graph.add_group_by(groupby_settings)

        # First run: the graph as built in memory.
        handle_run_info(graph.run_graph())

        # Persist to YAML and read it back.
        yaml_file = temp_dir / 'etl_pipeline.yaml'
        graph.save_flow(str(yaml_file))
        loaded_flow = open_flow(yaml_file)

        # Second run: the reloaded graph.
        handle_run_info(loaded_flow.run_graph())

        # Every node of the pipeline must still be retrievable after the reload.
        for node_id in (1, 2, 3, 4):
            assert loaded_flow.get_node(node_id) is not None
963
+
964
+
965
+ # =============================================================================
966
+ # OUTPUT NODE TESTS
967
+ # =============================================================================
968
+
969
class TestOutputNode:
    """Roundtrip tests for the output node."""

    def test_csv_output_roundtrip(self, temp_dir, sample_data):
        """Save a flow with a CSV output node and check its table settings after reload."""
        graph = create_graph(flow_id=300)
        add_manual_input(graph, sample_data, node_id=1)

        add_node_promise(graph, 'output', node_id=2)
        link = input_schema.NodeConnection.create_from_simple_input(1, 2)
        add_connection(graph, link)

        # Target directory for the output node's settings.
        output_dir = temp_dir / 'output'
        output_dir.mkdir()

        csv_table = input_schema.OutputCsvTable(
            delimiter=';',
            encoding='utf-8'
        )
        settings = input_schema.OutputSettings(
            name='result.csv',
            directory=str(output_dir),
            file_type='csv',
            write_mode='overwrite',
            table_settings=csv_table,
        )
        output_node_settings = input_schema.NodeOutput(
            flow_id=graph.flow_id,
            node_id=2,
            depending_on_id=1,
            output_settings=settings,
        )
        graph.add_output(output_node_settings)

        # Write the flow to YAML and open it again.
        yaml_file = temp_dir / 'test.yaml'
        graph.save_flow(str(yaml_file))
        loaded_flow = open_flow(yaml_file)

        # The custom delimiter must survive the roundtrip.
        reloaded_table = loaded_flow.get_node(2).setting_input.output_settings.table_settings
        assert reloaded_table.delimiter == ';'
1007
+
1008
+
1009
if __name__ == '__main__':
    # Executed as a script: hand this file to pytest in verbose mode.
    verbose_args = [__file__, '-v']
    pytest.main(verbose_args)