Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -0,0 +1,627 @@
1
+ """
2
+ Tests for schema compatibility and migration validation.
3
+
4
+ These tests verify that:
5
+ 1. Old flat ReceivedTable can be migrated to new nested table_settings
6
+ 2. Old separate OutputSettings tables can be migrated to unified table_settings
7
+ 3. All node types are handled correctly in migration
8
+ """
9
+
10
+ import pickle
11
+ import tempfile
12
+ import json
13
+ from pathlib import Path
14
+ from typing import Dict, Any
15
+ import pytest
16
+
17
+
18
+ # =============================================================================
19
+ # FIXTURES
20
+ # =============================================================================
21
+
22
@pytest.fixture
def temp_dir():
    """Yield a Path to a scratch directory that is removed after the test."""
    # TemporaryDirectory deletes itself when the context exits,
    # i.e. once the dependent test has finished with the path.
    with tempfile.TemporaryDirectory() as scratch:
        yield Path(scratch)
27
+
28
+
29
+ # =============================================================================
30
+ # OLD -> NEW SCHEMA TRANSFORMATION TESTS
31
+ # =============================================================================
32
+
33
class TestReceivedTableTransformation:
    """Test transformation of OLD flat ReceivedTable to NEW nested table_settings."""

    @staticmethod
    def _migrate_single_read(temp_dir, received):
        """Pickle a one-node 'read' flow around *received*, migrate it to JSON,
        and return the migrated node's ``received_file`` dict.

        Consolidates the pickle/migrate/load boilerplate that was previously
        repeated verbatim in every test of this class.
        """
        from tools.migrate.legacy_schemas import (
            FlowInformation, FlowSettings, NodeInformation, NodeRead
        )
        from tools.migrate.migrate import migrate_flowfile

        node = NodeRead(flow_id=1, node_id=1, received_file=received)
        flow = FlowInformation(
            flow_id=1,
            flow_name='test',
            flow_settings=FlowSettings(flow_id=1, name='test'),
            data={1: NodeInformation(id=1, type='read', setting_input=node)},
            node_starts=[1],
            node_connections=[],
        )

        # Legacy flowfiles are pickled objects on disk.
        pickle_path = temp_dir / 'test.flowfile'
        with open(pickle_path, 'wb') as f:
            pickle.dump(flow, f)

        output_path = migrate_flowfile(pickle_path, format='json')

        with open(output_path) as f:
            data = json.load(f)

        return data['nodes'][0]['setting_input']['received_file']

    def test_csv_flat_to_nested(self, temp_dir):
        """Test that flat CSV fields become nested in table_settings."""
        from tools.migrate.legacy_schemas import ReceivedTable

        # OLD format: all CSV options are flat fields on ReceivedTable.
        received = ReceivedTable(
            name='data.csv',
            path='/path/to/data.csv',
            file_type='csv',
            delimiter=';',
            encoding='latin-1',
            has_headers=True,
            starting_from_line=1,
            infer_schema_length=5000,
            quote_char="'",
            row_delimiter='\n',
            truncate_ragged_lines=True,
            ignore_errors=True,
        )

        received_file = self._migrate_single_read(temp_dir, received)

        # Verify NEW structure has table_settings
        assert 'table_settings' in received_file
        ts = received_file['table_settings']

        # Verify values migrated correctly.  json.load yields real bools,
        # so identity checks are safe here (and satisfy PEP 8 / E712).
        assert ts['file_type'] == 'csv'
        assert ts['delimiter'] == ';'
        assert ts['encoding'] == 'latin-1'
        assert ts['has_headers'] is True
        assert ts['starting_from_line'] == 1
        assert ts['infer_schema_length'] == 5000
        assert ts['quote_char'] == "'"
        assert ts['truncate_ragged_lines'] is True
        assert ts['ignore_errors'] is True

    def test_excel_flat_to_nested(self, temp_dir):
        """Test that flat Excel fields become nested in table_settings."""
        from tools.migrate.legacy_schemas import ReceivedTable

        # OLD format: Excel range/sheet options live directly on the table.
        received = ReceivedTable(
            name='data.xlsx',
            path='/path/to/data.xlsx',
            file_type='excel',
            sheet_name='Sales Data',
            start_row=2,
            start_column=1,
            end_row=100,
            end_column=10,
            has_headers=True,
            type_inference=True,
        )

        received_file = self._migrate_single_read(temp_dir, received)

        # Verify NEW structure
        assert received_file['file_type'] == 'excel'
        assert 'table_settings' in received_file
        ts = received_file['table_settings']

        assert ts['file_type'] == 'excel'
        assert ts['sheet_name'] == 'Sales Data'
        assert ts['start_row'] == 2
        assert ts['start_column'] == 1
        assert ts['end_row'] == 100
        assert ts['end_column'] == 10
        assert ts['has_headers'] is True
        assert ts['type_inference'] is True

    def test_parquet_flat_to_nested(self, temp_dir):
        """Test that parquet file type gets table_settings."""
        from tools.migrate.legacy_schemas import ReceivedTable

        # Parquet has no extra read options; migration should still nest it.
        received = ReceivedTable(
            name='data.parquet',
            path='/path/to/data.parquet',
            file_type='parquet',
        )

        received_file = self._migrate_single_read(temp_dir, received)

        assert received_file['file_type'] == 'parquet'
        assert 'table_settings' in received_file
        assert received_file['table_settings']['file_type'] == 'parquet'
194
+
195
+
196
class TestOutputSettingsTransformation:
    """Test transformation of OLD separate output tables to NEW unified table_settings."""

    def test_csv_output_consolidation(self, temp_dir):
        """Test that separate output_csv_table becomes table_settings."""
        from tools.migrate.legacy_schemas import (
            FlowInformation, FlowSettings, NodeInformation,
            NodeOutput, OutputSettings, OutputCsvTable
        )
        from tools.migrate.migrate import migrate_flowfile

        # OLD format: a dedicated table object per output file type.
        legacy_settings = OutputSettings(
            name='result.csv',
            directory='/output',
            file_type='csv',
            write_mode='overwrite',
            output_csv_table=OutputCsvTable(delimiter='|', encoding='utf-16'),
        )

        setting_input = NodeOutput(flow_id=1, node_id=1, output_settings=legacy_settings)
        flow = FlowInformation(
            flow_id=1,
            flow_name='test',
            flow_settings=FlowSettings(flow_id=1, name='test'),
            data={1: NodeInformation(id=1, type='output', setting_input=setting_input)},
            node_starts=[1],
            node_connections=[],
        )

        source_path = temp_dir / 'test.flowfile'
        with open(source_path, 'wb') as handle:
            pickle.dump(flow, handle)

        migrated_path = migrate_flowfile(source_path, format='json')
        with open(migrated_path) as handle:
            data = json.load(handle)

        out_settings = data['nodes'][0]['setting_input']['output_settings']

        # NEW structure: one unified table_settings dict.
        assert 'table_settings' in out_settings
        assert out_settings['table_settings']['file_type'] == 'csv'
        assert out_settings['table_settings']['delimiter'] == '|'
        assert out_settings['table_settings']['encoding'] == 'utf-16'

        # The OLD per-format fields must all be gone after migration.
        for legacy_key in ('output_csv_table', 'output_parquet_table', 'output_excel_table'):
            assert legacy_key not in out_settings

    def test_excel_output_consolidation(self, temp_dir):
        """Test that separate output_excel_table becomes table_settings."""
        from tools.migrate.legacy_schemas import (
            FlowInformation, FlowSettings, NodeInformation,
            NodeOutput, OutputSettings, OutputExcelTable
        )
        from tools.migrate.migrate import migrate_flowfile

        legacy_settings = OutputSettings(
            name='result.xlsx',
            directory='/output',
            file_type='excel',
            output_excel_table=OutputExcelTable(sheet_name='Results'),
        )

        setting_input = NodeOutput(flow_id=1, node_id=1, output_settings=legacy_settings)
        flow = FlowInformation(
            flow_id=1,
            flow_name='test',
            flow_settings=FlowSettings(flow_id=1, name='test'),
            data={1: NodeInformation(id=1, type='output', setting_input=setting_input)},
            node_starts=[1],
            node_connections=[],
        )

        source_path = temp_dir / 'test.flowfile'
        with open(source_path, 'wb') as handle:
            pickle.dump(flow, handle)

        migrated_path = migrate_flowfile(source_path, format='json')
        with open(migrated_path) as handle:
            data = json.load(handle)

        out_settings = data['nodes'][0]['setting_input']['output_settings']
        assert out_settings['table_settings']['file_type'] == 'excel'
        assert out_settings['table_settings']['sheet_name'] == 'Results'
288
+
289
+
290
+ # =============================================================================
291
+ # NODE TYPE MIGRATION TESTS
292
+ # =============================================================================
293
+
294
class TestNodeTypeMigration:
    """Test that all node types can be migrated correctly."""

    def _create_and_migrate(self, temp_dir, node_type: str, setting_input) -> Dict:
        """Helper to create a flow with one node and migrate it.

        Builds a minimal single-node ``FlowInformation``, pickles it into
        *temp_dir* (the legacy on-disk format), runs ``migrate_flowfile``
        with JSON output, and returns the parsed migrated document.
        """
        from tools.migrate.legacy_schemas import (
            FlowInformation, FlowSettings, NodeInformation
        )
        from tools.migrate.migrate import migrate_flowfile

        flow = FlowInformation(
            flow_id=1,
            flow_name='test',
            flow_settings=FlowSettings(flow_id=1, name='test'),
            data={1: NodeInformation(id=1, type=node_type, setting_input=setting_input)},
            node_starts=[1],
            node_connections=[],
        )

        # Legacy flowfiles are pickled objects; write one for the migrator.
        pickle_path = temp_dir / 'test.flowfile'
        with open(pickle_path, 'wb') as f:
            pickle.dump(flow, f)

        output_path = migrate_flowfile(pickle_path, format='json')

        with open(output_path) as f:
            return json.load(f)

    def test_migrate_select_node(self, temp_dir):
        """Test select node migration."""
        from tools.migrate.legacy_schemas import NodeSelect, SelectInput

        node = NodeSelect(
            flow_id=1,
            node_id=1,
            select_input=[
                SelectInput(old_name='a', new_name='b', keep=True),
                SelectInput(old_name='c', keep=False),
            ]
        )

        data = self._create_and_migrate(temp_dir, 'select', node)
        assert data['nodes'][0]['type'] == 'select'
        assert 'select_input' in data['nodes'][0]['setting_input']

    def test_migrate_filter_node(self, temp_dir):
        """Test filter node migration."""
        from tools.migrate.legacy_schemas import NodeFilter, FilterInput, BasicFilter

        node = NodeFilter(
            flow_id=1,
            node_id=1,
            filter_input=FilterInput(
                filter_type='basic',
                basic_filter=BasicFilter(field='x', filter_type='>', filter_value='5')
            )
        )

        data = self._create_and_migrate(temp_dir, 'filter', node)
        assert data['nodes'][0]['type'] == 'filter'
        assert 'filter_input' in data['nodes'][0]['setting_input']

    def test_migrate_formula_node(self, temp_dir):
        """Test formula node migration."""
        from tools.migrate.legacy_schemas import NodeFormula, FunctionInput, FieldInput

        node = NodeFormula(
            flow_id=1,
            node_id=1,
            function=FunctionInput(
                field=FieldInput(name='result'),
                function='[x] + [y]'
            )
        )

        data = self._create_and_migrate(temp_dir, 'formula', node)
        assert data['nodes'][0]['type'] == 'formula'
        assert 'function' in data['nodes'][0]['setting_input']

    def test_migrate_join_node(self, temp_dir):
        """Test join node migration."""
        from tools.migrate.legacy_schemas import (
            NodeJoin, JoinInput, JoinMap, JoinInputs, SelectInput
        )

        node = NodeJoin(
            flow_id=1,
            node_id=1,
            join_input=JoinInput(
                join_mapping=[JoinMap(left_col='id', right_col='id')],
                left_select=JoinInputs(renames=[SelectInput(old_name='id')]),
                right_select=JoinInputs(renames=[SelectInput(old_name='id')]),
                how='left'
            )
        )

        data = self._create_and_migrate(temp_dir, 'join', node)
        assert data['nodes'][0]['type'] == 'join'
        assert 'join_input' in data['nodes'][0]['setting_input']

    def test_migrate_join_node_with_none_selects(self, temp_dir):
        """Test join node migration when left_select/right_select are None (old format)."""
        from tools.migrate.legacy_schemas import (
            NodeJoin, JoinInput, JoinMap
        )

        # OLD format: left_select and right_select could be None
        node = NodeJoin(
            flow_id=1,
            node_id=1,
            join_input=JoinInput(
                join_mapping=[JoinMap(left_col='id', right_col='id')],
                left_select=None,
                right_select=None,
                how='inner'
            )
        )

        data = self._create_and_migrate(temp_dir, 'join', node)
        join_input = data['nodes'][0]['setting_input']['join_input']

        # Verify migration added empty renames lists
        assert join_input['left_select'] == {'renames': []}
        assert join_input['right_select'] == {'renames': []}

    def test_migrate_groupby_node(self, temp_dir):
        """Test group by node migration."""
        from tools.migrate.legacy_schemas import (
            NodeGroupBy, GroupByInput, AggColl
        )

        # agg='groupby' marks a grouping column; others are aggregations.
        node = NodeGroupBy(
            flow_id=1,
            node_id=1,
            groupby_input=GroupByInput(
                agg_cols=[
                    AggColl(old_name='category', agg='groupby'),
                    AggColl(old_name='amount', agg='sum', new_name='total'),
                ]
            )
        )

        data = self._create_and_migrate(temp_dir, 'group_by', node)
        assert data['nodes'][0]['type'] == 'group_by'
        assert 'groupby_input' in data['nodes'][0]['setting_input']

    def test_migrate_polars_code_node(self, temp_dir):
        """Test polars code node migration."""
        from tools.migrate.legacy_schemas import NodePolarsCode, PolarsCodeInput

        node = NodePolarsCode(
            flow_id=1,
            node_id=1,
            polars_code_input=PolarsCodeInput(
                polars_code='output_df = input_df.with_columns(pl.col("x") * 2)'
            ),
            depending_on_ids=[0]
        )

        data = self._create_and_migrate(temp_dir, 'polars_code', node)

        # The user's code string should survive migration verbatim.
        polars_node = data['nodes'][0]
        assert polars_node['type'] == 'polars_code'
        assert 'output_df' in polars_node['setting_input']['polars_code_input']['polars_code']
458
+
459
+
460
+ # =============================================================================
461
+ # LEGACY SCHEMA VALIDATION TESTS
462
+ # =============================================================================
463
+
464
class TestLegacySchemas:
    """Test that legacy schemas can be instantiated correctly."""

    def test_received_table_has_flat_fields(self):
        """Verify OLD ReceivedTable has flat structure."""
        from tools.migrate.legacy_schemas import ReceivedTable

        legacy_table = ReceivedTable(
            name='test.csv',
            path='/path/test.csv',
            file_type='csv',
            delimiter=';',
            encoding='latin-1',
            sheet_name='Sheet1',  # Excel field at top level (OLD style)
        )

        # OLD style keeps every format-specific option at the top level,
        # regardless of file_type.
        assert legacy_table.delimiter == ';'
        assert legacy_table.encoding == 'latin-1'
        assert legacy_table.sheet_name == 'Sheet1'

        # The nested NEW-style container must not exist on the legacy model.
        assert not hasattr(legacy_table, 'table_settings')

    def test_output_settings_has_separate_tables(self):
        """Verify OLD OutputSettings has separate table fields."""
        from tools.migrate.legacy_schemas import (
            OutputSettings, OutputCsvTable, OutputExcelTable
        )

        legacy_settings = OutputSettings(
            name='out.csv',
            directory='/out',
            file_type='csv',
            output_csv_table=OutputCsvTable(delimiter='|'),
            output_excel_table=OutputExcelTable(sheet_name='Data'),
        )

        # OLD style: one dedicated table object per output file format.
        assert legacy_settings.output_csv_table.delimiter == '|'
        assert legacy_settings.output_excel_table.sheet_name == 'Data'

        # No unified NEW-style table_settings on the legacy model.
        assert not hasattr(legacy_settings, 'table_settings')

    def test_legacy_class_map_completeness(self):
        """Test that LEGACY_CLASS_MAP has all needed classes."""
        from tools.migrate.legacy_schemas import LEGACY_CLASS_MAP

        required_classes = (
            # Transform schemas
            'SelectInput', 'JoinInput', 'JoinMap', 'PolarsCodeInput',
            'GroupByInput', 'AggColl', 'FilterInput', 'BasicFilter',
            # Input/Output schemas
            'ReceivedTable', 'OutputSettings', 'OutputCsvTable',
            # Node schemas
            'NodeRead', 'NodeSelect', 'NodeOutput', 'NodeJoin',
            'NodePolarsCode', 'NodeGroupBy',
            # Flow schemas
            'FlowInformation', 'FlowSettings', 'NodeInformation',
        )

        for cls_name in required_classes:
            assert cls_name in LEGACY_CLASS_MAP, f"Missing {cls_name}"
531
+
532
+
533
+ # =============================================================================
534
+ # ROUND TRIP TESTS
535
+ # =============================================================================
536
+
537
class TestRoundTrip:
    """Test complete pickle -> YAML -> validation round trips."""

    def test_complex_flow_roundtrip(self, temp_dir):
        """Test migration of a flow with multiple node types.

        Exercises a three-node legacy flow (read -> select -> output),
        migrates it to YAML, and checks both the document envelope and
        the per-node schema transformations.
        """
        # PyYAML is only needed here; skip the test cleanly if absent.
        yaml = pytest.importorskip('yaml')

        from tools.migrate.legacy_schemas import (
            FlowInformation, FlowSettings, NodeInformation,
            NodeRead, ReceivedTable, NodeSelect, SelectInput,
            NodeOutput, OutputSettings, OutputCsvTable
        )
        from tools.migrate.migrate import migrate_flowfile

        # Legacy flow with OLD-style flat ReceivedTable and separate
        # output_csv_table — both should be nested by the migration.
        flow = FlowInformation(
            flow_id=1,
            flow_name='complex_flow',
            flow_settings=FlowSettings(
                flow_id=1,
                name='complex_flow',
                description='A complex flow for testing'
            ),
            data={
                1: NodeInformation(
                    id=1, type='read',
                    setting_input=NodeRead(
                        flow_id=1, node_id=1,
                        received_file=ReceivedTable(
                            name='input.csv',
                            path='/data/input.csv',
                            file_type='csv',
                            delimiter=','
                        )
                    )
                ),
                2: NodeInformation(
                    id=2, type='select',
                    setting_input=NodeSelect(
                        flow_id=1, node_id=2,
                        select_input=[SelectInput(old_name='a')]
                    )
                ),
                3: NodeInformation(
                    id=3, type='output',
                    setting_input=NodeOutput(
                        flow_id=1, node_id=3,
                        output_settings=OutputSettings(
                            name='output.csv',
                            directory='/out',
                            file_type='csv',
                            output_csv_table=OutputCsvTable(delimiter=';')
                        )
                    )
                ),
            },
            node_starts=[1],
            node_connections=[(1, 2), (2, 3)],
        )

        pickle_path = temp_dir / 'complex.flowfile'
        with open(pickle_path, 'wb') as f:
            pickle.dump(flow, f)

        output_path = migrate_flowfile(pickle_path, format='yaml')

        # Load and validate YAML
        with open(output_path) as f:
            data = yaml.safe_load(f)

        # Verify FlowfileData format
        assert data['flowfile_version'] == '2.0'
        assert data['flowfile_name'] == 'complex_flow'
        assert data['flowfile_id'] == 1
        assert len(data['nodes']) == 3

        # Verify transformations applied
        read_node = next(n for n in data['nodes'] if n['type'] == 'read')
        assert 'table_settings' in read_node['setting_input']['received_file']

        output_node = next(n for n in data['nodes'] if n['type'] == 'output')
        assert 'table_settings' in output_node['setting_input']['output_settings']
        assert output_node['setting_input']['output_settings']['table_settings']['delimiter'] == ';'

        # Verify start node is marked
        start_nodes = [n for n in data['nodes'] if n.get('is_start_node')]
        assert len(start_nodes) == 1
        assert start_nodes[0]['id'] == 1
624
+
625
+
626
# Allow running this test module directly (outside the pytest CLI).
if __name__ == '__main__':
    pytest.main([__file__, '-v'])