Flowfile 0.4.1-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- flowfile/__init__.py +3 -1
- flowfile/api.py +1 -2
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
- flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
- flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
- flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
- flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
- flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
- flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
- flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
- flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +129 -26
- flowfile_core/flowfile/flow_node/flow_node.py +3 -0
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +5 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +1 -1
- flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
- flowfile_core/flowfile/node_designer/custom_node.py +1 -1
- flowfile_core/flowfile/node_designer/ui_components.py +1 -1
- flowfile_core/flowfile/schema_callbacks.py +8 -5
- flowfile_core/flowfile/setting_generator/settings.py +15 -9
- flowfile_core/routes/routes.py +8 -10
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +222 -65
- flowfile_core/schemas/output_model.py +1 -1
- flowfile_core/schemas/schemas.py +145 -32
- flowfile_core/schemas/transform_schema.py +1083 -413
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +15 -18
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +3 -0
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/routes.py +5 -8
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
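
Note: the most significant addition in 0.5.1 is the new tools/migrate package, which converts legacy pickled .flowfile flows into the new JSON/YAML format (its test suite is expanded below). A minimal sketch of driving the migration programmatically, based only on the migrate_flowfile calls exercised in those tests; the input path here is hypothetical, and the exact CLI surface of python -m tools.migrate is not shown in this diff:

    from pathlib import Path
    from tools.migrate.migrate import migrate_flowfile

    # Hypothetical legacy flow saved by flowfile <= 0.4.x (pickle-based format).
    legacy_flow = Path('flows/my_pipeline.flowfile')

    # migrate_flowfile converts the pickled flow and returns the path of the
    # file it wrote (see the tests below); where the output lands is decided
    # by the tool itself.
    json_path = migrate_flowfile(legacy_flow, format='json')
    yaml_path = migrate_flowfile(legacy_flow, format='yaml')
    print(json_path, yaml_path)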
tools/migrate/tests/test_node_migrations.py
@@ -0,0 +1,813 @@
"""
Tests for migration tool - verifies all node types migrate correctly.

Run with:
    pytest tools/migrate/tests/test_node_migrations.py -v
"""

import pickle
import tempfile
import json
from pathlib import Path
import pytest

from tools.migrate.legacy_schemas import (
    # Flow schemas
    FlowInformation, FlowSettings, NodeInformation,

    # Node schemas
    NodeRead, NodeSelect, NodeFilter, NodeFormula, NodeOutput,
    NodeJoin, NodeCrossJoin, NodeFuzzyMatch, NodePolarsCode,
    NodeGroupBy, NodeSort, NodeUnion, NodeUnique,
    NodePivot, NodeUnpivot, NodeRecordId, NodeTextToRows,
    NodeGraphSolver, NodeSample,

    # Input/Output schemas
    ReceivedTable, OutputSettings, OutputCsvTable, OutputExcelTable,

    # Transform schemas
    SelectInput, SelectInputs, JoinInputs, JoinInput, JoinMap,
    CrossJoinInput, FuzzyMatchInput, FuzzyMapping,
    FilterInput, BasicFilter, FunctionInput, FieldInput,
    GroupByInput, AggColl, SortByInput, UnionInput, UniqueInput,
    PivotInput, UnpivotInput, RecordIdInput, TextToRowsInput,
    GraphSolverInput, PolarsCodeInput,
)

from tools.migrate.migrate import migrate_flowfile


# =============================================================================
# FIXTURES
# =============================================================================

@pytest.fixture
def temp_dir():
    with tempfile.TemporaryDirectory() as tmpdir:
        yield Path(tmpdir)


def create_flow_with_node(node_type: str, node_setting) -> FlowInformation:
    """Helper to create a flow with a single node."""
    return FlowInformation(
        flow_id=1,
        flow_name='test',
        flow_settings=FlowSettings(flow_id=1, name='test'),
        data={1: NodeInformation(id=1, type=node_type, setting_input=node_setting)},
        node_starts=[1],
        node_connections=[],
    )


def pickle_and_migrate(temp_dir: Path, flow: FlowInformation) -> dict:
    """Pickle a flow, migrate it, return JSON result."""
    pickle_path = temp_dir / 'test.flowfile'
    with open(pickle_path, 'wb') as f:
        pickle.dump(flow, f)

    output_path = migrate_flowfile(pickle_path, format='json')

    with open(output_path) as f:
        return json.load(f)


# =============================================================================
# INPUT NODE TESTS
# =============================================================================

class TestReadNodeMigration:
    """Test NodeRead migrations with different file types."""

    def test_csv_read_migration(self, temp_dir):
        """CSV read with custom settings."""
        node = NodeRead(
            flow_id=1, node_id=1,
            received_file=ReceivedTable(
                name='data.csv',
                path='/data/data.csv',
                file_type='csv',
                delimiter=';',
                encoding='latin-1',
                has_headers=True,
                starting_from_line=1,
                infer_schema_length=5000,
                quote_char="'",
                truncate_ragged_lines=True,
                ignore_errors=True,
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('read', node))

        rf = data['nodes'][0]['setting_input']['received_file']
        assert rf['file_type'] == 'csv'
        assert 'table_settings' in rf
        assert rf['table_settings']['delimiter'] == ';'
        assert rf['table_settings']['encoding'] == 'latin-1'
        assert rf['table_settings']['starting_from_line'] == 1

    def test_excel_read_migration(self, temp_dir):
        """Excel read with sheet and range settings."""
        node = NodeRead(
            flow_id=1, node_id=1,
            received_file=ReceivedTable(
                name='data.xlsx',
                path='/data/data.xlsx',
                file_type='excel',
                sheet_name='Sales',
                start_row=2,
                start_column=1,
                end_row=100,
                end_column=10,
                has_headers=True,
                type_inference=True,
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('read', node))

        rf = data['nodes'][0]['setting_input']['received_file']
        assert rf['file_type'] == 'excel'
        assert rf['table_settings']['sheet_name'] == 'Sales'
        assert rf['table_settings']['start_row'] == 2
        assert rf['table_settings']['type_inference'] == True

    def test_parquet_read_migration(self, temp_dir):
        """Parquet read."""
        node = NodeRead(
            flow_id=1, node_id=1,
            received_file=ReceivedTable(
                name='data.parquet',
                path='/data/data.parquet',
                file_type='parquet',
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('read', node))

        rf = data['nodes'][0]['setting_input']['received_file']
        assert rf['file_type'] == 'parquet'
        assert rf['table_settings']['file_type'] == 'parquet'


# =============================================================================
# OUTPUT NODE TESTS
# =============================================================================

class TestOutputNodeMigration:
    """Test NodeOutput migrations."""

    def test_csv_output_migration(self, temp_dir):
        """CSV output with custom delimiter."""
        node = NodeOutput(
            flow_id=1, node_id=1,
            output_settings=OutputSettings(
                name='result.csv',
                directory='/output',
                file_type='csv',
                write_mode='overwrite',
                output_csv_table=OutputCsvTable(delimiter='|', encoding='utf-16'),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('output', node))

        os = data['nodes'][0]['setting_input']['output_settings']
        assert os['file_type'] == 'csv'
        assert 'table_settings' in os
        assert os['table_settings']['delimiter'] == '|'
        assert os['table_settings']['encoding'] == 'utf-16'
        # Old fields should be removed
        assert 'output_csv_table' not in os

    def test_excel_output_migration(self, temp_dir):
        """Excel output with sheet name."""
        node = NodeOutput(
            flow_id=1, node_id=1,
            output_settings=OutputSettings(
                name='result.xlsx',
                directory='/output',
                file_type='excel',
                output_excel_table=OutputExcelTable(sheet_name='Results'),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('output', node))

        os = data['nodes'][0]['setting_input']['output_settings']
        assert os['file_type'] == 'excel'
        assert os['table_settings']['sheet_name'] == 'Results'


# =============================================================================
# TRANSFORM NODE TESTS
# =============================================================================

class TestSelectNodeMigration:
    """Test NodeSelect migrations."""

    def test_select_with_renames(self, temp_dir):
        """Select with column renames and drops."""
        node = NodeSelect(
            flow_id=1, node_id=1,
            select_input=[
                SelectInput(old_name='col_a', new_name='column_a', keep=True),
                SelectInput(old_name='col_b', new_name='column_b', keep=True, data_type='String'),
                SelectInput(old_name='col_c', keep=False),
            ],
            sorted_by='asc',
            keep_missing=True,
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('select', node))

        setting_input = data['nodes'][0]['setting_input']
        assert 'select_input' in setting_input
        assert len(setting_input['select_input']) == 3
        assert setting_input['select_input'][0]['old_name'] == 'col_a'
        assert setting_input['select_input'][0]['new_name'] == 'column_a'

    def test_select_adds_position(self, temp_dir):
        """Verify positions are added to select inputs."""
        node = NodeSelect(
            flow_id=1, node_id=1,
            select_input=[
                SelectInput(old_name='a'),
                SelectInput(old_name='b'),
            ]
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('select', node))

        inputs = data['nodes'][0]['setting_input']['select_input']
        assert inputs[0].get('position') == 0
        assert inputs[1].get('position') == 1


class TestFilterNodeMigration:
    """Test NodeFilter migrations."""

    def test_basic_filter(self, temp_dir):
        """Basic filter with single condition."""
        node = NodeFilter(
            flow_id=1, node_id=1,
            filter_input=FilterInput(
                filter_type='basic',
                basic_filter=BasicFilter(
                    field='amount',
                    filter_type='>',
                    filter_value='100'
                )
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('filter', node))

        fi = data['nodes'][0]['setting_input']['filter_input']
        assert fi['filter_type'] == 'basic'
        assert fi['basic_filter']['field'] == 'amount'
        assert fi['basic_filter']['filter_type'] == '>'

    def test_advanced_filter(self, temp_dir):
        """Advanced filter with expression."""
        node = NodeFilter(
            flow_id=1, node_id=1,
            filter_input=FilterInput(
                filter_type='advanced',
                advanced_filter='[amount] > 100 and [status] == "active"'
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('filter', node))

        fi = data['nodes'][0]['setting_input']['filter_input']
        assert fi['filter_type'] == 'advanced'
        assert '[amount] > 100' in fi['advanced_filter']


class TestFormulaNodeMigration:
    """Test NodeFormula migrations."""

    def test_formula_with_expression(self, temp_dir):
        """Formula creating new column."""
        node = NodeFormula(
            flow_id=1, node_id=1,
            function=FunctionInput(
                field=FieldInput(name='total', data_type='Float64'),
                function='[price] * [quantity]'
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('formula', node))

        func = data['nodes'][0]['setting_input']['function']
        assert func['field']['name'] == 'total'
        assert '[price] * [quantity]' in func['function']


class TestJoinNodeMigration:
    """Test NodeJoin migrations."""

    def test_inner_join(self, temp_dir):
        """Inner join with single key."""
        node = NodeJoin(
            flow_id=1, node_id=1,
            join_input=JoinInput(
                join_mapping=[JoinMap(left_col='id', right_col='id')],
                how='inner',
                left_select=JoinInputs(renames=[SelectInput(old_name='id')]),
                right_select=JoinInputs(renames=[SelectInput(old_name='value')]),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('join', node))

        ji = data['nodes'][0]['setting_input']['join_input']
        assert ji['how'] == 'inner'
        assert ji['join_mapping'][0]['left_col'] == 'id'
        assert ji['join_mapping'][0]['right_col'] == 'id'

    def test_left_join_multi_key(self, temp_dir):
        """Left join with multiple keys."""
        node = NodeJoin(
            flow_id=1, node_id=1,
            join_input=JoinInput(
                join_mapping=[
                    JoinMap(left_col='date', right_col='date'),
                    JoinMap(left_col='product_id', right_col='prod_id'),
                ],
                how='left',
                left_select=JoinInputs(renames=[]),
                right_select=JoinInputs(renames=[]),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('join', node))

        ji = data['nodes'][0]['setting_input']['join_input']
        assert ji['how'] == 'left'
        assert len(ji['join_mapping']) == 2
        assert ji['join_mapping'][1]['left_col'] == 'product_id'
        assert ji['join_mapping'][1]['right_col'] == 'prod_id'

    def test_join_with_none_selects(self, temp_dir):
        """Join with None left_select/right_select (old format) gets default empty renames."""
        node = NodeJoin(
            flow_id=1, node_id=1,
            join_input=JoinInput(
                join_mapping=[JoinMap(left_col='id', right_col='id')],
                how='inner',
                left_select=None,
                right_select=None,
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('join', node))

        ji = data['nodes'][0]['setting_input']['join_input']
        assert ji['left_select'] == {'renames': []}
        assert ji['right_select'] == {'renames': []}


class TestCrossJoinNodeMigration:
    """Test NodeCrossJoin migrations."""

    def test_cross_join(self, temp_dir):
        """Cross join with selects."""
        node = NodeCrossJoin(
            flow_id=1, node_id=1,
            cross_join_input=CrossJoinInput(
                left_select=JoinInputs(renames=[SelectInput(old_name='a')]),
                right_select=JoinInputs(renames=[SelectInput(old_name='b')]),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('cross_join', node))

        cji = data['nodes'][0]['setting_input']['cross_join_input']
        assert 'left_select' in cji
        assert 'right_select' in cji


class TestFuzzyMatchNodeMigration:
    """Test NodeFuzzyMatch migrations."""

    def test_fuzzy_match(self, temp_dir):
        """Fuzzy match with threshold."""
        node = NodeFuzzyMatch(
            flow_id=1, node_id=1,
            join_input=FuzzyMatchInput(
                join_mapping=[
                    FuzzyMapping(
                        left_col='name',
                        right_col='company_name',
                        threshold_score=80,
                        fuzzy_type='levenshtein'
                    )
                ],
                how='inner',
                left_select=JoinInputs(renames=[]),
                right_select=JoinInputs(renames=[]),
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('fuzzy_match', node))

        ji = data['nodes'][0]['setting_input']['join_input']
        assert ji['join_mapping'][0]['threshold_score'] == 80
        assert ji['join_mapping'][0]['fuzzy_type'] == 'levenshtein'


class TestGroupByNodeMigration:
    """Test NodeGroupBy migrations."""

    def test_groupby_with_aggregations(self, temp_dir):
        """Group by with multiple aggregations."""
        node = NodeGroupBy(
            flow_id=1, node_id=1,
            groupby_input=GroupByInput(
                agg_cols=[
                    AggColl(old_name='category', agg='groupby'),
                    AggColl(old_name='amount', agg='sum', new_name='total_amount'),
                    AggColl(old_name='amount', agg='mean', new_name='avg_amount'),
                    AggColl(old_name='id', agg='count', new_name='record_count'),
                ]
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('group_by', node))

        agg_cols = data['nodes'][0]['setting_input']['groupby_input']['agg_cols']
        assert len(agg_cols) == 4
        assert agg_cols[0]['agg'] == 'groupby'
        assert agg_cols[1]['new_name'] == 'total_amount'


class TestSortNodeMigration:
    """Test NodeSort migrations."""

    def test_multi_column_sort(self, temp_dir):
        """Sort by multiple columns."""
        node = NodeSort(
            flow_id=1, node_id=1,
            sort_input=[
                SortByInput(column='date', how='desc'),
                SortByInput(column='name', how='asc'),
            ]
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('sort', node))

        sort_input = data['nodes'][0]['setting_input']['sort_input']
        assert len(sort_input) == 2
        assert sort_input[0]['column'] == 'date'
        assert sort_input[0]['how'] == 'desc'


class TestUnionNodeMigration:
    """Test NodeUnion migrations."""

    def test_union_relaxed(self, temp_dir):
        """Union with relaxed mode."""
        node = NodeUnion(
            flow_id=1, node_id=1,
            union_input=UnionInput(mode='relaxed')
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('union', node))

        assert data['nodes'][0]['setting_input']['union_input']['mode'] == 'relaxed'

    def test_union_selective(self, temp_dir):
        """Union with selective mode."""
        node = NodeUnion(
            flow_id=1, node_id=1,
            union_input=UnionInput(mode='selective')
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('union', node))

        assert data['nodes'][0]['setting_input']['union_input']['mode'] == 'selective'


class TestUniqueNodeMigration:
    """Test NodeUnique migrations."""

    def test_unique_first_strategy(self, temp_dir):
        """Unique with first strategy."""
        node = NodeUnique(
            flow_id=1, node_id=1,
            unique_input=UniqueInput(
                columns=['id', 'date'],
                strategy='first'
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('unique', node))

        ui = data['nodes'][0]['setting_input']['unique_input']
        assert ui['columns'] == ['id', 'date']
        assert ui['strategy'] == 'first'


class TestPivotNodeMigration:
    """Test NodePivot migrations."""

    def test_pivot(self, temp_dir):
        """Pivot with aggregations."""
        node = NodePivot(
            flow_id=1, node_id=1,
            pivot_input=PivotInput(
                index_columns=['date'],
                pivot_column='category',
                value_col='amount',
                aggregations=['sum', 'mean']
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('pivot', node))

        pi = data['nodes'][0]['setting_input']['pivot_input']
        assert pi['index_columns'] == ['date']
        assert pi['pivot_column'] == 'category'
        assert 'sum' in pi['aggregations']


class TestUnpivotNodeMigration:
    """Test NodeUnpivot migrations."""

    def test_unpivot(self, temp_dir):
        """Unpivot with column selection."""
        node = NodeUnpivot(
            flow_id=1, node_id=1,
            unpivot_input=UnpivotInput(
                index_columns=['id', 'date'],
                value_columns=['jan', 'feb', 'mar'],
                data_type_selector_mode='column'
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('unpivot', node))

        ui = data['nodes'][0]['setting_input']['unpivot_input']
        assert ui['index_columns'] == ['id', 'date']
        assert ui['value_columns'] == ['jan', 'feb', 'mar']


class TestRecordIdNodeMigration:
    """Test NodeRecordId migrations."""

    def test_record_id(self, temp_dir):
        """Record ID with offset."""
        node = NodeRecordId(
            flow_id=1, node_id=1,
            record_id_input=RecordIdInput(
                output_column_name='row_number',
                offset=1,
                group_by=True,
                group_by_columns=['category']
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('record_id', node))

        ri = data['nodes'][0]['setting_input']['record_id_input']
        assert ri['output_column_name'] == 'row_number'
        assert ri['offset'] == 1
        assert ri['group_by'] == True
        assert ri['group_by_columns'] == ['category']


class TestTextToRowsNodeMigration:
    """Test NodeTextToRows migrations."""

    def test_text_to_rows(self, temp_dir):
        """Text to rows with delimiter."""
        node = NodeTextToRows(
            flow_id=1, node_id=1,
            text_to_rows_input=TextToRowsInput(
                column_to_split='tags',
                output_column_name='tag',
                split_by_fixed_value=True,
                split_fixed_value=','
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('text_to_rows', node))

        ti = data['nodes'][0]['setting_input']['text_to_rows_input']
        assert ti['column_to_split'] == 'tags'
        assert ti['split_fixed_value'] == ','


class TestGraphSolverNodeMigration:
    """Test NodeGraphSolver migrations."""

    def test_graph_solver(self, temp_dir):
        """Graph solver for connected components."""
        node = NodeGraphSolver(
            flow_id=1, node_id=1,
            graph_solver_input=GraphSolverInput(
                col_from='source_id',
                col_to='target_id',
                output_column_name='component_id'
            )
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('graph_solver', node))

        gi = data['nodes'][0]['setting_input']['graph_solver_input']
        assert gi['col_from'] == 'source_id'
        assert gi['col_to'] == 'target_id'
        assert gi['output_column_name'] == 'component_id'


class TestPolarsCodeNodeMigration:
    """Test NodePolarsCode migrations."""

    def test_polars_code(self, temp_dir):
        """Polars code with multi-line script."""
        code = '''# Transform data
output_df = input_df.with_columns([
    pl.col("amount") * 2,
    pl.col("name").str.to_uppercase()
])
'''
        node = NodePolarsCode(
            flow_id=1, node_id=1,
            polars_code_input=PolarsCodeInput(polars_code=code),
            depending_on_ids=[0]
        )

        data = pickle_and_migrate(temp_dir, create_flow_with_node('polars_code', node))

        pci = data['nodes'][0]['setting_input']['polars_code_input']
        assert 'output_df' in pci['polars_code']
        assert 'to_uppercase' in pci['polars_code']


# =============================================================================
# COMPLEX FLOW TESTS
# =============================================================================

class TestComplexFlowMigration:
    """Test migration of flows with multiple connected nodes."""

    def test_pipeline_flow(self, temp_dir):
        """Test a complete pipeline: read -> select -> filter -> output."""
        flow = FlowInformation(
            flow_id=1,
            flow_name='pipeline_flow',
            flow_settings=FlowSettings(
                flow_id=1,
                name='pipeline_flow',
                description='Test pipeline'
            ),
            data={
                1: NodeInformation(
                    id=1, type='read',
                    setting_input=NodeRead(
                        flow_id=1, node_id=1,
                        received_file=ReceivedTable(
                            name='input.csv', path='/data/input.csv',
                            file_type='csv', delimiter=','
                        )
                    )
                ),
                2: NodeInformation(
                    id=2, type='select',
                    setting_input=NodeSelect(
                        flow_id=1, node_id=2,
                        select_input=[SelectInput(old_name='a', new_name='col_a')]
                    )
                ),
                3: NodeInformation(
                    id=3, type='filter',
                    setting_input=NodeFilter(
                        flow_id=1, node_id=3,
                        filter_input=FilterInput(
                            filter_type='advanced',
                            advanced_filter='[col_a] > 0'
                        )
                    )
                ),
                4: NodeInformation(
                    id=4, type='output',
                    setting_input=NodeOutput(
                        flow_id=1, node_id=4,
                        output_settings=OutputSettings(
                            name='output.csv', directory='/out',
                            file_type='csv',
                            output_csv_table=OutputCsvTable(delimiter=';')
                        )
                    )
                ),
            },
            node_starts=[1],
            node_connections=[(1, 2), (2, 3), (3, 4)],
        )

        data = pickle_and_migrate(temp_dir, flow)

        # Verify structure (FlowfileData format)
        assert data['flowfile_name'] == 'pipeline_flow'
        assert len(data['nodes']) == 4

        # Verify each node migrated correctly
        read_node = next(n for n in data['nodes'] if n['type'] == 'read')
        assert 'table_settings' in read_node['setting_input']['received_file']

        output_node = next(n for n in data['nodes'] if n['type'] == 'output')
        assert 'table_settings' in output_node['setting_input']['output_settings']
        assert output_node['setting_input']['output_settings']['table_settings']['delimiter'] == ';'

    def test_join_flow(self, temp_dir):
        """Test flow with join: two inputs -> join -> output."""
        flow = FlowInformation(
            flow_id=1,
            flow_name='join_flow',
            flow_settings=FlowSettings(flow_id=1, name='join_flow'),
            data={
                1: NodeInformation(
                    id=1, type='read',
                    setting_input=NodeRead(
                        flow_id=1, node_id=1,
                        received_file=ReceivedTable(
                            name='left.csv', path='/data/left.csv', file_type='csv'
                        )
                    )
                ),
                2: NodeInformation(
                    id=2, type='read',
                    setting_input=NodeRead(
                        flow_id=1, node_id=2,
                        received_file=ReceivedTable(
                            name='right.csv', path='/data/right.csv', file_type='csv'
                        )
                    )
                ),
                3: NodeInformation(
                    id=3, type='join',
                    left_input_id=1, right_input_id=2,
                    setting_input=NodeJoin(
                        flow_id=1, node_id=3,
                        join_input=JoinInput(
                            join_mapping=[JoinMap(left_col='id', right_col='id')],
                            how='left',
                            left_select=None,
                            right_select=None,
                        )
                    )
                ),
            },
            node_starts=[1, 2],
            node_connections=[(1, 3), (2, 3)],
        )

        data = pickle_and_migrate(temp_dir, flow)

        assert len(data['nodes']) == 3

        # Verify start nodes are marked
        start_nodes = [n for n in data['nodes'] if n.get('is_start_node')]
        assert len(start_nodes) == 2

        join_node = next(n for n in data['nodes'] if n['type'] == 'join')
        assert join_node['setting_input']['join_input']['how'] == 'left'


# =============================================================================
# YAML OUTPUT TESTS
# =============================================================================

class TestYamlMigration:
    """Test YAML format output."""

    def test_yaml_format(self, temp_dir):
        """Verify YAML output is valid and readable."""
        yaml = pytest.importorskip('yaml')

        node = NodeSelect(
            flow_id=1, node_id=1,
            select_input=[SelectInput(old_name='test')]
        )

        flow = create_flow_with_node('select', node)

        pickle_path = temp_dir / 'test.flowfile'
        with open(pickle_path, 'wb') as f:
            pickle.dump(flow, f)

        output_path = migrate_flowfile(pickle_path, format='yaml')

        with open(output_path) as f:
            data = yaml.safe_load(f)

        # Verify FlowfileData format
        assert data['flowfile_version'] == '2.0'
        assert data['flowfile_id'] == 1
        assert len(data['nodes']) == 1


if __name__ == '__main__':
    pytest.main([__file__, '-v'])