Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -0,0 +1,598 @@
1
+ """
2
+ Migration logic for converting old flowfile pickles to new YAML format.
3
+ """
4
+
5
+ import pickle
6
+ from dataclasses import fields, is_dataclass, asdict
7
+ from pathlib import Path
8
+ from typing import Any, Dict, List, Optional
9
+ import sys
10
+
11
+ try:
12
+ import yaml
13
+ except ImportError:
14
+ yaml = None
15
+
16
+ from tools.migrate.legacy_schemas import LEGACY_CLASS_MAP
17
+
18
+
19
class LegacyUnpickler(pickle.Unpickler):
    """
    Custom unpickler that redirects class lookups to legacy dataclass definitions.

    Only classes from transform_schema.py that migrated from @dataclass to
    Pydantic BaseModel are intercepted. Everything else (schemas.py,
    input_schema.py) was already Pydantic and unpickles via the normal
    import machinery.
    """

    # Names that changed from @dataclass to BaseModel; all of them are
    # defined in flowfile_core/schemas/transform_schema.py.
    DATACLASS_TO_PYDANTIC = {
        'SelectInput', 'FieldInput', 'FunctionInput', 'BasicFilter',
        'FilterInput', 'SelectInputs', 'JoinInputs', 'JoinMap',
        'CrossJoinInput', 'JoinInput', 'FuzzyMatchInput', 'AggColl',
        'GroupByInput', 'PivotInput', 'SortByInput', 'RecordIdInput',
        'TextToRowsInput', 'UnpivotInput', 'UnionInput', 'UniqueInput',
        'GraphSolverInput', 'PolarsCodeInput',
    }

    def find_class(self, module: str, name: str):
        """Redirect only the migrated transform_schema dataclasses to their legacy definitions."""
        # Intercept solely the dataclass-to-Pydantic migrations; anything
        # else must resolve to the real, currently-importable class.
        if name in self.DATACLASS_TO_PYDANTIC and name in LEGACY_CLASS_MAP:
            return LEGACY_CLASS_MAP[name]
        return super().find_class(module, name)
62
+
63
+
64
def load_legacy_flowfile(path: Path) -> Any:
    """
    Deserialize an old-format flowfile pickle using the legacy class definitions.

    Args:
        path: Path to the .flowfile pickle on disk.

    Returns:
        The deserialized FlowInformation object (legacy dataclass form).
    """
    # NOTE(review): unpickling is only safe for trusted files; this tool
    # assumes the flowfile was produced by an earlier Flowfile release.
    with path.open('rb') as handle:
        unpickler = LegacyUnpickler(handle)
        return unpickler.load()
76
+
77
+
78
def convert_to_dict(obj: Any, _seen: Optional[set] = None) -> Any:
    """
    Recursively convert dataclasses, Pydantic models, and complex objects to plain dicts.

    Handles:
    - Pydantic BaseModel instances (via model_dump)
    - Dataclasses (via asdict plus recursion, or manual field conversion)
    - Lists, dicts, tuples, sets (all sequence types become lists for clean YAML)
    - Path objects (become strings) and primitive types

    Args:
        obj: Object to convert
        _seen: Set of seen object IDs (for cycle detection)

    Returns:
        Plain dict/list/primitive representation
    """
    if _seen is None:
        _seen = set()

    # Handle None
    if obj is None:
        return None

    # Handle primitives
    if isinstance(obj, (str, int, float, bool)):
        return obj

    # Cycle detection: back-references are replaced with a marker string.
    obj_id = id(obj)
    if obj_id in _seen:
        return f"<circular reference to {type(obj).__name__}>"
    _seen.add(obj_id)

    try:
        # Handle Pydantic models FIRST (check for model_dump method)
        if hasattr(obj, 'model_dump') and callable(obj.model_dump):
            try:
                # Recursively convert any nested structures model_dump left behind.
                return convert_to_dict(obj.model_dump(), _seen)
            except Exception:
                # Fall through to other methods if model_dump fails
                pass

        # Handle dataclasses (instances only, not dataclass types)
        if is_dataclass(obj) and not isinstance(obj, type):
            try:
                # asdict handles nested dataclasses but leaves other values
                # (e.g. Path, Pydantic models) untouched — recurse to fix that.
                return convert_to_dict(asdict(obj), _seen)
            except Exception:
                # Fall back to manual per-field conversion
                return {f.name: convert_to_dict(getattr(obj, f.name, None), _seen)
                        for f in fields(obj)}

        # Handle dicts
        if isinstance(obj, dict):
            return {k: convert_to_dict(v, _seen) for k, v in obj.items()}

        # Lists, tuples, and sets all become lists for clean YAML
        if isinstance(obj, (list, tuple, set)):
            return [convert_to_dict(item, _seen) for item in obj]

        # Handle Path objects
        if isinstance(obj, Path):
            return str(obj)

        # Generic fallback: public attributes of any object with __dict__
        if hasattr(obj, '__dict__'):
            return {k: convert_to_dict(v, _seen) for k, v in obj.__dict__.items()
                    if not k.startswith('_')}

        # Fallback: try to convert to string
        return str(obj)

    finally:
        _seen.discard(obj_id)
162
+
163
+
164
def transform_to_new_schema(data: Dict) -> Dict:
    """
    Transform the legacy schema structure to the new FlowfileData format.

    This handles:
    - ReceivedTable: flat fields -> nested table_settings
    - OutputSettings: separate table fields -> unified table_settings
    - Field name changes (flow_id -> flowfile_id, etc.)

    Args:
        data: Dict representation of legacy FlowInformation

    Returns:
        Transformed dict ready for YAML serialization (FlowfileData format)
    """
    # Start nodes are marked per-node in the new format instead of listed.
    start_ids = set(data.get('node_starts', []))
    return {
        'flowfile_version': '2.0',
        'flowfile_id': data.get('flow_id', 1),
        'flowfile_name': data.get('flow_name', ''),
        'flowfile_settings': _transform_flow_settings(data.get('flow_settings', {})),
        'nodes': _transform_nodes(data.get('data', {}), start_ids),
    }
190
+
191
+
192
+ def _transform_flow_settings(settings: Dict) -> Dict:
193
+ """Transform flow settings to FlowfileSettings format."""
194
+ if not settings:
195
+ return {
196
+ 'execution_mode': 'Development',
197
+ 'execution_location': 'local',
198
+ 'auto_save': False,
199
+ 'show_detailed_progress': True,
200
+ }
201
+
202
+ return {
203
+ 'description': settings.get('description'),
204
+ 'execution_mode': settings.get('execution_mode', 'Development'),
205
+ 'execution_location': settings.get('execution_location', 'local'),
206
+ 'auto_save': settings.get('auto_save', False),
207
+ 'show_detailed_progress': settings.get('show_detailed_progress', True),
208
+ }
209
+
210
+
211
def _transform_nodes(nodes_data: Dict, node_starts: set) -> List[Dict]:
    """Transform the legacy nodes mapping into the FlowfileNode list format."""
    transformed_nodes = []

    for fallback_id, raw_info in nodes_data.items():
        info = raw_info if isinstance(raw_info, dict) else convert_to_dict(raw_info)

        # Prefer the id stored on the node itself; fall back to the dict key.
        node_id = info.get('id', fallback_id)

        entry = {
            'id': node_id,
            'type': info.get('type', ''),
            'is_start_node': node_id in node_starts,
            'description': info.get('description', ''),
            # Positions may be None in old files; coerce to int with 0 default.
            'x_position': int(info.get('x_position', 0) or 0),
            'y_position': int(info.get('y_position', 0) or 0),
            'left_input_id': info.get('left_input_id'),
            'right_input_id': info.get('right_input_id'),
            'input_ids': info.get('input_ids', []),
            'outputs': info.get('outputs', []),
        }

        # Node-type-specific settings get their own transformation pass.
        raw_settings = info.get('setting_input', {})
        if raw_settings:
            if not isinstance(raw_settings, dict):
                raw_settings = convert_to_dict(raw_settings)
            entry['setting_input'] = _transform_node_settings(entry['type'], raw_settings)

        transformed_nodes.append(entry)

    return transformed_nodes
244
+
245
+
246
+ def _transform_node_settings(node_type: str, settings: Dict) -> Dict:
247
+ """Transform node-specific settings to new format.
248
+
249
+ Handles structural changes for various node types:
250
+ - read: ReceivedTable flat → nested table_settings
251
+ - output: OutputSettings separate tables → unified table_settings
252
+ - polars_code: PolarsCodeInput extraction
253
+ - select: Ensure sorted_by field exists
254
+ - join/fuzzy_match: Handle JoinInput/FuzzyMatchInput changes
255
+ """
256
+ # Remove common fields that are stored elsewhere
257
+ settings = {k: v for k, v in settings.items()
258
+ if k not in ('flow_id', 'node_id', 'pos_x', 'pos_y', 'is_setup',
259
+ 'description', 'cache_results', 'user_id', 'is_flow_output',
260
+ 'is_user_defined')}
261
+
262
+ # Handle specific node types
263
+ if node_type == 'read':
264
+ return _transform_read_settings(settings)
265
+ elif node_type == 'output':
266
+ return _transform_output_settings(settings)
267
+ elif node_type == 'polars_code':
268
+ return _transform_polars_code_settings(settings)
269
+ elif node_type == 'select':
270
+ return _transform_select_settings(settings)
271
+ elif node_type in ('join', 'fuzzy_match', 'cross_join'):
272
+ return _transform_join_settings(settings)
273
+
274
+ return settings
275
+
276
+
277
+ def _transform_select_settings(settings: Dict) -> Dict:
278
+ """Transform NodeSelect settings - ensure all fields exist."""
279
+ # Ensure sorted_by field exists (added in new version)
280
+ if 'sorted_by' not in settings:
281
+ settings['sorted_by'] = 'none'
282
+
283
+ # Ensure select_input items have position field
284
+ select_input = settings.get('select_input', [])
285
+ if isinstance(select_input, list):
286
+ for i, item in enumerate(select_input):
287
+ if isinstance(item, dict) and item.get('position') is None:
288
+ item['position'] = i
289
+
290
+ return settings
291
+
292
+
293
+ def _transform_join_settings(settings: Dict) -> Dict:
294
+ """Transform join-related node settings.
295
+
296
+ Handles migration of old JoinInput where left_select/right_select could be None.
297
+ New schema requires these to be JoinInputs with renames list.
298
+ """
299
+ # Handle join_input transformation
300
+ join_input = settings.get('join_input') or settings.get('cross_join_input')
301
+ if join_input and isinstance(join_input, dict):
302
+ # ADD DEFAULT EMPTY JoinInputs IF MISSING (required in new schema)
303
+ for side in ['left_select', 'right_select']:
304
+ if join_input.get(side) is None:
305
+ join_input[side] = {'renames': []}
306
+
307
+ select = join_input.get(side)
308
+ if select and isinstance(select, dict):
309
+ # Ensure renames key exists
310
+ if 'renames' not in select:
311
+ select['renames'] = []
312
+
313
+ renames = select.get('renames', [])
314
+ if isinstance(renames, list):
315
+ for i, item in enumerate(renames):
316
+ if isinstance(item, dict) and item.get('position') is None:
317
+ item['position'] = i
318
+
319
+ return settings
320
+
321
+
322
+ def _transform_read_settings(settings: Dict) -> Dict:
323
+ """Transform NodeRead settings - extract table_settings from old flat structure.
324
+
325
+ OLD structure (flat):
326
+ received_file:
327
+ file_type: csv
328
+ delimiter: ","
329
+ encoding: "utf-8"
330
+ sheet_name: null # Excel fields mixed in
331
+ ...
332
+
333
+ NEW structure (nested):
334
+ received_file:
335
+ file_type: csv
336
+ table_settings:
337
+ file_type: csv
338
+ delimiter: ","
339
+ encoding: "utf-8"
340
+ """
341
+ received_file = settings.get('received_file', {})
342
+ if not received_file:
343
+ return settings
344
+
345
+ # Check if already transformed (has table_settings)
346
+ if 'table_settings' in received_file and isinstance(received_file['table_settings'], dict):
347
+ return settings
348
+
349
+ file_type = received_file.get('file_type', 'csv')
350
+
351
+ # Build table_settings based on file_type, extracting from flat structure
352
+ if file_type == 'csv':
353
+ table_settings = {
354
+ 'file_type': 'csv',
355
+ 'reference': received_file.get('reference', ''),
356
+ 'starting_from_line': received_file.get('starting_from_line', 0),
357
+ 'delimiter': received_file.get('delimiter', ','),
358
+ 'has_headers': received_file.get('has_headers', True),
359
+ 'encoding': received_file.get('encoding', 'utf-8') or 'utf-8',
360
+ 'parquet_ref': received_file.get('parquet_ref'),
361
+ 'row_delimiter': received_file.get('row_delimiter', '\n'),
362
+ 'quote_char': received_file.get('quote_char', '"'),
363
+ 'infer_schema_length': received_file.get('infer_schema_length', 10000),
364
+ 'truncate_ragged_lines': received_file.get('truncate_ragged_lines', False),
365
+ 'ignore_errors': received_file.get('ignore_errors', False),
366
+ }
367
+ elif file_type == 'json':
368
+ table_settings = {
369
+ 'file_type': 'json',
370
+ 'reference': received_file.get('reference', ''),
371
+ 'starting_from_line': received_file.get('starting_from_line', 0),
372
+ 'delimiter': received_file.get('delimiter', ','),
373
+ 'has_headers': received_file.get('has_headers', True),
374
+ 'encoding': received_file.get('encoding', 'utf-8') or 'utf-8',
375
+ 'parquet_ref': received_file.get('parquet_ref'),
376
+ 'row_delimiter': received_file.get('row_delimiter', '\n'),
377
+ 'quote_char': received_file.get('quote_char', '"'),
378
+ 'infer_schema_length': received_file.get('infer_schema_length', 10000),
379
+ 'truncate_ragged_lines': received_file.get('truncate_ragged_lines', False),
380
+ 'ignore_errors': received_file.get('ignore_errors', False),
381
+ }
382
+ elif file_type == 'excel':
383
+ table_settings = {
384
+ 'file_type': 'excel',
385
+ 'sheet_name': received_file.get('sheet_name'),
386
+ 'start_row': received_file.get('start_row', 0),
387
+ 'start_column': received_file.get('start_column', 0),
388
+ 'end_row': received_file.get('end_row', 0),
389
+ 'end_column': received_file.get('end_column', 0),
390
+ 'has_headers': received_file.get('has_headers', True),
391
+ 'type_inference': received_file.get('type_inference', False),
392
+ }
393
+ elif file_type == 'parquet':
394
+ table_settings = {'file_type': 'parquet'}
395
+ else:
396
+ # Unknown file type - try to preserve what we can
397
+ table_settings = {'file_type': file_type or 'csv'}
398
+
399
+ # Build new structure with metadata + nested table_settings
400
+ return {
401
+ 'received_file': {
402
+ # Metadata fields (preserved from old structure)
403
+ 'id': received_file.get('id'),
404
+ 'name': received_file.get('name'),
405
+ 'path': received_file.get('path', ''),
406
+ 'directory': received_file.get('directory'),
407
+ 'analysis_file_available': received_file.get('analysis_file_available', False),
408
+ 'status': received_file.get('status'),
409
+ 'fields': received_file.get('fields', []),
410
+ 'abs_file_path': received_file.get('abs_file_path'),
411
+ # New discriminator field
412
+ 'file_type': file_type,
413
+ # Nested table settings
414
+ 'table_settings': table_settings,
415
+ }
416
+ }
417
+
418
+
419
+ def _transform_output_settings(settings: Dict) -> Dict:
420
+ """Transform NodeOutput settings - consolidate separate table settings into single field.
421
+
422
+ OLD structure:
423
+ output_settings:
424
+ file_type: csv
425
+ output_csv_table: {delimiter: ",", encoding: "utf-8"}
426
+ output_parquet_table: {}
427
+ output_excel_table: {sheet_name: "Sheet1"}
428
+
429
+ NEW structure:
430
+ output_settings:
431
+ file_type: csv
432
+ table_settings:
433
+ file_type: csv
434
+ delimiter: ","
435
+ encoding: "utf-8"
436
+ """
437
+ output_settings = settings.get('output_settings', {})
438
+ if not output_settings:
439
+ return settings
440
+
441
+ # Check if already transformed
442
+ if 'table_settings' in output_settings and isinstance(output_settings['table_settings'], dict):
443
+ return settings
444
+
445
+ file_type = output_settings.get('file_type', 'csv')
446
+
447
+ # Build table_settings from old separate fields
448
+ if file_type == 'csv':
449
+ old_csv = output_settings.get('output_csv_table', {}) or {}
450
+ table_settings = {
451
+ 'file_type': 'csv',
452
+ 'delimiter': old_csv.get('delimiter', ','),
453
+ 'encoding': old_csv.get('encoding', 'utf-8'),
454
+ }
455
+ elif file_type == 'excel':
456
+ old_excel = output_settings.get('output_excel_table', {}) or {}
457
+ table_settings = {
458
+ 'file_type': 'excel',
459
+ 'sheet_name': old_excel.get('sheet_name', 'Sheet1'),
460
+ }
461
+ elif file_type == 'parquet':
462
+ table_settings = {'file_type': 'parquet'}
463
+ else:
464
+ table_settings = {'file_type': file_type or 'csv'}
465
+
466
+ return {
467
+ 'output_settings': {
468
+ 'name': output_settings.get('name', ''),
469
+ 'directory': output_settings.get('directory', ''),
470
+ 'file_type': file_type,
471
+ 'fields': output_settings.get('fields', []),
472
+ 'write_mode': output_settings.get('write_mode', 'overwrite'),
473
+ 'abs_file_path': output_settings.get('abs_file_path'),
474
+ 'table_settings': table_settings,
475
+ }
476
+ }
477
+
478
+
479
+ def _transform_polars_code_settings(settings: Dict) -> Dict:
480
+ """Transform NodePolarsCode settings.
481
+
482
+ Extracts polars_code from PolarsCodeInput and handles depending_on_id → depending_on_ids.
483
+ """
484
+ polars_code_input = settings.get('polars_code_input', {})
485
+
486
+ # Extract the actual code
487
+ polars_code = ''
488
+ if isinstance(polars_code_input, dict):
489
+ polars_code = polars_code_input.get('polars_code', '')
490
+ elif hasattr(polars_code_input, 'polars_code'):
491
+ polars_code = polars_code_input.polars_code
492
+
493
+ # Handle depending_on_id → depending_on_ids migration
494
+ depending_on_ids = settings.get('depending_on_ids', [])
495
+ if not depending_on_ids or depending_on_ids == [-1]:
496
+ old_id = settings.get('depending_on_id')
497
+ if old_id is not None and old_id != -1:
498
+ depending_on_ids = [old_id]
499
+ else:
500
+ depending_on_ids = []
501
+
502
+ return {
503
+ 'polars_code_input': {
504
+ 'polars_code': polars_code,
505
+ },
506
+ 'depending_on_ids': depending_on_ids,
507
+ }
508
+
509
+
510
def migrate_flowfile(input_path: Path, output_path: Path = None, format: str = 'yaml') -> Path:
    """
    Migrate a single flowfile from pickle to YAML format.

    Args:
        input_path: Path to the .flowfile pickle
        output_path: Output path (default: same name with .yaml extension)
        format: Output format ('yaml' or 'json')

    Returns:
        Path to the created output file

    Raises:
        ImportError: If YAML output is requested but PyYAML is not installed.
    """
    if format == 'yaml' and yaml is None:
        raise ImportError("PyYAML is required for YAML output. Install with: pip install pyyaml")

    # Default output path: sit next to the input with the new extension.
    if output_path is None:
        output_path = input_path.with_suffix('.yaml' if format == 'yaml' else '.json')

    print(f"Loading: {input_path}")

    # Load legacy pickle -> plain dict -> new schema layout.
    legacy_data = load_legacy_flowfile(input_path)
    transformed = transform_to_new_schema(convert_to_dict(legacy_data))

    print(f"Writing: {output_path}")

    with open(output_path, 'w', encoding='utf-8') as f:
        if format == 'yaml':
            yaml.dump(transformed, f, default_flow_style=False, sort_keys=False, allow_unicode=True)
        else:
            import json
            json.dump(transformed, f, indent=2, ensure_ascii=False)

    print(f"✓ Migrated: {input_path.name} → {output_path.name}")
    return output_path
553
+
554
+
555
def migrate_directory(dir_path: Path, output_dir: Path = None, format: str = 'yaml') -> List[Path]:
    """
    Migrate all flowfiles in a directory.

    Args:
        dir_path: Directory containing .flowfile pickles
        output_dir: Output directory (default: same as input)
        format: Output format ('yaml' or 'json')

    Returns:
        List of created output file paths
    """
    output_dir = output_dir or dir_path
    output_dir.mkdir(parents=True, exist_ok=True)

    flowfiles = list(dir_path.glob('**/*.flowfile'))
    if not flowfiles:
        print(f"No .flowfile files found in {dir_path}")
        return []

    print(f"Found {len(flowfiles)} flowfile(s) to migrate\n")

    suffix = '.yaml' if format == 'yaml' else '.json'
    migrated: List[Path] = []
    failed = []

    for source in flowfiles:
        # Mirror the input directory layout under output_dir.
        target = output_dir / source.relative_to(dir_path).with_suffix(suffix)
        target.parent.mkdir(parents=True, exist_ok=True)

        try:
            migrate_flowfile(source, target, format)
        except Exception as exc:
            # Keep going on individual failures; report them at the end.
            print(f"✗ Failed: {source.name} - {exc}")
            failed.append((source, exc))
        else:
            migrated.append(target)

    print(f"\n{'='*50}")
    print(f"Migration complete: {len(migrated)} succeeded, {len(failed)} failed")

    return migrated
File without changes
@@ -0,0 +1,23 @@
1
+ """
2
+ Pytest configuration and shared fixtures for migration tool tests.
3
+ """
4
+
5
+ import pytest
6
+ import sys
7
+ from pathlib import Path
8
+
9
+
10
# Ensure the tools package is importable when pytest collects these tests.
REPO_ROOT = Path(__file__).parent.parent.parent.parent
_repo_root = str(REPO_ROOT)
if _repo_root not in sys.path:
    sys.path.insert(0, _repo_root)
14
+
15
+
16
def pytest_configure(config):
    """Register the custom markers used by the migration test suite."""
    marker_lines = (
        "slow: marks tests as slow (deselect with '-m \"not slow\"')",
        "requires_yaml: marks tests that require PyYAML",
    )
    for marker in marker_lines:
        config.addinivalue_line("markers", marker)