Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -370,6 +370,7 @@ class FlowNode:
370
370
  self.node_inputs.main_inputs] if self.node_inputs.main_inputs is not None else None
371
371
  node_information.setting_input = self.setting_input
372
372
  node_information.outputs = [n.node_id for n in self.leads_to_nodes]
373
+ node_information.description = self.setting_input.description if hasattr(self.setting_input, 'description') else ''
373
374
  node_information.is_setup = self.is_setup
374
375
  node_information.x_position = self.setting_input.pos_x
375
376
  node_information.y_position = self.setting_input.pos_y
@@ -498,6 +499,7 @@ class FlowNode:
498
499
  Returns:
499
500
  A list of FlowfileColumn objects representing the predicted schema.
500
501
  """
502
+
501
503
  if self.node_schema.predicted_schema and not force:
502
504
  return self.node_schema.predicted_schema
503
505
  if self.schema_callback is not None and (self.node_schema.predicted_schema is None or force):
@@ -514,6 +516,7 @@ class FlowNode:
514
516
  if predicted_data is not None and predicted_data.schema is not None:
515
517
  self.print('Calculating the schema based on the predicted resulting data')
516
518
  self.node_schema.predicted_schema = self._predicted_data_getter().schema
519
+
517
520
  return self.node_schema.predicted_schema
518
521
 
519
522
  @property
@@ -130,13 +130,14 @@ class NodeStepInputs:
130
130
  main_inputs: List["FlowNode"] = None
131
131
 
132
132
  @property
133
- def input_ids(self) -> List[int] | None:
133
+ def input_ids(self) -> List[int]:
134
134
  """
135
135
  Gets the IDs of all connected input nodes.
136
136
  :return: A list of integer node IDs.
137
137
  """
138
138
  if self.main_inputs is not None:
139
139
  return [node_input.node_information.id for node_input in self.get_all_inputs()]
140
+ return []
140
141
 
141
142
  def get_all_inputs(self) -> List["FlowNode"]:
142
143
  """
@@ -5,7 +5,7 @@ import os
5
5
  from pathlib import Path
6
6
  from datetime import datetime
7
7
 
8
- from flowfile_core.flowfile.manage.open_flowfile import open_flow
8
+ from flowfile_core.flowfile.manage.io_flowfile import open_flow
9
9
  from flowfile_core.flowfile.flow_graph import FlowGraph
10
10
  from flowfile_core.schemas.schemas import FlowSettings
11
11
  from flowfile_core.flowfile.utils import create_unique_id
@@ -14,14 +14,14 @@ from shared.storage_config import storage
14
14
 
15
15
def get_flow_save_location(flow_name: str) -> Path:
    """Gets the initial save location for flow files.

    Appends a ``.yaml`` extension when the name does not already end in
    ``.yaml``/``.yml``.  Uses a suffix check rather than a substring check so
    that a name which merely *contains* ".yaml" (e.g. "my.yaml.bak") still
    receives a proper extension.
    """
    if not flow_name.endswith((".yaml", ".yml")):
        flow_name += ".yaml"
    return storage.temp_directory_for_flows / flow_name
20
20
 
21
21
 
22
22
def create_flow_name() -> str:
    """Creates a unique, timestamp-based flow name ending in ``_flow.yaml``."""
    timestamp = datetime.now().strftime("%Y%m%d_%H_%M_%S")
    return timestamp + "_flow.yaml"
25
25
 
26
26
 
27
27
  @dataclass
@@ -39,7 +39,7 @@ class FlowfileHandler:
39
39
  self._flows[other.flow_id] = other
40
40
  return other.flow_id
41
41
 
42
- def import_flow(self, flow_path: Path|str) -> int:
42
+ def import_flow(self, flow_path: Path | str) -> int:
43
43
  if isinstance(flow_path, str):
44
44
  flow_path = Path(flow_path)
45
45
  imported_flow = open_flow(flow_path)
@@ -1,70 +1,433 @@
1
+ """
2
+ Compatibility enhancements for opening old flowfile versions.
3
+ Migrates old schema structures to new ones during file load.
4
+ """
5
+ import pickle
6
+ from typing import Any
7
+ from pathlib import Path
8
+
1
9
  from flowfile_core.schemas import schemas, input_schema
10
+ from tools.migrate.legacy_schemas import LEGACY_CLASS_MAP
11
+
12
+
13
+ # =============================================================================
14
+ # LEGACY PICKLE LOADING
15
+ # =============================================================================
16
+
17
class LegacyUnpickler(pickle.Unpickler):
    """
    Unpickler that resolves selected class names to legacy dataclass definitions.

    Old .flowfile pickles stored transform_schema classes as dataclasses; the
    current codebase defines them as Pydantic BaseModels.  Redirecting lookups
    through LEGACY_CLASS_MAP lets those pickles load with the original
    dataclass shapes, which can then be migrated.
    """

    def find_class(self, module: str, name: str):
        """Resolve *name* via LEGACY_CLASS_MAP first, then defer to pickle."""
        try:
            return LEGACY_CLASS_MAP[name]
        except KeyError:
            return super().find_class(module, name)
31
+
32
+
33
def load_flowfile_pickle(path: str) -> Any:
    """
    Deserialize a .flowfile pickle with legacy-compatible class resolution.

    Old flowfiles pickled transform_schema classes as dataclasses; the
    LegacyUnpickler loads them as those legacy dataclass instances so they
    can then be migrated to the current Pydantic models.

    Args:
        path: Path to the .flowfile pickle.

    Returns:
        The deserialized FlowInformation object.
    """
    with Path(path).resolve().open('rb') as handle:
        return LegacyUnpickler(handle).load()
2
50
 
3
51
 
52
+ # =============================================================================
53
+ # DATACLASS DETECTION AND MIGRATION
54
+ # =============================================================================
55
+
56
+ def _is_dataclass_instance(obj: Any) -> bool:
57
+ """Check if an object is a dataclass instance (not a Pydantic model)."""
58
+ return hasattr(obj, '__dataclass_fields__') and not hasattr(obj, 'model_dump')
59
+
60
+
61
def _migrate_dataclass_to_basemodel(obj: Any, model_class: type) -> Any:
    """Convert a dataclass instance into an instance of *model_class*.

    ``None`` passes through unchanged, as does anything that is not a plain
    dataclass instance (already a BaseModel or a dict).
    """
    if obj is None or not _is_dataclass_instance(obj):
        return obj

    from dataclasses import fields, asdict
    try:
        payload = asdict(obj)
    except Exception:
        # Best effort: pull each declared field directly off the instance.
        payload = {field.name: getattr(obj, field.name, None) for field in fields(obj)}

    return model_class.model_validate(payload)
77
+
78
+
79
+ # =============================================================================
80
+ # NODE-SPECIFIC COMPATIBILITY FUNCTIONS
81
+ # =============================================================================
82
+
4
83
def ensure_compatibility_node_read(node_read: input_schema.NodeRead):
    """Migrate an old NodeRead/ReceivedTable layout to the table_settings format.

    No-op when there is no received_file, or when table_settings already holds
    a validated (non-dict) settings object.
    """
    received_file = getattr(node_read, 'received_file', None)
    if received_file is None:
        return

    # Very old files may predate the `fields` attribute entirely.
    if not hasattr(received_file, 'fields'):
        setattr(received_file, 'fields', [])

    # A non-dict table_settings means the file was already migrated.
    current_settings = getattr(received_file, 'table_settings', None)
    if current_settings is not None and not isinstance(current_settings, dict):
        return

    # Prefer the stored file_type; otherwise infer it from the path suffix.
    file_type = getattr(received_file, 'file_type', None)
    if file_type is None:
        path = getattr(received_file, 'path', '') or ''
        if path.endswith('.parquet'):
            file_type = 'parquet'
        elif path.endswith(('.xlsx', '.xls')):
            file_type = 'excel'
        elif path.endswith('.json'):
            file_type = 'json'
        else:
            file_type = 'csv'

    # Assemble nested settings from the legacy flat attributes, then
    # re-validate the whole ReceivedTable so we end up with a proper model.
    payload = received_file.model_dump()
    payload['file_type'] = file_type
    payload['table_settings'] = _build_input_table_settings(received_file, file_type)
    node_read.received_file = input_schema.ReceivedTable.model_validate(payload)
123
+
124
+
125
+ def _build_input_table_settings(received_file: Any, file_type: str) -> dict:
126
+ """Build appropriate table_settings dict from old flat attributes."""
127
+
128
+ if file_type == 'csv':
129
+ return {
130
+ 'file_type': 'csv',
131
+ 'reference': getattr(received_file, 'reference', ''),
132
+ 'starting_from_line': getattr(received_file, 'starting_from_line', 0),
133
+ 'delimiter': getattr(received_file, 'delimiter', ','),
134
+ 'has_headers': getattr(received_file, 'has_headers', True),
135
+ 'encoding': getattr(received_file, 'encoding', 'utf-8'),
136
+ 'parquet_ref': getattr(received_file, 'parquet_ref', None),
137
+ 'row_delimiter': getattr(received_file, 'row_delimiter', '\n'),
138
+ 'quote_char': getattr(received_file, 'quote_char', '"'),
139
+ 'infer_schema_length': getattr(received_file, 'infer_schema_length', 10_000),
140
+ 'truncate_ragged_lines': getattr(received_file, 'truncate_ragged_lines', False),
141
+ 'ignore_errors': getattr(received_file, 'ignore_errors', False),
142
+ }
143
+
144
+ elif file_type == 'json':
145
+ return {
146
+ 'file_type': 'json',
147
+ 'reference': getattr(received_file, 'reference', ''),
148
+ 'starting_from_line': getattr(received_file, 'starting_from_line', 0),
149
+ 'delimiter': getattr(received_file, 'delimiter', ','),
150
+ 'has_headers': getattr(received_file, 'has_headers', True),
151
+ 'encoding': getattr(received_file, 'encoding', 'utf-8'),
152
+ 'parquet_ref': getattr(received_file, 'parquet_ref', None),
153
+ 'row_delimiter': getattr(received_file, 'row_delimiter', '\n'),
154
+ 'quote_char': getattr(received_file, 'quote_char', '"'),
155
+ 'infer_schema_length': getattr(received_file, 'infer_schema_length', 10_000),
156
+ 'truncate_ragged_lines': getattr(received_file, 'truncate_ragged_lines', False),
157
+ 'ignore_errors': getattr(received_file, 'ignore_errors', False),
158
+ }
159
+
160
+ elif file_type == 'parquet':
161
+ return {'file_type': 'parquet'}
162
+
163
+ elif file_type == 'excel':
164
+ return {
165
+ 'file_type': 'excel',
166
+ 'sheet_name': getattr(received_file, 'sheet_name', None),
167
+ 'start_row': getattr(received_file, 'start_row', 0),
168
+ 'start_column': getattr(received_file, 'start_column', 0),
169
+ 'end_row': getattr(received_file, 'end_row', 0),
170
+ 'end_column': getattr(received_file, 'end_column', 0),
171
+ 'has_headers': getattr(received_file, 'has_headers', True),
172
+ 'type_inference': getattr(received_file, 'type_inference', False),
173
+ }
174
+
175
+ # Default to csv settings
176
+ return {'file_type': 'csv', 'delimiter': ',', 'encoding': 'utf-8', 'has_headers': True}
9
177
 
10
178
 
11
179
def ensure_compatibility_node_output(node_output: input_schema.NodeOutput):
    """Migrate an old OutputSettings layout to the table_settings format."""
    output_settings = getattr(node_output, 'output_settings', None)
    if output_settings is None:
        return

    # A non-dict table_settings means this output was already migrated.
    current_settings = getattr(output_settings, 'table_settings', None)
    if current_settings is not None and not isinstance(current_settings, dict):
        return

    file_type = getattr(output_settings, 'file_type', 'csv')

    # Re-validate the whole OutputSettings so we end up with a proper model.
    payload = output_settings.model_dump()
    payload['table_settings'] = _build_output_table_settings(output_settings, file_type)

    # The legacy per-format table fields are superseded by table_settings.
    for legacy_field in ('output_csv_table', 'output_parquet_table', 'output_excel_table'):
        payload.pop(legacy_field, None)

    node_output.output_settings = input_schema.OutputSettings.model_validate(payload)
206
+
207
+
208
+ def _build_output_table_settings(output_settings: Any, file_type: str) -> dict:
209
+ """Build appropriate output table_settings from old separate table fields."""
210
+
211
+ if file_type == 'csv':
212
+ old_csv = getattr(output_settings, 'output_csv_table', None)
213
+ if old_csv is not None:
214
+ return {
215
+ 'file_type': 'csv',
216
+ 'delimiter': getattr(old_csv, 'delimiter', ','),
217
+ 'encoding': getattr(old_csv, 'encoding', 'utf-8'),
218
+ }
219
+ return {'file_type': 'csv', 'delimiter': ',', 'encoding': 'utf-8'}
220
+
221
+ elif file_type == 'parquet':
222
+ return {'file_type': 'parquet'}
223
+
224
+ elif file_type == 'excel':
225
+ old_excel = getattr(output_settings, 'output_excel_table', None)
226
+ if old_excel is not None:
227
+ return {
228
+ 'file_type': 'excel',
229
+ 'sheet_name': getattr(old_excel, 'sheet_name', 'Sheet1'),
230
+ }
231
+ return {'file_type': 'excel', 'sheet_name': 'Sheet1'}
232
+
233
+ return {'file_type': 'csv', 'delimiter': ',', 'encoding': 'utf-8'}
16
234
 
17
235
 
18
236
def ensure_compatibility_node_select(node_select: input_schema.NodeSelect):
    """Backfill NodeSelect: dataclass migration, position attributes, sorted_by."""
    if not hasattr(node_select, 'select_input'):
        return

    items = node_select.select_input
    if items:
        from flowfile_core.schemas import transform_schema
        # Legacy pickles stored SelectInput entries as dataclasses.
        if any(_is_dataclass_instance(item) for item in items):
            node_select.select_input = [
                _migrate_dataclass_to_basemodel(item, transform_schema.SelectInput)
                if _is_dataclass_instance(item) else item
                for item in items
            ]

    # The position attribute was introduced later; assign sequentially when
    # any entry is missing it.
    if any(not hasattr(item, 'position') for item in node_select.select_input):
        for idx, item in enumerate(node_select.select_input):
            setattr(item, 'position', idx)

    if not hasattr(node_select, 'sorted_by'):
        setattr(node_select, 'sorted_by', 'none')
25
263
 
26
264
 
27
265
def ensure_compatibility_node_joins(node_settings: input_schema.NodeFuzzyMatch | input_schema.NodeJoin):
    """Backfill join nodes: dataclass migration for mappings/renames, positions."""
    join_input = getattr(node_settings, 'join_input', None)
    if join_input is None:
        return
    if not (hasattr(join_input, 'right_select') and hasattr(join_input, 'left_select')):
        return

    from flowfile_core.schemas import transform_schema

    # Legacy pickles stored join_mapping entries as dataclasses.
    mapping = getattr(join_input, 'join_mapping', None)
    if mapping:
        join_input.join_mapping = [
            _migrate_dataclass_to_basemodel(entry, transform_schema.JoinMap)
            if _is_dataclass_instance(entry) else entry
            for entry in mapping
        ]

    # Same migration for the rename entries on both sides of the join.
    for side in ('right_select', 'left_select'):
        select = getattr(join_input, side, None)
        if select is None:
            continue
        renames = getattr(select, 'renames', []) or []
        if renames and any(_is_dataclass_instance(entry) for entry in renames):
            select.renames = [
                _migrate_dataclass_to_basemodel(entry, transform_schema.SelectInput)
                if _is_dataclass_instance(entry) else entry
                for entry in renames
            ]

    right_renames = getattr(join_input.right_select, 'renames', []) or []
    left_renames = getattr(join_input.left_select, 'renames', []) or []

    # Position was introduced later; when any entry lacks one, number entries
    # sequentially across the concatenated right + left rename lists
    # (preserving the original cross-list numbering scheme).
    combined = right_renames + left_renames
    if any(not hasattr(entry, 'position') for entry in combined):
        for idx, entry in enumerate(combined):
            setattr(entry, 'position', idx)
32
313
 
33
314
 
34
315
def ensure_description(node: input_schema.NodeBase):
    """Guarantee the node exposes a description attribute (default: empty string)."""
    if not hasattr(node, 'description'):
        node.description = ''
37
319
 
38
320
 
39
321
def ensure_compatibility_node_polars(node_polars: input_schema.NodePolarsCode):
    """Migrate legacy NodePolarsCode settings.

    - depending_on_id (single value) -> depending_on_ids (list)
    - PolarsCodeInput dataclass -> Pydantic BaseModel
    """
    # The single-dependency field was replaced by a list of dependency ids.
    if hasattr(node_polars, 'depending_on_id'):
        legacy_id = getattr(node_polars, 'depending_on_id', None)
        if getattr(node_polars, 'depending_on_ids', None) is None:
            node_polars.depending_on_ids = [legacy_id] if legacy_id is not None else []

    # The code-input payload used to be a dataclass.
    code_input = getattr(node_polars, 'polars_code_input', None)
    if code_input is not None and _is_dataclass_instance(code_input):
        from flowfile_core.schemas import transform_schema
        node_polars.polars_code_input = _migrate_dataclass_to_basemodel(
            code_input, transform_schema.PolarsCodeInput
        )
345
+
346
+
347
+ # =============================================================================
348
+ # FLOW-LEVEL COMPATIBILITY
349
+ # =============================================================================
350
+
351
def ensure_flow_settings(flow_storage_obj: schemas.FlowInformation, flow_path: str):
    """Make sure the flow has a flow_settings object with all expected fields."""
    if getattr(flow_storage_obj, 'flow_settings', None) is None:
        # Very old files predate FlowSettings entirely: build one and
        # re-validate the whole FlowInformation object.
        flow_storage_obj.flow_settings = schemas.FlowSettings(
            flow_id=flow_storage_obj.flow_id,
            path=flow_path,
            name=flow_storage_obj.flow_name,
        )
        return schemas.FlowInformation.model_validate(flow_storage_obj)

    settings = flow_storage_obj.flow_settings

    # These fields were added over time; backfill each one that is missing.
    backfill_defaults = (
        ('execution_location', "remote"),
        ('is_running', False),
        ('is_canceled', False),
        ('show_detailed_progress', True),
    )
    for attr_name, default in backfill_defaults:
        if not hasattr(settings, attr_name):
            setattr(settings, attr_name, default)

    return flow_storage_obj
378
+
379
+
380
+ # =============================================================================
381
+ # MAIN ENTRY POINT
382
+ # =============================================================================
383
+
384
def ensure_compatibility(flow_storage_obj: schemas.FlowInformation, flow_path: str):
    """
    Main compatibility entry point: migrate an old flowfile to the current schema.

    Applies, in order: flow-settings backfill, then per-node migrations for
    NodeRead, NodeSelect, NodeOutput, NodeJoin/NodeFuzzyMatch and
    NodePolarsCode, and finally a description backfill on every node.
    """
    flow_storage_obj = ensure_flow_settings(flow_storage_obj, flow_path)

    # Dispatch table: setting_input class name -> node-specific migration.
    migrations = {
        'NodeRead': ensure_compatibility_node_read,
        'NodeSelect': ensure_compatibility_node_select,
        'NodeOutput': ensure_compatibility_node_output,
        'NodeJoin': ensure_compatibility_node_joins,
        'NodeFuzzyMatch': ensure_compatibility_node_joins,
        'NodePolarsCode': ensure_compatibility_node_polars,
    }

    for _node_id, node_information in flow_storage_obj.data.items():
        setting_input = getattr(node_information, 'setting_input', None)
        if setting_input is None:
            continue

        migrate = migrations.get(type(setting_input).__name__)
        if migrate is not None:
            migrate(setting_input)

        ensure_description(setting_input)

    return flow_storage_obj
420
+
421
+
422
def load_and_migrate_flowfile(flow_path: str) -> schemas.FlowInformation:
    """
    Load a .flowfile pickle and apply every compatibility migration.

    Args:
        flow_path: Path to the .flowfile pickle.

    Returns:
        A fully migrated FlowInformation object.
    """
    return ensure_compatibility(load_flowfile_pickle(flow_path), flow_path)