Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. flowfile/__init__.py +3 -1
  2. flowfile/api.py +1 -2
  3. flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
  4. flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
  5. flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
  6. flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
  7. flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
  8. flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
  9. flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
  10. flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
  11. flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
  12. flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
  13. flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
  14. flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
  15. flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
  16. flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
  17. flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
  18. flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
  19. flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
  20. flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
  21. flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
  22. flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
  23. flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
  24. flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
  25. flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
  26. flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
  27. flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
  28. flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
  29. flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
  30. flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
  31. flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
  32. flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
  33. flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
  34. flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
  35. flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
  36. flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
  37. flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
  38. flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
  39. flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
  40. flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
  41. flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
  42. flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
  43. flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
  44. flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
  45. flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
  46. flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
  47. flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
  48. flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
  49. flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
  50. flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
  51. flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
  52. flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
  53. flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
  54. flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
  55. flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
  56. flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
  57. flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
  58. flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
  59. flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
  60. flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
  61. flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
  62. flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
  63. flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
  64. flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
  65. flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
  66. flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
  67. flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
  68. flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
  69. flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
  70. flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
  71. flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
  72. flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
  73. flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
  74. flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
  75. flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
  76. flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
  77. flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
  78. flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
  79. flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
  80. flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
  81. flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
  82. flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
  83. flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
  84. flowfile/web/static/index.html +1 -1
  85. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
  86. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
  87. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
  88. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
  89. flowfile_core/__init__.py +3 -0
  90. flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
  91. flowfile_core/flowfile/code_generator/code_generator.py +62 -64
  92. flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
  93. flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
  94. flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
  95. flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
  96. flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
  97. flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
  98. flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
  99. flowfile_core/flowfile/flow_graph.py +129 -26
  100. flowfile_core/flowfile/flow_node/flow_node.py +3 -0
  101. flowfile_core/flowfile/flow_node/models.py +2 -1
  102. flowfile_core/flowfile/handler.py +5 -5
  103. flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
  104. flowfile_core/flowfile/manage/io_flowfile.py +394 -0
  105. flowfile_core/flowfile/node_designer/__init__.py +1 -1
  106. flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
  107. flowfile_core/flowfile/node_designer/custom_node.py +1 -1
  108. flowfile_core/flowfile/node_designer/ui_components.py +1 -1
  109. flowfile_core/flowfile/schema_callbacks.py +8 -5
  110. flowfile_core/flowfile/setting_generator/settings.py +15 -9
  111. flowfile_core/routes/routes.py +8 -10
  112. flowfile_core/schemas/cloud_storage_schemas.py +0 -2
  113. flowfile_core/schemas/input_schema.py +222 -65
  114. flowfile_core/schemas/output_model.py +1 -1
  115. flowfile_core/schemas/schemas.py +145 -32
  116. flowfile_core/schemas/transform_schema.py +1083 -413
  117. flowfile_core/schemas/yaml_types.py +103 -0
  118. flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
  119. flowfile_frame/__init__.py +3 -1
  120. flowfile_frame/flow_frame.py +15 -18
  121. flowfile_frame/flow_frame_methods.py +12 -9
  122. flowfile_worker/__init__.py +3 -0
  123. flowfile_worker/create/__init__.py +3 -21
  124. flowfile_worker/create/funcs.py +68 -56
  125. flowfile_worker/create/models.py +130 -62
  126. flowfile_worker/routes.py +5 -8
  127. tools/migrate/README.md +56 -0
  128. tools/migrate/__init__.py +12 -0
  129. tools/migrate/__main__.py +131 -0
  130. tools/migrate/legacy_schemas.py +621 -0
  131. tools/migrate/migrate.py +598 -0
  132. tools/migrate/tests/__init__.py +0 -0
  133. tools/migrate/tests/conftest.py +23 -0
  134. tools/migrate/tests/test_migrate.py +627 -0
  135. tools/migrate/tests/test_migration_e2e.py +1010 -0
  136. tools/migrate/tests/test_node_migrations.py +813 -0
  137. flowfile_core/flowfile/manage/open_flowfile.py +0 -143
  138. {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
  139. /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
@@ -1,15 +1,23 @@
1
- from typing import List, Optional, Literal, Iterator, Any
1
+ from typing import List, Optional, Literal, Iterator, Any, Annotated
2
2
  from flowfile_core.schemas import transform_schema
3
3
  from pathlib import Path
4
4
  import os
5
5
  from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
6
6
  from flowfile_core.schemas.cloud_storage_schemas import CloudStorageReadSettings, CloudStorageWriteSettings
7
- from flowfile_core.schemas.schemas import SecretRef
7
+ from flowfile_core.schemas.yaml_types import (
8
+ OutputSettingsYaml, NodeSelectYaml, NodeJoinYaml,
9
+ NodeCrossJoinYaml, NodeFuzzyMatchYaml, NodeOutputYaml
10
+ )
8
11
  from flowfile_core.utils.utils import ensure_similarity_dicts, standardize_col_dtype
9
- from pydantic import BaseModel, Field, model_validator, SecretStr, ConfigDict
12
+ from pydantic import (BaseModel, Field, model_validator, field_validator,
13
+ SecretStr, ConfigDict, StringConstraints, ValidationInfo)
10
14
  import polars as pl
11
15
 
12
16
 
17
+ SecretRef = Annotated[str, StringConstraints(min_length=1, max_length=100),
18
+ Field(description="An ID referencing an encrypted secret.")]
19
+
20
+
13
21
  OutputConnectionClass = Literal['output-0', 'output-1', 'output-2', 'output-3', 'output-4',
14
22
  'output-5', 'output-6', 'output-7', 'output-8', 'output-9']
15
23
 
@@ -43,57 +51,19 @@ class MinimalFieldInfo(BaseModel):
43
51
  data_type: str = "String"
44
52
 
45
53
 
46
- class ReceivedTableBase(BaseModel):
47
- """Base model for defining a table received from an external source."""
48
- id: Optional[int] = None
49
- name: Optional[str]
50
- path: str # This can be an absolute or relative path
51
- directory: Optional[str] = None
52
- analysis_file_available: bool = False
53
- status: Optional[str] = None
54
- file_type: Optional[str] = None
55
- fields: List[MinimalFieldInfo] = Field(default_factory=list)
56
- abs_file_path: Optional[str] = None
57
-
58
- @classmethod
59
- def create_from_path(cls, path: str):
60
- """Creates an instance from a file path string."""
61
- filename = Path(path).name
62
- return cls(name=filename, path=path)
63
-
64
- @property
65
- def file_path(self) -> str:
66
- """Constructs the full file path from the directory and name."""
67
- if not self.name in self.path:
68
- return os.path.join(self.path, self.name)
69
- else:
70
- return self.path
71
-
72
- def set_absolute_filepath(self):
73
- """Resolves the path to an absolute file path."""
74
- base_path = Path(self.path).expanduser()
75
- if not base_path.is_absolute():
76
- base_path = Path.cwd() / base_path
77
- if self.name and self.name not in base_path.name:
78
- base_path = base_path / self.name
79
- self.abs_file_path = str(base_path.resolve())
80
-
81
- @model_validator(mode='after')
82
- def populate_abs_file_path(self):
83
- """Ensures the absolute file path is populated after validation."""
84
- if not self.abs_file_path:
85
- self.set_absolute_filepath()
86
- return self
54
+ class InputTableBase(BaseModel):
55
+ """Base settings for input file operations."""
56
+ file_type: str # Will be overridden with Literal in subclasses
87
57
 
88
58
 
89
- class ReceivedCsvTable(ReceivedTableBase):
59
+ class InputCsvTable(InputTableBase):
90
60
  """Defines settings for reading a CSV file."""
91
- file_type: str = 'csv'
61
+ file_type: Literal['csv'] = 'csv'
92
62
  reference: str = ''
93
63
  starting_from_line: int = 0
94
64
  delimiter: str = ','
95
65
  has_headers: bool = True
96
- encoding: Optional[str] = 'utf-8'
66
+ encoding: str = 'utf-8'
97
67
  parquet_ref: Optional[str] = None
98
68
  row_delimiter: str = '\n'
99
69
  quote_char: str = '"'
@@ -102,18 +72,19 @@ class ReceivedCsvTable(ReceivedTableBase):
102
72
  ignore_errors: bool = False
103
73
 
104
74
 
105
- class ReceivedJsonTable(ReceivedCsvTable):
106
- """Defines settings for reading a JSON file (inherits from CSV settings)."""
107
- pass
75
+ class InputJsonTable(InputCsvTable):
76
+ """Defines settings for reading a JSON file."""
77
+ file_type: Literal['json'] = 'json'
108
78
 
109
79
 
110
- class ReceivedParquetTable(ReceivedTableBase):
80
+ class InputParquetTable(InputTableBase):
111
81
  """Defines settings for reading a Parquet file."""
112
- file_type: str = 'parquet'
82
+ file_type: Literal['parquet'] = 'parquet'
113
83
 
114
84
 
115
- class ReceivedExcelTable(ReceivedTableBase):
85
+ class InputExcelTable(InputTableBase):
116
86
  """Defines settings for reading an Excel file."""
87
+ file_type: Literal['excel'] = 'excel'
117
88
  sheet_name: Optional[str] = None
118
89
  start_row: int = 0
119
90
  start_column: int = 0
@@ -122,51 +93,185 @@ class ReceivedExcelTable(ReceivedTableBase):
122
93
  has_headers: bool = True
123
94
  type_inference: bool = False
124
95
 
96
+ @model_validator(mode='after')
125
97
  def validate_range_values(self):
126
98
  """Validates that the Excel cell range is logical."""
127
99
  for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
128
100
  if not isinstance(attribute, int) or attribute < 0:
129
101
  raise ValueError("Row and column indices must be non-negative integers")
130
102
  if (self.end_row > 0 and self.start_row > self.end_row) or \
131
- (self.end_column > 0 and self.start_column > self.end_column):
103
+ (self.end_column > 0 and self.start_column > self.end_column):
132
104
  raise ValueError("Start row/column must not be greater than end row/column")
105
+ return self
106
+
107
+
108
+ # Create the discriminated union (similar to OutputTableSettings)
109
+ InputTableSettings = Annotated[
110
+ InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
111
+ Field(discriminator='file_type')
112
+ ]
113
+
114
+
115
+ # Now create the main ReceivedTable model
116
+ class ReceivedTable(BaseModel):
117
+ """Model for defining a table received from an external source."""
118
+ # Metadata fields
119
+ id: Optional[int] = None
120
+ name: Optional[str] = None
121
+ path: str # This can be an absolute or relative path
122
+ directory: Optional[str] = None
123
+ analysis_file_available: bool = False
124
+ status: Optional[str] = None
125
+ fields: List[MinimalFieldInfo] = Field(default_factory=list)
126
+ abs_file_path: Optional[str] = None
127
+
128
+ file_type: Literal['csv', 'json', 'parquet', 'excel']
129
+
130
+ table_settings: InputTableSettings
131
+
132
+ @classmethod
133
+ def create_from_path(cls, path: str, file_type: Literal['csv', 'json', 'parquet', 'excel'] = 'csv'):
134
+ """Creates an instance from a file path string."""
135
+ filename = Path(path).name
136
+
137
+ # Create appropriate table_settings based on file_type
138
+ settings_map = {
139
+ 'csv': InputCsvTable(),
140
+ 'json': InputJsonTable(),
141
+ 'parquet': InputParquetTable(),
142
+ 'excel': InputExcelTable(),
143
+ }
144
+
145
+ return cls(
146
+ name=filename,
147
+ path=path,
148
+ file_type=file_type,
149
+ table_settings=settings_map.get(file_type, InputCsvTable())
150
+ )
151
+
152
+ @property
153
+ def file_path(self) -> str:
154
+ """Constructs the full file path from the directory and name."""
155
+ if self.name and self.name not in self.path:
156
+ return os.path.join(self.path, self.name)
157
+ else:
158
+ return self.path
159
+
160
+ def set_absolute_filepath(self):
161
+ """Resolves the path to an absolute file path."""
162
+ base_path = Path(self.path).expanduser()
163
+ if not base_path.is_absolute():
164
+ base_path = Path.cwd() / base_path
165
+ if self.name and self.name not in base_path.name:
166
+ base_path = base_path / self.name
167
+ self.abs_file_path = str(base_path.resolve())
168
+
169
+ @model_validator(mode='before')
170
+ @classmethod
171
+ def set_default_table_settings(cls, data):
172
+ """Create default table_settings based on file_type if not provided."""
173
+ if isinstance(data, dict):
174
+ if 'table_settings' not in data or data['table_settings'] is None:
175
+ data['table_settings'] = {}
133
176
 
177
+ if isinstance(data['table_settings'], dict) and 'file_type' not in data['table_settings']:
178
+ data['table_settings']['file_type'] = data.get('file_type', 'csv')
179
+ return data
134
180
 
135
- class ReceivedTable(ReceivedExcelTable, ReceivedCsvTable, ReceivedParquetTable):
136
- """A comprehensive model that can represent any type of received table."""
137
- ...
181
+ @model_validator(mode='after')
182
+ def populate_abs_file_path(self):
183
+ """Ensures the absolute file path is populated after validation."""
184
+ if not self.abs_file_path:
185
+ self.set_absolute_filepath()
186
+ return self
138
187
 
139
188
 
140
189
  class OutputCsvTable(BaseModel):
141
190
  """Defines settings for writing a CSV file."""
142
- file_type: str = 'csv'
191
+ file_type: Literal['csv'] = 'csv'
143
192
  delimiter: str = ','
144
193
  encoding: str = 'utf-8'
145
194
 
146
195
 
147
196
  class OutputParquetTable(BaseModel):
148
197
  """Defines settings for writing a Parquet file."""
149
- file_type: str = 'parquet'
198
+ file_type: Literal['parquet'] = 'parquet'
150
199
 
151
200
 
152
201
  class OutputExcelTable(BaseModel):
153
202
  """Defines settings for writing an Excel file."""
154
- file_type: str = 'excel'
203
+ file_type: Literal['excel'] = 'excel'
155
204
  sheet_name: str = 'Sheet1'
156
205
 
157
206
 
207
+ # Create a discriminated union
208
+ OutputTableSettings = Annotated[
209
+ OutputCsvTable | OutputParquetTable | OutputExcelTable,
210
+ Field(discriminator='file_type')
211
+ ]
212
+
213
+
158
214
  class OutputSettings(BaseModel):
159
215
  """Defines the complete settings for an output node."""
160
216
  name: str
161
217
  directory: str
162
- file_type: str
218
+ file_type: str # This drives which table_settings to use
163
219
  fields: Optional[List[str]] = Field(default_factory=list)
164
220
  write_mode: str = 'overwrite'
165
- output_csv_table: Optional[OutputCsvTable] = Field(default_factory=OutputCsvTable)
166
- output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
167
- output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
221
+ table_settings: OutputTableSettings
168
222
  abs_file_path: Optional[str] = None
169
223
 
224
+ def to_yaml_dict(self) -> OutputSettingsYaml:
225
+ """Converts the output settings to a dictionary suitable for YAML serialization."""
226
+ result: OutputSettingsYaml = {
227
+ "name": self.name,
228
+ "directory": self.directory,
229
+ "file_type": self.file_type,
230
+ "write_mode": self.write_mode,
231
+ }
232
+ if self.abs_file_path:
233
+ result["abs_file_path"] = self.abs_file_path
234
+ if self.fields:
235
+ result["fields"] = self.fields
236
+ # Only include table_settings if it has non-default values beyond file_type
237
+ ts_dict = self.table_settings.model_dump(exclude={"file_type"})
238
+ if any(v for v in ts_dict.values()): # Has meaningful settings
239
+ result["table_settings"] = ts_dict
240
+ return result
241
+
242
+ @property
243
+ def sheet_name(self) -> str | None:
244
+ if self.file_type == 'excel':
245
+ return self.table_settings.sheet_name
246
+
247
+ @property
248
+ def delimiter(self) -> str | None:
249
+ if self.file_type == 'csv':
250
+ return self.table_settings.delimiter
251
+
252
+ @field_validator('table_settings', mode='before')
253
+ @classmethod
254
+ def validate_table_settings(cls, v, info: ValidationInfo):
255
+ """Ensures table_settings matches the file_type."""
256
+ if v is None:
257
+ file_type = info.data.get('file_type', 'csv')
258
+ # Create default based on file_type
259
+ match file_type:
260
+ case 'csv':
261
+ return OutputCsvTable()
262
+ case 'parquet':
263
+ return OutputParquetTable()
264
+ case 'excel':
265
+ return OutputExcelTable()
266
+ case _:
267
+ return OutputCsvTable()
268
+
269
+ # If it's a dict, add file_type if missing
270
+ if isinstance(v, dict) and 'file_type' not in v:
271
+ v['file_type'] = info.data.get('file_type', 'csv')
272
+
273
+ return v
274
+
170
275
  def set_absolute_filepath(self):
171
276
  """Resolves the output directory and name into an absolute path."""
172
277
  base_path = Path(self.directory)
@@ -205,7 +310,7 @@ class NodeSingleInput(NodeBase):
205
310
 
206
311
  class NodeMultiInput(NodeBase):
207
312
  """A base model for any node that takes multiple data inputs."""
208
- depending_on_ids: Optional[List[int]] = [-1]
313
+ depending_on_ids: Optional[List[int]] = Field(default_factory=list)
209
314
 
210
315
 
211
316
  class NodeSelect(NodeSingleInput):
@@ -214,6 +319,15 @@ class NodeSelect(NodeSingleInput):
214
319
  select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
215
320
  sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
216
321
 
322
+ def to_yaml_dict(self) -> NodeSelectYaml:
323
+ """Converts the select node settings to a dictionary for YAML serialization."""
324
+ return {
325
+ "cache_results": self.cache_results,
326
+ "keep_missing": self.keep_missing,
327
+ "select_input": [s.to_yaml_dict() for s in self.select_input],
328
+ "sorted_by": self.sorted_by,
329
+ }
330
+
217
331
 
218
332
  class NodeFilter(NodeSingleInput):
219
333
  """Settings for a node that filters rows based on a condition."""
@@ -249,6 +363,18 @@ class NodeJoin(NodeMultiInput):
249
363
  auto_keep_right: bool = True
250
364
  auto_keep_left: bool = True
251
365
 
366
+ def to_yaml_dict(self) -> NodeJoinYaml:
367
+ """Converts the join node settings to a dictionary for YAML serialization."""
368
+ return {
369
+ "cache_results": self.cache_results,
370
+ "auto_generate_selection": self.auto_generate_selection,
371
+ "verify_integrity": self.verify_integrity,
372
+ "join_input": self.join_input.to_yaml_dict(),
373
+ "auto_keep_all": self.auto_keep_all,
374
+ "auto_keep_right": self.auto_keep_right,
375
+ "auto_keep_left": self.auto_keep_left,
376
+ }
377
+
252
378
 
253
379
  class NodeCrossJoin(NodeMultiInput):
254
380
  """Settings for a node that performs a cross join."""
@@ -259,11 +385,35 @@ class NodeCrossJoin(NodeMultiInput):
259
385
  auto_keep_right: bool = True
260
386
  auto_keep_left: bool = True
261
387
 
388
+ def to_yaml_dict(self) -> NodeCrossJoinYaml:
389
+ """Converts the cross join node settings to a dictionary for YAML serialization."""
390
+ return {
391
+ "cache_results": self.cache_results,
392
+ "auto_generate_selection": self.auto_generate_selection,
393
+ "verify_integrity": self.verify_integrity,
394
+ "cross_join_input": self.cross_join_input.to_yaml_dict(),
395
+ "auto_keep_all": self.auto_keep_all,
396
+ "auto_keep_right": self.auto_keep_right,
397
+ "auto_keep_left": self.auto_keep_left,
398
+ }
399
+
262
400
 
263
401
  class NodeFuzzyMatch(NodeJoin):
264
402
  """Settings for a node that performs a fuzzy join based on string similarity."""
265
403
  join_input: transform_schema.FuzzyMatchInput
266
404
 
405
+ def to_yaml_dict(self) -> NodeFuzzyMatchYaml:
406
+ """Converts the fuzzy match node settings to a dictionary for YAML serialization."""
407
+ return {
408
+ "cache_results": self.cache_results,
409
+ "auto_generate_selection": self.auto_generate_selection,
410
+ "verify_integrity": self.verify_integrity,
411
+ "join_input": self.join_input.to_yaml_dict(),
412
+ "auto_keep_all": self.auto_keep_all,
413
+ "auto_keep_right": self.auto_keep_right,
414
+ "auto_keep_left": self.auto_keep_left,
415
+ }
416
+
267
417
 
268
418
  class NodeDatasource(NodeBase):
269
419
  """Base settings for a node that acts as a data source."""
@@ -465,6 +615,13 @@ class NodeOutput(NodeSingleInput):
465
615
  """Settings for a node that writes its input to a file."""
466
616
  output_settings: OutputSettings
467
617
 
618
+ def to_yaml_dict(self) -> NodeOutputYaml:
619
+ """Converts the output node settings to a dictionary for YAML serialization."""
620
+ return {
621
+ "cache_results": self.cache_results,
622
+ "output_settings": self.output_settings.to_yaml_dict(),
623
+ }
624
+
468
625
 
469
626
  class NodeOutputConnection(BaseModel):
470
627
  """Represents the output side of a connection between two nodes."""
@@ -25,7 +25,7 @@ class RunInformation(BaseModel):
25
25
  nodes_completed: int = 0
26
26
  number_of_nodes: int = 0
27
27
  node_step_result: List[NodeResult]
28
- run_type: Literal["fetch_one", "full_run"]
28
+ run_type: Literal["fetch_one", "full_run", "init"]
29
29
 
30
30
 
31
31
  class BaseItem(BaseModel):
@@ -1,13 +1,48 @@
1
- from typing import Optional, List, Dict, Tuple, Any, Literal, Annotated
2
- from pydantic import BaseModel, field_validator, ConfigDict, Field, StringConstraints
1
+ from typing import Optional, List, Dict, Tuple, Any, Literal, ClassVar
2
+ from pydantic import BaseModel, field_validator, ConfigDict, Field, ValidationInfo, field_serializer
3
3
  from flowfile_core.flowfile.utils import create_unique_id
4
4
  from flowfile_core.configs.settings import OFFLOAD_TO_WORKER
5
+ from flowfile_core.schemas import input_schema
5
6
  ExecutionModeLiteral = Literal['Development', 'Performance']
6
7
  ExecutionLocationsLiteral = Literal['local', 'remote']
7
8
 
8
9
  # Type literals for classifying nodes.
9
10
  NodeTypeLiteral = Literal['input', 'output', 'process']
10
11
  TransformTypeLiteral = Literal['narrow', 'wide', 'other']
12
+ _custom_node_store_cache = None
13
+
14
+ NODE_TYPE_TO_SETTINGS_CLASS = {
15
+ 'manual_input': input_schema.NodeManualInput,
16
+ 'filter': input_schema.NodeFilter,
17
+ 'formula': input_schema.NodeFormula,
18
+ 'select': input_schema.NodeSelect,
19
+ 'sort': input_schema.NodeSort,
20
+ 'record_id': input_schema.NodeRecordId,
21
+ 'sample': input_schema.NodeSample,
22
+ 'unique': input_schema.NodeUnique,
23
+ 'group_by': input_schema.NodeGroupBy,
24
+ 'pivot': input_schema.NodePivot,
25
+ 'unpivot': input_schema.NodeUnpivot,
26
+ 'text_to_rows': input_schema.NodeTextToRows,
27
+ 'graph_solver': input_schema.NodeGraphSolver,
28
+ 'polars_code': input_schema.NodePolarsCode,
29
+ 'join': input_schema.NodeJoin,
30
+ 'cross_join': input_schema.NodeCrossJoin,
31
+ 'fuzzy_match': input_schema.NodeFuzzyMatch,
32
+ 'record_count': input_schema.NodeRecordCount,
33
+ 'explore_data': input_schema.NodeExploreData,
34
+ 'union': input_schema.NodeUnion,
35
+ 'output': input_schema.NodeOutput,
36
+ 'read': input_schema.NodeRead,
37
+ 'database_reader': input_schema.NodeDatabaseReader,
38
+ 'database_writer': input_schema.NodeDatabaseWriter,
39
+ 'cloud_storage_reader': input_schema.NodeCloudStorageReader,
40
+ 'cloud_storage_writer': input_schema.NodeCloudStorageWriter,
41
+ 'external_source': input_schema.NodeExternalSource,
42
+ 'promise': input_schema.NodePromise,
43
+ 'user_defined': input_schema.UserDefinedNode,
44
+ }
45
+
11
46
 
12
47
  def get_global_execution_location() -> ExecutionLocationsLiteral:
13
48
  """
@@ -21,6 +56,25 @@ def get_global_execution_location() -> ExecutionLocationsLiteral:
21
56
  return "local"
22
57
 
23
58
 
59
+ def _get_custom_node_store():
60
+ """Lazy load CUSTOM_NODE_STORE once and cache it."""
61
+ global _custom_node_store_cache
62
+ if _custom_node_store_cache is None:
63
+ from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
64
+ _custom_node_store_cache = CUSTOM_NODE_STORE
65
+ return _custom_node_store_cache
66
+
67
+
68
+ def get_settings_class_for_node_type(node_type: str):
69
+ """Get the settings class for a node type, supporting both standard and user-defined nodes."""
70
+ model_class = NODE_TYPE_TO_SETTINGS_CLASS.get(node_type)
71
+ if model_class is None:
72
+ if node_type in _get_custom_node_store():
73
+ return input_schema.UserDefinedNode
74
+ return None
75
+ return model_class
76
+
77
+
24
78
  def is_valid_execution_location_in_current_global_settings(execution_location: ExecutionLocationsLiteral) -> bool:
25
79
  return not (get_global_execution_location() == "local" and execution_location == "remote")
26
80
 
@@ -117,6 +171,60 @@ class RawLogInput(BaseModel):
117
171
  extra: Optional[dict] = None
118
172
 
119
173
 
174
class FlowfileSettings(BaseModel):
    """Flow-level settings as stored in a serialized flowfile (YAML/JSON).

    Only persistent configuration lives here; runtime state fields such as
    is_running, is_canceled and modified_on are intentionally not part of
    this model.
    """
    # Free-text description of the flow; absent by default.
    description: Optional[str] = None
    # Execution mode and location, defaulting to 'Performance' / 'local'.
    execution_mode: ExecutionModeLiteral = 'Performance'
    execution_location: ExecutionLocationsLiteral = 'local'
    # Boolean flags with their serialized defaults.
    auto_save: bool = False
    show_detailed_progress: bool = True
184
+
185
+
186
class FlowfileNode(BaseModel):
    """Node representation for flowfile serialization (YAML/JSON).

    Carries a node's identity, canvas position, connections and its
    type-specific settings (``setting_input``). The settings are serialized
    through ``serialize_setting_input``.
    """
    id: int
    type: str
    is_start_node: bool = False
    description: Optional[str] = ''
    x_position: Optional[int] = 0
    y_position: Optional[int] = 0
    left_input_id: Optional[int] = None
    right_input_id: Optional[int] = None
    input_ids: Optional[List[int]] = Field(default_factory=list)
    outputs: Optional[List[int]] = Field(default_factory=list)
    setting_input: Optional[Any] = None

    # Keys dropped from the generic ``model_dump`` fallback in
    # ``serialize_setting_input``.
    _setting_input_exclude: ClassVar[set] = {
        'flow_id', 'node_id', 'pos_x', 'pos_y', 'is_setup',
        'description', 'user_id', 'is_flow_output', 'is_user_defined',
        'depending_on_id', 'depending_on_ids'
    }

    @field_serializer('setting_input')
    def serialize_setting_input(self, value, _info):
        """Serialize the node settings for the flowfile output.

        Returns:
            ``None`` when there are no settings or the settings are a
            ``NodePromise``; the result of ``value.to_yaml_dict()`` when the
            settings object provides that method; otherwise
            ``value.model_dump()`` with the keys in ``_setting_input_exclude``
            removed.
        """
        if value is None or isinstance(value, input_schema.NodePromise):
            return None
        # Settings that define their own YAML representation take precedence
        # over the generic dump. (This check was previously duplicated.)
        if hasattr(value, 'to_yaml_dict'):
            return value.to_yaml_dict()
        return value.model_dump(exclude=self._setting_input_exclude)
217
+
218
+
219
class FlowfileData(BaseModel):
    """Top-level document model of a serialized flowfile (YAML/JSON).

    Bundles the flowfile's version, id and name with its settings and the
    list of serialized nodes. All fields are required.
    """
    flowfile_version: str
    flowfile_id: int
    flowfile_name: str
    # Flow-level settings, followed by every node in the flow.
    flowfile_settings: FlowfileSettings
    nodes: List[FlowfileNode]
226
+
227
+
120
228
  class NodeTemplate(BaseModel):
121
229
  """
122
230
  Defines the template for a node type, specifying its UI and functional characteristics.
@@ -151,48 +259,47 @@ class NodeTemplate(BaseModel):
151
259
class NodeInformation(BaseModel):
    """
    Stores the state and configuration of a specific node instance within a flow.

    Attributes:
        id (Optional[int]): The unique ID of the node instance.
        type (Optional[str]): The type of the node (e.g., 'join', 'filter').
        is_setup (Optional[bool]): Whether the node has been configured.
        is_start_node (bool): Start-node flag; defaults to False.
        description (Optional[str]): A user-provided description.
        x_position (Optional[int]): The x-coordinate on the canvas.
        y_position (Optional[int]): The y-coordinate on the canvas.
        left_input_id (Optional[int]): The ID of the node connected to the left input.
        right_input_id (Optional[int]): The ID of the node connected to the right input.
        input_ids (Optional[List[int]]): A list of IDs for main input nodes.
        outputs (Optional[List[int]]): A list of IDs for nodes this node outputs to.
        setting_input (Optional[Any]): The specific settings for this node instance.
    """
    id: Optional[int] = None
    # NOTE: ``type`` must stay declared before ``setting_input``; the validator
    # below reads it from the already-validated fields in ``info.data``.
    type: Optional[str] = None
    is_setup: Optional[bool] = None
    is_start_node: bool = False
    description: Optional[str] = ''
    x_position: Optional[int] = 0
    y_position: Optional[int] = 0
    left_input_id: Optional[int] = None
    right_input_id: Optional[int] = None
    input_ids: Optional[List[int]] = Field(default_factory=list)
    outputs: Optional[List[int]] = Field(default_factory=list)
    setting_input: Optional[Any] = None

    @property
    def data(self) -> Any:
        """Return the node's specific settings (alias for ``setting_input``)."""
        return self.setting_input

    @property
    def main_input_ids(self) -> Optional[List[int]]:
        """Return the main input node IDs (alias for ``input_ids``)."""
        return self.input_ids

    @field_validator('setting_input', mode='before')
    @classmethod
    def validate_setting_input(cls, v, info: ValidationInfo):
        """Coerce raw ``setting_input`` data into the settings model for the node type.

        ``None`` and values that are already pydantic models are passed through
        unchanged. Anything else is validated against the settings class that
        ``get_settings_class_for_node_type`` resolves for the node's ``type``.

        Raises:
            ValueError: If no settings class is known for the node type.
        """
        if v is None:
            return None
        # Already a model instance: accept as-is. This also covers instances
        # of the resolved settings class, so no second isinstance check is needed.
        if isinstance(v, BaseModel):
            return v

        node_type = info.data.get('type')
        model_class = get_settings_class_for_node_type(node_type)
        if model_class is None:
            raise ValueError(f"Unknown node type: {node_type}")

        return model_class.model_validate(v)
302
+
196
303
 
197
304
  class FlowInformation(BaseModel):
198
305
  """
@@ -223,6 +330,19 @@ class FlowInformation(BaseModel):
223
330
  return str(v) if v is not None else ''
224
331
 
225
332
 
333
class NodeConnection(BaseModel):
    """
    A directed link between two nodes of a flow.

    The model is configured as frozen, so instances cannot be modified after
    creation.

    Attributes:
        from_node_id (int): The ID of the source node.
        to_node_id (int): The ID of the target node.
    """
    model_config = ConfigDict(frozen=True)

    from_node_id: int
    to_node_id: int
344
+
345
+
226
346
  class NodeInput(NodeTemplate):
227
347
  """
228
348
  Represents a node as it is received from the frontend, including position.
@@ -269,8 +389,6 @@ class VueFlowInput(BaseModel):
269
389
  node_inputs: List[NodeInput]
270
390
 
271
391
 
272
-
273
-
274
392
  class NodeDefault(BaseModel):
275
393
  """
276
394
  Defines default properties for a node type.
@@ -285,8 +403,3 @@ class NodeDefault(BaseModel):
285
403
  node_type: NodeTypeLiteral
286
404
  transform_type: TransformTypeLiteral
287
405
  has_default_settings: Optional[Any] = None
288
-
289
-
290
- # Define SecretRef here if not in a common location
291
- SecretRef = Annotated[str, StringConstraints(min_length=1, max_length=100),
292
- Field(description="An ID referencing an encrypted secret.")]