Flowfile 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +3 -1
- flowfile/api.py +1 -2
- flowfile/web/static/assets/{CloudConnectionManager-d3248f8d.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-d65bf041.js → CloudStorageReader-d5b1b6c9.js} +6 -6
- flowfile/web/static/assets/{CloudStorageWriter-e83be3ed.js → CloudStorageWriter-00d87aad.js} +6 -6
- flowfile/web/static/assets/{ColumnSelector-cce661cf.js → ColumnSelector-4685e75d.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-cf18d2cc.js → ContextMenu-23e909da.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-160afb08.js → ContextMenu-70ae0c79.js} +1 -1
- flowfile/web/static/assets/{ContextMenu-11a4652a.js → ContextMenu-f149cf7c.js} +1 -1
- flowfile/web/static/assets/{CrossJoin-d395d38c.js → CrossJoin-702a3edd.js} +7 -7
- flowfile/web/static/assets/{CustomNode-b812dc0b.js → CustomNode-b1519993.js} +11 -11
- flowfile/web/static/assets/{DatabaseConnectionSettings-7000bf2c.js → DatabaseConnectionSettings-6f3e4ea5.js} +2 -2
- flowfile/web/static/assets/{DatabaseManager-9662ec5b.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-4f035d0c.js → DatabaseReader-d38c7295.js} +9 -9
- flowfile/web/static/assets/{DatabaseWriter-f65dcd54.js → DatabaseWriter-b04ef46a.js} +8 -8
- flowfile/web/static/assets/{ExploreData-94c43dfc.js → ExploreData-5fa10ed8.js} +5 -5
- flowfile/web/static/assets/{ExternalSource-ac04b3cc.js → ExternalSource-d39af878.js} +5 -5
- flowfile/web/static/assets/{Filter-812dcbca.js → Filter-9b6d08db.js} +7 -7
- flowfile/web/static/assets/{Formula-71472193.js → Formula-6b04fb1d.js} +7 -7
- flowfile/web/static/assets/{FuzzyMatch-b317f631.js → FuzzyMatch-999521f4.js} +8 -8
- flowfile/web/static/assets/{GraphSolver-754a234f.js → GraphSolver-17dd2198.js} +6 -6
- flowfile/web/static/assets/{GroupBy-6c6f9802.js → GroupBy-6b039e18.js} +5 -5
- flowfile/web/static/assets/{Join-a1b800be.js → Join-24d0f113.js} +8 -8
- flowfile/web/static/assets/{ManualInput-a9640276.js → ManualInput-34639209.js} +4 -4
- flowfile/web/static/assets/{MultiSelect-97213888.js → MultiSelect-0e8724a3.js} +2 -2
- flowfile/web/static/assets/{MultiSelect.vue_vue_type_script_setup_true_lang-6ffe088a.js → MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js} +1 -1
- flowfile/web/static/assets/{NumericInput-e638088a.js → NumericInput-3d63a470.js} +2 -2
- flowfile/web/static/assets/{NumericInput.vue_vue_type_script_setup_true_lang-90eb2cba.js → NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js} +1 -1
- flowfile/web/static/assets/{Output-ddc9079f.css → Output-283fe388.css} +5 -5
- flowfile/web/static/assets/{Output-76750610.js → Output-edea9802.js} +57 -38
- flowfile/web/static/assets/{Pivot-7814803f.js → Pivot-61d19301.js} +7 -7
- flowfile/web/static/assets/{PivotValidation-f92137d2.js → PivotValidation-de9f43fe.js} +1 -1
- flowfile/web/static/assets/{PivotValidation-76dd431a.js → PivotValidation-f97fec5b.js} +1 -1
- flowfile/web/static/assets/{PolarsCode-889c3008.js → PolarsCode-bc3c9984.js} +5 -5
- flowfile/web/static/assets/{Read-637b72a7.js → Read-64a3f259.js} +80 -105
- flowfile/web/static/assets/{Read-6b17491f.css → Read-e808b239.css} +10 -10
- flowfile/web/static/assets/{RecordCount-2b050c41.js → RecordCount-3d5039be.js} +4 -4
- flowfile/web/static/assets/{RecordId-81df7784.js → RecordId-597510e0.js} +6 -6
- flowfile/web/static/assets/{SQLQueryComponent-88dcfe53.js → SQLQueryComponent-df51adbe.js} +1 -1
- flowfile/web/static/assets/{Sample-258ad2a9.js → Sample-4be0a507.js} +4 -4
- flowfile/web/static/assets/{SecretManager-2a2cb7e2.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-850215fd.js → Select-9b72f201.js} +7 -7
- flowfile/web/static/assets/{SettingsSection-29b4fa6b.js → SettingsSection-7ded385d.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-0e8d9123.js → SettingsSection-e1e9c953.js} +1 -1
- flowfile/web/static/assets/{SettingsSection-55bae608.js → SettingsSection-f0f75a42.js} +1 -1
- flowfile/web/static/assets/{SingleSelect-bebd408b.js → SingleSelect-6c777aac.js} +2 -2
- flowfile/web/static/assets/{SingleSelect.vue_vue_type_script_setup_true_lang-6093741c.js → SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js} +1 -1
- flowfile/web/static/assets/{SliderInput-6a05ab61.js → SliderInput-7cb93e62.js} +1 -1
- flowfile/web/static/assets/{Sort-10ab48ed.js → Sort-6cbde21a.js} +5 -5
- flowfile/web/static/assets/{TextInput-df9d6259.js → TextInput-d9a40c11.js} +2 -2
- flowfile/web/static/assets/{TextInput.vue_vue_type_script_setup_true_lang-000e1178.js → TextInput.vue_vue_type_script_setup_true_lang-5896c375.js} +1 -1
- flowfile/web/static/assets/{TextToRows-6c2d93d8.js → TextToRows-c4fcbf4d.js} +7 -7
- flowfile/web/static/assets/{ToggleSwitch-0ff7ac52.js → ToggleSwitch-4ef91d19.js} +2 -2
- flowfile/web/static/assets/{ToggleSwitch.vue_vue_type_script_setup_true_lang-c6dc3029.js → ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js} +1 -1
- flowfile/web/static/assets/{UnavailableFields-1bab97cb.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-b563478a.js → Union-bfe9b996.js} +4 -4
- flowfile/web/static/assets/{Unique-f90db5db.js → Unique-5d023a27.js} +8 -20
- flowfile/web/static/assets/{Unpivot-bcb0025f.js → Unpivot-91cc5354.js} +6 -6
- flowfile/web/static/assets/{UnpivotValidation-c4e73b04.js → UnpivotValidation-7ee2de44.js} +1 -1
- flowfile/web/static/assets/{VueGraphicWalker-bb8535e2.js → VueGraphicWalker-e51b9924.js} +1 -1
- flowfile/web/static/assets/{api-2d6adc4f.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-4c8e3822.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-e3c150ec.css → designer-8da3ba3a.css} +90 -67
- flowfile/web/static/assets/{designer-f3656d8c.js → designer-9633482a.js} +119 -51
- flowfile/web/static/assets/{documentation-52b241e7.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-1bca8a74.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-2985687e.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-0476ba4e.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-246f201c.js → index-5429bbf8.js} +6 -8
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/{outputCsv-d686eeaf.js → outputCsv-076b85ab.js} +1 -1
- flowfile/web/static/assets/{outputExcel-8809ea2f.js → outputExcel-0fd17dbe.js} +1 -1
- flowfile/web/static/assets/{outputParquet-53ba645a.js → outputParquet-b61e0847.js} +1 -1
- flowfile/web/static/assets/{readCsv-053bf97b.js → readCsv-a8bb8b61.js} +21 -20
- flowfile/web/static/assets/{readCsv-bca3ed53.css → readCsv-c767cb37.css} +13 -13
- flowfile/web/static/assets/{readExcel-ad531eab.js → readExcel-67b4aee0.js} +10 -12
- flowfile/web/static/assets/{readExcel-e1b381ea.css → readExcel-806d2826.css} +12 -12
- flowfile/web/static/assets/{readParquet-cee068e2.css → readParquet-48c81530.css} +3 -3
- flowfile/web/static/assets/{readParquet-58e899a1.js → readParquet-92ce1dbc.js} +4 -7
- flowfile/web/static/assets/{secretApi-538058f3.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-b38de2ba.js → selectDynamic-92e25ee3.js} +3 -3
- flowfile/web/static/assets/{vue-codemirror.esm-db9b8936.js → vue-codemirror.esm-41b0e0d7.js} +7 -4
- flowfile/web/static/assets/{vue-content-loader.es-b5f3ac30.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/METADATA +3 -2
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/RECORD +138 -126
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +184 -78
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +129 -26
- flowfile_core/flowfile/flow_node/flow_node.py +3 -0
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +5 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +1 -1
- flowfile_core/flowfile/node_designer/_type_registry.py +2 -2
- flowfile_core/flowfile/node_designer/custom_node.py +1 -1
- flowfile_core/flowfile/node_designer/ui_components.py +1 -1
- flowfile_core/flowfile/schema_callbacks.py +8 -5
- flowfile_core/flowfile/setting_generator/settings.py +15 -9
- flowfile_core/routes/routes.py +8 -10
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +222 -65
- flowfile_core/schemas/output_model.py +1 -1
- flowfile_core/schemas/schemas.py +145 -32
- flowfile_core/schemas/transform_schema.py +1083 -413
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/{flowfile/node_designer/data_types.py → types.py} +11 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +15 -18
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +3 -0
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/routes.py +5 -8
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile_core/flowfile/manage/open_flowfile.py +0 -143
- {flowfile-0.4.1.dist-info → flowfile-0.5.1.dist-info}/licenses/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
flowfile_worker/create/models.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from pydantic import BaseModel, Field, model_validator
|
|
2
|
-
from typing import List, Optional
|
|
1
|
+
from pydantic import BaseModel, Field, model_validator, field_validator
|
|
2
|
+
from typing import List, Optional, Literal, Annotated
|
|
3
3
|
import os
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
@@ -9,78 +9,146 @@ class MinimalFieldInfo(BaseModel):
|
|
|
9
9
|
data_type: str
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
class
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
path: str
|
|
16
|
-
directory: Optional[str] = None
|
|
17
|
-
analysis_file_available: Optional[bool] = False
|
|
18
|
-
status: Optional[str] = None
|
|
19
|
-
file_type: Optional[str] = None
|
|
20
|
-
fields: List[MinimalFieldInfo] = Field(default_factory=list)
|
|
21
|
-
abs_file_path: Optional[str] = None
|
|
12
|
+
class InputTableBase(BaseModel):
|
|
13
|
+
"""Base settings for input file operations."""
|
|
14
|
+
file_type: str # Will be overridden with Literal in subclasses
|
|
22
15
|
|
|
23
|
-
@classmethod
|
|
24
|
-
def create_from_path(cls, path: str):
|
|
25
|
-
filename = os.path.basename(path)
|
|
26
|
-
return cls(name=filename, path=path)
|
|
27
16
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
abs_file_path = getattr(values, "abs_file_path", None)
|
|
37
|
-
if abs_file_path is None:
|
|
38
|
-
path = getattr(values, "path", None)
|
|
39
|
-
if not path:
|
|
40
|
-
raise ValueError("Field 'path' is required to compute abs_file_path")
|
|
41
|
-
setattr(values, "abs_file_path", str(Path(path).absolute()))
|
|
42
|
-
return values
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class ReceivedCsvTable(ReceivedTableBase):
|
|
46
|
-
file_type: Optional[str] = 'csv'
|
|
47
|
-
reference: Optional[str] = ''
|
|
48
|
-
starting_from_line: Optional[int] = 0
|
|
49
|
-
delimiter: Optional[str] = ','
|
|
50
|
-
has_headers: Optional[bool] = True
|
|
51
|
-
encoding: Optional[str] = 'utf-8'
|
|
17
|
+
class InputCsvTable(InputTableBase):
|
|
18
|
+
"""Defines settings for reading a CSV file."""
|
|
19
|
+
file_type: Literal['csv'] = 'csv'
|
|
20
|
+
reference: str = ''
|
|
21
|
+
starting_from_line: int = 0
|
|
22
|
+
delimiter: str = ','
|
|
23
|
+
has_headers: bool = True
|
|
24
|
+
encoding: str = 'utf-8'
|
|
52
25
|
parquet_ref: Optional[str] = None
|
|
53
|
-
row_delimiter:
|
|
54
|
-
quote_char:
|
|
55
|
-
infer_schema_length:
|
|
56
|
-
truncate_ragged_lines:
|
|
57
|
-
ignore_errors:
|
|
26
|
+
row_delimiter: str = '\n'
|
|
27
|
+
quote_char: str = '"'
|
|
28
|
+
infer_schema_length: int = 10_000
|
|
29
|
+
truncate_ragged_lines: bool = False
|
|
30
|
+
ignore_errors: bool = False
|
|
58
31
|
|
|
59
32
|
|
|
60
|
-
class
|
|
61
|
-
|
|
33
|
+
class InputJsonTable(InputCsvTable):
|
|
34
|
+
"""Defines settings for reading a JSON file."""
|
|
35
|
+
file_type: Literal['json'] = 'json'
|
|
62
36
|
|
|
63
37
|
|
|
64
|
-
class
|
|
65
|
-
|
|
38
|
+
class InputParquetTable(InputTableBase):
|
|
39
|
+
"""Defines settings for reading a Parquet file."""
|
|
40
|
+
file_type: Literal['parquet'] = 'parquet'
|
|
66
41
|
|
|
67
42
|
|
|
68
|
-
class
|
|
43
|
+
class InputExcelTable(InputTableBase):
|
|
44
|
+
"""Defines settings for reading an Excel file."""
|
|
45
|
+
file_type: Literal['excel'] = 'excel'
|
|
69
46
|
sheet_name: Optional[str] = None
|
|
70
|
-
start_row:
|
|
71
|
-
start_column:
|
|
72
|
-
end_row:
|
|
73
|
-
end_column:
|
|
74
|
-
has_headers:
|
|
75
|
-
type_inference:
|
|
76
|
-
|
|
47
|
+
start_row: int = 0
|
|
48
|
+
start_column: int = 0
|
|
49
|
+
end_row: int = 0
|
|
50
|
+
end_column: int = 0
|
|
51
|
+
has_headers: bool = True
|
|
52
|
+
type_inference: bool = False
|
|
53
|
+
|
|
54
|
+
@model_validator(mode='after')
|
|
77
55
|
def validate_range_values(self):
|
|
78
|
-
|
|
56
|
+
"""Validates that the Excel cell range is logical."""
|
|
79
57
|
for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
|
|
80
58
|
if not isinstance(attribute, int) or attribute < 0:
|
|
81
59
|
raise ValueError("Row and column indices must be non-negative integers")
|
|
60
|
+
if (self.end_row > 0 and self.start_row > self.end_row) or \
|
|
61
|
+
(self.end_column > 0 and self.start_column > self.end_column):
|
|
62
|
+
raise ValueError("Start row/column must not be greater than end row/column")
|
|
63
|
+
return self
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# Create the discriminated union (similar to OutputTableSettings)
|
|
67
|
+
InputTableSettings = Annotated[
|
|
68
|
+
InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
|
|
69
|
+
Field(discriminator='file_type')
|
|
70
|
+
]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# Now create the main ReceivedTable model
|
|
74
|
+
class ReceivedTable(BaseModel):
|
|
75
|
+
"""Model for defining a table received from an external source."""
|
|
76
|
+
# Metadata fields
|
|
77
|
+
id: Optional[int] = None
|
|
78
|
+
name: Optional[str] = None
|
|
79
|
+
path: str # This can be an absolute or relative path
|
|
80
|
+
directory: Optional[str] = None
|
|
81
|
+
analysis_file_available: bool = False
|
|
82
|
+
status: Optional[str] = None
|
|
83
|
+
fields: List[MinimalFieldInfo] = Field(default_factory=list)
|
|
84
|
+
abs_file_path: Optional[str] = None
|
|
85
|
+
|
|
86
|
+
file_type: Literal['csv', 'json', 'parquet', 'excel']
|
|
87
|
+
|
|
88
|
+
table_settings: InputTableSettings
|
|
89
|
+
|
|
90
|
+
@classmethod
|
|
91
|
+
def create_from_path(cls, path: str, file_type: Literal['csv', 'json', 'parquet', 'excel'] = 'csv'):
|
|
92
|
+
"""Creates an instance from a file path string."""
|
|
93
|
+
filename = Path(path).name
|
|
94
|
+
|
|
95
|
+
# Create appropriate table_settings based on file_type
|
|
96
|
+
settings_map = {
|
|
97
|
+
'csv': InputCsvTable(),
|
|
98
|
+
'json': InputJsonTable(),
|
|
99
|
+
'parquet': InputParquetTable(),
|
|
100
|
+
'excel': InputExcelTable(),
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
return cls(
|
|
104
|
+
name=filename,
|
|
105
|
+
path=path,
|
|
106
|
+
file_type=file_type,
|
|
107
|
+
table_settings=settings_map.get(file_type, InputCsvTable())
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
@property
|
|
111
|
+
def file_path(self) -> str:
|
|
112
|
+
"""Constructs the full file path from the directory and name."""
|
|
113
|
+
if self.name and self.name not in self.path:
|
|
114
|
+
return os.path.join(self.path, self.name)
|
|
115
|
+
else:
|
|
116
|
+
return self.path
|
|
117
|
+
|
|
118
|
+
def set_absolute_filepath(self):
|
|
119
|
+
"""Resolves the path to an absolute file path."""
|
|
120
|
+
base_path = Path(self.path).expanduser()
|
|
121
|
+
if not base_path.is_absolute():
|
|
122
|
+
base_path = Path.cwd() / base_path
|
|
123
|
+
if self.name and self.name not in base_path.name:
|
|
124
|
+
base_path = base_path / self.name
|
|
125
|
+
self.abs_file_path = str(base_path.resolve())
|
|
126
|
+
|
|
127
|
+
@field_validator('table_settings', mode='before')
|
|
128
|
+
@classmethod
|
|
129
|
+
def validate_table_settings(cls, v, info):
|
|
130
|
+
"""Ensures table_settings matches the file_type."""
|
|
131
|
+
if v is None:
|
|
132
|
+
file_type = info.data.get('file_type', 'csv')
|
|
133
|
+
# Create default based on file_type
|
|
134
|
+
settings_map = {
|
|
135
|
+
'csv': InputCsvTable(),
|
|
136
|
+
'json': InputJsonTable(),
|
|
137
|
+
'parquet': InputParquetTable(),
|
|
138
|
+
'excel': InputExcelTable(),
|
|
139
|
+
}
|
|
140
|
+
return settings_map.get(file_type, InputCsvTable())
|
|
141
|
+
|
|
142
|
+
# If it's a dict, add file_type if missing
|
|
143
|
+
if isinstance(v, dict) and 'file_type' not in v:
|
|
144
|
+
v['file_type'] = info.data.get('file_type', 'csv')
|
|
145
|
+
|
|
146
|
+
return v
|
|
147
|
+
|
|
148
|
+
@model_validator(mode='after')
|
|
149
|
+
def populate_abs_file_path(self):
|
|
150
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
151
|
+
if not self.abs_file_path:
|
|
152
|
+
self.set_absolute_filepath()
|
|
153
|
+
return self
|
|
82
154
|
|
|
83
|
-
# Validate that start is before end if end is specified (non-zero)
|
|
84
|
-
if (0 < self.end_row < self.start_row) or \
|
|
85
|
-
(0 < self.end_column < self.start_column):
|
|
86
|
-
raise ValueError("Start row/column must not be greater than end row/column if specified")
|
flowfile_worker/routes.py
CHANGED
|
@@ -8,11 +8,11 @@ from base64 import encodebytes
|
|
|
8
8
|
from flowfile_worker import status_dict, CACHE_DIR, PROCESS_MEMORY_USAGE, status_dict_lock
|
|
9
9
|
from flowfile_worker import models
|
|
10
10
|
from flowfile_worker.spawner import start_process, start_fuzzy_process, start_generic_process, process_manager
|
|
11
|
-
from flowfile_worker.create import table_creator_factory_method,
|
|
11
|
+
from flowfile_worker.create import table_creator_factory_method, FileType
|
|
12
12
|
from flowfile_worker.configs import logger
|
|
13
13
|
from flowfile_worker.external_sources.sql_source.models import DatabaseReadSettings
|
|
14
|
-
from flowfile_worker.external_sources.sql_source.main import read_sql_source
|
|
15
|
-
|
|
14
|
+
from flowfile_worker.external_sources.sql_source.main import read_sql_source
|
|
15
|
+
from flowfile_worker.create.models import ReceivedTable
|
|
16
16
|
|
|
17
17
|
router = APIRouter()
|
|
18
18
|
|
|
@@ -237,7 +237,7 @@ def store_sql_db_result(database_read_settings: DatabaseReadSettings, background
|
|
|
237
237
|
|
|
238
238
|
|
|
239
239
|
@router.post('/create_table/{file_type}')
|
|
240
|
-
def create_table(file_type: FileType, received_table:
|
|
240
|
+
def create_table(file_type: FileType, received_table: ReceivedTable, background_tasks: BackgroundTasks,
|
|
241
241
|
flowfile_flow_id: int = 1, flowfile_node_id: int | str = -1) -> models.Status:
|
|
242
242
|
"""
|
|
243
243
|
Create a Polars table from received dictionary data based on specified file type.
|
|
@@ -253,18 +253,15 @@ def create_table(file_type: FileType, received_table: Dict, background_tasks: Ba
|
|
|
253
253
|
models.Status: Status object tracking the table creation
|
|
254
254
|
"""
|
|
255
255
|
logger.info(f"Creating table of type: {file_type}")
|
|
256
|
-
|
|
257
256
|
try:
|
|
258
257
|
task_id = str(uuid.uuid4())
|
|
259
258
|
file_ref = os.path.join(create_and_get_default_cache_dir(flowfile_flow_id), f"{task_id}.arrow")
|
|
260
|
-
|
|
261
259
|
status = models.Status(background_task_id=task_id, status="Starting", file_ref=file_ref,
|
|
262
260
|
result_type="polars")
|
|
263
261
|
status_dict[task_id] = status
|
|
264
262
|
func_ref = table_creator_factory_method(file_type)
|
|
265
|
-
received_table_parsed = received_table_parser(received_table, file_type)
|
|
266
263
|
background_tasks.add_task(start_generic_process, func_ref=func_ref, file_ref=file_ref,
|
|
267
|
-
task_id=task_id, kwargs={'received_table':
|
|
264
|
+
task_id=task_id, kwargs={'received_table': received_table},
|
|
268
265
|
flowfile_flow_id=flowfile_flow_id,
|
|
269
266
|
flowfile_node_id=flowfile_node_id)
|
|
270
267
|
logger.info(f"Started table creation task: {task_id}")
|
tools/migrate/README.md
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Flowfile Migration Tool
|
|
2
|
+
|
|
3
|
+
Migrates `.flowfile` (pickle format, used in v0.4.1 and earlier) to YAML (v0.5+).
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install pyyaml # Required for YAML output
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Single file
|
|
15
|
+
python -m tools.migrate path/to/flow.flowfile
|
|
16
|
+
|
|
17
|
+
# Directory (recursive)
|
|
18
|
+
python -m tools.migrate ./flows/
|
|
19
|
+
|
|
20
|
+
# Specify output path
|
|
21
|
+
python -m tools.migrate flow.flowfile -o /output/path/flow.yaml
|
|
22
|
+
|
|
23
|
+
# Output as JSON
|
|
24
|
+
python -m tools.migrate flow.flowfile --format json
|
|
25
|
+
|
|
26
|
+
# Dry run
|
|
27
|
+
python -m tools.migrate ./flows/ --dry-run
|
|
28
|
+
|
|
29
|
+
# Verbose (show tracebacks)
|
|
30
|
+
python -m tools.migrate flow.flowfile -v
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Output Structure
|
|
34
|
+
|
|
35
|
+
```yaml
|
|
36
|
+
_version: '2.0'
|
|
37
|
+
_migrated_from: pickle
|
|
38
|
+
flow_id: 1
|
|
39
|
+
flow_name: my_analysis
|
|
40
|
+
flow_settings:
|
|
41
|
+
name: my_analysis
|
|
42
|
+
description: null
|
|
43
|
+
execution_mode: Development
|
|
44
|
+
nodes:
|
|
45
|
+
- id: 1
|
|
46
|
+
type: read
|
|
47
|
+
position: {x: 100, y: 200}
|
|
48
|
+
settings:
|
|
49
|
+
received_file:
|
|
50
|
+
path: data/input.csv
|
|
51
|
+
file_type: csv
|
|
52
|
+
connections:
|
|
53
|
+
- [1, 2]
|
|
54
|
+
node_starts:
|
|
55
|
+
- 1
|
|
56
|
+
```
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Flowfile Migration Tool
|
|
3
|
+
|
|
4
|
+
Converts old pickle-based .flowfile format to new YAML format.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python -m tools.migrate <path>
|
|
8
|
+
python -m tools.migrate old_flow.flowfile
|
|
9
|
+
python -m tools.migrate ./flows/ # migrate entire directory
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
__version__ = "1.0.0"
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
Flowfile Migration Tool - CLI Entry Point
|
|
4
|
+
|
|
5
|
+
Converts old pickle-based .flowfile format to new YAML format.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python -m tools.migrate <path> [options]
|
|
9
|
+
|
|
10
|
+
Examples:
|
|
11
|
+
# Migrate a single file
|
|
12
|
+
python -m tools.migrate my_flow.flowfile
|
|
13
|
+
|
|
14
|
+
# Migrate to specific output
|
|
15
|
+
python -m tools.migrate my_flow.flowfile -o my_flow.yaml
|
|
16
|
+
|
|
17
|
+
# Migrate entire directory
|
|
18
|
+
python -m tools.migrate ./flows/
|
|
19
|
+
|
|
20
|
+
# Migrate to JSON instead of YAML
|
|
21
|
+
python -m tools.migrate my_flow.flowfile --format json
|
|
22
|
+
|
|
23
|
+
# Migrate directory to different output location
|
|
24
|
+
python -m tools.migrate ./old_flows/ -o ./new_flows/
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import sys
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
|
|
31
|
+
from tools.migrate.migrate import migrate_flowfile, migrate_directory
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def main():
|
|
35
|
+
parser = argparse.ArgumentParser(
|
|
36
|
+
prog='flowfile-migrate',
|
|
37
|
+
description='Migrate old .flowfile pickles to YAML format',
|
|
38
|
+
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
39
|
+
epilog="""
|
|
40
|
+
Examples:
|
|
41
|
+
%(prog)s my_flow.flowfile Migrate single file to YAML
|
|
42
|
+
%(prog)s ./flows/ Migrate all files in directory
|
|
43
|
+
%(prog)s flow.flowfile -o flow.yaml Specify output path
|
|
44
|
+
%(prog)s ./flows/ --format json Output as JSON instead of YAML
|
|
45
|
+
"""
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
parser.add_argument(
|
|
49
|
+
'path',
|
|
50
|
+
type=Path,
|
|
51
|
+
help='Path to .flowfile or directory containing .flowfile files'
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
parser.add_argument(
|
|
55
|
+
'-o', '--output',
|
|
56
|
+
type=Path,
|
|
57
|
+
default=None,
|
|
58
|
+
help='Output path (file or directory). Default: same location with new extension'
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
parser.add_argument(
|
|
62
|
+
'-f', '--format',
|
|
63
|
+
choices=['yaml', 'json'],
|
|
64
|
+
default='yaml',
|
|
65
|
+
help='Output format (default: yaml)'
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
parser.add_argument(
|
|
69
|
+
'-v', '--verbose',
|
|
70
|
+
action='store_true',
|
|
71
|
+
help='Verbose output'
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
parser.add_argument(
|
|
75
|
+
'--dry-run',
|
|
76
|
+
action='store_true',
|
|
77
|
+
help='Show what would be migrated without actually migrating'
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
args = parser.parse_args()
|
|
81
|
+
|
|
82
|
+
# Validate input path
|
|
83
|
+
if not args.path.exists():
|
|
84
|
+
print(f"Error: Path not found: {args.path}", file=sys.stderr)
|
|
85
|
+
sys.exit(1)
|
|
86
|
+
|
|
87
|
+
# Dry run mode
|
|
88
|
+
if args.dry_run:
|
|
89
|
+
if args.path.is_file():
|
|
90
|
+
print(f"Would migrate: {args.path}")
|
|
91
|
+
suffix = '.yaml' if args.format == 'yaml' else '.json'
|
|
92
|
+
output = args.output or args.path.with_suffix(suffix)
|
|
93
|
+
print(f" → {output}")
|
|
94
|
+
else:
|
|
95
|
+
flowfiles = list(args.path.glob('**/*.flowfile'))
|
|
96
|
+
print(f"Would migrate {len(flowfiles)} file(s):")
|
|
97
|
+
for f in flowfiles:
|
|
98
|
+
print(f" - {f}")
|
|
99
|
+
sys.exit(0)
|
|
100
|
+
|
|
101
|
+
# Check for yaml dependency
|
|
102
|
+
if args.format == 'yaml':
|
|
103
|
+
try:
|
|
104
|
+
import yaml
|
|
105
|
+
except ImportError:
|
|
106
|
+
print("Error: PyYAML is required for YAML output.", file=sys.stderr)
|
|
107
|
+
print("Install with: pip install pyyaml", file=sys.stderr)
|
|
108
|
+
sys.exit(1)
|
|
109
|
+
|
|
110
|
+
# Run migration
|
|
111
|
+
try:
|
|
112
|
+
if args.path.is_file():
|
|
113
|
+
migrate_flowfile(args.path, args.output, args.format)
|
|
114
|
+
elif args.path.is_dir():
|
|
115
|
+
migrate_directory(args.path, args.output, args.format)
|
|
116
|
+
else:
|
|
117
|
+
print(f"Error: {args.path} is neither a file nor a directory", file=sys.stderr)
|
|
118
|
+
sys.exit(1)
|
|
119
|
+
|
|
120
|
+
except Exception as e:
|
|
121
|
+
print(f"Error: {e}", file=sys.stderr)
|
|
122
|
+
if args.verbose:
|
|
123
|
+
import traceback
|
|
124
|
+
traceback.print_exc()
|
|
125
|
+
sys.exit(1)
|
|
126
|
+
|
|
127
|
+
print("\nMigration complete!")
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
if __name__ == '__main__':
|
|
131
|
+
main()
|