Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +8 -1
- flowfile/api.py +1 -3
- flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
- flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
- flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
- flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
- flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
- flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
- flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
- flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
- flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
- flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
- flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
- flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
- flowfile/web/static/assets/Output-283fe388.css +37 -0
- flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
- flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
- flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
- flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
- flowfile/web/static/assets/Read-64a3f259.js +218 -0
- flowfile/web/static/assets/Read-e808b239.css +62 -0
- flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
- flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
- flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
- flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
- flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
- flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
- flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
- flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
- flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
- flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
- flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
- flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
- flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
- flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/Union-bfe9b996.js +77 -0
- flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
- flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
- flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
- flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
- flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
- flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
- flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
- flowfile/web/static/assets/readExcel-806d2826.css +64 -0
- flowfile/web/static/assets/readParquet-48c81530.css +19 -0
- flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
- flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
- flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
- flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/__init__.py +30 -0
- flowfile_core/configs/node_store/nodes.py +383 -99
- flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
- flowfile_core/configs/settings.py +2 -1
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +240 -54
- flowfile_core/flowfile/flow_node/flow_node.py +48 -13
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +24 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +47 -0
- flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
- flowfile_core/flowfile/node_designer/custom_node.py +371 -0
- flowfile_core/flowfile/node_designer/ui_components.py +277 -0
- flowfile_core/flowfile/schema_callbacks.py +17 -10
- flowfile_core/flowfile/setting_generator/settings.py +15 -10
- flowfile_core/main.py +5 -1
- flowfile_core/routes/routes.py +73 -30
- flowfile_core/routes/user_defined_components.py +55 -0
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +228 -65
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +153 -35
- flowfile_core/schemas/transform_schema.py +1083 -412
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/types.py +156 -0
- flowfile_core/utils/validate_setup.py +3 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +31 -24
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +9 -35
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/main.py +5 -2
- flowfile_worker/routes.py +52 -13
- shared/__init__.py +15 -0
- shared/storage_config.py +258 -0
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c3b1929c.js +0 -701
- flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
- flowfile/web/static/assets/Union-89fd73dc.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_core/flowfile/manage/open_flowfile.py +0 -135
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -1,15 +1,23 @@
|
|
|
1
|
-
from typing import List, Optional, Literal, Iterator
|
|
1
|
+
from typing import List, Optional, Literal, Iterator, Any, Annotated
|
|
2
2
|
from flowfile_core.schemas import transform_schema
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
import os
|
|
5
5
|
from flowfile_core.schemas.analysis_schemas import graphic_walker_schemas as gs_schemas
|
|
6
6
|
from flowfile_core.schemas.cloud_storage_schemas import CloudStorageReadSettings, CloudStorageWriteSettings
|
|
7
|
-
from flowfile_core.schemas.
|
|
7
|
+
from flowfile_core.schemas.yaml_types import (
|
|
8
|
+
OutputSettingsYaml, NodeSelectYaml, NodeJoinYaml,
|
|
9
|
+
NodeCrossJoinYaml, NodeFuzzyMatchYaml, NodeOutputYaml
|
|
10
|
+
)
|
|
8
11
|
from flowfile_core.utils.utils import ensure_similarity_dicts, standardize_col_dtype
|
|
9
|
-
from pydantic import BaseModel, Field, model_validator,
|
|
12
|
+
from pydantic import (BaseModel, Field, model_validator, field_validator,
|
|
13
|
+
SecretStr, ConfigDict, StringConstraints, ValidationInfo)
|
|
10
14
|
import polars as pl
|
|
11
15
|
|
|
12
16
|
|
|
17
|
+
SecretRef = Annotated[str, StringConstraints(min_length=1, max_length=100),
|
|
18
|
+
Field(description="An ID referencing an encrypted secret.")]
|
|
19
|
+
|
|
20
|
+
|
|
13
21
|
OutputConnectionClass = Literal['output-0', 'output-1', 'output-2', 'output-3', 'output-4',
|
|
14
22
|
'output-5', 'output-6', 'output-7', 'output-8', 'output-9']
|
|
15
23
|
|
|
@@ -43,57 +51,19 @@ class MinimalFieldInfo(BaseModel):
|
|
|
43
51
|
data_type: str = "String"
|
|
44
52
|
|
|
45
53
|
|
|
46
|
-
class
|
|
47
|
-
"""Base
|
|
48
|
-
|
|
49
|
-
name: Optional[str]
|
|
50
|
-
path: str # This can be an absolute or relative path
|
|
51
|
-
directory: Optional[str] = None
|
|
52
|
-
analysis_file_available: bool = False
|
|
53
|
-
status: Optional[str] = None
|
|
54
|
-
file_type: Optional[str] = None
|
|
55
|
-
fields: List[MinimalFieldInfo] = Field(default_factory=list)
|
|
56
|
-
abs_file_path: Optional[str] = None
|
|
54
|
+
class InputTableBase(BaseModel):
|
|
55
|
+
"""Base settings for input file operations."""
|
|
56
|
+
file_type: str # Will be overridden with Literal in subclasses
|
|
57
57
|
|
|
58
|
-
@classmethod
|
|
59
|
-
def create_from_path(cls, path: str):
|
|
60
|
-
"""Creates an instance from a file path string."""
|
|
61
|
-
filename = Path(path).name
|
|
62
|
-
return cls(name=filename, path=path)
|
|
63
58
|
|
|
64
|
-
|
|
65
|
-
def file_path(self) -> str:
|
|
66
|
-
"""Constructs the full file path from the directory and name."""
|
|
67
|
-
if not self.name in self.path:
|
|
68
|
-
return os.path.join(self.path, self.name)
|
|
69
|
-
else:
|
|
70
|
-
return self.path
|
|
71
|
-
|
|
72
|
-
def set_absolute_filepath(self):
|
|
73
|
-
"""Resolves the path to an absolute file path."""
|
|
74
|
-
base_path = Path(self.path).expanduser()
|
|
75
|
-
if not base_path.is_absolute():
|
|
76
|
-
base_path = Path.cwd() / base_path
|
|
77
|
-
if self.name and self.name not in base_path.name:
|
|
78
|
-
base_path = base_path / self.name
|
|
79
|
-
self.abs_file_path = str(base_path.resolve())
|
|
80
|
-
|
|
81
|
-
@model_validator(mode='after')
|
|
82
|
-
def populate_abs_file_path(self):
|
|
83
|
-
"""Ensures the absolute file path is populated after validation."""
|
|
84
|
-
if not self.abs_file_path:
|
|
85
|
-
self.set_absolute_filepath()
|
|
86
|
-
return self
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
class ReceivedCsvTable(ReceivedTableBase):
|
|
59
|
+
class InputCsvTable(InputTableBase):
|
|
90
60
|
"""Defines settings for reading a CSV file."""
|
|
91
|
-
file_type:
|
|
61
|
+
file_type: Literal['csv'] = 'csv'
|
|
92
62
|
reference: str = ''
|
|
93
63
|
starting_from_line: int = 0
|
|
94
64
|
delimiter: str = ','
|
|
95
65
|
has_headers: bool = True
|
|
96
|
-
encoding:
|
|
66
|
+
encoding: str = 'utf-8'
|
|
97
67
|
parquet_ref: Optional[str] = None
|
|
98
68
|
row_delimiter: str = '\n'
|
|
99
69
|
quote_char: str = '"'
|
|
@@ -102,18 +72,19 @@ class ReceivedCsvTable(ReceivedTableBase):
|
|
|
102
72
|
ignore_errors: bool = False
|
|
103
73
|
|
|
104
74
|
|
|
105
|
-
class
|
|
106
|
-
"""Defines settings for reading a JSON file
|
|
107
|
-
|
|
75
|
+
class InputJsonTable(InputCsvTable):
|
|
76
|
+
"""Defines settings for reading a JSON file."""
|
|
77
|
+
file_type: Literal['json'] = 'json'
|
|
108
78
|
|
|
109
79
|
|
|
110
|
-
class
|
|
80
|
+
class InputParquetTable(InputTableBase):
|
|
111
81
|
"""Defines settings for reading a Parquet file."""
|
|
112
|
-
file_type:
|
|
82
|
+
file_type: Literal['parquet'] = 'parquet'
|
|
113
83
|
|
|
114
84
|
|
|
115
|
-
class
|
|
85
|
+
class InputExcelTable(InputTableBase):
|
|
116
86
|
"""Defines settings for reading an Excel file."""
|
|
87
|
+
file_type: Literal['excel'] = 'excel'
|
|
117
88
|
sheet_name: Optional[str] = None
|
|
118
89
|
start_row: int = 0
|
|
119
90
|
start_column: int = 0
|
|
@@ -122,51 +93,185 @@ class ReceivedExcelTable(ReceivedTableBase):
|
|
|
122
93
|
has_headers: bool = True
|
|
123
94
|
type_inference: bool = False
|
|
124
95
|
|
|
96
|
+
@model_validator(mode='after')
|
|
125
97
|
def validate_range_values(self):
|
|
126
98
|
"""Validates that the Excel cell range is logical."""
|
|
127
99
|
for attribute in [self.start_row, self.start_column, self.end_row, self.end_column]:
|
|
128
100
|
if not isinstance(attribute, int) or attribute < 0:
|
|
129
101
|
raise ValueError("Row and column indices must be non-negative integers")
|
|
130
102
|
if (self.end_row > 0 and self.start_row > self.end_row) or \
|
|
131
|
-
|
|
103
|
+
(self.end_column > 0 and self.start_column > self.end_column):
|
|
132
104
|
raise ValueError("Start row/column must not be greater than end row/column")
|
|
105
|
+
return self
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# Create the discriminated union (similar to OutputTableSettings)
|
|
109
|
+
InputTableSettings = Annotated[
|
|
110
|
+
InputCsvTable | InputJsonTable | InputParquetTable | InputExcelTable,
|
|
111
|
+
Field(discriminator='file_type')
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Now create the main ReceivedTable model
|
|
116
|
+
class ReceivedTable(BaseModel):
|
|
117
|
+
"""Model for defining a table received from an external source."""
|
|
118
|
+
# Metadata fields
|
|
119
|
+
id: Optional[int] = None
|
|
120
|
+
name: Optional[str] = None
|
|
121
|
+
path: str # This can be an absolute or relative path
|
|
122
|
+
directory: Optional[str] = None
|
|
123
|
+
analysis_file_available: bool = False
|
|
124
|
+
status: Optional[str] = None
|
|
125
|
+
fields: List[MinimalFieldInfo] = Field(default_factory=list)
|
|
126
|
+
abs_file_path: Optional[str] = None
|
|
127
|
+
|
|
128
|
+
file_type: Literal['csv', 'json', 'parquet', 'excel']
|
|
129
|
+
|
|
130
|
+
table_settings: InputTableSettings
|
|
133
131
|
|
|
132
|
+
@classmethod
|
|
133
|
+
def create_from_path(cls, path: str, file_type: Literal['csv', 'json', 'parquet', 'excel'] = 'csv'):
|
|
134
|
+
"""Creates an instance from a file path string."""
|
|
135
|
+
filename = Path(path).name
|
|
134
136
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
137
|
+
# Create appropriate table_settings based on file_type
|
|
138
|
+
settings_map = {
|
|
139
|
+
'csv': InputCsvTable(),
|
|
140
|
+
'json': InputJsonTable(),
|
|
141
|
+
'parquet': InputParquetTable(),
|
|
142
|
+
'excel': InputExcelTable(),
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
return cls(
|
|
146
|
+
name=filename,
|
|
147
|
+
path=path,
|
|
148
|
+
file_type=file_type,
|
|
149
|
+
table_settings=settings_map.get(file_type, InputCsvTable())
|
|
150
|
+
)
|
|
151
|
+
|
|
152
|
+
@property
|
|
153
|
+
def file_path(self) -> str:
|
|
154
|
+
"""Constructs the full file path from the directory and name."""
|
|
155
|
+
if self.name and self.name not in self.path:
|
|
156
|
+
return os.path.join(self.path, self.name)
|
|
157
|
+
else:
|
|
158
|
+
return self.path
|
|
159
|
+
|
|
160
|
+
def set_absolute_filepath(self):
|
|
161
|
+
"""Resolves the path to an absolute file path."""
|
|
162
|
+
base_path = Path(self.path).expanduser()
|
|
163
|
+
if not base_path.is_absolute():
|
|
164
|
+
base_path = Path.cwd() / base_path
|
|
165
|
+
if self.name and self.name not in base_path.name:
|
|
166
|
+
base_path = base_path / self.name
|
|
167
|
+
self.abs_file_path = str(base_path.resolve())
|
|
168
|
+
|
|
169
|
+
@model_validator(mode='before')
|
|
170
|
+
@classmethod
|
|
171
|
+
def set_default_table_settings(cls, data):
|
|
172
|
+
"""Create default table_settings based on file_type if not provided."""
|
|
173
|
+
if isinstance(data, dict):
|
|
174
|
+
if 'table_settings' not in data or data['table_settings'] is None:
|
|
175
|
+
data['table_settings'] = {}
|
|
176
|
+
|
|
177
|
+
if isinstance(data['table_settings'], dict) and 'file_type' not in data['table_settings']:
|
|
178
|
+
data['table_settings']['file_type'] = data.get('file_type', 'csv')
|
|
179
|
+
return data
|
|
180
|
+
|
|
181
|
+
@model_validator(mode='after')
|
|
182
|
+
def populate_abs_file_path(self):
|
|
183
|
+
"""Ensures the absolute file path is populated after validation."""
|
|
184
|
+
if not self.abs_file_path:
|
|
185
|
+
self.set_absolute_filepath()
|
|
186
|
+
return self
|
|
138
187
|
|
|
139
188
|
|
|
140
189
|
class OutputCsvTable(BaseModel):
|
|
141
190
|
"""Defines settings for writing a CSV file."""
|
|
142
|
-
file_type:
|
|
191
|
+
file_type: Literal['csv'] = 'csv'
|
|
143
192
|
delimiter: str = ','
|
|
144
193
|
encoding: str = 'utf-8'
|
|
145
194
|
|
|
146
195
|
|
|
147
196
|
class OutputParquetTable(BaseModel):
|
|
148
197
|
"""Defines settings for writing a Parquet file."""
|
|
149
|
-
file_type:
|
|
198
|
+
file_type: Literal['parquet'] = 'parquet'
|
|
150
199
|
|
|
151
200
|
|
|
152
201
|
class OutputExcelTable(BaseModel):
|
|
153
202
|
"""Defines settings for writing an Excel file."""
|
|
154
|
-
file_type:
|
|
203
|
+
file_type: Literal['excel'] = 'excel'
|
|
155
204
|
sheet_name: str = 'Sheet1'
|
|
156
205
|
|
|
157
206
|
|
|
207
|
+
# Create a discriminated union
|
|
208
|
+
OutputTableSettings = Annotated[
|
|
209
|
+
OutputCsvTable | OutputParquetTable | OutputExcelTable,
|
|
210
|
+
Field(discriminator='file_type')
|
|
211
|
+
]
|
|
212
|
+
|
|
213
|
+
|
|
158
214
|
class OutputSettings(BaseModel):
|
|
159
215
|
"""Defines the complete settings for an output node."""
|
|
160
216
|
name: str
|
|
161
217
|
directory: str
|
|
162
|
-
file_type: str
|
|
218
|
+
file_type: str # This drives which table_settings to use
|
|
163
219
|
fields: Optional[List[str]] = Field(default_factory=list)
|
|
164
220
|
write_mode: str = 'overwrite'
|
|
165
|
-
|
|
166
|
-
output_parquet_table: OutputParquetTable = Field(default_factory=OutputParquetTable)
|
|
167
|
-
output_excel_table: OutputExcelTable = Field(default_factory=OutputExcelTable)
|
|
221
|
+
table_settings: OutputTableSettings
|
|
168
222
|
abs_file_path: Optional[str] = None
|
|
169
223
|
|
|
224
|
+
def to_yaml_dict(self) -> OutputSettingsYaml:
|
|
225
|
+
"""Converts the output settings to a dictionary suitable for YAML serialization."""
|
|
226
|
+
result: OutputSettingsYaml = {
|
|
227
|
+
"name": self.name,
|
|
228
|
+
"directory": self.directory,
|
|
229
|
+
"file_type": self.file_type,
|
|
230
|
+
"write_mode": self.write_mode,
|
|
231
|
+
}
|
|
232
|
+
if self.abs_file_path:
|
|
233
|
+
result["abs_file_path"] = self.abs_file_path
|
|
234
|
+
if self.fields:
|
|
235
|
+
result["fields"] = self.fields
|
|
236
|
+
# Only include table_settings if it has non-default values beyond file_type
|
|
237
|
+
ts_dict = self.table_settings.model_dump(exclude={"file_type"})
|
|
238
|
+
if any(v for v in ts_dict.values()): # Has meaningful settings
|
|
239
|
+
result["table_settings"] = ts_dict
|
|
240
|
+
return result
|
|
241
|
+
|
|
242
|
+
@property
|
|
243
|
+
def sheet_name(self) -> str | None:
|
|
244
|
+
if self.file_type == 'excel':
|
|
245
|
+
return self.table_settings.sheet_name
|
|
246
|
+
|
|
247
|
+
@property
|
|
248
|
+
def delimiter(self) -> str | None:
|
|
249
|
+
if self.file_type == 'csv':
|
|
250
|
+
return self.table_settings.delimiter
|
|
251
|
+
|
|
252
|
+
@field_validator('table_settings', mode='before')
|
|
253
|
+
@classmethod
|
|
254
|
+
def validate_table_settings(cls, v, info: ValidationInfo):
|
|
255
|
+
"""Ensures table_settings matches the file_type."""
|
|
256
|
+
if v is None:
|
|
257
|
+
file_type = info.data.get('file_type', 'csv')
|
|
258
|
+
# Create default based on file_type
|
|
259
|
+
match file_type:
|
|
260
|
+
case 'csv':
|
|
261
|
+
return OutputCsvTable()
|
|
262
|
+
case 'parquet':
|
|
263
|
+
return OutputParquetTable()
|
|
264
|
+
case 'excel':
|
|
265
|
+
return OutputExcelTable()
|
|
266
|
+
case _:
|
|
267
|
+
return OutputCsvTable()
|
|
268
|
+
|
|
269
|
+
# If it's a dict, add file_type if missing
|
|
270
|
+
if isinstance(v, dict) and 'file_type' not in v:
|
|
271
|
+
v['file_type'] = info.data.get('file_type', 'csv')
|
|
272
|
+
|
|
273
|
+
return v
|
|
274
|
+
|
|
170
275
|
def set_absolute_filepath(self):
|
|
171
276
|
"""Resolves the output directory and name into an absolute path."""
|
|
172
277
|
base_path = Path(self.directory)
|
|
@@ -195,6 +300,7 @@ class NodeBase(BaseModel):
|
|
|
195
300
|
description: Optional[str] = ''
|
|
196
301
|
user_id: Optional[int] = None
|
|
197
302
|
is_flow_output: Optional[bool] = False
|
|
303
|
+
is_user_defined: Optional[bool] = False # Indicator if the node is a user defined node
|
|
198
304
|
|
|
199
305
|
|
|
200
306
|
class NodeSingleInput(NodeBase):
|
|
@@ -204,7 +310,7 @@ class NodeSingleInput(NodeBase):
|
|
|
204
310
|
|
|
205
311
|
class NodeMultiInput(NodeBase):
|
|
206
312
|
"""A base model for any node that takes multiple data inputs."""
|
|
207
|
-
depending_on_ids: Optional[List[int]] =
|
|
313
|
+
depending_on_ids: Optional[List[int]] = Field(default_factory=list)
|
|
208
314
|
|
|
209
315
|
|
|
210
316
|
class NodeSelect(NodeSingleInput):
|
|
@@ -213,6 +319,15 @@ class NodeSelect(NodeSingleInput):
|
|
|
213
319
|
select_input: List[transform_schema.SelectInput] = Field(default_factory=list)
|
|
214
320
|
sorted_by: Optional[Literal['none', 'asc', 'desc']] = 'none'
|
|
215
321
|
|
|
322
|
+
def to_yaml_dict(self) -> NodeSelectYaml:
|
|
323
|
+
"""Converts the select node settings to a dictionary for YAML serialization."""
|
|
324
|
+
return {
|
|
325
|
+
"cache_results": self.cache_results,
|
|
326
|
+
"keep_missing": self.keep_missing,
|
|
327
|
+
"select_input": [s.to_yaml_dict() for s in self.select_input],
|
|
328
|
+
"sorted_by": self.sorted_by,
|
|
329
|
+
}
|
|
330
|
+
|
|
216
331
|
|
|
217
332
|
class NodeFilter(NodeSingleInput):
|
|
218
333
|
"""Settings for a node that filters rows based on a condition."""
|
|
@@ -248,6 +363,18 @@ class NodeJoin(NodeMultiInput):
|
|
|
248
363
|
auto_keep_right: bool = True
|
|
249
364
|
auto_keep_left: bool = True
|
|
250
365
|
|
|
366
|
+
def to_yaml_dict(self) -> NodeJoinYaml:
|
|
367
|
+
"""Converts the join node settings to a dictionary for YAML serialization."""
|
|
368
|
+
return {
|
|
369
|
+
"cache_results": self.cache_results,
|
|
370
|
+
"auto_generate_selection": self.auto_generate_selection,
|
|
371
|
+
"verify_integrity": self.verify_integrity,
|
|
372
|
+
"join_input": self.join_input.to_yaml_dict(),
|
|
373
|
+
"auto_keep_all": self.auto_keep_all,
|
|
374
|
+
"auto_keep_right": self.auto_keep_right,
|
|
375
|
+
"auto_keep_left": self.auto_keep_left,
|
|
376
|
+
}
|
|
377
|
+
|
|
251
378
|
|
|
252
379
|
class NodeCrossJoin(NodeMultiInput):
|
|
253
380
|
"""Settings for a node that performs a cross join."""
|
|
@@ -258,11 +385,35 @@ class NodeCrossJoin(NodeMultiInput):
|
|
|
258
385
|
auto_keep_right: bool = True
|
|
259
386
|
auto_keep_left: bool = True
|
|
260
387
|
|
|
388
|
+
def to_yaml_dict(self) -> NodeCrossJoinYaml:
|
|
389
|
+
"""Converts the cross join node settings to a dictionary for YAML serialization."""
|
|
390
|
+
return {
|
|
391
|
+
"cache_results": self.cache_results,
|
|
392
|
+
"auto_generate_selection": self.auto_generate_selection,
|
|
393
|
+
"verify_integrity": self.verify_integrity,
|
|
394
|
+
"cross_join_input": self.cross_join_input.to_yaml_dict(),
|
|
395
|
+
"auto_keep_all": self.auto_keep_all,
|
|
396
|
+
"auto_keep_right": self.auto_keep_right,
|
|
397
|
+
"auto_keep_left": self.auto_keep_left,
|
|
398
|
+
}
|
|
399
|
+
|
|
261
400
|
|
|
262
401
|
class NodeFuzzyMatch(NodeJoin):
|
|
263
402
|
"""Settings for a node that performs a fuzzy join based on string similarity."""
|
|
264
403
|
join_input: transform_schema.FuzzyMatchInput
|
|
265
404
|
|
|
405
|
+
def to_yaml_dict(self) -> NodeFuzzyMatchYaml:
|
|
406
|
+
"""Converts the fuzzy match node settings to a dictionary for YAML serialization."""
|
|
407
|
+
return {
|
|
408
|
+
"cache_results": self.cache_results,
|
|
409
|
+
"auto_generate_selection": self.auto_generate_selection,
|
|
410
|
+
"verify_integrity": self.verify_integrity,
|
|
411
|
+
"join_input": self.join_input.to_yaml_dict(),
|
|
412
|
+
"auto_keep_all": self.auto_keep_all,
|
|
413
|
+
"auto_keep_right": self.auto_keep_right,
|
|
414
|
+
"auto_keep_left": self.auto_keep_left,
|
|
415
|
+
}
|
|
416
|
+
|
|
266
417
|
|
|
267
418
|
class NodeDatasource(NodeBase):
|
|
268
419
|
"""Base settings for a node that acts as a data source."""
|
|
@@ -464,6 +615,13 @@ class NodeOutput(NodeSingleInput):
|
|
|
464
615
|
"""Settings for a node that writes its input to a file."""
|
|
465
616
|
output_settings: OutputSettings
|
|
466
617
|
|
|
618
|
+
def to_yaml_dict(self) -> NodeOutputYaml:
|
|
619
|
+
"""Converts the output node settings to a dictionary for YAML serialization."""
|
|
620
|
+
return {
|
|
621
|
+
"cache_results": self.cache_results,
|
|
622
|
+
"output_settings": self.output_settings.to_yaml_dict(),
|
|
623
|
+
}
|
|
624
|
+
|
|
467
625
|
|
|
468
626
|
class NodeOutputConnection(BaseModel):
|
|
469
627
|
"""Represents the output side of a connection between two nodes."""
|
|
@@ -517,3 +675,8 @@ class NodeRecordCount(NodeSingleInput):
|
|
|
517
675
|
class NodePolarsCode(NodeMultiInput):
|
|
518
676
|
"""Settings for a node that executes arbitrary user-provided Polars code."""
|
|
519
677
|
polars_code_input: transform_schema.PolarsCodeInput
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
class UserDefinedNode(NodeMultiInput):
|
|
681
|
+
"""Settings for a node that contains the user defined node information"""
|
|
682
|
+
settings: Any
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Dict, Optional, Any
|
|
1
|
+
from typing import List, Dict, Optional, Any, Literal
|
|
2
2
|
from pydantic import BaseModel, Field
|
|
3
3
|
from datetime import datetime
|
|
4
4
|
import time
|
|
@@ -21,10 +21,11 @@ class RunInformation(BaseModel):
|
|
|
21
21
|
flow_id: int
|
|
22
22
|
start_time: Optional[datetime] = Field(default_factory=datetime.now)
|
|
23
23
|
end_time: Optional[datetime] = None
|
|
24
|
-
success: bool
|
|
24
|
+
success: Optional[bool] = None
|
|
25
25
|
nodes_completed: int = 0
|
|
26
26
|
number_of_nodes: int = 0
|
|
27
27
|
node_step_result: List[NodeResult]
|
|
28
|
+
run_type: Literal["fetch_one", "full_run", "init"]
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class BaseItem(BaseModel):
|
|
@@ -61,6 +62,8 @@ class TableExample(BaseModel):
|
|
|
61
62
|
table_schema: List[FileColumn]
|
|
62
63
|
columns: List[str]
|
|
63
64
|
data: Optional[List[Dict]] = {}
|
|
65
|
+
has_example_data: bool = False
|
|
66
|
+
has_run_with_current_setup: bool = False
|
|
64
67
|
|
|
65
68
|
|
|
66
69
|
class NodeData(BaseModel):
|