Flowfile 0.3.9__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowfile/__init__.py +8 -1
- flowfile/api.py +1 -3
- flowfile/web/static/assets/{CloudConnectionManager-c97c25f8.js → CloudConnectionManager-0dfba9f2.js} +2 -2
- flowfile/web/static/assets/{CloudStorageReader-f1ff509e.js → CloudStorageReader-d5b1b6c9.js} +11 -78
- flowfile/web/static/assets/{CloudStorageWriter-034f8b78.js → CloudStorageWriter-00d87aad.js} +12 -79
- flowfile/web/static/assets/{CloudStorageWriter-49c9a4b2.css → CloudStorageWriter-b0ee067f.css} +24 -24
- flowfile/web/static/assets/ColumnSelector-4685e75d.js +83 -0
- flowfile/web/static/assets/ColumnSelector-47996a16.css +10 -0
- flowfile/web/static/assets/ContextMenu-23e909da.js +41 -0
- flowfile/web/static/assets/{SettingsSection-9c836ecc.css → ContextMenu-4c74eef1.css} +0 -21
- flowfile/web/static/assets/ContextMenu-63cfa99b.css +26 -0
- flowfile/web/static/assets/ContextMenu-70ae0c79.js +41 -0
- flowfile/web/static/assets/ContextMenu-c13f91d0.css +26 -0
- flowfile/web/static/assets/ContextMenu-f149cf7c.js +41 -0
- flowfile/web/static/assets/{CrossJoin-41efa4cb.css → CrossJoin-1119d18e.css} +18 -18
- flowfile/web/static/assets/{CrossJoin-9e156ebe.js → CrossJoin-702a3edd.js} +14 -84
- flowfile/web/static/assets/CustomNode-74a37f74.css +32 -0
- flowfile/web/static/assets/CustomNode-b1519993.js +211 -0
- flowfile/web/static/assets/{DatabaseConnectionSettings-d5c625b3.js → DatabaseConnectionSettings-6f3e4ea5.js} +3 -3
- flowfile/web/static/assets/{DatabaseManager-265adc5e.js → DatabaseManager-cf5ef661.js} +2 -2
- flowfile/web/static/assets/{DatabaseReader-f50c6558.css → DatabaseReader-ae61773c.css} +0 -27
- flowfile/web/static/assets/{DatabaseReader-0b10551e.js → DatabaseReader-d38c7295.js} +14 -114
- flowfile/web/static/assets/{DatabaseWriter-c17c6916.js → DatabaseWriter-b04ef46a.js} +13 -74
- flowfile/web/static/assets/{ExploreData-5bdae813.css → ExploreData-2d0cf4db.css} +8 -14
- flowfile/web/static/assets/ExploreData-5fa10ed8.js +192 -0
- flowfile/web/static/assets/{ExternalSource-3a66556c.js → ExternalSource-d39af878.js} +8 -79
- flowfile/web/static/assets/{Filter-91ad87e7.js → Filter-9b6d08db.js} +12 -85
- flowfile/web/static/assets/{Filter-a9d08ba1.css → Filter-f62091b3.css} +3 -3
- flowfile/web/static/assets/{Formula-3c395ab1.js → Formula-6b04fb1d.js} +20 -87
- flowfile/web/static/assets/{Formula-29f19d21.css → Formula-bb96803d.css} +4 -4
- flowfile/web/static/assets/{FuzzyMatch-6857de82.css → FuzzyMatch-1010f966.css} +42 -42
- flowfile/web/static/assets/{FuzzyMatch-2df0d230.js → FuzzyMatch-999521f4.js} +16 -87
- flowfile/web/static/assets/{GraphSolver-d285877f.js → GraphSolver-17dd2198.js} +13 -159
- flowfile/web/static/assets/GraphSolver-f0cb7bfb.css +22 -0
- flowfile/web/static/assets/{GroupBy-0bd1cc6b.js → GroupBy-6b039e18.js} +12 -75
- flowfile/web/static/assets/{Unique-b5615727.css → GroupBy-b9505323.css} +8 -8
- flowfile/web/static/assets/{Join-5a78a203.js → Join-24d0f113.js} +15 -85
- flowfile/web/static/assets/{Join-f45eff22.css → Join-fd79b451.css} +20 -20
- flowfile/web/static/assets/{ManualInput-a71b52c6.css → ManualInput-3246a08d.css} +20 -20
- flowfile/web/static/assets/{ManualInput-93aef9d6.js → ManualInput-34639209.js} +11 -82
- flowfile/web/static/assets/MultiSelect-0e8724a3.js +5 -0
- flowfile/web/static/assets/MultiSelect.vue_vue_type_script_setup_true_lang-b0e538c2.js +63 -0
- flowfile/web/static/assets/NumericInput-3d63a470.js +5 -0
- flowfile/web/static/assets/NumericInput.vue_vue_type_script_setup_true_lang-e0edeccc.js +35 -0
- flowfile/web/static/assets/Output-283fe388.css +37 -0
- flowfile/web/static/assets/{Output-411ecaee.js → Output-edea9802.js} +62 -273
- flowfile/web/static/assets/{Pivot-89db4b04.js → Pivot-61d19301.js} +14 -138
- flowfile/web/static/assets/Pivot-cf333e3d.css +22 -0
- flowfile/web/static/assets/PivotValidation-891ddfb0.css +13 -0
- flowfile/web/static/assets/PivotValidation-c46cd420.css +13 -0
- flowfile/web/static/assets/PivotValidation-de9f43fe.js +61 -0
- flowfile/web/static/assets/PivotValidation-f97fec5b.js +61 -0
- flowfile/web/static/assets/{PolarsCode-a9f974f8.js → PolarsCode-bc3c9984.js} +13 -80
- flowfile/web/static/assets/Read-64a3f259.js +218 -0
- flowfile/web/static/assets/Read-e808b239.css +62 -0
- flowfile/web/static/assets/RecordCount-3d5039be.js +53 -0
- flowfile/web/static/assets/{RecordId-55ae7d36.js → RecordId-597510e0.js} +8 -80
- flowfile/web/static/assets/SQLQueryComponent-36cef432.css +27 -0
- flowfile/web/static/assets/SQLQueryComponent-df51adbe.js +38 -0
- flowfile/web/static/assets/{Sample-b4a18476.js → Sample-4be0a507.js} +8 -77
- flowfile/web/static/assets/{SecretManager-b066d13a.js → SecretManager-4839be57.js} +2 -2
- flowfile/web/static/assets/{Select-727688dc.js → Select-9b72f201.js} +11 -85
- flowfile/web/static/assets/SettingsSection-2e4d03c4.css +21 -0
- flowfile/web/static/assets/SettingsSection-5c696bee.css +20 -0
- flowfile/web/static/assets/SettingsSection-71e6b7e3.css +21 -0
- flowfile/web/static/assets/SettingsSection-7ded385d.js +45 -0
- flowfile/web/static/assets/{SettingsSection-695ac487.js → SettingsSection-e1e9c953.js} +2 -40
- flowfile/web/static/assets/SettingsSection-f0f75a42.js +53 -0
- flowfile/web/static/assets/SingleSelect-6c777aac.js +5 -0
- flowfile/web/static/assets/SingleSelect.vue_vue_type_script_setup_true_lang-33e3ff9b.js +62 -0
- flowfile/web/static/assets/SliderInput-7cb93e62.js +40 -0
- flowfile/web/static/assets/SliderInput-b8fb6a8c.css +4 -0
- flowfile/web/static/assets/{GroupBy-ab1ea74b.css → Sort-3643d625.css} +8 -8
- flowfile/web/static/assets/{Sort-be3339a8.js → Sort-6cbde21a.js} +12 -97
- flowfile/web/static/assets/TextInput-d9a40c11.js +5 -0
- flowfile/web/static/assets/TextInput.vue_vue_type_script_setup_true_lang-5896c375.js +32 -0
- flowfile/web/static/assets/{TextToRows-c92d1ec2.css → TextToRows-5d2c1190.css} +9 -9
- flowfile/web/static/assets/{TextToRows-7b8998da.js → TextToRows-c4fcbf4d.js} +14 -83
- flowfile/web/static/assets/ToggleSwitch-4ef91d19.js +5 -0
- flowfile/web/static/assets/ToggleSwitch.vue_vue_type_script_setup_true_lang-38478c20.js +31 -0
- flowfile/web/static/assets/{UnavailableFields-8b0cb48e.js → UnavailableFields-a03f512c.js} +2 -2
- flowfile/web/static/assets/{Union-8d9ac7f9.css → Union-af6c3d9b.css} +6 -6
- flowfile/web/static/assets/Union-bfe9b996.js +77 -0
- flowfile/web/static/assets/{Unique-af5a80b4.js → Unique-5d023a27.js} +23 -104
- flowfile/web/static/assets/{Sort-7ccfa0fe.css → Unique-f9fb0809.css} +8 -8
- flowfile/web/static/assets/Unpivot-1e422df3.css +30 -0
- flowfile/web/static/assets/{Unpivot-5195d411.js → Unpivot-91cc5354.js} +12 -166
- flowfile/web/static/assets/UnpivotValidation-0d240eeb.css +13 -0
- flowfile/web/static/assets/UnpivotValidation-7ee2de44.js +51 -0
- flowfile/web/static/assets/{ExploreData-18a4fe52.js → VueGraphicWalker-e51b9924.js} +4 -264
- flowfile/web/static/assets/VueGraphicWalker-ed5ab88b.css +6 -0
- flowfile/web/static/assets/{api-cb00cce6.js → api-c1bad5ca.js} +1 -1
- flowfile/web/static/assets/{api-023d1733.js → api-cf1221f0.js} +1 -1
- flowfile/web/static/assets/{designer-2197d782.css → designer-8da3ba3a.css} +859 -201
- flowfile/web/static/assets/{designer-6c322d8e.js → designer-9633482a.js} +2297 -733
- flowfile/web/static/assets/{documentation-4d1fafe1.js → documentation-ca400224.js} +1 -1
- flowfile/web/static/assets/{dropDown-0b46dd77.js → dropDown-614b998d.js} +1 -1
- flowfile/web/static/assets/{fullEditor-ec4e4f95.js → fullEditor-f7971590.js} +2 -2
- flowfile/web/static/assets/{genericNodeSettings-def5879b.js → genericNodeSettings-4fe5f36b.js} +3 -3
- flowfile/web/static/assets/{index-681a3ed0.css → index-50508d4d.css} +8 -0
- flowfile/web/static/assets/{index-683fc198.js → index-5429bbf8.js} +208 -31
- flowfile/web/static/assets/nodeInput-5d0d6b79.js +41 -0
- flowfile/web/static/assets/outputCsv-076b85ab.js +86 -0
- flowfile/web/static/assets/{Output-48f81019.css → outputCsv-9cc59e0b.css} +0 -143
- flowfile/web/static/assets/outputExcel-0fd17dbe.js +56 -0
- flowfile/web/static/assets/outputExcel-b41305c0.css +102 -0
- flowfile/web/static/assets/outputParquet-b61e0847.js +31 -0
- flowfile/web/static/assets/outputParquet-cf8cf3f2.css +4 -0
- flowfile/web/static/assets/readCsv-a8bb8b61.js +179 -0
- flowfile/web/static/assets/readCsv-c767cb37.css +52 -0
- flowfile/web/static/assets/readExcel-67b4aee0.js +201 -0
- flowfile/web/static/assets/readExcel-806d2826.css +64 -0
- flowfile/web/static/assets/readParquet-48c81530.css +19 -0
- flowfile/web/static/assets/readParquet-92ce1dbc.js +23 -0
- flowfile/web/static/assets/{secretApi-baceb6f9.js → secretApi-68435402.js} +1 -1
- flowfile/web/static/assets/{selectDynamic-de91449a.js → selectDynamic-92e25ee3.js} +7 -7
- flowfile/web/static/assets/{selectDynamic-b062bc9b.css → selectDynamic-aa913ff4.css} +16 -16
- flowfile/web/static/assets/user-defined-icon-0ae16c90.png +0 -0
- flowfile/web/static/assets/{vue-codemirror.esm-dc5e3348.js → vue-codemirror.esm-41b0e0d7.js} +65 -36
- flowfile/web/static/assets/{vue-content-loader.es-ba94b82f.js → vue-content-loader.es-2c8e608f.js} +1 -1
- flowfile/web/static/index.html +2 -2
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/METADATA +5 -3
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/RECORD +191 -121
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/WHEEL +1 -1
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info}/entry_points.txt +1 -0
- flowfile_core/__init__.py +3 -0
- flowfile_core/configs/flow_logger.py +5 -13
- flowfile_core/configs/node_store/__init__.py +30 -0
- flowfile_core/configs/node_store/nodes.py +383 -99
- flowfile_core/configs/node_store/user_defined_node_registry.py +193 -0
- flowfile_core/configs/settings.py +2 -1
- flowfile_core/database/connection.py +5 -21
- flowfile_core/fileExplorer/funcs.py +239 -121
- flowfile_core/flowfile/analytics/analytics_processor.py +1 -0
- flowfile_core/flowfile/code_generator/code_generator.py +62 -64
- flowfile_core/flowfile/flow_data_engine/create/funcs.py +73 -56
- flowfile_core/flowfile/flow_data_engine/flow_data_engine.py +77 -86
- flowfile_core/flowfile/flow_data_engine/flow_file_column/interface.py +4 -0
- flowfile_core/flowfile/flow_data_engine/flow_file_column/main.py +19 -34
- flowfile_core/flowfile/flow_data_engine/flow_file_column/type_registry.py +36 -0
- flowfile_core/flowfile/flow_data_engine/fuzzy_matching/prepare_for_fuzzy_match.py +23 -23
- flowfile_core/flowfile/flow_data_engine/join/utils.py +1 -1
- flowfile_core/flowfile/flow_data_engine/join/verify_integrity.py +9 -4
- flowfile_core/flowfile/flow_data_engine/subprocess_operations/subprocess_operations.py +212 -86
- flowfile_core/flowfile/flow_data_engine/utils.py +2 -0
- flowfile_core/flowfile/flow_graph.py +240 -54
- flowfile_core/flowfile/flow_node/flow_node.py +48 -13
- flowfile_core/flowfile/flow_node/models.py +2 -1
- flowfile_core/flowfile/handler.py +24 -5
- flowfile_core/flowfile/manage/compatibility_enhancements.py +404 -41
- flowfile_core/flowfile/manage/io_flowfile.py +394 -0
- flowfile_core/flowfile/node_designer/__init__.py +47 -0
- flowfile_core/flowfile/node_designer/_type_registry.py +197 -0
- flowfile_core/flowfile/node_designer/custom_node.py +371 -0
- flowfile_core/flowfile/node_designer/ui_components.py +277 -0
- flowfile_core/flowfile/schema_callbacks.py +17 -10
- flowfile_core/flowfile/setting_generator/settings.py +15 -10
- flowfile_core/main.py +5 -1
- flowfile_core/routes/routes.py +73 -30
- flowfile_core/routes/user_defined_components.py +55 -0
- flowfile_core/schemas/cloud_storage_schemas.py +0 -2
- flowfile_core/schemas/input_schema.py +228 -65
- flowfile_core/schemas/output_model.py +5 -2
- flowfile_core/schemas/schemas.py +153 -35
- flowfile_core/schemas/transform_schema.py +1083 -412
- flowfile_core/schemas/yaml_types.py +103 -0
- flowfile_core/types.py +156 -0
- flowfile_core/utils/validate_setup.py +3 -1
- flowfile_frame/__init__.py +3 -1
- flowfile_frame/flow_frame.py +31 -24
- flowfile_frame/flow_frame_methods.py +12 -9
- flowfile_worker/__init__.py +9 -35
- flowfile_worker/create/__init__.py +3 -21
- flowfile_worker/create/funcs.py +68 -56
- flowfile_worker/create/models.py +130 -62
- flowfile_worker/main.py +5 -2
- flowfile_worker/routes.py +52 -13
- shared/__init__.py +15 -0
- shared/storage_config.py +258 -0
- tools/migrate/README.md +56 -0
- tools/migrate/__init__.py +12 -0
- tools/migrate/__main__.py +131 -0
- tools/migrate/legacy_schemas.py +621 -0
- tools/migrate/migrate.py +598 -0
- tools/migrate/tests/__init__.py +0 -0
- tools/migrate/tests/conftest.py +23 -0
- tools/migrate/tests/test_migrate.py +627 -0
- tools/migrate/tests/test_migration_e2e.py +1010 -0
- tools/migrate/tests/test_node_migrations.py +813 -0
- flowfile/web/static/assets/GraphSolver-17fd26db.css +0 -68
- flowfile/web/static/assets/Pivot-f415e85f.css +0 -35
- flowfile/web/static/assets/Read-80dc1675.css +0 -197
- flowfile/web/static/assets/Read-c3b1929c.js +0 -701
- flowfile/web/static/assets/RecordCount-4e95f98e.js +0 -122
- flowfile/web/static/assets/Union-89fd73dc.js +0 -146
- flowfile/web/static/assets/Unpivot-246e9bbd.css +0 -77
- flowfile/web/static/assets/nodeTitle-a16db7c3.js +0 -227
- flowfile/web/static/assets/nodeTitle-f4b12bcb.css +0 -134
- flowfile_core/flowfile/manage/open_flowfile.py +0 -135
- {flowfile-0.3.9.dist-info → flowfile-0.5.1.dist-info/licenses}/LICENSE +0 -0
- /flowfile_core/flowfile/manage/manage_flowfile.py → /tools/__init__.py +0 -0
|
@@ -0,0 +1,394 @@
|
|
|
1
|
+
from flowfile_core.schemas import schemas, input_schema
|
|
2
|
+
from typing import List, Tuple
|
|
3
|
+
from flowfile_core.flowfile.manage.compatibility_enhancements import ensure_compatibility, load_flowfile_pickle
|
|
4
|
+
from flowfile_core.flowfile.flow_graph import FlowGraph
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from flowfile_core.configs.node_store import CUSTOM_NODE_STORE
|
|
7
|
+
from flowfile_core.configs.settings import IS_RUNNING_IN_DOCKER
|
|
8
|
+
import json
|
|
9
|
+
from shared.storage_config import storage
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
import yaml
|
|
14
|
+
except ImportError:
|
|
15
|
+
yaml = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _validate_flow_path(flow_path: Path) -> Path:
    """Resolve *flow_path* and check that it may be opened as a flow file.

    Args:
        flow_path: Path to the flow file, absolute or relative.

    Returns:
        The resolved path.

    Raises:
        ValueError: If the extension is not supported, or if (when running in
            Docker) a relative path resolves outside the storage directories.
        FileNotFoundError: If the resolved path is not an existing file.
    """
    resolved = flow_path.resolve()

    # Only the known flow file formats are accepted.
    extension = resolved.suffix
    if extension.lower() not in {'.yaml', '.yml', '.json', '.flowfile'}:
        raise ValueError(f"Unsupported file extension: {extension}")

    if not resolved.is_file():
        raise FileNotFoundError(f"Flow file not found: {resolved}")

    # Outside Docker every existing flow file is accepted.
    if not IS_RUNNING_IN_DOCKER:
        return resolved

    storage_roots = (
        storage.flows_directory,
        storage.uploads_directory,
        storage.temp_directory_for_flows,
    )
    for root in storage_roots:
        if resolved.is_relative_to(root):
            return resolved

    # Outside the storage directories only an explicitly absolute path is accepted.
    if flow_path.is_absolute():
        return resolved

    raise ValueError(
        f"Relative paths must be within flows or uploads directory. "
        f"Use absolute path or place file in: {storage.flows_directory}"
    )
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _derive_connections_from_nodes(nodes: List[schemas.FlowfileNode]) -> List[Tuple[int, int]]:
    """Build the ``(from_id, to_id)`` edge list from each node's ``outputs``.

    Args:
        nodes: Nodes whose ``id`` and ``outputs`` attributes are read.

    Returns:
        One tuple per output id, in node order and then output order. Nodes
        with empty or missing outputs contribute nothing.
    """
    return [
        (source.id, target_id)
        for source in nodes
        if source.outputs
        for target_id in source.outputs
    ]
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def determine_insertion_order(node_storage: schemas.FlowInformation) -> List[int]:
    """Compute the order in which stored nodes are added to a new graph.

    Starting from the first entry of ``node_storage.node_starts`` (or from every
    node when there are no start nodes), the graph is walked so that a node is
    emitted only after the inputs that exist in ``node_storage.data``. Nodes that
    were never reached are appended at the end.

    Side effect: when a node lists an input whose ``outputs`` does not contain
    that node, the id is appended to the input's ``outputs``.

    Args:
        node_storage: Flow information; ``data`` maps node id to node info and
            ``node_starts`` lists the start node ids.

    Returns:
        Node ids in insertion order, each id exactly once.
    """
    ingest_order: List[int] = []
    ingest_order_set: set[int] = set()
    all_nodes = set(node_storage.data.keys())

    def mark_ingested(node_id: int) -> None:
        # Guarded so a node is never emitted twice, e.g. when several of its
        # inputs are missing from the storage.
        if node_id not in ingest_order_set:
            ingest_order.append(node_id)
            ingest_order_set.add(node_id)

    def assure_output_id(input_node: schemas.NodeInformation, output_node: schemas.NodeInformation):
        # assure the output id is in the list with outputs of the input node this is a quick fix
        if output_node.id not in input_node.outputs:
            input_node.outputs.append(output_node.id)

    def determine_order(node_id: int) -> None:
        current_node = node_storage.data.get(node_id)
        if current_node is None:
            return
        output_ids = current_node.outputs
        main_input_ids = current_node.input_ids if current_node.input_ids else []
        # Only inputs that still have to be ingested are of interest.
        input_ids = [
            n for n in [current_node.left_input_id, current_node.right_input_id] + main_input_ids
            if n is not None and n not in ingest_order_set
        ]
        if input_ids:
            for input_id in input_ids:
                new_node = node_storage.data.get(input_id)
                if new_node is None:
                    # The input is not stored, so nothing can be waited for.
                    mark_ingested(current_node.id)
                    continue
                assure_output_id(new_node, current_node)
                if new_node.id not in ingest_order_set:
                    determine_order(input_id)
        else:
            mark_ingested(current_node.id)

        for output_id in output_ids:
            if output_id not in ingest_order_set:
                determine_order(output_id)

    if len(node_storage.node_starts) > 0:
        determine_order(node_storage.node_starts[0])
    else:
        for node_id in all_nodes:
            determine_order(node_id)
    # add the random not connected nodes
    ingest_order += list(all_nodes - ingest_order_set)
    return ingest_order
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def _load_flowfile_yaml(flow_path: Path) -> schemas.FlowInformation:
    """
    Read a YAML flowfile and turn it into a FlowInformation object.

    Args:
        flow_path: Path to the YAML file

    Returns:
        FlowInformation object

    Raises:
        ImportError: If PyYAML is not installed.
    """
    if yaml is None:
        raise ImportError("PyYAML is required for YAML files. Install with: pip install pyyaml")

    validated_path = _validate_flow_path(flow_path)
    with validated_path.open('r', encoding='utf-8') as handle:
        raw = yaml.safe_load(handle)

    # Validate as FlowfileData first (handles setting_input validation via node type),
    # then convert to FlowInformation.
    parsed = schemas.FlowfileData.model_validate(raw)
    return _flowfile_data_to_flow_information(parsed)
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _load_flowfile_json(flow_path: Path) -> schemas.FlowInformation:
    """
    Read a JSON flowfile and turn it into a FlowInformation object.

    Args:
        flow_path: Path to the JSON file

    Returns:
        FlowInformation object
    """
    validated_path = _validate_flow_path(flow_path)
    with validated_path.open('r', encoding='utf-8') as handle:
        raw = json.load(handle)

    # Validate as FlowfileData first (handles setting_input validation via node type),
    # then convert to FlowInformation.
    parsed = schemas.FlowfileData.model_validate(raw)
    return _flowfile_data_to_flow_information(parsed)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _collect_depending_ids(node: schemas.FlowfileNode) -> List[int]:
    """Return the upstream ids of *node*: main inputs first, then left, then right."""
    depending_ids = list(node.input_ids or [])
    # Falsy ids (None or 0) are skipped.
    depending_ids.extend(
        side_id for side_id in (node.left_input_id, node.right_input_id) if side_id
    )
    return depending_ids


def _flowfile_data_to_flow_information(flowfile_data: schemas.FlowfileData) -> schemas.FlowInformation:
    """Convert serialized FlowfileData into a FlowInformation object.

    For every node that carries settings, the settings class is looked up by
    node type, the fields that are not part of the serialized form (flow id,
    node id, position, description, dependencies) are injected, and the result
    is validated with that class. Nodes without settings are kept with
    ``setting_input=None`` and ``is_setup=False``.

    The settings stored on ``flowfile_data`` are not modified; injection happens
    on a copy.

    Args:
        flowfile_data: The parsed flowfile contents.

    Returns:
        FlowInformation holding the nodes, the start nodes, the connections
        derived from each node's outputs, and the flow settings.

    Raises:
        ValueError: If a node type has no settings class, or an output node's
            ``output_settings`` lacks ``file_type``.
    """
    import copy

    from flowfile_core.schemas.schemas import get_settings_class_for_node_type

    nodes_dict = {}
    node_starts = []
    for node in flowfile_data.nodes:
        setting_input = None
        if node.setting_input is not None:
            model_class = get_settings_class_for_node_type(node.type)

            if model_class is None:
                raise ValueError(f"Unknown node type: {node.type}")

            is_user_defined = model_class == input_schema.UserDefinedNode

            # Work on a private copy so the caller's data (including the nested
            # output_settings) is left untouched; model_dump() already returns a new dict.
            if isinstance(node.setting_input, dict):
                setting_data = copy.deepcopy(node.setting_input)
            else:
                setting_data = node.setting_input.model_dump()

            # Inject fields that were excluded during serialization
            setting_data['flow_id'] = flowfile_data.flowfile_id
            setting_data['node_id'] = node.id
            setting_data['pos_x'] = float(node.x_position or 0)
            setting_data['pos_y'] = float(node.y_position or 0)
            setting_data['description'] = node.description or ''
            setting_data['is_setup'] = True

            if is_user_defined:
                setting_data['is_user_defined'] = True
                setting_data['depending_on_ids'] = _collect_depending_ids(node)
            else:
                # Only set the dependency fields the settings class actually declares.
                if 'depending_on_id' in model_class.model_fields:
                    setting_data['depending_on_id'] = node.input_ids[0] if node.input_ids else -1
                if 'depending_on_ids' in model_class.model_fields:
                    setting_data['depending_on_ids'] = _collect_depending_ids(node)

            if node.type == 'output' and 'output_settings' in setting_data:
                output_settings = setting_data['output_settings']
                file_type = output_settings.get('file_type', None)
                if file_type is None:
                    raise ValueError("Output node's output_settings must include 'file_type'")
                # Fall back to table settings that carry only the file type.
                if 'table_settings' not in output_settings:
                    output_settings['table_settings'] = {"file_type": file_type}

            setting_input = model_class.model_validate(setting_data)

        node_info = schemas.NodeInformation(
            id=node.id,
            type=node.type,
            is_setup=setting_input is not None,
            description=node.description,
            x_position=node.x_position,
            y_position=node.y_position,
            left_input_id=node.left_input_id,
            right_input_id=node.right_input_id,
            input_ids=node.input_ids,
            outputs=node.outputs,
            setting_input=setting_input,
        )
        nodes_dict[node.id] = node_info
        if node.is_start_node:
            node_starts.append(node.id)

    connections = _derive_connections_from_nodes(flowfile_data.nodes)

    flow_settings = schemas.FlowSettings(
        flow_id=flowfile_data.flowfile_id,
        name=flowfile_data.flowfile_name,
        description=flowfile_data.flowfile_settings.description,
        execution_mode=flowfile_data.flowfile_settings.execution_mode,
        execution_location=flowfile_data.flowfile_settings.execution_location,
        auto_save=flowfile_data.flowfile_settings.auto_save,
        show_detailed_progress=flowfile_data.flowfile_settings.show_detailed_progress,
    )

    return schemas.FlowInformation(
        flow_id=flowfile_data.flowfile_id,
        flow_name=flowfile_data.flowfile_name,
        flow_settings=flow_settings,
        data=nodes_dict,
        node_starts=node_starts,
        node_connections=connections,
    )
|
|
245
|
+
|
|
246
|
+
def _load_flow_storage(flow_path: Path) -> schemas.FlowInformation:
    """
    Read the stored flow from disk, dispatching on the file extension.

    Supports:
    - .flowfile (pickle) - legacy format
    - .yaml / .yml - new YAML format
    - .json - JSON format

    Args:
        flow_path: Path to the flowfile

    Returns:
        FlowInformation object

    Raises:
        ValueError: If the format is unsupported or a legacy .flowfile cannot
            be opened.
    """
    flow_path = _validate_flow_path(flow_path)
    extension = flow_path.suffix.lower()

    if extension in ('.yaml', '.yml'):
        return _load_flowfile_yaml(flow_path)
    if extension == '.json':
        return _load_flowfile_json(flow_path)
    if extension != '.flowfile':
        raise ValueError(f"Unsupported file format: {extension}")

    # Legacy pickle format: load, then upgrade it in place.
    try:
        legacy_flow = load_flowfile_pickle(str(flow_path))
        ensure_compatibility(legacy_flow, str(flow_path))
    except Exception as e:
        raise ValueError(
            f"Failed to open legacy .flowfile: {e}\n\n"
            f"Try migrating: migrate_flowfile('{flow_path}')"
        ) from e
    return legacy_flow
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
def open_flow(flow_path: Path) -> FlowGraph:
    """
    Open a flowfile from a given path and rebuild it as a FlowGraph.

    Supports multiple formats:
    - .flowfile (pickle) - legacy format, auto-migrated
    - .yaml / .yml - new YAML format
    - .json - JSON format

    The graph is rebuilt in two passes over the insertion order: first a
    promise is added for every node, then the settings are applied and each
    node is connected to its outputs. Connections that are stored but still
    missing afterwards are added last, unless the target already has an input
    or the connection would create a cycle.

    Args:
        flow_path (Path): The absolute or relative path to the flowfile

    Returns:
        FlowGraph: The flowfile object

    Raises:
        ValueError: If the path or file format is rejected, or the file cannot be loaded.
        FileNotFoundError: If the file does not exist.
    """
    # Load flow storage (handles format detection)
    flow_path = _validate_flow_path(flow_path)
    flow_storage_obj = _load_flow_storage(flow_path)
    # The file on disk decides the path and name, not what was stored inside it.
    flow_storage_obj.flow_settings.path = str(flow_path)
    flow_storage_obj.flow_settings.name = str(flow_path.stem)
    flow_storage_obj.flow_name = str(flow_path.stem)

    # Determine node insertion order
    ingestion_order = determine_insertion_order(flow_storage_obj)

    # Create new FlowGraph
    new_flow = FlowGraph(name=flow_storage_obj.flow_name, flow_settings=flow_storage_obj.flow_settings)

    # First pass: add node promises
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        node_promise = input_schema.NodePromise(
            flow_id=new_flow.flow_id,
            node_id=node_info.id,
            pos_x=node_info.x_position,
            pos_y=node_info.y_position,
            node_type=node_info.type
        )
        if hasattr(node_info.setting_input, 'cache_results'):
            node_promise.cache_results = node_info.setting_input.cache_results
        new_flow.add_node_promise(node_promise)

    # Second pass: apply settings and connect each node to its outputs
    for node_id in ingestion_order:
        node_info: schemas.NodeInformation = flow_storage_obj.data[node_id]
        if node_info.is_setup:
            if getattr(node_info.setting_input, "is_user_defined", False):
                # A custom node that is not registered is left as a bare promise.
                if node_info.type not in CUSTOM_NODE_STORE:
                    continue
                user_defined_node_class = CUSTOM_NODE_STORE[node_info.type]
                new_flow.add_user_defined_node(
                    custom_node=user_defined_node_class.from_settings(node_info.setting_input.settings),
                    user_defined_node_settings=node_info.setting_input
                )
            else:
                getattr(new_flow, 'add_' + node_info.type)(node_info.setting_input)

        # Setup connections
        from_node = new_flow.get_node(node_id)
        for output_node_id in (node_info.outputs or []):
            to_node = new_flow.get_node(output_node_id)
            if to_node is None:
                # The target does not exist in the graph: drop the dangling reference.
                from_node.delete_lead_to_node(output_node_id)
                stale_connection = (from_node.node_id, output_node_id)
                if stale_connection in flow_storage_obj.node_connections:
                    flow_storage_obj.node_connections.remove(stale_connection)
                continue

            output_node_obj = flow_storage_obj.data[output_node_id]
            # Left/right only count when the target is not already wired to this node on that side.
            is_left_input = (output_node_obj.left_input_id == node_id) and (
                to_node.left_input.node_id != node_id if to_node.left_input is not None else True
            )
            is_right_input = (output_node_obj.right_input_id == node_id) and (
                to_node.right_input.node_id != node_id if to_node.right_input is not None else True
            )
            is_main_input = node_id in (output_node_obj.input_ids or [])

            if is_left_input:
                insert_type = 'left'
            elif is_right_input:
                insert_type = 'right'
            elif is_main_input:
                insert_type = 'main'
            else:
                continue
            to_node.add_node_connection(from_node, insert_type)

    # Handle any missing connections
    for missing_connection in set(flow_storage_obj.node_connections) - set(new_flow.node_connections):
        to_node = new_flow.get_node(missing_connection[1])
        if to_node is None or to_node.has_input:
            continue
        # Never add a connection that would close a loop in the graph.
        if test_if_circular_connection(missing_connection, new_flow):
            continue
        from_node = new_flow.get_node(missing_connection[0])
        if from_node:
            to_node.add_node_connection(from_node)

    return new_flow
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def test_if_circular_connection(connection: Tuple[int, int], flow: FlowGraph) -> bool:
    """Tell whether adding *connection* to *flow* would create a cycle.

    The connection ``(from_id, to_id)`` is circular when the ``from`` node can
    already be reached by following ``leads_to_nodes`` from the ``to`` node.

    Args:
        connection: ``(from_node_id, to_node_id)`` of the candidate connection.
        flow: Graph that resolves node ids via ``get_node``.

    Returns:
        True if the source node is downstream of the target node, else False.
        False as well when the target node is not part of the flow.
    """
    to_node = flow.get_node(connection[1])
    if to_node is None:
        return False

    pending = list(to_node.leads_to_nodes)
    # Each node is expanded once, so the walk terminates even if the
    # downstream graph already contains a loop.
    visited = set()
    while pending:
        candidate = pending.pop()
        if candidate.node_id == connection[0]:
            return True
        if candidate.node_id in visited:
            continue
        visited.add(candidate.node_id)
        pending.extend(candidate.leads_to_nodes)
    return False


# The name starts with ``test_`` but this is a helper, not a test case;
# keep pytest from collecting it when it is imported into a test module.
test_if_circular_connection.__test__ = False
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# flowfile_core/flowfile/node_designer/__init__.py
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
Tools for creating custom Flowfile nodes.
|
|
5
|
+
|
|
6
|
+
This package provides all the necessary components for developers to build their own
|
|
7
|
+
custom nodes, define their UI, and implement their data processing logic.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
# Import the core base class for creating a new node
|
|
11
|
+
from .custom_node import CustomNodeBase, NodeSettings
|
|
12
|
+
|
|
13
|
+
# Import all UI components so they can be used directly
|
|
14
|
+
from .ui_components import (
|
|
15
|
+
Section,
|
|
16
|
+
TextInput,
|
|
17
|
+
NumericInput,
|
|
18
|
+
ToggleSwitch,
|
|
19
|
+
SingleSelect,
|
|
20
|
+
MultiSelect,
|
|
21
|
+
ColumnSelector,
|
|
22
|
+
IncomingColumns, # Important marker class for dynamic dropdowns
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Import the main `Types` object for filtering in ColumnSelector
|
|
26
|
+
from flowfile_core.types import Types
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Define the public API of this package
|
|
30
|
+
__all__ = [
    # Core node classes (imported from .custom_node)
    "CustomNodeBase",
    "NodeSettings",

    # UI components & layout (imported from .ui_components)
    "Section",
    "TextInput",
    "NumericInput",
    "ToggleSwitch",
    "SingleSelect",
    "MultiSelect",
    "ColumnSelector",
    "IncomingColumns",

    # Data type filtering
    "Types",
]
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
# _type_registry.py - Internal type system (not for public use)
|
|
2
|
+
"""
|
|
3
|
+
Internal type registry for mapping between different type representations.
|
|
4
|
+
This module should not be imported directly by users.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import Type, List, Dict, Set, Any
|
|
9
|
+
import polars as pl
|
|
10
|
+
|
|
11
|
+
# Import public types
|
|
12
|
+
from flowfile_core.types import TypeGroup, DataType
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class TypeMapping:
    """
    One immutable row of the internal type table.

    Ties a ``DataType`` enum member to its Polars dtype class, the
    ``TypeGroup`` it is filed under, and any extra string aliases.
    """
    # Enum member identifying the type
    data_type: DataType
    # Polars dtype class corresponding to ``data_type``
    polars_type: Type[pl.DataType]
    # Group this type is indexed under
    type_group: TypeGroup
    # Additional string names for the type; none by default
    aliases: tuple[str, ...] = ()
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TypeRegistry:
    """
    Lookup tables that translate between type representations.

    Holds a fixed list of ``TypeMapping`` rows and indexes them by ``DataType``,
    by Polars dtype class, by lowercase string alias and by ``TypeGroup``.
    Internal helper; not part of the public API.
    """

    def __init__(self):
        self._mappings: List[TypeMapping] = [
            # Numeric types
            TypeMapping(DataType.Int8, pl.Int8, TypeGroup.Numeric, ("i8",)),
            TypeMapping(DataType.Int16, pl.Int16, TypeGroup.Numeric, ("i16",)),
            TypeMapping(DataType.Int32, pl.Int32, TypeGroup.Numeric, ("i32", "int32")),
            TypeMapping(
                DataType.Int64, pl.Int64, TypeGroup.Numeric,
                ("i64", "int64", "int", "integer", "bigint"),
            ),
            TypeMapping(DataType.UInt8, pl.UInt8, TypeGroup.Numeric, ("u8",)),
            TypeMapping(DataType.UInt16, pl.UInt16, TypeGroup.Numeric, ("u16",)),
            TypeMapping(DataType.UInt32, pl.UInt32, TypeGroup.Numeric, ("u32", "uint32")),
            TypeMapping(DataType.UInt64, pl.UInt64, TypeGroup.Numeric, ("u64", "uint64")),
            TypeMapping(DataType.Float32, pl.Float32, TypeGroup.Numeric, ("f32", "float32")),
            TypeMapping(
                DataType.Float64, pl.Float64, TypeGroup.Numeric,
                ("f64", "float64", "float", "double"),
            ),
            TypeMapping(
                DataType.Decimal, pl.Decimal, TypeGroup.Numeric,
                ("decimal", "numeric", "dec"),
            ),

            # String types
            TypeMapping(
                DataType.String, pl.String, TypeGroup.String,
                ("str", "string", "utf8", "varchar", "text"),
            ),
            TypeMapping(
                DataType.Categorical, pl.Categorical, TypeGroup.String,
                ("cat", "categorical", "enum", "factor"),
            ),

            # Date types
            TypeMapping(DataType.Date, pl.Date, TypeGroup.Date, ("date",)),
            TypeMapping(
                DataType.Datetime, pl.Datetime, TypeGroup.Date,
                ("datetime", "timestamp"),
            ),
            TypeMapping(DataType.Time, pl.Time, TypeGroup.Date, ("time",)),
            TypeMapping(
                DataType.Duration, pl.Duration, TypeGroup.Date,
                ("duration", "timedelta"),
            ),

            # Other types
            TypeMapping(
                DataType.Boolean, pl.Boolean, TypeGroup.Boolean,
                ("bool", "boolean"),
            ),
            TypeMapping(
                DataType.Binary, pl.Binary, TypeGroup.Binary,
                ("binary", "bytes", "bytea"),
            ),
            TypeMapping(DataType.List, pl.List, TypeGroup.Complex, ("list", "array")),
            TypeMapping(DataType.Struct, pl.Struct, TypeGroup.Complex, ("struct", "object")),
            TypeMapping(DataType.Array, pl.Array, TypeGroup.Complex, ("fixed_array",)),
        ]

        self._build_indices()

    def _build_indices(self):
        """
        Populate the lookup dictionaries from ``self._mappings``.

        Alias keys are stored lowercase. Rows are processed in list order and a
        later assignment to the same key replaces the earlier one.
        """
        self._by_data_type: Dict[DataType, TypeMapping] = {}
        self._by_polars_type: Dict[Type[pl.DataType], TypeMapping] = {}
        self._by_alias: Dict[str, TypeMapping] = {}
        self._by_group: Dict[TypeGroup, List[TypeMapping]] = {g: [] for g in TypeGroup}

        for entry in self._mappings:
            self._by_data_type[entry.data_type] = entry
            self._by_polars_type[entry.polars_type] = entry

            if entry.type_group != TypeGroup.All:
                self._by_group[entry.type_group].append(entry)

            polars_name = entry.polars_type.__name__
            # Order matters: declared aliases first, then the enum value, the
            # Polars class name and finally the "pl.TypeName" spelling.
            # NOTE(review): because later writes win, the "array" alias declared
            # on the List row appears to be replaced by the derived name of
            # pl.Array further down the table -- confirm which one is intended.
            alias_keys = [alias.lower() for alias in entry.aliases]
            alias_keys.append(entry.data_type.value.lower())
            alias_keys.append(polars_name.lower())
            alias_keys.append(f"pl.{polars_name}".lower())
            for key in alias_keys:
                self._by_alias[key] = entry

    def _lookup_polars(self, type_spec: Any):
        """Return the mapping for a Polars dtype class or instance, or None."""
        if isinstance(type_spec, type) and issubclass(type_spec, pl.DataType):
            by_class = self._by_polars_type.get(type_spec)
            if by_class:
                return by_class

        if isinstance(type_spec, pl.DataType):
            if hasattr(type_spec, 'base_type'):
                base = type_spec.base_type()
            else:
                base = type(type_spec)
            return self._by_polars_type.get(base)

        return None

    def _resolve_string(self, type_spec: str) -> Set[DataType]:
        """
        Resolve a string spec: exact TypeGroup value, exact DataType value,
        then case-insensitive alias. Unknown strings give an empty set.
        """
        lowered = type_spec.lower()

        try:
            group = TypeGroup(type_spec)
        except (ValueError, KeyError):
            pass
        else:
            return {m.data_type for m in self._by_group.get(group, [])}

        try:
            return {DataType(type_spec)}
        except (ValueError, KeyError):
            pass

        by_alias = self._by_alias.get(lowered)
        return {by_alias.data_type} if by_alias else set()

    def normalize(self, type_spec: Any) -> Set[DataType]:
        """
        Turn a single type specification into a set of ``DataType`` members.

        Accepts ``TypeGroup.All`` / ``"ALL"``, a ``TypeGroup``, a ``DataType``,
        a Polars dtype class or instance, or a string. Anything that cannot be
        resolved yields an empty set.
        """
        # "Everything" short-circuits ahead of the per-kind checks
        if type_spec == TypeGroup.All or type_spec == "ALL":
            return set(self._by_data_type.keys())

        if isinstance(type_spec, TypeGroup):
            return {m.data_type for m in self._by_group.get(type_spec, [])}

        if isinstance(type_spec, DataType):
            return {type_spec}

        polars_hit = self._lookup_polars(type_spec)
        if polars_hit:
            return {polars_hit.data_type}

        if isinstance(type_spec, str):
            return self._resolve_string(type_spec)

        # Unrecognized specification
        return set()

    def normalize_list(self, type_specs: List[Any]) -> Set[DataType]:
        """Return the union of ``normalize`` applied to every item of ``type_specs``."""
        combined: Set[DataType] = set()
        for item in type_specs:
            combined |= self.normalize(item)
        return combined

    def get_polars_types(self, data_types: Set[DataType]) -> Set[Type[pl.DataType]]:
        """Map each known ``DataType`` to its Polars dtype class; unknown members are skipped."""
        found = (self._by_data_type.get(dt) for dt in data_types)
        return {entry.polars_type for entry in found if entry}

    def get_polars_type(self, data_type: DataType) -> Type[pl.DataType]:
        """Return the Polars dtype class for ``data_type``, or ``pl.String`` when it is unknown."""
        entry = self._by_data_type.get(data_type)
        if not entry:
            return pl.String  # Default fallback
        return entry.polars_type


# Module-wide registry instance used by the helper functions below
_registry = TypeRegistry()
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Internal API functions (not for public use)
|
|
182
|
+
def normalize_type_spec(type_spec: Any) -> Set[DataType]:
    """
    Resolve a type specification, or a list of them, to ``DataType`` members.

    A ``list`` is resolved item by item and merged; any other value is handed
    to the registry as a single specification.
    """
    resolver = _registry.normalize_list if isinstance(type_spec, list) else _registry.normalize
    return resolver(type_spec)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def get_polars_types(data_types: Set[DataType]) -> Set[Type[pl.DataType]]:
    """Return the Polars dtype classes the shared registry holds for ``data_types``."""
    polars_types = _registry.get_polars_types(data_types)
    return polars_types
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def check_column_type(column_dtype: pl.DataType, accepted_types: Set[DataType]) -> bool:
    """
    Tell whether ``column_dtype`` resolves to at least one accepted type.

    The dtype is normalized through the shared registry and intersected with
    ``accepted_types``; a non-empty intersection means the column matches.
    """
    overlap = _registry.normalize(column_dtype) & accepted_types
    return len(overlap) > 0
|